#!/usr/bin/perl
# Network Forensics Puzzle Contest #3
# Alan Tu
# January 2, 2010
# http_analysis.pl v1.01
#
# Uses tshark to output the IP addresses, TCP ports, and key HTTP request and
# response headers from a PCAP file.
#
# Usage: http_analysis.pl [-d tcp.port] [-R display_filter] pcap_file
#   Prints out HTTP information from the specified PCAP.
#   -R passes additional display filter arguments to tshark; they are ANDed
#      with the built-in "HTTP request or response" filter.
#   -d tells tshark (via -d tcp.port==PORT,http) to decode traffic to the
#      specified TCP port as http.
#
# Courtesy functionality:
#   http_analysis.pl -w tcp.stream pcap_file output_pcap_file
#   Outputs the TCP stream specified by the tcp.stream index to its own PCAP
#   file.

use strict;
use warnings;
use Getopt::Std;

# path to tshark executable, modify for specific environment
#my $TSHARK = 'c:\progra~1\wireshark\tshark.exe';
my $TSHARK = "/usr/bin/tshark";

# tshark switch that carries the display filter.
# NOTE(review): newer tshark releases reportedly want "-Y" here (or "-2"
# together with "-R"); confirm against the installed tshark and adjust.
my $FILTER_SWITCH = "-R";

# delimiter for output, default is CSV compatible
# NOTE: values are not quoted, so a field that itself contains a comma
# (some User-Agent strings do) will spill into the next column.
my $delim = ", ";

# Fields requested from tshark, in output order.  The first three are used
# internally only (request/response flags and eth.src for the summary); the
# remainder are printed and double as the column heading.
my @TSHARK_FIELDS = qw(
    http.request http.response eth.src
    frame.number frame.time tcp.stream
    ip.src tcp.srcport ip.dst tcp.dstport
    http.request.method http.request.uri http.host http.user_agent
    http.response.code http.content_type http.content_encoding
    http.content_length
);

# Index of the first field that is printed in the per-packet listing.
my $FIRST_PRINTED = 3;

# Maps a tshark field name to its column position in a tshark output line.
my %INDEX_OF;
@INDEX_OF{@TSHARK_FIELDS} = (0 .. $#TSHARK_FIELDS);

# Month abbreviation -> two-digit month number, for convert_tshark_time.
my %MONTH_NUMBER = (
    Jan => "01", Feb => "02", Mar => "03", Apr => "04",
    May => "05", Jun => "06", Jul => "07", Aug => "08",
    Sep => "09", Oct => "10", Nov => "11", Dec => "12",
);

# Entry point: parses the command line and dispatches to either the
# "write one TCP stream" mode (-w) or the HTTP report mode.
# Dies with a usage/error message on bad arguments.  Returns 0 on success.
sub main {
    my $usage = "Usage: $0 [-w tcp.stream] [-d tcp.port] [-R display_filter] pcap_file\n";

    my %opt;
    getopts("d:R:w:", \%opt) or die $usage;

    die "Cannot find tshark at $TSHARK, please check \$TSHARK variable in $0\n"
        unless -f $TSHARK;
    die $usage unless @ARGV >= 1;

    my ($pcap_file, $output_file) = @ARGV;
    die "File $pcap_file does not exist\n" unless -f $pcap_file;

    if (defined $opt{w}) {
        # write a PCAP with the TCP session specified by tcp.stream index
        die "Usage: $0 -w tcp.stream pcap_file output_pcap_file\n"
            unless defined $output_file;
        write_tcp_stream($pcap_file, $output_file, $opt{w});
    }
    else {
        report_http($pcap_file, $opt{R}, $opt{d});
    }
    return 0;
}

# Writes the packets of one TCP stream to a new capture file.
#   $pcap_file   - capture to read
#   $output_file - capture to write
#   $stream      - tcp.stream index to extract
# Dies if tshark cannot be started or exits with a non-zero status.
sub write_tcp_stream {
    my ($pcap_file, $output_file, $stream) = @_;

    # List form: arguments reach tshark verbatim, no shell is involved, so
    # file names with spaces or metacharacters are safe.
    my @cmd = (
        $TSHARK,
        '-r', $pcap_file,
        '-w', $output_file,
        $FILTER_SWITCH, "tcp.stream == $stream",
    );
    system(@cmd) == 0
        or die "tshark failed while writing $output_file (status $?)\n";
    return;
}

# Prints one delimited line per HTTP request/response in the capture,
# followed by a summary of request sources in descending order of count.
#   $pcap_file    - capture to read
#   $extra_filter - optional extra display filter (from -R), may be undef
#   $http_port    - optional TCP port to decode as HTTP (from -d), may be undef
# Dies if tshark cannot be started; warns if it exits with a non-zero status.
sub report_http {
    my ($pcap_file, $extra_filter, $http_port) = @_;

    # basic filter for HTTP processing
    my $filter = "(http.request == 1 or http.response == 1)";
    # Parenthesize the user's filter so an "or" inside it cannot escape the
    # HTTP-only restriction.
    $filter .= " && ($extra_filter)" if defined $extra_filter;

    my @cmd = ($TSHARK, $FILTER_SWITCH, $filter);
    push @cmd, '-d', "tcp.port==$http_port,http" if defined $http_port;
    push @cmd, '-T', 'fields', (map { ('-e', $_) } @TSHARK_FIELDS);
    push @cmd, '-r', $pcap_file;

    # List-form piped open: no shell, arguments are passed as-is.
    # NOTE(review): on Windows this form needs a reasonably recent perl.
    open(my $tshark_out, '-|', @cmd)
        or die "Cannot run $TSHARK: $!\n";

    # column heading
    print join($delim, @TSHARK_FIELDS[$FIRST_PRINTED .. $#TSHARK_FIELDS]) . "\n";

    my %requests_from;    # "eth.src, ip.src, user_agent" => number of requests

    while (my $line = <$tshark_out>) {
        chomp $line;
        next unless length $line;

        # Limit -1 keeps trailing empty columns; without it a request line
        # (whose response columns are empty) would come out short.
        my @fields = split /\t/, $line, -1;
        push @fields, ('') x (@TSHARK_FIELDS - @fields)
            if @fields < @TSHARK_FIELDS;

        # convert frame.time to nicer format
        my $time_idx = $INDEX_OF{'frame.time'};
        $fields[$time_idx] = convert_tshark_time($fields[$time_idx]);

        print join($delim, @fields[$FIRST_PRINTED .. $#fields]) . "\n";

        # Track request sources.  tshark prints "1" for a set boolean;
        # "True" is accepted as well in case the installed version spells
        # booleans that way (unconfirmed; harmless if it never occurs).
        if ($fields[ $INDEX_OF{'http.request'} ] =~ /\A(?:1|true)\z/i) {
            my $source = join $delim,
                @fields[ @INDEX_OF{qw(eth.src ip.src http.user_agent)} ];
            $requests_from{$source}++;
        }
    }

    # For a pipe, close() reports the child's exit status through $?.
    close $tshark_out
        or warn "tshark exited abnormally (status $?), output may be incomplete\n";

    print "\nSummary of sources:\n";
    print join($delim, qw/eth.src ip.src http.user_agent http.request/) . "\n";

    # print in descending order by number of requests
    my @by_count = sort { $requests_from{$b} <=> $requests_from{$a} }
        keys %requests_from;
    print join($delim, $_, $requests_from{$_}) . "\n" for @by_count;
    return;
}

# Converts Wireshark's frame.time field ("Mon D, YYYY hh:mm:ss...") into
# "yyyy/mm/dd hh:mm:ss...".  Everything after the year is passed through
# unchanged.
#   $t - frame.time string as printed by tshark
# Returns the converted string.  If the input is undefined an empty string is
# returned; if it does not look like a frame.time value it is returned as-is
# rather than producing a garbled date.
sub convert_tshark_time {
    my ($t) = @_;
    return '' unless defined $t;

    my ($mon, $day, $year, $rest) =
        $t =~ /^([A-Za-z]{3})\s+(\d+),\s+(\d+)\s+(.+)$/
        or return $t;

    my $mon_num = $MONTH_NUMBER{$mon};
    return $t unless defined $mon_num;

    # make sure day is always two digits
    return sprintf("%s/%s/%02d %s", $year, $mon_num, $day, $rest);
}

# Run only when executed as a script, so the file can also be loaded (e.g.
# by a test) without invoking tshark.
main() unless caller;

1;