#! /usr/bin/perl -w # Network Forensics Puzzle Contest #3 # Alan Tu # January 5, 2010 # stream.pl v1.35 # Uses tshark to identify or reassemble TCP streams. # Advantages: # 1. Only requires tshark 1.2.0 or later, and standard Perl modules. # 2. Reassembles streams with duplicate, missing, out of order, or overlapping TCP segments. # Usage: stream.pl [-R display_filter] pcap_file # Prints out information about each identified TCP stream from pcap_file, including stream number, time, IPs, ports, and application protocol. # -R optionally specifies a tshark display filter # stream.pl -s tcp.stream pcap_file # Outputs the application-layer data of the TCP stream identified by stream index tcp.stream from pcap_file # Each direction is output to a file dependent on the four-tupple # stream.pl -w tcp.stream pcap_file output_pcap_file # Outputs the TCP stream to its own PCAP file use strict; use Getopt::Std; use Digest::MD5; # path to tshark executable #my $TSHARK = 'c:\progra~1\wireshark\tshark.exe'; my $TSHARK = "/usr/bin/tshark"; #my $TSHARK = 'c:\progra~1\wireshark\tshark.exe'; # path to disabled_protos, will be backed up and restored if it exists my $WIRESHARK_DISABLED_PROTOS = $ENV{APPDATA} . '\wireshark\disabled_protos'; # on unix, typical location is $HOME/.wireshark/disabled_protos my $delim = ", "; # delimiter for output, default is CSV compatible our($opt_s, $opt_R, $opt_w); getopts("s:R:w:"); die "Cannot find tshark at $TSHARK, please check \$TSHARK variable in $0\n" unless -f $TSHARK; die "Usage: $0 [-s|-w tcp.stream] [-R display_filter] pcap_file\n" unless @ARGV >= 1; die "File $ARGV[0] does not exist\n" unless -f $ARGV[0]; if (defined($opt_s)) # extract a specific TCP stream { my(@packets, $subseq, $ren, $backup_file, %flows, $ip_a, $ip_b, $fn_a, $fn_b, %streams); # process packets RESTART: %flows = (); @packets = `$TSHARK -r $ARGV[0] -T fields -e frame.protocols -e ip.src -e tcp.srcport -e ip.dst -e tcp.dstport -e tcp.seq -e data.data -R \"tcp.stream == $opt_s && tcp.len > 0\"`; unless (@packets) { print STDERR "No data in stream $opt_s\n"; exit; } for (@packets) { chomp; my($proto, $src_ip, $src_port, $dst_ip, $dst_port, $seq, $data) = split("\t", $_); # determine what tshark thinks the application layer protocol is so we can disable that dissector ($proto) = $proto =~ /:tcp:([^:]+)/; if (defined($proto) && $proto ne "data") { # we disable the dissectors so tshark outputs data.data, the data of the TCP segment # if this is the first time and disabled_protos already exists, back it up if (!$subseq && -e $WIRESHARK_DISABLED_PROTOS) { $ren = 1; $subseq = 1; $backup_file = $WIRESHARK_DISABLED_PROTOS . "." . time(); my $status = rename($WIRESHARK_DISABLED_PROTOS, $backup_file); die "Cannot backup disabled_protos\n" unless $status; } elsif (!$subseq) { $subseq = 1; } # append to disabled_protos file to disable the dissector open(OUT, ">>$WIRESHARK_DISABLED_PROTOS") or die "$WIRESHARK_DISABLED_PROTOS: $!\n"; print STDERR "Disabling dissector $proto in $WIRESHARK_DISABLED_PROTOS\n"; print OUT "$proto\n"; close(OUT); goto RESTART; } unless (defined($ip_a) && defined($ip_b)) # on first run, determine IP addresses for this stream { $ip_a = $src_ip; $ip_b = $dst_ip; $fn_a = sprintf("%s.%s-%s.%s", $src_ip, $src_port, $dst_ip, $dst_port); $fn_b = sprintf("%s.%s-%s.%s", $dst_ip, $dst_port, $src_ip, $src_port); } # disregard packet if its a duplicate and contains no new TCP data next if exists($flows{$src_ip}->{$seq}) && (length($data)+1) / 3 < length($flows{$src_ip}->{$seq}); $data =~ s/://g; # remove the colons from data.data $flows{$src_ip}->{$seq} = pack("H*", $data); # convert hex to binary } for my $k (keys %flows) # for each of the two directions { my(@seq, $expected); @seq = sort {$a <=> $b} keys %{$flows{$k}}; # sort data by sequence number $expected = $seq[0]; $streams{$k} = ""; # the ordered TCP bytestream for (@seq) { my $missing = $_ - $expected; # are there missing/lost bytes? if ($missing > 0) # yes, print message for awareness { print STDERR "*** Missing packet flow from $k: offset " . ($expected - $seq[0]) . ", length $missing\n"; $expected += $missing; } elsif ($missing < 0) # duplicate packet or overlapping data { $missing = $_ + length($flows{$k}->{$_}) - $expected; # bytes of new data next if $missing <= 0; # no new data, duplicate packet $flows{$k}->{$_} = substr($flows{$k}->{$_}, -$missing); # only keep the new data } $streams{$k} .= $flows{$k}->{$_}; # append the data to the TCP bytestream $expected += length($flows{$k}->{$_}); } } if (defined($streams{$ip_a})) { open(OUT, ">$fn_a") or die "$!\n"; # output bytestream in first direction printf(STDERR "Writing file %s, length %d bytes, MD5 hash %s\n", $fn_a, length($streams{$ip_a}), Digest::MD5::md5_hex($streams{$ip_a})); binmode(OUT); print OUT $streams{$ip_a}; close(OUT); } if (defined($streams{$ip_b})) { open(OUT, ">$fn_b") or die "$!\n"; # and the other direction printf(STDERR "Writing file %s, length %d bytes, MD5 hash %s\n", $fn_b, length($streams{$ip_b}), Digest::MD5::md5_hex($streams{$ip_b})); binmode(OUT); print OUT $streams{$ip_b}; close(OUT); } unlink($WIRESHARK_DISABLED_PROTOS); # delete our copy of disabled_protos if ($ren) # if we backed up an existing copy, restore it { my $status = rename($backup_file, $WIRESHARK_DISABLED_PROTOS); die "Cannot restore disabled_protos\n" unless $status; } } elsif (defined($opt_w)) # write a specific TCP stream to its own PCAP file using tshark { die "Usage: $0 -w tcp.stream pcap_file output_pcap_file\n" unless defined($ARGV[1]); `$TSHARK -r $ARGV[0] -w $ARGV[1] -R \"tcp.stream == $opt_w\"`; } else # display available TCP streams { my(@packets, %streams); if ($opt_R) # additional display filter { $opt_R = "\"tcp && ($opt_R)\""; } else # no display filter specified { $opt_R = "\"tcp\""; } @packets = `$TSHARK -r $ARGV[0] -T fields -e tcp.stream -e frame.time -e ip.src -e tcp.srcport -e ip.dst -e tcp.dstport -e frame.protocols -R $opt_R`; for (@packets) { chomp; my($stream, $time, $src_ip, $src_port, $dst_ip, $dst_port, $proto) = split("\t", $_); next if $stream eq ""; # non-TCP # the application protocol tshark identifies ($proto) = $proto =~ /:tcp:([^:]+)/; $proto = "" unless defined($proto); # note the stream if the stream ID is new unless (exists($streams{$stream})) { $time = convert_tshark_time($time); # convert frame.time to nicer format $streams{$stream} = [join($delim, $time, $src_ip, $src_port, $dst_ip, $dst_port), $proto]; } # note the protocol if the application protocol hasn't been determined or if the application is generic data elsif ($streams{$stream}->[1] eq "" || $streams{$stream}->[1] eq "data") { $streams{$stream}->[1] = $proto; } } print join($delim, qw/tcp.stream TIME IP PORT IP PORT APP_PROTOCOL/) . "\n"; for (sort {$a <=> $b} keys(%streams)) # print the information about each stream { print join($delim, $_, $streams{$_}->[0], $streams{$_}->[1]) . "\n"; } } # converts Wireshark's frame.time field into yyyy/mm/dd hh:mm:ss.ss sub convert_tshark_time { my %months = (Jan => "01", Feb => "02", Mar => "03", Apr => "04", May => "05", Jun => "06", Jul => "07", Aug => "08", Sep => "09", Oct => "10", Nov => "11", Dec => "12"); my $t = shift; # argument to convert my($mon, $day, $year, $hhmmss) = $t =~ /^(...)\s+(\d+), (\d+) (.+)$/; # capture fields $mon = $months{$mon}; # convert months $day = sprintf("%02d", $day); # make sure day is always two digits return "$year/$mon/$day $hhmmss"; }