#!/usr/bin/perl
################################################################################
#			httpAnalyzer.pl
################################################################################
# This script reads in a pcap network capture to analyze HTTP traffic. It
# writes an HTML file named <pcapFile>.httpAnalyzer.html (i.e. alongside the
# capture file) that contains the analysis of every HTTP transfer recorded in
# the pcap file. The analysis output file has been tested to work in
# Firefox 3.5+ and may be very large. Please allow it to fully load before
# attempting to interact with it.
#
# This script requires the following modules:
#   Net::LibNIDS
#   Compress::Zlib
#   MIME::Base64
#   Digest::MD5
# Additionally, Net::LibNIDS version 0.01 contains a serious bug
# (http://rt.cpan.org/Public/Bug/Display.html?id=52879) that will prevent it
# from working. A patch and a Gentoo ebuild are both available from
# http://modtwo.com
#
# Usage:
# 	httpAnalyzer.pl capture.pcap
#
# Author: Tom Samstag http://modtwo.com
# Version: 0.1
# Date: 2010-02-01
#
# Copyright 2009 Tom Samstag, modtwo (at) modtwo (dot) com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


use strict;
use warnings;
use English;
use Net::LibNIDS;
use Compress::Zlib;
use MIME::Base64;
use Digest::MD5 qw(md5_hex);
use constant VERSION=>0.1;

# Command-line handling: exactly one argument is expected, the pcap capture
# file to analyze.
if (@ARGV != 1)
{
	print STDERR "Usage: $0 pcapFile\n";
	exit 1;
}

my $filename = $ARGV[0];

# State shared with collector(). %data holds, per connection key
# ("clientIp:clientPort -> serverIp:serverPort"), the not-yet-consumed bytes of
# each direction plus the last complete request seen. $transfersCompleted
# numbers the transfers written to the report. Both are initialised here,
# before the capture is replayed, so the initial values are in place by the
# time the callback runs.
my %data;
my $transfersCompleted = 0;

Net::LibNIDS::param::set_filename($filename);
Net::LibNIDS::init();                     # processes all parameters
Net::LibNIDS::tcp_callback(\&collector);  # invoked on TCP stream events

# The report is written next to the capture file. Three-argument open keeps
# characters in the file name from being interpreted as an open mode, and a
# failure to create the report is fatal instead of silently producing nothing.
my $outputFile = "$filename.httpAnalyzer.html";
open(OUTPUT_STREAM, '>', $outputFile)
	or die "Cannot open $outputFile for writing: $!\n";

print_output_header();
Net::LibNIDS::run();                      # replay the capture; drives collector()
print_output_footer();

# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close(OUTPUT_STREAM)
	or die "Cannot finish writing $outputFile: $!\n";
# TCP stream callback registered with Net::LibNIDS.
#
# Parameters:
#   $connection - the connection object handed to the callback by Net::LibNIDS.
#
# Behaviour:
#   * On a newly established connection, data collection is switched on for
#     both directions.
#   * On new data, bytes are appended to a per-connection buffer in %data.
#     Data on the server half-stream is scanned for a complete GET/POST request
#     header block; data on the client half-stream is scanned for a complete
#     response header block carrying a Content-Length header. Once the full
#     body is buffered, the transfer is written out via print_file_transfer().
#   * On any other state the per-connection buffers are released.
#     NOTE(review): per the libnids documentation the remaining states are all
#     terminal (close, reset, timeout, exit) - confirm against the installed
#     version.
#
# Limitation: a response is only recognised when a Content-Length header is
# present; the pattern cannot delimit bodies framed any other way.
sub collector {
	my $connection = shift;
	my $state = $connection->state;

	if ($state == Net::LibNIDS::NIDS_JUST_EST()) {
		$connection->server->collect_on;  #start tracing data from server
		$connection->client->collect_on;  #start tracing data from client
		return;
	}

	my $key = $connection->client_ip . ':' . $connection->client_port . ' -> ' .
			$connection->server_ip . ':' . $connection->server_port;

	if ($state != Net::LibNIDS::NIDS_DATA()) {
		# The connection is finished: drop its buffers so memory does not grow
		# with every connection in the capture and so leftover bytes cannot be
		# mixed into a later connection that reuses the same address/port pair.
		delete $data{$key};
		return;
	}

	my $server_new = $connection->server->count_new;
	my $client_new = $connection->client->count_new;
	my $stream = $data{$key} ||= {};

	if ($server_new)
	{
		$stream->{server} .= $connection->server->data;

		# A complete request: request line, any headers, then an empty line.
		if ($stream->{server} =~ /\A((?:GET|POST) [^\r\n]* HTTP\/1\.[01][\r\n].*?^\r\n)/ms)
		{
			my $request = $1;
			$stream->{completed_request} = $request;
			$stream->{server} = substr($stream->{server}, length $request);
		}
	}
	elsif ($client_new)
	{
		$stream->{client} .= $connection->client->data;

		# A complete response header block that announces its body length.
		# Header field names are case-insensitive and the whitespace around the
		# value is optional, so neither is matched literally.
		if ($stream->{client} =~ /\A(HTTP\/1\.[01] \d+.*?^(?i:Content-Length):[ \t]*(\d+)[ \t]*[\r\n].*?^\r\n)/ms)
		{
			my ($responseHeaders, $length) = ($1, $2);
			my $headerLength = length $responseHeaders;

			# Wait until the whole body has been buffered.
			if (length($stream->{client}) - $headerLength >= $length)
			{
				my $content = substr($stream->{client}, $headerLength, $length);

				# A response can show up without a captured request (e.g. the
				# capture started mid-connection); report it with an empty one.
				my $request = defined $stream->{completed_request}
					? $stream->{completed_request}
					: '';
				$transfersCompleted++;

				print_file_transfer($key, $transfersCompleted, $request, $responseHeaders, $content);

				# Keep whatever follows the body for the next response.
				$stream->{client} = substr($stream->{client}, $headerLength + $length);
				$stream->{completed_request} = '';
			}
		}
	}

}

# Writes the opening part of the HTML report to OUTPUT_STREAM: the document
# head (jQuery include, the page's tab/filter/XML-query script and its style
# sheet), the page title naming the analyzed capture file, and the filter form.
# Takes no arguments; reads the file-level $filename.
sub print_output_header
{
print OUTPUT_STREAM <<'HERE';
<!DOCTYPE html>
<html>
	<head>
		<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
		<script type="text/javascript" src="http://jquery.com/src/jquery-latest.pack.js"></script>
		<script type="text/javascript">
			$(function() {
				// hide all non-first tabs
				$('div.tabs').each(function(i) {
					$(this).children('div.tab:gt(0)').hide()
					var tablist = $(this).children('ul.tablist');
					tablist.children('li.tab').removeClass('selectedTab');
					tablist.children('li.tab:eq(0)').addClass('selectedTab');
					});

				// handle clicking on tabs
				$('div.tabs ul.tablist li.tab').click(function() {
					var tabs = $(this).parents('div.tabs');
					var tablist = tabs.children('ul.tablist');
					var tabnum = tablist.children('li.tab').index(this);
					tabs.children('div.tab').hide();
					tablist.children('li.tab').removeClass('selectedTab');
					tabs.children('div.tab:eq('+tabnum+')').show();
					tablist.children('li.tab:eq('+tabnum+')').addClass('selectedTab');
					});

				// handle xml querying
				$('div.xmlQuery input:button').click(function(){
					variable = $(this).siblings('input[name=contentVariable]').val();
					query = $(this).siblings('input[name=selector]').val();
					var parser = new DOMParser();
					xmldoc = parser.parseFromString(window[variable], "text/xml");
					result = $("<ol/>");
					serializer = new XMLSerializer();
					$(query, xmldoc).each(function(i){
						result.append( $('<li/>').text(serializer.serializeToString(this)) );
						});
					$(this).siblings('div.xmlQueryResults').html(result);
					});

				// handle clicking on a header of a collapsable
				$('div.collapsable > *:header').click(function(){
					$(this).siblings(':not(:header)').toggle();
					});

//				// add request and response keys to filter select
//				$('dl.request dt').each(function(i){
//					val = $(this).text();
//					if ($('div#filters option[request='+val+']').length == 0)
//					{
//						$('div#filters').append($('<option value="dd.request > dt">'+val+'</option>'));
//					}
//					});

				// handle filtering content
				$('div#filters input:button').click(function(){
					selector = $(this).siblings('select[name=filterField]').val();
					value = $(this).siblings('input:text').val();
					matchType = $(this).siblings('select[name=filterType]').val();
					if (value == "")
					{
						$('div.transferBox').show();
					}
					else
					{
						if (matchType == "contains")
						{
							$('div.transferBox').not(':has('+selector+':contains('+value+'))').hide();
						}
						else if (matchType == "doesNotContain")
						{
							$('div.transferBox').has(selector+':contains('+value+')').hide();
						}
					}
					});
			});

		</script>
		<style type="text/css">
pre {
	margin:0 0 20px 0;
}
pre code {
	border:1px dashed #808080;
	display:block;
	padding:5px;
	font-size: 12px;
	background:#f9f9f9;
	color:#000;
	overflow-x: auto;
	white-space:nowrap;
}

div.rollbox div.title { border: 2px; }
dl { font-family: monospace; overflow-x: auto; white-space: nowrap; }
dl dt { display:inline; font-weight: bold; width: 50em;}
dl dd { display:inline; margin-left: 0.5em; }
.request { border: 1px dotted #80FF80; background-color: #E0FFE0; }
.response { border: 1px dotted #8080FF; background-color: #E0E0FF; }
.getParams { border: 1px dotted #FF8080; background-color: #FFE0E0; }

div.tabs ul.tablist { list-style: none; padding: 0; margin: 0; }
div.tabs ul.tablist li.tab { display: inline; border: 1px solid #bbb; border-bottom-width: 0; margin: 0; padding: 0 10px; background: #eee; }
div.tabs ul.tablist li.tab:hover {  background: #ddf; }
div.tabs ul.tablist li.tab.selectedTab { border-color: black; background: white; }
div.tabs div.tab { border: 1px solid black; }

div.transferBox { border: 1px solid black; margin-bottom: 0.4em; }
div.transferBox h2 { font-size: 0.9em; margin: 0px; background-color: #E0E0E0; }
div.transferBox h3 { font-size: 0.8em; margin: 0px; background-color: #EFEFEF; }

img.inlineImage { border: 1px solid black; margin: 5px; }

	</style>
	</head>
	<body>
HERE

	# The capture path comes straight from the command line; escape it so that
	# characters such as & or < in the name cannot break the page markup.
	my $escapedFilename = $filename;
	$escapedFilename =~ s/&/&amp;/g;
	$escapedFilename =~ s/</&lt;/g;
	$escapedFilename =~ s/>/&gt;/g;
	print OUTPUT_STREAM "		<h1>HTTP Analysis of $escapedFilename</h1>\n";

	# The option values are the selectors the filter script combines with
	# :contains(); each <option> is a normal open/close pair (option is not a
	# void element, so it must not be written self-closed).
	print OUTPUT_STREAM <<'HERE';
		<div id="filters">
			<legend for="filterField">Filter:</legend>
			<select name="filterField">
				<option value="span.srcIp">source.ip</option>
				<option value="span.srcPort">source.port</option>
				<option value="span.destIp">dest.ip</option>
				<option value="span.destPort">dest.port</option>
				<option value="h3">uri</option>
			</select>
			<select name="filterType">
				<option value="contains">contains</option>
				<option value="doesNotContain">does not contain</option>
			</select>
			<input type="text" name="filterText"/>
			<input type="button" value="Go!"/>
		</div>
HERE
}

# Escapes the characters that are significant in HTML text and attribute
# values. Returns '' for an undefined argument.
sub _html_escape
{
	my ($text) = @_;
	return '' unless defined $text;
	$text =~ s/&/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;
	$text =~ s/'/&#39;/g;
	return $text;
}

# Escapes a string for use inside a single-quoted JavaScript string literal
# that is itself embedded in an HTML <script> element. "<" is written as \x3c
# so the data can never contain a literal "</script>".
sub _js_string_escape
{
	my ($text) = @_;
	return '' unless defined $text;
	$text =~ s/\\/\\\\/g;
	$text =~ s/'/\\'/g;
	$text =~ s/\r/\\r/g;
	$text =~ s/\n/\\n/g;
	$text =~ s/</\\x3c/g;
	return $text;
}

# Renders a raw block of header lines as an HTML definition list. Each line is
# split after its first colon (a line without a colon becomes the term on its
# own). $cssClass is added next to the "headers" class of the list.
sub _headers_to_dl
{
	my ($cssClass, $rawHeaders) = @_;

	my $html = "				<dl class=\"headers $cssClass\">\n";
	foreach my $line (split /[\r\n]+/, $rawHeaders)
	{
		my ($name, $value) = $line =~ /^([^:]*:?)(.*)$/;
		$html .= "					<div><dt>" . _html_escape($name) . "</dt><dd>" . _html_escape($value) . "</dd></div>\n";
	}
	$html .= "				</dl>\n";
	return $html;
}

# Writes one HTTP transfer to OUTPUT_STREAM as a collapsible box with tabs.
#
# Parameters:
#   $key             - connection label, "srcIp:srcPort -> destIp:destPort"
#   $transferNumber  - running number of the transfer; used to name the
#                      JavaScript variable that backs the XML query tab
#   $request         - raw request header block (may be empty or undefined)
#   $responseHeaders - raw response header block
#   $content         - response body bytes
#
# Tabs: Request, Response and File Info are always present. GET Params appears
# when the request line has a query string, Image for gif/jpeg/png content
# types, Source and Query for text/xml, and Apple PList for text/xml that
# references the Apple property-list DTD.
#
# Everything taken from the capture is untrusted, so it is HTML-escaped (or
# JavaScript-escaped for the query tab) before being written into the report.
sub print_file_transfer
{
	my ($key, $transferNumber, $request, $responseHeaders, $content) = @_;

	# A response can be reported without a captured request; treat anything
	# missing as empty instead of operating on undef.
	$request         = '' unless defined $request;
	$responseHeaders = '' unless defined $responseHeaders;
	$content         = '' unless defined $content;

	# extract the content-type from the responseHeaders (header names are
	# case-insensitive)
	my $contentType = '';
	$contentType = $1 if ($responseHeaders =~ /^Content-Type:[ \t]*([^\r\n]*)/mi);

	# gunzip the content if it was served with  Content-Encoding: gzip
	if ($responseHeaders =~ /^Content-Encoding:\s*gzip\s*$/mi)
	{
		# memGunzip destroys the buffer it is given and returns undef on
		# failure, so work on a copy and keep the raw bytes if inflating fails.
		my $compressed = $content;
		my $inflated = Compress::Zlib::memGunzip($compressed);
		$content = $inflated if defined $inflated;
	}

	my $h2 = '';
	if ($key =~ /([0-9.]*):(\d*) -> ([0-9.]*):(\d*)/)
	{
		$h2 = "<span class='srcIp'>$1</span>:<span class='srcPort'>$2</span> - <span class='destIp'>$3</span>:<span class='destPort'>$4</span>";
	}
	if ($request =~ /^Host:[ \t]*([^\r\n]*)/mi)
	{
		my $host = $1;
		$h2 .= " (" . _html_escape($host) . ")";
	}

	# The request target is the second token of the request line. Only use the
	# capture when the match succeeded; otherwise $1 would still hold a value
	# from an earlier match.
	my $h3 = ($request =~ /^\S*\s+([^\r\n\s]*)/) ? $1 : '';

	my @tabs;

	# Request
	push @tabs, {name=>"Request", data=>_headers_to_dl('request', $request)};

	# Response
	push @tabs, {name=>"Response", data=>_headers_to_dl('response', $responseHeaders)};

	# GET parameters
	if ($request =~ /^GET\s+[^\s\r\n\?]*\?([^\r\n\s]*)/i)
	{
		my $params = $1;
		my $data = "				<dl class=\"headers getParams\">\n";
		foreach my $pair (split /&/, $params)
		{
			next if $pair eq '';

			# Split on the first "=" only; a parameter without "=" is shown
			# with an empty value.
			my ($name, $value) = split /=/, $pair, 2;
			$data .= "					<div><dt>" . _html_escape($name) . " =</dt><dd>" . _html_escape($value) . "</dd></div>\n";
		}
		$data .= "				</dl>\n";
		push @tabs, {name=>"GET Params", data=>$data};
	}

	# File Info
	{
		my $size = length($content);
		my $md5 = md5_hex($content);

		# An empty end-of-line argument keeps the base64 text on one line so the
		# data: URI contains no embedded newlines.
		my $download = "<a href=\"data:binary/octet-stream;base64,".encode_base64($content, '')."\">Download</a> <b>Warning</b>: downloaded files may be malicious.";
		my $data = "				<dl class=\"fileInfo\">\n";
		$data .= "					<div><dt>Size =</dt><dd>$size</dd></div>\n";
		$data .= "					<div><dt>MD5 =</dt><dd>$md5</dd></div>\n";
		$data .= "					<div><dt>Download Link</dt><dd>$download</dd></div>\n";
		$data .= "				</dl>\n";
		push @tabs, { name=>"File Info", data=>$data };
	}

	# Image
	if ($contentType =~ /^image\/(gif|jpeg|png)/i)
	{
		# Build the media type from the recognised subtype only, so nothing
		# else from the Content-Type header ends up inside the attribute.
		my $imageType = 'image/' . lc($1);
		push @tabs, {
			name=>"Image",
			data=>"<img class=\"inlineImage\" src=\"data:$imageType;base64,".encode_base64($content, '')."\">"
		};
	}

	my $isXml = ($contentType =~ /^text\/xml\b/i) ? 1 : 0;

	# Source
	if ($isXml)
	{
		my $escapedContent = _html_escape($content);
		$escapedContent =~ s/ /&nbsp;/g;
		$escapedContent =~ s/$/<br\/>/mg;
		push @tabs, {
			name=>"Source",
			data=>"
				<pre><code class='language-xml'>
					$escapedContent
				</code></pre>"
		};
	}

	# Query
	if ($isXml)
	{
		# The document is handed to the page script as a JavaScript string; it
		# must be escaped so quotes, line breaks or a "</script>" inside the
		# captured document cannot terminate the literal or the script element.
		my $escapedContent = _js_string_escape($content);
		my $data = "<div class='xmlQuery'>
			<script>window['xmlContent$transferNumber'] = '$escapedContent';</script>
			<form>
				<input type='text' name='selector'/>
				<input type='hidden' name='contentVariable' value='xmlContent$transferNumber'/>
				<input type='button' value='Go!' />
				<div class='xmlQueryResults'>
				</div>
			</form>
		</div>";
		push @tabs, {
			name=>"Query",
			data=>$data
		};
	}

	# Apple PList
	if ($isXml && $content =~ m#http://www.apple.com/DTDs/PropertyList-1.0.dtd#)
	{
		# Rewrite the property-list markup into a compact brace/bracket
		# notation. A named lexical is used so the caller's $_ is left alone.
		my $plist = $content;
		$plist =~ s/<\?.*?\?>//g;
		$plist =~ s/<!.*?>//g;
		$plist =~ s/<\/?plist.*?>//g;
		$plist =~ s/<dict>/{/g;
		$plist =~ s/<\/dict>/},/g;
		$plist =~ s/<array>/[/g;
		$plist =~ s/<\/array>/],/g;
		$plist =~ s/<string>/"/g;
		$plist =~ s/<\/string>/",/g;
		$plist =~ s/<integer>//g;
		$plist =~ s/<\/integer>/,/g;
		$plist =~ s/<key>//g;
		$plist =~ s/<\/key>/:  /g;
		$plist =~ s/<(true|false)\/>/$1,/g;
		$plist =~ s/,\s*$//mg;

		# Any markup that was not rewritten above is shown literally rather
		# than being interpreted by the browser. "&" is left alone because the
		# XML text already carries its own entity references.
		$plist =~ s/</&lt;/g;
		$plist =~ s/>/&gt;/g;

		$plist =~ s/ /&nbsp;/g;
		$plist =~ s/$/<br\/>/mg;
		push @tabs, {
			name=>"Apple PList",
			data=>"
				<pre><code>
					$plist
				</code></pre>"
		};
	}

	# $h2 is markup assembled above from already-safe pieces; the request
	# target is raw capture data and is escaped here.
	my $h3Html = _html_escape($h3);

	print OUTPUT_STREAM <<"HERE";
		<div class="transferBox collapsable">
			<h2>$h2</h2>
			<h3>$h3Html</h3>
			<div class="tabs" style="display:none">
				<ul class="tablist">
HERE
	print OUTPUT_STREAM "					<li class=\"tab\">$_->{name}</li>\n" foreach (@tabs);
	print OUTPUT_STREAM "				</ul>\n";

	foreach my $tab (@tabs)
	{
		print OUTPUT_STREAM "				<div class=\"tab\">\n";
		print OUTPUT_STREAM $tab->{data} . "\n";
		print OUTPUT_STREAM "				</div>\n";
	}
	print OUTPUT_STREAM "			</div>\n";
	print OUTPUT_STREAM "		</div>\n";
}

# Writes the closing part of the HTML report to OUTPUT_STREAM: a credit line
# carrying the script's VERSION constant, followed by the closing body and html
# tags. Takes no arguments.
sub print_output_footer
{
	my $version = VERSION;

	# The here-document starts with an empty line so the footer is separated
	# from whatever was written before it.
	print OUTPUT_STREAM <<"FOOTER";

		<h4>Generated by httpAnalyzer.pl version $version available from <a href="http://modtwo.com">modtwo.com</a></h4>
	</body>
</html>
FOOTER
}
