#!/usr/bin/env python
# Network Forensic Evidence Statistics
# nfestats.py

#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys								# to get command line arguments
import os.path, os						# to parse evidence file/directory
import struct							# to unpack MAC address
import socket							# numeric IP address to ASCII, and port to service name.
import datetime, time					# to parse timestamps
import hashlib							# md5
# from python 2.3
from optparse import OptionParser		# 
from optparse import OptionGroup		# 

# Third-party dependencies. Each import is guarded so that a missing module
# produces install instructions instead of a traceback.
# NOTE(review): both handlers call sys.exit(0), so a missing dependency still
# reports a "success" exit status to the calling shell.
try:
	import pcap
except ImportError:
	print "[i] pylibpcap module is required."
	print "[i] \thttp://sourceforge.net/projects/pylibpcap/"
	print "[i] Please install and try again."
	sys.exit(0)
try:
	import dpkt
except ImportError:
	print "[i] dpkt module is required."
	print "[i] \thttp://code.google.com/p/dpkt/"
	print "[i] Please install and try again."
	sys.exit(0)
#
## command-line parameters parser class
#
class optHandler(object):
	"""Command-line parser for the tool.

	Wraps an optparse.OptionParser. After parse() succeeds, the parsed values
	are available in self.options, with options.evidences_repo replaced by
	the list of capture files to process.
	"""
	def __init__(self, copyright):
		"""Declare every supported option.

		copyright -- text handed to optparse as the --version string.
		"""
		self.parser = OptionParser(usage="%prog [options] -r EvidenceSource", version=copyright)
		self.options = None
		group = OptionGroup(self.parser, "Evidences Options")
		group.add_option("-r", dest="evidences_repo", help="Evidence .pcap file or directory containing a set of them (required option).")
		self.parser.add_option_group(group)

		group = OptionGroup(self.parser, "Visualization Options")
		group.add_option("-p", action="store_true", default=False, dest="viewPortsStats", help="View TCP/UDP Ports statistics.")
		# store_true: with store_false and a False default the switch could
		# never change the value, so IP statistics could not be hidden.
		group.add_option("-i", action="store_true", default=False, dest="hideIPStats", help="Hide IP statistics.")
		group.add_option("-m", action="store_true", default=False, dest="viewMACs", help="View MAC addresses instead of IP addresses.")
		# type="int": the limit is later used as a range() bound, so a value
		# given on the command line must not stay a string.
		group.add_option("-l", default=10, type="int", dest="portsLimit", help="Maximun ports number to use in ports statistics [default: %default].")
		group.add_option("-b", default="minTs", dest="sortBy", type="choice", choices=["minTs","proto","src","dst","nPackets",], help="Value to use to sort statistics: minTs, proto, src, dst or nPackets [default: %default]")
		self.parser.add_option_group(group)

		group = OptionGroup(self.parser, "Filtering Options", "Processing data matching some specified patterns.")
		group.add_option("-s", dest="srcFilter", help="Filtering by source address (IP or MAC)")
		group.add_option("-d", dest="dstFilter", help="Filtering by destination address (IP or MAC)")
		group.add_option("-f", dest="flgFilter", help="Filtering by existing TCP Flags (numeric format)")
		self.parser.add_option_group(group)

	def parse(self, cmd):
		"""Parse a full argument vector and resolve the evidence source.

		cmd -- argument list including the program name (e.g. sys.argv);
		       the first element is ignored.

		On success self.options holds the parsed values and
		options.evidences_repo is a list of file paths: the given file, or
		the *.pcap files (name-sorted) found directly inside the given
		directory. Any problem is reported through OptionParser.error(),
		which prints the usage message and exits.
		"""
		(options, args) = self.parser.parse_args(cmd[1:])
		if args:
			self.parser.error("unspecified arguments %s Please see --help for more details" % str(args))
		if not options.evidences_repo:
			self.parser.error("-r: Evidence file or directory containing a set of them must be specified.")
		else:
			repo = options.evidences_repo
			repf = []
			if os.path.isfile(repo):
				repf.append(repo)
			elif os.path.isdir(repo): # set of .pcap files
				# str.find() returns -1 (truthy) when the text is absent, so
				# test the extension explicitly instead.
				for f in sorted(os.listdir(repo)):
					if f.lower().endswith(".pcap"):
						repf.append(os.path.join(repo, f))
				if not repf:
					self.parser.error("-r: Specified evidence directory without any .pcap file [%s]" % repo)
			else: # neither a file nor a directory
				self.parser.error("-r: Evidence file or directory specified not found [%s]" % repo)
			options.evidences_repo = repf
		self.options = options

#
## Network Packet ASCII representations
#
class pkt2str(object):
	"""Static helpers that turn raw packet fields into printable text."""

	# (bit mask, letter) for each TCP flag bit, in output order.
	_TCP_FLAG_LETTERS = (
		(0x01, "F"), (0x02, "S"), (0x04, "R"), (0x08, "P"),
		(0x10, "A"), (0x20, "U"), (0x40, "E"), (0x80, "C"),
	)

	# ICMP type number -> description for the individually named types.
	# The "Reserved" ranges are handled in icmpType2String().
	_ICMP_TYPE_NAMES = {
		0: "Echo Reply",
		1: "Unassigned",
		2: "Unassigned",
		3: "Destination Unreachable",
		4: "Source Quench",
		5: "Redirect",
		6: "Alternate Host Address",
		7: "Unassigned",
		8: "Echo",
		9: "Router Advertisement",
		10: "Router Solicitation",
		11: "Time Exceeded",
		12: "Parameter Problem",
		13: "Timestamp",
		14: "Timestamp Reply",
		15: "Information Request",
		16: "Information Reply",
		17: "Address Mask Request",
		18: "Address Mask Reply",
		30: "Traceroute",
		31: "Datagram Conversion Error",
		32: "Mobile Host Redirect",
		33: "IPv6 Where-Are-You",
		34: "IPv6 I-Am-Here",
		35: "Mobile Registration Request",
		36: "Mobile Registration Reply",
		37: "Domain Name Request",
		38: "Domain Name Reply",
		39: "SKIP",
		40: "Photuris",
	}

	@staticmethod
	def ipProtocol2String(proto):
		""" Protocol number to string description ("unknow" when not recognised) """
		if   proto == dpkt.ethernet.ETH_TYPE_IP: return "IP"
		elif proto == dpkt.ip.IP_PROTO_ICMP:	 return "ICMP"
		elif proto == dpkt.ip.IP_PROTO_TCP:		 return "TCP"
		elif proto == dpkt.ip.IP_PROTO_UDP:		 return "UDP"
		else: return "unknow"

	@staticmethod
	def tcpFlags2String(flags):
		""" TCP Flag field to string description.

		Returns one letter per set bit (F, S, R, P, A, U, E, C from the
		lowest bit up), or "N" when no flag is set.
		"""
		if flags == 0x00:
			return "N"
		return "".join([letter for mask, letter in pkt2str._TCP_FLAG_LETTERS if flags & mask])

	@staticmethod
	def icmpType2String(type):
		""" ICMP type number to string description.

		Types 19-29 and 42-255 are reported as "Reserved" (both bounds
		inclusive); anything not listed yields "unknow".
		"""
		name = pkt2str._ICMP_TYPE_NAMES.get(type)
		if name is not None:
			return name
		# range(19,29) / range(42,255) excluded the upper bound, so types 29
		# and 255 were reported as unknown.
		if 19 <= type <= 29 or 42 <= type <= 255:
			return "Reserved"
		return "unknow"

	@staticmethod
	def port2service(port):
		""" Port number to service name, "unknow" when no service is registered """
		try:
			return socket.getservbyport(port)
		except (socket.error, OverflowError, TypeError):
			# no such service, or a value that is not a valid port number
			return "unknow"

	@staticmethod
	def eth_ntoa(mac):
		""" 6-byte packed MAC address to "xx:xx:xx:xx:xx:xx" (lowercase hex).

		Raises struct.error when mac is not exactly 6 bytes long.
		"""
		return ":".join(["%02x" % octet for octet in struct.unpack('!BBBBBB', mac)])

#
## IP flows stats entry
#
class IPStatEntry(object):
	""" Statistics of one flow (protocol plus a pair of addresses).

	Attributes:
	  minTs, maxTs -- earliest / latest packet timestamp seen
	  proto        -- IP protocol number
	  src, dst     -- packed addresses taken from the first packet
	  nPackets     -- number of packets counted
	  extra        -- per-value counters (the caller feeds TCP flags or
	                  ICMP types); left empty for UDP
	"""
	def __init__(self, ts, proto, src, dst, flags):
		""" Create the entry from the first packet of the flow.

		flags is counted as the first "extra" value unless proto is UDP.
		"""
		self.minTs = ts
		self.maxTs = ts
		self.proto = proto
		self.src = src
		self.dst = dst
		self.nPackets = 1
		self.extra = {}
		if proto != dpkt.ip.IP_PROTO_UDP:
			self.updateExtraEntry(flags)

	def updateTimestamps(self, timestamp):
		""" Widen the [minTs, maxTs] interval so that it includes timestamp """
		self.minTs = min(self.minTs, timestamp)
		self.maxTs = max(self.maxTs, timestamp)

	def updateExtraEntry(self, key):
		""" Count one more occurrence of key in the extra counters """
		self.extra[key] = self.extra.get(key, 0) + 1

	def toString(self):
		""" One report line (newline terminated) describing the flow.

		Layout: first-seen time, duration in seconds, protocol, the two
		addresses, the "description/count" extras and the packet total.
		"""
		chunks = []
		for value, count in self.extra.items():
			if self.proto == dpkt.ip.IP_PROTO_TCP:
				desc = pkt2str.tcpFlags2String(value)
			elif self.proto == dpkt.ip.IP_PROTO_ICMP:
				desc = pkt2str.icmpType2String(value)
			else:
				# Any other protocol: show the raw value. Previously no
				# description was assigned here, which raised UnboundLocalError.
				desc = str(value)
			chunks.append("%s/%d " % (desc, count))
		strFlg = "".join(chunks)

		try:
			src = socket.inet_ntoa(self.src)
			dst = socket.inet_ntoa(self.dst)
		except (socket.error, TypeError):
			# not a 4-byte IPv4 address: render the pair as MAC addresses
			src = " "+pkt2str.eth_ntoa(self.src)
			dst = " "+pkt2str.eth_ntoa(self.dst)

		return "%14s (%3s seg) %4s %15s <=>%15s %s: %d\n" % (
				str(datetime.datetime.fromtimestamp(self.minTs).time()),
				# whole elapsed seconds; timedelta.seconds silently dropped
				# the days part for flows longer than 24 hours
				int(self.maxTs - self.minTs),
				pkt2str.ipProtocol2String(self.proto),
				src, dst, strFlg, self.nPackets
			)

#
## Ports stats entry
#
class PortsStatEntry(object):
	""" Statistics of one port number seen in one direction.

	Attributes:
	  proto     -- IP protocol number of the packet that created the entry
	  number    -- port number
	  direction -- direction label supplied by the caller
	  flags     -- TCP flags value -> number of packets carrying it
	  nPackets  -- number of packets counted
	"""
	def __init__(self, proto, number, dir, flags):
		""" Create the entry from a first packet; flags is counted only for TCP """
		self.proto = proto
		self.number = number
		self.direction = dir
		self.flags = {}
		self.nPackets = 1
		if proto == dpkt.ip.IP_PROTO_TCP:
			self.updateFlagsEntry(flags)

	def updateFlagsEntry(self, key):
		""" Count one more packet carrying the TCP flags value key.

		None (no flags available) is ignored. A flags value of 0 is counted:
		the former truthiness test discarded it together with None.
		"""
		if key is not None:
			self.flags[key] = self.flags.get(key, 0) + 1

	def toString(self):
		""" "port(service) flags/count ... : packets" summary, without newline """
		# Counters are always described as TCP flags, whatever self.proto is.
		# An entry created by a UDP packet can later collect TCP flags, and
		# that case used to leave the description unassigned.
		strFlg = "".join(["%s/%d " % (pkt2str.tcpFlags2String(value), count)
						  for value, count in self.flags.items()])
		return "%d(%s) %s : %d" % (self.number, pkt2str.port2service(self.number), strFlg, self.nPackets)

#
## Flows information gatherer class
#
class Gatherer(object):
	""" PCAP file information gatherer class.

	Reads every capture listed in opts.evidences_repo and accumulates
	per-flow statistics (IPStatEntry) and per-port statistics
	(PortsStatEntry, kept separately for source and destination ports).
	"""
	def __init__(self, opts):
		""" opts -- parsed command-line options (see optHandler) """
		self.evidenceFileName = None
		self.pcap = None
		self.filter = None
		self.__ipStatistics = {}
		self.__sportsStats = {}
		self.__dportsStats = {}
		self.__pLimit = None
		self.opts = opts
		self.__pcap_init()

	def __pcap_init(self):
		""" Gatherer instance initialization: pcap object, ports limit and capture filter """
		self.pcap = pcap.pcapObject()
		# The limit is used as a range() bound; coerce it in case the option
		# parser delivered it as a string.
		self.__pLimit = int(self.opts.portsLimit)
		if self.opts.srcFilter:
			self.addFilter("src " + self.opts.srcFilter, "and")
		if self.opts.dstFilter:
			self.addFilter("dst " + self.opts.dstFilter, "and")
		if self.opts.flgFilter:
			self.addFilter("tcp[13] = " + self.opts.flgFilter, "and")

	def __updateIpFlowsStats(self, pkt, ts):
		""" Update IP flow and port stats with one new packet.

		pkt -- raw frame bytes, decoded as Ethernet
		ts  -- packet timestamp
		Non-IP frames are ignored. Decoding errors raised by dpkt propagate
		to the caller; everything is decoded before any counter is touched,
		so a malformed packet is never counted partially.
		"""
		eth_pkt = dpkt.ethernet.Ethernet(pkt)
		if eth_pkt.type != dpkt.ethernet.ETH_TYPE_IP:
			return
		ip_pkt = dpkt.ip.IP(str(eth_pkt.data))
		if self.opts.viewMACs:
			src, dst = eth_pkt.src, eth_pkt.dst
		else:
			src, dst = ip_pkt.src, ip_pkt.dst
		proto = ip_pkt.p

		# Direction-independent key (A->B and B->A share one entry). The
		# former XOR of hashes was also symmetric but could collide, e.g.
		# every src == dst flow of a protocol produced the same key.
		key = (frozenset((src, dst)), proto)
		is_new = key not in self.__ipStatistics

		# Decode the transport layer once.
		l4pkt = None
		flags = None
		extra = None
		if proto == dpkt.ip.IP_PROTO_TCP:
			l4pkt = dpkt.tcp.TCP(str(ip_pkt.data))
			flags = l4pkt.flags
			extra = flags
		elif proto == dpkt.ip.IP_PROTO_UDP:
			l4pkt = dpkt.udp.UDP(str(ip_pkt.data))
		elif proto == dpkt.ip.IP_PROTO_ICMP or is_new:
			# NOTE(review): as before, the first packet of any other (non
			# TCP/UDP/ICMP) protocol is also decoded as ICMP to obtain the
			# value stored in the new entry.
			extra = dpkt.icmp.ICMP(str(ip_pkt.data)).type

		# ip flow stats
		if is_new:
			self.__ipStatistics[key] = IPStatEntry(ts, proto, src, dst, extra)
		else:
			entry = self.__ipStatistics[key]
			entry.updateTimestamps(ts)
			if proto in (dpkt.ip.IP_PROTO_ICMP, dpkt.ip.IP_PROTO_TCP):
				entry.updateExtraEntry(extra)
			entry.nPackets += 1

		# ports stats (TCP and UDP only)
		if l4pkt is not None:
			for direction, port, table in (
					("source", l4pkt.sport, self.__sportsStats),
					("destination", l4pkt.dport, self.__dportsStats)):
				entry = table.get(port)
				if entry is None:
					table[port] = PortsStatEntry(proto, port, direction, flags)
				else:
					entry.updateFlagsEntry(flags)
					entry.nPackets += 1

	def __md5(self, fname):
		""" Hex MD5 digest of the file fname, read in 8 KiB chunks """
		md5 = hashlib.md5()
		# Binary mode: captures are binary data. 'with' closes the file.
		with open(fname, "rb") as evidence:
			while True:
				data = evidence.read(8192)
				if not data: break
				md5.update(data)
		return md5.hexdigest()

	def __sort(self, what, how):
		""" List of (key, entry) pairs of dict what, ascending by entry attribute how """
		# Stable built-in sort; replaces a bubble sort whose early-exit flag
		# was set on every pass and therefore never took effect.
		return sorted(what.items(), key=lambda item: getattr(item[1], how))

	def addFilter(self, flt, nex):
		""" Append the expression flt to the capture filter.

		nex is the operator (e.g. "and") joining it to the existing filter;
		it is not used for the first expression.
		"""
		if self.filter:
			self.filter = self.filter + " " + nex + " (" + flt + ")"
		else:
			self.filter = "(" + flt + ")"

	def run(self):
		""" Init pcap read process and update statistics.

		Prints the MD5 of every evidence file, then reads them all.
		Returns False when no pcap object exists or the filter is rejected,
		True otherwise.
		"""
		if self.pcap is None:
			return False
		for evidence in self.opts.evidences_repo:
			print("MD5 (%s) = %s" % (evidence, self.__md5(evidence)))
		for evidence in self.opts.evidences_repo:
			self.pcap.open_offline(evidence)
			if self.filter:
				try:
					self.pcap.setfilter(self.filter,0,0)
				except Exception as msg:
					print("ERROR: %s" % msg)
					return False
			skipped = 0
			while True:
				try:
					# presumably next() returns None at end of file, which
					# fails to unpack -- TODO confirm against pylibpcap
					(pktlen, data, timestamp) = self.pcap.next()
				except Exception:
					break # EOF
				if not data:
					break # EOF
				try:
					self.__updateIpFlowsStats(data, timestamp)
				except Exception:
					# A packet that cannot be decoded no longer ends the
					# whole capture; it is skipped and reported below.
					skipped += 1
			if skipped:
				sys.stderr.write("WARNING: %d undecodable packet(s) skipped in %s\n" % (skipped, evidence))
		return True

	def ipStats2String(self, srt):
		""" IP stats to string, one line per flow, sorted by the entry attribute srt """
		return "".join([entry.toString() for _key, entry in self.__sort(self.__ipStatistics, srt)])

	def portsStats2String(self):
		""" Ports stats to string: source and destination columns side by side.

		Produces exactly the configured number of rows; a column is left
		blank once its list is exhausted.
		"""
		# NOTE(review): the lists are sorted by ascending nPackets, so the
		# rows show the LEAST seen ports first -- confirm this is intended.
		sl = self.__sort(self.__sportsStats, "nPackets")
		dl = self.__sort(self.__dportsStats, "nPackets")
		rows = []
		for idx in range(self.__pLimit):
			sStr = ''
			dStr = ''
			if idx < len(sl):
				sStr = sl[idx][1].toString()
			if idx < len(dl):
				dStr = dl[idx][1].toString()
			rows.append("%s || %s\n" % (sStr.rjust(55), dStr.ljust(55)))
		return "".join(rows)
			
if __name__=='__main__':
	# Script entry point: parse the command line, gather the statistics and
	# print the requested reports followed by the timing summary.

	started = datetime.datetime.fromtimestamp(time.time())
	banner = "\n%s  0.3" % (os.path.basename(sys.argv[0]))
	prsr = optHandler(banner)
	prsr.parse(sys.argv)
	opts = prsr.options
	gth = Gatherer(opts)
	if not gth.pcap:
		prsr.parser.error("-r: Unable to open specified evidence resource")

	if gth.run():
		# The flows report is always built; it is only shown unless hidden.
		flows_report = gth.ipStats2String(opts.sortBy)
		if not opts.hideIPStats:
			print(flows_report or "Not IP packets found")

		if opts.viewPortsStats:
			ports_report = gth.portsStats2String()
			if not ports_report:
				print("No Packets found")
			else:
				print("%s || %s" % ("------sources--------".rjust(55), "--------destinations------".ljust(55)))
				print(ports_report)
		finished = datetime.datetime.fromtimestamp(time.time())
		print("Analysis started at %s and ended at %s, stats calculated in %s " % (started, finished, finished - started))

