#!/usr/bin/ruby -W0

=begin
 After installing the prerequisites, carnivorous will search packet captures for SMTP communications and generate the following output, on a per TCP/IP socket basis:

	smtp.txt - all SMTP data excluding MIME
	smtp.b64decoded.txt - all SMTP data excluding MIME with automatic decoding of all Base64 encoding
	mime.txt - all MIME data contained in SMTP communications
	msgprt.headers - all header information contained in MIME message part 
	msgprt.txt/html - message body for message part
	msgprt.attachment - attachment for message part
	digests.txt - list of filenames, filesizes, MD5/SHA1 digests. Additionally will analyse docx extensions and provide the same information per component.

 Usage:
  ./carnivorous.rb -h
  Usage:       carnivorous.rb [options]
  --pcap, -p <s>:   pcap to scan for email traffic
   --Verbose, -V:   display verbose information
   --version, -v:   Print version and exit
      --help, -h:   Show this message

 Prerequisites:

 requires ruby/pcap for pcap handling -  see http://www.goto.info.waseda.ac.jp/~fukusima/ruby/pcap-e.html
  TO INSTALL: wget http://www.goto.info.waseda.ac.jp/~fukusima/ruby/ruby-pcap-0.6.tar.gz; tar xvzf ./ruby-pcap-0.6.tar.gz; cd ./pcap; ruby extconf.rb && make && sudo make install

 requires trollop for options parsing - see http://trollop.rubyforge.org/ 
  TO INSTALL: sudo gem install trollop

 requires ruby zip for zip file handling - see http://rubyzip.sourceforge.net/
  TO INSTALL: sudo gem install rubyzip
 
 requires tmail for mime processing - see http://tmail.rubyforge.org/
  TO INSTALL: sudo gem install tmail
 
 Tested and verified on snow leopard and ubuntu karmic koala
 Ubuntu karmic koala: sudo apt-get install ruby1.8-dev libpcap0.8-dev

 -W0 is used to produce clean script output.  The following warning is suppressed from ruby-pcap 
 /Library/Ruby/Site/1.8/universal-darwin10.0/pcap.bundle: warning: do not use Fixnums as Symbols

 Interesting linkage:
  email regex - http://stackoverflow.com/questions/535644/find-emails-address-in-mountain-of-data#
  docx analysis - http://stackoverflow.com/questions/697505/creating-microsoft-word-docx-documents-in-ruby
  web based ruby regex tool - http://rubular.com/
=end

require 'rubygems'
require 'trollop'
require 'pcaplet'
require 'tmail'
require 'zip/zipfilesystem'
require 'base64'
require 'digest/md5'
require 'digest/sha1'



# from the following format: expression=value , returns value and strips quotes
#
# expression - regex source (String) interpolated into a pattern; it is used
#              with String#scan, so it should not contain capture groups
# content    - String to search
#
# Returns an Array with one String per match: double quotes and apostrophes
# are removed and everything up to and including the first '=' is dropped.
# A match that contains no '=' is returned with only its quotes removed
# (previously such a match was erased or raised, depending on the Ruby version).
def valuesfromregex1(expression, content)
  content.scan(/#{expression}/).map do |match|
    # strip double quotes and apostrophes
    unquoted = match.delete("\"'")
    # split on the first '=' only; the value itself may contain further '='
    _name, separator, value = unquoted.partition('=')
    separator.empty? ? unquoted : value
  end
end


# from the following format: ^expression: value$ , returns value
#
# expression - regex source (String) interpolated into a pattern; it is used
#              with String#scan, so it should not contain capture groups
# content    - String to search
#
# Returns an Array with one String per match: the text up to and including the
# first ':' is dropped together with one following space or tab (if present),
# every ';' is removed and a trailing line ending is stripped.
# A match that contains no ':' is treated as being the value itself.
def valuesfromregex2(expression, content)
  content.scan(/#{expression}/).map do |match|
    _name, colon, rest = match.partition(':')
    # only swallow the character after the colon when it really is whitespace
    value = colon.empty? ? match : rest.sub(/\A[ \t]/, '')
    # "\n" must be double-quoted to mean a newline; chomp("\n") also drops "\r\n"
    value.delete(';').chomp("\n")
  end
end


# derive file extension from filename
#
# filename - String file name
#
# Returns the text after the last '.' normalized to lowercase, or an empty
# String when the name contains no '.' (or ends with one).
def extensionfromfilename(filename)
  lastdot = filename.rindex('.')
  # names without a dot have no extension; do not fail on them
  return '' if lastdot.nil?
  filename[(lastdot + 1)..-1].downcase
end


# save file
#
# object   - data to write (passed straight to IO#write)
# filename - destination path; converted with to_s before opening
#
# Prints a status line, then writes the data in binary mode. The block form of
# File.open closes the handle even when the write raises. Returns nil.
def savetofile(object, filename)
  # status message
  puts "* Writing data to #{filename}"
  File.open(filename.to_s, "wb") { |op_file| op_file.write(object) }
  nil
end


# determine if the string has non-printable characters - see http://arstechnica.com/open-source/news/2005/10/linux-20051002.ars
#
# text - String to inspect
#
# Returns true when any byte lies outside the printable ASCII range 32..126,
# false otherwise (including for an empty String).
def has_nonprint?(text)
  text.each_byte.any? { |byte| byte < 32 || byte > 126 }
end


# compute the hex-encoded MD5 and SHA1 digests of a string
#
# object - String whose digests are wanted
#
# Returns a two-element Array: [md5 hex digest, sha1 hex digest].
def getdigestsums(object)
  [Digest::MD5, Digest::SHA1].map { |algorithm| algorithm.hexdigest(object) }
end


# analyze file (name, original size, md5 digest, sha1 digest)
#
# filename - path of the file to examine
#
# Returns a report String: a heading line, one line with the name, the size in
# bytes and both digests, then a blank line.
def examinefile(filename)
  # read in binary mode so the bytes are not translated or re-encoded
  contents = File.open(filename, 'rb') { |file| file.read }
  # collect information about file
  md5digest, sha1digest = getdigestsums(contents)
  # bytesize, not size: the report states bytes, and size counts characters
  "- Analysis of file: #{filename}\n" \
    "name = #{filename}, size = #{contents.bytesize} bytes, MD5 digest = #{md5digest}, SHA1 digest = #{sha1digest}\n" \
    "\n"
end


# analyze .docx files (name, original size, compressed size, md5 digest, sha1 digest)
#
# docxfilename - path of the .docx (zip) file to examine
#
# Returns a report String: a heading line, one numbered line per zip entry
# with its sizes and digests, then a blank line.
def examinedocx(docxfilename)
  # open the archive; its file accessor is used to read entries by name
  Zip::ZipFile.open(docxfilename) do |archive|
    report = "- Analysis of docx components of file: #{archive}\n"
    # one line per entry of the archive
    archive.each_with_index do |member, position|
      md5sum, sha1sum = getdigestsums(archive.file.read(member.name))
      report << "#{position}: name = #{member.name}, size = #{member.size} bytes, compressed size = #{member.compressed_size} bytes, MD5 digest = #{md5sum}, SHA1 digest = #{sha1sum}\n"
    end
    report << "\n"
    # hand the report straight back to the caller of examinedocx
    return report
  end
end



### parse command line options
# Declares two options with Trollop and stores the parsed result in opts:
#   :pcap    - String, the pcap to scan for email traffic
#   :Verbose - flag, display verbose information
# The version string and usage banner are set in the same block.
opts = Trollop::options {
  version "carnivorous - of or relating to carnivores / v1.01 | richardsprings"  
  banner <<-EOS
  Usage:       carnivorous.rb [options]
  EOS
  opt :pcap, "pcap to scan for email traffic", :type => String
  opt :Verbose, "display verbose information"
}
# check for mandatory arguments: a nil opts[:pcap] is reported through Trollop::die
# (NOTE(review): Trollop::die is expected to print the message and exit - confirm)
Trollop::die :pcap, "not specified" if opts[:pcap].nil?



### variables

# pattern used to spot email addresses - see http://www.regular-expressions.info/email.html for discussion
emailregex = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/

# eoisockets - one array per event of interest, indexed as:
#   0/src_ip, 1/src_tcp, 2/dst_ip, 3/dst_tcp, 4/isn, 5/payload, 6/smtp, 7/smtp(b64decoded), 8/mime, 9/messages, 10/md5,sha1
eoisockets = []



### Opening pcap for analysis...

# First pass over the capture: every TCP packet whose payload contains an email
# address contributes one entry (src ip, src port, dst ip, dst port, ISN
# placeholder 0, empty payload, ..., digest report at index 10) to eoisockets.

# status message
puts "--- Opening pcap #{opts.pcap} for analysis..."
# open searchpcap
searchpcap = Pcap::Capture.open_offline(opts.pcap)
# status message
puts "--- Searching pcap #{opts.pcap} for email addresses..."
# digest report of the capture file itself; built once, on the first match,
# instead of re-reading and re-hashing the whole pcap for every matching packet
pcapdigests = nil
# iterate through every packet
searchpcap.loop do |pkt|
  # only TCP packets are of interest
  next unless pkt.tcp?
  # does the packet contain the email address regex
  results = pkt.tcp_data.to_s.scan(emailregex)
  next if results.empty?
  # display findings (each address is followed by a single space)
  puts "* #{pkt.ip_src}:#{pkt.tcp_sport} -> #{pkt.ip_dst}:#{pkt.tcp_dport}\t FOUND: #{results.join(' ')} "
  # build results array used to store data, with storage for ISN and payload
  resultsarray = [pkt.ip_src, pkt.tcp_sport, pkt.ip_dst, pkt.tcp_dport, 0, String.new]
  # acquire MD5 and SHA1 outputs for pcap (own copy per socket)
  pcapdigests ||= examinefile(opts.pcap)
  resultsarray[10] = pcapdigests.dup
  # append results to eoisockets array
  eoisockets << resultsarray
end
# keep unique results in eoisockets (one pass gives the same list as
# de-duplicating after every append)
eoisockets.uniq!
# close file
searchpcap.close
# display verbose
unless opts.Verbose.nil?
  eoisockets.each do |socket|
    puts "Interesting Socket: #{socket[0]}:#{socket[1]} -> #{socket[2]}:#{socket[3]}"
  end
end



### Acquiring payloads of related packets...

# Second pass over the capture: for every socket of interest, record the
# initial sequence number (ISN) from its SYN packet and place each data
# segment into socket[5] at the offset given by its sequence number.

# status message
puts "--- Acquiring payloads of related packets..."
# acquire payloads of interesting sockets
datapcap = Pcap::Capture.open_offline(opts.pcap)
# for each packet in filename
datapcap.loop do |pkt|
  # only TCP packets can belong to a socket of interest
  next unless pkt.tcp?
  # display verbose
  unless opts.Verbose.nil?
    puts "#{pkt.ip_src}:#{pkt.tcp_sport} -> #{pkt.ip_dst}:#{pkt.tcp_dport} #{pkt.tcp_flags_s} SeqID: #{pkt.tcp_seq} TCP_datalength: #{pkt.tcp_data_len}"
  end
  # for each socket of interest
  eoisockets.each do |socket|
    # does the current packet match this socket of interest?
    next unless pkt.ip_src == socket[0] && pkt.tcp_sport == socket[1] && pkt.ip_dst == socket[2] && pkt.tcp_dport == socket[3]
    # record initial sequence number of socket
    if pkt.tcp_syn? == true && pkt.tcp_ack? == false
      socket[4] = pkt.tcp_seq
    end
    # nothing more to do for packets without payload
    next unless pkt.tcp_data_len > 0
    isn = socket[4]
    if isn != 0
      payload = socket[5]
      data = pkt.tcp_data.to_s
      # determine data position (-1 as we count from 0)
      start = pkt.tcp_seq + pkt.ip_off - isn - 1
      # a negative offset cannot be placed in the stream; ignore the segment
      next if start < 0
      # compensate for packets arriving out of order: pad the stored payload
      # IN PLACE (a "+=" would only rebind the local and lose the data)
      payload << " " * (start - payload.length) if start > payload.length
      # overwrite rather than insert, so earlier padding is replaced and
      # retransmitted segments are not duplicated
      payload[start, data.length] = data
    else
      # complain
      puts "--- ISN not known yet, skipping packet's payload 8(" unless opts.Verbose.nil?
    end
  end
end
# close file
datapcap.close
# remove socket if SYN/!ACK ISN not identified
eoisockets.delete_if { |socket| socket[4] == 0 }



### Separating message from SMTP traffic...

# For every socket of interest, split the reassembled payload into the SMTP
# dialogue (socket[6]) and the message sent after DATA (socket[8]), and write
# both to disk. Sockets without a complete DATA section are reported and
# removed from eoisockets so that the later stages only see real messages.

# status message
puts "--- Separating message from SMTP traffic..."
# for each socket of interest
eoisockets.each do |socket|
  payload = socket[5]
  # determine socket filename prefix
  socketfilenameprefix = "#{socket[0]}_#{socket[1]}-#{socket[2]}_#{socket[3]}"
  # normalize line endings to UNIX standard (CRLF and lone CR become LF)
  payload.gsub!(/\r\n?/, "\n")
  # first DATA command: anchored to a line start and case-insensitive, so
  # "DATA" inside a hostname or address earlier in the dialogue is not mistaken for it
  datastart = payload.index(/^DATA\b/i)
  # search in reverse from the end of the string for "^."
  dataend = payload.rindex(/^\./)
  # streams without a DATA command or end-of-message marker (for instance the
  # server side of the conversation) carry no message to extract
  if datastart.nil? || dataend.nil? || dataend < datastart
    puts "* Skipping #{socketfilenameprefix}: no complete SMTP DATA section found"
    next
  end
  # identify smtp: everything up to and including DATA, then the terminator onwards
  socket[6] = payload[0..datastart + 3] + "\n" + payload[dataend..-1]
  # identify message (5 for DATA and newline)
  socket[8] = payload[datastart + 5..dataend - 1]
  # write SMTP to file
  savetofile(socket[6], socketfilenameprefix + ".smtp.txt")
  # write MIME to file
  savetofile(socket[8], socketfilenameprefix + ".mime.txt")
end
# drop the sockets that were skipped above
eoisockets.delete_if { |socket| socket[8].nil? }



### Analyzing SMTP traffic for Base64 encoding...

# For every socket of interest, build socket[7]: a copy of the SMTP dialogue
# in which each line that decodes from Base64 to printable text is replaced
# by its decoded form, and write it to disk.

# status message
puts "--- Analyzing SMTP traffic for Base64 encoding..."
# for each socket of interest
eoisockets.each do |socket|
  # create data structure for b64decoded SMTP
  decodedsmtp = String.new
  # each_line instead of String#each, which only exists on Ruby 1.8
  socket[6].each_line do |line|
    # b64decode of unencoded text usually results in nonprintable characters
    b64decode = Base64.decode64(line)
    # an empty result means nothing was decoded (e.g. the "." terminator line);
    # the explicit "== true" keeps this test independent of whatever
    # has_nonprint? returns when it finds nothing
    if b64decode.empty? || has_nonprint?(b64decode) == true
      # preserve line as is
      decodedsmtp << line
    else
      # printable result: swap the line for its decoded form
      decodedsmtp << b64decode << "\n"
    end
  end
  socket[7] = decodedsmtp
  # determine socket filename prefix
  socketfilenameprefix = "#{socket[0]}_#{socket[1]}-#{socket[2]}_#{socket[3]}"
  # write to file
  savetofile(socket[7], socketfilenameprefix + ".smtp.b64decoded.txt")
end



### Analyzing message(s)...

# For every socket of interest, parse the extracted message (socket[8]) with
# TMail and write to disk: the headers of the message (or of each MIME part),
# the text/HTML body, and any attachment. Each attachment is added to the
# digest report in socket[10] (with a per-component report for .docx files),
# and the report is finally saved as <prefix>.digests.txt.

# for each socket of interest
eoisockets.each do |socket|
  # status message
  puts "--- Analyzing message(s)..."
  # determine socket filename prefix
  socketfilenameprefix = "#{socket[0]}_#{socket[1]}-#{socket[2]}_#{socket[3]}"
  # parse MIME using tmail
  mail = TMail::Mail.parse(socket[8])

  if mail.multipart?
    # for multipart messages, iterate each part; the running index is the part
    # number (looking the part up again could return an earlier, equal part)
    mail.parts.each_with_index do |messagepart, msgpartnumber|
      partprefix = "#{socketfilenameprefix}.mime.msgprt#{msgpartnumber}"
      # save message headers
      messageheader = String.new
      messagepart.each_header do |name, field|
        messageheader << "#{name}: #{field}\n"
      end
      savetofile(messageheader, partprefix + ".headers.txt")

      if messagepart.main_type.nil?
        # main/type can not be identified: save the whole part
        savetofile(messagepart, partprefix + ".txt")
      elsif (attachmentfilename = messagepart.disposition_param('filename'))
        # handle attachments; the name comes from the (untrusted) email, so
        # keep only its last path component
        attachmentpath = partprefix + ".attachment." + File.basename(attachmentfilename.tr('\\', '/'))
        # save attachment to file (automatically base64 decoded)
        savetofile(messagepart.body, attachmentpath)
        # collect information about file attachment
        socket[10] += examinefile(attachmentpath)
        # analyze docx extensions (name, original size, compressed size, md5 digest, sha1 digest);
        # the saved path always contains a '.', so the lookup is safe for dot-less names
        if extensionfromfilename(attachmentpath) == "docx"
          socket[10] += examinedocx(attachmentpath)
        end
      else
        # handle html/text messages: HTML is saved as .html, anything else as .txt
        extension = messagepart.content_type == "text/html" ? ".html" : ".txt"
        savetofile(messagepart.body, partprefix + extension)
      end
    end
  else
    # for non multipart emails
    # save message headers
    messageheader = String.new
    mail.each_header do |name, field|
      messageheader << "#{name}: #{field}\n"
    end
    savetofile(messageheader, socketfilenameprefix + ".mime.msg.headers" + ".txt")

    if (attachmentfilename = mail.disposition_param('filename'))
      # the name comes from the (untrusted) email: keep only its last path component
      attachmentpath = socketfilenameprefix + ".mime.msg.attachment." + File.basename(attachmentfilename.tr('\\', '/'))
      # save attachment to file (automatically base64 decoded); rstrip, not
      # rstrip!, because rstrip! returns nil when there is nothing to strip
      savetofile(mail.body.rstrip, attachmentpath)
      # collect information about file attachment
      socket[10] += examinefile(attachmentpath)
      # analyze docx extensions (name, original size, compressed size, md5 digest, sha1 digest)
      if extensionfromfilename(attachmentpath) == "docx"
        socket[10] += examinedocx(attachmentpath)
      end
    else
      # HTML is saved as .html, anything else as .txt
      extension = mail.content_type == "text/html" ? ".html" : ".txt"
      savetofile(mail.body, socketfilenameprefix + ".mime.msg" + extension)
    end
  end

  # status message
  puts "--- Saving MD5/SHA1 Analysis..."
  # save MD5 / SHA1 analysis information as txt file
  savetofile(socket[10], socketfilenameprefix + ".digests.txt")
end

