#!/usr/bin/ruby
# Copyright (C) 2010 Richard Springs
#
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
# <http://www.gnu.org/licenses/>.

require 'rubygems'
require 'singleton'
require 'digest/md5'
require 'digest/sha1'
require 'cgi'
require 'zlib'
require 'stringio'  # used by ProxyData#gzipinflate but was never required
require 'trollop'
require 'xmlsimple'

# parse command line options
opts = Trollop::options do
  version "scarabsieve.rb / v1.01 | richardsprings"
  banner <<-EOS
Usage: scarabsieve.rb [options]
EOS
  opt :directory, "directory containing webscarab log output", :type => String
  opt :requestlinesummary, "summarize HTTP requestlines"
  opt :headersearch, "search HTTP headers using specified value", :type => String
  opt :parametersearch, "search HTTP request parameters using specified value", :type => String
  opt :xmlsearch, "search plist XML transmitted in HTTP responses using specified value", :type => String
  opt :Verbose, "display verbose information"
end

# check for mandatory arguments
Trollop::die :directory, "not specified" if opts[:directory].nil?
# Singleton container for all parsed WebScarab conversation data, plus the
# analysis passes (request-line summary, header/parameter/plist-XML searches,
# response-content carving) whose results are accumulated into @data_hash and
# printed by #report.
class ProxyData
  include Singleton

  def initialize
    # Per-conversation arrays, indexed by (conversation number - 1), where the
    # conversation number is the N in WebScarab's "N-request"/"N-response" files.
    @req_reqline     = Array.new   # HTTP request lines ("GET http://... HTTP/1.1")
    @req_headers     = Array.new   # raw request header blocks
    @req_messagebody = Array.new   # request message bodies (POST parameters etc.)
    @resp_statusline = Array.new   # HTTP status lines
    @resp_headers    = Array.new   # raw response header blocks
    @resp_body       = Array.new   # response message bodies
    @resp_body_xml   = Array.new   # response bodies identified as text/xml by #carve
    # report sections keyed by name: "AllRequests", "HeaderSearch",
    # "ParameterSearch", "DigestSums", "PlistXMLSearch"
    @data_hash = Hash.new
    @tl = Time.now.localtime       # timestamp printed in the report
  end

  # Write +object+ verbatim to +filename+ in binary mode.
  def savetofile(object, filename)
    # block form guarantees the handle is closed even if write raises
    File.open(filename.to_s, "wb") { |op_file| op_file.write(object) }
  end

  # Parse one WebScarab conversation file ("N-request" or "N-response") into
  # first line, header block and message body, and store the pieces at slot
  # N-1 of the matching request or response arrays. Files whose names contain
  # neither "request" nor "response" are silently ignored.
  def processfile(filename)
    # conversation number from the "N-request"/"N-response" file name
    filenum = File.basename(filename).split("-")[0].to_i
    tempstring = IO.read(filename)

    # locate the end of the first line, and the blank line separating the
    # headers from the body (CRLFCRLF, falling back to LFLF, then to EOF)
    v_firstlineend = tempstring.index("\n")
    v_httpstart = tempstring.index("\r\n\r\n")
    v_httpstart = tempstring.index("\n\n") if v_httpstart.nil?
    v_httpstart = tempstring.length if v_httpstart.nil?

    firstline   = tempstring[0..v_firstlineend]
    httpheaders = tempstring[v_firstlineend + 2..v_httpstart]
    httpbody    = tempstring[v_httpstart + 4..tempstring.length]

    if filename.include? "request"
      @req_reqline[filenum - 1]     = firstline
      @req_headers[filenum - 1]     = httpheaders
      @req_messagebody[filenum - 1] = httpbody
    elsif filename.include? "response"
      @resp_statusline[filenum - 1] = firstline
      @resp_headers[filenum - 1]    = httpheaders
      @resp_body[filenum - 1]       = httpbody
    end
    # NOTE: the original used `next` here, which is invalid inside a method
    # body; falling through achieves the intended "skip unknown file" behavior
  end

  # Build the "AllRequests" report section: one line per parsed request,
  # "N-request <uri>".
  def show_allrequests
    tempstring = String.new
    @req_reqline.each_index do |indexnumber|
      # ensure index exists (conversation numbering may be sparse)
      if ! @req_reqline[indexnumber].nil?
        requesttolog = httpfirstline(@req_reqline[indexnumber])[1]
        tempstring << (indexnumber + 1).to_s + "-request " + requesttolog + "\n"
      end
    end
    @data_hash["AllRequests"] = tempstring
  end

  # Split an HTTP request line or status line on whitespace into its three
  # elements (e.g. method, URI, protocol). Returns an array.
  def httpfirstline(firstline)
    return firstline.split
  end

  # Scan request and response headers of conversation +indexnum+ for header
  # lines beginning with +query+ (case-insensitive). Returns the matching
  # lines prefixed with "N-request "/"N-response ", or "" when nothing matches.
  def search_headers(query, indexnum)
    tempstring = String.new
    searchqueryregex = Regexp.new(/^#{query}.*\n/i)

    # search request headers (line endings normalized to "\n" first)
    requestmatches = @req_headers[indexnum].gsub(/\r\n?/, "\n").scan(searchqueryregex)
    if requestmatches.length != 0
      tempstring << (indexnum + 1).to_s + "-request "
      requestmatches.each { |match| tempstring << match }
    end

    # search response headers
    responsematches = @resp_headers[indexnum].gsub(/\r\n?/, "\n").scan(searchqueryregex)
    if responsematches.length != 0
      tempstring << (indexnum + 1).to_s + "-response "
      responsematches.each { |match| tempstring << match }
    end

    return tempstring
  end

  # Build the "HeaderSearch" report section by running #search_headers across
  # every parsed conversation.
  def search_allheaders(searchquery)
    tempstring = String.new
    @req_headers.each_index do |indexnumber|
      # ensure index exists
      if ! @req_reqline[indexnumber].nil?
        tempstring << search_headers(searchquery, indexnumber)
      end
    end
    @data_hash["HeaderSearch"] = tempstring
  end

  # Build the "ParameterSearch" report section: for each request, search its
  # parameters (query string for GET, message body for POST) for +searchquery+
  # and log the first matching parameter, URL-decoded.
  def search_allparameters(searchquery)
    tempstring = String.new
    @req_reqline.each_index do |indexnumber|
      next if @req_reqline[indexnumber].nil?
      httpmethod, uri, protocol = httpfirstline(@req_reqline[indexnumber])

      # identify parameters for GET METHOD
      if httpmethod == "GET"
        if uri.include?('?')
          parameters = uri[uri.index('?') + 1..uri.length]
        else
          parameters = ""
        end
      # identify parameters for POST METHOD
      elsif httpmethod == "POST"
        parameters = @req_messagebody[indexnumber]
      else
        next # other methods: nothing to search
      end

      # SEARCH PARAMETERS (first match, case insensitive)
      if parameters.downcase.include?(searchquery.downcase)
        param_start = parameters.downcase.index(searchquery.downcase)
        if ! parameters.index('&', param_start).nil?
          # parameter is terminated by ampersand
          param_end = parameters.index('&', param_start) - 1
        else
          # parameter is terminated by line end
          param_end = parameters.length
        end
        # log findings to tempstring (URL decoded)
        parameterstolog = CGI.unescape(parameters[param_start..param_end])
        tempstring << (indexnumber + 1).to_s + "-request " + parameterstolog + "\n"
      end
    end
    @data_hash["ParameterSearch"] = tempstring
  end

  # Parse a pseudo-URI of the form "protocol://host:port/path?params" into
  # [uri-without-params, params, protocol, host, port, url, document].
  # NOTE(review): assumes the port is always present (":<port>" after the
  # host) — a URI without an explicit port will break the index math.
  def parseuri(uri)
    # split parameters from protocol, host, port, url
    temparray = uri.split('?')
    requestlineurlnoparams = temparray[0]
    if temparray.length != 1
      requestlineurlparameters = temparray[1]
    else
      requestlineurlparameters = ""
    end

    # determine the index positions of pseudo uri elements
    v_protindex = requestlineurlnoparams.index('://')
    v_hostindex = requestlineurlnoparams.index(':', v_protindex + 1)
    v_portindex = requestlineurlnoparams.index('/', v_hostindex)
    v_docindex  = requestlineurlnoparams.rindex('/')

    protocol = requestlineurlnoparams[0, v_protindex]
    host     = requestlineurlnoparams[v_protindex + 3..v_hostindex - 1]
    port     = requestlineurlnoparams[v_hostindex + 1..v_portindex - 1]
    url      = requestlineurlnoparams[v_portindex..requestlineurlnoparams.length]
    document = requestlineurlnoparams[v_docindex + 1..requestlineurlnoparams.length]

    return requestlineurlnoparams, requestlineurlparameters, protocol, host, port, url, document
  end

  # Inflate a gzip-compressed string and return the decompressed content.
  def gzipinflate(string)
    require 'stringio' # StringIO is used here but never required at file top
    gz = Zlib::GzipReader.new(StringIO.new(string))
    return gz.read
  end

  # Calculate MD5 and SHA1 hex digests of +object+. Returns [md5, sha1].
  def getdigestsums(object)
    md5value  = Digest::MD5.hexdigest(object)
    sha1value = Digest::SHA1.hexdigest(object)
    return md5value, sha1value
  end

  # Carve interesting response bodies (images, HTML, JavaScript, XML) out to
  # files in +conversationsdirectory+, inflating gzipped content where safe,
  # and build the "DigestSums" report section (MD5/SHA1 of the raw bodies).
  # text/xml bodies are additionally kept in @resp_body_xml for
  # #plistxmlsearch.
  def carve(conversationsdirectory)
    tempstring = String.new
    @resp_headers.each_index do |indexnumber|
      # ensure index exists - is there content to carve?
      next if @resp_body[indexnumber].nil?

      contenttype      = search_headers("content-type", indexnumber)
      transferencoding = search_headers("Transfer-Encoding", indexnumber)
      httpmethod, uri, protocol = httpfirstline(@req_reqline[indexnumber])
      requestlineurlnoparams, requestlineurlparameters, protocol, host, port, url, document = parseuri(uri)

      # determine content encoding type
      contentencoding = search_headers("content-encoding", indexnumber)
      if contentencoding.length > 0
        # content encoding is used
        contentencodingtype = contentencoding.split(":")[1].downcase.strip
        transferencodingtype = transferencoding.split(":")[1].downcase.strip if transferencoding.length > 0
        if contentencodingtype.include?("gzip") && transferencodingtype != "chunked"
          # gzipped? inflate if so (chunked transfer coding would need
          # de-chunking first, so it is left alone)
          content = gzipinflate(@resp_body[indexnumber])
        else
          # not gzipped? undetected encoding
          content = @resp_body[indexnumber]
        end
      else
        # no content encoding
        content = @resp_body[indexnumber]
      end

      # carve html and image content types
      if contenttype.include?("image/") || contenttype.include?("text/html") || contenttype.include?("application/x-javascript")
        carvedfilename = sprintf("./#{conversationsdirectory}%04d-response.carved.#{document}", indexnumber + 1)
        savetofile(content, carvedfilename)
        # md5/sha1 analysis (digests are of the raw, possibly compressed body)
        md5sum, sha1sum = getdigestsums(@resp_body[indexnumber])
        tempstring << "#{md5sum} #{sha1sum} #{File.basename(carvedfilename)}\n"
      # carve text/xml content type
      elsif contenttype.include?("text/xml")
        carvedfilename = sprintf("./#{conversationsdirectory}%04d-response.carved.xml", indexnumber + 1)
        savetofile(content, carvedfilename)
        # md5/sha1 analysis
        md5sum, sha1sum = getdigestsums(@resp_body[indexnumber])
        tempstring << "#{md5sum} #{sha1sum} #{File.basename(carvedfilename)}\n"
        # save xml in array
        @resp_body_xml[indexnumber] = content
      end
    end
    @data_hash["DigestSums"] = tempstring
  end

  # Build the "PlistXMLSearch" report section: regex-scan carved plist XML
  # response bodies for +query+ and report the matching key/string pairs.
  # NOTE(review): the regex and the XmlSimple wrapper strings below appear to
  # have lost literal markup (e.g. an opening <string> tag and a wrapping root
  # element) somewhere upstream — preserved as-is; confirm against the
  # original source before relying on this method.
  def plistxmlsearch(query)
    tempstring = String.new
    @resp_body_xml.each_index do |indexnumber|
      # ensure index exists
      next if @resp_body_xml[indexnumber].nil?
      # plist xml is NOT fun, so use regex hack
      plistxmlsearchregex = Regexp.new(/.*#{query}.*<\/string>/i)
      @resp_body_xml[indexnumber].scan(plistxmlsearchregex).each do |match|
        # create XML root and parse XML
        newxml = XmlSimple.xml_in("" + match + "")
        tempstring << "#{indexnumber + 1}-response #{newxml['key']}: #{newxml['string']}\n"
      end
    end
    @data_hash["PlistXMLSearch"] = tempstring
  end

  # Print the timestamp and every report section that was populated, in a
  # fixed order, each under a dashed banner.
  def report
    reportstring = String.new
    if ! @data_hash["AllRequests"].nil?
      reportstring << "---- request line summary " + "-" * 50 + "\n\n"
      reportstring << @data_hash["AllRequests"]
    end
    if ! @data_hash["DigestSums"].nil?
      reportstring << "\n\n---- digest sums of carved files (MD5 SHA1 FILENAME) " + "-" * 50 + "\n\n"
      reportstring << @data_hash["DigestSums"]
    end
    if ! @data_hash["HeaderSearch"].nil?
      reportstring << "\n\n---- header search " + "-" * 50 + "\n\n"
      reportstring << @data_hash["HeaderSearch"]
    end
    if ! @data_hash["ParameterSearch"].nil?
      reportstring << "\n\n---- parameter search " + "-" * 50 + "\n\n"
      reportstring << @data_hash["ParameterSearch"]
    end
    if ! @data_hash["PlistXMLSearch"].nil?
      reportstring << "\n\n---- plist xml search " + "-" * 50 + "\n\n"
      reportstring << @data_hash["PlistXMLSearch"]
    end
    puts "timestamp: #{@tl}\n\n"
    puts reportstring
  end
end # class ProxyData

# initialize data
httpdata = ProxyData.instance

# identify requests (assumes opts[:directory] ends with a path separator)
webscarabrequests = opts[:directory] + "*-request"

# process each request/response pair
Dir.glob(webscarabrequests) do |file|
  httpdata.processfile(file)
  # non-destructive gsub: gsub! would mutate the glob result and return nil
  # when nothing is replaced
  httpdata.processfile(file.gsub("request", "response"))
end

# all requests
httpdata.show_allrequests if opts[:requestlinesummary]
# header search
httpdata.search_allheaders(opts[:headersearch]) if opts[:headersearch]
# parameter search
httpdata.search_allparameters(opts[:parametersearch]) if opts[:parametersearch]
# file carving
httpdata.carve(opts[:directory])
# plist xml data search
httpdata.plistxmlsearch(opts[:xmlsearch]) if opts[:xmlsearch]
# report
httpdata.report