#!/usr/bin/ruby
# Copyright (C) 2010 Richard Springs
# This program is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
# <http://www.gnu.org/licenses/>.
require 'rubygems'
# stdlib
require 'cgi'
require 'digest/md5'
require 'digest/sha1'
require 'singleton'
require 'stringio'
require 'zlib'
# third-party gems
require 'trollop'
require 'xmlsimple'
# parse command line options (Trollop DSL; each `opt` defines a --flag,
# :type => String flags take a value, flag-only opts are booleans)
opts = Trollop::options {
version "scarabsieve.rb / v1.01 | richardsprings"
banner <<-EOS
Usage: scarabsieve.rb [options]
EOS
# NOTE(review): the directory value is concatenated with file patterns
# further down, so a trailing slash appears to be expected -- confirm
opt :directory, "directory containing webscarab log output", :type => String
opt :requestlinesummary, "summarize HTTP requestlines"
opt :headersearch, "search HTTP headers using specified value", :type => String
opt :parametersearch, "search HTTP request parameters using specified value", :type => String
opt :xmlsearch, "search plist XML transmitted in HTTP responses using specified value", :type => String
# NOTE(review): capitalized :Verbose is inconsistent with the other flag
# names and is never read below; renaming would change the CLI, so left as-is
opt :Verbose, "display verbose information"
}
# check for mandatory arguments: --directory is required
Trollop::die :directory, "not specified" if opts[:directory].nil?
class ProxyData
include Singleton
def initialize
@req_reqline = Array.new
@req_headers = Array.new
@req_messagebody = Array.new
@resp_statusline = Array.new
@resp_headers = Array.new
@resp_body = Array.new
@resp_body_xml = Array.new
# 0 = allrequests, 1 = headers, 2 = parameters, 3 = md5/sha1 sums carved, 4 = xmlplist
@data_hash = Hash.new
@tl = Time.now.localtime
end
# save file
def savetofile(object, filename)
# save attachment to file
op_file = File.open(filename.to_s, "wb")
op_file.write(object)
op_file.close
end
=begin
# parse http requests or responses
def processfile(filename)
# determine file #
filenum = File.basename(filename).split("-")[0].to_i
tempstring=IO.read(filename)
# break down http into more simple elements
v_firstlineend = tempstring.index("\n")
v_httpstart = tempstring.index("\r\n\r\n")
# hackish
if v_httpstart.nil? : v_httpstart = tempstring.index("\n\n") end
if v_httpstart.nil? : v_httpstart = tempstring.length end
firstline = tempstring[0..v_firstlineend]
httpheaders = tempstring[v_firstlineend..v_httpstart]
httpbody = tempstring[v_httpstart..tempstring.length].gsub(/\r\n?/,'')
if filename.include? "request"
@req_reqline[filenum-1] = firstline
@req_headers[filenum-1] = httpheaders
@req_messagebody[filenum-1] = httpbody
elsif filename.include? "response"
@resp_statusline[filenum-1] = firstline
@resp_headers[filenum-1] = httpheaders
@resp_body[filenum-1] = httpbody
else
next
end
end
=end
# parse http requests or responses
def processfile(filename)
# determine file #
filenum = File.basename(filename).split("-")[0].to_i
# normalize to unix line endings
tempstring=IO.read(filename)
# break down http into more simple elements
v_firstlineend = tempstring.index("\n")
v_httpstart = tempstring.index("\r\n\r\n")
# hackish
if v_httpstart.nil? : v_httpstart = tempstring.index("\n\n") end
if v_httpstart.nil? : v_httpstart = tempstring.length end
firstline = tempstring[0..v_firstlineend]
httpheaders = tempstring[v_firstlineend+2..v_httpstart]
httpbody = tempstring[v_httpstart+4..tempstring.length]
if filename.include? "request"
@req_reqline[filenum-1] = firstline
@req_headers[filenum-1] = httpheaders
@req_messagebody[filenum-1] = httpbody
elsif filename.include? "response"
@resp_statusline[filenum-1] = firstline
@resp_headers[filenum-1] = httpheaders
@resp_body[filenum-1] = httpbody
else
next
end
end
def show_allrequests
tempstring = String.new
# for each request, display request # and
@req_reqline.each_index do |indexnumber|
# ensure index exists
if ! @req_reqline[indexnumber].nil?
requesttolog = httpfirstline(@req_reqline[indexnumber])[1]
tempstring << (indexnumber+1).to_s + "-request " + requesttolog + "\n"
end
end
@data_hash["AllRequests"] = tempstring
end
def httpfirstline(firstline)
temparray = Array.new
# parse requestline or statusline into 3 separate elements. returns an array
temparray = firstline.split
return temparray
end
def search_headers(query, indexnum)
tempstring = String.new
searchqueryregex = Regexp.new(/^#{query}.*\n/i)
# search request headers
requestheaders = @req_headers[indexnum].gsub(/\r\n?/,"\n")
if requestheaders.scan(searchqueryregex).length != 0
tempstring << (indexnum+1).to_s + "-request "
requestheaders.scan(searchqueryregex).each do |match|
tempstring << match
end
end
# search response headers
responseheaders = @resp_headers[indexnum].gsub(/\r\n?/,"\n")
if responseheaders.scan(searchqueryregex).length != 0
tempstring << (indexnum+1).to_s + "-response "
responseheaders.scan(searchqueryregex).each do |match|
tempstring << match
end
end
return tempstring
end
def search_allheaders(searchquery)
tempstring = String.new
# for each set of headers, display index # and headers
@req_headers.each_index do |indexnumber|
# ensure index exists
if ! @req_reqline[indexnumber].nil?
tempstring << search_headers(searchquery, indexnumber)
end
end
@data_hash["HeaderSearch"] = tempstring
end
def search_allparameters(searchquery)
tempstring = String.new
# for each request, display request # and
@req_reqline.each_index do |indexnumber|
# ensure index exists
if ! @req_reqline[indexnumber].nil?
httpmethod, uri, protocol = httpfirstline(@req_reqline[indexnumber])
# identify parameters for GET METHOD
if httpmethod == "GET"
if uri.include?('?')
parameters = uri[uri.index('?')+1..uri.length]
else
parameters = ""
end
# identify parameters for POST METHOD
elsif httpmethod == "POST"
parameters = @req_messagebody[indexnumber]
else
next # do nothing
end
# SEARCH PARAMETERS (first match, case insensitive)
if parameters.downcase.include?(searchquery.downcase)
param_start = parameters.downcase.index(searchquery.downcase)
if ! parameters.index('&', param_start).nil?
# parameter is terminated by ampersand
param_end = parameters.index('&',param_start)-1
else
# parameter is terminated by line end
param_end = parameters.length
end
# log findings to tempstring (URL decoded)
parameterstolog = CGI.unescape(parameters[param_start..param_end])
tempstring << (indexnumber+1).to_s + "-request " + parameterstolog + "\n"
end
end
end
@data_hash["ParameterSearch"] = tempstring
end
def parseuri(uri)
# parse pseudoURI. returns an array
temparray = uri.split('?')
# split parameters from protocol, host, port, url
requestlineurlnoparams = temparray[0]
if temparray.length != 1 then
requestlineurlparameters = temparray[1]
else
requestlineurlparameters = ""
end
# determine the index positions of pseudo uri elements
v_protindex=requestlineurlnoparams.index('://')
v_hostindex=requestlineurlnoparams.index(':',v_protindex+1)
v_portindex=requestlineurlnoparams.index('/',v_hostindex)
v_uriindex=requestlineurlnoparams.rindex('?')
v_docindex=requestlineurlnoparams.rindex('/')
protocol = requestlineurlnoparams[0,v_protindex]
host = requestlineurlnoparams[v_protindex+3..v_hostindex-1]
port = requestlineurlnoparams[v_hostindex+1..v_portindex-1]
url = requestlineurlnoparams[v_portindex..requestlineurlnoparams.length]
document = requestlineurlnoparams[v_docindex+1..requestlineurlnoparams.length]
return requestlineurlnoparams,requestlineurlparameters, protocol, host, port, url, document
end
def gzipinflate(string)
gz = Zlib::GzipReader.new(StringIO.new(string))
xml = gz.read
return xml
end
# calculate MD5 and SHA1 sums
def getdigestsums(object)
md5value = Digest::MD5.hexdigest(object)
sha1value = Digest::SHA1.hexdigest(object)
return md5value,sha1value
end
def carve(conversationsdirectory)
tempstring = String.new
@resp_headers.each_index do |indexnumber|
# ensure index exists - is there content to carve?
if ! @resp_body[indexnumber].nil?
contenttype = search_headers("content-type", indexnumber)
transferencoding = search_headers("Transfer-Encoding", indexnumber)
httpmethod, uri, protocol = httpfirstline(@req_reqline[indexnumber])
requestlineurlnoparams, requestlineurlparameters, protocol, host, port, url, document = parseuri(uri)
# determine content encoding type
contentencoding = search_headers("content-encoding", indexnumber)
# content encoding is used
if contentencoding.length > 0
contentencodingtype = contentencoding.split(":")[1].downcase.strip
if transferencoding.length > 0 : transferencodingtype = transferencoding.split(":")[1].downcase.strip end
if contentencodingtype.include?("gzip") && transferencodingtype != "chunked"
# gzipped? inflate if so
content = gzipinflate(@resp_body[indexnumber])
else
# not gzipped? undetected encoding
content = @resp_body[indexnumber]
end
else
# no content encoding
content = @resp_body[indexnumber]
end
# carve html and image content types
if contenttype.include?("image/") || contenttype.include?("text/html") || contenttype.include?("application/x-javascript")
carvedfilename = sprintf("./#{conversationsdirectory}%04d-response.carved.#{document}", indexnumber+1)
savetofile(content, carvedfilename)
# md5/sha1 analysis
tempstring << "#{getdigestsums(@resp_body[indexnumber])[0]} #{getdigestsums(@resp_body[indexnumber])[1]} #{File.basename(carvedfilename)}\n"
# carve text/xml content type
elsif contenttype.include?("text/xml")
carvedfilename = sprintf("./#{conversationsdirectory}%04d-response.carved.xml", indexnumber+1)
savetofile(content, carvedfilename)
# md5/sha1 analysis
tempstring << "#{getdigestsums(@resp_body[indexnumber])[0]} #{getdigestsums(@resp_body[indexnumber])[1]} #{File.basename(carvedfilename)}\n"
# save xml in array
@resp_body_xml[indexnumber] = content
end
end
end
@data_hash["DigestSums"] = tempstring
end
def plistxmlsearch(query)
tempstring = String.new
# for each position in array
@resp_body_xml.each_index do |indexnumber|
# ensure index exists
if ! @resp_body_xml[indexnumber].nil?
xml=@resp_body_xml[indexnumber]
# plist xml is NOT fun, so use regex hack that counts on following format: plistnotfun
plistxmlsearchregex = Regexp.new(/.*#{query}.*<\/string>/i)
@resp_body_xml[indexnumber].scan(plistxmlsearchregex).each do |match|
# create XML root and parse XML
newxml = XmlSimple.xml_in("" + match + "")
searchresults = "#{indexnumber+1}-response #{newxml['key']}: #{newxml['string']}\n"
tempstring << searchresults
end
end
end
@data_hash["PlistXMLSearch"] = tempstring
end
def report
timestamp = sprintf("%04d%02d%02d%02d%02d%02d", @tl.year, @tl.month, @tl.day, @tl.hour, @tl.min, @tl.sec)
reportstring = String.new
if ! @data_hash["AllRequests"].nil?
reportstring << "---- request line summary " + "-" * 50 + "\n\n"
reportstring << @data_hash["AllRequests"]
end
if ! @data_hash["DigestSums"].nil?
reportstring << "\n\n---- digest sums of carved files (MD5 SHA1 FILENAME) " + "-" * 50 + "\n\n"
reportstring << @data_hash["DigestSums"]
end
if ! @data_hash["HeaderSearch"].nil?
reportstring << "\n\n---- header search " + "-" * 50 + "\n\n"
reportstring << @data_hash["HeaderSearch"]
end
if ! @data_hash["ParameterSearch"].nil?
reportstring << "\n\n---- parameter search " + "-" * 50 + "\n\n"
reportstring << @data_hash["ParameterSearch"]
end
if ! @data_hash["PlistXMLSearch"].nil?
reportstring << "\n\n---- plist xml search " + "-" * 50 + "\n\n"
reportstring << @data_hash["PlistXMLSearch"]
end
puts "timestamp: #{@tl}\n\n"
puts reportstring
end
# CLASS END
end
# --- driver: parse logs, run the requested analyses, print the report ---
# initialize the singleton data store
httpdata = ProxyData.instance
# glob pattern matching every request file in the webscarab output directory
webscarabrequests = File.join(opts[:directory], "*-request")
# process each request/response pair
Dir.glob(webscarabrequests) do |file|
  httpdata.processfile(file)
  # derive the matching response filename without mutating the glob result
  # (the original gsub! mutated `file` and would pass nil on a non-match)
  httpdata.processfile(file.sub("request", "response"))
end
# request line summary
httpdata.show_allrequests if opts[:requestlinesummary]
# header search
httpdata.search_allheaders(opts[:headersearch]) if opts[:headersearch]
# parameter search
httpdata.search_allparameters(opts[:parametersearch]) if opts[:parametersearch]
# file carving (always runs; also feeds the xml search below)
httpdata.carve(opts[:directory])
# plist xml data search
httpdata.plistxmlsearch(opts[:xmlsearch]) if opts[:xmlsearch]
# print the assembled report
httpdata.report