/*
  pextract.c
  
  Extracts PE executables from pcap file or network.
  Executables XORed with a single byte 'key' are also found.

  Prerequisites: libnids, libpcap
  Compile: gcc -o pextract pextract.c -l nids
*/

// ================= Includes =================
#define _GNU_SOURCE	// must precede every include so that memmem() is declared
#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <nids.h>

// BPF filter handed to libnids when the user supplies none on the command line
#define DEFAULT_PCAP_FILTER "tcp"
// Program version, printed by usage()
#define VERSION "1.0"

// ================= Windows structures/defines =================
// Values for the Subsystem field of the optional header.
// isValidPEHeader() accepts only UNKNOWN, NATIVE, WINDOWS_GUI and WINDOWS_CUI;
// the remaining values are defined here but not referenced in this file.
#define IMAGE_SUBSYSTEM_UNKNOWN              0   // Unknown subsystem.
#define IMAGE_SUBSYSTEM_NATIVE               1   // Image doesn't require a subsystem.
#define IMAGE_SUBSYSTEM_WINDOWS_GUI          2   // Image runs in the Windows GUI subsystem.
#define IMAGE_SUBSYSTEM_WINDOWS_CUI   3   // Image runs in the Windows character subsystem.
#define IMAGE_SUBSYSTEM_OS2_CUI              5   // Image runs in the OS/2 character subsystem.
#define IMAGE_SUBSYSTEM_POSIX_CUI            7   // Image runs in the POSIX character subsystem.
#define IMAGE_SUBSYSTEM_NATIVE_WINDOWS       8   // Image is a native Win9x driver.
#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI    9   // Image runs in the Windows CE subsystem.

// Fixed-width aliases carrying the Windows type names used by the header
// structures below, so the structure definitions keep their Windows spelling.
typedef uint32_t ULONG;
typedef int32_t LONG;
typedef uint16_t WORD;
typedef uint8_t UCHAR;

// DOS header found at the very start of a candidate executable.
// extract_exes() locates candidates by searching for the bytes "MZ", which
// therefore occupy e_magic. The only field this file reads is e_lfanew.
typedef struct _IMAGE_DOS_HEADER
{
     // First two bytes of the file; "MZ" for every candidate handed to the parser
     WORD e_magic;
     WORD e_cblp;
     WORD e_cp;
     WORD e_crlc;
     WORD e_cparhdr;
     WORD e_minalloc;
     WORD e_maxalloc;
     WORD e_ss;
     WORD e_sp;
     WORD e_csum;
     WORD e_ip;
     WORD e_cs;
     WORD e_lfarlc;
     WORD e_ovno;
     WORD e_res[4];
     WORD e_oemid;
     WORD e_oeminfo;
     WORD e_res2[10];
     // Offset, from the start of the DOS header, at which the PE (NT) header begins
     LONG e_lfanew;
} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER;

// One entry of the optional header's DataDirectory array: an address/size pair.
// The entries are part of the header layout only; this file never reads them.
typedef struct _IMAGE_DATA_DIRECTORY
{
     ULONG VirtualAddress;
     ULONG Size;
} IMAGE_DATA_DIRECTORY, *PIMAGE_DATA_DIRECTORY;

// Optional header, the third member of IMAGE_NT_HEADERS.
// isValidPEHeader() inspects Magic, Win32VersionValue and Subsystem; every
// other field is present only to give those three their correct offsets.
// NOTE(review): the field set (BaseOfData, 32-bit ImageBase) looks like the
// 32-bit "PE32" variant of this header - confirm against the PE specification.
typedef struct _IMAGE_OPTIONAL_HEADER
{
     // Compared against 267 (0x10b) by isValidPEHeader()
     WORD Magic;
     UCHAR MajorLinkerVersion;
     UCHAR MinorLinkerVersion;
     ULONG SizeOfCode;
     ULONG SizeOfInitializedData;
     ULONG SizeOfUninitializedData;
     ULONG AddressOfEntryPoint;
     ULONG BaseOfCode;
     ULONG BaseOfData;
     ULONG ImageBase;
     ULONG SectionAlignment;
     ULONG FileAlignment;
     WORD MajorOperatingSystemVersion;
     WORD MinorOperatingSystemVersion;
     WORD MajorImageVersion;
     WORD MinorImageVersion;
     WORD MajorSubsystemVersion;
     WORD MinorSubsystemVersion;
     // Must be zero for isValidPEHeader() to accept the header
     ULONG Win32VersionValue;
     ULONG SizeOfImage;
     ULONG SizeOfHeaders;
     ULONG CheckSum;
     // One of the IMAGE_SUBSYSTEM_* values
     WORD Subsystem;
     WORD DllCharacteristics;
     ULONG SizeOfStackReserve;
     ULONG SizeOfStackCommit;
     ULONG SizeOfHeapReserve;
     ULONG SizeOfHeapCommit;
     ULONG LoaderFlags;
     ULONG NumberOfRvaAndSizes;
     IMAGE_DATA_DIRECTORY DataDirectory[16];
} IMAGE_OPTIONAL_HEADER, *PIMAGE_OPTIONAL_HEADER;
	
// File header, the second member of IMAGE_NT_HEADERS (directly after the
// 4-byte signature). determine_file_size() reads NumberOfSections and
// SizeOfOptionalHeader to locate and walk the section table.
typedef struct _IMAGE_FILE_HEADER
{
     // isValidPEHeader() requires the bytes 0x4c 0x01 here (last two bytes of its 6-byte magic)
     WORD Machine;
     // Number of IMAGE_SECTION_HEADER entries in the section table
     WORD NumberOfSections;
     ULONG TimeDateStamp;
     ULONG PointerToSymbolTable;
     ULONG NumberOfSymbols;
     // Added to the start of the optional header to find the section table
     WORD SizeOfOptionalHeader;
     WORD Characteristics;
} IMAGE_FILE_HEADER, *PIMAGE_FILE_HEADER;
	
// PE (NT) header, located e_lfanew bytes after the start of the DOS header.
// Signature (4 bytes) plus FileHeader (20 bytes) put the optional header at
// offset 0x18, the constant determine_file_size() uses.
typedef struct _IMAGE_NT_HEADERS
{
     // isValidPEHeader() requires the bytes "PE\0\0" here
     ULONG Signature;
     IMAGE_FILE_HEADER FileHeader;
     IMAGE_OPTIONAL_HEADER OptionalHeader;
} IMAGE_NT_HEADERS, *PIMAGE_NT_HEADERS;

// One entry of the section table that follows the optional header.
// determine_file_size() steps through the table in sizeof(IMAGE_SECTION_HEADER)
// strides and reads only SizeOfRawData and PointerToRawData.
typedef struct _IMAGE_SECTION_HEADER
{
     UCHAR Name[8];
     ULONG Misc;
     ULONG VirtualAddress;
     // Number of bytes the section occupies in the file
     ULONG SizeOfRawData;
     // Offset of the section's data, relative to the start of the file
     ULONG PointerToRawData;
     ULONG PointerToRelocations;
     ULONG PointerToLinenumbers;
     WORD NumberOfRelocations;
     WORD NumberOfLinenumbers;
     ULONG Characteristics;
} IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER;

// ================= Functions =================
void
usage(void)
{
	fprintf(stderr, "Version: " VERSION "\n"
		"Usage: pextract (-i interface  | -f inputfile) [BPF filter]\n");
	exit(1);
}

// Check whether contents in buffer is a valid PE header
int isValidPEHeader(char* buffer, unsigned long size)
{
	char pe_magic[] = "\x50\x45\x00\x00\x4c\x01";
	const WORD magic_value = 267;
	// Check whether buffer contains enough bytes to contain a dos header
	if (size < sizeof(IMAGE_DOS_HEADER))
	{
		return 0;
	}
	IMAGE_DOS_HEADER* dos_header = (IMAGE_DOS_HEADER*) buffer;
	// Check whether buffer is large enough to contain a PE header
	if (size < dos_header->e_lfanew || size - dos_header->e_lfanew < sizeof(IMAGE_NT_HEADERS))
	{
		return 0;
	}
	IMAGE_NT_HEADERS* header = (IMAGE_NT_HEADERS*) (buffer + dos_header->e_lfanew);

	// A PE header should start with this sequence (PE00)
	if (memcmp(header, pe_magic, 6))
		return 0;
	// Check magic value
	if (header->OptionalHeader.Magic != magic_value)
		return 0;

	// Win32 VersionValue should be zero
	if (header->OptionalHeader.Win32VersionValue != 0)
		return 0;

	// Check whether subsystem is sane one
	if (header->OptionalHeader.Subsystem != IMAGE_SUBSYSTEM_NATIVE &&
		header->OptionalHeader.Subsystem != IMAGE_SUBSYSTEM_UNKNOWN &&
		header->OptionalHeader.Subsystem != IMAGE_SUBSYSTEM_WINDOWS_GUI &&
		header->OptionalHeader.Subsystem != IMAGE_SUBSYSTEM_WINDOWS_CUI)
		return 0;

	// Changes are that this is actually a valid PE header
	return 1;
}

// Calculate the on-disk size of the PE file that starts at `buffer`.
//
// The size is taken to be the largest end offset (PointerToRawData +
// SizeOfRawData) of any section that has raw data; offsets are relative to
// the start of the file.
//
// buffer: start of a PE file whose DOS and NT headers were already accepted
//         by isValidPEHeader(). No length is passed in, so the caller must
//         make sure the whole section table is readable and must clamp the
//         result to the number of bytes it actually holds.
//
// Returns the file size in bytes (0 when no section carries raw data),
// saturated at UINT32_MAX.
uint32_t determine_file_size(char* buffer)
{
	IMAGE_DOS_HEADER dos_header;
	IMAGE_NT_HEADERS header;

	// The buffer may start at any byte offset of a captured stream, so the
	// headers are copied into aligned locals instead of being accessed
	// through casted pointers.

	// From the DOS header we know at which offset the PE header starts
	memcpy(&dos_header, buffer, sizeof dos_header);

	// The PE header gives the number of sections and the optional header size
	const char* pe_header = buffer + dos_header.e_lfanew;
	memcpy(&header, pe_header, sizeof header);

	// The section headers start right after the optional header, which itself
	// begins 0x18 bytes into the PE header (4-byte signature + file header)
	const char* sectionheaders_start =
		pe_header + 0x18 + header.FileHeader.SizeOfOptionalHeader;

	// Find the section that ends nearest to the end of the file. The sum is
	// done in 64 bits so hostile 32-bit values cannot wrap around.
	uint64_t file_end = 0;
	for (uint16_t i = 0; i < header.FileHeader.NumberOfSections; i++)
	{
		IMAGE_SECTION_HEADER section_header;
		memcpy(&section_header,
		       sectionheaders_start + (size_t) i * sizeof section_header,
		       sizeof section_header);

		// Sections without raw data occupy no bytes in the file
		if (section_header.SizeOfRawData == 0)
			continue;

		uint64_t section_end = (uint64_t) section_header.PointerToRawData +
			section_header.SizeOfRawData;
		if (section_end > file_end)
			file_end = section_end;
	}

	return file_end > UINT32_MAX ? UINT32_MAX : (uint32_t) file_end;
}

// Convert a numerical IPv4 address to dot-separated notation.
//
// a:  output buffer; must hold at least 16 bytes ("255.255.255.255" + NUL)
// ip: address whose least significant byte is the first octet printed
//     NOTE(review): callers pass tuple4 addresses straight through; this
//     presumably relies on network-order values on a little-endian host -
//     confirm before porting to a big-endian machine.
//
// Returns `a`.
char* ntoa(char* a, unsigned int ip)
{
	// Octets are kept unsigned. Stored in plain (possibly signed) char, any
	// octet >= 128 would print as a negative number and could overflow the
	// caller's 16-byte buffer.
	unsigned int octets[4];
	int i;

	for (i = 0; i < 4; i++)
	{
		octets[i] = (ip >> (8 * i)) & 0xFFu;
	}
	snprintf(a, 16, "%u.%u.%u.%u", octets[0], octets[1], octets[2], octets[3]);

	return a;
}

// Save buffer to disk.
//
// buffer:   bytes to write
// size:     number of bytes to write from `buffer`
// filename: path of the file to create (an existing file is truncated)
//
// Failures to open, write or close the file are reported on stderr through
// warn(); the function then returns normally, so one unwritable file does
// not stop the extraction of others.
void save_file(char* buffer, unsigned long size, char* filename)
{
	FILE* f = fopen(filename, "wb");
	if (f == NULL)
	{
		warn("cannot open %s for writing", filename);
		return;
	}

	size_t written = fwrite(buffer, 1, size, f);
	if (written != size)
		warn("short write to %s (%lu of %lu bytes)", filename,
		     (unsigned long) written, size);

	// fclose() flushes buffered data, so a failure here means lost bytes
	if (fclose(f) != 0)
		warn("cannot close %s", filename);
}

// Render a connection 4-tuple as "<src ip>:<src port>-<dst ip>:<dst port>".
// The text lives in a static buffer that every call overwrites, so the
// result has to be used (or copied) before the next call.
char* addr_as_string(struct tuple4 *addr)
{
	static char text[256];
	char dst_ip[16];
	char src_ip[16];

	ntoa(dst_ip, addr->daddr);
	ntoa(src_ip, addr->saddr);

	snprintf(text, sizeof text, "%s:%d-%s:%d",
	         src_ip, addr->source, dst_ip, addr->dest);
	return text;
}

// Extract executables from a tcp stream
void
extract_exes(struct half_stream* hs, struct tuple4* addr)
{

	char* remaining_buffer = (char*) malloc(hs->count);
	char* saved_ptr = remaining_buffer;

	// Variable to track the number of files we already found in this TCP connection
	int counter = 0;

	// Iterate over all possible one-byte XOR keys
	int c;
	for (c = 0; c < 256; c++)
	{
		// Reset buffer to beginning
		remaining_buffer = saved_ptr;

		// Xor data
		int i;
		for (i=0; i < hs->count; i++)
		{
			remaining_buffer[i] = hs->data[i] ^ c;
		}
		int remaining_size = hs->count;

		// Iterate over the data and try to find the magic value for a DOS header (MZ)
		while (1)
		{
			// find MZ string in data
			char* possible_start = (char*) memmem(remaining_buffer, remaining_size, "MZ", 2);

			if (possible_start)
			{
				// A possible match, next check whether we can find a PE header
				if (isValidPEHeader(possible_start, remaining_buffer + remaining_size - possible_start))
				{
					// Save found PE file
					uint32_t filesize = determine_file_size(possible_start);
					char filename[256];
					sprintf(filename, "%s.%d._exe", addr_as_string(addr), counter);
					printf("Saving MS exectuable as %s (%d used as xor key)\n", filename, c);
					counter++;
					save_file(possible_start, filesize, filename);
				}
			}
			else
			{
				// MZ not found, weŕe finished
				break;
			}
			remaining_size -= possible_start - remaining_buffer + 1;
			remaining_buffer = possible_start + 1;
		}
	}
	free(saved_ptr);
}

// Utility: map a nids connection state code to its symbolic name.
// Known states yield the name of the constant followed by a newline; any
// other value yields "unknown state" (without a newline).
char* nids_state_as_string(char state)
{
	if (state == NIDS_JUST_EST)
		return "NIDS_JUST_EST\n";
	if (state == NIDS_DATA)
		return "NIDS_DATA\n";
	if (state == NIDS_CLOSE)
		return "NIDS_CLOSE\n";
	if (state == NIDS_RESET)
		return "NIDS_RESET\n";
	if (state == NIDS_TIMED_OUT)
		return "NIDS_TIMED_OUT\n";

	return "unknown state";
}

// TCP callback registered with nids.
// While a connection is being established or is delivering data, make nids
// keep the bytes of both directions. In every other state, search both
// directions of the connection for executables.
// The second parameter is not used.
void
sniff_tcp(struct tcp_stream *ts, void **yoda)
{
	const int state = ts->nids_state;

	if (state != NIDS_JUST_EST && state != NIDS_DATA)
	{
		// Connection is over (or in some other state): scan what was collected
		extract_exes(&ts->client, &ts->addr);
		extract_exes(&ts->server, &ts->addr);
		return;
	}

	if (state == NIDS_JUST_EST)
	{
		// Ask nids to collect data from both sides of the tcp connection
		ts->server.collect = 1;
		ts->client.collect = 1;
		printf("Connection: %s\n", addr_as_string(&ts->addr));
	}

	// Reached for NIDS_JUST_EST as well as NIDS_DATA: tell nids to keep the
	// data for later; by default nids would discard it
	nids_discard(ts, 0);
}

// Utility: join a NULL-terminated argument vector into one string.
//
// argv: NULL-terminated array of strings
//
// Returns a newly allocated string holding the arguments separated by single
// spaces (the caller owns it and may free() it), or NULL when argv is NULL or
// contains no arguments. Exits through err() when memory cannot be allocated.
char *
copy_argv(char **argv)
{
	if (argv == NULL || argv[0] == NULL)
		return NULL;

	// Every argument needs its length plus one byte: a separating space, or
	// the terminating NUL for the last one
	size_t len = 0;
	for (char **p = argv; *p != NULL; p++)
		len += strlen(*p) + 1;

	char *buf = malloc(len);
	if (buf == NULL)
		err(1, "copy_argv: malloc");

	char *dst = buf;
	for (char **p = argv; *p != NULL; p++) {
		size_t n = strlen(*p);
		memcpy(dst, *p, n);
		dst += n;
		*dst++ = ' ';
	}
	// Turn the space written after the last argument into the terminator
	dst[-1] = '\0';

	return buf;
}

// Program entry point: parse the command line, configure nids, register the
// TCP callback and run the capture loop.
//   -f <file>       read packets from a capture file
//   -i <interface>  capture from a network interface
// Any remaining arguments are joined into the BPF filter; without them the
// default filter is used. Any other option prints the usage text and exits.
int
main(int argc, char *argv[])
{
	int opt;

	// Parse command-line options
	while ((opt = getopt(argc, argv, "f:i:h?")) != -1) {
		if (opt == 'f')
			nids_params.filename = optarg;
		else if (opt == 'i')
			nids_params.device = optarg;
		else
			usage();
	}
	argv += optind;
	argc -= optind;

	// Use the BPF filter supplied by the user, or the default one otherwise
	nids_params.pcap_filter = (argc > 0) ? copy_argv(argv) : DEFAULT_PCAP_FILTER;

	nids_params.scan_num_hosts = 0;

	if (nids_init() == 0)
		errx(1, "%s. Is your BPF correct?", nids_errbuf);

	nids_register_tcp(sniff_tcp);

	if (nids_params.device != NULL)
		warnx("listening on %s [%s]", nids_params.device,
		      nids_params.pcap_filter);

	if (nids_params.filename != NULL)
		warnx("reading from %s [%s]", nids_params.filename,
		      nids_params.pcap_filter);

	nids_run();

	exit(0);
}

