|  |  | @ -3,6 +3,23 @@ import assert from "assert"; | 
			
		
	
		
		
			
				
					
					|  |  |  | import UAParser from "ua-parser-js"; |  |  |  | import UAParser from "ua-parser-js"; | 
			
		
	
		
		
			
				
					
					|  |  |  | import readline from "readline"; |  |  |  | import readline from "readline"; | 
			
		
	
		
		
			
				
					
					|  |  |  | import { program } from "commander"; |  |  |  | import { program } from "commander"; | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | import glob from "fast-glob"; | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | program | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   .option("--min <Number>", "The lowest count to print. Stop at this.", 1) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   .option("--errors", "Show the erorrs so you can fix them.", false) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   .option("--format <string>", "Output format, text or json. Ignores min for raw output.", "json") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   .option("--outfile <string>", "Save to file rather than stdout.") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   .requiredOption("--domain <String>", "Domain for the log. Gets removed as a refer.") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   .requiredOption("--input <String>", "Input file glob.") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   .description("Processes different web server logs to determine request chain frequency.") | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   .version(0.1); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | program.parse(); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | const OPTS = program.opts(); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | OPTS.min = parseInt(OPTS.min); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | class Parser { |  |  |  | class Parser { | 
			
		
	
		
		
			
				
					
					|  |  |  |   constructor() { |  |  |  |   constructor() { | 
			
		
	
	
		
		
			
				
					|  |  | @ -139,7 +156,7 @@ class Parser { | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | const parse_logs = async (file_name, errors) => { |  |  |  | const parse_log_file = async (results, stats, file_name, errors) => { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   const read_stream = fs.createReadStream(file_name); |  |  |  |   const read_stream = fs.createReadStream(file_name); | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   const rl = readline.createInterface({ |  |  |  |   const rl = readline.createInterface({ | 
			
		
	
	
		
		
			
				
					|  |  | @ -149,19 +166,8 @@ const parse_logs = async (file_name, errors) => { | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   const parser = new Parser(); |  |  |  |   const parser = new Parser(); | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   const stats = { |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     lines: 0, |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     chains: 0, |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     excluded: 0, |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     errors: 0, |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     roots: 0, |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     firsts: 0 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   }; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   const skip = /(authcheck|.*\.svg|.*\.webmanifest|.*\.js|.*\.css|.*php|socket\.io|\.env|.*\.png|.*\.txt|.*\.woff|.*\.jpg|.*\.mp4|.*\.torrent|\-|.*\.ico|\/api\/.*\?.*|.*\.html|.*\.map|.*.php)/ |  |  |  |   const skip = /(authcheck|.*\.svg|.*\.webmanifest|.*\.js|.*\.css|.*php|socket\.io|\.env|.*\.png|.*\.txt|.*\.woff|.*\.jpg|.*\.mp4|.*\.torrent|\-|.*\.ico|\/api\/.*\?.*|.*\.html|.*\.map|.*.php)/ | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   const by_ip = {}; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   for await (let line of rl) { |  |  |  |   for await (let line of rl) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     try { |  |  |  |     try { | 
			
		
	
		
		
			
				
					
					|  |  |  |       stats.lines += 1; |  |  |  |       stats.lines += 1; | 
			
		
	
	
		
		
			
				
					|  |  | @ -173,40 +179,43 @@ const parse_logs = async (file_name, errors) => { | 
			
		
	
		
		
			
				
					
					|  |  |  |       if(data.url.match(skip)) continue; |  |  |  |       if(data.url.match(skip)) continue; | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       // store or update the chain in the by_ip chain
 |  |  |  |       // store or update the chain in the by_ip chain
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       const ip_chain = by_ip[data.ip] || []; |  |  |  |       const ip_chain = results[data.ip] || []; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       ip_chain.push(data); |  |  |  |       ip_chain.push(data); | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       by_ip[data.ip] = ip_chain; |  |  |  |       results[data.ip] = ip_chain; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     } catch(error) { |  |  |  |     } catch(error) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       if(errors) console.error(error); |  |  |  |       if(errors) console.error(error); | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       stats.errors += 1; |  |  |  |       stats.errors += 1; | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   return [by_ip, stats]; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | const chain_to_set = (requests) => { |  |  |  | const parse_logs_glob = async (file_glob, errors) => { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   const path = new Set(); |  |  |  |   const file_list = glob.sync(file_glob); | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   const results = {}; | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   const stats = { | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     lines: 0, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     chains: 0, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     excluded: 0, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     errors: 0, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     roots: 0, | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     firsts: 0 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   }; | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   for(let r of requests) { |  |  |  |   for(let file_name of file_list) { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     path.add(r.url); |  |  |  |     await parse_log_file(results, stats, file_name, errors); | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   return path.values(); |  |  |  |   return [results, stats]; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | const chain_to_list = (requests) => { |  |  |  | const chain_to_set = (requests) => { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   const path = []; |  |  |  |   const path = new Set(); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   let seen; |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   for(let r of requests) { |  |  |  |   for(let r of requests) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     if(r.url != seen) { |  |  |  |     path.add(r.url); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |       path.push(r.url); |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |       seen = r.url; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |  | 
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   return path.values(); |  |  |  |   return path.values(); | 
			
		
	
	
		
		
			
				
					|  |  | @ -221,11 +230,11 @@ const construct_url_set = (domain, ref, full_chain) => { | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | const construct_request_chains = (by_ip, domain, as_set) => { |  |  |  | const construct_request_chains = (by_ip, domain) => { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   let ip_chains = {}; |  |  |  |   let ip_chains = {}; | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   for(let [ip, requests] of Object.entries(by_ip)) { |  |  |  |   for(let [ip, requests] of Object.entries(by_ip)) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     const chain = as_set ? chain_to_set(requests) : chain_to_list(requests); |  |  |  |     const chain = chain_to_set(requests); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     // record the initial refer to track entry to the site
 |  |  |  |     // record the initial refer to track entry to the site
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     const ref = requests[0].refer; |  |  |  |     const ref = requests[0].refer; | 
			
		
	
	
		
		
			
				
					|  |  | @ -294,33 +303,14 @@ const write_results = async (stats, chains, format, outfile) => { | 
			
		
	
		
		
			
				
					
					|  |  |  |   fs.closeSync(fd); |  |  |  |   fs.closeSync(fd); | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | program |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .option("--no-set", "Use a Set instead of a list for chains.") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .option("--min <Number>", "The lowest count to print. Stop at this.", 1) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .option("--errors", "Show the erorrs so you can fix them.", false) |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .option("--format <string>", "Output format, text or json. Ignores min for raw output.", "json") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .option("--outfile <string>", "Save to file rather than stdout.") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .requiredOption("--domain <String>", "Domain for the log. Gets removed as a refer.") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .requiredOption("--input <String>", "Input file.") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .description("Processes different web server logs to determine request chain frequency.") |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   .version(0.1); |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | program.parse(); |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | const OPTS = program.opts(); |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | OPTS.min = parseInt(OPTS.min); |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`); |  |  |  | assert(!isNaN(OPTS.min), `min must be a number, you have ${OPTS.min}`); | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | try { |  |  |  | const [by_ip, stats] = await parse_logs_glob(OPTS.input, OPTS.errors); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   const [by_ip, stats] = await parse_logs(OPTS.input, OPTS.errors); |  |  |  | const chains = construct_request_chains(by_ip, OPTS.domain); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   const chains = construct_request_chains(by_ip, OPTS.domain, OPTS.set); |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   if(OPTS.outfile) { |  |  |  | if(OPTS.outfile) { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     write_results(stats, chains, OPTS.format, OPTS.outfile); |  |  |  |   write_results(stats, chains, OPTS.format, OPTS.outfile); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   } else { |  |  |  | } else { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     output_results(stats, chains, OPTS.format, OPTS.outfile); |  |  |  |   output_results(stats, chains, OPTS.format, OPTS.outfile); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | } catch(error) { |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   console.error(error.message); |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   process.exit(1); |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
	
		
		
			
				
					|  |  | 
 |