#!/usr/bin/env python
"""Read lines of passer CSV format data on stdin and spit out a json structure with all of the data loaded."""
#Sample call: cat /Volumes/passer_fan/med/pfan-ssd/ipv4/10/0/0/*/10.0.0.*.passer.{recent,archive}.csv | egrep -v '(^DN,|,dns/server)' | head -1000000 | ~/med/programming/subnetgrep/subnetgrep.py -t , -k 2 10.0.0.0/24 | justone | bin/passer2json.py | jq '."10.0.0.41"' | egrep -v '(: \[\],*$|"is_router": null,)' | less

import sys
import os.path
import ipaddress                                        #To normalize ip addresses, specifically ipv6
import json
import re

sys.path.insert(0, os.getcwd())
from normalize_ip import ip_addr_obj, explode_ip        #Wrapper around ipaddress functions

passer2json_version = '0.5'

unclean_chars = r'[^a-zA-Z0-9~|=#:@ ,_/\.\+()\[\];!\?\*-]'      #We discard any lines that have characters outside this set.

data_tree = {}                          #The dictionary that will end up holding the entire tree of data and will be output as json at the end.
report_closed_server_ports = False      #Should we report on closed server ports or not?
Devel = True


def debug(message):
    """Write debug message to stderr."""
    sys.stderr.write(message + '\n')


def fail(fail_message):
    """Write to stderr and exit."""
    debug(fail_message)
    #quit(1)


def populate_dict(ip_addr):
    """Create the keys in the data_tree dictionary for the given ip.

    Safe to call more than once for the same ip; an existing entry is
    left untouched.
    """
    global data_tree

    #dict.has_key() was removed in Python 3; the "in" operator works in both Python 2 and 3.
    if ip_addr not in data_tree:
        data_tree[ip_addr] = {
            'dns_names': [],            #Note; cannot use sets as json can't serialize them
            'general_names': [],
            'open_server_ports': [],
            'closed_server_ports': [],
            'client_ports': [],
            'is_router': None,
            'router_details': [],
            'mac_addrs': [],
            'ip_details': [],
            'ns_for': [],
            'mx_for': [],
            'peers': [],
        }
        #data_tree[ip_addr]['contact_info'] = []        #Will be storing under domain, not ip.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Passer2json version ' + str(passer2json_version) + ' . Convert supplied passer format CSV lines into json format.')
    #Planned-but-unimplemented options, kept for reference:
    #parser.add_argument('-f', '--file', help='File(s) to convert', required=False, nargs='*')
    #parser.add_argument('-s', '--stdin-lines', help='Read CSV lines from stdin', required=False, action='store_true')
    #parser.add_argument('-t', '--target', help='Target directory into which to store json output files', required=False)
    #parser.add_argument('-v', '--verbose', help='Verbose, more detail about each action', action='store_true')
    args = vars(parser.parse_args())

    for InLine in sys.stdin:
        #Strip trailing whitespace and NUL bytes ('\x00' replaces the Python-2-only "'00'.decode('hex')"),
        #then neuter quotes so they cannot break the json output.
        strip_line = InLine.rstrip(' \t\r\n\x00').replace('"', '_').replace("'", '_')
        if re.search(unclean_chars, strip_line):
            debug('Skipping for unclean characters:' + strip_line)
            continue

        Fields = strip_line.split(',')          #The 5 CSV fields: record type, IP, protocol/type, name/port, details
        if len(Fields) != 5:
            continue                            #Skip this malformed input line

        clean_ip_obj = ip_addr_obj(Fields[1])
        if clean_ip_obj is None:
            continue
        clean_ip = explode_ip(clean_ip_obj)
        populate_dict(clean_ip)

        if Fields[0] == "DN":                                           #DNS records
            if Fields[2] in ('A', 'AAAA', 'CNAME', 'PTR', 'SRV'):
                if clean_ip != '0.0.0.0':
                    if Fields[4]:
                        service_string = Fields[3].rstrip('.') + ';' + Fields[4]
                    else:
                        service_string = Fields[3].rstrip('.')
                    if service_string not in data_tree[clean_ip]['dns_names']:
                        data_tree[clean_ip]['dns_names'].append(service_string)
            elif Fields[2] == 'NS':
                domain = Fields[3].rstrip('.')
                nameserver = Fields[4].rstrip('.')
                if nameserver and nameserver not in data_tree[clean_ip]['dns_names']:
                    data_tree[clean_ip]['dns_names'].append(nameserver)
                if domain not in data_tree[clean_ip]['ns_for']:
                    data_tree[clean_ip]['ns_for'].append(domain)
            elif Fields[2] == 'MX':
                domain = Fields[3].rstrip('.')
                mailserver = Fields[4].rstrip('.')
                if mailserver and mailserver not in data_tree[clean_ip]['dns_names']:
                    data_tree[clean_ip]['dns_names'].append(mailserver)
                if domain not in data_tree[clean_ip]['mx_for']:
                    data_tree[clean_ip]['mx_for'].append(domain)
            elif Fields[1] == '0.0.0.0' and Fields[2] == 'SOA':
                domain = Fields[3].rstrip('.')
                soa_fields = Fields[4].split(' ')
                #one_dns = soa_fields[0]        #This is a nameserver _hostname_; would need manual conversion to an IP to store in ns_for.
                if len(soa_fields) >= 2:        #Guard against a malformed SOA payload with no contact field
                    email_addr = soa_fields[1].replace('.', '@', 1)
                    #setdefault covers both a brand-new domain entry and one first created by
                    #the reputation branch (which does not add a 'contact_info' key).
                    contact_list = data_tree.setdefault(domain, {}).setdefault('contact_info', [])
                    if email_addr and email_addr not in contact_list:
                        contact_list.append(email_addr)
            elif Fields[1] == '0.0.0.0' and Fields[2] == 'TXT':
                pass
            else:
                fail(InLine)
        elif Fields[0] == "DO":                                         #Domain-level observations
            if Fields[1] == '0.0.0.0' and Fields[2] == 'reputation':
                domain = Fields[3].rstrip('.')
                rep_string = Fields[4]
                if rep_string:
                    if domain not in data_tree:
                        data_tree[domain] = {}
                    if 'reputation' not in data_tree[domain]:
                        data_tree[domain]['reputation'] = []
                    if rep_string not in data_tree[domain]['reputation']:
                        data_tree[domain]['reputation'].append(rep_string)
        elif Fields[0] == "NA":                                         #Non-DNS names (PTR/DHCP)
            if Fields[2] in ('PTR', 'DHCP'):
                if Fields[4]:
                    service_string = Fields[3] + ';' + Fields[4]
                else:
                    service_string = Fields[3]
                if service_string not in data_tree[clean_ip]['general_names']:
                    data_tree[clean_ip]['general_names'].append(service_string)
            else:
                fail(InLine)
        elif Fields[0] == "IP" and Fields[2] in ("IP", "service_banner"):
            if Fields[4] and Fields[4] != 'p0f failure':
                service_string = Fields[3] + ';' + Fields[4]
            else:
                service_string = Fields[3]
            if service_string not in data_tree[clean_ip]['ip_details']:
                data_tree[clean_ip]['ip_details'].append(service_string)
        elif Fields[0] == "PE" and Fields[2] == "traceroute" and Fields[3] in ('is_beyond', 'precedes'):
            peer_ip_obj = ip_addr_obj(Fields[4])
            if peer_ip_obj is None:
                continue
            peer_ip = explode_ip(peer_ip_obj)
            populate_dict(peer_ip)
            #Record the peering in both directions.
            if peer_ip not in data_tree[clean_ip]['peers']:
                data_tree[clean_ip]['peers'].append(peer_ip)
            if clean_ip not in data_tree[peer_ip]['peers']:
                data_tree[peer_ip]['peers'].append(clean_ip)
        elif Fields[0] in ("US", "UD"):                                 #UDP server ports
            if Fields[3] == 'open':
                if Fields[4]:
                    service_string = Fields[2] + ';' + Fields[4]
                else:
                    service_string = Fields[2]
                if service_string not in data_tree[clean_ip]['open_server_ports']:
                    data_tree[clean_ip]['open_server_ports'].append(service_string)
            elif Fields[3] == 'closed':
                if report_closed_server_ports:
                    if Fields[2] not in data_tree[clean_ip]['closed_server_ports']:
                        data_tree[clean_ip]['closed_server_ports'].append(Fields[2])
            else:
                fail(InLine)
        elif Fields[0] == "UC":                                         #UDP client ports
            if Fields[3] == 'open':
                if Fields[4]:
                    service_string = Fields[2] + ';' + Fields[4]
                else:
                    service_string = Fields[2]
                if service_string not in data_tree[clean_ip]['client_ports']:
                    data_tree[clean_ip]['client_ports'].append(service_string)
            else:
                fail(InLine)
        elif Fields[0] == "TS":                                         #TCP server ports
            if Fields[3] == 'listening':
                if Fields[4]:
                    service_string = Fields[2] + ';' + Fields[4]
                else:
                    service_string = Fields[2]
                if service_string not in data_tree[clean_ip]['open_server_ports']:
                    data_tree[clean_ip]['open_server_ports'].append(service_string)
            elif Fields[3] == 'closed':
                if report_closed_server_ports:
                    if Fields[2] not in data_tree[clean_ip]['closed_server_ports']:
                        data_tree[clean_ip]['closed_server_ports'].append(Fields[2])
            elif Fields[3] == 'unknown':
                if Fields[4]:
                    service_string = Fields[4]
                    if service_string not in data_tree[clean_ip]['open_server_ports']:
                        data_tree[clean_ip]['open_server_ports'].append(service_string)
            else:
                fail(InLine)
        elif Fields[0] == "TC":                                         #TCP client ports
            if Fields[3] == 'open':
                if Fields[4]:
                    service_string = Fields[2] + ';' + Fields[4]
                else:
                    service_string = Fields[2]
                if service_string not in data_tree[clean_ip]['client_ports']:
                    data_tree[clean_ip]['client_ports'].append(service_string)
            else:
                fail(InLine)
        elif Fields[0] == "RO":                                         #Router detections
            if Fields[3] == 'router':
                data_tree[clean_ip]['is_router'] = True
                if Fields[4] and Fields[4] not in data_tree[clean_ip]['router_details']:
                    data_tree[clean_ip]['router_details'].append(Fields[4])
            else:
                fail(InLine)
        elif Fields[0] == "MA":                                         #MAC addresses
            if Fields[2] == 'Ethernet':
                if Fields[4]:
                    service_string = Fields[3] + ';' + Fields[4]
                else:
                    service_string = Fields[3]
                if service_string not in data_tree[clean_ip]['mac_addrs']:
                    data_tree[clean_ip]['mac_addrs'].append(service_string)
            else:
                fail(InLine)
        elif Fields[0] == "GE":                                         #Geolocation; not stored
            if Fields[2] in ("CC", "COUNTRY", "CSC"):                   #GE,10.0.0.0,CC,NU,
                pass
            else:
                fail(InLine)
        elif Fields[0] in ("NB", "NE"):                                 #Netbios records; not stored
            pass
        else:
            fail(InLine)

    #debug(str(data_tree))
    print(json.dumps(data_tree, sort_keys=True))