import argparse
import csv


def _filter_edgelist(in_path, out_path):
    """Copy the edges whose second column is also a first-column node.

    The input is a tab-separated edgelist with one header row. The file is
    read twice: the first pass collects every value seen in column 0, the
    second pass writes the rows whose column 1 value is in that set. Two
    passes keep memory use proportional to the number of distinct nodes
    rather than to the number of edges.

    Args:
        in_path: Path of the full tab-separated edgelist (with header row).
        out_path: Path of the filtered edgelist to write. It always starts
            with the fixed header ``to``, ``from``, ``date``.
    """
    # newline='' is what the csv module expects for files it reads or writes;
    # without it, written rows end in '\r\r\n' on Windows.
    with open(in_path, 'r', newline='') as in_file:
        reader = csv.reader(in_file, delimiter='\t')
        next(reader, None)  # Discard header; tolerate a completely empty file
        # Blank lines come back from csv.reader as [], so skip them.
        nodes = {row[0] for row in reader if row}

        # Second pass: rewind and use a fresh reader so the header is
        # discarded again instead of being treated as an edge.
        in_file.seek(0)
        reader = csv.reader(in_file, delimiter='\t')
        next(reader, None)

        with open(out_path, 'w', newline='') as out_file:
            writer = csv.writer(out_file, delimiter='\t')
            writer.writerow(['to', 'from', 'date'])
            # Column 0 of every data row is in `nodes` by construction, so
            # checking column 1 is enough to ensure both endpoints are kept
            # nodes. Rows with fewer than two columns cannot be edges.
            writer.writerows(
                row for row in reader
                if len(row) > 1 and row[1] in nodes
            )


def main(argv=None):
    """Parse the command line and write the reduced edgelist.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse falls back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Take the edgelist, and reduce it to just the papers which are in our search')
    # Both paths are needed; required=True gives a usage error instead of a
    # TypeError from open(None) when one is omitted.
    parser.add_argument('-i', required=True, help='Full edgelist file')
    parser.add_argument('-o', required=True, help='Edgelist output file')
    args = parser.parse_args(argv)

    _filter_edgelist(args.i, args.o)


if __name__ == '__main__':
    main()