1 from request_functions import *
def _load_citation_counts(path):
    """Return a dict mapping eid -> citedby-count read from a JSON-lines file.

    Each non-blank line of *path* must be a JSON object with the keys
    ``eid`` and ``citedby-count``.  If an eid occurs more than once, the
    last occurrence wins.
    """
    counts = {}
    with open(path, 'r') as in_file:
        for line in in_file:
            if not line.strip():
                continue  # tolerate blank lines
            record = json.loads(line)
            counts[record['eid']] = record['citedby-count']
    return counts


def _resume_completed_eids(path):
    """Clean up a previous, possibly interrupted, run and report its progress.

    Reads the JSON-lines output file at *path* and returns the set of
    ``parent_eid`` values whose results can be trusted.  The parent eid of
    the final line is never trusted, because the previous run may have been
    interrupted while writing its results: every line belonging to it is
    removed from the file and it is left out of the returned set, so the
    caller fetches it again from scratch.

    A final line that is not valid JSON (a write cut off mid-line) is
    dropped as well.  Invalid JSON anywhere else raises ``ValueError``.

    Returns an empty set, without touching the filesystem, if the file does
    not exist or holds no records.
    """
    import os  # local import: only this helper needs it

    try:
        with open(path, 'r') as out_file:
            lines = [line for line in out_file if line.strip()]
    except FileNotFoundError:
        # If the file doesn't exist, then there aren't any completed eids
        return set()

    if not lines:
        return set()

    records = []
    for position, line in enumerate(lines):
        try:
            parent_eid = json.loads(line)['parent_eid']
        except ValueError:
            if position != len(lines) - 1:
                raise
            break  # torn final line left behind by an interrupted write
        records.append((parent_eid, line))

    completed = set()
    kept = []
    if records:
        last_eid = records[-1][0]
        # Compare parsed values rather than pattern-matching raw text, so
        # an eid that merely starts with last_eid is not removed too, and
        # drop *every* occurrence of last_eid so it really is re-fetched.
        for parent_eid, line in records:
            if parent_eid != last_eid:
                completed.add(parent_eid)
                kept.append(line)

    # Write the surviving lines to a sibling file and swap it into place so
    # a crash during the rewrite cannot leave a half-written output file.
    tmp_path = '{}.tmp'.format(path)
    with open(tmp_path, 'w') as tmp_file:
        tmp_file.writelines(kept)
    os.replace(tmp_path, path)
    return completed


def main():
    """Append the articles citing each input eid to the output file.

    Command-line arguments:
      -i  JSON-lines file whose records include ``eid`` and ``citedby-count``
      -o  JSON-lines file that results are appended to

    Eids whose citation count is the string ``'0'`` are skipped, as are eids
    already completed by an earlier run (see ``_resume_completed_eids``).

    NOTE(review): ``get_cited_by`` is defined outside this block; it is
    presumably what writes the ``parent_eid`` records to the output file --
    confirm.  The ``__main__`` guard that follows is assumed to call
    ``main()`` -- confirm.
    """
    parser = argparse.ArgumentParser(description='Output JSON of all articles which cite the articles passed in')
    parser.add_argument('-i', required=True, help='JSON file which includes eids and citedby-count')
    parser.add_argument('-o', required=True, help='Where to append JSON results')
    args = parser.parse_args()

    eids = _load_citation_counts(args.i)

    # If the script gets interrupted, we need to start where we left off
    completed_eids = _resume_completed_eids(args.o)

    with open(args.o, 'a') as out_file:
        for eid, citation_count in eids.items():
            if citation_count != '0' and eid not in completed_eids:
                get_cited_by(eid, out_file)
42 if __name__ == '__main__':