from request_functions import *
import argparse
import json
import subprocess


def _read_eids(path):
    """Return the 'eid' field of every non-blank JSON line in *path*."""
    with open(path, 'r') as f:
        return [json.loads(line)['eid'] for line in f if line.strip()]


def _read_completed(path):
    """Scan a previous output file so an interrupted run can resume.

    Returns a ``(completed_eids, bad_lines)`` tuple: the set of eids that
    already have a result in *path*, and the list of raw lines that could
    not be interpreted as a result record.
    """
    completed_eids = set()
    bad_lines = []
    try:
        with open(path, 'r') as f:
            for line in f:
                if not line.strip():
                    # Blank separator lines are not failed records.
                    continue
                try:
                    result = json.loads(line)
                    completed_eids.add(
                        result['abstracts-retrieval-response']['coredata']['eid'])
                except (ValueError, KeyError, TypeError):
                    # Either not JSON at all, or JSON without the expected
                    # structure; keep the raw line so it can be reported.
                    bad_lines.append(line)
    except IOError:
        # No readable previous output: nothing has been completed yet.
        completed_eids = set()
    return completed_eids, bad_lines


def main():
    """Fetch abstracts for the requested eids and append them to a file.

    Eids come from ``--eid``/``-e`` (a single eid) or ``-i`` (a file with
    one JSON object per line, each holding an 'eid' key). Results returned
    by ``get_abstract`` are appended to the ``-o`` file, one per line. Eids
    already present in the output file are skipped, so the script can be
    re-run after an interruption. Eids for which ``get_abstract`` returns a
    falsy value, and unparseable lines found in the output file, are
    appended to 'raw_data/missing_eids.json'.
    """
    parser = argparse.ArgumentParser(description='Output JSON of abstracts and bibliography of all articles passed in.')
    parser.add_argument('-i', help='JSON file which includes eids')
    parser.add_argument('--eid', '-e', help='Single eid')
    parser.add_argument('-o', help='Where to append JSON results')
    args = parser.parse_args()

    if args.eid:
        eids = [args.eid]
    elif args.i:
        eids = _read_eids(args.i)
    else:
        # Stop here: there is nothing to fetch without a source of eids.
        parser.error('Need to either pass in an eid or a json file with eids')
    if not args.o:
        parser.error('Need to pass in an output file with -o')

    # If the script gets interrupted, we need to start where we left off
    completed_eids, errors = _read_completed(args.o)
    print('{} completed eids'.format(len(completed_eids)))

    with open(args.o, 'a') as out_file:
        for eid in eids:
            if eid in completed_eids:
                continue
            # NOTE(review): get_abstract comes from request_functions; it is
            # assumed to return a JSON string, or a falsy value on failure.
            result = get_abstract(eid)
            if result:
                out_file.write(result)
                out_file.write('\n')
                # Avoid fetching the same eid twice if the input repeats it.
                completed_eids.add(eid)
            else:
                errors.append(eid)

    if errors:
        with open('raw_data/missing_eids.json', 'a') as missing_file:
            # Add the bad lines from the output file and the failed eids,
            # one entry per line (bad lines already end with a newline).
            for entry in errors:
                missing_file.write(entry if entry.endswith('\n') else entry + '\n')


if __name__ == '__main__':
    main()