code.communitydata.science - social-media-chapter.git/blob - code/data_processing/04_make_paper_subject_table.py
initial import of material for public archive into git
[social-media-chapter.git] / code / data_processing / 04_make_paper_subject_table.py
1 import json
2 import argparse
3 import csv
4
def main():
    """Write a paper-to-subject TSV mapping from an abstracts file.

    Reads the file named by ``-i`` one line at a time, hands each non-blank
    line to ``get_entries``, and writes the rows it returns to the file named
    by ``-o`` as tab-separated values beneath a
    ``paper_eid`` / ``subject`` / ``subject_code`` header row.
    """
    parser = argparse.ArgumentParser(description='Generate paper to subject mapping file from abstracts file')
    # Both paths are mandatory; without them open() would be handed None and
    # fail with an unhelpful TypeError instead of a usage message.
    parser.add_argument('-i', required=True, help='Abstract file')
    parser.add_argument('-o', required=True, help='TSV output file')
    args = parser.parse_args()

    # newline='' leaves line endings to the csv writer, as the csv module
    # documentation requires; otherwise platforms that translate newlines
    # emit a doubled carriage return on every row.
    with open(args.i, 'r') as infile, open(args.o, 'w', newline='') as outfile:
        output = csv.writer(outfile, delimiter='\t')
        output.writerow(['paper_eid', 'subject', 'subject_code'])
        for line in infile:
            # A blank line carries no JSON document; skip it rather than
            # letting json.loads abort the whole run.
            if not line.strip():
                continue
            output.writerows(get_entries(line))
21
22
def get_entries(l):
    """Turn one line of the abstracts file into paper/subject rows.

    Args:
        l: A string containing a single JSON document whose top-level
            'abstracts-retrieval-response' object holds the record.

    Returns:
        A list of ``[eid, subject, subject_code]`` lists, one for each
        subject returned by ``get_subjects`` for this record.

    Raises:
        ValueError: If ``l`` is not valid JSON (raised by ``json.loads``).
        KeyError: If 'abstracts-retrieval-response', 'coredata' or 'eid' is
            missing, or if ``get_subjects`` raises it.
    """
    json_response = json.loads(l)
    full = json_response['abstracts-retrieval-response']
    eid = full['coredata']['eid']
    # Prepend the eid to every (subject, code) pair.
    return [[eid, subject, code] for subject, code in get_subjects(full)]
31
32
def get_subjects(abstract_response):
    """Extract the subject name/code pairs from one abstract record.

    Args:
        abstract_response: Mapping for a single record; its
            ``['subject-areas']['subject-area']`` entry holds either one
            subject mapping or a list of them.

    Returns:
        A list of ``[name, code]`` lists, taken from each subject's '$' and
        '@code' keys respectively.

    Raises:
        KeyError: If 'subject-areas' or 'subject-area' is missing (the
            offending record is printed first), or if a subject lacks '$'
            or '@code'.
    """
    # Keep the try body to the lookup alone so only the missing-key case is
    # reported here.
    try:
        raw_subjects = abstract_response['subject-areas']['subject-area']
    except KeyError:
        # Show the record that lacks subject data, then let the error
        # propagate unchanged.
        print(abstract_response)
        raise
    # Get the subject name and code for every subject entry.
    return [[s['$'], s['@code']] for s in make_list(raw_subjects)]
44
45
def make_list(list_or_dict):
    """Return the argument unchanged if it is a list, else wrap it in one."""
    if not isinstance(list_or_dict, list):
        return [list_or_dict]
    return list_or_dict
48
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()

Community Data Science Collective || Want to submit a patch?