code.communitydata.science - social-media-chapter.git/blob - code/data_processing/03_make_paper_aff_table.py
initial import of material for public archive into git
[social-media-chapter.git] / code / data_processing / 03_make_paper_aff_table.py
1 import json
2 import argparse
3 import csv
4
def main():
    """Write a TSV file mapping each paper to its affiliations.

    Reads the abstracts file named by ``-i`` one line at a time, hands each
    non-blank line to ``get_entries`` and writes the rows it returns to the
    tab-separated file named by ``-o``, below a header row of
    paper_eid, affiliation_id, organization and country.
    """
    parser = argparse.ArgumentParser(description='Generate paper to affiliation mapping file from abstracts file')
    # Both paths are needed by open() below, so let argparse report a missing
    # one as a usage error instead of failing later inside open(None).
    parser.add_argument('-i', required=True, help='Abstract file')
    parser.add_argument('-o', required=True, help='TSV output file')
    args = parser.parse_args()

    # newline='' hands line-ending control to the csv module, as its
    # documentation requires for files passed to csv.writer.
    with open(args.i, 'r') as infile, open(args.o, 'w', newline='') as outfile:
        output = csv.writer(outfile, delimiter='\t')
        output.writerow(['paper_eid', 'affiliation_id',
                         'organization', 'country'])
        for line in infile:
            # A blank line holds no JSON document; skip it rather than
            # letting json.loads abort the whole run.
            if not line.strip():
                continue
            output.writerows(get_entries(line))
21
22
def get_entries(l):
    """Build the output rows for one line of the abstracts file.

    ``l`` is a JSON string whose top-level object has an
    'abstracts-retrieval-response' entry. The paper's eid is read from its
    'coredata' and the affiliation fields from item -> bibrecord -> head.

    Returns a list of [eid, affiliation id, organization, country] lists,
    one per country value reported by ``get_aff_info``, or an empty list
    when ``get_aff_info`` reports no countries (None or an empty list).
    """
    response = json.loads(l)['abstracts-retrieval-response']
    head = response['item']['bibrecord']['head']
    eid = response['coredata']['eid']

    # Three parallel lists, all extracted from the same head.
    countries = get_aff_info(head, 'country')
    affiliation_ids = get_aff_info(head, '@afid')
    org_names = get_aff_info(head, 'organization')

    if not countries:
        return []

    rows = []
    for position, country in enumerate(countries):
        rows.append([eid, affiliation_ids[position],
                     org_names[position], country])
    return rows
36
def get_aff_info(head, affiliation_key):
    """Collect one affiliation field from every author group of a record.

    Args:
        head: dict whose 'author-group' entry is either a single author
            group or a list of author groups.
        affiliation_key: key to read from each group's 'affiliation' dict.

    Returns:
        None when ``head`` has no 'author-group' key. Otherwise a list with
        one entry per author group that is not None, in order: the value
        found under ``affiliation_key``, or '' when it cannot be read.
    """
    try:
        affiliations = make_list(head['author-group'])
    except KeyError:
        return None

    aff_info = []
    for group in affiliations:
        if group is None:
            continue
        try:
            curr_inst = group['affiliation'][affiliation_key]
            if isinstance(curr_inst, list):
                # A list holds {'$': value} wrappers; keep the final value
                # (this is the base organization). Only the last element is
                # inspected, and an empty list raises IndexError, which is
                # handled below like any other missing value.
                curr_inst = curr_inst[-1]['$']
            elif isinstance(curr_inst, dict) and '$' in curr_inst:
                # A lone {'$': value} wrapper has the same shape as the list
                # elements; unwrap it so the caller gets the plain value
                # rather than a dict.
                curr_inst = curr_inst['$']
        except (KeyError, IndexError):
            # If there isn't affiliation info for these authors, use an
            # empty string so the result keeps one entry per author group.
            curr_inst = ''
        aff_info.append(curr_inst)
    return aff_info
57
def make_list(list_or_dict):
    """Return the argument unchanged if it is a list, else wrap it in a list."""
    if isinstance(list_or_dict, list):
        return list_or_dict
    return [list_or_dict]
60
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

Community Data Science Collective || Want to submit a patch?