import json
import argparse
import csv

def main():
    """Convert a file of abstract JSON records into a paper-to-affiliation TSV.

    Parses the ``-i`` (input) and ``-o`` (output) command-line options, reads
    the input file line by line, passes every non-blank line to
    ``get_entries`` and writes the rows it returns to the output file,
    preceded by a header row.
    """
    parser = argparse.ArgumentParser(description='Generate paper to affiliation mapping file from abstracts file')
    # Both paths are needed; without required=True a missing option only
    # surfaces later as a TypeError from open(None).
    parser.add_argument('-i', required=True, help='Abstract file')
    parser.add_argument('-o', required=True, help='TSV output file')
    args = parser.parse_args()

    # newline='' hands line-ending control to the csv writer, as the csv
    # module requires; otherwise rows can end in '\r\r\n' on platforms that
    # translate '\n' on write.
    with open(args.i, 'r') as abstracts, open(args.o, 'w', newline='') as tsv:
        output = csv.writer(tsv, delimiter='\t')
        output.writerow(['paper_eid', 'affiliation_id',
                         'organization', 'country'])
        for line in abstracts:
            # A blank line (e.g. a trailing newline at end of file) is not a
            # JSON document; skip it instead of failing in json.loads.
            if not line.strip():
                continue
            output.writerows(get_entries(line))


def get_entries(l):
    """Build the output rows for one JSON-encoded abstract record.

    ``l`` is a JSON string whose top-level ``'abstracts-retrieval-response'``
    object holds the record. The paper id is read from ``coredata.eid`` and
    the affiliation fields from ``item.bibrecord.head`` via ``get_aff_info``.

    Returns a list of ``[eid, affiliation_id, organization, country]`` rows,
    one per entry reported by ``get_aff_info``, or an empty list when no
    country information is available. A missing required key raises
    ``KeyError``.
    """
    record = json.loads(l)['abstracts-retrieval-response']
    head = record['item']['bibrecord']['head']
    eid = record['coredata']['eid']

    countries = get_aff_info(head, 'country')
    affiliation_ids = get_aff_info(head, '@afid')
    org_names = get_aff_info(head, 'organization')

    # get_aff_info yields None (no author groups) or possibly an empty list;
    # either way there is nothing to emit.
    if not countries:
        return []

    rows = []
    for idx, country in enumerate(countries):
        rows.append([eid, affiliation_ids[idx], org_names[idx], country])
    return rows

def get_aff_info(head, affiliation_key):
    """Collect one affiliation field from every author group in ``head``.

    ``head`` is a mapping; its ``'author-group'`` value may be a single author
    group or a list of them. For each group that is not ``None`` the value of
    ``group['affiliation'][affiliation_key]`` is looked up.

    Returns ``None`` when ``head`` has no ``'author-group'`` key. Otherwise
    returns a list with one item per non-``None`` author group, in order:

    * the value itself when it is not a list;
    * the ``'$'`` entry of the last element when it is a list (the last
      element is the base organization);
    * ``''`` when the affiliation, the key, or the ``'$'`` entry is missing,
      or when the list is empty.
    """
    try:
        author_groups = head['author-group']
    except KeyError:
        return None
    # A lone author group is not wrapped in a list; normalize so the loop
    # below can treat both shapes the same way.
    if not isinstance(author_groups, list):
        author_groups = [author_groups]

    aff_info = []
    for group in author_groups:
        if group is None:
            continue
        try:
            value = group['affiliation'][affiliation_key]
            if isinstance(value, list):
                # Only the final element is wanted, so read it directly
                # rather than extracting '$' from every element first.
                value = value[-1]['$']
        except (KeyError, IndexError):
            # No usable affiliation info for these authors (IndexError covers
            # an empty list); keep a placeholder so positions stay aligned
            # across calls with different keys.
            value = ''
        aff_info.append(value)
    return aff_info

def make_list(list_or_dict):
    """Return ``list_or_dict`` as a list.

    A list is returned unchanged (the same object); any other value is
    wrapped in a new one-element list.
    """
    if isinstance(list_or_dict, list):
        return list_or_dict
    return [list_or_dict]

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()