code.communitydata.science - ml_measurement_error_public.git/blob - civil_comments/get_perspective_scores.py
e8e542b9bec110f051e3384ad958489b9b0e31b1
[ml_measurement_error_public.git] / civil_comments / get_perspective_scores.py
1 from googleapiclient import discovery
2 import json
3 import csv
4 from pathlib import Path
5
6 from time import sleep
7
8 from itertools import islice
9
# Read the Perspective API key from a local file.  The context manager closes
# the handle right away, and strip() drops any surrounding whitespace (such as
# the trailing newline most editors append) so it is not sent as part of the key.
with open('perspective_api_key') as key_file:
    API_KEY = key_file.read().strip()

# Build the Comment Analyzer client using the discovery document at the URL
# below (static_discovery=False).
# NOTE(review): the version argument is "v1alpha" while the discovery URL pins
# "v1alpha1" -- confirm which one is intended.
client = discovery.build(
    "commentanalyzer",
    "v1alpha",
    developerKey=API_KEY,
    discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    static_discovery=False,
)
13
# Rows to score.  The handle is deliberately left open: the reader is consumed
# lazily by the scoring loop further down in this script.
# NOTE(review): the csv module docs recommend opening with newline='' so that
# newlines embedded in quoted fields are parsed correctly -- confirm whether
# all_data.csv needs it before changing, since it can alter the comment text.
csvreader = csv.DictReader(open("all_data.csv"))

# Results file: one JSON object per line, each carrying the comment's 'id'.
outfile = Path("perspective_results.json")

# Ids already present in the results file, so they are not requested again.
already_scored = set()
if outfile.exists():
    # Close the handle promptly, and skip blank lines so a stray empty line
    # in the results file does not abort the resume with a JSON decode error.
    with open(str(outfile), 'r') as previous:
        already_scored = {json.loads(l)['id'] for l in previous if l.strip()}
20
# Attributes requested for every comment; built once because it never changes.
requested_attributes = {
    'TOXICITY': {},
    "SEVERE_TOXICITY": {},
    "IDENTITY_ATTACK": {},
    "INSULT": {},
    "PROFANITY": {},
    "THREAT": {},
}

# Score every comment whose id is not already in the results file.
# The file is opened in APPEND mode: already_scored was built from its
# existing lines, so truncating it with 'w' would throw away exactly the
# results that this loop then skips.
with open("perspective_results.json", 'a') as of:
    for line in csvreader:
        if line['id'] in already_scored:
            continue

        analyze_request = {
            'comment': {'text': line['comment_text']},
            'languages': ['en'],
            'requestedAttributes': requested_attributes,
        }
        response = client.comments().analyze(body=analyze_request).execute()

        # Tag the response with the source row id so a later run can skip it.
        response['id'] = line['id']
        of.write(json.dumps(response) + '\n')
        # Flush after each record so an interrupted run keeps everything
        # scored so far.
        of.flush()

        # Pause between requests (presumably to stay under the API rate
        # limit -- confirm against the project's quota).
        sleep(0.10)

Community Data Science Collective || Want to submit a patch?