# Source: civil_comments/get_perspective_scores.py
# (https://code.communitydata.science/ml_measurement_error_public.git)
"""Score the comments in ``all_data.csv`` with the Perspective API.

Each row of ``all_data.csv`` must provide an ``id`` and a ``comment_text``
column.  For every row whose ``id`` has not been scored yet, the comment
text is sent to the ``commentanalyzer`` service and the raw response,
tagged with the row's ``id``, is appended to ``perspective_results.json``
as one JSON object per line.

The script is resumable: ids already present in the results file are
skipped, and new results are appended after the existing ones.

The API key is read from the file ``perspective_api_key`` in the current
working directory.
"""
from googleapiclient import discovery
import json
import csv
from pathlib import Path

from time import sleep

from itertools import islice

# Strip surrounding whitespace: key files usually end with a newline, which
# must not be sent as part of the developer key.
with open('perspective_api_key') as key_file:
    API_KEY = key_file.read().strip()

client = discovery.build(
    "commentanalyzer",
    "v1alpha",
    developerKey=API_KEY,
    discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    static_discovery=False,
)

# Attributes requested for every comment; each takes an empty config object.
REQUESTED_ATTRIBUTES = {
    'TOXICITY': {},
    "SEVERE_TOXICITY": {},
    "IDENTITY_ATTACK": {},
    "INSULT": {},
    "PROFANITY": {},
    "THREAT": {},
}

# Pause after each request, in seconds, to throttle calls to the service.
REQUEST_DELAY = 0.10

outfile = Path("perspective_results.json")

# Collect the ids scored by previous runs so they are not requested again.
already_scored = set()
if outfile.exists():
    with outfile.open('r') as previous_results:
        # Blank lines carry no result; skip them instead of crashing.
        already_scored = {
            json.loads(l)['id'] for l in previous_results if l.strip()
        }

# Append ('a'), never truncate ('w'): the ids read above are skipped below,
# so wiping the file here would permanently lose their results.
# newline='' is what the csv module requires for correct handling of
# newlines embedded in quoted fields.
with open("all_data.csv", newline='') as infile, outfile.open('a') as of:
    csvreader = csv.DictReader(infile)
    for line in csvreader:
        if line['id'] in already_scored:
            continue

        analyze_request = {
            'comment': {'text': line['comment_text']},
            'languages': ['en'],
            'requestedAttributes': REQUESTED_ATTRIBUTES,
        }
        response = client.comments().analyze(body=analyze_request).execute()
        # Tag the response so it can be joined back to the input row and
        # recognised as done on the next run.
        response['id'] = line['id']
        of.write(json.dumps(response) + '\n')
        # Flush per result so an interrupted run loses at most one response.
        of.flush()

        # Only throttle after an actual request; skipped rows cost nothing.
        sleep(REQUEST_DELAY)