]> code.communitydata.science - ml_measurement_error_public.git/blobdiff - civil_comments/get_perspective_scores.py
add stuff to get perspective scores from civil comments
[ml_measurement_error_public.git] / civil_comments / get_perspective_scores.py
diff --git a/civil_comments/get_perspective_scores.py b/civil_comments/get_perspective_scores.py
new file mode 100644 (file)
index 0000000..e8e542b
--- /dev/null
@@ -0,0 +1,54 @@
+"""Score Civil Comments rows with the Perspective API.
+
+Reads ``all_data.csv`` and appends one JSON response per line to
+``perspective_results.json``.  Ids already present in that file are skipped,
+so an interrupted run can be restarted without re-scoring anything.
+"""
+from googleapiclient import discovery
+import json
+import csv
+from pathlib import Path
+
+from time import sleep
+
+from itertools import islice
+
+# Key files usually end with a newline; strip it so it is not sent as part of
+# the developer key.
+with open('perspective_api_key') as key_file:
+    API_KEY = key_file.read().strip()
+
+client = discovery.build("commentanalyzer","v1alpha",developerKey=API_KEY,discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",static_discovery=False,)
+
+# The results file doubles as the checkpoint: collect the ids it already holds.
+outfile = Path("perspective_results.json")
+already_scored = set()
+if outfile.exists():
+    with outfile.open('r') as previous:
+        already_scored = {json.loads(l)['id'] for l in previous if l.strip()}
+
+# Open for append, not write: mode 'w' would truncate the earlier results even
+# though their ids are skipped below, losing them for good.
+# newline='' lets the csv module handle line endings inside quoted fields.
+with open("all_data.csv", newline='') as infile, outfile.open('a') as of:
+    csvreader = csv.DictReader(infile)
+    for line in csvreader:
+        if line['id'] in already_scored:
+            continue
+
+        analyze_request = {'comment':{'text':line['comment_text']},
+                           'languages':['en'],
+                           'requestedAttributes':{'TOXICITY':{},
+                                                  "SEVERE_TOXICITY":{},
+                                                  "IDENTITY_ATTACK":{},
+                                                  "INSULT":{},
+                                                  "PROFANITY":{},
+                                                  "THREAT":{}}}
+        response = client.comments().analyze(body=analyze_request).execute()
+        response['id'] = line['id']
+        of.write(json.dumps(response) + '\n')
+        # Flush per record so a crash loses at most the in-flight response.
+        of.flush()
+
+        # Crude client-side rate limit (about 10 requests per second at most).
+        sleep(0.10)

Community Data Science Collective || Want to submit a patch?