From e17a52e23619aff74eebc144c74514f7b02d093e Mon Sep 17 00:00:00 2001
From: Nathan TeBlunthuis <nathante@uw.edu>
Date: Wed, 2 Nov 2022 17:45:35 -0700
Subject: [PATCH] add stuff to get perspective scores from civil comments

---
 civil_comments/all_data.csv                   |   1 +
 civil_comments/get_perspective_scores.py      |  38 ++++++++++++++++++
 .../identity_individual_annotations.csv       |   1 +
 civil_comments/perspective_api_key.gpg        | Bin 0 -> 128 bytes
 .../toxicity_individual_annotations.csv       |   1 +
 5 files changed, 41 insertions(+)
 create mode 120000 civil_comments/all_data.csv
 create mode 100644 civil_comments/get_perspective_scores.py
 create mode 120000 civil_comments/identity_individual_annotations.csv
 create mode 100644 civil_comments/perspective_api_key.gpg
 create mode 120000 civil_comments/toxicity_individual_annotations.csv

diff --git a/civil_comments/all_data.csv b/civil_comments/all_data.csv
new file mode 120000
index 0000000..e30ea0d
--- /dev/null
+++ b/civil_comments/all_data.csv
@@ -0,0 +1 @@
+../.git/annex/objects/6v/fJ/SHA256E-s916052376--a85b5ba7e9a8cda38b91ea6e3957a4f2bfff17bb52f22c935595cbe47cc54d94.csv/SHA256E-s916052376--a85b5ba7e9a8cda38b91ea6e3957a4f2bfff17bb52f22c935595cbe47cc54d94.csv
\ No newline at end of file
diff --git a/civil_comments/get_perspective_scores.py b/civil_comments/get_perspective_scores.py
new file mode 100644
index 0000000..e8e542b
--- /dev/null
+++ b/civil_comments/get_perspective_scores.py
@@ -0,0 +1,52 @@
+from googleapiclient import discovery
+import json
+import csv
+from pathlib import Path
+
+from time import sleep
+
+from itertools import islice
+
+# Load the Perspective API key from the working directory, dropping
+# surrounding whitespace (e.g. a trailing newline) so it is not sent as part
+# of the key.
+with open('perspective_api_key') as key_file:
+    API_KEY = key_file.read().strip()
+
+client = discovery.build("commentanalyzer","v1alpha",developerKey=API_KEY,discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",static_discovery=False,)
+
+outfile = Path("perspective_results.json")
+
+# Ids already present in the output file (one JSON object per line), so an
+# interrupted run can resume without re-scoring those comments.
+already_scored = set()
+if outfile.exists():
+    with open(str(outfile),'r') as scored:
+        already_scored = {json.loads(l)['id'] for l in scored if l.strip()}
+
+# Append ('a') instead of truncating ('w'): truncating would erase the very
+# results the resume check above relies on.
+# newline='' lets the csv module handle newlines inside quoted fields itself.
+with open("all_data.csv", newline='') as infile, open(str(outfile),'a') as of:
+    csvreader = csv.DictReader(infile)
+    for line in csvreader:
+        if line['id'] not in already_scored:
+            analyze_request = {'comment':{'text':line['comment_text']},
+                               'languages':['en'],
+                               'requestedAttributes':{'TOXICITY':{},
+                                                      "SEVERE_TOXICITY":{},
+                                                      "IDENTITY_ATTACK":{},
+                                                      "INSULT":{},
+                                                      "PROFANITY":{},
+                                                      "THREAT":{}}}
+            response = client.comments().analyze(body=analyze_request).execute()
+            # Tag the API response with the comment id so it can be joined
+            # back to the CSV and recognised on resume.
+            response['id'] = line['id']
+            result = json.dumps(response)
+            of.write(result + '\n')
+            # Flush per record so completed work survives an interruption.
+            of.flush()
+
+            # Pause between requests (0.1 s) to throttle the request rate.
+            sleep(0.10)
diff --git a/civil_comments/identity_individual_annotations.csv b/civil_comments/identity_individual_annotations.csv
new file mode 120000
index 0000000..20c95ea
--- /dev/null
+++ b/civil_comments/identity_individual_annotations.csv
@@ -0,0 +1 @@
+../.git/annex/objects/qP/Xw/SHA256E-s106388260--7b8e9f21c5110d32e337137f8b4fe50987ec1b59fdbfd56a4717cdc13e509ec3.csv/SHA256E-s106388260--7b8e9f21c5110d32e337137f8b4fe50987ec1b59fdbfd56a4717cdc13e509ec3.csv
\ No newline at end of file
diff --git a/civil_comments/perspective_api_key.gpg b/civil_comments/perspective_api_key.gpg
new file mode 100644
index 0000000000000000000000000000000000000000..fbeda15fc99df6aca7500f768530743b420e0db6
GIT binary patch
literal 128
zcmV-`0Du3C4Fm@R0<NqIJZa=4Ui8v$0e7#pvlt#=+9f@{Adfo$5GFCG4yb>{mA~uT
zU9gcNIn74!g|6;B_TjC^(21Nfp|Ndu!o8JZE^r+>3rRy}I~p4dV*;(_I?M_IVSv{s
i(q(yv8El}iR$4=aLOTGPF9rX8hOr_yJvV#N$7+o4g*T!A

literal 0
HcmV?d00001

diff --git a/civil_comments/toxicity_individual_annotations.csv b/civil_comments/toxicity_individual_annotations.csv
new file mode 120000
index 0000000..b02f3cb
--- /dev/null
+++ b/civil_comments/toxicity_individual_annotations.csv
@@ -0,0 +1 @@
+../.git/annex/objects/FF/WZ/SHA256E-s417648663--c85bda15b964a24869ae11f76092bde6f4b18236dd1cbe17539526b3b5b736cf.csv/SHA256E-s417648663--c85bda15b964a24869ae11f76092bde6f4b18236dd1cbe17539526b3b5b736cf.csv
\ No newline at end of file
-- 
2.39.5