"""Score Civil Comments rows with the Perspective API.

Reads ``all_data.csv``, sends every ``comment_text`` to the Perspective
``comments.analyze`` endpoint, and appends one JSON object per comment (the
API response plus the row's ``id``) to ``perspective_results.json``.  Rows
whose ``id`` is already present in the results file are skipped, so an
interrupted run can simply be started again.

Provenance: recovered from the ``civil_comments/get_perspective_scores.py``
hunk of commit e17a52e23619aff74eebc144c74514f7b02d093e ("add stuff to get
perspective scores from civil comments", Nathan TeBlunthuis, 2022-11-02).
The same commit adds git-annex symlinks for ``all_data.csv``,
``identity_individual_annotations.csv`` and
``toxicity_individual_annotations.csv``, and an encrypted
``perspective_api_key.gpg``.

NOTE(review): the script reads ``perspective_api_key`` (no ``.gpg``), so the
key presumably has to be decrypted next to the script first -- confirm.
"""
import csv
import json
from itertools import islice  # noqa: F401  (unused; retained from the original import list)
from pathlib import Path
from time import sleep

API_KEY_PATH = Path("perspective_api_key")
INPUT_CSV = Path("all_data.csv")
RESULTS_PATH = Path("perspective_results.json")

# Pause after each API call.
REQUEST_DELAY_SECONDS = 0.10

# Attributes requested for every comment.
REQUESTED_ATTRIBUTES = (
    "TOXICITY",
    "SEVERE_TOXICITY",
    "IDENTITY_ATTACK",
    "INSULT",
    "PROFANITY",
    "THREAT",
)


def load_api_key(path=API_KEY_PATH) -> str:
    """Return the API key stored in *path* with surrounding whitespace removed.

    Stripping matters because a key file normally ends with a newline, which
    would otherwise be passed along as part of ``developerKey``.
    """
    with open(path) as key_file:
        return key_file.read().strip()


def build_client(api_key):
    """Build the ``commentanalyzer`` discovery client for *api_key*.

    ``googleapiclient`` is imported here rather than at module level so the
    pure helpers in this file stay importable without that package.
    """
    from googleapiclient import discovery

    # NOTE(review): the version argument says "v1alpha" while the discovery
    # URL says "v1alpha1"; left exactly as committed -- confirm which is meant.
    return discovery.build(
        "commentanalyzer",
        "v1alpha",
        developerKey=api_key,
        discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
        static_discovery=False,
    )


def load_scored_ids(results_path=RESULTS_PATH) -> set:
    """Return the set of ``id`` values already written to *results_path*.

    A missing file yields an empty set.  Blank lines are ignored; any other
    line must be a JSON object with an ``id`` key (``json.JSONDecodeError`` or
    ``KeyError`` propagates otherwise).
    """
    results_path = Path(results_path)
    if not results_path.exists():
        return set()
    with results_path.open("r") as results:
        return {json.loads(line)["id"] for line in results if line.strip()}


def build_analyze_request(text) -> dict:
    """Return the ``comments.analyze`` request body for one comment *text*."""
    return {
        "comment": {"text": text},
        "languages": ["en"],
        "requestedAttributes": {name: {} for name in REQUESTED_ATTRIBUTES},
    }


def score_comments(client, rows, out, already_scored, delay=REQUEST_DELAY_SECONDS) -> int:
    """Score every row not in *already_scored* and write the results to *out*.

    Args:
        client: object exposing ``comments().analyze(body=...).execute()``.
        rows: iterable of mappings with ``id`` and ``comment_text`` keys.
        out: writable text stream; receives one JSON document per line.
        already_scored: container of ids to skip.
        delay: seconds to sleep after each request.

    Returns:
        The number of rows that were sent to the API and written.

    Any exception raised by the client propagates; everything written before
    it has already been flushed, so a rerun resumes where this one stopped.
    """
    written = 0
    for row in rows:
        if row["id"] in already_scored:
            continue
        request = build_analyze_request(row["comment_text"])
        response = client.comments().analyze(body=request).execute()
        response["id"] = row["id"]
        out.write(json.dumps(response) + "\n")
        # Flush per record so a crash loses at most the in-flight request.
        out.flush()
        written += 1
        # Only pause when a request was actually made; skipped rows cost nothing.
        sleep(delay)
    return written


def score_file(client, input_csv=INPUT_CSV, results_path=RESULTS_PATH,
               delay=REQUEST_DELAY_SECONDS) -> int:
    """Score *input_csv* into *results_path*, resuming from earlier output.

    The results file is opened in append mode: opening it with ``"w"`` would
    truncate the very records that ``load_scored_ids`` just told us to skip,
    silently discarding all previous work.

    Returns:
        The number of newly written results.
    """
    already_scored = load_scored_ids(results_path)
    # newline="" lets the csv module handle line breaks inside quoted fields.
    with open(input_csv, newline="") as source, open(results_path, "a") as results:
        return score_comments(client, csv.DictReader(source), results, already_scored, delay)


def main() -> None:
    """Run the scorer with the default key, input and output paths."""
    score_file(build_client(load_api_key()))


if __name__ == "__main__":
    main()