"""Score comments from ``all_data.csv`` with the Perspective API.

Each row with a non-empty ``comment_text`` is sent to the Comment Analyzer
``comments.analyze`` endpoint, and the response (tagged with the row's
``id``) is appended as one JSON object per line to
``perspective_results.json``.  The script is resumable: ids already present
in the results file are skipped on subsequent runs.
"""
import csv
import json
from itertools import islice
from pathlib import Path
from time import sleep

from googleapiclient import discovery

# Attributes requested from the API for every comment.
REQUESTED_ATTRIBUTES = (
    'TOXICITY',
    "SEVERE_TOXICITY",
    "IDENTITY_ATTACK",
    "INSULT",
    "PROFANITY",
    "THREAT",
)

# Strip surrounding whitespace: a trailing newline in the key file would
# otherwise become part of the developer key.
with open('perspective_api_key') as key_file:
    API_KEY = key_file.read().strip()

# NOTE(review): the version argument is "v1alpha" while the discovery URL
# pins version=v1alpha1 -- confirm which one is intended.
client = discovery.build(
    "commentanalyzer",
    "v1alpha",
    developerKey=API_KEY,
    discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    static_discovery=False,
)

outfile = Path("perspective_results.json")

# Collect the ids scored by previous runs so they are not requested again.
already_scored = set()
if outfile.exists():
    with outfile.open('r') as previous_results:
        # Skip blank lines so stray empty lines do not break json.loads.
        already_scored = {
            json.loads(l)['id'] for l in previous_results if l.strip()
        }
    print(f"loaded {len(already_scored)} scored comments")

# newline='' lets the csv module handle newlines embedded in quoted fields.
with open("all_data.csv", newline='') as infile, outfile.open('a') as of:
    csvreader = csv.DictReader(infile, dialect='unix')
    for line in csvreader:
        # DictReader fills missing fields of short rows with None, so
        # normalise to '' before testing for emptiness.
        comment_text = line.get('comment_text') or ''
        if line['id'] in already_scored or not comment_text:
            continue

        analyze_request = {
            'comment': {'text': comment_text},
            'languages': ['en'],
            'requestedAttributes': {name: {} for name in REQUESTED_ATTRIBUTES},
        }
        response = client.comments().analyze(body=analyze_request).execute()
        response['id'] = line['id']

        # Flush after every record so an interrupted run loses no results.
        of.write(json.dumps(response) + '\n')
        of.flush()

        # Pause between requests.
        sleep(0.10)