From e71b896cecbd7ecc3934ad5ba99cadac30c144e4 Mon Sep 17 00:00:00 2001
From: Kaylea Champion
Date: Fri, 27 Mar 2020 18:08:43 -0700
Subject: [PATCH 1/1] makes TSV makes JSON

---
 bin/fetch_daily_views.py | 43 ++++++++++++++++------------------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/bin/fetch_daily_views.py b/bin/fetch_daily_views.py
index 7ce9c5a..ac925ed 100755
--- a/bin/fetch_daily_views.py
+++ b/bin/fetch_daily_views.py
@@ -5,9 +5,7 @@
 # This script assumes the presence of the COVID-19 repo.
 #
 # It (1) reads in the article list and then (2) calls the Wikimedia API to
-# fetch view information for each article. Output is to a (3) JSON, TSV, and
-# Feather file.
-#
+# fetch view information for each article. Output is to (3) JSON and TSV.
 #
 ###############################################################################

@@ -25,6 +23,7 @@ import csv
 import time
 import os.path
 import datetime
+#import feather



@@ -64,8 +63,9 @@ def main():
         articleList.append(currentLine)

     j_Out = outputPath + "dailyviews" + queryDate + ".json"
-    with open(j_Out, 'w') as outfile:
-        outfile.write("[")
+    t_Out = outputPath + "dailyviews" + queryDate + ".tsv"
+
+    j = []

     i = 0 #iterator to deal with end of file

@@ -74,34 +74,25 @@
         i = i+1
         url= "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/all-agents/"
         url= url + a + "/daily/" + queryDate + "/" + queryDate #for now, single date at a time
-
-
         response = requests.get(url)
         if response.ok:
+            jd = json.loads(response.content)
+            j.append(jd["items"][0])
+            time.sleep(.1)

-            #do json entry
-            j=json.loads(response.content)
-            with open(j_Out, 'a') as j_outfile:
-                json.dump(j, j_outfile)
-                if i < len(articleList):
-                    j_outfile.write(",\n")
-                else: #at end of file
-                    j_outfile.write("\n")
-
-            #do tsv entry
-            #with open(outputPath + "dailyviews" + queryDate + ".tsv", 'a') as t_outfile:
-            #    dw = csv.DictWriter(t_outfile, sorted(j[0].keys()), delimiter='\t')
-            #    if i==1:
-            #        dw.writeheader()
-            #    dw.writerows(j)
-            time.sleep(.1)
+    #all data in j now, make json file
+    with open(j_Out, 'w') as j_outfile:
+        json.dump(j, j_outfile, indent=2)

-    with open(j_Out, 'a') as j_outfile:
-        j_outfile.write("]")
+    with open(t_Out, 'w') as t_outfile:
+        dw = csv.DictWriter(t_outfile, sorted(j[0].keys()), delimiter='\t')
+        dw.writeheader()
+        dw.writerows(j)

-    #read the json back in and make a feather file?
+    f_Out = outputPath + "dailyviews" + queryDate + ".feather"
+    #read the json back in and make a feather file?


 if __name__ == "__main__":
-- 
2.39.5
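
Note (not part of the patch): the commit leaves Feather output as a TODO ("#import feather" stays commented out and the script still asks "read the json back in and make a feather file?"). Below is a minimal sketch of one way that step could look, assuming pandas with pyarrow is installed. The outputPath and queryDate values, and the records/df names, are illustrative placeholders; only the "dailyviews" + queryDate naming scheme comes from the patch itself.

# Sketch only: rebuild a Feather file from the JSON the script writes.
# Assumes pandas + pyarrow; paths and date stamp below are placeholders.
import json
import pandas as pd

outputPath = "./"          # placeholder: wherever the script writes its output
queryDate = "20200326"     # placeholder: the date stamp the script used

j_Out = outputPath + "dailyviews" + queryDate + ".json"
f_Out = outputPath + "dailyviews" + queryDate + ".feather"

# read the list of per-article records dumped by json.dump(j, ...)
with open(j_Out) as j_infile:
    records = json.load(j_infile)

# one row per article/day; columns are the API fields
# (project, article, granularity, timestamp, access, agent, views)
df = pd.DataFrame(records)

# pandas writes Feather via pyarrow; the commented-out feather package's
# feather.write_dataframe(df, f_Out) would be an alternative
df.to_feather(f_Out)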