reorganizes comments

[covid19.git] / bin / fetch_daily_views.py
diff --git a/bin/fetch_daily_views.py b/bin/fetch_daily_views.py

index 7ce9c5ac9597bca4dad0abc8cc28cd34d5a8b5d2..724cf6b46eb549c3940ac01d5a1875c1267a24c1 100755 (executable)
--- a/bin/fetch_daily_views.py
+++ b/bin/fetch_daily_views.py
@@ -5,19 +5,11 @@
  # This script assumes the presence of the COVID-19 repo.
  # 
  # It (1) reads in the article list and then (2) calls the Wikimedia API to 
-# fetch view information for each article. Output is to a (3) JSON, TSV, and 
-# Feather file.
-#
+# fetch view information for each article. It then (3) writes JSON and TSV output.
  #
  ###############################################################################
  
  
-#1 Load up the list of article names
-
-#2 Repeatedly call the API with that list of names
-
-#3 Save results as a TSV
-
  import requests
  import argparse
  import json
@@ -25,6 +17,7 @@ import csv
  import time
  import os.path
  import datetime
+#import feather
  
  
  
@@ -56,6 +49,8 @@ def main():
  
  
      articleList = []
+#1 Load up the list of article names
+
      with open(articleFile, 'r') as infileHandle:
          #theInfile = csv.reader(infileHandle, quotechar='"')
          theInfile = csv.reader(infileHandle)
@@ -64,44 +59,39 @@ def main():
              articleList.append(currentLine)
  
      j_Out = outputPath + "dailyviews" + queryDate + ".json"
-    with open(j_Out, 'w') as outfile:
-        outfile.write("[")
+    t_Out = outputPath + "dailyviews" + queryDate + ".tsv"
+
+    j = []
  
      i = 0 #iterator to deal with end of file
  
+#2 Repeatedly call the API with that list of names
+
      for a in articleList:
          a = a[0] #destringify
          i = i+1
          url= "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/all-agents/"
          url= url + a + "/daily/" + queryDate + "/" + queryDate #for now, single date at a time
-
-
          response = requests.get(url)
          if response.ok:
+            jd = json.loads(response.content)
+            j.append(jd["items"][0])
+            time.sleep(.1)
  
-            #do json entry
-            j=json.loads(response.content)
-            with open(j_Out, 'a') as j_outfile: 
-                json.dump(j, j_outfile)
-                if i < len(articleList):
-                    j_outfile.write(",\n")
-                else: #at end of file
-                    j_outfile.write("\n")
-
-            #do tsv entry
-            #with open(outputPath + "dailyviews" + queryDate + ".tsv", 'a') as t_outfile: 
-            #    dw = csv.DictWriter(t_outfile, sorted(j[0].keys()), delimiter='\t')
-            #    if i==1:
-            #        dw.writeheader()
-            #    dw.writerows(j)
+#3 Save results as JSON and TSV files
  
-            time.sleep(.1)
+    #all data is in j now; write the JSON file
+    with open(j_Out, 'w') as j_outfile: 
+        json.dump(j, j_outfile, indent=2)
  
-    with open(j_Out, 'a') as j_outfile:
-        j_outfile.write("]")
+    with open(t_Out, 'w') as t_outfile:
+        dw = csv.DictWriter(t_outfile, sorted(j[0].keys()), delimiter='\t')
+        dw.writeheader()
+        dw.writerows(j)
  
-    #read the json back in and make a feather file?
  
+    f_Out = outputPath + "dailyviews" + queryDate + ".feather"
+    #read the json back in and make a feather file? 
  
  
  if __name__ == "__main__":