code.communitydata.science - covid19.git/commitdiff
Merge pull request #17 from makoshark/master
author: Kaylea Champion <khascall@gmail.com>
Thu, 2 Apr 2020 21:18:05 +0000 (14:18 -0700)
committer: GitHub <noreply@github.com>
Thu, 2 Apr 2020 21:18:05 +0000 (14:18 -0700)
changes to support historical view data

cron-wikipedia_revisions.sh
cron-wikipedia_views.sh
wikipedia/scripts/fetch_enwiki_daily_views.py

diff --git a/cron-wikipedia_revisions.sh b/cron-wikipedia_revisions.sh
index a53bc3f7009a6779c20fe86b4e55fb6d407d608b..5ba02ed43d92212c9124542b37c32a7362f1c099 100644 (file)
@@ -3,10 +3,10 @@
 TZ="UTC"
 date_string=$(date +%Y%m%d)
 
-./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/enwp-wikiproject_scraper-${date_string}.log)
-
 revs_log="enwp-revisions-${date_string}.log"
-./wikipedia/scripts/fetch_enwiki_revisions.py 2> >(tee wikipedia/logs/${rev_log})
+./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${revs_log})
+
+./wikipedia/scripts/fetch_enwiki_revisions.py 2> >(tee -a wikipedia/logs/${revs_log})
 mv wikipedia/logs/${revs_log} /var/www/covid19/wikipedia/logs/
 
 revs_tsv="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.tsv"
diff --git a/cron-wikipedia_views.sh b/cron-wikipedia_views.sh
index 361165729831f34f9e458573a0039575099d3f94..4afe380ee1a75902e5e019061924c52dee6d4c78 100644 (file)
@@ -1,13 +1,13 @@
 #!/bin/bash -x
 
 TZ="UTC"
-date_string=$(date +%Y%m%d)
+date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)}
 
-./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/enwp-wikiproject_scraper-${date_string}.log)
+view_log="enwp-daily_views-${date_string}.log"
+./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${view_log})
 
 # get the list of files
-view_log="enwp-daily_views-${date_string}.log"
-./wikipedia/scripts/fetch_enwiki_daily_views.py 2> >(tee wikipedia/logs/${view_log})
+./wikipedia/scripts/fetch_enwiki_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
 mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log}
 mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.tsv /var/www/covid19/wikipedia/
 
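diff --git a/wikipedia/scripts/fetch_enwiki_daily_views.py b/wikipedia/scripts/fetch_enwiki_daily_views.py
index 829343de318c9aaf906350d2ad57880482547ce8..9f147e07a30cc20e78b27ec1afb71c64be582b1e 100755 (executable)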
@@ -58,8 +58,8 @@ def main():
     logging.info(f"Last commit: {digobs.git_hash()}")
 
     #1 Load up the list of article names
-    j_outfilename = os.path.join(outputPath, f"digobs_covid19-wikipedia-enwiki_dailyviews-{export_date}.json")
-    t_outfilename = os.path.join(outputPath, f"digobs_covid19-wikipedia-enwiki_dailyviews-{export_date}.tsv")
+    j_outfilename = os.path.join(outputPath, f"digobs_covid19-wikipedia-enwiki_dailyviews-{query_date}.json")
+    t_outfilename = os.path.join(outputPath, f"digobs_covid19-wikipedia-enwiki_dailyviews-{query_date}.tsv")
 
     with open(articleFile, 'r') as infile:
         articleList = list(map(str.strip, infile))
@@ -82,6 +82,7 @@ def main():
             else:
                 failure = failure + 1
                 logging.warning(f"Failure: {response.status_code} from {url}")
+                continue
 
             # start writing the CSV File if it doesn't exist yet
             try:

Community Data Science Collective || Want to submit a patch?