code.communitydata.science - covid19.git/blobdiff - cron-wikipedia_views.sh
rename scripts
[covid19.git] / cron-wikipedia_views.sh
index ec3d4ec0ae1bb0dda79e33f2a7a411632d741843..851cc25a813240303d8f9c0c68a7023f0821fe63 100644 (file)
@@ -1,16 +1,27 @@
 #!/bin/bash -x
 
 TZ="UTC"
-date_string=$(date +%Y%m%d)
+date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)}
 
-./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/enwp-wikiproject_scraper-${date_string}.log)
+view_log="daily_views-${date_string}.log"
+./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${view_log})
+
+wd_log="wd-page-crawler-${date_string}.log"
+python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee wikipedia/logs/${wd_log})
 
 # get the list of files
-view_log="enwp-daily_views-${date_string}.log"
-./wikipedia/scripts/fetch_enwiki_daily_views.py 2> >(tee wikipedia/logs/${view_log})
-cp wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log}
-cp wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.tsv /var/www/covid19/wikipedia/
+./wikipedia/scripts/fetch_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
+mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log}
+
+cd wikipedia/data
+find */${date_string}/*dailyviews*.tsv | while read line; do
+    mkdir -p /var/www/covid19/wikipedia/$line
+    mv $line /var/www/covid19/wikipedia/$line
+done
 
-# xz wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json
-cp wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json /var/www/covid19/wikipedia/
+find */${date_string}/*dailyviews*.json | while read line; do
+    mkdir -p /var/www/covid19/wikipedia/$line
+    mv $line /var/www/covid19/wikipedia/$line
+done
 
+cd ../..

Community Data Science Collective || Want to submit a patch?