]> code.communitydata.science - covid19.git/commitdiff
update cron scripts with new data format
author Nathan TeBlunthuis <nathante@uw.edu>
Sat, 4 Apr 2020 22:20:34 +0000 (15:20 -0700)
committer Nathan TeBlunthuis <nathante@uw.edu>
Sat, 4 Apr 2020 22:20:34 +0000 (15:20 -0700)
cron-wikipedia_revisions.sh
cron-wikipedia_views.sh

index 5ba02ed43d92212c9124542b37c32a7362f1c099..555fd46197838b327b9b2a5a2acf6a78f7bf088b 100644 (file)
@@ -6,12 +6,25 @@ date_string=$(date +%Y%m%d)
 revs_log="enwp-revisions-${date_string}.log"
 ./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${revs_log})
 
-./wikipedia/scripts/fetch_enwiki_revisions.py 2> >(tee -a wikipedia/logs/${revs_log})
+wd_log="wd-page-crawler-${date_string}.log"
+python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee wikipedia/logs/${wd_log})
+
+./wikipedia/scripts/fetch_revisions.py 2> >(tee -a wikipedia/logs/${revs_log})
 mv wikipedia/logs/${revs_log} /var/www/covid19/wikipedia/logs/
 
-revs_tsv="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.tsv"
-mv wikipedia/data/${revs_tsv} /var/www/covid19/wikipedia
+python3 ./wikipedia/scripts/copy_revisions_data.py ${date_string}
+
+cd wikipedia/data
+xz */${date_string}/*revisions*.json
+
+find */${date_string}/*revisions*.xz | while IFS= read -r line; do  # each compressed revisions file, as a path relative to wikipedia/data
+    mkdir -p "/var/www/covid19/wikipedia/$(dirname "$line")"  # create only the parent directory (was misspelled covid9 and used the file path itself)
+    mv "$line" "/var/www/covid19/wikipedia/$line"  # keep the same relative path under the destination
+done
+
+find */${date_string}/*revisions*.tsv | while IFS= read -r line; do  # each revisions TSV, as a path relative to wikipedia/data
+    mkdir -p "/var/www/covid19/wikipedia/$(dirname "$line")"  # create only the parent directory, not a directory named after the file
+    mv "$line" "/var/www/covid19/wikipedia/$line"  # keep the same relative path under the destination
+done
 
-revs_json="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.json"
-xz wikipedia/data/${revs_json}
-mv wikipedia/data/${revs_json}.xz /var/www/covid19/wikipedia
+cd ../..
index 4afe380ee1a75902e5e019061924c52dee6d4c78..851cc25a813240303d8f9c0c68a7023f0821fe63 100644 (file)
@@ -3,14 +3,25 @@
 TZ="UTC"
 date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)}
 
-view_log="enwp-daily_views-${date_string}.log"
+view_log="daily_views-${date_string}.log"
 ./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${view_log})
 
+wd_log="wd-page-crawler-${date_string}.log"
+python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee wikipedia/logs/${wd_log})
+
 # get the list of files
-./wikipedia/scripts/fetch_enwiki_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
+./wikipedia/scripts/fetch_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log})
 mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log}
-mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.tsv /var/www/covid19/wikipedia/
 
-# xz wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json
-mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json /var/www/covid19/wikipedia/
+cd wikipedia/data
+find */${date_string}/*dailyviews*.tsv | while IFS= read -r line; do  # each daily-views TSV, as a path relative to wikipedia/data
+    mkdir -p "/var/www/covid19/wikipedia/$(dirname "$line")"  # create only the parent directory, not a directory named after the file
+    mv "$line" "/var/www/covid19/wikipedia/$line"  # keep the same relative path under the destination
+done
+
+find */${date_string}/*dailyviews*.json | while IFS= read -r line; do  # each daily-views JSON, as a path relative to wikipedia/data
+    mkdir -p "/var/www/covid19/wikipedia/$(dirname "$line")"  # create only the parent directory, not a directory named after the file
+    mv "$line" "/var/www/covid19/wikipedia/$line"  # keep the same relative path under the destination
+done
 
+cd ../..

Community Data Science Collective || Want to submit a patch?