code.communitydata.science - covid19.git/blobdiff - cron-wikipedia_revisions.sh
Merge branch 'master' of github.com:CommunityDataScienceCollective/COVID-19_Digital_O...
[covid19.git] / cron-wikipedia_revisions.sh
diff --git a/cron-wikipedia_revisions.sh b/cron-wikipedia_revisions.sh
new file mode 100644 (file)
index 0000000..5ba02ed
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Cron job: run the English Wikipedia scraping scripts, capture their
+# stderr in a date-stamped log, and move that log plus the date-stamped
+# revision data files into /var/www/covid19/wikipedia.
+#
+# All paths under wikipedia/ are relative, so the job must be started from
+# the directory that contains wikipedia/.
+
+# Trace every command. Set here rather than in the shebang so tracing also
+# applies when the script is started as "bash cron-wikipedia_revisions.sh".
+set -x
+
+# TZ must be exported: unless it is already in the environment, a plain
+# assignment creates a shell-only variable that date(1) and the Python
+# scripts never see, so the date stamp would follow the local timezone.
+export TZ="UTC"
+date_string="$(date +%Y%m%d)"
+
+log_dir="wikipedia/logs"
+data_dir="wikipedia/data"
+www_dir="/var/www/covid19/wikipedia"
+
+# Each script's stderr goes through tee, which writes it to the log file and
+# repeats it on stdout. The first run truncates the log, the second appends.
+revs_log="enwp-revisions-${date_string}.log"
+./wikipedia/scripts/wikiproject_scraper.py 2> >(tee "${log_dir}/${revs_log}")
+
+./wikipedia/scripts/fetch_enwiki_revisions.py 2> >(tee -a "${log_dir}/${revs_log}")
+mv "${log_dir}/${revs_log}" "${www_dir}/logs/"
+
+# NOTE(review): assumes fetch_enwiki_revisions.py writes the .tsv and .json
+# files below under wikipedia/data with this same date stamp - confirm.
+revs_tsv="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.tsv"
+mv "${data_dir}/${revs_tsv}" "${www_dir}"
+
+revs_json="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.json"
+# xz replaces the JSON file with ${revs_json}.xz, which is what gets published.
+xz "${data_dir}/${revs_json}"
+mv "${data_dir}/${revs_json}.xz" "${www_dir}"

Community Data Science Collective || Want to submit a patch?