code.communitydata.science - covid19.git/blob - cron-wikipedia_revisions.sh
Merge pull request #20 from makoshark/master
[covid19.git] / cron-wikipedia_revisions.sh
#!/bin/bash
#
# cron-wikipedia_revisions.sh
#
# Cron entry point. In order, it:
#   1. runs wikipedia/scripts/wikiproject_scraper.py and
#      wikipedia/scripts/fetch_enwiki_revisions.py, capturing their stderr
#      in a dated log file under wikipedia/logs/;
#   2. moves that log into the published logs directory;
#   3. moves the dated revisions TSV into the published directory;
#   4. xz-compresses the dated revisions JSON and moves it there as well.
#
# The TSV/JSON files are expected to already exist under wikipedia/data/
# (presumably written by the Python scripts above -- not visible from here).
#
# Only a failure to enter WORKING_DIR aborts the run. Later steps are
# best-effort so that the log is still published when a scraper fails.

# Trace every command so the cron output shows exactly what was executed.
# (Set here rather than in the shebang so it also applies when the script is
# started as `bash cron-wikipedia_revisions.sh`.)
set -x

WORKING_DIR="/home/SOC.NORTHWESTERN.EDU/bmh1867/covid19"
PUBLISH_DIR="/var/www/covid19/wikipedia"

# Every path below is relative to WORKING_DIR; running from anywhere else
# would operate on the wrong files, so stop immediately if this fails.
cd -- "${WORKING_DIR}" || {
  printf 'cannot cd to %s\n' "${WORKING_DIR}" >&2
  exit 1
}

# TZ must be exported: a plain assignment is only visible to child processes
# (such as `date`) when TZ was already present in the environment.
export TZ="UTC"
date_string=$(date +%Y%m%d)

#######################################
# Run a command, appending its stderr to a log file while also echoing that
# stderr to this script's stdout. The command's own stdout is passed through
# untouched.
# A pipeline is used instead of `2> >(tee ...)` because bash does not wait
# for process substitutions; the log could otherwise be moved before tee has
# finished writing it.
# Arguments: $1 - log file to append to
#            $2... - command and its arguments
# Returns:   exit status of tee (the command's status is not propagated)
#######################################
run_logged() {
  local log_file=$1
  shift
  # fd 3 keeps a handle on the real stdout so that only stderr enters the pipe.
  { "$@" 2>&1 1>&3 3>&- | tee -a "${log_file}"; } 3>&1
}

revs_log="enwp-revisions-${date_string}.log"
log_path="wikipedia/logs/${revs_log}"

# Start from an empty log, discarding one left behind by an earlier run today.
: > "${log_path}"
run_logged "${log_path}" ./wikipedia/scripts/wikiproject_scraper.py
run_logged "${log_path}" ./wikipedia/scripts/fetch_enwiki_revisions.py
mv -- "${log_path}" "${PUBLISH_DIR}/logs/"

# The trailing slash on the destination makes mv fail if the directory is
# missing instead of silently renaming the file to "wikipedia".
revs_tsv="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.tsv"
mv -- "wikipedia/data/${revs_tsv}" "${PUBLISH_DIR}/"

revs_json="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.json"
xz -- "wikipedia/data/${revs_json}"
mv -- "wikipedia/data/${revs_json}.xz" "${PUBLISH_DIR}/"

Community Data Science Collective || Want to submit a patch?