code.communitydata.science - covid19.git/blob - cron-wikipedia_revisions.sh
update cron scripts with new data format
[covid19.git] / cron-wikipedia_revisions.sh
#!/bin/bash
#
# Cron driver for the English Wikipedia COVID-19 revisions data.
#
# Steps, in order:
#   1. run wikiproject_scraper.py, PageCrawler.py and fetch_revisions.py,
#      capturing each one's stderr in a date-stamped log under wikipedia/logs/;
#   2. move the revisions log to the web root;
#   3. run copy_revisions_data.py for today's date stamp;
#   4. xz-compress today's *revisions*.json files under wikipedia/data/ and
#      move the resulting .xz files and the *revisions*.tsv files to the
#      web root.
#
# Must be started from the repository root: every path is relative to it.

# Trace every command. This used to be "-x" on the shebang line, which is
# silently dropped when the script is started as `bash <script>`.
set -x

# Exported so that `date` and the Python scripts below really run in UTC. A
# plain assignment only reaches child processes when TZ already happens to be
# in the environment.
export TZ="UTC"
date_string=$(date +%Y%m%d)

log_dir="wikipedia/logs"
www_root="/var/www/covid19/wikipedia"

#######################################
# Move every path read from stdin (one per line) to the same relative path
# below ${www_root}.
# Globals:   www_root (read)
# Arguments: none
# Inputs:    stdin - newline-separated paths relative to the current directory
#######################################
publish_paths() {
  local path
  while IFS= read -r path; do
    # NOTE(review): mkdir -p is given the full file path, so the file ends up
    # inside a directory named after itself (".../x.tsv/x.tsv"). This is the
    # layout the original .tsv loop produced, so it is kept unchanged here;
    # confirm whether the parent directory was intended before changing it.
    mkdir -p "${www_root}/${path}"
    mv "${path}" "${www_root}/${path}"
  done
}

# Each tool's stderr is piped through tee, so it is written to the log file
# and echoed on this script's stdout as well.
revs_log="enwp-revisions-${date_string}.log"
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee "${log_dir}/${revs_log}")

wd_log="wd-page-crawler-${date_string}.log"
python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee "${log_dir}/${wd_log}")

# Appends (-a) to the log started by wikiproject_scraper.py above.
./wikipedia/scripts/fetch_revisions.py 2> >(tee -a "${log_dir}/${revs_log}")
mv "${log_dir}/${revs_log}" "${www_root}/logs/"

python3 ./wikipedia/scripts/copy_revisions_data.py "${date_string}"

# Stop here if the data directory is missing: the xz/find/mv steps below use
# relative globs and must not run from the wrong directory.
cd wikipedia/data || exit 1
xz */"${date_string}"/*revisions*.json

# Both file kinds go through the same helper. The original .xz loop created
# its directory under /var/www/covid9 (typo) but moved the file to
# /var/www/covid19, so the destination directory was never created.
find */"${date_string}"/*revisions*.xz | publish_paths
find */"${date_string}"/*revisions*.tsv | publish_paths

cd ../..

Community Data Science Collective || Want to submit a patch?