code.communitydata.science - covid19.git/blob - cron-wikipedia_views.sh
update cron scripts with new data format
[covid19.git] / cron-wikipedia_views.sh
#!/bin/bash
#
# cron-wikipedia_views.sh
#
# Collects the day's Wikipedia data by running the WikiProject scraper, the
# page crawler and the daily-views fetcher in turn, then moves the resulting
# view log and the *dailyviews* .tsv/.json files under the web root.
#
# All paths are relative, so the script must be started from the repository
# root (the directory that contains ./wikipedia).
#
# Environment:
#   OVERRIDE_DATE_STRING  optional date (YYYYMMDD) used instead of today's
#                         UTC date.

# Trace every command. Set here rather than in the shebang so tracing is kept
# even when the script is invoked as `bash cron-wikipedia_views.sh`.
set -x

# Exported so that date(1) and the scripts below really run in UTC; a plain
# assignment only reaches child processes if TZ was already in the
# inherited environment.
export TZ="UTC"
date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)}

web_root="/var/www/covid19/wikipedia"
log_dir="wikipedia/logs"

# Each step's stderr is copied into a per-day log file via tee.
view_log="daily_views-${date_string}.log"
./wikipedia/scripts/wikiproject_scraper.py 2> >(tee "${log_dir}/${view_log}")

wd_log="wd-page-crawler-${date_string}.log"
python3 ./real-time-wiki-covid-tracker/PageCrawler.py \
  -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" \
  2> >(tee "${log_dir}/${wd_log}")

# Fetch the daily views; stderr is appended to the scraper's log (tee -a)
# so both steps share one file, which is then published.
./wikipedia/scripts/fetch_daily_views.py -d "${date_string}" \
  2> >(tee -a "${log_dir}/${view_log}")
mv -- "${log_dir}/${view_log}" "${web_root}/logs/${view_log}"

#######################################
# Move today's *dailyviews* files with the given extension from the current
# directory tree to the same relative path under the web root.
# Globals:   date_string (read), web_root (read)
# Arguments: $1 - file extension without the dot (e.g. tsv)
#######################################
publish_daily_views() {
  local ext=$1
  local rel_path
  for rel_path in */"${date_string}"/*dailyviews*."${ext}"; do
    # An unmatched glob stays literal; skip it instead of moving nothing.
    [[ -e "${rel_path}" ]] || continue
    # NOTE(review): mkdir -p is given the full file path, so a directory
    # named after the file is created and mv then places the file inside
    # it (<file>/<file>). Kept as-is because published paths may be relied
    # on; confirm whether the parent directory was intended.
    mkdir -p -- "${web_root}/${rel_path}"
    mv -- "${rel_path}" "${web_root}/${rel_path}"
  done
}

# Stop rather than glob and move files from the wrong directory.
cd wikipedia/data || exit 1
publish_daily_views tsv
publish_daily_views json
cd ../.. || exit 1

Community Data Science Collective || Want to submit a patch?