X-Git-Url: https://code.communitydata.science/covid19.git/blobdiff_plain/ff96d52cb92966dd0c487e6452aeab70773cf3e6..cfe21254d9b026cd99884a2b274a78622b057637:/cron-wikipedia_views.sh diff --git a/cron-wikipedia_views.sh b/cron-wikipedia_views.sh index 3611657..851cc25 100644 --- a/cron-wikipedia_views.sh +++ b/cron-wikipedia_views.sh @@ -1,16 +1,27 @@ #!/bin/bash -x TZ="UTC" -date_string=$(date +%Y%m%d) +date_string=${OVERRIDE_DATE_STRING:-$(date +%Y%m%d)} -./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/enwp-wikiproject_scraper-${date_string}.log) +view_log="daily_views-${date_string}.log" +./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${view_log}) + +wd_log="wd-page-crawler-${date_string}.log" +python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee wikipedia/logs/${wd_log}) # get the list of files -view_log="enwp-daily_views-${date_string}.log" -./wikipedia/scripts/fetch_enwiki_daily_views.py 2> >(tee wikipedia/logs/${view_log}) +./wikipedia/scripts/fetch_daily_views.py -d "${date_string}" 2> >(tee -a wikipedia/logs/${view_log}) mv wikipedia/logs/${view_log} /var/www/covid19/wikipedia/logs/${view_log} -mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.tsv /var/www/covid19/wikipedia/ -# xz wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json -mv wikipedia/data/digobs_covid19-wikipedia-enwiki_dailyviews-${date_string}.json /var/www/covid19/wikipedia/ +cd wikipedia/data +find */${date_string}/*dailyviews*.tsv | while read line; do + mkdir -p /var/www/covid19/wikipedia/$line + mv $line /var/www/covid19/wikipedia/$line +done + +find */${date_string}/*dailyviews*.json | while read line; do + mkdir -p /var/www/covid19/wikipedia/$line + mv $line /var/www/covid19/wikipedia/$line +done +cd ../..