X-Git-Url: https://code.communitydata.science/covid19.git/blobdiff_plain/5739d1c4042bc57aa8b3da46158704454ae9692d..eae5464fd2d095ab62173c563230aed9d05a5893:/cron-wikipedia_revisions.sh

Summary of this patch to cron-wikipedia_revisions.sh (text before the
"diff --git" line is ignored by patch tools):

  * wikiproject_scraper.py now writes its stderr to the shared revisions log
    (${revs_log}); the removed lines referenced ${rev_log}, a name that is
    not assigned anywhere in this hunk.
  * PageCrawler.py is run against the WikiProject article list, with stderr
    teed to its own ${wd_log} file.
  * fetch_enwiki_revisions.py is replaced by fetch_revisions.py, whose stderr
    is appended (tee -a) to ${revs_log}; the log is then moved, not copied,
    to /var/www/covid19/wikipedia/logs/.
  * copy_revisions_data.py is invoked with the date string; afterwards the
    per-date *revisions*.json files under wikipedia/data are xz-compressed
    and the resulting .xz and .tsv files are moved under
    /var/www/covid19/wikipedia/.

Editorial notes on this copy of the patch:

  * The mkdir target in the .xz loop originally read /var/www/covid9/...;
    it is corrected here to /var/www/covid19/... so that it matches the mv
    on the next line and the parallel .tsv loop. Because of that one-word
    change, the post-image hash on the "index" line no longer describes the
    exact result of applying this patch.
  * NOTE(review): both loops run "mkdir -p" on the full file path and then
    mv the file to that same path, so each file appears to end up inside a
    directory named after itself. Creating only the parent directory
    (dirname) may have been the intent -- confirm before changing.
  * Line breaks, the three context blank lines and the indentation of the
    loop bodies were reconstructed from the hunk line counts; the collapsed
    source did not preserve them.

diff --git a/cron-wikipedia_revisions.sh b/cron-wikipedia_revisions.sh
index 71ee752..555fd46 100644
--- a/cron-wikipedia_revisions.sh
+++ b/cron-wikipedia_revisions.sh
@@ -3,15 +3,28 @@
 TZ="UTC"
 date_string=$(date +%Y%m%d)
 
-./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/enwp-wikiproject_scraper-${date_string}.log)
-
 revs_log="enwp-revisions-${date_string}.log"
-./wikipedia/scripts/fetch_enwiki_revisions.py 2> >(tee wikipedia/logs/${rev_log})
-cp wikipedia/logs/${rev_log} /var/www/covid19/wikipedia/logs/
+./wikipedia/scripts/wikiproject_scraper.py 2> >(tee wikipedia/logs/${revs_log})
+
+wd_log="wd-page-crawler-${date_string}.log"
+python3 ./real-time-wiki-covid-tracker/PageCrawler.py -a "./wikipedia/resources/enwp_wikiproject_covid19_articles.txt" 2> >(tee wikipedia/logs/${wd_log})
+
+./wikipedia/scripts/fetch_revisions.py 2> >(tee -a wikipedia/logs/${revs_log})
+mv wikipedia/logs/${revs_log} /var/www/covid19/wikipedia/logs/
+
+python3 ./wikipedia/scripts/copy_revisions_data.py ${date_string}
+
+cd wikipedia/data
+xz */${date_string}/*revisions*.json
+
+find */${date_string}/*revisions*.xz | while read line; do
+    mkdir -p /var/www/covid19/wikipedia/$line
+    mv $line /var/www/covid19/wikipedia/$line
+done
 
-revs_tsv="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.tsv"
-cp wikipedia/data/${revs_tsv} /var/www/covid19/wikipedia
+find */${date_string}/*revisions*.tsv | while read line; do
+    mkdir -p /var/www/covid19/wikipedia/$line
+    mv $line /var/www/covid19/wikipedia/$line
+done
 
-revs_json="digobs_covid19-wikipedia-enwiki_revisions-${date_string}.json"
-xz wikipedia/data/${revs_json}
-cp wikipedia/data/${revs_json}.xz /var/www/covid19/wikipedia
+cd ../..