X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/blobdiff_plain/ff689c71dd3c06e11681ef7caeed7195ea0dba3f..bc1f5428f0a501d92eb89b840f245a6e8bf9e89d:/wikiq_users/run_wikiq_users_cluster.sh

# Patch summary. These lines sit before the diff header and are commentary only.
#
# Creates wikiq_users/run_wikiq_users_cluster.sh as a new file with mode 100755.
# The new file is a two-line bash script: a shebang followed by one spark-submit
# call that
#   - connects to the Spark master at spark://n0649:18899
#   - runs wikiq_users_spark.py, given as a relative path, so the script has to
#     be launched from the directory that holds that file
#   - passes --output-format parquet, an input pattern via -i, an output
#     directory via -o, and --num-partitions 500
# The -i pattern is double-quoted, so bash hands the asterisk through unexpanded.
# NOTE(review): presumably wikiq_users_spark.py or Spark resolves that wildcard,
# and the meaning of each option is defined by wikiq_users_spark.py - confirm there.
# The master host and the /com/output paths are hard-coded for one cluster and
# one dump, enwiki-20180301.

diff --git a/wikiq_users/run_wikiq_users_cluster.sh b/wikiq_users/run_wikiq_users_cluster.sh
new file mode 100755
index 0000000..beca0f9
--- /dev/null
+++ b/wikiq_users/run_wikiq_users_cluster.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+spark-submit --master spark://n0649:18899 wikiq_users_spark.py --output-format parquet -i "/com/output/wikiq-enwiki-20180301/enwiki-20180301-pages-meta-history*.tsv" -o "/com/output/wikiq-users-enwiki-20180301-parquet/" --num-partitions 500