]> code.communitydata.science - mediawiki_dump_tools.git/blob - wikiq_users/run_wikiq_users_cluster.sh
beca0f9a8770aa8baa8b55a519232a8f71c7d7cb
[mediawiki_dump_tools.git] / wikiq_users / run_wikiq_users_cluster.sh
#!/usr/bin/env bash
#
# Submit wikiq_users_spark.py to a Spark cluster with spark-submit.
#
# Every setting below defaults to the value that used to be hard-coded in
# this script, so running it with none of the variables set submits exactly
# the same command line as before. Override any of them from the
# environment to target another master or dataset without editing the file:
#
#   WIKIQ_USERS_SPARK_MASTER=spark://othernode:7077 ./run_wikiq_users_cluster.sh
#
# Environment (all optional):
#   WIKIQ_USERS_SPARK_MASTER    value given to spark-submit --master
#   WIKIQ_USERS_INPUT           value given to the job's -i option
#   WIKIQ_USERS_OUTPUT          value given to the job's -o option
#   WIKIQ_USERS_OUTPUT_FORMAT   value given to the job's --output-format option
#   WIKIQ_USERS_NUM_PARTITIONS  value given to the job's --num-partitions option
#
# Exit status: that of spark-submit.
set -euo pipefail

readonly spark_master="${WIKIQ_USERS_SPARK_MASTER:-spark://n0649:18899}"

# The wildcard stays inside double quotes on purpose: the shell hands the
# pattern to wikiq_users_spark.py unexpanded, as the original script did.
# NOTE(review): presumably the job (or Spark) resolves the glob itself --
# confirm against wikiq_users_spark.py.
readonly input_pattern="${WIKIQ_USERS_INPUT:-/com/output/wikiq-enwiki-20180301/enwiki-20180301-pages-meta-history*.tsv}"
readonly output_dir="${WIKIQ_USERS_OUTPUT:-/com/output/wikiq-users-enwiki-20180301-parquet/}"
readonly output_format="${WIKIQ_USERS_OUTPUT_FORMAT:-parquet}"
readonly num_partitions="${WIKIQ_USERS_NUM_PARTITIONS:-500}"

# wikiq_users_spark.py is given as a relative path, so the script must be run
# from the directory that contains it (unchanged from the original).
spark-submit \
  --master "${spark_master}" \
  wikiq_users_spark.py \
  --output-format "${output_format}" \
  -i "${input_pattern}" \
  -o "${output_dir}" \
  --num-partitions "${num_partitions}"

Community Data Science Collective || Want to submit a patch?