X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/blobdiff_plain/bc1f5428f0a501d92eb89b840f245a6e8bf9e89d..3d12865c4ec7db67123443abb3d5dc26ee0f300d:/wikiq_users/run_wikiq_users_cluster.sh
diff --git a/wikiq_users/run_wikiq_users_cluster.sh b/wikiq_users/run_wikiq_users_cluster.sh
index beca0f9..84e23f0 100755
--- a/wikiq_users/run_wikiq_users_cluster.sh
+++ b/wikiq_users/run_wikiq_users_cluster.sh
@@ -1,2 +1,2 @@
 #!/usr/bin/env bash
-spark-submit --master spark://n0649:18899 wikiq_users_spark.py --output-format parquet -i "/com/output/wikiq-enwiki-20180301/enwiki-20180301-pages-meta-history*.tsv" -o "/com/output/wikiq-users-enwiki-20180301-parquet/" --num-partitions 500
+spark-submit --master spark://n0649:18899 wikiq_users_spark.py --output-format parquet -i "/com/output/wikiq-enwiki-persist-sequence-20180301/enwiki/enwiki-20180301-pages-meta-history*.tsv" -o "/com/output/wikiq-users-enwiki-20180301-parquet/" --num-partitions 500 --schema-opt persistence+collapse