X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/e6294b5b90135a5163441c8dc62252dd6a188412..55b75ea6fcf421e95f4fe6b180dcec6e64676619:/datasets/submissions_2_parquet.sh?ds=inline diff --git a/datasets/submissions_2_parquet.sh b/datasets/submissions_2_parquet.sh old mode 100644 new mode 100755 index f133069..81a5753 --- a/datasets/submissions_2_parquet.sh +++ b/datasets/submissions_2_parquet.sh @@ -1,8 +1,8 @@ +#!/usr/bin/env bash ## this should be run manually since we don't have a nice way to wait on parallel_sql jobs -#!/usr/bin/env bash -./parse_submissions.sh +srun -p compute-bigmem -A comdata --nodes=1 --mem-per-cpu=9g -c 40 --time=120:00:00 python3 $(pwd)/submissions_2_parquet_part1.py gen_task_list start_spark_and_run.sh 1 $(pwd)/submissions_2_parquet_part2.py