From: Nate E TeBlunthuis Date: Tue, 7 Jul 2020 06:27:18 +0000 (-0700) Subject: Script to run both parts of submissions_2_parquet.sh X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/commitdiff_plain/33e088492c2b1d49daadcb859656a0c81d81e2b8?ds=sidebyside;hp=--cc Script to run both parts of submissions_2_parquet.sh --- 33e088492c2b1d49daadcb859656a0c81d81e2b8 diff --git a/.gitignore b/.gitignore index 95b7622..c46a786 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /#*# /.#* +/job_script.sh diff --git a/submissions_2_parquet.sh b/submissions_2_parquet.sh new file mode 100644 index 0000000..d1c6bce --- /dev/null +++ b/submissions_2_parquet.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +# part2 should be run on one or more spark nodes + +./submissions_2_parquet_part1.py + +start_spark_and_run.sh 1 $(pwd)/submissions_2_parquet_part2.py + +