]> code.communitydata.science - cdsc_reddit.git/blob - start_spark_and_run.sh
Pass keyword arg to dataframe.drop
[cdsc_reddit.git] / start_spark_and_run.sh
1
#!/usr/bin/env bash

# Start a standalone Spark cluster on klone and run a spark-submit job on it.
#
# Usage: start_spark_and_run.sh NNODES SCRIPT [SCRIPT_ARGS...]
#   $1      - number of nodes to allocate
#   $2      - script/app passed to spark-submit
#   ${@:3}  - extra arguments forwarded to spark-submit
#
# Requires SPARK_CONF_DIR to point at the cluster's Spark configuration.
source "$SPARK_CONF_DIR/spark-env.sh"

# Generate the job script executed on the allocated node.
# BUGFIX: the original appended some lines to "job.script.sh" (typo) instead
# of "job_script.sh", so the PYSPARK_PYTHON / JAVA_HOME / SPARK_CONF_DIR
# exports and the spark-env.sh sourcing never reached the script that runs.
# A single heredoc guarantees every line lands in the same file.
# Unescaped $SPARK_CONF_DIR, $SPARK_MASTER_PORT, $2 and ${@:3} expand now,
# at generation time; the escaped \$(hostname) expands later, on the node.
cat > job_script.sh <<EOF
#!/usr/bin/bash
source ~/.bashrc
export PYSPARK_PYTHON=python3
export JAVA_HOME=/gscratch/comdata/local/open-jdk
export SPARK_CONF_DIR=/gscratch/comdata/local/spark_config
echo \$(hostname)
source $SPARK_CONF_DIR/spark-env.sh
start_spark_cluster.sh
spark-submit --verbose --master spark://\$(hostname):$SPARK_MASTER_PORT $2 ${@:3}
stop-all.sh
EOF
#echo "singularity instance stop --all" >> job_script.sh
chmod +x job_script.sh

# Total CPUs across the allocation (40 cores per node).
# NOTE(review): currently unused — salloc below hard-codes -c 40 per task;
# confirm whether this was meant to drive the -c flag.
cpus=$(( $1 * 40 ))

salloc -p compute-bigmem -A comdata --nodes="$1" --time=48:00:00 -c 40 --mem=362G --exclusive srun -n1 job_script.sh

Community Data Science Collective || Want to submit a patch?