X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/f28effe2c38ce2f7656901861c90e36fdfcdb04c..2d1c8013f2a59cde10b5169ee61edea3a4f35aca:/checkpoint_parallelsql.sbatch

diff --git a/checkpoint_parallelsql.sbatch b/checkpoint_parallelsql.sbatch
new file mode 100644
index 0000000..a54aab1
--- /dev/null
+++ b/checkpoint_parallelsql.sbatch
@@ -0,0 +1,24 @@
+#!/bin/bash
+## parallel_sql_job.sh
+#SBATCH --job-name=tf_subreddit_comments
+## Allocation Definition
+#SBATCH --account=comdata-ckpt
+#SBATCH --partition=ckpt
+## Resources
+## Nodes. This should always be 1 for parallel-sql.
+#SBATCH --nodes=1
+## Walltime (12 hours)
+#SBATCH --time=12:00:00
+## Memory per node
+#SBATCH --mem=100G
+#SBATCH --cpus-per-task=4
+#SBATCH --ntasks=1
+
+
+module load parallel_sql
+
+#Put here commands to load other modules (e.g. matlab etc.)
+#Below command means that parallel_sql will get tasks from the database
+#and run them on the node (in parallel). So a 16 core node will have
+#16 tasks running at one time.
+parallel-sql --sql -a parallel --exit-on-term --jobs 4