X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/98c1317af5da5aafd1e7acb31911ca4333312571..197518a222a321a8027c3dc5a4121350c47d0779:/datasets/checkpoint_parallelsql.sbatch?ds=sidebyside
diff --git a/datasets/checkpoint_parallelsql.sbatch b/datasets/checkpoint_parallelsql.sbatch
deleted file mode 100644
index dd61e65..0000000
--- a/datasets/checkpoint_parallelsql.sbatch
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-## parallel_sql_job.sh
-#SBATCH --job-name=tf_subreddit_comments
-## Allocation Definition
-#SBATCH --account=comdata-ckpt
-#SBATCH --partition=ckpt
-## Resources
-## Nodes. This should always be 1 for parallel-sql.
-#SBATCH --nodes=1
-## Walltime (12 hours)
-#SBATCH --time=12:00:00
-## Memory per node
-#SBATCH --mem=32G
-#SBATCH --cpus-per-task=4
-#SBATCH --ntasks=1
-#SBATCH -D /gscratch/comdata/users/nathante/cdsc-reddit
-source ./bin/activate
-module load parallel_sql
-echo $(which perl)
-conda list pyarrow
-which python3
-#Put here commands to load other modules (e.g. matlab etc.)
-#Below command means that parallel_sql will get tasks from the database
-#and run them on the node (in parallel). So a 16 core node will have
-#16 tasks running at one time.
-parallel-sql --sql -a parallel --exit-on-term --jobs 4