]> code.communitydata.science - cdsc_reddit.git/blobdiff - datasets/run_comments_jobs.sbatch
changes for archiving.
[cdsc_reddit.git] / datasets / run_comments_jobs.sbatch
diff --git a/datasets/run_comments_jobs.sbatch b/datasets/run_comments_jobs.sbatch
new file mode 100644 (file)
index 0000000..ce5f3e4
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/bash
+## Slurm batch script for cdsc_reddit: parse comment dumps.
+## Each run executes one line of ./comments_task_list.sh; the 1-based line
+## number is SLURM_ARRAY_TASK_ID plus the optional first argument (default 0).
+#SBATCH --job-name="cdsc_reddit; parse comment dumps"
+## Allocation Definition
+#SBATCH --account=comdata
+#SBATCH --partition=compute-bigmem
+## Resources
+## Nodes. This should always be 1 for parallel-sql.
+#SBATCH --nodes=1
+## Walltime (24 hours)
+#SBATCH --time=24:00:00
+## Memory per node
+#SBATCH --mem=8G
+#SBATCH --cpus-per-task=1
+#SBATCH --ntasks=1
+## Working directory and log files (stdout and stderr share one file)
+#SBATCH --chdir=/gscratch/comdata/users/nathante/partitioning_reddit/dataverse/cdsc_reddit/datasets
+#SBATCH --output=comments_jobs/%A_%a.out
+#SBATCH --error=comments_jobs/%A_%a.out
+. /opt/ohpc/admin/lmod/lmod/init/profile
+source ~/.bashrc
+## An unset SLURM_ARRAY_TASK_ID counts as 0 in the arithmetic below.
+TASK_NUM=$(( SLURM_ARRAY_TASK_ID + ${1:-0} ))
+TASK_CALL=$(sed -n "${TASK_NUM}p" ./comments_task_list.sh)
+if [[ -z "${TASK_CALL}" ]]; then
+  echo "run_comments_jobs: no task on line ${TASK_NUM} of comments_task_list.sh" >&2
+  exit 0
+fi
+## Left unquoted on purpose: the task line is word-split into a command and its arguments.
+${TASK_CALL}

Community Data Science Collective || Want to submit a patch?