code.communitydata.science - cdsc_reddit.git/blobdiff - checkpoint_parallelsql.sbatch
Use groupby - joins instead of windows
[cdsc_reddit.git] / checkpoint_parallelsql.sbatch
diff --git a/checkpoint_parallelsql.sbatch b/checkpoint_parallelsql.sbatch
new file mode 100644 (file)
index 0000000..a54aab1
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash
+## checkpoint_parallelsql.sbatch
+#SBATCH --job-name=tf_subreddit_comments
+## Allocation Definition
+#SBATCH --account=comdata-ckpt
+#SBATCH --partition=ckpt
+## Resources
+## Nodes. This should always be 1 for parallel-sql.
+#SBATCH --nodes=1    
+## Walltime (12 hours)
+#SBATCH --time=12:00:00
+## Memory per node
+#SBATCH --mem=100G
+#SBATCH --cpus-per-task=4
+#SBATCH --ntasks=1
+
+
+module load parallel_sql
+
+# Put commands to load other modules (e.g. matlab) here.
+# The command below makes parallel-sql fetch tasks from the database and
+# run them on the node in parallel. With --jobs 4 (matching
+# --cpus-per-task=4 above), at most 4 tasks run at one time.
+parallel-sql --sql -a parallel --exit-on-term --jobs 4

Community Data Science Collective || Want to submit a patch?