code.communitydata.science - cdsc_reddit.git/blob - datasets/run_comments_jobs.sbatch
make pass keyword arg to dataframe.drop
[cdsc_reddit.git] / datasets / run_comments_jobs.sbatch
#!/bin/bash
## Slurm array-job wrapper: each array task runs one line of
## ./comments_task_list.sh (resolved relative to the --chdir directory below).
##
## Usage: sbatch --array=<range> run_comments_jobs.sbatch [OFFSET]
##   OFFSET (optional, default 0) is added to SLURM_ARRAY_TASK_ID to select
##   the 1-based line of the task list that this array task executes.
#SBATCH --job-name="cdsc_reddit; parse comment dumps"
## Allocation Definition
#SBATCH --account=comdata
#SBATCH --partition=compute-bigmem
## Resources
## Nodes. Each array task runs a single command from the task list.
#SBATCH --nodes=1
## Walltime (24 hours)
#SBATCH --time=24:00:00
## Memory per node
#SBATCH --mem=8G
#SBATCH --cpus-per-task=1
#SBATCH --ntasks=1
#SBATCH --chdir=/gscratch/comdata/users/nathante/partitioning_reddit/dataverse/cdsc_reddit/datasets
## stdout and stderr share one log file per array task (%A = job id, %a = array index).
## NOTE: assumes the comments_jobs/ directory already exists under the --chdir directory.
#SBATCH --output=comments_jobs/%A_%a.out
#SBATCH --error=comments_jobs/%A_%a.out

# Set up the module system and the user's shell environment before running the task.
. /opt/ohpc/admin/lmod/lmod/init/profile
source ~/.bashrc

# Default the offset to 0: with no argument the bare "$1" would leave the
# arithmetic expression incomplete, TASK_NUM would be empty, and sed would
# then print the entire task list instead of a single line.
TASK_NUM=$(( SLURM_ARRAY_TASK_ID + ${1:-0} ))

# sed addresses are 1-based; 0 or a negative number is never a valid task line.
if (( TASK_NUM < 1 )); then
  printf 'run_comments_jobs: task line %s is out of range (lines are numbered from 1)\n' \
    "${TASK_NUM}" >&2
  exit 1
fi

# Fetch exactly one line of the task list; stop if the list cannot be read.
TASK_CALL=$(sed -n "${TASK_NUM}p" ./comments_task_list.sh) || exit

# Past the end of the list (or a blank line): nothing to run. Exit status stays
# 0, matching the previous behavior, but leave a trace in the job log.
if [[ -z "${TASK_CALL}" ]]; then
  printf 'run_comments_jobs: no task on line %s of ./comments_task_list.sh; nothing to run\n' \
    "${TASK_NUM}" >&2
  exit 0
fi

# Intentionally unquoted: the task line is split on whitespace into a command
# and its arguments. Quotes, pipes and redirections inside the task line are
# NOT interpreted by the shell.
# shellcheck disable=SC2086
${TASK_CALL}

Community Data Science Collective || Want to submit a patch?