]> code.communitydata.science - cdsc_reddit.git/blob - clustering/Makefile
Merge branch 'master' of code:cdsc_reddit
[cdsc_reddit.git] / clustering / Makefile
1 #srun_cdsc='srun -p comdata-int -A comdata --time=300:00:00 --time-min=00:15:00 --mem=100G --ntasks=1 --cpus-per-task=28'
2 all:/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather /gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather /gscratch/comdata/output/reddit_clustering/subreddit_author_tf_similarities_10000.feather /gscratch/comdata/output/reddit_tsne/subreddit_author_tf_similarities_10000.feather /gscratch/comdata/output/reddit_tsne/comment_authors_10000.feather
3 #all:/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather /gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather /gscratch/comdata/output/reddit_clustering/subreddit_author_tf_similarities_10000.feather /gscratch/comdata/output/reddit_clustering/wang_similarity_10000.feather /gscratch/comdata/output/reddit_tsne/subreddit_author_tf_similarities_10000.feather /gscratch/comdata/output/reddit_tsne/wang_similarity_10000.feather /gscratch/comdata/output/reddit_tsne/comment_authors_10000.feather
4
5 /gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather:clustering.py /gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather
6 #       $srun_cdsc python3
7         start_spark_and_run.sh 1 clustering.py /gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather /gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather ---max_iter=400 --convergence_iter=15 --preference_quantile=0.85 --damping=0.85
8
9 /gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather:clustering.py /gscratch/comdata/output/reddit_similarity/comment_terms_10000.feather
10 #       $srun_cdsc python3
11         start_spark_and_run.sh 1 clustering.py /gscratch/comdata/output/reddit_similarity/comment_terms_10000.feather /gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather ---max_iter=1000 --convergence_iter=15 --preference_quantile=0.9 --damping=0.5
12
13 /gscratch/comdata/output/reddit_clustering/subreddit_author_tf_similarities_10000.feather:clustering.py /gscratch/comdata/output/reddit_similarity/subreddit_author_tf_similarities_10000.parquet
14 #       $srun_cdsc
15         start_spark_and_run.sh 1 clustering.py /gscratch/comdata/output/reddit_similarity/subreddit_author_tf_similarities_10000.parquet /gscratch/comdata/output/reddit_clustering/subreddit_author_tf_similarities_10000.feather ---max_iter=400 --convergence_iter=15 --preference_quantile=0.5 --damping=0.85
16
17 # it's pretty difficult to get a result that isn't one huge megacluster. A sign that it's bullcrap
18 # /gscratch/comdata/output/reddit_clustering/wang_similarity_10000.feather:clustering.py /gscratch/comdata/output/reddit_similarity/wang_similarity_10000.feather
19 #       ./clustering.py /gscratch/comdata/output/reddit_similarity/wang_similarity_10000.feather /gscratch/comdata/output/reddit_clustering/wang_similarity_10000.feather ---max_iter=400 --convergence_iter=15 --preference_quantile=0.9 --damping=0.85
20
21 /gscratch/comdata/output/reddit_tsne/subreddit_author_tf_similarities_10000.feather:fit_tsne.py /gscratch/comdata/output/reddit_similarity/subreddit_author_tf_similarities_10000.parquet
22
23         start_spark_and_run.sh 1 fit_tsne.py --similarities=/gscratch/comdata/output/reddit_similarity/subreddit_author_tf_similarities_10000.parquet --output=/gscratch/comdata/output/reddit_tsne/subreddit_author_tf_similarities_10000.feather
24
25
26 # /gscratch/comdata/output/reddit_tsne/wang_similarity_10000.feather:fit_tsne.py /gscratch/comdata/output/reddit_similarity/wang_similarity_10000.feather
27
28 #       python3 fit_tsne.py --similarities=/gscratch/comdata/output/reddit_similarity/wang_similarity_10000.feather --output=/gscratch/comdata/output/reddit_tsne/wang_similarity_10000.feather
29
30 /gscratch/comdata/output/reddit_tsne/comment_authors_10000.feather:clustering.py /gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather
31 #       $srun_cdsc python3
32         start_spark_and_run.sh 1 fit_tsne.py --similarities=/gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather --output=/gscratch/comdata/output/reddit_tsne/comment_authors_10000.feather

Community Data Science Collective || Want to submit a patch?