code.communitydata.science - cdsc_reddit.git/commitdiff
version of weekly_cosine_similarities.py from klone
author: Nate E TeBlunthuis <nathante@klone-login01.hyak.local>
Thu, 22 Apr 2021 17:38:10 +0000 (10:38 -0700)
committer: Nate E TeBlunthuis <nathante@klone-login01.hyak.local>
Thu, 22 Apr 2021 17:38:10 +0000 (10:38 -0700)
similarities/weekly_cosine_similarities.py

index f9c96664b1a8aae485d39e1bfb112430800b4c8e..aeafe743ca1f0ecfe832741d2081fcbee5ae3a14 100644 (file)
@@ -61,7 +61,7 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, min_df = None,
         list(pool.map(weeks,week_similarities_helper))
 
 def author_cosine_similarities_weekly(outfile, min_df=2 , included_subreddits=None, topN=500):
-    return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_100k.parquet',
+    return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_30k.parquet',
                                       outfile,
                                       'author',
                                       min_df,
@@ -69,7 +69,7 @@ def author_cosine_similarities_weekly(outfile, min_df=2 , included_subreddits=No
                                       topN)
 
 def term_cosine_similarities_weekly(outfile, min_df=None, included_subreddits=None, topN=500):
-    return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms_100k.parquet',
+    return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms_30k.parquet',
                                       outfile,
                                       'term',
                                       min_df,

Community Data Science Collective || Want to submit a patch?