code.communitydata.science - cdsc_reddit.git/blobdiff - similarities/tfidf.py
Merge branch 'master' of code:cdsc_reddit into excise_reindex
[cdsc_reddit.git] / similarities / tfidf.py
index 94dcbf59e7e2171552bd219a14a1a2373d6b19a3..19d30138457843df62ef440d3a75acc45b41df87 100644 (file)
@@ -52,7 +52,7 @@ def tfidf_terms(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/commen
 
 def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors.parquet',
                          topN=None,
-                         include_subreddits=None):
+                         included_subreddits=None):
 
     return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet",
                         outpath,
@@ -63,7 +63,8 @@ def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfi
                         )
 
 def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet',
-                       topN=25000):
+                       topN=None,
+                       included_subreddits=None):
 
 
     return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/comment_terms.parquet",
@@ -71,7 +72,7 @@ def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf
                         topN,
                         'term',
                         [],
-                        included_subreddits=None
+                        included_subreddits=included_subreddits
                         )
 
 

Community Data Science Collective || Want to submit a patch?