code.communitydata.science - cdsc_reddit.git/blobdiff - similarities/tfidf.py
Some improvements to run affinity clustering on larger dataset and
[cdsc_reddit.git] / similarities / tfidf.py
index 5b1c0c94d450da2d7259bc828404c779c734d62c..b7b4e6361f11e901b46ba5be73f1ec83e74001a8 100644 (file)
@@ -45,7 +45,7 @@ def tfidf_terms(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/commen
                  []
                  )
 
-def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/comment_authors.parquet',
+def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors.parquet',
                   topN=25000):
 
     return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/comment_authors.parquet",
@@ -55,7 +55,7 @@ def tfidf_authors_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfi
                  ['[deleted]','AutoModerator']
                  )
 
-def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf/comment_terms.parquet',
+def tfidf_terms_weekly(outpath='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet',
                 topN=25000):
 
     return tfidf_weekly("/gscratch/comdata/output/reddit_ngrams/comment_terms.parquet",

Community Data Science Collective || Want to submit a patch?