From: Nate E TeBlunthuis Date: Thu, 22 Apr 2021 17:38:10 +0000 (-0700) Subject: version of weekly_cosine_similarities.py from klone X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/commitdiff_plain/34e0a0a30de8ef1e6aac5e588b4591d6afa69a19?ds=inline version of weekly_cosine_similarities.py from klone --- diff --git a/similarities/weekly_cosine_similarities.py b/similarities/weekly_cosine_similarities.py index f9c9666..aeafe74 100644 --- a/similarities/weekly_cosine_similarities.py +++ b/similarities/weekly_cosine_similarities.py @@ -61,7 +61,7 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, min_df = None, list(pool.map(weeks,week_similarities_helper)) def author_cosine_similarities_weekly(outfile, min_df=2 , included_subreddits=None, topN=500): - return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_100k.parquet', + return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_30k.parquet', outfile, 'author', min_df, @@ -69,7 +69,7 @@ def author_cosine_similarities_weekly(outfile, min_df=2 , included_subreddits=No topN) def term_cosine_similarities_weekly(outfile, min_df=None, included_subreddits=None, topN=500): - return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms_100k.parquet', + return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms_30k.parquet', outfile, 'term', min_df,