-def term_cosine_similarities_weekly(outfile, min_df=None, included_subreddits=None, topN=500):
- return cosine_similarities_weekly('/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms_100k.parquet',
- outfile,
- 'term',
- min_df,
- included_subreddits,
- topN)
+def term_cosine_similarities_weekly(outfile, infile='/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', min_df=None, max_df=None, included_subreddits=None, topN=None):
+ """Call cosine_similarities_weekly with 'term' as its third argument.
+
+ Thin wrapper: passes infile, outfile, the literal 'term', min_df, max_df,
+ included_subreddits and topN positionally, in that order, and returns
+ whatever cosine_similarities_weekly returns.
+
+ infile defaults to the weekly comment_terms.parquet path; the other
+ optional arguments default to None and are forwarded unchanged.
+
+ NOTE(review): the meaning of min_df / max_df / included_subreddits / topN
+ is defined by cosine_similarities_weekly, which is not visible here --
+ presumably document-frequency bounds and a subreddit filter; confirm.
+ NOTE(review): the positional order used below must match the parameter
+ order of cosine_similarities_weekly; verify against its definition.
+ """
+ return cosine_similarities_weekly(infile,
+ outfile,
+ 'term',
+ min_df,
+ max_df,
+ included_subreddits,
+ topN)
+
+
+def author_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_authors_test.parquet', min_df=2, max_df=None, included_subreddits=None, topN=None,n_components=100,lsi_model=None):
+ """Call cosine_similarities_weekly_lsi with 'author' as its third argument.
+
+ Thin wrapper: passes infile, outfile, the literal 'author', min_df,
+ max_df, included_subreddits and topN positionally, then n_components and
+ lsi_model as keyword arguments, and returns whatever
+ cosine_similarities_weekly_lsi returns.
+
+ Defaults set here: min_df=2, n_components=100, and None for max_df,
+ included_subreddits, topN and lsi_model.
+
+ NOTE(review): the default infile points at comment_authors_test.parquet;
+ confirm a "_test" file is the intended default rather than a leftover.
+ NOTE(review): the semantics of n_components / lsi_model are defined by
+ cosine_similarities_weekly_lsi, which is not visible here -- presumably
+ the LSI dimensionality and an optional pre-fit model; confirm.
+ """
+ return cosine_similarities_weekly_lsi(infile,
+ outfile,
+ 'author',
+ min_df,
+ max_df,
+ included_subreddits,
+ topN,
+ n_components=n_components,
+ lsi_model=lsi_model)
+
+
+def term_cosine_similarities_weekly_lsi(outfile, infile = '/gscratch/comdata/output/reddit_similarity/tfidf_weekly/comment_terms.parquet', min_df=None, max_df=None, included_subreddits=None, topN=500,n_components=100,lsi_model=None):
+ """Call cosine_similarities_weekly_lsi with 'term' as its third argument.
+
+ Thin wrapper: passes infile, outfile, the literal 'term', min_df, max_df,
+ included_subreddits and topN positionally, then n_components and
+ lsi_model as keyword arguments, and returns whatever
+ cosine_similarities_weekly_lsi returns.
+
+ Defaults set here: topN=500, n_components=100, and None for min_df,
+ max_df, included_subreddits and lsi_model. infile defaults to the weekly
+ comment_terms.parquet path.
+
+ NOTE(review): topN defaults to 500 here but to None in
+ author_cosine_similarities_weekly_lsi; confirm the difference is
+ intentional.
+ NOTE(review): the semantics of the forwarded arguments are defined by
+ cosine_similarities_weekly_lsi, which is not visible here; verify the
+ positional order against its definition.
+ """
+ return cosine_similarities_weekly_lsi(infile,
+ outfile,
+ 'term',
+ min_df,
+ max_df,
+ included_subreddits,
+ topN,
+ n_components=n_components,
+ lsi_model=lsi_model)