def lsi_similarities(inpath, term_colname, outfile, min_df=None, max_df=None, included_subreddits=None, topN=None, from_date=None, to_date=None, tfidf_colname='tf_idf',n_components=100,n_iter=5,random_state=1968,algorithm='arpack',lsi_model=None):
print(n_components,flush=True)
+
if lsi_model is None:
if type(n_components) == list:
- lsi_model = Path(outfile) / f'{max(n_components)}_{term_colname}s_LSIMOD.pkl'
+ lsi_model = Path(outfile) / f'{max(n_components)}_{term_colname}_LSIMOD.pkl'
else:
- lsi_model = Path(outfile) / f'{n_components}_{term_colname}s_LSIMOD.pkl'
+ lsi_model = Path(outfile) / f'{n_components}_{term_colname}_LSIMOD.pkl'
simfunc = partial(lsi_column_similarities,n_components=n_components,n_iter=n_iter,random_state=random_state,algorithm=algorithm,lsi_model_save=lsi_model)