X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/65deba5e4e4ad9e3f23e82573491f7d6b190e644..refs/heads/synced/excise_reindex:/similarities/lsi_similarities.py?ds=inline diff --git a/similarities/lsi_similarities.py b/similarities/lsi_similarities.py index 565e53f..57a2d0d 100644 --- a/similarities/lsi_similarities.py +++ b/similarities/lsi_similarities.py @@ -21,12 +21,13 @@ from functools import partial def lsi_similarities(inpath, term_colname, outfile, min_df=None, max_df=None, included_subreddits=None, topN=None, from_date=None, to_date=None, tfidf_colname='tf_idf',n_components=100,n_iter=5,random_state=1968,algorithm='arpack',lsi_model=None): print(n_components,flush=True) + if lsi_model is None: if type(n_components) == list: - lsi_model = Path(outfile) / f'{max(n_components)}_{term_colname}s_LSIMOD.pkl' + lsi_model = Path(outfile) / f'{max(n_components)}_{term_colname}_LSIMOD.pkl' else: - lsi_model = Path(outfile) / f'{n_components}_{term_colname}s_LSIMOD.pkl' + lsi_model = Path(outfile) / f'{n_components}_{term_colname}_LSIMOD.pkl' simfunc = partial(lsi_column_similarities,n_components=n_components,n_iter=n_iter,random_state=random_state,algorithm=algorithm,lsi_model_save=lsi_model)