X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/e6294b5b90135a5163441c8dc62252dd6a188412..56269deee3d33620550d67bdd3c1a7b64eb3f7e4:/similarities/cosine_similarities.py diff --git a/similarities/cosine_similarities.py b/similarities/cosine_similarities.py index ae080d5..54b9599 100644 --- a/similarities/cosine_similarities.py +++ b/similarities/cosine_similarities.py @@ -3,7 +3,7 @@ from pyspark.sql import SparkSession import pandas as pd import fire from pathlib import Path -from similarities_helper import prep_tfidf_entries, read_tfidf_matrix, select_topN_subreddits +from similarities_helper import prep_tfidf_entries, read_tfidf_matrix, select_topN_subreddits, column_similarities def cosine_similarities(infile, term_colname, outfile, min_df=None, included_subreddits=None, topN=500, exclude_phrases=False):