X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/7df8436067dba9a9e6867424002d01593e4bcd25..95905cfc8b46a93d643c53dd9666ac6b65a516b6:/similarities/similarities_helper.py diff --git a/similarities/similarities_helper.py b/similarities/similarities_helper.py index 7f8a639..0d49a56 100644 --- a/similarities/similarities_helper.py +++ b/similarities/similarities_helper.py @@ -35,7 +35,7 @@ def reindex_tfidf(infile, term_colname, min_df=None, max_df=None, included_subre if included_subreddits is None: included_subreddits = select_topN_subreddits(topN) else: - included_subreddits = set(open(included_subreddits)) + included_subreddits = set(map(str.strip,map(str.lower,open(included_subreddits)))) ds_filter = ds.field("subreddit").isin(included_subreddits)