code.communitydata.science - cdsc_reddit.git/blobdiff - similarities/similarities_helper.py
Merge branch 'charliepatch' of code:cdsc_reddit into charliepatch
[cdsc_reddit.git] / similarities / similarities_helper.py
index 7f8a639aeecf255ed3db0e47f4ad14769cb5ceb4..0d49a56579649194175e313191bdd174370bc0dd 100644 (file)
@@ -35,7 +35,7 @@ def reindex_tfidf(infile, term_colname, min_df=None, max_df=None, included_subre
     if included_subreddits is None:
         included_subreddits = select_topN_subreddits(topN)
     else:
-        included_subreddits = set(open(included_subreddits))
+        included_subreddits = set(map(str.strip,map(str.lower,open(included_subreddits))))
 
     ds_filter = ds.field("subreddit").isin(included_subreddits)
 

Community Data Science Collective || Want to submit a patch?