code.communitydata.science - cdsc_reddit.git/blobdiff - similarities/similarities_helper.py
Merge branch 'master' of code:cdsc_reddit into excise_reindex
[cdsc_reddit.git] / similarities / similarities_helper.py
index e59563e396bc0988cf645dc80a6cba27997a512e..1492983f88695111af812c600c7ece03e7abe802 100644 (file)
@@ -32,7 +32,7 @@ def reindex_tfidf(infile, term_colname, min_df=None, max_df=None, included_subre
     if included_subreddits is None:
         included_subreddits = select_topN_subreddits(topN)
     else:
     if included_subreddits is None:
         included_subreddits = select_topN_subreddits(topN)
     else:
-        included_subreddits = set(open(included_subreddits))
+        included_subreddits = set(map(str.strip,map(str.lower,open(included_subreddits))))
 
     ds_filter = ds.field("subreddit").isin(included_subreddits)
 
 
     ds_filter = ds.field("subreddit").isin(included_subreddits)
 

Community Data Science Collective || Want to submit a patch?