code.communitydata.science - cdsc_reddit.git/blobdiff - similarities/tfidf.py
Merge branch 'master' of code:cdsc_reddit into excise_reindex
[cdsc_reddit.git] / similarities / tfidf.py
index 7f579faabb7092dacfb210f913f7b243d4c79337..110536eeb22b5c13132ff17b33d882fc47da63b7 100644 (file)
@@ -15,10 +15,9 @@ def _tfidf_wrapper(func, inpath, outpath, topN, term_colname, exclude, included_
     else:
         include_subs = select_topN_subreddits(topN)
 
     else:
         include_subs = select_topN_subreddits(topN)
 
-    df = func(df, include_subs, term_colname)
-
-    df.write.parquet(outpath,mode='overwrite',compression='snappy')
+    dfwriter = func(df, include_subs, term_colname)
 
 
+    dfwriter.parquet(outpath,mode='overwrite',compression='snappy')
     spark.stop()
 
 def tfidf(inpath, outpath, topN, term_colname, exclude, included_subreddits):
     spark.stop()
 
 def tfidf(inpath, outpath, topN, term_colname, exclude, included_subreddits):

Community Data Science Collective || Want to submit a patch?