code.communitydata.science - cdsc_reddit.git/blobdiff - tfidf_comments.py
git-annex in nathante@mox2.hyak.local:/gscratch/comdata/users/nathante/cdsc-reddit
[cdsc_reddit.git] / tfidf_comments.py
index 9e1a437d417df955b6ea5f84c9f5c9b2b87b2136..65d24204446d09ecf66afa30b23f3e118f51a2de 100644 (file)
@@ -15,3 +15,4 @@ include_subs = {s.strip('\n') for s in include_subs}
 df = build_tfidf_dataset(df, include_subs, 'term')
 
 df.write.parquet('/gscratch/comdata/users/nathante/subreddit_tfidf.parquet',mode='overwrite',compression='snappy')
+spark.stop()

Community Data Science Collective || Want to submit a patch?