X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/4447c60265c5c5de3281ca135461d91ab5339f03..6baa08889b2f46c14f2baa5e3d2136cf165b1673:/tfidf_comments.py diff --git a/tfidf_comments.py b/tfidf_comments.py index 9e1a437..65d2420 100644 --- a/tfidf_comments.py +++ b/tfidf_comments.py @@ -15,3 +15,4 @@ include_subs = {s.strip('\n') for s in include_subs} df = build_tfidf_dataset(df, include_subs, 'term') df.write.parquet('/gscratch/comdata/users/nathante/subreddit_tfidf.parquet',mode='overwrite',compression='snappy') +spark.stop()