# Build the dataset with build_tfidf_dataset and persist it as
# snappy-compressed parquet, replacing any previous output at this path.
# NOTE(review): this build/write pair used to appear twice back to back; the
# second pass fed the already-built result into build_tfidf_dataset again and
# overwrote the same file. Assumed to be an accidental duplication -- confirm.
df = build_tfidf_dataset(df, include_subs, 'term')
df.write.parquet(
    '/gscratch/comdata/users/nathante/subreddit_tfidf.parquet',
    mode='overwrite',
    compression='snappy',
)