- entries = dataset.to_table(columns=['tf_idf','subreddit_id_new',term_id_new],filter=ds.field('week')==week).to_pandas()
- return(csr_matrix((entries.tf_idf,(entries[term_id_new]-1, entries.subreddit_id_new-1))))
+ entries = dataset.to_table(columns=[tfidf_colname,'subreddit_id_new', term_id_new],filter=ds.field('week')==week).to_pandas()
+ return(csr_matrix((entries[tfidf_colname], (entries[term_id_new]-1, entries.subreddit_id_new-1))))
+
+def read_tfidf_matrix(path, term_colname, tfidf_colname='tf_idf'):
+ term = term_colname
+ term_id = term + '_id'
+ term_id_new = term + '_id_new'
+ dataset = ds.dataset(path,format='parquet')
+ print(f"tfidf_colname:{tfidf_colname}")
+ entries = dataset.to_table(columns=[tfidf_colname, 'subreddit_id_new',term_id_new]).to_pandas()
+ return(csr_matrix((entries[tfidf_colname],(entries[term_id_new]-1, entries.subreddit_id_new-1))))
+