code.communitydata.science - cdsc_reddit.git/commitdiff
bugfix in completing tfidf similarity matrices.
author: Nate E TeBlunthuis <nathante@n2347.hyak.local>
Thu, 12 Nov 2020 19:47:53 +0000 (11:47 -0800)
committer: Nate E TeBlunthuis <nathante@n2347.hyak.local>
Thu, 12 Nov 2020 19:47:53 +0000 (11:47 -0800)
author_cosine_similarity.py
term_cosine_similarity.py

index 7137da478008e26c2f05fc134f336973e94bb35a..08001c2165460bbea2b7f01d32944d67ed36c52f 100644 (file)
@@ -71,8 +71,8 @@ https://stanford.edu/~rezab/papers/dimsum.pdf. If similarity_threshold=0 we get
     similarities = similarities.join(df, on='j')
     similarities = similarities.rename(columns={'subreddit':"subreddit_j"})
 
     similarities = similarities.join(df, on='j')
     similarities = similarities.rename(columns={'subreddit':"subreddit_j"})
 
-    similarities.write_feather(output_feather)
-    similarities.write_csv(output_csv)
+    similarities.to_feather(output_feather)
+    similarities.to_csv(output_csv)
     return similarities
     
 if __name__ == '__main__':
     return similarities
     
 if __name__ == '__main__':
index f4f1c6edf76e33bbb41fc74a1de207a8390dca9e..48132a83649c8271cade025913c0bbcc2bac72e7 100644 (file)
@@ -71,8 +71,8 @@ https://stanford.edu/~rezab/papers/dimsum.pdf. If similarity_threshold=0 we get
     similarities = similarities.join(df, on='j')
     similarities = similarities.rename(columns={'subreddit':"subreddit_j"})
 
     similarities = similarities.join(df, on='j')
     similarities = similarities.rename(columns={'subreddit':"subreddit_j"})
 
-    similarities.write_feather(output_feather)
-    similarities.write_csv(output_csv)
+    similarities.to_feather(output_feather)
+    similarities.to_csv(output_csv)
     return similarities
     
 if __name__ == '__main__':
     return similarities
     
 if __name__ == '__main__':

Community Data Science Collective || Want to submit a patch?