code.communitydata.science - cdsc_reddit.git/blobdiff - similarities/weekly_cosine_similarities.py
Updating to support wang-style user overlaps.
[cdsc_reddit.git] / similarities / weekly_cosine_similarities.py
index 54856b030d10aa123e609da067ec6dcc9f74df62..4d496f0ea6d60647fe4b2811f788a3f82536642d 100644 (file)
@@ -35,7 +35,7 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, min_df = None,
     subreddit_names['subreddit_id_new'] = subreddit_names['subreddit_id_new'] - 1
     spark.stop()
 
-d    weeks = sorted(list(subreddit_names.week.drop_duplicates()))
+    weeks = sorted(list(subreddit_names.week.drop_duplicates()))
     for week in weeks:
         print(f"loading matrix: {week}")
         mat = read_tfidf_matrix_weekly(tempdir.name, term_colname, week)

Community Data Science Collective || Want to submit a patch?