X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/e6294b5b90135a5163441c8dc62252dd6a188412..56269deee3d33620550d67bdd3c1a7b64eb3f7e4:/similarities/weekly_cosine_similarities.py diff --git a/similarities/weekly_cosine_similarities.py b/similarities/weekly_cosine_similarities.py index 2b3c90b..54856b0 100644 --- a/similarities/weekly_cosine_similarities.py +++ b/similarities/weekly_cosine_similarities.py @@ -35,7 +35,7 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, min_df = None, subreddit_names['subreddit_id_new'] = subreddit_names['subreddit_id_new'] - 1 spark.stop() - weeks = list(subreddit_names.week.drop_duplicates()) +d weeks = sorted(list(subreddit_names.week.drop_duplicates())) for week in weeks: print(f"loading matrix: {week}") mat = read_tfidf_matrix_weekly(tempdir.name, term_colname, week)