From: Nate E TeBlunthuis Date: Thu, 28 Jan 2021 04:22:24 +0000 (-0800) Subject: add visualization for 10000 subreddits based on author-tf similarities. X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/commitdiff_plain/554660275fe525733918aa0e25d0c4ea86dc5a41?ds=inline add visualization for 10000 subreddits based on author-tf similarities. --- diff --git a/visualization/Makefile b/visualization/Makefile index e69de29..97a7038 100644 --- a/visualization/Makefile +++ b/visualization/Makefile @@ -0,0 +1,11 @@ +all: subreddit_author_tf_similarities_10000.html #comment_authors_10000.html + +# wang_tsne_10000.html +# wang_tsne_10000.html:/gscratch/comdata/output/reddit_tsne/wang_similarity_10000.feather /gscratch/comdata/output/reddit_clustering/wang_similarity_10000.feather tsne_vis.py +# python3 tsne_vis.py --tsne_data=/gscratch/comdata/output/reddit_tsne/wang_similarity_10000.feather --clusters=/gscratch/comdata/output/reddit_clustering/wang_similarity_10000.feather --output=wang_tsne_10000.html + +# comment_authors_10000.html:/gscratch/comdata/output/reddit_tsne/comment_authors_10000.feather /gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather tsne_vis.py +# python3 tsne_vis.py --tsne_data=/gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather --clusters=/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather --output=comment_authors_10000.html + +subreddit_author_tf_similarities_10000.html:/gscratch/comdata/output/reddit_tsne/subreddit_author_tf_similarities_10000.feather /gscratch/comdata/output/reddit_clustering/subreddit_author_tf_similarities_10000.feather tsne_vis.py + start_spark_and_run.sh 1 tsne_vis.py --tsne_data=/gscratch/comdata/output/reddit_tsne/subreddit_author_tf_similarities_10000.feather --clusters=/gscratch/comdata/output/reddit_clustering/subreddit_author_tf_similarities_10000.feather --output=subreddit_author_tf_similarities_10000.html diff --git a/visualization/subreddit_author_tf_similarities_10000.html b/visualization/subreddit_author_tf_similarities_10000.html new file mode 100644 index 0000000..7cfbd0e --- /dev/null +++ b/visualization/subreddit_author_tf_similarities_10000.html @@ -0,0 +1,35 @@ + + + + + + + + + +
+ + + \ No newline at end of file diff --git a/visualization/subreddit_author_tf_similarities_10000_viewport.html b/visualization/subreddit_author_tf_similarities_10000_viewport.html new file mode 100644 index 0000000..6e686bd --- /dev/null +++ b/visualization/subreddit_author_tf_similarities_10000_viewport.html @@ -0,0 +1,35 @@ + + + + + + + + + +
+ + + \ No newline at end of file diff --git a/visualization/tsne_vis.py b/visualization/tsne_vis.py index 4b42e8e..c192d21 100644 --- a/visualization/tsne_vis.py +++ b/visualization/tsne_vis.py @@ -155,13 +155,13 @@ def build_visualization(tsne_data, clusters, output): if __name__ == "__main__": fire.Fire(build_visualization) -commenter_data = pd.read_feather("tsne_author_fit.feather") -clusters = pd.read_feather('author_3000_clusters.feather') -commenter_data = assign_cluster_colors(commenter_data,clusters,10,8) -commenter_zoom_plot = zoom_plot(commenter_data) -commenter_viewport_plot = viewport_plot(commenter_data) -commenter_zoom_plot.save("subreddit_commenters_tsne_3000.html") -commenter_viewport_plot.save("subreddit_commenters_tsne_3000_viewport.html") +# commenter_data = pd.read_feather("tsne_author_fit.feather") +# clusters = pd.read_feather('author_3000_clusters.feather') +# commenter_data = assign_cluster_colors(commenter_data,clusters,10,8) +# commenter_zoom_plot = zoom_plot(commenter_data) +# commenter_viewport_plot = viewport_plot(commenter_data) +# commenter_zoom_plot.save("subreddit_commenters_tsne_3000.html") +# commenter_viewport_plot.save("subreddit_commenters_tsne_3000_viewport.html") # chart = chart.properties(width=10000,height=10000) # chart.save("test_tsne_whole.svg")