code.communitydata.science - cdsc_reddit.git/blobdiff - visualization/tsne_vis.py
remove nsfw subs from topN
[cdsc_reddit.git] / visualization / tsne_vis.py
index 915cd7e9019a095855b5c3bc512cee80acbef49c..a52d812257f1603b9a86c12dfd2a944858923132 100644 (file)
@@ -5,6 +5,7 @@ alt.data_transformers.enable('default')
 from sklearn.neighbors import NearestNeighbors
 import pandas as pd
 from numpy import random
 from sklearn.neighbors import NearestNeighbors
 import pandas as pd
 from numpy import random
+import fire
 import numpy as np
 
 def base_plot(plot_data):
 import numpy as np
 
 def base_plot(plot_data):
@@ -100,26 +101,31 @@ def assign_cluster_colors(tsne_data, clusters, n_colors, n_neighbors = 4):
     tsne_data = tsne_data.merge(colors,on='cluster')
     return(tsne_data)
 
     tsne_data = tsne_data.merge(colors,on='cluster')
     return(tsne_data)
 
-term_data = pd.read_feather("tsne_subreddit_fit.feather")
-clusters = pd.read_feather("term_3000_clusters.feather")
+def build_visualization(tsne_data, clusters, output):
 
 
-tsne_data = assign_cluster_colors(term_data,clusters,10,8)
+    tsne_data = pd.read_feather(tsne_data)
+    clusters = pd.read_feather(clusters)
 
 
-term_zoom_plot = zoom_plot(tsne_data)
+    tsne_data = assign_cluster_colors(tsne_data,clusters,10,8)
 
 
-term_zoom_plot.save("subreddit_terms_tsne_3000.html")
+    term_zoom_plot = zoom_plot(tsne_data)
 
 
-term_viewport_plot = viewport_plot(tsne_data)
+    term_zoom_plot.save(output)
 
 
-term_viewport_plot.save("subreddit_terms_tsne_3000_viewport.html")
+    term_viewport_plot = viewport_plot(tsne_data)
 
 
-commenter_data = pd.read_feather("tsne_author_fit.feather")
-clusters = pd.read_feather('author_3000_clusters.feather')
-commenter_data = assign_cluster_colors(commenter_data,clusters,10,8)
-commenter_zoom_plot = zoom_plot(commenter_data)
-commenter_viewport_plot = viewport_plot(commenter_data)
-commenter_zoom_plot.save("subreddit_commenters_tsne_3000.html")
-commenter_viewport_plot.save("subreddit_commenters_tsne_3000_viewport.html")
+    term_viewport_plot.save(output.replace(".html","_viewport.html"))
+
+if __name__ == "__main__":
+    fire.Fire(build_visualization)
+
+# commenter_data = pd.read_feather("tsne_author_fit.feather")
+# clusters = pd.read_feather('author_3000_clusters.feather')
+# commenter_data = assign_cluster_colors(commenter_data,clusters,10,8)
+# commenter_zoom_plot = zoom_plot(commenter_data)
+# commenter_viewport_plot = viewport_plot(commenter_data)
+# commenter_zoom_plot.save("subreddit_commenters_tsne_3000.html")
+# commenter_viewport_plot.save("subreddit_commenters_tsne_3000_viewport.html")
 
 # chart = chart.properties(width=10000,height=10000)
 # chart.save("test_tsne_whole.svg")
 
 # chart = chart.properties(width=10000,height=10000)
 # chart.save("test_tsne_whole.svg")

Community Data Science Collective || Want to submit a patch?