X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/7df8436067dba9a9e6867424002d01593e4bcd25..8d1df5b26ee80fee639e5b3ecd057fe8e72f166c:/clustering/hdbscan_clustering.py

diff --git a/clustering/hdbscan_clustering.py b/clustering/hdbscan_clustering.py
index 888554a..4f4e0d6 100644
--- a/clustering/hdbscan_clustering.py
+++ b/clustering/hdbscan_clustering.py
@@ -28,6 +28,13 @@ def test_select_hdbscan_clustering():
     cluster_selection_methods=['eom'];
     lsi_dimensions='all'
 
+    df = pd.read_csv("test_hdbscan/selection_data.csv")
+    test_select_hdbscan_clustering()
+    check_clusters = pd.read_feather("test_hdbscan/500_2_2_0.1_eom.feather")
+    silscores = pd.read_feather("test_hdbscan/silhouette_samples500_2_2_0.1_eom.feather")
+    c = check_clusters.merge(silscores,on='subreddit')#    fire.Fire(select_hdbscan_clustering)
+
+
 @dataclass
 class hdbscan_clustering_result(clustering_result):
     min_cluster_size:int
@@ -165,8 +172,4 @@ def make_KNN_plots():
     KNN_distances_plot(mat,k=2,outname='authors-tf_knn_dist2.png')
 
 if __name__ == "__main__":
-    df = pd.read_csv("test_hdbscan/selection_data.csv")
-    test_select_hdbscan_clustering()
-    check_clusters = pd.read_feather("test_hdbscan/500_2_2_0.1_eom.feather")
-    silscores = pd.read_feather("test_hdbscan/silhouette_samples500_2_2_0.1_eom.feather")
-    c = check_clusters.merge(silscores,on='subreddit')#    fire.Fire(select_hdbscan_clustering)
+    fire.Fire(select_hdbscan_clustering)