code.communitydata.science - cdsc_reddit.git/blobdiff - clustering/clustering.py
update clustering scripts
[cdsc_reddit.git] / clustering / clustering.py
index 85be3fe1a1b42d490731ed67bd7e420a9d0242d1..6ee78420824c0af5cdf59410eff7bda5226e39c1 100755 (executable)
@@ -3,7 +3,7 @@
 import sys
 import pandas as pd
 import numpy as np
-from sklearn.cluster import AffinityPropagation, KMeans
+from sklearn.cluster import AffinityPropagation
 import fire
 from pathlib import Path
 from multiprocessing import cpu_count
@@ -46,24 +46,6 @@ def _affinity_clustering(mat, subreddits, output, damping=0.9, max_iter=100000,
     print(f"saved {output}")
     return clustering
 
-def kmeans_clustering(similarities, *args, **kwargs):
-    subreddits, mat = read_similarity_mat(similarities)
-    mat = sim_to_dist(mat)
-    clustering = _kmeans_clustering(mat, *args, **kwargs)
-    cluster_data = process_clustering_result(clustering, subreddits)
-    return(cluster_data)
-
-def _kmeans_clustering(mat, output, n_clusters, n_init=10, max_iter=100000, random_state=1968, verbose=True):
-
-    clustering = KMeans(n_clusters=n_clusters,
-                        n_init=n_init,
-                        max_iter=max_iter,
-                        random_state=random_state,
-                        verbose=verbose
-                        ).fit(mat)
-
-    return clustering
-
 
 
 if __name__ == "__main__":

Community Data Science Collective || Want to submit a patch?