code.communitydata.science - cdsc_reddit.git/blobdiff - clustering/hdbscan_clustering_lsi.py
commit changes from smap project.
[cdsc_reddit.git] / clustering / hdbscan_clustering_lsi.py
index 73b5276712436cc2376bb6fd6252297b49f364eb..a4c1efd5a2192c0acbcd3ab48920acf5680f7f75 100644 (file)
@@ -59,7 +59,7 @@ class _hdbscan_lsi_grid_sweep(grid_sweep):
 
         self.lsi_dim = lsi_dim
         self.jobtype = hdbscan_lsi_job
 
         self.lsi_dim = lsi_dim
         self.jobtype = hdbscan_lsi_job
-        super().__init__(self.jobtype, inpath, outpath, self.namer, self.lsi_dim, *args, **kwargs)
+        super().__init__(self.jobtype, inpath, outpath, self.namer, [self.lsi_dim], *args, **kwargs)
 
 
     def namer(self, *args, **kwargs):
 
 
     def namer(self, *args, **kwargs):
@@ -67,7 +67,7 @@ class _hdbscan_lsi_grid_sweep(grid_sweep):
         s += f"_lsi-{self.lsi_dim}"
         return s
 
         s += f"_lsi-{self.lsi_dim}"
         return s
 
-def run_hdbscan_lsi_grid_sweep(savefile, inpath, outpath,  min_cluster_sizes=[2], min_samples=[1], cluster_selection_epsilons=[0], cluster_selection_methods=['eom'],lsi_dimensions='all'):
+def run_hdbscan_lsi_grid_sweep(savefile, inpath, outpath,  min_cluster_sizes=[2], min_samples=[1], cluster_selection_epsilons=[0], cluster_selection_methods=[1],lsi_dimensions='all'):
     """Run hdbscan clustering once or more with different parameters.
     
     Usage:
     """Run hdbscan clustering once or more with different parameters.
     
     Usage:
@@ -87,11 +87,11 @@ def run_hdbscan_lsi_grid_sweep(savefile, inpath, outpath,  min_cluster_sizes=[2]
     obj = hdbscan_lsi_grid_sweep(inpath,
                                  lsi_dimensions,
                                  outpath,
     obj = hdbscan_lsi_grid_sweep(inpath,
                                  lsi_dimensions,
                                  outpath,
-                                 map(int,min_cluster_sizes),
-                                 map(int,min_samples),
-                                 map(float,cluster_selection_epsilons),
-                                 cluster_selection_methods
-                                 )
+                                 list(map(int,min_cluster_sizes)),
+                                 list(map(int,min_samples)),
+                                 list(map(float,cluster_selection_epsilons)),
+                                 cluster_selection_methods)
+                                 
 
     obj.run(10)
     obj.save(savefile)
 
     obj.run(10)
     obj.save(savefile)

Community Data Science Collective || Want to submit a patch?