self.lsi_dim = lsi_dim
self.jobtype = hdbscan_lsi_job
- super().__init__(self.jobtype, inpath, outpath, self.namer, self.lsi_dim, *args, **kwargs)
+ super().__init__(self.jobtype, inpath, outpath, self.namer, [self.lsi_dim], *args, **kwargs)
def namer(self, *args, **kwargs):
s += f"_lsi-{self.lsi_dim}"
return s
-def run_hdbscan_lsi_grid_sweep(savefile, inpath, outpath, min_cluster_sizes=[2], min_samples=[1], cluster_selection_epsilons=[0], cluster_selection_methods=['eom'],lsi_dimensions='all'):
+def run_hdbscan_lsi_grid_sweep(savefile, inpath, outpath, min_cluster_sizes=[2], min_samples=[1], cluster_selection_epsilons=[0], cluster_selection_methods=[1],lsi_dimensions='all'):
"""Run hdbscan clustering once or more with different parameters.
Usage:
obj = hdbscan_lsi_grid_sweep(inpath,
lsi_dimensions,
outpath,
- map(int,min_cluster_sizes),
- map(int,min_samples),
- map(float,cluster_selection_epsilons),
- cluster_selection_methods
- )
+ list(map(int,min_cluster_sizes)),
+ list(map(int,min_samples)),
+ list(map(float,cluster_selection_epsilons)),
+ cluster_selection_methods)
+
obj.run(10)
obj.save(savefile)