X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/4cb7eeec80c5a9c8f49339acd378c515e290ed81..7b130a30af863dfa727d80d9fea23648dcc9d5d8:/clustering/hdbscan_clustering_lsi.py?ds=inline diff --git a/clustering/hdbscan_clustering_lsi.py b/clustering/hdbscan_clustering_lsi.py index 73b5276..a4c1efd 100644 --- a/clustering/hdbscan_clustering_lsi.py +++ b/clustering/hdbscan_clustering_lsi.py @@ -59,7 +59,7 @@ class _hdbscan_lsi_grid_sweep(grid_sweep): self.lsi_dim = lsi_dim self.jobtype = hdbscan_lsi_job - super().__init__(self.jobtype, inpath, outpath, self.namer, self.lsi_dim, *args, **kwargs) + super().__init__(self.jobtype, inpath, outpath, self.namer, [self.lsi_dim], *args, **kwargs) def namer(self, *args, **kwargs): @@ -67,7 +67,7 @@ class _hdbscan_lsi_grid_sweep(grid_sweep): s += f"_lsi-{self.lsi_dim}" return s -def run_hdbscan_lsi_grid_sweep(savefile, inpath, outpath, min_cluster_sizes=[2], min_samples=[1], cluster_selection_epsilons=[0], cluster_selection_methods=['eom'],lsi_dimensions='all'): +def run_hdbscan_lsi_grid_sweep(savefile, inpath, outpath, min_cluster_sizes=[2], min_samples=[1], cluster_selection_epsilons=[0], cluster_selection_methods=[1],lsi_dimensions='all'): """Run hdbscan clustering once or more with different parameters. Usage: @@ -87,11 +87,11 @@ def run_hdbscan_lsi_grid_sweep(savefile, inpath, outpath, min_cluster_sizes=[2] obj = hdbscan_lsi_grid_sweep(inpath, lsi_dimensions, outpath, - map(int,min_cluster_sizes), - map(int,min_samples), - map(float,cluster_selection_epsilons), - cluster_selection_methods - ) + list(map(int,min_cluster_sizes)), + list(map(int,min_samples)), + list(map(float,cluster_selection_epsilons)), + cluster_selection_methods) + obj.run(10) obj.save(savefile)