2 from affinity_clustering import affinity_clustering_result, affinity_job, affinity_grid_sweep
3 from grid_sweep import grid_sweep
4 from lsi_base import lsi_result_mixin, lsi_grid_sweep, lsi_mixin
5 from dataclasses import dataclass
8 class affinity_clustering_result_lsi(affinity_clustering_result, lsi_result_mixin):
12 class affinity_lsi_job(affinity_job, lsi_mixin):
13 def __init__(self, infile, outpath, name, lsi_dims, *args, **kwargs):
14 super().__init__(infile,
19 super().set_lsi_dims(lsi_dims)
22 result = super().get_info()
23 self.result = affinity_clustering_result_lsi(**result.__dict__,
24 lsi_dimensions=self.lsi_dims)
27 class affinity_lsi_grid_sweep(lsi_grid_sweep):
34 convergence_iters=[30],
35 preference_quantiles=[0.5]):
37 super().__init__(affinity_lsi_job,
38 _affinity_lsi_grid_sweep,
48 class _affinity_lsi_grid_sweep(grid_sweep):
55 self.lsi_dim = lsi_dim
56 self.jobtype = affinity_lsi_job
57 super().__init__(self.jobtype,
65 def namer(self, *args, **kwargs):
66 s = affinity_grid_sweep.namer(self, *args[1:], **kwargs)
67 s += f"_lsi-{self.lsi_dim}"
70 def run_affinity_lsi_grid_sweep(savefile, inpath, outpath, dampings=[0.8], max_iters=[3000], convergence_iters=[30], preference_quantiles=[0.5], lsi_dimensions='all',n_cores=30):
71 """Run affinity clustering once or more with different parameters.
74 affinity_clustering.py --savefile=SAVEFILE --inpath=INPATH --outpath=OUTPATH --max_iters=<csv> --dampings=<csv> --preference_quantiles=<csv> --lsi_dimensions: either "all" or one or more available lsi similarity dimensions at INPATH.
77 savefile: path to save the metadata and diagnostics
78 inpath: path to folder containing feather files with LSI similarity labeled matrices of subreddit similarities.
79 outpath: path to output fit affinity clusterings.
80 dampings:one or more numbers in [0.5, 1). damping parameter in affinity propagation clustering.
81 preference_quantiles:one or more numbers in (0,1) for selecting the 'preference' parameter.
82 convergence_iters:one or more integers of number of iterations without improvement before stopping.
83 max_iters: one or more numbers of different maximum iterations.
84 lsi_dimensions: either "all" or one or more available lsi similarity dimensions at INPATH.
87 obj = affinity_lsi_grid_sweep(inpath,
92 map(int,convergence_iters),
93 map(float,preference_quantiles))
98 if __name__ == "__main__":
99 fire.Fire(run_affinity_lsi_grid_sweep)