1 from pathlib import Path
2 from multiprocessing import Pool, cpu_count
3 from itertools import product, chain
def __init__(self, jobtype, inpath, outpath, namer, *args):
    """Create one job for every point of the Cartesian product of ``*args``.

    Args:
        jobtype: Callable invoked as ``jobtype(inpath, outpath, name, *point)``
            for each grid point. ``run`` later looks up ``get_info`` on it.
        inpath: Forwarded unchanged as the first argument of every job.
        outpath: Converted to ``pathlib.Path`` and forwarded as the second
            argument of every job.
        namer: Called as ``namer(*point)``; its result is forwarded as the
            third argument of every job.
        *args: One iterable of candidate values per swept parameter.
    """
    # run() resolves get_info through self.jobtype, so it must be stored here
    # (the two-way subclass stores it the same way).
    self.jobtype = jobtype

    points = product(*args)
    outpath = Path(outpath)

    # Each grid entry is the full positional-argument tuple for one job.
    self.grid = [(inpath, outpath, namer(*point)) + point for point in points]
    self.jobs = [jobtype(*spec) for spec in self.grid]
def run(self, cores=20):
    """Call ``get_info`` on every job and collect the results in ``self.infos``.

    Args:
        cores: Number of worker processes. A value greater than 1 spreads the
            jobs over a ``multiprocessing.Pool``; ``None`` or any value <= 1
            evaluates the jobs one after another in the current process.

    Returns:
        The ``pandas.DataFrame`` built from the collected results (the same
        object that is stored on ``self.infos``).
    """
    if cores is not None and cores > 1:
        with Pool(cores) as pool:
            infos = pool.map(self.jobtype.get_info, self.jobs)
    else:
        # Serial fallback; kept in its own branch so it never replaces the
        # pool results, and materialized so both branches yield a list.
        infos = [self.jobtype.get_info(job) for job in self.jobs]

    self.infos = pd.DataFrame(infos)
    return self.infos
def save(self, outcsv):
    """Write ``self.infos`` to ``outcsv`` as CSV, creating parent directories.

    Args:
        outcsv: Destination file, given as a ``str`` or ``pathlib.Path``.
    """
    # Coerce first: a plain string has no ``.parent`` attribute.
    outcsv = Path(outcsv)
    outcsv.parent.mkdir(parents=True, exist_ok=True)
    self.infos.to_csv(outcsv)
class twoway_grid_sweep(grid_sweep):
    """Grid sweep over two independent groups of keyword parameters."""

    def __init__(self, jobtype, inpath, outpath, namer, args1, args2, *args, **kwargs):
        """Create one job for every pairing of an ``args1`` and an ``args2`` setting.

        Args:
            jobtype: Callable invoked as
                ``jobtype(inpath, outpath, name, setting1, setting2, *args)``.
            inpath: Forwarded unchanged as the first argument of every job.
            outpath: Converted to ``pathlib.Path`` and forwarded as the second
                argument of every job.
            namer: Called with the merged settings as keyword arguments; its
                result is forwarded as the third argument of every job.
            args1: Mapping of parameter name -> iterable of candidate values
                for the first group.
            args2: Same as ``args1``, for the second group.
            *args: Extra positional arguments appended to every job's
                argument tuple.
            **kwargs: Accepted but not used by this constructor.
        """
        self.jobtype = jobtype

        combos1 = product(*args1.values())
        combos2 = product(*args2.values())

        # Expand each group into a list of {name: value} dicts, one per combination.
        settings1 = [dict(zip(args1.keys(), combo)) for combo in combos1]
        settings2 = [dict(zip(args2.keys(), combo)) for combo in combos2]

        outpath = Path(outpath)

        specs = []
        for first, second in product(settings1, settings2):
            # On a key clash the second group's value wins in the merged dict.
            name = namer(**(first | second))
            specs.append((inpath, outpath, name, first, second, *args))

        self.grid = specs
        self.jobs = [jobtype(*spec) for spec in self.grid]