1 from pathlib import Path
2 from multiprocessing import Pool, cpu_count
3 from itertools import product, chain
def __init__(self, jobtype, inpath, outpath, namer, *args):
    """Build one job for every point of the Cartesian product of ``args``.

    Args:
        jobtype: Callable (normally a class) invoked as
            ``jobtype(inpath, outpath, name, *params)`` for each grid point.
            ``run`` later looks up ``get_info`` on it.
        inpath: Passed through unchanged as the first argument of every job.
        outpath: Converted to ``pathlib.Path`` and passed as the second
            argument of every job.
        namer: Callable receiving one grid point unpacked
            (``namer(*params)``); its result is the third job argument.
        *args: One iterable of candidate values per swept parameter.
    """
    # Kept on the instance because ``run`` resolves ``get_info`` through
    # ``self.jobtype``.
    self.jobtype = jobtype
    outpath = Path(outpath)
    # Each row is the full positional argument tuple for one job.
    self.grid = [
        (inpath, outpath, namer(*point)) + point
        for point in product(*args)
    ]
    self.jobs = [jobtype(*row) for row in self.grid]
def run(self, cores=20):
    """Collect ``get_info`` for every job and store the results as a table.

    The results are wrapped in a ``pandas.DataFrame`` and stored on
    ``self.infos``.

    Args:
        cores: Number of worker processes. ``None`` or any value ``<= 1``
            evaluates the jobs sequentially in the current process.
    """
    get_info = self.jobtype.get_info
    if cores is not None and cores > 1:
        with Pool(cores) as pool:
            infos = pool.map(get_info, self.jobs)
    else:
        # Sequential path only; without the ``else`` this assignment would
        # discard the pool results and redo all the work serially.
        infos = [get_info(job) for job in self.jobs]
    self.infos = pd.DataFrame(infos)
def save(self, outcsv):
    """Write ``self.infos`` to ``outcsv`` as CSV.

    Missing parent directories are created first.

    Args:
        outcsv: Destination file, given as a string or a ``pathlib.Path``.
    """
    # Accept plain strings as well as Path objects; ``.parent`` needs a Path.
    outcsv = Path(outcsv)
    outcsv.parent.mkdir(parents=True, exist_ok=True)
    self.infos.to_csv(outcsv)