1 from subprocess import Popen, PIPE
3 from collections import defaultdict
# find_dumps: choose one preferred file extension per dump base name.
#
#   dumpdir      -- directory to search
#   base_pattern -- glob pattern, joined onto dumpdir, selecting candidate files
#
# Matched paths are grouped by their name without the final extension, and for
# each group the extension that comes first in ext_priority is selected.
# NOTE(review): the statement that consumes the selected extension is not
# visible here; presumably base + ext is handed back to the caller -- confirm.
7 def find_dumps(dumpdir, base_pattern):
9 files = glob.glob(path.join(dumpdir,base_pattern))
11 # build a dictionary of possible extensions for each dump
12 dumpext = defaultdict(list)
# NOTE(review): fpath is not bound on any visible line; presumably this runs
# once per entry of `files` -- confirm.
14 fname, ext = path.splitext(fpath)
15 dumpext[fname].append(ext)
# Candidate extensions in order of preference, most preferred first.
17 ext_priority = ['.zst','.xz','.bz2','.gz']
19 for base, exts in dumpext.items():
# Take the first extension, in priority order, that exists for this base name.
# NOTE(review): the [0] raises IndexError when a base name has none of the
# extensions listed in ext_priority (e.g. the pattern also matched a .txt file).
20 ext = [ext for ext in ext_priority if ext in exts][0]
# open_fileset: open entries of `files` through open_input_file.
#
#   files -- collection of input file paths
#
# NOTE(review): `fh` is not bound on any visible line; presumably it is the
# loop variable of an iteration over `files`, and the lines read from each
# opened file are passed on to the caller -- confirm.
23 def open_fileset(files):
# Despite its name, `fh` is handed to open_input_file as a filename.
26 lines = open_input_file(fh)
def open_input_file(input_filename):
    """Open an input file for reading, decompressing it when necessary.

    The decompressor is chosen from the filename suffix and started as a
    child process; the read end of its stdout pipe is returned.  A file
    whose name has none of the known suffixes is opened directly.

    Args:
        input_filename: Path of the file to read.

    Returns:
        A readable file object.  For ``.7z``, ``.gz``, ``.bz2``, ``.bz``,
        ``.xz`` and ``.zst`` files this is the *binary* stdout pipe of the
        decompressor process; for every other name it is the file itself
        opened in *text* mode.

    Raises:
        OSError: If the decompressor executable cannot be started or the
            plain file cannot be opened.
    """
    # Suffix -> argv.  Suffixes are compared against the END of the name
    # only, so "notes.xz.txt" is treated as a plain text file rather than
    # being handed to xzcat.  Every command-line token is its own argv
    # element; nothing here goes through a shell.
    decompressors = (
        ('.7z', ["7za", "x", "-so", input_filename, '*']),
        ('.gz', ["zcat", input_filename]),
        ('.bz2', ["bzcat", "-dk", input_filename]),
        ('.bz', ["bzcat", "-dk", input_filename]),
        ('.xz', ["xzcat", '-dk', '-T', '20', input_filename]),
        ('.zst', ['/kloneusr/bin/zstd', '-dck', input_filename,
                  '--memory=2048MB', '--stdout']),
    )

    cmd = None
    for suffix, argv in decompressors:
        if input_filename.endswith(suffix):
            cmd = argv
            break

    if cmd is None:
        # No known compression suffix: read the file as-is.
        input_file = open(input_filename, 'r')
    else:
        # The child process is left running; it ends once its output has
        # been consumed or the returned pipe is closed by the caller.
        input_file = Popen(cmd, stdout=PIPE).stdout

    return input_file