1 from subprocess import Popen, PIPE
 
   3 from collections import defaultdict
 
   7 def find_dumps(dumpdir, base_pattern):
         # Globs `base_pattern` inside `dumpdir`, groups the matching paths by
         # their name without the final extension, and picks one extension per
         # group according to `ext_priority`.
         # NOTE(review): the statement that returns/yields the chosen path is
         # not visible in this listing; confirm the return contract in the
         # full file before relying on it.
 
   9     files = glob.glob(path.join(dumpdir,base_pattern))
 
  11     # build a dictionary of possible extensions for each dump
 
  12     dumpext = defaultdict(list)
 
         # NOTE(review): the loop header that binds `fpath` is not visible
         # here; presumably it iterates over `files` -- confirm.
  14         fname, ext = path.splitext(fpath)
 
  15         dumpext[fname].append(ext)
 
         # Preference order: when several extensions exist for the same base
         # name, the one listed earliest here is selected.
  17     ext_priority = ['.zst','.xz','.bz2','.gz']
 
  19     for base, exts in dumpext.items():
 
             # First entry of ext_priority that is present for this base.
             # The [0] raises IndexError when none of `exts` appears in
             # ext_priority (e.g. the glob matched an uncompressed file).
  20         ext = [ext for ext in ext_priority if ext in exts][0]
 
  23 def open_fileset(files):
         # NOTE(review): most of this function's body is not visible in this
         # listing. The only visible statement passes `fh` to
         # open_input_file() and binds the result to `lines`; presumably `fh`
         # is each element of `files` and `lines` is then iterated -- confirm
         # against the full file.
 
  26         lines = open_input_file(fh)
 
  30 def open_input_file(input_filename):
 
  31     if re.match(r'.*\.7z$', input_filename):
 
  32         cmd = ["7za", "x", "-so", input_filename, '*'] 
 
  33     elif re.match(r'.*\.gz$', input_filename):
 
  34         cmd = ["zcat", input_filename] 
 
  35     elif re.match(r'.*\.bz2$', input_filename):
 
  36         cmd = ["bzcat", "-dk", input_filename] 
 
  37     elif re.match(r'.*\.bz', input_filename):
 
  38         cmd = ["bzcat", "-dk", input_filename] 
 
  39     elif re.match(r'.*\.xz', input_filename):
 
  40         cmd = ["xzcat",'-dk', '-T 20',input_filename]
 
  41     elif re.match(r'.*\.zst',input_filename):
 
  42         cmd = ['zstd','-dck', input_filename]
 
  43     elif re.match(r'.*\.gz',input_filename):
 
  44         cmd = ['gzip','-dc', input_filename]
 
  46         input_file = Popen(cmd, stdout=PIPE).stdout
 
  47     except NameError as e:
 
  49         input_file = open(input_filename, 'r')