helper.py

   1 from subprocess import Popen, PIPE
   2 import re
   3 from collections import defaultdict
   4 from os import path
   5 import glob
   6
   7 def find_dumps(dumpdir, base_pattern):
   8
   9     files = glob.glob(path.join(dumpdir,base_pattern))
  10
  11     # build a dictionary of possible extensions for each dump
  12     dumpext = defaultdict(list)
  13     for fpath in files:
  14         fname, ext = path.splitext(fpath)
  15         dumpext[fname].append(ext)
  16
  17     ext_priority = ['.zst','.xz','.bz2']
  18
  19     for base, exts in dumpext.items():
  20         found = False
  21         if len(exts) == 1:
  22             yield base + exts[0]
  23             found = True
  24         else:
  25             for ext in ext_priority:
  26                 if ext in exts:
  27                     yield base + ext
  28                     found = True
  29         assert(found == True)
  30
  31 def open_fileset(files):
  32     for fh in files:
  33         print(fh)
  34         lines = open_input_file(fh)
  35         for line in lines:
  36             yield line
  37
  38 def open_input_file(input_filename):
  39     if re.match(r'.*\.7z$', input_filename):
  40         cmd = ["7za", "x", "-so", input_filename, '*']
  41     elif re.match(r'.*\.gz$', input_filename):
  42         cmd = ["zcat", input_filename]
  43     elif re.match(r'.*\.bz2$', input_filename):
  44         cmd = ["bzcat", "-dk", input_filename]
  45     elif re.match(r'.*\.bz', input_filename):
  46         cmd = ["bzcat", "-dk", input_filename]
  47     elif re.match(r'.*\.xz', input_filename):
  48         cmd = ["xzcat",'-dk', '-T 20',input_filename]
  49     elif re.match(r'.*\.zst',input_filename):
  50         cmd = ['zstd','-dck', input_filename]
  51     try:
  52         input_file = Popen(cmd, stdout=PIPE).stdout
  53     except NameError as e:
  54         print(e)
  55         input_file = open(input_filename, 'r')
  56     return input_file
  57