X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/aa84a7df032dcb20bb284892f12cdac4853f31aa..13eb95b3b06bd51324e0d05c73a44b5e8e830295:/helper.py diff --git a/helper.py b/helper.py index b401cad..8f1dfe2 100644 --- a/helper.py +++ b/helper.py @@ -14,7 +14,7 @@ def find_dumps(dumpdir, base_pattern): fname, ext = path.splitext(fpath) dumpext[fname].append(ext) - ext_priority = ['.zst','.xz','.bz2'] + ext_priority = ['.zst','.xz','.bz2','.gz'] for base, exts in dumpext.items(): ext = [ext for ext in ext_priority if ext in exts][0] @@ -40,6 +40,8 @@ def open_input_file(input_filename): cmd = ["xzcat",'-dk', '-T 20',input_filename] elif re.match(r'.*\.zst',input_filename): cmd = ['zstd','-dck', input_filename] + elif re.match(r'.*\.gz',input_filename): + cmd = ['gzip','-dc', input_filename] try: input_file = Popen(cmd, stdout=PIPE).stdout except NameError as e: