X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/blobdiff_plain/3f9da4074733981fa1dda6e88af75a19054e6c52..d2746879d09ca64418de41f34a33b11b205a101d:/wikiq diff --git a/wikiq b/wikiq index f25874e..7a2f8e4 100755 --- a/wikiq +++ b/wikiq @@ -272,11 +272,11 @@ class WikiqParser(): def open_input_file(input_filename): - if re.match(r'.*\.7z', input_filename): - cmd = ["7za", "x", "-so", input_filename, '*.xml'] - elif re.match(r'.*\.gz', input_filename): + if re.match(r'.*\.7z$', input_filename): + cmd = ["7za", "x", "-so", input_filename, '*'] + elif re.match(r'.*\.gz$', input_filename): cmd = ["zcat", input_filename] - elif re.match(r'.*\.bz2', input_filename): + elif re.match(r'.*\.bz2$', input_filename): cmd = ["zcat", input_filename] try: @@ -322,24 +322,25 @@ if len(args.dumpfiles) > 0: for filename in args.dumpfiles: input_file = open_input_file(filename) - # open file for output + # open directory for output + if args.output_dir: + output_dir = args.output_dir[0] + else: + output_dir = "." + + print("Processing file: %s" % filename, file=sys.stderr) + if args.stdout: output_file = sys.stdout else: - if args.output_dir: - output_dir = args.output_dir[0] - else: - output_dir = "." - filename = os.path.join(output_dir, os.path.basename(filename)) output_file = open_output_file(filename) wikiq = WikiqParser(input_file, output_file, - collapse_user=args.collapse_user, + collapse_user=args.collapse_user, persist=args.persist, urlencode=args.urlencode) - print("Processing file: %s" % filename, file=sys.stderr) wikiq.process() @@ -348,7 +349,7 @@ if len(args.dumpfiles) > 0: output_file.close() else: wikiq = WikiqParser(sys.stdin, sys.stdout, - collapse_user=args.collapse_user, + collapse_user=args.collapse_user, persist=args.persist, urlencode=args.urlencode) wikiq.process()