- fix regex for filename/filetype matches
- extract all files from 7z archives, not just ones that end with xml
- fix bug that broke stdout
- minor cosmetic fixes
- updated mediawiki-utilities submodule to latest version
-Subproject commit beba46e3eee8e0582cc3a5515dfa658ffbd18f9d
+Subproject commit f7329417ebb2f03d1e9b8a626236a3c0ce65c814
def open_input_file(input_filename):
def open_input_file(input_filename):
- if re.match(r'.*\.7z', input_filename):
- cmd = ["7za", "x", "-so", input_filename, '*.xml']
- elif re.match(r'.*\.gz', input_filename):
+ if re.match(r'.*\.7z$', input_filename):
+ cmd = ["7za", "x", "-so", input_filename, '*']
+ elif re.match(r'.*\.gz$', input_filename):
cmd = ["zcat", input_filename]
cmd = ["zcat", input_filename]
- elif re.match(r'.*\.bz2', input_filename):
+ elif re.match(r'.*\.bz2$', input_filename):
cmd = ["zcat", input_filename]
try:
cmd = ["zcat", input_filename]
try:
for filename in args.dumpfiles:
input_file = open_input_file(filename)
for filename in args.dumpfiles:
input_file = open_input_file(filename)
+ # open directory for output
+ if args.output_dir:
+ output_dir = args.output_dir[0]
+ else:
+ output_dir = "."
+
+ print("Processing file: %s" % filename, file=sys.stderr)
+
if args.stdout:
output_file = sys.stdout
else:
if args.stdout:
output_file = sys.stdout
else:
- if args.output_dir:
- output_dir = args.output_dir[0]
- else:
- output_dir = "."
-
filename = os.path.join(output_dir, os.path.basename(filename))
output_file = open_output_file(filename)
wikiq = WikiqParser(input_file, output_file,
filename = os.path.join(output_dir, os.path.basename(filename))
output_file = open_output_file(filename)
wikiq = WikiqParser(input_file, output_file,
- collapse_user=args.collapse_user,
+ collapse_user=args.collapse_user,
persist=args.persist,
urlencode=args.urlencode)
persist=args.persist,
urlencode=args.urlencode)
- print("Processing file: %s" % filename, file=sys.stderr)
output_file.close()
else:
wikiq = WikiqParser(sys.stdin, sys.stdout,
output_file.close()
else:
wikiq = WikiqParser(sys.stdin, sys.stdout,
- collapse_user=args.collapse_user,
+ collapse_user=args.collapse_user,
persist=args.persist,
urlencode=args.urlencode)
wikiq.process()
persist=args.persist,
urlencode=args.urlencode)
wikiq.process()