from mw.xml_dump import Iterator
from mw.lib import persistence
-from mw.lib import reverts
+import mwreverts
from urllib.parse import quote
# Names of the fields singled out for encoding.
# NOTE(review): presumably these are the values passed through
# urllib.parse.quote (imported above) — confirm against the usage site.
TO_ENCODE = ('title', 'editor')
# Size of the sliding window: used as the ``maxlen`` of the ``window`` deque,
# so at most this many entries are retained at once.
PERSISTENCE_RADIUS=7
state = persistence.State()
window = deque(maxlen=PERSISTENCE_RADIUS)
- rev_detector = reverts.Detector()
+ rev_detector = mwreverts.Detector()
# Iterate through a page's revisions
for rev in page:
elif re.match(r'.*\.gz$', input_filename):
cmd = ["zcat", input_filename]
elif re.match(r'.*\.bz2$', input_filename):
- cmd = ["zcat", input_filename]
+ cmd = ["bzcat", "-dk", input_filename]
try:
input_file = Popen(cmd, stdout=PIPE).stdout