code.communitydata.science - mediawiki_dump_tools.git/blobdiff - wikiq
migrate reverts to python-mwreverts
[mediawiki_dump_tools.git] / wikiq
diff --git a/wikiq b/wikiq
index 7a2f8e45ca20d61464e042704f675f214fd0be09..ad4d549c4cefa7a4220fe2ab3f399b7d3668a7b2 100755 (executable)
--- a/wikiq
+++ b/wikiq
@@ -15,7 +15,7 @@ from hashlib import sha1
 
 from mw.xml_dump import Iterator
 from mw.lib import persistence
 
 from mw.xml_dump import Iterator
 from mw.lib import persistence
-from mw.lib import reverts
+import mwreverts
 from urllib.parse import quote
 TO_ENCODE = ('title', 'editor')
 PERSISTENCE_RADIUS=7
 from urllib.parse import quote
 TO_ENCODE = ('title', 'editor')
 PERSISTENCE_RADIUS=7
@@ -147,7 +147,7 @@ class WikiqParser():
                 state = persistence.State()
                 window = deque(maxlen=PERSISTENCE_RADIUS)
 
                 state = persistence.State()
                 window = deque(maxlen=PERSISTENCE_RADIUS)
 
-            rev_detector = reverts.Detector()
+            rev_detector = mwreverts.Detector()
 
             # Iterate through a page's revisions
             for rev in page:
 
             # Iterate through a page's revisions
             for rev in page:
@@ -277,7 +277,7 @@ def open_input_file(input_filename):
     elif re.match(r'.*\.gz$', input_filename):
         cmd = ["zcat", input_filename] 
     elif re.match(r'.*\.bz2$', input_filename):
     elif re.match(r'.*\.gz$', input_filename):
         cmd = ["zcat", input_filename] 
     elif re.match(r'.*\.bz2$', input_filename):
-        cmd = ["zcat", input_filename] 
+        cmd = ["bzcat", "-dk", input_filename] 
 
     try:
         input_file = Popen(cmd, stdout=PIPE).stdout
 
     try:
         input_file = Popen(cmd, stdout=PIPE).stdout

Community Data Science Collective || Want to submit a patch?