code.communitydata.science - mediawiki_dump_tools.git/blobdiff - wikiq
fix schema bugs.
[mediawiki_dump_tools.git] / wikiq
diff --git a/wikiq b/wikiq
index bffbbf4cd10aefffcb206908c51a894a7fc70344..75c1af8045ad016eb4c574ba0a8ab3db6f96198b 100755 (executable)
--- a/wikiq
+++ b/wikiq
@@ -250,13 +250,13 @@ class RevDataBase():
         pa.field("revid", pa.int64()),
         pa.field("date_time", pa.timestamp('ms')),
         pa.field("articleid",pa.int64()),
-        pa.field("editorid",pa.int64()),
+        pa.field("editorid",pa.int64(), nullable=True),
         pa.field("title",pa.string()),
         pa.field("namespace",pa.int32()),
         pa.field("deleted",pa.bool_()),
-        pa.field("test_chars",pa.int32()),
-        pa.field("revert",pa.bool_()),
-        pa.field("reverteds",pa.list_(pa.int64())),
+        pa.field("text_chars",pa.int32()),
+        pa.field("revert",pa.bool_(), nullable=True),
+        pa.field("reverteds",pa.list_(pa.int64()), nullable=True),
         pa.field("sha1",pa.string()),
         pa.field("minor",pa.bool_()),
         pa.field("editor",pa.string()),
@@ -429,12 +429,12 @@ class WikiqParser():
         else:
             sys.exit('Each regular expression *must* come with a corresponding label and vice versa.')
 
-    def matchmake(self, rev, rev_data):
-        rev_data = self.matchmake_revision(rev.text, rev_data)
+    def matchmake_revision(self, rev, rev_data):
+        rev_data = self.matchmake_text(rev.text, rev_data)
         rev_data = self.matchmake_comment(rev.comment, rev_data)
         return rev_data
 
-    def matchmake_revision(self, text, rev_data):
+    def matchmake_text(self, text, rev_data):
          return self.matchmake_pairs(text, rev_data, self.regex_revision_pairs)
 
     def matchmake_comment(self, comment, rev_data):
@@ -518,7 +518,7 @@ class WikiqParser():
                                              namespace = namespace
                                              )
 
-                rev_data = self.matchmake(rev, rev_data)
+                rev_data = self.matchmake_revision(rev, rev_data)
 
                 if not rev.deleted.text:
                     # rev.text can be None if the page has no text

Community Data Science Collective || Want to submit a patch?