pa.field("revid", pa.int64()),
pa.field("date_time", pa.timestamp('ms')),
pa.field("articleid",pa.int64()),
- pa.field("editorid",pa.int64()),
+ pa.field("editorid",pa.int64(), nullable=True),
pa.field("title",pa.string()),
pa.field("namespace",pa.int32()),
pa.field("deleted",pa.bool_()),
- pa.field("test_chars",pa.int32()),
- pa.field("revert",pa.bool_()),
- pa.field("reverteds",pa.list_(pa.int64())),
+ pa.field("text_chars",pa.int32()),
+ pa.field("revert",pa.bool_(), nullable=True),
+ pa.field("reverteds",pa.list_(pa.int64()), nullable=True),
pa.field("sha1",pa.string()),
pa.field("minor",pa.bool_()),
pa.field("editor",pa.string()),
else:
sys.exit('Each regular expression *must* come with a corresponding label and vice versa.')
- def matchmake(self, rev, rev_data):
- rev_data = self.matchmake_revision(rev.text, rev_data)
+ def matchmake_revision(self, rev, rev_data):
+ rev_data = self.matchmake_text(rev.text, rev_data)
rev_data = self.matchmake_comment(rev.comment, rev_data)
return rev_data
- def matchmake_revision(self, text, rev_data):
+ def matchmake_text(self, text, rev_data):
return self.matchmake_pairs(text, rev_data, self.regex_revision_pairs)
def matchmake_comment(self, comment, rev_data):
namespace = namespace
)
- rev_data = self.matchmake(rev, rev_data)
+ rev_data = self.matchmake_revision(rev, rev_data)
if not rev.deleted.text:
# rev.text can be None if the page has no text