code.communitydata.science - mediawiki_dump_tools.git/commitdiff
Merge branch 'parquet_support' of code:mediawiki_dump_tools into parquet_support
author Nathan TeBlunthuis <nathanteblunthuis@gmail.com>
Thu, 29 May 2025 03:54:52 +0000 (20:54 -0700)
committer Nathan TeBlunthuis <nathanteblunthuis@gmail.com>
Thu, 29 May 2025 03:54:52 +0000 (20:54 -0700)
wikiq

diff --git a/wikiq b/wikiq
index 2cb0c3a8752d7ea8ce5293fe3ea707882f636a18..75c1af8045ad016eb4c574ba0a8ab3db6f96198b 100755 (executable)
--- a/wikiq
+++ b/wikiq
@@ -250,13 +250,13 @@ class RevDataBase():
         pa.field("revid", pa.int64()),
         pa.field("date_time", pa.timestamp('ms')),
         pa.field("articleid",pa.int64()),
-        pa.field("editorid",pa.int64()),
+        pa.field("editorid",pa.int64(), nullable=True),
         pa.field("title",pa.string()),
         pa.field("namespace",pa.int32()),
         pa.field("deleted",pa.bool_()),
-        pa.field("test_chars",pa.int32()),
-        pa.field("revert",pa.bool_()),
-        pa.field("reverteds",pa.list_(pa.int64())),
+        pa.field("text_chars",pa.int32()),
+        pa.field("revert",pa.bool_(), nullable=True),
+        pa.field("reverteds",pa.list_(pa.int64()), nullable=True),
         pa.field("sha1",pa.string()),
         pa.field("minor",pa.bool_()),
         pa.field("editor",pa.string()),

Community Data Science Collective || Want to submit a patch?