code.communitydata.science - mediawiki_dump_tools.git/blobdiff - wikiq
bugfix. column name text_chars
[mediawiki_dump_tools.git] / wikiq
diff --git a/wikiq b/wikiq
index a377d19a9469028a7763b37c9b2f46313c72b27c..fb198f3a72f8cbaa37096d3dfd7e5da7fef2e573 100755 (executable)
--- a/wikiq
+++ b/wikiq
@@ -254,7 +254,7 @@ class RevDataBase():
         pa.field("title",pa.string()),
         pa.field("namespace",pa.int32()),
         pa.field("deleted",pa.bool_()),
         pa.field("title",pa.string()),
         pa.field("namespace",pa.int32()),
         pa.field("deleted",pa.bool_()),
-        pa.field("test_chars",pa.int32()),
+        pa.field("text_chars",pa.int32()),
         pa.field("revert",pa.bool_()),
         pa.field("reverteds",pa.list_(pa.int64())),
         pa.field("sha1",pa.string()),
         pa.field("revert",pa.bool_()),
         pa.field("reverteds",pa.list_(pa.int64())),
         pa.field("sha1",pa.string()),
@@ -518,7 +518,7 @@ class WikiqParser():
                                              namespace = namespace
                                              )
 
                                              namespace = namespace
                                              )
 
-                rev_data = self.matchmake(rev, rev_data)
+                rev_data = self.matchmake_revision(rev, rev_data)
 
                 if not rev.deleted.text:
                     # rev.text can be None if the page has no text
 
                 if not rev.deleted.text:
                     # rev.text can be None if the page has no text
@@ -661,7 +661,7 @@ class WikiqParser():
 
         outtable = rows_to_table(self.parquet_buffer, self.schema)
         if self.pq_writer is None:
 
         outtable = rows_to_table(self.parquet_buffer, self.schema)
         if self.pq_writer is None:
-            self.pq_writer = pq.ParquetWriter(self.output_file, schema, flavor='spark')
+            self.pq_writer = pq.ParquetWriter(self.output_file, self.schema, flavor='spark')
 
         self.pq_writer.write_table(outtable)
         self.parquet_buffer = []
 
         self.pq_writer.write_table(outtable)
         self.parquet_buffer = []

Community Data Science Collective || Want to submit a patch?