X-Git-Url: https://code.communitydata.science/mediawiki_dump_tools.git/blobdiff_plain/1aea601a3018bbd5c7b4c67da92fdd7a388d8393..3e645b5e586077fdcb59454dc5a9e05c64823191:/wikiq diff --git a/wikiq b/wikiq index 2cb0c3a..fb198f3 100755 --- a/wikiq +++ b/wikiq @@ -254,7 +254,7 @@ class RevDataBase(): pa.field("title",pa.string()), pa.field("namespace",pa.int32()), pa.field("deleted",pa.bool_()), - pa.field("test_chars",pa.int32()), + pa.field("text_chars",pa.int32()), pa.field("revert",pa.bool_()), pa.field("reverteds",pa.list_(pa.int64())), pa.field("sha1",pa.string()), @@ -661,7 +661,7 @@ class WikiqParser(): outtable = rows_to_table(self.parquet_buffer, self.schema) if self.pq_writer is None: - self.pq_writer = pq.ParquetWriter(self.output_file, schema, flavor='spark') + self.pq_writer = pq.ParquetWriter(self.output_file, self.schema, flavor='spark') self.pq_writer.write_table(outtable) self.parquet_buffer = []