From c666302b4a51e3569a1b86a15c2291255ce32386 Mon Sep 17 00:00:00 2001 From: Nate E TeBlunthuis Date: Thu, 9 Jul 2020 17:12:14 -0700 Subject: [PATCH 1/1] remove is_submitter field from submissions which doesn't exist. --- submissions_2_parquet_part1.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/submissions_2_parquet_part1.py b/submissions_2_parquet_part1.py index 131391b..23b1200 100755 --- a/submissions_2_parquet_part1.py +++ b/submissions_2_parquet_part1.py @@ -16,7 +16,7 @@ import pyarrow.parquet as pq def parse_submission(post, names = None): if names is None: - names = ['id','author','subreddit','title','created_utc','permalink','url','domain','score','ups','downs','over_18','has_media','selftext','retrieved_on','num_comments','gilded','edited','time_edited','subreddit_type','subreddit_id','subreddit_subscribers','name','is_self','stickied','is_submitter','quarantine','error'] + names = ['id','author','subreddit','title','created_utc','permalink','url','domain','score','ups','downs','over_18','has_media','selftext','retrieved_on','num_comments','gilded','edited','time_edited','subreddit_type','subreddit_id','subreddit_subscribers','name','is_self','stickied','quarantine','error'] try: post = json.loads(post) @@ -93,7 +93,6 @@ schema = pa.schema([ pa.field('name',pa.string(),nullable=True), pa.field('is_self',pa.bool_(),nullable=True), pa.field('stickied',pa.bool_(),nullable=True), - pa.field('is_submitter',pa.bool_(),nullable=True), pa.field('quarantine',pa.bool_(),nullable=True), pa.field('error',pa.string(),nullable=True)]) -- 2.39.5