From: Nate E TeBlunthuis Date: Fri, 10 Jul 2020 00:12:14 +0000 (-0700) Subject: remove is_submitter field from submissions which doesn't exist. X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/commitdiff_plain/c666302b4a51e3569a1b86a15c2291255ce32386?ds=sidebyside;hp=aa84a7df032dcb20bb284892f12cdac4853f31aa remove is_submitter field from submissions which doesn't exist. --- diff --git a/submissions_2_parquet_part1.py b/submissions_2_parquet_part1.py index 131391b..23b1200 100755 --- a/submissions_2_parquet_part1.py +++ b/submissions_2_parquet_part1.py @@ -16,7 +16,7 @@ import pyarrow.parquet as pq def parse_submission(post, names = None): if names is None: - names = ['id','author','subreddit','title','created_utc','permalink','url','domain','score','ups','downs','over_18','has_media','selftext','retrieved_on','num_comments','gilded','edited','time_edited','subreddit_type','subreddit_id','subreddit_subscribers','name','is_self','stickied','is_submitter','quarantine','error'] + names = ['id','author','subreddit','title','created_utc','permalink','url','domain','score','ups','downs','over_18','has_media','selftext','retrieved_on','num_comments','gilded','edited','time_edited','subreddit_type','subreddit_id','subreddit_subscribers','name','is_self','stickied','quarantine','error'] try: post = json.loads(post) @@ -93,7 +93,6 @@ schema = pa.schema([ pa.field('name',pa.string(),nullable=True), pa.field('is_self',pa.bool_(),nullable=True), pa.field('stickied',pa.bool_(),nullable=True), - pa.field('is_submitter',pa.bool_(),nullable=True), pa.field('quarantine',pa.bool_(),nullable=True), pa.field('error',pa.string(),nullable=True)])