+submissions = submissions.filter(f.col("CreatedAt") <= datetime(2020,4,13))
+
+prop_nsfw = submissions.select(['subreddit','over_18']).groupby('subreddit').agg(f.mean(f.col('over_18').astype('double')).alias('prop_nsfw'))
+
+df = spark.read.parquet("../../data/reddit_comments_by_subreddit.parquet")
+df = df.filter(f.col("CreatedAt") <= datetime(2020,4,13))