X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/94c7a74bd9a17acf4cca61da48557a93d49cee36:/reddit_bz2_2parquet.py..6dca79a41fbd5d1e967ba2cbd12da55310e5858f:/comments_2_parquet.py?ds=sidebyside diff --git a/reddit_bz2_2parquet.py b/comments_2_parquet.py similarity index 98% rename from reddit_bz2_2parquet.py rename to comments_2_parquet.py index 93c3d45..3042f58 100755 --- a/reddit_bz2_2parquet.py +++ b/comments_2_parquet.py @@ -6,7 +6,7 @@ from pyspark.sql.types import * from pyspark import SparkConf, SparkContext from pyspark.sql import SparkSession, SQLContext -conf = SparkConf().setAppName("Reddit to bz2") +conf = SparkConf().setAppName("Reddit comments to parquet") conf = conf.set('spark.sql.crossJoin.enabled',"true") spark = SparkSession.builder.getOrCreate()