code.communitydata.science - cdsc_reddit.git/blobdiff - comments_2_parquet.py
Secondary sort for the by_author dataset should be CreatedAt.
[cdsc_reddit.git] / comments_2_parquet.py
index cff16342dede2f0d04f71aeb36d0c4e1f21ab999..bd853f72daf8899a371c06cde85150bf2fb3b418 100755 (executable)
@@ -1,7 +1,7 @@
 
 #!/usr/bin/env python3
 import pyspark
-nfrom pyspark.sql import functions as f
+from pyspark.sql import functions as f
 from pyspark.sql.types import *
 from pyspark import SparkConf, SparkContext
 from pyspark.sql import SparkSession, SQLContext

Community Data Science Collective || Want to submit a patch?