code.communitydata.science - cdsc_reddit.git/blobdiff - similarities/tfidf.py
git-annex in
[cdsc_reddit.git] / similarities / tfidf.py
index 01b0b20a0b94f6348834818b6762f3805cbfa8ea..bbae528c0145fdcb98fcda7a9072400c43c60ebb 100644 (file)
@@ -4,7 +4,7 @@ from pyspark.sql import functions as f
 from similarities_helper import tfidf_dataset, build_weekly_tfidf_dataset, select_topN_subreddits
 
 def _tfidf_wrapper(func, inpath, outpath, topN, term_colname, exclude, included_subreddits):
-    spark = SparkSession.builder.getOrCreate()y
+    spark = SparkSession.builder.getOrCreate()
 
     df = spark.read.parquet(inpath)
 

Community Data Science Collective || Want to submit a patch?