X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/541e125b28dbca5c06d2160a5cd59ce112657b2a..197518a222a321a8027c3dc5a4121350c47d0779:/similarities/tfidf.py?ds=inline diff --git a/similarities/tfidf.py b/similarities/tfidf.py index 01b0b20..bbae528 100644 --- a/similarities/tfidf.py +++ b/similarities/tfidf.py @@ -4,7 +4,7 @@ from pyspark.sql import functions as f from similarities_helper import tfidf_dataset, build_weekly_tfidf_dataset, select_topN_subreddits def _tfidf_wrapper(func, inpath, outpath, topN, term_colname, exclude, included_subreddits): - spark = SparkSession.builder.getOrCreate()y + spark = SparkSession.builder.getOrCreate() df = spark.read.parquet(inpath)