import numpy as np
from pyspark.sql import functions as f
from pyspark.sql import SparkSession
-from choose_clusters import load_clusters, load_densities
+from .choose_clusters import load_clusters, load_densities
import fire
from pathlib import Path
-def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
+def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather",
term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather",
author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather",
ts.write.parquet(output, mode='overwrite')
if __name__ == "__main__":
- fire.Fire(main)
+ fire.Fire(build_cluster_timeseries)