code.communitydata.science - cdsc_reddit.git/commitdiff
add script for pulling cluster timeseries
author Nate E TeBlunthuis <nathante@mox2.hyak.local>
Tue, 11 May 2021 01:24:22 +0000 (18:24 -0700)
committer Nate E TeBlunthuis <nathante@mox2.hyak.local>
Tue, 11 May 2021 01:24:22 +0000 (18:24 -0700)
timeseries/cluster_timeseries.py

index 07507d74c037ca870bc57e83357d8847d9506a46..91fa705af34f4242d05f8e7d28e4d212e0fc1419 100644 (file)
@@ -2,11 +2,11 @@ import pandas as pd
 import numpy as np
 from pyspark.sql import functions as f
 from pyspark.sql import SparkSession
-from choose_clusters import load_clusters, load_densities
+from .choose_clusters import load_clusters, load_densities
 import fire
 from pathlib import Path
 
-def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
+def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
          author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather",
          term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather",
          author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather",
@@ -34,4 +34,4 @@ def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_
     ts.write.parquet(output, mode='overwrite')
 
 if __name__ == "__main__":
-    fire.Fire(main)
+    fire.Fire(build_cluster_timeseries)

Community Data Science Collective || Want to submit a patch?