import pandas as pd
from pandas.core.groupby import DataFrameGroupBy as GroupBy
+from pathlib import Path
import fire
import numpy as np
import sys
sys.path.append("..")
sys.path.append("../similarities")
-from similarities.similarities_helper import reindex_tfidf, reindex_tfidf_time_interval
+from similarities.similarities_helper import reindex_tfidf
# this is the mean of the ratio of the overlap to the focal size.
# mean shared membership per focal community member
def overlap_density(inpath, outpath, agg = pd.DataFrame.sum):
df = pd.read_feather(inpath)
- df = df.drop('subreddit',1)
+ df = df.drop('_subreddit',1)
np.fill_diagonal(df.values,0)
df = agg(df, 0).reset_index()
df = df.rename({0:'overlap_density'},axis='columns')
+ outpath = Path(outpath)
+ outpath.parent.mkdir(parents=True, exist_ok = True)
df.to_feather(outpath)
return df
# Exclude the diagonal, i.e. rows where `subreddit` equals `variable`.
df = df.loc[df.subreddit != df.variable]
res = agg(df.groupby(['subreddit','week'])).reset_index()
+ outpath = Path(outpath)
+ outpath.parent.mkdir(parents=True, exist_ok = True)
res.to_feather(outpath)
return res