X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/56269deee3d33620550d67bdd3c1a7b64eb3f7e4..f0176d9f0de93f0e4f3ab1d676c852c2e5fad3b3:/clustering/clustering.py diff --git a/clustering/clustering.py b/clustering/clustering.py old mode 100644 new mode 100755 index e652304..4cde717 --- a/clustering/clustering.py +++ b/clustering/clustering.py @@ -14,8 +14,9 @@ def affinity_clustering(similarities, output, damping=0.9, max_iter=100000, conv df = pd.read_feather(similarities) n = df.shape[0] - mat = np.array(df.drop('subreddit',1)) + mat = np.array(df.drop('_subreddit',1)) mat[range(n),range(n)] = 1 + assert(all(np.diag(mat)==1)) preference = np.quantile(mat,preference_quantile)