code.communitydata.science - cdsc_reddit.git/blobdiff - clustering/clustering.py
Changes for cosine similarities on klone.
[cdsc_reddit.git] / clustering / clustering.py
index e6523045267fd93c1424b63ff46af81e5f02b289..4cde71787eb5f208a0e51afb68ef57f1f99c1106 100755 (executable)
@@ -14,8 +14,9 @@ def affinity_clustering(similarities, output, damping=0.9, max_iter=100000, conv
 
     df = pd.read_feather(similarities)
     n = df.shape[0]
-    mat = np.array(df.drop('subreddit',1))
+    mat = np.array(df.drop('_subreddit',1))
     mat[range(n),range(n)] = 1
+    assert(all(np.diag(mat)==1))
 
     preference = np.quantile(mat,preference_quantile)
 

Community Data Science Collective || Want to submit a patch?