X-Git-Url: https://code.communitydata.science/cdsc_reddit.git/blobdiff_plain/4dc949de5fb8d3eac04bae125c819100002c9522..7b130a30af863dfa727d80d9fea23648dcc9d5d8:/clustering/fit_tsne.py diff --git a/clustering/fit_tsne.py b/clustering/fit_tsne.py index c9f45f6..55d7239 100644 --- a/clustering/fit_tsne.py +++ b/clustering/fit_tsne.py @@ -17,7 +17,7 @@ def fit_tsne(similarities, output, learning_rate=750, perplexity=50, n_iter=1000 df = pd.read_feather(similarities) n = df.shape[0] - mat = np.array(df.drop('subreddit',1),dtype=np.float64) + mat = np.array(df.drop('_subreddit',1),dtype=np.float64) mat[range(n),range(n)] = 1 mat[mat > 1] = 1 dist = 2*np.arccos(mat)/np.pi @@ -26,7 +26,7 @@ def fit_tsne(similarities, output, learning_rate=750, perplexity=50, n_iter=1000 tsne_fit_whole = tsne_fit_model.fit_transform(dist) - plot_data = pd.DataFrame({'x':tsne_fit_whole[:,0],'y':tsne_fit_whole[:,1], 'subreddit':df.subreddit}) + plot_data = pd.DataFrame({'x':tsne_fit_whole[:,0],'y':tsne_fit_whole[:,1], '_subreddit':df['_subreddit']}) plot_data.to_feather(output)