对于这个数据集,我的 k-means 模型的预测似乎只考虑了水平轴,尽管聚类中心似乎是合理的。
这个分类有问题吗?请注意背景中网格的颜色。
我用的是scikit-learn,这里是分类和可视化的代码片段。
model = KMeans(n_clusters = 5)
model.fit(df_stuff[['Stuff','Other Stuff']])
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.scatter(df_stuff['Stuff'], df_stuff['Other Stuff'],c=model.labels_,s=80,cmap='rainbow')
ax.set_xlabel('Stuff')
ax.set_ylabel('Other Stuff')
ax.set_title('Strange Clusters')
# Draw Cluster Centers
for center in model.cluster_centers_:
ax.scatter(center[0],center[1],c='black',s=5120,alpha=0.2)
# Draw Cluster Grid
cluster_grid = {'x': [], 'y': [], 'cluster': []}
for x in np.linspace(df_stuff['Stuff'].min(),df_stuff['Stuff'].max(),25):
for y in np.linspace(0.35,0.6,25):
cluster_grid['x'].append(x)
cluster_grid['y'].append(y)
cluster_grid['cluster'].append(model.predict([[x,y]])[0])
ax.scatter(cluster_grid['x'],cluster_grid['y'],c=cluster_grid['cluster'],cmap='rainbow',alpha=0.4,s=10)