我在自己的数据集上使用了 scikit-learn 中的 KMeans 方法,然后尝试绘制数据和聚类结果,但一直报错,不知道接下来该怎么办。
这是我现在的代码:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
def construct_dict(list_keys, list_values):
    """Build a contingency table of keys versus values.

    Position ``i`` of ``list_keys`` is paired with position ``i`` of
    ``list_values``. Every key that occurs gets an inner dict holding one
    counter per distinct value found anywhere in ``list_values`` (values never
    paired with that key stay at 0).

    Args:
        list_keys: Sequence of hashable group identifiers, one per sample.
        list_values: Indexable sequence of hashable labels, one per sample.

    Returns:
        ``{key: {value: count}}`` for every key in ``list_keys``.

    Raises:
        IndexError: If ``list_keys`` is longer than ``list_values``.
    """
    # The set of distinct values does not change while iterating, so compute
    # it once instead of once per sample.
    distinct_values = set(list_values)
    res = {}
    for i, k in enumerate(list_keys):
        if k not in res:
            # First time this key is seen: start every counter at zero.
            res[k] = dict.fromkeys(distinct_values, 0)
        res[k][list_values[i]] += 1
    return res
def print_result(matrix, assigs, y, nclusts=None):
    """Print the cluster/label contingency table and the mean silhouette score.

    Args:
        matrix: The data that was clustered; passed to ``silhouette_score``.
        assigs: Cluster label assigned to each sample.
        y: Reference label of each sample.
        nclusts: Number of clusters to report in the printed messages. When
            omitted, the number of distinct labels in ``assigs`` is used, so
            the output no longer depends on a module-level ``nclusts``
            variable happening to hold the right value.

    Returns:
        The nested dict produced by ``construct_dict(assigs, y)``.
    """
    if nclusts is None:
        # Derive K from the labels actually present rather than from a global.
        nclusts = len(set(assigs))
    res = construct_dict(assigs, y)
    print(res, "is a clustering obtained when K=", nclusts)
    # Mean Silhouette Coefficient of all samples.
    silhouette_avg = silhouette_score(matrix, assigs)
    print("For %d clusters the average sillouette score is: %f" % (nclusts, silhouette_avg))
    return res
def bcubed(assigs, y, calcule='precision'):
    """Compute a B-Cubed style precision or recall score for a clustering.

    Args:
        assigs: Cluster label assigned to each sample.
        y: Reference label of each sample.
        calcule: ``'precision'`` groups samples by cluster and counts the
            reference labels in each group; ``'recall'`` groups by reference
            label and counts the clusters in each group.

    Returns:
        The score as a float, or ``0.0`` when there are no samples.

    Raises:
        ValueError: If ``calcule`` is neither ``'precision'`` nor ``'recall'``.
    """
    if calcule == 'precision':
        list_dicts = construct_dict(assigs, y).values()
    elif calcule == 'recall':
        list_dicts = construct_dict(y, assigs).values()
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``list_dicts``.
        raise ValueError(
            "calcule must be 'precision' or 'recall', got %r" % (calcule,)
        )

    n = len(y)
    if n == 0:
        # No samples: avoid dividing by zero below.
        return 0.0

    summatory = 0.0
    for dic in list_dicts:
        values_dict = dic.values()
        n_elem_dic = sum(values_dict)
        for value in values_dict:
            # NOTE(review): the numerator is value * (value - 1), i.e. each
            # sample is not counted as agreeing with itself, while the
            # denominator is the full group size. The usual B-Cubed
            # definition uses value ** 2 -- confirm this is intended.
            summatory += (value - 1) * value / n_elem_dic
    return summatory / n
def k_means(nclusts, matrix, y):
    """Cluster ``matrix`` with KMeans, print quality metrics, return the labels.

    Args:
        nclusts: Number of clusters requested from ``KMeans``.
        matrix: Data to cluster.
        y: Reference label of each sample, used for the printed metrics.

    Returns:
        A list with the cluster label of each sample. The labels are only
        available through this return value, so callers that need them must
        assign the result of the call to a name.
    """
    # random_state is fixed so repeated runs give the same clustering.
    kmeans = KMeans(n_clusters=nclusts, random_state=0).fit(matrix)
    assigs = list(kmeans.labels_)
    # print_result's return value is not needed here; it is called for its
    # printed report only.
    print_result(matrix, assigs, y)
    print('Precision BCubed:', bcubed(assigs, y))
    print('Recall BCubed:', bcubed(assigs, y, 'recall'))
    return assigs
# Run the clustering report for several cluster counts.
# Keep the module-level loop variable named ``nclusts``: other code in this
# file may read a global of that name.
for nclusts in [2, 3, 4, 5, 10, 20, 30]:
    k_means(nclusts, X_pca, y)
    # Visual separator between the reports of consecutive runs.
    print("--------------")
输出:
{0: {'ALL': 12, 'AML': 14}, 1: {'ALL': 35, 'AML': 11}} is a clustering
obtained when K= 2 For 2 clusters the average sillouette score is:
0.147925 Precision BCubed: 0.5602471200297287 Recall BCubed: 0.5528841607565012
...
import pandas as pd
from matplotlib import pyplot as plt
# Wrap the data in a DataFrame, transpose it, and draw it with the default
# pandas plot before showing the figure.
transposed_frame = pd.DataFrame(X_pca).T
transposed_frame.plot()
plt.show()
输出:图形可以正常绘制。
现在报错的部分是:
import pandas as pd
# The list returned by k_means is discarded on the next line, and `assigs` is
# only a local name inside k_means, so no module-level `assigs` exists when
# the DataFrame is built (this is what raises the NameError shown below).
k_means(5,X_pca,y)
pd.DataFrame(assigs).T.plot()
plt.show()
输出:
NameError: name 'assigs' is not defined
我做错了什么?
最佳答案
您似乎没有保存 k_means 的返回值。试试下面这样是否可行?
import pandas as pd
# Bind the labels returned by k_means to a module-level name so they can be
# used after the call.
assigs = k_means(5, X_pca, y)
labels_frame = pd.DataFrame(assigs).T
labels_frame.plot()
plt.show()
关于python - 名称错误 : name 'assigs' is not defined,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53224929/