“11.2scikit-learn里的k-均值算法”的版本间的差异
来自CloudWiki
(创建页面,内容为“# -*- coding: utf-8 -*- from sklearn.cluster import KMeans from sklearn.externals import joblib import numpy final = open('c:/test/final.dat' , 'r') data = [line.s…”) |
|||
第1行: | 第1行: | ||
− | + | -*- coding: utf-8 -*- | |
from sklearn.cluster import KMeans | from sklearn.cluster import KMeans | ||
from sklearn.externals import joblib | from sklearn.externals import joblib | ||
第9行: | 第9行: | ||
feature = [[float(x) for x in row[3:]] for row in data] | feature = [[float(x) for x in row[3:]] for row in data] | ||
− | + | 调用kmeans类 | |
clf = KMeans(n_clusters=9) | clf = KMeans(n_clusters=9) | ||
s = clf.fit(feature) | s = clf.fit(feature) | ||
print s | print s | ||
− | + | 9个中心 | |
print clf.cluster_centers_ | print clf.cluster_centers_ | ||
− | + | 每个样本所属的簇 | |
print clf.labels_ | print clf.labels_ | ||
− | + | 用来评估簇的个数是否合适,距离越小说明簇分的越好,选取临界点的簇个数 | |
print clf.inertia_ | print clf.inertia_ | ||
− | + | 进行预测 | |
print clf.predict(feature) | print clf.predict(feature) | ||
− | + | 保存模型 | |
joblib.dump(clf , 'c:/km.pkl') | joblib.dump(clf , 'c:/km.pkl') | ||
− | + | 载入保存的模型 | |
clf = joblib.load('c:/km.pkl') | clf = joblib.load('c:/km.pkl') | ||
''' | ''' | ||
− | + | 用来评估簇的个数是否合适,距离越小说明簇分的越好,选取临界点的簇个数 | |
for i in range(5,30,1): | for i in range(5,30,1): | ||
clf = KMeans(n_clusters=i) | clf = KMeans(n_clusters=i) |
2018年5月28日 (一) 13:48的最新版本
# -*- coding: utf-8 -*-
"""K-means clustering demo with scikit-learn.

Reads tab-separated rows from ``c:/test/final.dat``, uses every column from
index 3 onward as a float feature, fits a 9-cluster KMeans model, prints the
fitted results, then saves the model to ``c:/km.pkl`` and loads it back.
"""
from sklearn.cluster import KMeans
# NOTE(review): ``sklearn.externals.joblib`` was deprecated and later removed
# from scikit-learn; on newer releases use ``import joblib`` instead.
from sklearn.externals import joblib
import numpy

# Use a context manager so the input file is closed once parsing is done.
with open('c:/test/final.dat', 'r') as final:
    # One record per line; fields are tab-separated.
    data = [line.strip().split('\t') for line in final]

# The first three columns are skipped; the rest are numeric features.
feature = [[float(x) for x in row[3:]] for row in data]

# Instantiate the KMeans class and fit it on the features.
clf = KMeans(n_clusters=9)
s = clf.fit(feature)
print(s)

# The 9 cluster centers.
print(clf.cluster_centers_)

# The cluster each sample belongs to.
print(clf.labels_)

# Used to judge whether the number of clusters is suitable: the smaller the
# distance, the better the clustering; choose the cluster count at the
# turning point.
print(clf.inertia_)

# Run prediction.
print(clf.predict(feature))

# Save the model.
joblib.dump(clf, 'c:/km.pkl')

# Load the saved model.
clf = joblib.load('c:/km.pkl')

# The scan below is intentionally disabled by enclosing it in a string
# literal. Remove the triple quotes to print the inertia for 5..29 clusters.
'''
# Used to judge whether the number of clusters is suitable: the smaller the
# distance, the better the clustering; choose the cluster count at the
# turning point.
for i in range(5, 30, 1):
    clf = KMeans(n_clusters=i)
    s = clf.fit(feature)
    print("%s %s" % (i, clf.inertia_))
'''
来源:网络