import operator

import numpy as np
# Star import kept: code elsewhere in this file uses bare NumPy names
# (array, tile, argsort, ...). Note that it may shadow builtins such as sum.
from numpy import *  # noqa: F401,F403
# Qinghai University, College of Agriculture and Animal Husbandry
# Create a data set containing 4 samples in 2 classes.
def createDataSet():
    """Build the tiny two-class toy data set used by the kNN demo.

    Returns:
        tuple: ``(group, labels)``. ``group`` is a 4x2 NumPy array with one
        sample per row; ``labels`` is a list of 4 class labels where
        ``labels[i]`` is the class of ``group[i]``.
    """
    # Sample matrix: each row is one sample.
    group = np.array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]])
    # Class of each of the 4 samples, in row order.
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# Definition of the kNN classification function
def kNNClassify(newInput, dataSet, labels, k):
    """Classify ``newInput`` by majority vote among its k nearest samples.

    Distances are Euclidean. Worked example of the distance computation for
    ``newInput = [1, 0, 2]`` and ``dataSet`` rows ``[1, 0, 1]``,
    ``[2, 1, 3]``, ``[1, 0, 2]``:

        1. difference (newInput - row):  [0, 0, 1], [-1, -1, -1], [0, 0, 0]
        2. squared differences:          [0, 0, 1], [1, 1, 1],    [0, 0, 0]
        3. row sums:                     1, 3, 0
        4. square roots (= distances):   1, 1.73, 0

    Args:
        newInput: Feature vector to classify; its length must match the
            number of columns of ``dataSet``.
        dataSet: 2-D array-like of known samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` is the class of
            row ``i`` of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the k nearest samples. When
        several labels share the highest count, the one whose first vote
        came from the nearest neighbour wins.

    Raises:
        ValueError: If ``k`` is not between 1 and the number of samples.
    """
    dataSet = np.asarray(dataSet)
    numSamples = dataSet.shape[0]  # shape[0] is the number of rows (samples)
    if not 1 <= k <= numSamples:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (numSamples, k)
        )

    # Step 1: Euclidean distance from newInput to every sample.
    # Broadcasting subtracts newInput from each row, which is what
    # tile(newInput, (numSamples, 1)) - dataSet computed.
    diff = np.asarray(newInput) - dataSet
    squaredDiff = diff ** 2
    # np.sum is spelled out so the result does not depend on whether the
    # module-level star import has replaced the builtin sum.
    squaredDist = np.sum(squaredDiff, axis=1)
    distance = squaredDist ** 0.5

    # Step 2: argsort() returns the sample indices ordered by distance.
    sortedDistIndices = np.argsort(distance)

    # Steps 3 and 4: take the k nearest neighbours and count their labels.
    classCount = {}
    for sampleIndex in sortedDistIndices[:k]:
        voteLabel = labels[sampleIndex]
        # get() returns 0 the first time a label is seen.
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # Step 5: return the label with the most votes. The strict ">" keeps the
    # earliest-inserted label on ties. An explicit loop is used instead of
    # max() because a NumPy star import may shadow that builtin.
    maxCount = 0
    maxIndex = None
    for key, value in classCount.items():
        if value > maxCount:
            maxCount = value
            maxIndex = key
    return maxIndex
# Test code for KNN-TEST: