knn-self-practice

这是我自己写的第一个比较简单的KNN练习案例,有关KNN的介绍请参考我的上一篇博文。

This is my first simple exercise case of KNN written by myself. For an introduction to KNN, please refer to my last blog post.

我在这里

#导包
import numpy as np
from matplotlib import pyplot as plt
import operator
%matplotlib inline
# Build a small hand-made data set: 15 two-dimensional points, 5 per class.
data = [
    [0.8, 1.8],
    [0.9, 2.1],
    [1.0, 1.5],
    [1.2, 1.9],
    [1.3, 2.0],
    [2.5, 1.7],
    [2.8, 1.5],
    [2.5, 1.4],
    [2.7, 1.9],
    [2.6, 1.8],
    [1.9, 3.3],
    [2.0, 2.9],
    [2.2, 2.8],
    [2.1, 2.9],
    [1.8, 3.0],
]
label = ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'c']
print(label)
train_data = np.array(data)
print(train_data)

# Split the coordinates per class by looking at the labels, so the grouping
# keeps working if points are added, removed or reordered.
x_1 = [point[0] for point, cls in zip(data, label) if cls == 'a']
y_1 = [point[1] for point, cls in zip(data, label) if cls == 'a']
x_2 = [point[0] for point, cls in zip(data, label) if cls == 'b']
y_2 = [point[1] for point, cls in zip(data, label) if cls == 'b']
x_3 = [point[0] for point, cls in zip(data, label) if cls == 'c']
y_3 = [point[1] for point, cls in zip(data, label) if cls == 'c']

# Coordinates of every point, regardless of class.
x = [point[0] for point in data]
y = [point[1] for point in data]

# Quick overview of the raw, uncoloured points.
plt.scatter(x, y)
# One list per line; the separators are real newlines.
print(f'{x_1} \n {y_1}\n{x_2} \n  {y_2}\n{x_3}\n {y_3} ')

# Coloured plot: one scatter call per class (red = a, blue = b, green = c).
f, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.scatter(x_1, y_1, label='skitcat', color='r', marker='o')
ax.scatter(x_2, y_2, label='skitcat', color='b', marker='o')
ax.scatter(x_3, y_3, label='skitcat', color='g', marker='o')

# Custom query point; from the plot it clearly belongs to the third class.
test = [[1.5, 2.85]]
x_test, y_test = test[0]
ax.scatter(x_test, y_test, label='skitcat', color='m', marker='x')
test = np.array(test)

从图中可以看出,测试点很明显属于第三类(c类)。

# Distance function
def d_euc(x, y):
    """Return the Euclidean distance between ``x`` and ``y``.

    The operands are subtracted element-wise, so they must be array-like
    objects that support ``-`` (e.g. NumPy arrays).  The squared differences
    of *all* resulting elements are added up before the square root is taken.
    """
    diff = x - y
    squared_total = np.sum(diff * diff)
    return np.sqrt(squared_total)
def KNN(train_data, test, label, k):
    """Classify ``test`` by a vote among its ``k`` nearest training points.

    Args:
        train_data: Array-like of training points, one row per sample.
        test: The query point, either a flat vector or a single-row 2-D array.
        label: Sequence of class labels; ``label[i]`` belongs to
            ``train_data[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The result of ``majory_vote`` applied to the vote tally of the ``k``
        nearest neighbours.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    samples = np.asarray(train_data, dtype=float)
    # Flatten so that both [x, y] and [[x, y]] are accepted as the query.
    query = np.asarray(test, dtype=float).ravel()

    # Distance from the query point to every training point.  The query, not
    # the training set itself, must be the second operand.
    distance = np.array([d_euc(sample, query) for sample in samples])
    # Indices ordered from nearest to farthest; a stable sort keeps the
    # training order for equidistant points, so results are reproducible.
    index = distance.argsort(kind="stable")

    count = {}
    print(label)
    # Exactly the k nearest neighbours vote (slicing also copes with k > n).
    for i in index[:k]:
        label_vote = label[i]
        count[label_vote] = count.get(label_vote, 0) + 1  # default to 0 for unseen labels
        print(label_vote)
    print(count)
    final_outcome = majory_vote(count)
    return final_outcome
# Decision rule: majority vote
def majory_vote(count):
    """Rank the vote tally from the most to the least voted label.

    ``count`` maps each label to its number of votes.  The result is a list
    of ``(label, votes)`` pairs in descending order of votes; labels with the
    same number of votes keep the order they have in ``count``.
    """
    ranking = list(count.items())
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking
label = ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'c']
# Classify the query point that was drawn on the scatter plot, (1.5, 2.85),
# so the printed result can be compared with what the plot shows.
test = np.array([1.5, 2.85])
final_label = KNN(train_data, test, label, 6)
# Bare expression: displayed as the cell output when run in a notebook.
final_label

最后结果

可以发现,预测结果与我们最初从图中看到的真实结果一样,属于c类。