[機器學習]03-基于核密度估計(KDE)的鳶尾花數據集分類

關鍵點：

使用核密度估計（KDE）估計類別條件概率密度（高斯核，帶寬=0.2）
采用最大后驗概率（MAP）決策準則進行分類

程序代碼：

import random
import matplotlib
from sklearn.neighbors import KernelDensity
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# Per-class containers, each keyed by the category label read from the file:
#   data_dict  - every sample that was loaded
#   train_data - the samples drawn for training
#   test_data  - the held-out samples used for evaluation
data_dict, train_data, test_data = {}, {}, {}

# Base font size used by every figure.
matplotlib.rcParams['font.size'] = 5

# Load the data into a dictionary, then shuffle it so that every class
# contributes 45 training samples and (with 50 samples per class) 5 test
# samples.
# Read the tab-separated data file.  Column 0 is the category label; columns 1
# and 2 are the two numeric attributes, stored per category under the keys
# 'Length' and 'Width' of data_dict.
with open('Iris數據txt版.txt', 'r') as file:
    for line in file:
        line = line.strip()
        data = line.split('\t')
        if len(data) < 3:
            # Fewer than three columns (e.g. a blank line): nothing to parse.
            continue
        try:
            # float() instead of eval(): the file is plain data and must never
            # be executed as Python code.  float() also raises the ValueError
            # handled below when a field is malformed, whereas eval() would
            # raise SyntaxError/NameError and abort the whole script.
            attribute1 = float(data[1])
            attribute2 = float(data[2])
        except ValueError:
            print(f"Invalid data in line: {line}")
            continue
        category = data[0]
        # Create the per-category lists on first sight of a label; both
        # attributes are appended only after both parsed successfully, so the
        # two lists always stay the same length.
        bucket = data_dict.setdefault(category, {'Length': [], 'Width': []})
        bucket['Length'].append(attribute1)
        bucket['Width'].append(attribute2)
# Summarise what was loaded for each class: label, sample counts, raw values.
for category, attributes in data_dict.items():
    print(f'種類: {category}')
    print(len(attributes["Length"]))
    print(len(attributes["Width"]))
    print(f'屬性1: {attributes["Length"]}')
    print(f'屬性2: {attributes["Width"]}')

# Number of samples per class drawn at random for training; whatever remains
# in the class is held out for testing.
TRAIN_SAMPLES_PER_CLASS = 45


def _pick(values, indices):
    """Return the elements of *values* at *indices*, in the order given."""
    return [values[i] for i in indices]


for category, attributes in data_dict.items():
    lengths = attributes['Length']
    widths = attributes['Width']
    if len(lengths) < TRAIN_SAMPLES_PER_CLASS:
        # random.sample would raise a ValueError here anyway; raise the same
        # exception type with a message that names the offending class.
        raise ValueError(
            f"Category {category!r} has only {len(lengths)} samples; "
            f"{TRAIN_SAMPLES_PER_CLASS} are needed for training"
        )
    train_indices = random.sample(range(len(lengths)), TRAIN_SAMPLES_PER_CLASS)
    # Set lookup keeps the complement computation linear in the class size.
    chosen = set(train_indices)
    test_indices = [i for i in range(len(lengths)) if i not in chosen]
    train_data[category] = {
        'Length': _pick(lengths, train_indices),
        'Width': _pick(widths, train_indices),
    }
    test_data[category] = {
        'Length': _pick(lengths, test_indices),
        'Width': _pick(widths, test_indices),
    }

# Uniform prior: every class is given the same prior probability.
prior_rate = 1.0 / len(data_dict)
#print("訓練數據：")
#print(train_data)
#print(train_data['1']['Length'])
#print(train_data['1']['Width'])
#print(len(train_data['1']['Length']))
#print("測試數據：")
#print(test_data)
#print(test_data['1']['Length'])
#print(test_data['1']['Width'])
#print(len(test_data['1']['Length']))

'''
Estimate the class-conditional probability densities with scikit-learn's
kernel density estimation (KDE, Gaussian kernel)
'''
# Class-conditional density estimation.  For every class one 1-D Gaussian KDE
# is fitted per attribute, the resulting density is tabulated on a regular
# (length, width) grid, and one 3-D surface per class is drawn.
KDE_BANDWIDTH = 0.2

# Evaluation grid with an exact 0.1 spacing: 41 points cover [4, 8] and 26
# points cover [2, 4.5].  With 40 and 25 points the step is slightly larger
# than 0.1, so rounding to one decimal leaves a hole in the grid (e.g. 6.0 is
# skipped on the length axis) and samples at the missing value get the density
# of a neighbouring cell.
length_range = np.around(np.linspace(4, 8, 41), 1)
width_range = np.around(np.linspace(2, 4.5, 26), 1)
# Default 'xy' indexing: rows follow width_range, columns follow length_range.
length_mesh, width_mesh = np.meshgrid(length_range, width_range)

# NOTE: the "knn_" names are historical; the stored models are KernelDensity
# estimators, not k-nearest-neighbour models.
knn_models = {}
smoothed_data = {}
for category, attributes in train_data.items():
    # KernelDensity expects a 2-D array of shape (n_samples, n_features).
    lengths = np.array(attributes['Length']).reshape(-1, 1)
    widths = np.array(attributes['Width']).reshape(-1, 1)
    knn_length = KernelDensity(kernel='gaussian', bandwidth=KDE_BANDWIDTH).fit(lengths)
    knn_width = KernelDensity(kernel='gaussian', bandwidth=KDE_BANDWIDTH).fit(widths)
    knn_models[category] = {'Length': knn_length, 'Width': knn_width}

    # score_samples returns log-densities, one value per grid coordinate.
    log_density_length = knn_length.score_samples(length_range.reshape(-1, 1))
    log_density_width = knn_width.score_samples(width_range.reshape(-1, 1))

    # Joint class-conditional density with the two attributes treated as
    # independent: p(length, width) = p(length) * p(width), computed as the
    # exponential of the summed log-densities.  Adding the two marginal
    # densities instead would not yield a density over the 2-D grid.
    # Broadcasting produces the same (n_width, n_length) shape as the meshes.
    total_density = np.exp(
        log_density_width[:, np.newaxis] + log_density_length[np.newaxis, :]
    )
    smoothed_data[category] = {
        'Length': length_mesh,
        'Width': width_mesh,
        'ProbabilityDensity': total_density,
    }

# One 3-D surface per class.  The number of panels follows the number of
# classes (three for this data set) instead of being fixed at three.
n_panels = max(len(smoothed_data), 1)
fig, axs = plt.subplots(
    1,
    n_panels,
    figsize=(6 * n_panels, 6),
    subplot_kw={'projection': '3d'},
    squeeze=False,
)
axs = axs.ravel()  # keep a flat array of axes regardless of the panel count
for ax, (category, surface) in zip(axs, smoothed_data.items()):
    ax.set_title(f'Density Of Category: {category}', fontsize=14)
    ax.plot_surface(
        surface['Length'],
        surface['Width'],
        surface['ProbabilityDensity'],
        cmap='coolwarm',
    )
    # Tick spacing: 0.1 on both attribute axes, 0.2 on the density axis.
    ax.xaxis.set_major_locator(MultipleLocator(0.1))
    ax.yaxis.set_major_locator(MultipleLocator(0.1))
    ax.zaxis.set_major_locator(MultipleLocator(0.2))
    ax.set_xlabel('Length', fontsize=10)
    ax.set_ylabel('Width', fontsize=10)
    ax.set_zlabel('ProbabilityDensity', fontsize=10)
plt.tight_layout()
plt.show()

# Classify every held-out sample and report the fraction predicted correctly.
right = 0
total = 0  # named "total" so the builtin all() is not shadowed
for category1, data1 in test_data.items():
    print(category1, data1)
    for i, j in zip(data1['Length'], data1['Width']):
        desired_probability_density = {}
        for category2, data2 in smoothed_data.items():
            # Tabulated grid coordinates and density surface of this class.
            length_mesh = data2['Length']
            width_mesh = data2['Width']
            probability_density = data2['ProbabilityDensity']
            # Nearest grid cell: row 0 of the length mesh holds the length
            # coordinates, column 0 of the width mesh the width coordinates.
            length_index = np.abs(length_mesh[0] - i).argmin()
            width_index = np.abs(width_mesh[:, 0] - j).argmin()
            # The density surface is indexed [width, length].
            desired_probability_density[category2] = probability_density[width_index, length_index]
        # Choose the class with the largest tabulated density.  The script
        # assigns every class the same prior (prior_rate), so this is also the
        # class with the largest posterior probability.
        predict = max(desired_probability_density, key=desired_probability_density.get)
        print(category1, predict)
        total += 1
        if category1 == predict:
            right += 1

# Report NaN instead of raising ZeroDivisionError when nothing was evaluated.
accuracy = right / total if total else float('nan')
print("正確率：", accuracy)

運行結果：

種類: 1
50
50
屬性1: [5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0, 5.1, 4.8, 5.1, 4.6, 5.3, 5.0]
屬性2: [3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3.0, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3.0, 3.8, 3.2, 3.7, 3.3]
種類: 2
50
50
屬性1: [7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.9, 6.0, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7]
屬性2: [3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1, 3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8, 3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3, 3, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8]
種類: 3
50
50
屬性1: [6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6.0, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6.0, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9]
屬性2: [3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3.0, 2.5, 2.8, 3.2, 3.0, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3.0, 2.8, 3.0, 2.8, 3.8, 2.8, 2.8, 2.6, 3.0, 3.4, 3.1, 3.0, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3, 2.5, 3, 3.4, 3]
1 {'Length': [5.1, 5.0, 5.4, 5.1, 5.1], 'Width': [3.3, 3.0, 3.4, 3.4, 3.8]}
1 1
1 1
1 1
1 1
1 1
2 {'Length': [6.5, 5.4, 6.7, 5.6, 5.1], 'Width': [2.8, 3.0, 3.1, 3, 2.5]}
2 3
2 2
2 3
2 2
2 1
3 {'Length': [5.8, 6.9, 7.4, 6.4, 6.9], 'Width': [2.7, 3.2, 2.8, 3.1, 3.1]}
3 2
3 3
3 3
3 3
3 3
正確率： 0.7333333333333333
進程已結束,退出代碼0

本文來自互聯網用戶投稿，該文觀點僅代表作者本人，不代表本站立場。本站僅提供信息存儲空間服務，不擁有所有權，不承擔相關法律責任。
如若轉載，請注明出處：http://www.pswp.cn/web/92601.shtml
繁體地址，請注明出處：http://hk.pswp.cn/web/92601.shtml
英文地址，請注明出處：http://en.pswp.cn/web/92601.shtml

如若內容造成侵權/違法違規/事實不符，請聯系多彩編程網進行投訴反饋email:809451989@qq.com，一經查實，立即刪除！