classdef NaiveBayesClassifier
    % NaiveBayesClassifier  Naive Bayes classifier for data sets in which
    % every attribute is categorical.
    %
    % Data sets are numeric matrices with one sample per row:
    %   [x11 x12 ... x1n c1; ... ; xm1 xm2 ... xmn cm]
    % The class label is stored in the last column. Non-numeric data must be
    % converted to numeric codes before use.
    %
    % Frequencies are stored as percentages (0..100) rather than
    % probabilities. Every class score is scaled by the same constant factor,
    % so the arg-max over classes is unaffected.

    properties (SetAccess = private, GetAccess = private)
        % diff_cls_values and cls_percent correspond row by row.
        diff_cls_values;  % distinct class labels seen in training (column)
        cls_percent;      % percentage of training rows per label (column)
        cls_atr_value;    % cell {i,j}: distinct values of attribute j within class i
        cls_atr_perc;     % cell {i,j}: percentage of each of those values within class i
    end

    methods
        function obj = train_classifier(obj, training_set)
            % Estimate P(Ci) and P(xj | Ci) from training_set.
            %
            % training_set: numeric matrix, one sample per row, class label
            %               in the last column.
            % Returns the trained classifier (value class: the caller must
            % keep the returned object).
            % Raises NaiveBayesClassifier:emptyTrainingSet on empty input.
            disp('start to training ...');
            if isempty(training_set)
                error('NaiveBayesClassifier:emptyTrainingSet', ...
                      'training_set must contain at least one row.');
            end
            n_attr = size(training_set, 2) - 1;

            % Class priors. Only labels that actually occur are listed, so
            % every class is guaranteed a non-empty training subset.
            [labels, label_perc] = ...
                NaiveBayesClassifier.value_percentages(training_set(:, end));
            obj.diff_cls_values = labels;
            obj.cls_percent = label_perc;

            n_cls = size(labels, 1);
            obj.cls_atr_value = cell(n_cls, n_attr);
            obj.cls_atr_perc = cell(n_cls, n_attr);

            % Conditional frequencies P(xj | Ci), one table per class/attribute.
            for i = 1 : n_cls
                cls_rows = training_set(training_set(:, end) == labels(i, 1), :);
                for j = 1 : n_attr
                    [vals, perc] = ...
                        NaiveBayesClassifier.value_percentages(cls_rows(:, j));
                    obj.cls_atr_value{i, j} = vals;
                    obj.cls_atr_perc{i, j} = perc;
                end
            end
            disp('finish training.');
        end

        function correct_ratio = classify_data(obj, test_set)
            % Classify every row of test_set and report the accuracy.
            %
            % test_set: numeric matrix laid out like the training set; the
            %           last column holds the true label used for scoring.
            % Returns the fraction of rows whose predicted label equals the
            % true label (NaN when test_set has no rows, since 0/0).
            % Raises NaiveBayesClassifier:notTrained if train_classifier has
            % not been called.
            disp('start to classify ...');
            if isempty(obj.diff_cls_values)
                error('NaiveBayesClassifier:notTrained', ...
                      'Call train_classifier before classify_data.');
            end
            [test_r, test_c] = size(test_set);
            n_cls = size(obj.diff_cls_values, 1);
            correct_num = 0;
            for i = 1 : test_r                 % each test sample
                % Falls back to the first class when every score is zero.
                pred_label = obj.diff_cls_values(1, 1);
                great_prob = 0;
                for j = 1 : n_cls              % each class
                    prob = 1;
                    for k = 1 : test_c - 1     % each attribute
                        valu_ind = find(obj.cls_atr_value{j, k} == test_set(i, k), 1);
                        if isempty(valu_ind)
                            % Value never seen with this class: P(x | Ci) = 0.
                            prob = 0;
                            break;
                        end
                        prob = prob * obj.cls_atr_perc{j, k}(valu_ind, 1);
                    end
                    prob = prob * obj.cls_percent(j, 1);
                    if prob > great_prob
                        great_prob = prob;
                        pred_label = obj.diff_cls_values(j, 1);
                    end
                end
                if pred_label == test_set(i, end)
                    correct_num = correct_num + 1;
                end
            end
            correct_ratio = correct_num / test_r;
            disp('correct raion');
            disp(correct_ratio);
            disp('finish the classify task');
        end
    end

    methods (Static, Access = private)
        function [vals, perc] = value_percentages(column)
            % Distinct values of column (sorted, as a column vector) and the
            % percentage (0..100) of entries equal to each of them.
            [vals, ~, idx] = unique(column(:));
            counts = accumarray(idx(:), 1);
            perc = 100 * counts / numel(idx);
        end
    end
end

% Object-oriented MATLAB programming, usage example (run from a separate
% script file; the two file names below appear truncated in the source text
% and must be replaced with real paths):
%
%   % clear; clc;
%   % trainFilePath = 'trans_';
%   trainFilePath = 'coll_abalone_mush_0.';
%   testFilePath = 'trans_';
%   training_set = dlmread(trainFilePath);
%   test_set = dlmread(testFilePath);
%   NaiveClassifier = NaiveBayesClassifier();
%   NaiveClassifier = NaiveClassifier.train_classifier(training_set);
%   correctRatio = NaiveClassifier.classify_data(test_set);
%   disp(correctRatio);
classdef NaiveBayesClassifierCC
    % NaiveBayesClassifierCC  Naive Bayes classifier for data sets that mix
    % continuous and discrete (categorical) attributes.
    %
    % Data sets are numeric matrices with one sample per row and the class
    % label in the last column. Continuous attributes are modelled with a
    % per-class Gaussian; discrete attributes with per-class value
    % frequencies.
    %
    % Frequencies are stored as percentages (0..100) rather than
    % probabilities. Every class score is scaled by the same constant factor,
    % so the arg-max over classes is unaffected.

    properties (SetAccess = private, GetAccess = private)
        attr_type;             % attribute kinds: continuous (1) or discrete (0) (row)
        diff_cls_values;       % distinct class labels seen in training (column)
        cls_percent;           % percentage of training rows per label (column)
        % Continuous-attribute tables (classes x attributes); entries for
        % discrete attributes are left at 0.
        conti_attr_means;      % per-class mean
        conti_attr_deviation;  % per-class standard deviation
        % Discrete-attribute tables; cells for continuous attributes stay empty.
        cls_atr_value;         % cell {i,j}: distinct values of attribute j within class i
        cls_atr_perc;          % cell {i,j}: percentage of each of those values within class i
    end

    methods
        function obj = train_classifier(obj, training_set, attr_type)
            % Estimate class priors and per-class attribute models.
            %
            % training_set: numeric matrix, one sample per row, class label
            %               in the last column.
            % attr_type:    row vector with one entry per attribute;
            %               nonzero = continuous, 0 = discrete.
            % Returns the trained classifier (value class: the caller must
            % keep the returned object).
            % Raises NaiveBayesClassifierCC:emptyTrainingSet on empty input and
            % NaiveBayesClassifierCC:attrTypeSize when attr_type is too short.
            disp('start to training ...');
            if isempty(training_set)
                error('NaiveBayesClassifierCC:emptyTrainingSet', ...
                      'training_set must contain at least one row.');
            end
            n_attr = size(training_set, 2) - 1;
            if size(attr_type, 2) < n_attr
                error('NaiveBayesClassifierCC:attrTypeSize', ...
                      'attr_type must have one entry per attribute.');
            end
            obj.attr_type = attr_type;

            % Class priors. Only labels that actually occur are listed, so
            % every class is guaranteed a non-empty training subset.
            [labels, label_perc] = ...
                NaiveBayesClassifierCC.value_percentages(training_set(:, end));
            obj.diff_cls_values = labels;
            obj.cls_percent = label_perc;

            n_cls = size(labels, 1);
            means = zeros(n_cls, n_attr);
            deviations = zeros(n_cls, n_attr);
            obj.cls_atr_value = cell(n_cls, n_attr);
            obj.cls_atr_perc = cell(n_cls, n_attr);

            % Per class: Gaussian parameters for continuous attributes,
            % value frequencies P(xj | Ci) for discrete ones.
            for i = 1 : n_cls
                cls_rows = training_set(training_set(:, end) == labels(i, 1), :);
                for j = 1 : n_attr
                    if attr_type(1, j)      % continuous attribute
                        means(i, j) = mean(cls_rows(:, j));
                        deviations(i, j) = std(cls_rows(:, j));
                    else                    % discrete attribute
                        [vals, perc] = ...
                            NaiveBayesClassifierCC.value_percentages(cls_rows(:, j));
                        obj.cls_atr_value{i, j} = vals;
                        obj.cls_atr_perc{i, j} = perc;
                    end
                end
            end
            % Persist the Gaussian parameters on the object so that
            % classify_data can read them.
            obj.conti_attr_means = means;
            obj.conti_attr_deviation = deviations;
            disp('finish training.');
        end

        function correct_ratio = classify_data(obj, test_set)
            % Classify every row of test_set and report the accuracy.
            %
            % test_set: numeric matrix laid out like the training set; the
            %           last column holds the true label used for scoring.
            % Returns the fraction of rows whose predicted label equals the
            % true label (NaN when test_set has no rows, since 0/0).
            % Raises NaiveBayesClassifierCC:notTrained if train_classifier has
            % not been called.
            disp('start to classify ...');
            if isempty(obj.diff_cls_values)
                error('NaiveBayesClassifierCC:notTrained', ...
                      'Call train_classifier before classify_data.');
            end
            [test_r, test_c] = size(test_set);
            n_cls = size(obj.diff_cls_values, 1);
            correct_num = 0;
            for i = 1 : test_r                 % each test sample
                % Falls back to the first class when every score is zero.
                pred_label = obj.diff_cls_values(1, 1);
                great_prob = 0;
                for j = 1 : n_cls              % each class
                    prob = 1;
                    for k = 1 : test_c - 1     % each attribute
                        if obj.attr_type(1, k)  % continuous attribute
                            mean_ = obj.conti_attr_means(j, k);
                            std_dev = obj.conti_attr_deviation(j, k);
                            value_ = test_set(i, k);
                            prob = prob * obj.get_prob(value_, mean_, std_dev);
                        else                    % discrete attribute
                            valu_ind = find(obj.cls_atr_value{j, k} == test_set(i, k), 1);
                            if isempty(valu_ind)
                                % Value never seen with this class: P(x | Ci) = 0.
                                prob = 0;
                                break;
                            end
                            prob = prob * obj.cls_atr_perc{j, k}(valu_ind, 1);
                        end
                    end
                    prob = prob * obj.cls_percent(j, 1);
                    if prob > great_prob
                        great_prob = prob;
                        pred_label = obj.diff_cls_values(j, 1);
                    end
                end
                if pred_label == test_set(i, end)
                    correct_num = correct_num + 1;
                end
            end
            correct_ratio = correct_num / test_r;
            disp('correct raion');
            disp(correct_ratio);
            disp('finish the classify task');
        end

        function prob = get_prob(obj, value, mean_v, std_dev)
            % Gaussian probability density of scalar value for the given
            % mean (mean_v) and standard deviation (std_dev).
            %
            % A zero standard deviation (constant attribute or single-sample
            % class) would give 0/0 = NaN, so it is floored at sqrt(eps).
            std_dev = max(std_dev, sqrt(eps));
            denominator = (2*pi)^0.5 * std_dev;
            expont = - (value - mean_v)^2 / (2 * std_dev^2);
            prob = exp(expont) / denominator;
        end
    end

    methods (Static, Access = private)
        function [vals, perc] = value_percentages(column)
            % Distinct values of column (sorted, as a column vector) and the
            % percentage (0..100) of entries equal to each of them.
            [vals, ~, idx] = unique(column(:));
            counts = accumarray(idx(:), 1);
            perc = 100 * counts / numel(idx);
        end
    end
end
本文发布于:2024-09-20 16:33:14,感谢您对本站的认可!
本文链接:https://www.17tex.com/xueshu/721055.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论)