Commit 33090034 authored by YU Xiyue

2

parent d2d1073a
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
\ No newline at end of file
@@ -22,16 +22,16 @@ class LinearClassification:
        """
        The part you need to implement
        """
-        self.W = np.zeros([1, 9])
+        N = train_features.shape[1]
+        self.W = np.zeros([1, N + 1])
        lc = np.ones([train_features.shape[0], 1])  # column of ones for the bias term
        X = np.concatenate((lc, train_features), axis=1)
        for n in range(self.epochs):
            y = np.dot(X, np.transpose(self.W))
            delta = np.transpose(y - train_labels)
-            derivative = (np.dot(delta, X) + self.Lambda * self.W) / train_labels.size
+            derivative = 2 * (np.dot(delta, X) + self.Lambda * self.W) / train_labels.size
            self.W = self.W - self.lr * derivative
    '''Use the trained parameters to predict on the test data test_features and return the predictions.
    The predictions should be a numpy array with shape=(test_num, 1), where test_num is the number of test samples.'''
    def predict(self, test_features):
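For reference, the update above is ordinary batch gradient descent on an L2-regularized squared error. The standalone sketch below reproduces the same update on synthetic data; the values of lr, Lambda and epochs are assumptions for illustration, since those hyperparameters are defined elsewhere in the class.

import numpy as np

# Minimal sketch of the same ridge-style gradient step on synthetic data.
# lr, Lambda and epochs are assumed values, not the ones used in the assignment.
rng = np.random.default_rng(0)
train_features = rng.normal(size=(100, 8))
train_labels = train_features @ np.arange(1.0, 9.0).reshape(8, 1) + 3.0  # true bias 3, weights 1..8

lr, Lambda, epochs = 0.01, 0.1, 2000
N = train_features.shape[1]
W = np.zeros([1, N + 1])
X = np.concatenate((np.ones([train_features.shape[0], 1]), train_features), axis=1)
for _ in range(epochs):
    y = np.dot(X, W.T)
    delta = (y - train_labels).T
    W = W - lr * 2 * (np.dot(delta, X) + Lambda * W) / train_labels.size
print(W.round(2))  # recovers roughly [3, 1, 2, ..., 8] when Lambda is small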
@@ -2,25 +2,52 @@ import numpy as np
import math
from collections import Counter
from process_data import load_and_process_data
-from evaluation import get_micro_F1,get_macro_F1,get_acc
+from evaluation import get_micro_F1, get_macro_F1, get_acc
class NaiveBayes:
-    '''Parameter initialization
+    """Parameter initialization
    Pc: P(c), the prior probability of each class c
    Pxc: P(x|c), the conditional probability of each feature value given the class
-    '''
+    """
    def __init__(self):
-        self.Pc={}
-        self.Pxc={}
+        self.Pc = {}
+        self.Pxc = {}
    '''
    Estimate the prior distribution p(c) and the conditional distribution p(x|c) from the training set.
    Taking logs throughout is recommended, so that products of probabilities do not underflow to 0.
    '''
-    def fit(self,traindata,trainlabel,featuretype):
-        '''
+    def fit(self, traindata, trainlabel, featuretype):
+        """
        The part you need to implement
-        '''
+        """
+        c = Counter(trainlabel.flatten())
+        N = len(c)  # number of classes
+        for key, val in c.items():
+            self.Pc[key] = (val + 1) / (trainlabel.size + N)  # Laplace-smoothed prior P(c)
+        for d in range(traindata.shape[1]):  # handle each feature dimension separately
+            column = traindata[:, d]
+            if featuretype[d] == 1:  # continuous feature: fit a Gaussian per class
+                xc = {}
+                for xi, cy in zip(column.flatten(), trainlabel.flatten()):
+                    if cy not in xc:
+                        xc[cy] = []
+                    xc[cy].append(xi)
+                for key, val in xc.items():
+                    temp = np.array(val)
+                    self.Pxc[(d, key)] = (np.average(temp), np.var(temp))  # store (mean, variance)
+            else:  # discrete feature: Laplace-smoothed frequency estimate
+                xc = {}
+                Ni = len(np.unique(column))  # number of distinct values of this feature
+                for xi, cy in zip(column.flatten(), trainlabel.flatten()):
+                    if (xi, cy) not in xc:
+                        xc[(xi, cy)] = 0
+                    xc[(xi, cy)] += 1
+                for key, val in xc.items():
+                    self.Pxc[(d, key[0], key[1])] = (val + 1) / (c[key[1]] + Ni)  # smoothed P(x|c)
    '''
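A quick hand check of the Laplace smoothing used above (the toy labels here are made up for illustration and are not the assignment's data): with 6 samples split 3/2/1 among three classes, the smoothed priors are (3+1)/(6+3), (2+1)/(6+3) and (1+1)/(6+3).

import numpy as np
from collections import Counter

# Toy check of the smoothed prior P(c) = (count(c) + 1) / (num_samples + num_classes).
trainlabel = np.array([[1], [1], [1], [2], [2], [3]])
c = Counter(trainlabel.flatten())
N = len(c)
print({int(k): round((v + 1) / (trainlabel.size + N), 3) for k, v in c.items()})
# expected: {1: 0.444, 2: 0.333, 3: 0.222}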
@@ -28,26 +55,49 @@ class NaiveBayes:
    Return the predictions as a numpy array with shape=(test_num, 1), where test_num is the number of test samples.
    feature_type is a 0-1 array giving the type of each feature: 0 for discrete, 1 for continuous.
    '''
-    def predict(self,features,featuretype):
-        '''
+    def predict(self, features, featuretype):
+        """
        The part you need to implement
-        '''
+        """
+        N = features.shape[0]
+        pred = []
+        def gauss(v, mu, var):
+            # log of the Gaussian density with mean mu and variance var, evaluated at v
+            exponent = -(np.power(v - mu, 2)) / (2 * var)
+            res = math.log(1 / (np.sqrt(2 * np.pi * var))) + exponent
+            return res
+        for x in features:
+            prob = {}
+            for k in self.Pc:
+                temp = math.log(self.Pc[k])  # log prior
+                for i in range(x.shape[0]):
+                    if featuretype[i] == 1:
+                        mu, var = self.Pxc[(i, k)]
+                        temp += gauss(x[i], mu, var)
+                    else:
+                        temp += math.log(self.Pxc[(i, x[i], k)])
+                prob[k] = temp
+            pred.append(max(prob, key=prob.get))  # class with the largest log posterior
+        pred = np.array(pred).reshape([features.shape[0], 1])
+        print(pred)
+        return pred
def main():
    # load the training and test sets
-    train_data,train_label,test_data,test_label=load_and_process_data()
-    feature_type=[0,1,1,1,1,1,1,1]  # type of each feature: 0 for discrete, 1 for continuous
+    train_data, train_label, test_data, test_label = load_and_process_data()
+    feature_type = [0, 1, 1, 1, 1, 1, 1, 1]  # type of each feature: 0 for discrete, 1 for continuous
-    Nayes=NaiveBayes()
-    Nayes.fit(train_data,train_label,feature_type)  # estimate the priors and conditionals on the training set
+    Nayes = NaiveBayes()
+    Nayes.fit(train_data, train_label, feature_type)  # estimate the priors and conditionals on the training set
-    pred=Nayes.predict(test_data,feature_type)  # get the predictions on the test set
+    pred = Nayes.predict(test_data, feature_type)  # get the predictions on the test set
    # compute the accuracy (Acc) and the multi-class F1-scores
-    print("Acc: "+str(get_acc(test_label,pred)))
-    print("macro-F1: "+str(get_macro_F1(test_label,pred)))
-    print("micro-F1: "+str(get_micro_F1(test_label,pred)))
+    print("Acc: " + str(get_acc(test_label, pred)))
+    print("macro-F1: " + str(get_macro_F1(test_label, pred)))
+    print("micro-F1: " + str(get_micro_F1(test_label, pred)))
-main()
\ No newline at end of file
+main()
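One more cross-check, not part of the commit: the continuous-feature term added in predict is the log of a univariate Gaussian density. The snippet below compares that formula against scipy.stats.norm.logpdf, assuming scipy is available (the assignment itself does not require it).

import math
import numpy as np
from scipy.stats import norm

def gauss(v, mu, var):
    # same log-density used for continuous features in NaiveBayes.predict
    return math.log(1 / np.sqrt(2 * np.pi * var)) - (v - mu) ** 2 / (2 * var)

print(gauss(1.3, 1.0, 0.25))                 # about -0.4058
print(norm.logpdf(1.3, loc=1.0, scale=0.5))  # scipy agrees (scale is the standard deviation)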