Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
YU Xiyue
AI_lab
Commits
33090034
Commit
33090034
authored
Jul 10, 2021
by
YU Xiyue
Browse files
2
parent
d2d1073a
Changes
3
Hide whitespace changes
Inline
Side-by-side
.idea/vcs.xml
0 → 100644
View file @
33090034
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"VcsDirectoryMappings"
>
<mapping
directory=
"$PROJECT_DIR$"
vcs=
"Git"
/>
</component>
</project>
\ No newline at end of file
src1/linearclassification.py
View file @
33090034
...
...
@@ -22,16 +22,16 @@ class LinearClassification:
"""'
需要你实现的部分
"""
self
.
W
=
np
.
zeros
([
1
,
9
])
N
=
train_features
.
shape
[
1
]
self
.
W
=
np
.
zeros
([
1
,
N
+
1
])
lc
=
np
.
ones
([
train_features
.
shape
[
0
],
1
])
X
=
np
.
concatenate
((
lc
,
train_features
),
axis
=
1
)
for
n
in
range
(
self
.
epochs
):
y
=
np
.
dot
(
X
,
np
.
transpose
(
self
.
W
))
delta
=
np
.
transpose
(
y
-
train_labels
)
derivative
=
(
np
.
dot
(
delta
,
X
)
+
self
.
Lambda
*
self
.
W
)
/
train_labels
.
size
derivative
=
2
*
(
np
.
dot
(
delta
,
X
)
+
self
.
Lambda
*
self
.
W
)
/
train_labels
.
size
self
.
W
=
self
.
W
-
self
.
lr
*
derivative
'''根据训练好的参数对测试数据test_features进行预测,返回预测结果
预测结果的数据类型应为np数组,shape=(test_num,1) test_num为测试数据的数目'''
def
predict
(
self
,
test_features
):
...
...
src1/nBayesClassifier.py
View file @
33090034
...
...
@@ -2,25 +2,52 @@ import numpy as np
import
math
from
collections
import
Counter
from
process_data
import
load_and_process_data
from
evaluation
import
get_micro_F1
,
get_macro_F1
,
get_acc
from
evaluation
import
get_micro_F1
,
get_macro_F1
,
get_acc
class NaiveBayes:
    """Naive Bayes classifier for a mix of discrete and continuous features.

    Attributes:
        Pc: maps each class label ``c`` to its Laplace-smoothed prior P(c).
        Pxc: likelihood parameters for P(x|c).
            * continuous feature ``d``: ``Pxc[(d, c)] = (mean, variance)``
            * discrete feature ``d``:   ``Pxc[(d, value, c)] = P(value | c)``
    """

    # Lower bound applied to a per-class variance before it is used, so a
    # feature that is constant within a class cannot cause a division by zero.
    _MIN_VARIANCE = 1e-9

    def __init__(self):
        self.Pc = {}
        self.Pxc = {}
        # Smoothed probability used for a discrete value that never occurred
        # in the training data: _unseen[(d, c)] = 1 / (count(c) + n_values).
        self._unseen = {}

    @staticmethod
    def _log_gaussian(value, mean, var):
        """Return log N(value; mean, var) where ``var`` is a variance."""
        var = max(float(var), NaiveBayes._MIN_VARIANCE)
        return -0.5 * math.log(2 * math.pi * var) - (value - mean) ** 2 / (2 * var)

    def fit(self, traindata, trainlabel, featuretype):
        """Estimate the prior P(c) and the per-feature likelihoods P(x|c).

        Args:
            traindata: 2-D array with one row per sample and one column per
                feature.
            trainlabel: array of class labels, one per sample (it is
                flattened before use).
            featuretype: sequence with one entry per feature column;
                1 marks a continuous feature, anything else a discrete one.

        Any parameters from a previous call are discarded first.
        """
        labels = trainlabel.flatten()
        class_counts = Counter(labels)
        n_classes = len(class_counts)

        # Start from a clean state so refitting never keeps stale entries.
        self.Pc = {}
        self.Pxc = {}
        self._unseen = {}

        # Laplace-smoothed class priors.
        for cls, count in class_counts.items():
            self.Pc[cls] = (count + 1) / (labels.size + n_classes)

        # Each feature dimension is modelled independently.
        for d in range(traindata.shape[1]):
            column = traindata[:, d].flatten()
            if featuretype[d] == 1:
                # Continuous: per-class Gaussian described by (mean, variance).
                for cls in class_counts:
                    values = np.asarray(column[labels == cls], dtype=float)
                    self.Pxc[(d, cls)] = (np.average(values), np.var(values))
            else:
                # Discrete: Laplace-smoothed frequency table. Every
                # (value, class) combination gets an entry, including pairs
                # that never co-occurred, so predict() can look them up.
                distinct = np.unique(column)
                n_values = len(distinct)
                pair_counts = Counter(zip(column, labels))
                for cls, count in class_counts.items():
                    denom = count + n_values
                    self._unseen[(d, cls)] = 1 / denom
                    for value in distinct:
                        self.Pxc[(d, value, cls)] = (
                            pair_counts[(value, cls)] + 1
                        ) / denom

    def predict(self, features, featuretype):
        """Predict a class label for every row of ``features``.

        Args:
            features: 2-D array with one row per sample.
            featuretype: sequence with one entry per feature column;
                1 marks a continuous feature, anything else a discrete one.

        Returns:
            np.ndarray of shape ``(features.shape[0], 1)`` holding, for each
            row, the class with the highest log-posterior score.
        """
        unseen = getattr(self, "_unseen", {})
        pred = []
        for x in features:
            scores = {}
            for cls, prior in self.Pc.items():
                # Work in log space so many small factors do not underflow.
                score = math.log(prior)
                for i in range(x.shape[0]):
                    if featuretype[i] == 1:
                        mean, var = self.Pxc[(i, cls)]
                        score += self._log_gaussian(x[i], mean, var)
                    else:
                        likelihood = self.Pxc.get((i, x[i], cls))
                        if likelihood is None:
                            # Value absent from the training data: fall back
                            # to the smoothed "zero count" probability.
                            likelihood = unseen[(i, cls)]
                        score += math.log(likelihood)
                scores[cls] = score
            # Ties resolve to the class that appears first in self.Pc.
            pred.append(max(scores, key=scores.get))
        return np.array(pred).reshape([features.shape[0], 1])
def main():
    """Train the Naive Bayes model and report its scores on the test split."""
    # Load the training and test sets.
    train_data, train_label, test_data, test_label = load_and_process_data()
    # Kind of each feature column: 0 = discrete, 1 = continuous.
    feature_type = [0, 1, 1, 1, 1, 1, 1, 1]

    # Estimate the priors and conditional probabilities on the training set.
    Nayes = NaiveBayes()
    Nayes.fit(train_data, train_label, feature_type)

    # Predict labels for the test set.
    pred = Nayes.predict(test_data, feature_type)

    # Report accuracy and the multi-class F1 scores.
    print("Acc: " + str(get_acc(test_label, pred)))
    print("macro-F1: " + str(get_macro_F1(test_label, pred)))
    print("micro-F1: " + str(get_micro_F1(test_label, pred)))
# Run the experiment once, and only when the file is executed as a script
# (importing the module no longer triggers training).
if __name__ == "__main__":
    main()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment