Compare commits
5 Commits
b39d143adc
...
44f6362737
Author | SHA1 | Date |
---|---|---|
![]() |
44f6362737 | 4 years ago |
![]() |
16de235b4c | 4 years ago |
![]() |
fcccc4f1d9 | 4 years ago |
![]() |
f48cfe8bdf | 4 years ago |
![]() |
2c35d2522b | 4 years ago |
@ -0,0 +1,39 @@
|
|||||||
|
'''
|
||||||
|
Author: SJ2050
|
||||||
|
Date: 2021-11-21 17:22:02
|
||||||
|
LastEditTime: 2021-11-21 22:05:09
|
||||||
|
Version: v0.0.1
|
||||||
|
Description: Use softmax regression method to solve multiclass classification problems.
|
||||||
|
Copyright © 2021 SJ2050
|
||||||
|
'''
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.datasets import load_digits
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.metrics import confusion_matrix
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
from softmax_regression import SoftmaxRegression
|
||||||
|
|
||||||
|
# Load the digits dataset; the last 500 samples are held out for testing.
digits = load_digits()
x_train, x_test = digits.data[:-500], digits.data[-500:]
y_train, y_test = digits.target[:-500], digits.target[-500:]

# Fit the hand-written softmax regression model on the training split.
softmax_reg = SoftmaxRegression()
softmax_reg.train(x_train, y_train)

# Predict on both splits and report the accuracy of each.
pred_train = softmax_reg.predict(x_train)
pred_test = softmax_reg.predict(x_test)

train_accuracy = accuracy_score(y_train, pred_train)
test_accuracy = accuracy_score(y_test, pred_test)
print(f'accuracy train = {train_accuracy}')
print(f'accuracy test = {test_accuracy}')

# Plot the confusion matrix of the held-out split
# (rows: ground truth, columns: prediction).
cm = confusion_matrix(y_test, pred_test)
plt.matshow(cm)
plt.title('Confusion Matrix')
plt.colorbar()
plt.ylabel('Groundtruth')
plt.xlabel('Predict')
plt.show()
|
@ -0,0 +1,38 @@
|
|||||||
|
'''
|
||||||
|
Author: SJ2050
|
||||||
|
Date: 2021-11-21 18:24:41
|
||||||
|
LastEditTime: 2021-11-21 18:50:47
|
||||||
|
Version: v0.0.1
|
||||||
|
Description: Use sklearn to solve logistic regression problems.
|
||||||
|
Copyright © 2021 SJ2050
|
||||||
|
'''
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.datasets import load_digits
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.metrics import confusion_matrix
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
|
||||||
|
# Load the digits dataset; the last 500 samples are held out for testing.
digits = load_digits()
x_train, x_test = digits.data[:-500], digits.data[-500:]
y_train, y_test = digits.target[:-500], digits.target[-500:]

# Fit scikit-learn's logistic regression on the training split.
log_reg = LogisticRegression()
log_reg.fit(x_train, y_train)

# Predict on both splits and report the accuracy of each.
pred_train = log_reg.predict(x_train)
pred_test = log_reg.predict(x_test)

train_accuracy = accuracy_score(y_train, pred_train)
test_accuracy = accuracy_score(y_test, pred_test)
print(f'accuracy train = {train_accuracy}')
print(f'accuracy test = {test_accuracy}')

# Plot the confusion matrix of the held-out split
# (rows: ground truth, columns: prediction).
cm = confusion_matrix(y_test, pred_test)
plt.matshow(cm)
plt.title('Confusion Matrix')
plt.colorbar()
plt.ylabel('Groundtruth')
plt.xlabel('Predict')
plt.show()
|
@ -0,0 +1,106 @@
|
|||||||
|
'''
|
||||||
|
Author: SJ2050
|
||||||
|
Date: 2021-11-21 17:06:31
|
||||||
|
LastEditTime: 2021-11-21 22:29:52
|
||||||
|
Version: v0.0.1
|
||||||
|
Description: Softmax regression.
|
||||||
|
Copyright © 2021 SJ2050
|
||||||
|
'''
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def softmax(Z):
    """Compute the softmax of a column vector.

    Args:
        Z (numpy.ndarray): Scores of shape (n, 1).

    Returns:
        numpy.ndarray: Array of shape (n, 1) with non-negative entries that
            sum to 1.

    Raises:
        ValueError: If ``Z`` is not a 2-D array with exactly one column.
    """
    # The previous `assert(cond, msg)` asserted a non-empty tuple, which is
    # always truthy, so the shape check never actually ran.
    if len(Z.shape) != 2 or Z.shape[1] != 1:
        raise ValueError('Z should be a column vector!')

    # Nothing to normalise; also avoids taking the max of an empty array.
    if Z.size == 0:
        return np.exp(Z)

    # Subtracting the maximum does not change the result mathematically but
    # keeps np.exp from overflowing to inf (which would produce nan below).
    Z_exp = np.exp(Z - Z.max(axis=0, keepdims=True))
    return Z_exp / Z_exp.sum(axis=0, keepdims=True)
|
||||||
|
|
||||||
|
class SoftmaxRegression():
    """Multiclass softmax regression trained by stochastic gradient ascent.

    ``train`` stores the training set, the sorted unique labels
    (``classes``), the weight matrix (``weights``, one column per class)
    and the bias column vector (``b``). ``predict`` uses them to label new
    samples.
    """

    def __init__(self):
        # predict() only prints a notice until train() has set this flag.
        self.is_trained = False

    def train(self, train_data, train_label, num_iterations=150, alpha=0.01):
        """Fit the weights and biases on the given samples.

        Args:
            train_data (numpy.ndarray): Samples, shape
                (num_samples, num_features).
            train_label (numpy.ndarray): One label per sample.
            num_iterations (int): Number of passes over the training set.
            alpha (float): Learning rate.
        """
        self.train_data = train_data
        self.train_label = train_label
        self.classes = np.unique(self.train_label)
        self.out_dim = len(self.classes)

        train_data_num, self.inp_dim = np.shape(self.train_data)
        self.weights = np.random.random((self.inp_dim, self.out_dim))
        self.b = np.random.random((self.out_dim, 1))

        samples = np.asarray(self.train_data)
        # np.unique returns the labels sorted, so searchsorted maps every
        # label to the index of its class once, up front.
        class_index = np.searchsorted(
            self.classes, np.asarray(self.train_label).ravel())
        one_hot = np.eye(self.out_dim)

        for _ in range(num_iterations):
            # Visit every sample exactly once per pass, in random order.
            # The previous code drew a random position in a shrinking index
            # list but used that position directly as the row index, so
            # samples were drawn with replacement and biased toward the
            # first rows of the training set.
            for sample_index in np.random.permutation(train_data_num):
                x_vec = samples[sample_index].reshape(-1, 1)
                probabilities = softmax(
                    np.dot(self.weights.T, x_vec) + self.b)

                # Per-class error: (1 if true class else 0) - probability.
                target = one_hot[:, [class_index[sample_index]]]
                step = alpha * (target - probabilities)

                # Column k of the weights moves by step[k] * x_vec.
                self.weights += np.dot(x_vec, step.T)
                self.b += step

        self.is_trained = True

    def predict(self, predict_data):
        """Predict a label for each sample.

        Args:
            predict_data (numpy.ndarray): Samples, one per row.

        Returns:
            numpy.ndarray: Float array holding one predicted label per
                sample, or ``None`` (after printing a notice) when the
                model has not been trained yet.
        """
        if not self.is_trained:
            print('Need training before predicting!!')
            return None

        samples = np.asarray(predict_data)
        # Flatten each sample to a feature row, as the per-sample
        # reshape(-1, 1) did before.
        samples = samples.reshape(len(samples), self.inp_dim)

        # Softmax is monotonic, so the most probable class is the one with
        # the largest score; no need to exponentiate.
        scores = np.dot(samples, self.weights) + self.b.T
        return self.classes[np.argmax(scores, axis=1)].astype(float)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Demo: binary classification on the two-moons toy dataset.
    import matplotlib.pyplot as plt
    import sklearn.datasets
    from sklearn.metrics import accuracy_score

    def plot_decision_boundary(predict_func, data, label):
        """Plot the predicted class regions together with the samples.

        Args:
            predict_func (callable): Prediction function; called with an
                (n, 2) array of grid points and expected to return an
                array of n labels.
            data (numpy.ndarray): Training samples; columns 0 and 1 are
                used as the x and y coordinates.
            label (numpy.ndarray): Training labels, used to colour the
                samples.
        """
        x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
        y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
        h = 0.01  # grid spacing

        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # Classify every grid point, then restore the grid shape.
        Z = predict_func(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)

        # Start a new figure so the regions are not painted over whatever
        # figure is currently open (and do not inherit its title).
        plt.figure()
        plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)  # filled contours
        plt.scatter(data[:, 0], data[:, 1], c=label, cmap=plt.cm.Spectral)
        plt.show()

    data, label = sklearn.datasets.make_moons(200, noise=0.30)
    plt.scatter(data[:, 0], data[:, 1], c=label)
    plt.title("Original Data")

    softmax_reg = SoftmaxRegression()
    softmax_reg.train(data, label, 200)
    plot_decision_boundary(softmax_reg.predict, data, label)

    y_train = softmax_reg.predict(data)
    print(f'accuracy train = {accuracy_score(label, y_train)}')
|
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 17 KiB |
After Width: | Height: | Size: 17 KiB |
Loading…
Reference in new issue