parent 810a213a0f
commit 072f435620
@@ -0,0 +1,191 @@
'''
Author: SJ2050
Date: 2022-01-21 16:57:04
LastEditTime: 2022-01-22 10:58:38
Version: v0.0.1
Description: Fully connected multi-layer perceptron (MLP) class.
Copyright © 2022 SJ2050
'''
import numpy as np
from sklearn.metrics import accuracy_score


class FullConnectionMLP:
    # nodes_size: the size of each layer.
    # activation_func: activation function of the hidden layers
    #                  (only `sigmoid` is supported for now).
    # output_activation_func: activation function of the output layer.
    #                         If `sigmoid`, the mean squared error loss
    #                         function is used; if `softmax`, the cross
    #                         entropy loss function is used.
    def __init__(self, nodes_size,
                 activation_func='sigmoid',
                 output_activation_func='softmax'):
        self.nodes_size = nodes_size
        self.activation_func = activation_func
        self.output_activation_func = output_activation_func

        total_layer_num = len(nodes_size)
        self.layer_num = total_layer_num
        self.layers = [np.array([]) for _ in range(total_layer_num)]
        self.w = []
        self.b = []
        # initialize weights uniformly in [-1, 1) and biases to zero
        for i in range(0, total_layer_num-1):
            self.w.append(np.random.random((nodes_size[i], nodes_size[i+1]))*2-1)
            self.b.append(np.zeros(nodes_size[i+1]))

    def __activate_func(self, activation_name):
        if activation_name == 'sigmoid':
            y_func = lambda x: 1/(1+np.exp(-x))
            dy_func = lambda y: y*(1-y)
        elif activation_name == 'softmax':
            # note: np.exp can overflow for large logits; subtracting the
            # row-wise max inside the exponentials would be numerically safer
            y_func = lambda x: np.exp(x)/np.sum(np.exp(x), axis=1, keepdims=True)
            dy_func = lambda yi, yj, equal: yi*(1-yi) if equal else -yi*yj
        else:
            raise RuntimeError('Unsupported activation name!')

        return (y_func, dy_func)
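
    # Note on the derivatives above: `dy_func` for sigmoid takes the
    # *output* y = sigmoid(x), since sigmoid'(x) = y*(1-y). The softmax
    # `dy_func` encodes the Jacobian entries dy_i/dx_j (y_i*(1-y_i) when
    # i == j, -y_i*y_j otherwise) but is never called in `backward`:
    # with the cross entropy loss, the output delta collapses to (t - y).
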
    def forward(self, X_input):
        # input layer
        self.layers[0] = X_input

        # hidden layers
        X = X_input
        (y_func, _) = self.__activate_func(self.activation_func)
        for i in range(0, self.layer_num - 2):
            w = self.w[i]
            b = self.b[i]
            X = y_func(np.matmul(X, w)+b)
            self.layers[i+1] = X

        # output layer
        (y_func, _) = self.__activate_func(self.output_activation_func)
        w = self.w[-1]
        b = self.b[-1]
        X = y_func(np.matmul(X, w)+b)
        self.layers[-1] = X

    def backward(self, X_input, label_tags, eps=0.1):
        t = label_tags

        self.forward(X_input)
        D = []
        # delta of the output layer
        y = self.layers[-1]
        if self.output_activation_func == 'sigmoid':
            # mean squared error loss
            (_, dy_func) = self.__activate_func(self.output_activation_func)
            d = dy_func(y)*(t-y)
        elif self.output_activation_func == 'softmax':
            # cross entropy loss: the delta simplifies to (t - y)
            d = t-y
        else:
            raise RuntimeError('Unsupported output activation name!')
        D.insert(0, d)

        # deltas of the hidden layers
        if self.activation_func == 'sigmoid':
            (_, dy_func) = self.__activate_func(self.activation_func)
            for j in range(self.layer_num-2, 0, -1):
                y = self.layers[j]
                w = self.w[j]
                d = dy_func(y)*np.matmul(d, w.T)
                D.insert(0, d)
        else:
            raise RuntimeError('Only sigmoid hidden layers are supported for now!')

        # update weights and biases
        for j in range(self.layer_num-1, 0, -1):
            i = j - 1
            x = self.layers[i]
            d = D[i]
            self.w[i] += eps*np.matmul(x.T, d)/x.shape[0]
            self.b[i] += eps*np.sum(d, axis=0)/x.shape[0]
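
    # The update above is one step of mini-batch gradient descent: for each
    # layer i, w_i += eps/m * x_i^T d_(i+1) and b_i += eps/m * sum(d_(i+1)),
    # where m is the batch size. The `+=` is a descent step because the
    # deltas were built from (t - y), i.e. they already carry the minus
    # sign of the loss gradient.
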
    def evaluate(self, label_tags):
        output_layer = self.layers[-1]
        loss = np.mean(0.5*np.linalg.norm(output_layer - label_tags, axis=1)**2)

        y_pred = np.argmax(output_layer, axis=1)
        y_true = np.argmax(label_tags, axis=1)
        acc = accuracy_score(y_true, y_pred)

        return (loss, acc)
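
    # Note: the loss reported here is always the mean-squared-error form,
    # even when the network was trained with softmax/cross entropy, so
    # accuracy is the more meaningful metric in that case.
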
    def train(self, X_input, label_tags, eps=0.1, iter_num=2000, eval_num=100, batch_size=100):
        print('Training...')
        for i in range(iter_num):
            if batch_size < 0:
                # a negative batch_size selects full-batch training
                batch_input = X_input
                batch_tags = label_tags
            else:
                # sample a random mini-batch (with replacement)
                index = np.random.randint(0, X_input.shape[0], batch_size)
                batch_input = X_input[index]
                batch_tags = label_tags[index]
            self.backward(batch_input, batch_tags, eps)

            if (i+1) % eval_num == 0:
                self.forward(X_input)
                loss, acc = self.evaluate(label_tags)
                print(f'Iteration {i+1}/{iter_num}: Loss={loss}, acc={acc}.')
        print('Training finished!')
        return self.layers[-1]

    def predict(self, X_input):
        self.forward(X_input)
        return self.layers[-1]


if __name__ == '__main__':
    from sklearn.datasets import load_digits
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt

    # load data and scale it to [0, 1]
    digits = load_digits()
    X = digits.data
    Y = digits.target
    X -= X.min()
    X /= X.max()

    x_train = X[:-500]
    y_train = Y[:-500]
    # one-hot encode the training labels
    y_train_tags = np.zeros((x_train.shape[0], 10))
    for i in range(10):
        y_train_tags[np.where(y_train == i), i] = 1
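    # equivalently, as a one-liner: y_train_tags = np.eye(10)[y_train]
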
    # initialize the NN model
    nodes_size = [64, 100, 10]
    fc_mlp = FullConnectionMLP(nodes_size, activation_func='sigmoid',
                               output_activation_func='softmax')
    pred_train = fc_mlp.train(x_train, y_train_tags, eps=0.1, iter_num=20000,
                              eval_num=100, batch_size=100)
    (loss, acc) = fc_mlp.evaluate(y_train_tags)
    print('--------------------------------')
    print(f'train: Loss={loss}, acc={acc}.')

    x_test = X[-500:]
    y_test = Y[-500:]
    # one-hot encode the test labels
    y_test_tags = np.zeros((x_test.shape[0], 10))
    for i in range(10):
        y_test_tags[np.where(y_test == i), i] = 1

    y_res = fc_mlp.predict(x_test)
    y_test_pred = np.argmax(y_res, axis=1)
    (loss, acc) = fc_mlp.evaluate(y_test_tags)
    print('--------------------------------')
    print(f'predict: Loss={loss}, acc={acc}.')

    cm = confusion_matrix(y_test, y_test_pred)
    plt.matshow(cm)
    plt.title('Confusion Matrix')
    plt.colorbar()
    plt.ylabel('Groundtruth')
    plt.xlabel('Predict')
    plt.show()

@@ -0,0 +1,155 @@
'''
Author: SJ2050
Date: 2022-01-16 17:16:10
LastEditTime: 2022-01-22 10:09:33
Version: v0.0.1
Description: Forward- and back-propagation for an MLP, implemented with plain functions.
Copyright © 2022 SJ2050
'''
import numpy as np
from sklearn.metrics import accuracy_score

# The network is stored as a plain dict:
# NN = {'nodes_size': [ ],
#       'layers': [ ],
#       'w': [ ],
#       'b': [ ]}

def sigmoid(x):
    y = 1 / (1+np.exp(-x))
    return y


def sigmoid_derivative(y):
    # expects the activation output y = sigmoid(x): sigmoid'(x) = y*(1-y)
    return y * (1-y)


def initialize(nodes_size):
    NN = {}
    NN['nodes_size'] = nodes_size
    total_layer_num = len(nodes_size)
    NN['layers'] = [np.array([]) for _ in range(total_layer_num)]
    NN['w'] = []
    NN['b'] = []

    # weights uniform in [-1, 1), biases zero
    for i in range(0, total_layer_num-1):
        NN['w'].append(np.random.random((nodes_size[i], nodes_size[i+1]))*2-1)
        NN['b'].append(np.zeros(nodes_size[i+1]))

    return NN


def forward_propagation(NN, input_layer):
    layer_num = len(NN['nodes_size'])
    NN['layers'][0] = input_layer

    x = input_layer
    for i in range(0, layer_num-1):
        w = NN['w'][i]
        b = NN['b'][i]
        x = sigmoid(np.matmul(x, w)+b)
        NN['layers'][i+1] = x


def backward_propagation(NN, input_layer, tags, eps=0.01):
    layer_num = len(NN['nodes_size'])
    t = tags

    forward_propagation(NN, input_layer)
    D = []
    # compute the deltas, output layer first
    for j in range(layer_num-1, 0, -1):
        y = NN['layers'][j]
        if j == layer_num - 1:
            # output layer, mean squared error loss
            d = sigmoid_derivative(y)*(t-y)
        else:
            # hidden layer: propagate the delta backwards
            w = NN['w'][j]
            d = sigmoid_derivative(y)*np.matmul(d, w.T)

        D.insert(0, d)

    # update weights and biases
    for j in range(layer_num-1, 0, -1):
        i = j - 1
        x = NN['layers'][i]
        d = D[i]
        NN['w'][i] += eps*np.matmul(x.T, d) / x.shape[0]
        NN['b'][i] += eps*np.sum(d, axis=0) / x.shape[0]
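
# This mirrors FullConnectionMLP.backward in the class-based file of this
# commit, specialised to sigmoid activations and the mean squared error
# loss on every layer.
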
def train(NN, input_layer, tags, eps, iter_num, eval_num, batch_size):
    print('Training...')
    for i in range(iter_num):
        if batch_size < 0:
            # a negative batch_size selects full-batch training
            batch_input = input_layer
            batch_tags = tags
        else:
            # sample a random mini-batch (with replacement)
            index = np.random.randint(0, input_layer.shape[0], batch_size)
            batch_input = input_layer[index]
            batch_tags = tags[index]
        backward_propagation(NN, batch_input, batch_tags, eps)

        if (i+1) % eval_num == 0:
            forward_propagation(NN, input_layer)
            loss, acc = evaluate(NN, tags)
            print(f'Iteration {i+1}/{iter_num}: Loss={loss}, acc={acc}.')
    print('Training finished!')
    return NN['layers'][-1]


def predict(NN, input_layer):
    forward_propagation(NN, input_layer)
    return NN['layers'][-1]


def evaluate(NN, tags):
    output_layer = NN['layers'][-1]
    loss = np.mean(0.5*np.linalg.norm(output_layer - tags, axis=1)**2)

    y_pred = np.argmax(output_layer, axis=1)
    y_true = np.argmax(tags, axis=1)
    acc = accuracy_score(y_true, y_pred)

    return (loss, acc)


if __name__ == '__main__':
    from sklearn import datasets
    import matplotlib.pyplot as plt

    # generate sample data
    np.random.seed(0)
    X, y_true = datasets.make_moons(400, noise=0.20)
    # one-hot encode the two classes
    tags = np.zeros((X.shape[0], 2))
    tags[np.where(y_true == 0), 0] = 1
    tags[np.where(y_true == 1), 1] = 1

    x_train = X[:200]
    y_train_tags = tags[:200]
    x_test = X[200:]
    y_test_tags = tags[200:]
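    # make_moons shuffles its samples by default, so the first and last
    # 200 rows are each a random mix of both classes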
    # plt.scatter(X[:, 0], X[:, 1], c=y_true, cmap=plt.cm.Spectral)
    # plt.show()

    # initialize the NN model
    nodes_size = [2, 8, 2]
    NN = initialize(nodes_size)
    # train
    y_res = train(NN, x_train, y_train_tags,
                  eps=0.1, iter_num=100000,
                  eval_num=1000, batch_size=-1)
    y_train_pred = np.argmax(y_res, axis=1)
    (loss, acc) = evaluate(NN, y_train_tags)
    print('--------------------------------')
    print(f'train: Loss={loss}, acc={acc}.')

    # predict
    y_res = predict(NN, x_test)
    y_test_pred = np.argmax(y_res, axis=1)

    # plot ground truth vs. predictions on the test set
    plt.scatter(x_test[:, 0], x_test[:, 1], c=y_true[200:], cmap=plt.cm.Spectral)
    plt.title("ground truth")
    plt.show()

    plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test_pred, cmap=plt.cm.Spectral)
    plt.title("predicted")
    plt.show()

    (loss, acc) = evaluate(NN, y_test_tags)
    print('--------------------------------')
    print(f'predict: Loss={loss}, acc={acc}.')

@@ -0,0 +1,41 @@
'''
Author: SJ2050
Date: 2022-01-21 12:01:47
LastEditTime: 2022-01-22 11:04:40
Version: v0.0.1
Description: Fully connected multi-layer perceptron using sklearn.
Copyright © 2022 SJ2050
'''
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# load data and scale it to [0, 1]
digits = load_digits()
X = digits.data
Y = digits.target
X -= X.min()
X /= X.max()

x_train = X[:-500]
y_train = Y[:-500]
# hidden_layer_sizes takes a tuple with one entry per hidden layer;
# (100,) builds a single hidden layer of 100 units (a bare (100) would
# just be the int 100)
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=10000)
mlp.fit(x_train, y_train)
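
# unlike the hand-written MLPs in this commit, MLPClassifier defaults to
# relu activations and the adam optimizer; for the stochastic solvers,
# max_iter counts epochs rather than individual gradient steps
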
x_test = X[-500:]
y_test = Y[-500:]
predictions = mlp.predict(x_test)
acc = accuracy_score(y_test, predictions)
print('--------------------------------')
print(f'predict: acc = {acc}.')

cm = confusion_matrix(y_test, predictions)
plt.matshow(cm)
plt.title('Confusion Matrix')
plt.colorbar()
plt.ylabel('Groundtruth')
plt.xlabel('Predict')
plt.show()

(6 binary image files added: 1.9 MiB, 51 KiB, 52 KiB, 16 KiB, 15 KiB, 15 KiB)