'''
Author: SJ2050
Date: 2022-01-21 16:57:04
LastEditTime: 2022-01-22 10:58:38
Version: v0.0.1
Description: Fully connected multi-layer perceptron (MLP) class.
Copyright © 2022 SJ2050
'''
import numpy as np
from sklearn.metrics import accuracy_score


class FullConnectionMLP:
    # nodes_size: size of each layer, e.g. [64, 100, 10].
    # activation_func: activation function of the hidden layers
    #                  (only `sigmoid` is supported for now).
    # output_activation_func: activation function of the output layer.
    #                         If `sigmoid`, the mean-square loss function is
    #                         used as the error function; if `softmax`, the
    #                         cross-entropy loss function is used.
    def __init__(self, nodes_size,
                 activation_func='sigmoid',
                 output_activation_func='softmax'):
        self.nodes_size = nodes_size
        self.activation_func = activation_func
        self.output_activation_func = output_activation_func

        total_layer_num = len(nodes_size)
        self.layer_num = total_layer_num
        self.layers = [np.array([]) for _ in range(total_layer_num)]
        self.w = []
        self.b = []
        # initialize weights uniformly in [-1, 1) and biases to zero
        for i in range(total_layer_num - 1):
            self.w.append(np.random.random((nodes_size[i], nodes_size[i+1]))*2 - 1)
            self.b.append(np.zeros(nodes_size[i+1]))

    def __activate_func(self, activation_name):
        if activation_name == 'sigmoid':
            y_func = lambda x: 1/(1 + np.exp(-x))
            dy_func = lambda y: y*(1 - y)
        elif activation_name == 'softmax':
            # Shift by the row-wise max before exponentiating; this keeps
            # np.exp from overflowing and leaves the softmax value unchanged.
            def y_func(x):
                e = np.exp(x - np.max(x, axis=1, keepdims=True))
                return e/np.sum(e, axis=1, keepdims=True)
            dy_func = lambda yi, yj, equal: yi*(1 - yi) if equal else -yi*yj
        else:
            raise RuntimeError('Unsupported activation name!!')

        return (y_func, dy_func)

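    # Standard identities behind dy_func: for the sigmoid, y = 1/(1 + exp(-x))
    # gives dy/dx = y*(1 - y), which is why dy_func takes the already-computed
    # activation y instead of x. For the softmax, the Jacobian entries are
    # dy_i/dx_j = y_i*(1 - y_i) when i == j and -y_i*y_j otherwise, which is
    # exactly what the three-argument dy_func encodes.
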
    def forward(self, X_input):
        # input layer
        self.layers[0] = X_input

        # hidden layers
        X = X_input
        (y_func, _) = self.__activate_func(self.activation_func)
        for i in range(self.layer_num - 2):
            w = self.w[i]
            b = self.b[i]
            X = y_func(np.matmul(X, w) + b)
            self.layers[i+1] = X

        # output layer
        (y_func, _) = self.__activate_func(self.output_activation_func)
        w = self.w[-1]
        b = self.b[-1]
        X = y_func(np.matmul(X, w) + b)
        self.layers[-1] = X

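    # Shape walkthrough for forward, using the demo configuration below
    # (nodes_size = [64, 100, 10], batch of 100 samples) as an example:
    #   X_input:           (100, 64)
    #   hidden activation: (100, 100) = sigmoid((100, 64) @ (64, 100) + (100,))
    #   output activation: (100, 10)  = softmax((100, 100) @ (100, 10) + (10,))
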
    def backward(self, X_input, label_tags, eps=0.1):
        t = label_tags

        self.forward(X_input)
        D = []
        # delta of output layer
        y = self.layers[-1]
        if self.output_activation_func == 'sigmoid':
            # mean-square loss function
            (_, dy_func) = self.__activate_func(self.output_activation_func)
            d = dy_func(y)*(t - y)
        elif self.output_activation_func == 'softmax':
            # Cross-entropy loss function: combined with softmax, the output
            # delta simplifies to (t - y), so dy_func is not needed here.
            d = t - y
        else:
            raise RuntimeError('Unsupported output_activation name!!')
        D.insert(0, d)

        if self.activation_func == 'sigmoid':
            (_, dy_func) = self.__activate_func(self.activation_func)
            # propagate the deltas back through the hidden layers
            for j in range(self.layer_num - 2, 0, -1):
                y = self.layers[j]
                w = self.w[j]
                d = dy_func(y)*np.matmul(d, w.T)
                D.insert(0, d)
        else:
            raise RuntimeError('The hidden-layer activation function only supports sigmoid for now!')

        # update weights and biases, averaging over the batch
        for j in range(self.layer_num - 1, 0, -1):
            i = j - 1
            x = self.layers[i]
            d = D[i]
            self.w[i] += eps*np.matmul(x.T, d)/x.shape[0]
            self.b[i] += eps*np.sum(d, axis=0)/x.shape[0]

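    # The update above, written out: for layer i with input x (m rows in the
    # batch) and delta d,
    #   w_i <- w_i + eps * x^T @ d / m
    #   b_i <- b_i + eps * (column sums of d) / m
    # The '+' sign works because each delta already carries the (t - y)
    # factor, i.e. it is the negative gradient of the loss with respect to
    # the layer's pre-activation.
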
    def evaluate(self, label_tags):
        output_layer = self.layers[-1]
        # The reported loss is the batch-mean squared error, regardless of
        # which loss function the output activation implies during training.
        loss = np.mean(0.5*np.linalg.norm(output_layer - label_tags, axis=1)**2)

        y_pred = np.argmax(output_layer, axis=1)
        y_true = np.argmax(label_tags, axis=1)
        acc = accuracy_score(y_true, y_pred)

        return (loss, acc)

    def train(self, X_input, label_tags, eps=0.1, iter_num=2000, eval_num=100, batch_size=100):
        print('Training...')
        for i in range(iter_num):
            if batch_size < 0:
                # a negative batch_size means full-batch training
                batch_input = X_input
                batch_tags = label_tags
            else:
                index = np.random.randint(0, X_input.shape[0], batch_size)
                batch_input = X_input[index]
                batch_tags = label_tags[index]
            self.backward(batch_input, batch_tags, eps)

            if (i+1) % eval_num == 0:
                self.forward(X_input)
                loss, acc = self.evaluate(label_tags)
                print(f'{i+1}th iteration of {iter_num}: Loss={loss}, acc={acc}.')
        print('Training finished!')
        return self.layers[-1]

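    # Note on train(): np.random.randint samples each mini-batch independently
    # and with replacement, so this is plain stochastic gradient descent
    # rather than epoch-based shuffling; iter_num counts batches, not passes
    # over the data.
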
    def predict(self, X_input):
        self.forward(X_input)
        return self.layers[-1]


if __name__ == '__main__':
    from sklearn.datasets import load_digits
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt

    # load data and scale pixel values into [0, 1]
    digits = load_digits()
    X = digits.data
    Y = digits.target
    X -= X.min()
    X /= X.max()

    x_train = X[:-500]
    y_train = Y[:-500]
    # one-hot encode the training labels
    y_train_tags = np.zeros((x_train.shape[0], 10))
    for i in range(10):
        y_train_tags[np.where(y_train == i), i] = 1

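    # An equivalent vectorized one-hot encoding (illustrative alternative to
    # the loop above):
    #   y_train_tags = np.eye(10)[y_train]
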
    # initialize NN model
    nodes_size = [64, 100, 10]
    fc_mlp = FullConnectionMLP(nodes_size, activation_func='sigmoid',
                               output_activation_func='softmax')
    pred_train = fc_mlp.train(x_train, y_train_tags, eps=0.1, iter_num=20000,
                              eval_num=100, batch_size=100)
    (loss, acc) = fc_mlp.evaluate(y_train_tags)
    print('--------------------------------')
    print(f'train: Loss={loss}, acc={acc}.')

    x_test = X[-500:]
    y_test = Y[-500:]
    # one-hot encode the test labels
    y_test_tags = np.zeros((x_test.shape[0], 10))
    for i in range(10):
        y_test_tags[np.where(y_test == i), i] = 1

    y_res = fc_mlp.predict(x_test)
    y_test_pred = np.argmax(y_res, axis=1)
    (loss, acc) = fc_mlp.evaluate(y_test_tags)
    print('--------------------------------')
    print(f'predict: Loss={loss}, acc={acc}.')

    cm = confusion_matrix(y_test, y_test_pred)
    plt.matshow(cm)
    plt.title('Confusion Matrix')
    plt.colorbar()
    plt.ylabel('Groundtruth')
    plt.xlabel('Predict')
    plt.show()