

'''
Author: SJ2050
Date: 2022-01-21 16:57:04
LastEditTime: 2022-01-22 10:58:38
Version: v0.0.1
Description: Fully-connected multi-layer perceptron implementation.
Copyright © 2022 SJ2050
'''
import numpy as np
from sklearn.metrics import accuracy_score

class FullConnectionMLP:
    # nodes_size: the size of each layer, input through output.
    # activation_func: activation function of the hidden layers
    #                  (only `sigmoid` is supported for now).
    # output_activation_func: activation function of the output layer.
    #                         With `sigmoid`, the mean-square loss is used;
    #                         with `softmax`, the cross-entropy loss is used.
    def __init__(self, nodes_size,
                 activation_func='sigmoid',
                 output_activation_func='softmax'):
        self.nodes_size = nodes_size
        self.activation_func = activation_func
        self.output_activation_func = output_activation_func
        total_layer_num = len(nodes_size)
        self.layer_num = total_layer_num
        self.layers = [np.array([]) for _ in range(total_layer_num)]
        self.w = []
        self.b = []
        # initialize weights uniformly in [-1, 1) and biases at zero
        for i in range(total_layer_num - 1):
            self.w.append(np.random.random((nodes_size[i], nodes_size[i+1]))*2 - 1)
            self.b.append(np.zeros(nodes_size[i+1]))
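    # For reference, the shapes created above for, e.g., nodes_size=[64, 100, 10]
    # (the values used in the demo below) are:
    #   w[0]: (64, 100),  b[0]: (100,)   # input  -> hidden
    #   w[1]: (100, 10),  b[1]: (10,)    # hidden -> output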
    def __activate_func(self, activation_name):
        if activation_name == 'sigmoid':
            y_func = lambda x: 1/(1+np.exp(-x))
            # derivative expressed in terms of the activation output y
            dy_func = lambda y: y*(1-y)
        elif activation_name == 'softmax':
            def y_func(x):
                # shift by the row-wise max for numerical stability
                e = np.exp(x - np.max(x, axis=1, keepdims=True))
                return e / np.sum(e, axis=1, keepdims=True)
            # pairwise softmax Jacobian entries dyi/dxj
            dy_func = lambda yi, yj, equal: yi*(1-yi) if equal else -yi*yj
        else:
            raise RuntimeError('Unsupported activation name!!')
        return (y_func, dy_func)
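    # A quick numeric sanity check of the sigmoid pair above (illustrative
    # sketch only; `s` and `x0` are hypothetical names, not part of the class):
    #   s = lambda x: 1/(1+np.exp(-x)); x0 = 0.5
    #   # dy_func takes the *output* y, since sigmoid'(x) = y*(1-y) with y = s(x)
    #   assert abs((s(x0+1e-6)-s(x0-1e-6))/2e-6 - s(x0)*(1-s(x0))) < 1e-6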
    def forward(self, X_input):
        # input layer
        self.layers[0] = X_input
        # hidden layers
        X = X_input
        (y_func, _) = self.__activate_func(self.activation_func)
        for i in range(self.layer_num - 2):
            X = y_func(np.matmul(X, self.w[i]) + self.b[i])
            self.layers[i+1] = X
        # output layer
        (y_func, _) = self.__activate_func(self.output_activation_func)
        X = y_func(np.matmul(X, self.w[-1]) + self.b[-1])
        self.layers[-1] = X
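    # The method above implements the batch-first recurrence
    #   layers[i+1] = f(layers[i] @ w[i] + b[i]),
    # so an input of shape (n, nodes_size[0]) yields layers[i] of shape
    # (n, nodes_size[i]); with a softmax output, each row of layers[-1]
    # sums to 1.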
    def backward(self, X_input, label_tags, eps=0.1):
        t = label_tags
        self.forward(X_input)
        D = []
        # delta of the output layer
        y = self.layers[-1]
        if self.output_activation_func == 'sigmoid':
            # mean-square loss: delta = f'(y) * (t - y)
            (_, dy_func) = self.__activate_func(self.output_activation_func)
            d = dy_func(y)*(t-y)
        elif self.output_activation_func == 'softmax':
            # cross-entropy loss: the softmax Jacobian cancels, delta = t - y
            d = t - y
        else:
            raise RuntimeError('Unsupported output_activation name!!')
        D.insert(0, d)
        # deltas of the hidden layers, propagated back through the weights
        if self.activation_func == 'sigmoid':
            (_, dy_func) = self.__activate_func(self.activation_func)
            for j in range(self.layer_num-2, 0, -1):
                y = self.layers[j]
                d = dy_func(y)*np.matmul(d, self.w[j].T)
                D.insert(0, d)
        else:
            raise RuntimeError('Only sigmoid hidden layers are supported for now!')
        # update weights and biases
        for j in range(self.layer_num-1, 0, -1):
            i = j - 1
            x = self.layers[i]
            d = D[i]
            self.w[i] += eps*np.matmul(x.T, d)/x.shape[0]
            self.b[i] += eps*np.sum(d, axis=0)/x.shape[0]
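    # Backprop recap for the code above: with softmax output + cross-entropy
    # the softmax Jacobian cancels and the output delta is simply (t - y);
    # each hidden delta is then d_prev = sigmoid'(y_hidden) * (d @ w.T), and
    # the update w += eps * x.T @ d / batch_size is ordinary mini-batch
    # gradient descent (d already carries the descent sign).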
    def evaluate(self, label_tags):
        output_layer = self.layers[-1]
        # mean-square loss, reported regardless of the output activation
        loss = np.mean(0.5*np.linalg.norm(output_layer - label_tags, axis=1)**2)
        y_pred = np.argmax(output_layer, axis=1)
        y_true = np.argmax(label_tags, axis=1)
        acc = accuracy_score(y_true, y_pred)
        return (loss, acc)
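    # A cross-entropy readout would be a one-line alternative to the MSE above
    # (sketch only; `ce` is a hypothetical name, not used by this class):
    #   ce = -np.mean(np.sum(label_tags * np.log(output_layer + 1e-12), axis=1))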
    def train(self, X_input, label_tags, eps=0.1, iter_num=2000, eval_num=100, batch_size=100):
        print('Training...')
        for i in range(iter_num):
            if batch_size < 0:
                # a negative batch_size means full-batch training
                batch_input = X_input
                batch_tags = label_tags
            else:
                # sample a mini-batch (with replacement)
                index = np.random.randint(0, X_input.shape[0], batch_size)
                batch_input = X_input[index]
                batch_tags = label_tags[index]
            self.backward(batch_input, batch_tags, eps)
            if (i+1) % eval_num == 0:
                self.forward(X_input)
                loss, acc = self.evaluate(label_tags)
                print(f'Iteration {i+1} of {iter_num}: Loss={loss}, acc={acc}.')
        print('Training finished!')
        return self.layers[-1]
    def predict(self, X_input):
        self.forward(X_input)
        return self.layers[-1]
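# Minimal usage sketch (hypothetical shapes and data; see the demo under
# __main__ below for a full run on sklearn's digits dataset):
#   mlp = FullConnectionMLP([4, 8, 3])
#   X_toy = np.random.rand(30, 4)
#   T_toy = np.eye(3)[np.random.randint(0, 3, 30)]      # one-hot targets
#   mlp.train(X_toy, T_toy, eps=0.1, iter_num=200, eval_num=100, batch_size=10)
#   probs = mlp.predict(np.random.rand(5, 4))           # (5, 3), rows sum to 1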
if __name__ == '__main__':
    from sklearn.datasets import load_digits
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt

    # load data and scale features to [0, 1]
    digits = load_digits()
    X = digits.data
    Y = digits.target
    X -= X.min()
    X /= X.max()
    x_train = X[:-500]
    y_train = Y[:-500]
    # one-hot encode the training labels
    y_train_tags = np.zeros((x_train.shape[0], 10))
    for i in range(10):
        y_train_tags[np.where(y_train == i), i] = 1
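    # (an equivalent one-liner, for reference: y_train_tags = np.eye(10)[y_train])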
    # initialize the NN model: 64 inputs (8x8 pixels), 100 hidden, 10 outputs
    nodes_size = [64, 100, 10]
    fc_mlp = FullConnectionMLP(nodes_size, activation_func='sigmoid',
                               output_activation_func='softmax')
    pred_train = fc_mlp.train(x_train, y_train_tags, eps=0.1, iter_num=20000,
                              eval_num=100, batch_size=100)
    (loss, acc) = fc_mlp.evaluate(y_train_tags)
    print('--------------------------------')
    print(f'train: Loss={loss}, acc={acc}.')

    # evaluate on the held-out 500 samples
    x_test = X[-500:]
    y_test = Y[-500:]
    y_test_tags = np.zeros((x_test.shape[0], 10))
    for i in range(10):
        y_test_tags[np.where(y_test == i), i] = 1
    y_res = fc_mlp.predict(x_test)
    y_test_pred = np.argmax(y_res, axis=1)
    (loss, acc) = fc_mlp.evaluate(y_test_tags)
    print('--------------------------------')
    print(f'predict: Loss={loss}, acc={acc}.')
    # visualize the confusion matrix
    cm = confusion_matrix(y_test, y_test_pred)
    plt.matshow(cm)
    plt.title('Confusion Matrix')
    plt.colorbar()
    plt.ylabel('Groundtruth')
    plt.xlabel('Predicted')
    plt.show()