'''
Author: SJ2050
Date: 2022-01-21 16:57:04
LastEditTime: 2022-01-22 10:58:38
Version: v0.0.1
Description: Fully-connected multi-layer perceptron class declaration.
Copyright © 2022 SJ2050
'''
import numpy as np
from sklearn.metrics import accuracy_score


class FullConnectionMLP:
    # nodes_size: the size of each layer.
    # activation_func: activation function of the hidden layers
    #                  (only `sigmoid` is supported for now).
    # output_activation_func: activation function of the output layer.
    #                         If the value is `sigmoid`, the mean squared
    #                         error loss function is used; if the value
    #                         is `softmax`, the cross-entropy loss
    #                         function is used.
    def __init__(self, nodes_size, activation_func='sigmoid',
                 output_activation_func='softmax'):
        self.nodes_size = nodes_size
        self.activation_func = activation_func
        self.output_activation_func = output_activation_func

        total_layer_num = len(nodes_size)
        self.layer_num = total_layer_num
        self.layers = [np.array([]) for _ in range(total_layer_num)]
        self.w = []
        self.b = []
        # Initialize weights uniformly in [-1, 1) and biases to zero.
        for i in range(0, total_layer_num-1):
            self.w.append(np.random.random((nodes_size[i], nodes_size[i+1]))*2-1)
            self.b.append(np.zeros(nodes_size[i+1]))

    def __activate_func(self, activation_name):
        if activation_name == 'sigmoid':
            y_func = lambda x: 1/(1+np.exp(-x))
            # Derivative expressed in terms of the output: y' = y*(1-y).
            dy_func = lambda y: y*(1-y)
        elif activation_name == 'softmax':
            def y_func(x):
                # Shift by the row maximum for numerical stability.
                e = np.exp(x - x.max(axis=1, keepdims=True))
                return e/np.sum(e, axis=1, keepdims=True)
            # Softmax Jacobian: dyi/dxj = yi*(1-yi) if i == j, else -yi*yj.
            dy_func = lambda yi, yj, equal: yi*(1-yi) if equal else -yi*yj
        else:
            raise RuntimeError('Unsupported activation name!')
        return (y_func, dy_func)

    def forward(self, X_input):
        # Input layer.
        self.layers[0] = X_input

        # Hidden layers.
        X = X_input
        (y_func, _) = self.__activate_func(self.activation_func)
        for i in range(0, self.layer_num-2):
            w = self.w[i]
            b = self.b[i]
            X = y_func(np.matmul(X, w)+b)
            self.layers[i+1] = X

        # Output layer.
        (y_func, _) = self.__activate_func(self.output_activation_func)
        w = self.w[-1]
        b = self.b[-1]
        X = y_func(np.matmul(X, w)+b)
        self.layers[-1] = X

    def backward(self, X_input, label_tags, eps=0.1):
        t = label_tags
        self.forward(X_input)

        D = []
        # Delta of the output layer.
        y = self.layers[-1]
        if self.output_activation_func == 'sigmoid':
            # Mean squared error loss: delta = y'(z)*(t - y).
            (_, dy_func) = self.__activate_func(self.output_activation_func)
            d = dy_func(y)*(t-y)
        elif self.output_activation_func == 'softmax':
            # Cross-entropy loss: the softmax Jacobian and the loss
            # gradient combine into the simple form delta = t - y.
            d = t-y
        else:
            raise RuntimeError('Unsupported output activation name!')
        D.insert(0, d)

        # Propagate the deltas back through the hidden layers.
        if self.activation_func == 'sigmoid':
            (_, dy_func) = self.__activate_func(self.activation_func)
            for j in range(self.layer_num-2, 0, -1):
                y = self.layers[j]
                w = self.w[j]
                d = dy_func(y)*np.matmul(d, w.T)
                D.insert(0, d)
        else:
            raise RuntimeError('Hidden layers only support the sigmoid '
                               'activation function for now!')

        # Update weights and biases (the deltas already carry the minus
        # sign of the loss gradient, hence the `+=`).
        for j in range(self.layer_num-1, 0, -1):
            i = j - 1
            x = self.layers[i]
            d = D[i]
            self.w[i] += eps*np.matmul(x.T, d)/x.shape[0]
            self.b[i] += eps*np.sum(d, axis=0)/x.shape[0]

    def evaluate(self, label_tags):
        output_layer = self.layers[-1]
        # The reported loss is always the mean squared error, used here
        # only as a monitoring metric.
        loss = np.mean(0.5*np.linalg.norm(output_layer - label_tags, axis=1)**2)
        y_pred = np.argmax(output_layer, axis=1)
        y_true = np.argmax(label_tags, axis=1)
        acc = accuracy_score(y_true, y_pred)
        return (loss, acc)

    def train(self, X_input, label_tags, eps=0.1, iter_num=2000,
              eval_num=100, batch_size=100):
        print('Training...')
        for i in range(iter_num):
            if batch_size < 0:
                # A negative batch size means full-batch training.
                batch_input = X_input
                batch_tags = label_tags
            else:
                # Sample a mini-batch with replacement.
                index = np.random.randint(0, X_input.shape[0], batch_size)
                batch_input = X_input[index]
                batch_tags = label_tags[index]
            self.backward(batch_input, batch_tags, eps)

            if (i+1) % eval_num == 0:
                self.forward(X_input)
                loss, acc = self.evaluate(label_tags)
                print(f'Iteration {i+1} of {iter_num}: Loss={loss}, acc={acc}.')
        print('Training finished!')
        return self.layers[-1]

    def predict(self, X_input):
        self.forward(X_input)
        return self.layers[-1]
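
# ------------------------------------------------------------------
# Optional sanity check (a hypothetical helper, not part of the
# original script): a minimal finite-difference gradient check sketch,
# assuming the softmax output / cross-entropy configuration above. It
# illustrates why `backward` can use the simple delta `d = t - y`: the
# gradient of the mean cross-entropy w.r.t. the last weight matrix is
# x.T @ (y - t) / batch_size, so the analytic entry below should match
# a central-difference estimate of the same derivative.
def gradient_check(model, X, t, i=0, j=0, h=1e-5):
    def cross_entropy():
        model.forward(X)
        return -np.mean(np.sum(t*np.log(model.layers[-1] + 1e-12), axis=1))

    model.forward(X)
    x, y = model.layers[-2], model.layers[-1]
    analytic = np.matmul(x.T, y - t)[i, j]/X.shape[0]

    w_orig = model.w[-1][i, j]
    model.w[-1][i, j] = w_orig + h
    loss_plus = cross_entropy()
    model.w[-1][i, j] = w_orig - h
    loss_minus = cross_entropy()
    model.w[-1][i, j] = w_orig  # restore the probed weight
    numeric = (loss_plus - loss_minus)/(2*h)
    print(f'gradient check: analytic={analytic:.8f}, numeric={numeric:.8f}')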

if __name__ == '__main__':
    from sklearn.datasets import load_digits
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt

    # Load the data and scale it into [0, 1].
    digits = load_digits()
    X = digits.data
    Y = digits.target
    X -= X.min()
    X /= X.max()

    x_train = X[:-500]
    y_train = Y[:-500]
    # One-hot encode the training labels.
    y_train_tags = np.zeros((x_train.shape[0], 10))
    for i in range(10):
        y_train_tags[y_train == i, i] = 1

    # Initialize the NN model.
    nodes_size = [64, 100, 10]
    fc_mlp = FullConnectionMLP(nodes_size, activation_func='sigmoid',
                               output_activation_func='softmax')
    pred_train = fc_mlp.train(x_train, y_train_tags, eps=0.1, iter_num=20000,
                              eval_num=100, batch_size=100)
    (loss, acc) = fc_mlp.evaluate(y_train_tags)
    print('--------------------------------')
    print(f'train: Loss={loss}, acc={acc}.')

    x_test = X[-500:]
    y_test = Y[-500:]
    # One-hot encode the test labels.
    y_test_tags = np.zeros((x_test.shape[0], 10))
    for i in range(10):
        y_test_tags[y_test == i, i] = 1
    y_res = fc_mlp.predict(x_test)
    y_test_pred = np.argmax(y_res, axis=1)
    (loss, acc) = fc_mlp.evaluate(y_test_tags)
    print('--------------------------------')
    print(f'predict: Loss={loss}, acc={acc}.')

    cm = confusion_matrix(y_test, y_test_pred)
    plt.matshow(cm)
    plt.title('Confusion Matrix')
    plt.colorbar()
    plt.ylabel('Groundtruth')
    plt.xlabel('Predict')
    plt.show()
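
    # A quick sanity check of the analytic gradients after training (see
    # the hedged `gradient_check` sketch above); the two printed numbers
    # should agree to several decimal places.
    gradient_check(fc_mlp, x_test[:50], y_test_tags[:50])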