

'''
Author: SJ2050
Date: 2022-01-21 16:57:04
LastEditTime: 2022-01-22 10:58:38
Version: v0.0.1
Description: Fully-connected multi-layer perceptron implementation.
Copyright © 2022 SJ2050
'''
import numpy as np
from sklearn.metrics import accuracy_score

class FullConnectionMLP:
    # nodes_size: the size of each layer, input through output.
    # activation_func: activation function of the hidden layers
    #                  (only `sigmoid` is supported for now).
    # output_activation_func: activation function of the output layer.
    #                         With `sigmoid`, the mean-square loss is used;
    #                         with `softmax`, the cross-entropy loss is used.
    def __init__(self, nodes_size,
                 activation_func='sigmoid',
                 output_activation_func='softmax'):
        self.nodes_size = nodes_size
        self.activation_func = activation_func
        self.output_activation_func = output_activation_func
        total_layer_num = len(nodes_size)
        self.layer_num = total_layer_num
        self.layers = [np.array([]) for _ in range(total_layer_num)]
        self.w = []
        self.b = []
        # initialize weights uniformly in [-1, 1) and biases at zero
        for i in range(total_layer_num - 1):
            self.w.append(np.random.random((nodes_size[i], nodes_size[i+1]))*2 - 1)
            self.b.append(np.zeros(nodes_size[i+1]))
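    # For reference, the shapes created above for, e.g., nodes_size=[64, 100, 10]
    # (the values used in the demo below) are:
    #   w[0]: (64, 100),  b[0]: (100,)   # input  -> hidden
    #   w[1]: (100, 10),  b[1]: (10,)    # hidden -> output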
    def __activate_func(self, activation_name):
        if activation_name == 'sigmoid':
            y_func = lambda x: 1/(1+np.exp(-x))
            # derivative expressed in terms of the activation output y
            dy_func = lambda y: y*(1-y)
        elif activation_name == 'softmax':
            def y_func(x):
                # shift by the row-wise max for numerical stability
                e = np.exp(x - np.max(x, axis=1, keepdims=True))
                return e / np.sum(e, axis=1, keepdims=True)
            # pairwise softmax Jacobian entries dyi/dxj
            dy_func = lambda yi, yj, equal: yi*(1-yi) if equal else -yi*yj
        else:
            raise RuntimeError('Unsupported activation name!!')
        return (y_func, dy_func)
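    # A quick numeric sanity check of the sigmoid pair above (illustrative
    # sketch only; `s` and `x0` are hypothetical names, not part of the class):
    #   s = lambda x: 1/(1+np.exp(-x)); x0 = 0.5
    #   # dy_func takes the *output* y, since sigmoid'(x) = y*(1-y) with y = s(x)
    #   assert abs((s(x0+1e-6)-s(x0-1e-6))/2e-6 - s(x0)*(1-s(x0))) < 1e-6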
    def forward(self, X_input):
        # input layer
        self.layers[0] = X_input
        # hidden layers
        X = X_input
        (y_func, _) = self.__activate_func(self.activation_func)
        for i in range(self.layer_num - 2):
            X = y_func(np.matmul(X, self.w[i]) + self.b[i])
            self.layers[i+1] = X
        # output layer
        (y_func, _) = self.__activate_func(self.output_activation_func)
        X = y_func(np.matmul(X, self.w[-1]) + self.b[-1])
        self.layers[-1] = X
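    # The method above implements the batch-first recurrence
    #   layers[i+1] = f(layers[i] @ w[i] + b[i]),
    # so an input of shape (n, nodes_size[0]) yields layers[i] of shape
    # (n, nodes_size[i]); with a softmax output, each row of layers[-1]
    # sums to 1.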
    def backward(self, X_input, label_tags, eps=0.1):
        t = label_tags
        self.forward(X_input)
        D = []
        # delta of the output layer
        y = self.layers[-1]
        if self.output_activation_func == 'sigmoid':
            # mean-square loss: delta = f'(y) * (t - y)
            (_, dy_func) = self.__activate_func(self.output_activation_func)
            d = dy_func(y)*(t-y)
        elif self.output_activation_func == 'softmax':
            # cross-entropy loss: the softmax Jacobian cancels, delta = t - y
            d = t - y
        else:
            raise RuntimeError('Unsupported output_activation name!!')
        D.insert(0, d)
        # deltas of the hidden layers, propagated back through the weights
        if self.activation_func == 'sigmoid':
            (_, dy_func) = self.__activate_func(self.activation_func)
            for j in range(self.layer_num-2, 0, -1):
                y = self.layers[j]
                d = dy_func(y)*np.matmul(d, self.w[j].T)
                D.insert(0, d)
        else:
            raise RuntimeError('Only sigmoid hidden layers are supported for now!')
        # update weights and biases
        for j in range(self.layer_num-1, 0, -1):
            i = j - 1
            x = self.layers[i]
            d = D[i]
            self.w[i] += eps*np.matmul(x.T, d)/x.shape[0]
            self.b[i] += eps*np.sum(d, axis=0)/x.shape[0]
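    # Backprop recap for the code above: with softmax output + cross-entropy
    # the softmax Jacobian cancels and the output delta is simply (t - y);
    # each hidden delta is then d_prev = sigmoid'(y_hidden) * (d @ w.T), and
    # the update w += eps * x.T @ d / batch_size is ordinary mini-batch
    # gradient descent (d already carries the descent sign).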
    def evaluate(self, label_tags):
        output_layer = self.layers[-1]
        # mean-square loss, reported regardless of the output activation
        loss = np.mean(0.5*np.linalg.norm(output_layer - label_tags, axis=1)**2)
        y_pred = np.argmax(output_layer, axis=1)
        y_true = np.argmax(label_tags, axis=1)
        acc = accuracy_score(y_true, y_pred)
        return (loss, acc)
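    # A cross-entropy readout would be a one-line alternative to the MSE above
    # (sketch only; `ce` is a hypothetical name, not used by this class):
    #   ce = -np.mean(np.sum(label_tags * np.log(output_layer + 1e-12), axis=1))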
    def train(self, X_input, label_tags, eps=0.1, iter_num=2000, eval_num=100, batch_size=100):
        print('Training...')
        for i in range(iter_num):
            if batch_size < 0:
                # a negative batch_size means full-batch training
                batch_input = X_input
                batch_tags = label_tags
            else:
                # sample a mini-batch (with replacement)
                index = np.random.randint(0, X_input.shape[0], batch_size)
                batch_input = X_input[index]
                batch_tags = label_tags[index]
            self.backward(batch_input, batch_tags, eps)
            if (i+1) % eval_num == 0:
                self.forward(X_input)
                loss, acc = self.evaluate(label_tags)
                print(f'Iteration {i+1} of {iter_num}: Loss={loss}, acc={acc}.')
        print('Training finished!')
        return self.layers[-1]
    def predict(self, X_input):
        self.forward(X_input)
        return self.layers[-1]
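# Minimal usage sketch (hypothetical shapes and data; see the demo under
# __main__ below for a full run on sklearn's digits dataset):
#   mlp = FullConnectionMLP([4, 8, 3])
#   X_toy = np.random.rand(30, 4)
#   T_toy = np.eye(3)[np.random.randint(0, 3, 30)]      # one-hot targets
#   mlp.train(X_toy, T_toy, eps=0.1, iter_num=200, eval_num=100, batch_size=10)
#   probs = mlp.predict(np.random.rand(5, 4))           # (5, 3), rows sum to 1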
if __name__ == '__main__':
    from sklearn.datasets import load_digits
    from sklearn.metrics import confusion_matrix
    import matplotlib.pyplot as plt

    # load data and scale features to [0, 1]
    digits = load_digits()
    X = digits.data
    Y = digits.target
    X -= X.min()
    X /= X.max()
    x_train = X[:-500]
    y_train = Y[:-500]
    # one-hot encode the training labels
    y_train_tags = np.zeros((x_train.shape[0], 10))
    for i in range(10):
        y_train_tags[np.where(y_train == i), i] = 1
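    # (an equivalent one-liner, for reference: y_train_tags = np.eye(10)[y_train])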
    # initialize the NN model: 64 inputs (8x8 pixels), 100 hidden, 10 outputs
    nodes_size = [64, 100, 10]
    fc_mlp = FullConnectionMLP(nodes_size, activation_func='sigmoid',
                               output_activation_func='softmax')
    pred_train = fc_mlp.train(x_train, y_train_tags, eps=0.1, iter_num=20000,
                              eval_num=100, batch_size=100)
    (loss, acc) = fc_mlp.evaluate(y_train_tags)
    print('--------------------------------')
    print(f'train: Loss={loss}, acc={acc}.')

    # evaluate on the held-out 500 samples
    x_test = X[-500:]
    y_test = Y[-500:]
    y_test_tags = np.zeros((x_test.shape[0], 10))
    for i in range(10):
        y_test_tags[np.where(y_test == i), i] = 1
    y_res = fc_mlp.predict(x_test)
    y_test_pred = np.argmax(y_res, axis=1)
    (loss, acc) = fc_mlp.evaluate(y_test_tags)
    print('--------------------------------')
    print(f'predict: Loss={loss}, acc={acc}.')
    # visualize the confusion matrix
    cm = confusion_matrix(y_test, y_test_pred)
    plt.matshow(cm)
    plt.title('Confusion Matrix')
    plt.colorbar()
    plt.ylabel('Groundtruth')
    plt.xlabel('Predicted')
    plt.show()