In [1]:
%matplotlib inline
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time
from IPython import display

Multi Layer Perceptron

Implementation of an MLP with one hidden layer and one linear output unit
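
As the docstrings below note, the model follows [Bishop2006]. With each layer's bias absorbed as an extra unit clamped to 1, the forward pass (eqs. 5.62-5.64) and the backpropagated deltas (eq. 5.56) are

$$a_j = \sum_i w_{ji}^{(1)} x_i, \qquad z_j = h(a_j), \qquad y_k = \sum_j w_{kj}^{(2)} z_j$$

$$\delta_k = y_k - t_k, \qquad \delta_j = h'(a_j) \sum_k w_{kj}^{(2)} \delta_k$$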

In [3]:
class MLP:
    """
    Implementation based on the book:
    [Bishop2006] Bishop, Christopher M. Pattern recognition and machine learning. Vol. 1. New York: springer, 2006.
    """
    def __init__(self, n_input, n_hidden, n_output, activation='tanh',
                 learning_rate=0.001, lr_policy='fixed', stepsize=20,
                 gamma=0.1, low=0.0001, high=0.1):
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.lr = learning_rate
        self.activation = activation

        self.lr_policy = lr_policy
        self.gamma = gamma
        self.low = low
        self.high = high
        self.stepsize = stepsize
        self.step = 0

        self.initialize()

    def initialize(self):
        """
        Initialization based on the article:
        [Xavier10] X. Glorot, Y. Bengio, Understanding the difficulty of training deep feedforward neural networks, AISTATS 2010
        """
        if self.activation == 'linear':
            hi_limit = 0.5
            oh_limit = 0.5
        elif self.activation == 'tanh':
            # note: a wider range than [Xavier10]'s sqrt(6/(fan_in + fan_out))
            hi_limit = 6/np.sqrt(self.n_input + self.n_hidden)
            oh_limit = 0.5
        elif self.activation == 'sin':
            hi_limit = 12/np.sqrt(self.n_input + self.n_hidden)
            oh_limit = 1
        else:
            raise ValueError("Unknown activation function: {0}".format(self.activation))
        self.w_hi = np.random.uniform(-hi_limit, hi_limit, size=(self.n_hidden, self.n_input + 1))
        self.w_oh = np.random.uniform(-oh_limit, oh_limit, size=(self.n_output, self.n_hidden + 1))


    def train(self, X, T):
        for x, t in zip(X, T):
            [a_hidden, z_hidden, y] = self.forward(x)
            [d_hidden, d_output] = self.compute_deltas(y, t, z_hidden, a_hidden)
            self.update(x, d_hidden, z_hidden, d_output)

            self.step += 1
            if self.lr_policy == 'step':
                if self.step%self.stepsize == 0:
                    self.lr *= self.gamma
            elif self.lr_policy == 'rand':
                if self.step%self.stepsize == 0:
                    self.lr = np.random.uniform(low=self.low, high=self.high)

    def test(self, X):
        y = np.zeros(np.size(X))
        for i, x in enumerate(X):
            y[i] = self.output(x)
        return y

    def feature_extraction(self, X):
        z_hidden = np.zeros((np.size(X), self.n_hidden + 1))
        for i, x in enumerate(X):
            z_hidden[i] = self.hidden_output(x)
        return z_hidden

    def error(self, X, t):
        """Sum-of-squares error, E = (1/2) sum_n (y_n - t_n)^2"""
        y = self.test(X)
        return np.sum(np.subtract(y, t)**2)/2

    def mean_error(self, X, t):
        return self.error(X,t)/np.size(t)

    def hidden_output(self, x):
        [a_hidden, z_hidden, y] = self.forward(x)
        return z_hidden

    def output(self, x):
        [a_hidden, z_hidden, y] = self.forward(x)
        return y

    def all_output(self, x):
        return self.forward(x)

    def forward(self, x):
        """
        Forward pass
        eq. (5.62), (5.63), (5.64)
        """
        input_pattern = np.append(x,1)
        a_hidden = np.dot(self.w_hi, input_pattern)
        if self.activation == 'linear':
            z_hidden = a_hidden
        elif self.activation == 'tanh':
            z_hidden = np.tanh(a_hidden)
        elif self.activation == 'sin':
            z_hidden = np.sin(a_hidden)
        z_hidden = np.append(z_hidden,1)
        y = np.dot(self.w_oh, z_hidden)
        return [a_hidden, z_hidden, y]

    def compute_deltas(self, y, t, z_hidden, a_hidden):
        d_output = self.compute_delta_output(y, t)
        d_hidden = self.compute_delta_hidden(z_hidden, a_hidden, d_output)
        return [d_hidden, d_output]

    def compute_delta_hidden(self, z_hidden, a_hidden, d_output):
        """
        Eq. (5.56): delta_j = h'(a_j) * sum_k w_kj * delta_k
        It does not compute the delta for the hidden bias,
        as that delta is not used in the backpropagation.
        """
        d_hidden = np.zeros(self.n_hidden)
        for j in range(self.n_hidden):
            # error summed over the output units, weighted by w_oh
            summation = 0
            for k in range(np.size(d_output)):
                summation += self.w_oh[k, j]*d_output[k]
            # derivative of the activation function, h'(a_j)
            if self.activation == 'tanh':
                derivative = 1 - z_hidden[j]**2   # tanh'(a) = 1 - tanh(a)^2
            elif self.activation == 'linear':
                derivative = 1                    # the identity has unit derivative
            elif self.activation == 'sin':
                derivative = np.cos(a_hidden[j])  # sin'(a) = cos(a)
            else:
                raise ValueError("Unknown activation function: {0}".format(self.activation))
            d_hidden[j] = derivative*summation

        return d_hidden

    def compute_delta_output(self,y,t):
        return y - t

    def update(self, x, d_hidden, z_hidden, d_output):
        """Stochastic gradient descent update of both weight matrices."""
        x = np.append(x, 1)
        for k in range(self.n_output):
            for j in range(self.n_hidden + 1):
                self.w_oh[k,j] -= self.lr*d_output[k]*z_hidden[j]

        for j in range(self.n_hidden):
            for i in range(self.n_input + 1):
                self.w_hi[j,i] -= self.lr*d_hidden[j]*x[i]

    def print_architecture(self):
        print(self.string_architecture())

    def string_architecture(self):
        string = ("ANN with the following architecture\n"
                    "n_input = {0}\n"
                    "n_hidden = {1}\n"
                    "n_output = {2}\n"
                    "lr_policy = {3}\n"
                    "lr = {4}\n").format(self.n_input, self.n_hidden, self.n_output, self.lr_policy, self.lr)
        if self.lr_policy == 'step':
            string += ("stepsize = {0}\n"
                       "gamma = {1}\n").format(self.stepsize, self.gamma)
        string += "Input to hidden weights\n"
        string += np.array_str(self.w_hi)
        string += "\nHidden to output weights\n"
        string += np.array_str(self.w_oh)
        return string
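
As a quick sanity check of the gradients (an illustrative sketch, not part of the original notebook; the names check and point_error are ad hoc), the analytic gradient dE/dw = delta * input used in update() can be compared against a central finite difference of the error:

In []:
# Finite-difference gradient check of the backpropagation (illustrative sketch).
check = MLP(n_input=1, n_hidden=3, n_output=1, activation='tanh')
x = np.array([0.5])
t = np.array([0.2])

a_hidden, z_hidden, y = check.forward(x)
d_hidden, d_output = check.compute_deltas(y, t, z_hidden, a_hidden)
analytic = d_hidden[0] * np.append(x, 1)[0]   # dE/dw_hi[0,0] = delta_0 * x_0

def point_error():
    # sum-of-squares error at the single point (x, t)
    _, _, y_out = check.forward(x)
    return np.sum((y_out - t)**2) / 2

eps = 1e-6
check.w_hi[0, 0] += eps
e_plus = point_error()
check.w_hi[0, 0] -= 2*eps
e_minus = point_error()
check.w_hi[0, 0] += eps                       # restore the weight
numeric = (e_plus - e_minus) / (2*eps)
print("analytic = {0:.8f}  numeric = {1:.8f}".format(analytic, numeric))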

Auxiliary functions

To visualize and store the results

In [4]:
def update_plot(X,Y,grid,prediction,plt):
    """
    Updates the current figure with the sampled targets
    and the prediction of the model
    """
    plt.clf()
    plt.scatter(X, Y, color='red', label='samples')
    plt.plot(grid, prediction, label='prediction')
    display.clear_output(wait=True)
    display.display(plt.gcf())

def export_plot(plt,filename,format='pdf'):
    """
    Exports the figure into the project folder
    supported formats: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg, svgz, tif, tiff.
    """
    formated_filename = "{0}_{1}.{2}".format(time.strftime("%Y%m%d_%H%M%S"), filename, format)
    plt.savefig(formated_filename, format=format)

Pattern to approximate

In [5]:
N = 50 # datapoints

X = np.random.uniform(-1,1,N)*np.pi
grid = np.linspace(-1,1,150)*(np.pi + 0.5)

PATTERN = 'linear' # 'linear', 'abs', 'squared', 'sin', 'cos', 'sin_cos'
BIAS = 1
NOISE = 0.5

if PATTERN == 'sin':
    original = np.sin(grid)
    pattern = np.sin(X)
elif PATTERN == 'linear':
    original = grid
    pattern = X
elif PATTERN == 'abs':
    original = np.abs(grid)
    pattern = np.abs(X)
elif PATTERN == 'squared':
    original = grid**2
    pattern = X**2
elif PATTERN == 'cos':
    original = np.cos(grid)
    pattern = np.cos(X)
elif PATTERN == 'sin_cos':
    original = np.sin(np.cos(grid)*np.pi)
    pattern = np.sin(np.cos(X)*np.pi)
else:
    original = grid
    pattern = X

original += BIAS
pattern += BIAS
if NOISE > 0:
    Y = pattern + np.random.normal(size=N, scale=NOISE)
else:
    Y = pattern

plt.plot(grid, original, color='red', label='original')
plt.scatter(X, Y, color='red', label='samples')
plt.ylabel('target')
plt.xlabel('x')
plt.legend()
export_plot(plt,filename="target_pattern")

Specification of the MLP

It also prints the initial weights

In [6]:
n_input = 1
n_hidden = 3
n_output = 1
activation = 'tanh' # 'tanh', 'linear', 'sin'

# Learning rate parameters
lr_policy = 'step' # 'step', 'rand', 'fixed'
lr = 0.1
gamma = 0.1
low = 0.0001
high = 0.1
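# `step` counts per-sample updates, so stepsize = 100*N lowers lr every 100 epochs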
stepsize = 100*N

EPOCHS = 200

mlp = MLP(n_input, n_hidden, n_output, activation=activation, learning_rate=lr, 
          lr_policy=lr_policy, stepsize=stepsize, gamma=gamma, low=low, high=high)
mlp.print_architecture()
ANN with the following architecture
n_input = 1
n_hidden = 3
n_output = 1
lr_policy = step
lr = 0.1
stepsize = 5000
gamma = 0.1
Input to hidden weights
[[ 0.40417416  1.49094941]
 [-1.20886204 -2.08435003]
 [-1.66809993  2.23600049]]
Hidden to output weights
[[-0.45058766  0.4275441   0.38442166 -0.2102832 ]]

Initial prediction

The prediction uses the randomly initialized weights and biases

In [7]:
plt.clf()
prediction = mlp.test(grid)
plt.plot(grid, original, 'r-.', label='original')
plt.scatter(X, Y, color='red', label='samples')
plt.plot(grid, prediction, label='prediction')

# plot formatting
plt.legend()
plt.ylabel('y')
plt.xlabel('x')
export_plot(plt,"initial_prediction")

Training

In [8]:
plt.clf()
error = np.zeros(EPOCHS)
for epoch in range(EPOCHS):
    prediction = mlp.test(grid)
    # update_plot clears the figure, so the original curve is redrawn after the loop
    update_plot(X,Y,grid,prediction,plt)
    error[epoch] = mlp.mean_error(X,Y)

    mlp.train(X,Y)

plt.plot(grid, original, 'r-.', label='original')
plt.legend()
plt.ylabel('target')
plt.xlabel('x')
export_plot(plt,"final_prediction")

Training error

In [9]:
plt.clf()
plt.plot(range(1,EPOCHS), error[1:])
plt.xlabel("Epoch")
plt.ylabel("Mean squared error")
export_plot(plt,"training_error")

Hidden representation

  • Red dots are the samples of the target pattern
  • The blue line is the model prediction over the whole interval
  • The remaining colors correspond to the hidden unit activations, whose weighted sum through w_oh (plus the bias unit) forms the prediction
In [10]:
plt.clf()
prediction = mlp.test(grid)
plt.plot(grid, original, 'r-.', label='original')
plt.scatter(X,Y,color='red', label="samples")
plt.plot(grid,prediction, label="prediction", linewidth=2)
z_hidden = mlp.feature_extraction(grid)
for i, z in enumerate(np.transpose(z_hidden)):
    if i == n_hidden:
        plt.plot(grid, z, '-.', label="hbias")
    else:
        plt.plot(grid, z, '--', label="hz{0}".format(i))

plt.xlabel('input')
plt.ylabel('output')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
          fancybox=True, shadow=True, ncol=5)
export_plot(plt,"hidden_representation")
In [11]:
mlp.print_architecture()
ANN with the following architecture
n_input = 1
n_hidden = 3
n_output = 1
lr_policy = step
lr = 0.001
stepsize = 5000
gamma = 0.1
Input to hidden weights
[[ 0.91438292  0.81354023]
 [-1.09836804  1.37885202]
 [-1.66521348  5.71241854]]
Hidden to output weights
[[ 1.25876836 -1.12094093 -0.85961849  1.01692897]]
