In [1]:
from math import sqrt

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.initializations import normal
from keras.utils.np_utils import to_categorical
from keras.regularizers import l2
from keras.layers import Dense, Activation
from keras.optimizers import Adam
Using Theano backend.

Generate data

Mean subtraction: the input data is already zero-mean, so there is no need to demean it.
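For inputs that are not already centred, demeaning is a single NumPy call (a minimal sketch; X_raw is a hypothetical uncentred data matrix, not used in this notebook):

X_raw = X_raw - np.mean(X_raw, axis=0)  # subtract the per-feature mean from every row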

In [2]:
# copied from http://cs231n.github.io/neural-networks-case-study/

N = 100 # number of points per class
D = 2 # dimensionality
K = 3 # number of classes
X = np.zeros((N * K,D)) # data matrix (each row = single example)
y = np.zeros(N * K, dtype='uint8') # class labels
for j in range(K):
  ix = range(N * j, N * (j + 1))
  r = np.linspace(0.0, 1, N) # radius
  t = np.linspace(j * 4,(j + 1) * 4,N) + np.random.randn(N) * 0.2 # theta
  X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
  y[ix] = j
# let's visualize the data:
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
ax.grid()
In [3]:
X[:5, :]
Out[3]:
array([[ 0.        ,  0.        ],
       [ 0.00192242,  0.00991639],
       [ 0.00579717,  0.01935238],
       [ 0.00871944,  0.02902146],
       [ 0.01147983,  0.03873887]])
In [4]:
y
Out[4]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2], dtype=uint8)
In [5]:
print(X.shape)
print(y.shape)
(300, 2)
(300,)

Build model via Keras

Initial weights: should be drawn from a normal distribution with a standard deviation of $\sqrt{2/n}$, where $n$ is the number of input units (He initialisation).
Regularisation: L2 regularisation on the weights.
y: the label vector y must be converted to one-hot form with to_categorical() (see the sketch below).
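As a quick illustration of the one-hot conversion (a minimal sketch; the actual call appears in the fit() cell further down):

labels = np.array([0, 1, 2, 1])
to_categorical(labels, 3)
# -> one row per label: [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0]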

In [6]:
model = Sequential()
# hidden layer: 100 ReLU units; He initialisation with n = 2 input features
model.add(Dense(100, input_dim=2, activation="relu", W_regularizer=l2(0.01),
                init=lambda shape, name: normal(shape, scale=sqrt(2.0 / 2), name=name)))
# output layer: 3-way softmax; He initialisation with n = 100 hidden units
# (2.0 avoids integer division under Python 2, which would give scale=0)
model.add(Dense(3, activation="softmax", W_regularizer=l2(0.01),
                init=lambda shape, name: normal(shape, scale=sqrt(2.0 / 100), name=name)))
model.compile(optimizer=Adam(), loss="categorical_crossentropy", metrics=['accuracy'])
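
For readers on the Keras 2 API, the same model would look roughly like this (a sketch, not what this notebook runs; Keras 2 renames init/W_regularizer to kernel_initializer/kernel_regularizer, and nb_epoch in fit() becomes epochs):

from keras.initializers import RandomNormal

model2 = Sequential()
model2.add(Dense(100, input_dim=2, activation="relu",
                 kernel_regularizer=l2(0.01),
                 kernel_initializer=RandomNormal(stddev=sqrt(2.0 / 2))))
model2.add(Dense(3, activation="softmax",
                 kernel_regularizer=l2(0.01),
                 kernel_initializer=RandomNormal(stddev=sqrt(2.0 / 100))))
model2.compile(optimizer=Adam(), loss="categorical_crossentropy", metrics=["accuracy"])

Keras 2 also ships a built-in "he_normal" initialiser that implements the same $\sqrt{2/n}$ rule with a truncated normal.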
In [7]:
model.summary()
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
dense_1 (Dense)                  (None, 100)           300         dense_input_1[0][0]              
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 3)             303         dense_1[0][0]                    
====================================================================================================
Total params: 603
____________________________________________________________________________________________________
In [8]:
history = model.fit(X, to_categorical(y, 3), batch_size=1, nb_epoch=100, verbose=2)
Epoch 1/100
0s - loss: 0.9299 - acc: 0.5133
Epoch 2/100
0s - loss: 0.7303 - acc: 0.5367
Epoch 3/100
0s - loss: 0.6711 - acc: 0.5833
Epoch 4/100
0s - loss: 0.6249 - acc: 0.6700
Epoch 5/100
0s - loss: 0.5796 - acc: 0.7267
Epoch 6/100
0s - loss: 0.5401 - acc: 0.7867
Epoch 7/100
0s - loss: 0.5005 - acc: 0.8333
Epoch 8/100
0s - loss: 0.4603 - acc: 0.8667
Epoch 9/100
0s - loss: 0.4250 - acc: 0.8900
Epoch 10/100
0s - loss: 0.3869 - acc: 0.9067
Epoch 11/100
0s - loss: 0.3568 - acc: 0.9367
Epoch 12/100
0s - loss: 0.3290 - acc: 0.9300
Epoch 13/100
0s - loss: 0.3030 - acc: 0.9467
Epoch 14/100
0s - loss: 0.2819 - acc: 0.9367
Epoch 15/100
0s - loss: 0.2600 - acc: 0.9533
Epoch 16/100
0s - loss: 0.2413 - acc: 0.9600
Epoch 17/100
0s - loss: 0.2281 - acc: 0.9600
Epoch 18/100
0s - loss: 0.2139 - acc: 0.9633
Epoch 19/100
0s - loss: 0.2010 - acc: 0.9700
Epoch 20/100
0s - loss: 0.1903 - acc: 0.9633
Epoch 21/100
0s - loss: 0.1793 - acc: 0.9733
Epoch 22/100
0s - loss: 0.1699 - acc: 0.9733
Epoch 23/100
0s - loss: 0.1627 - acc: 0.9767
Epoch 24/100
0s - loss: 0.1542 - acc: 0.9767
Epoch 25/100
0s - loss: 0.1491 - acc: 0.9733
Epoch 26/100
0s - loss: 0.1437 - acc: 0.9800
Epoch 27/100
0s - loss: 0.1374 - acc: 0.9867
Epoch 28/100
0s - loss: 0.1318 - acc: 0.9800
Epoch 29/100
0s - loss: 0.1279 - acc: 0.9867
Epoch 30/100
0s - loss: 0.1245 - acc: 0.9867
Epoch 31/100
0s - loss: 0.1210 - acc: 0.9767
Epoch 32/100
0s - loss: 0.1169 - acc: 0.9900
Epoch 33/100
0s - loss: 0.1161 - acc: 0.9867
Epoch 34/100
0s - loss: 0.1127 - acc: 0.9933
Epoch 35/100
0s - loss: 0.1104 - acc: 0.9867
Epoch 36/100
0s - loss: 0.1082 - acc: 0.9867
Epoch 37/100
0s - loss: 0.1050 - acc: 0.9900
Epoch 38/100
0s - loss: 0.1032 - acc: 0.9867
Epoch 39/100
0s - loss: 0.1018 - acc: 0.9800
Epoch 40/100
0s - loss: 0.0993 - acc: 0.9900
Epoch 41/100
0s - loss: 0.0976 - acc: 0.9900
Epoch 42/100
0s - loss: 0.0964 - acc: 0.9900
Epoch 43/100
0s - loss: 0.0960 - acc: 0.9833
Epoch 44/100
0s - loss: 0.0947 - acc: 0.9867
Epoch 45/100
0s - loss: 0.0931 - acc: 0.9900
Epoch 46/100
0s - loss: 0.0909 - acc: 0.9867
Epoch 47/100
0s - loss: 0.0917 - acc: 0.9800
Epoch 48/100
0s - loss: 0.0903 - acc: 0.9867
Epoch 49/100
0s - loss: 0.0883 - acc: 0.9900
Epoch 50/100
0s - loss: 0.0878 - acc: 0.9933
Epoch 51/100
0s - loss: 0.0867 - acc: 0.9933
Epoch 52/100
0s - loss: 0.0875 - acc: 0.9900
Epoch 53/100
0s - loss: 0.0852 - acc: 0.9900
Epoch 54/100
0s - loss: 0.0854 - acc: 0.9867
Epoch 55/100
0s - loss: 0.0854 - acc: 0.9900
Epoch 56/100
0s - loss: 0.0841 - acc: 0.9933
Epoch 57/100
0s - loss: 0.0829 - acc: 0.9933
Epoch 58/100
0s - loss: 0.0824 - acc: 0.9933
Epoch 59/100
0s - loss: 0.0811 - acc: 0.9900
Epoch 60/100
0s - loss: 0.0815 - acc: 0.9933
Epoch 61/100
0s - loss: 0.0834 - acc: 0.9900
Epoch 62/100
0s - loss: 0.0806 - acc: 0.9933
Epoch 63/100
0s - loss: 0.0797 - acc: 0.9933
Epoch 64/100
0s - loss: 0.0803 - acc: 0.9900
Epoch 65/100
0s - loss: 0.0796 - acc: 0.9933
Epoch 66/100
0s - loss: 0.0787 - acc: 0.9900
Epoch 67/100
0s - loss: 0.0792 - acc: 0.9900
Epoch 68/100
0s - loss: 0.0789 - acc: 0.9900
Epoch 69/100
0s - loss: 0.0785 - acc: 0.9933
Epoch 70/100
0s - loss: 0.0787 - acc: 0.9900
Epoch 71/100
0s - loss: 0.0778 - acc: 0.9900
Epoch 72/100
0s - loss: 0.0783 - acc: 0.9900
Epoch 73/100
0s - loss: 0.0760 - acc: 0.9900
Epoch 74/100
0s - loss: 0.0773 - acc: 0.9867
Epoch 75/100
0s - loss: 0.0768 - acc: 0.9933
Epoch 76/100
0s - loss: 0.0766 - acc: 0.9900
Epoch 77/100
0s - loss: 0.0760 - acc: 0.9900
Epoch 78/100
0s - loss: 0.0765 - acc: 0.9800
Epoch 79/100
0s - loss: 0.0757 - acc: 0.9900
Epoch 80/100
0s - loss: 0.0753 - acc: 0.9900
Epoch 81/100
0s - loss: 0.0763 - acc: 0.9900
Epoch 82/100
0s - loss: 0.0748 - acc: 0.9933
Epoch 83/100
0s - loss: 0.0751 - acc: 0.9867
Epoch 84/100
0s - loss: 0.0750 - acc: 0.9900
Epoch 85/100
0s - loss: 0.0750 - acc: 0.9933
Epoch 86/100
0s - loss: 0.0746 - acc: 0.9867
Epoch 87/100
0s - loss: 0.0749 - acc: 0.9900
Epoch 88/100
0s - loss: 0.0748 - acc: 0.9933
Epoch 89/100
0s - loss: 0.0746 - acc: 0.9900
Epoch 90/100
0s - loss: 0.0734 - acc: 0.9867
Epoch 91/100
0s - loss: 0.0740 - acc: 0.9867
Epoch 92/100
0s - loss: 0.0736 - acc: 0.9867
Epoch 93/100
0s - loss: 0.0738 - acc: 0.9933
Epoch 94/100
0s - loss: 0.0736 - acc: 0.9867
Epoch 95/100
0s - loss: 0.0731 - acc: 0.9967
Epoch 96/100
0s - loss: 0.0723 - acc: 0.9900
Epoch 97/100
0s - loss: 0.0738 - acc: 0.9933
Epoch 98/100
0s - loss: 0.0727 - acc: 0.9933
Epoch 99/100
0s - loss: 0.0716 - acc: 0.9933
Epoch 100/100
0s - loss: 0.0720 - acc: 0.9933

Result

In [9]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4))
ax1.set_ylabel("Loss")
ax2.set_ylabel("Accuarcy")
ax1.set_xlabel("Epoch")
ax2.set_xlabel("Epoch")
ax1.grid()
ax2.grid()
ax1.plot(history.epoch, history.history["loss"])
ax2.plot(history.epoch, history.history["acc"])
Out[9]:
[<matplotlib.lines.Line2D at 0x10f722400>]

Predict

In [10]:
def traverse(o, tree_types=(list, tuple)):
    # recursively flatten a nested list/tuple structure into a flat stream of leaves
    if isinstance(o, tree_types):
        for value in o:
            for subvalue in traverse(value, tree_types):
                yield subvalue
    else:
        yield o
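A quick sanity check of traverse() on a small nested structure (illustrative only, not part of the original run):

list(traverse([[1, 2], (3, [4, 5])]))  # -> [1, 2, 3, 4, 5]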
In [11]:
n_linspace = 100
axis_grid = np.linspace(-1.5, 1.5, n_linspace)
grid_x, grid_y = np.meshgrid(axis_grid, axis_grid)
# flatten both grids in lockstep and pair the coordinates into (x, y) test points
x_test = [(i, j) for i, j in zip(traverse(grid_x.tolist()), traverse(grid_y.tolist()))]
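The same grid of test points can also be built with NumPy alone (an equivalent sketch that skips traverse()):

x_test_alt = np.c_[grid_x.ravel(), grid_y.ravel()]  # shape (n_linspace**2, 2), same row-major order as the zip above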
In [13]:
y_hat = model.predict(x_test)
y_hat_grid = np.argmax(y_hat, axis=1).reshape((n_linspace, n_linspace))
In [14]:
grid_x, grid_y = np.meshgrid(axis_grid, axis_grid)
fig, ax = plt.subplots()
ax.contourf(grid_x, grid_y, y_hat_grid, cmap=plt.cm.Spectral)
ax.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
Out[14]:
<matplotlib.collections.PathCollection at 0x10f736b38>