Recognizing Hand-Written Digits with Neural Networks
Recognizing Hand-Written Digits Using NumPy
- Each image has 8×8 = 64 pixels, so the input dimension is 64.
- Each target digit is one-hot encoded as a length-10 vector, e.g. [0, 0, 1, 0, …, 0] for the digit 2.
- batch size = 100
- hidden neurons = 50
- output dimension = 10 (one score per digit class)
- The hidden layer uses the ReLU activation function; a shape sketch follows below.
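As a minimal sketch of these shapes (illustrative only, assuming numpy is imported as np as in the code below):

x = np.zeros((100, 64))        # one batch of flattened 8x8 images
w1 = np.zeros((64, 50))        # input-to-hidden weights
w2 = np.zeros((50, 10))        # hidden-to-output weights
h = np.maximum(x.dot(w1), 0)   # ReLU hidden activations, shape (100, 50)
scores = h.dot(w2)             # class scores, shape (100, 10)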
The data is a copy of the test set of the UCI ML hand-written digits dataset: https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set()
import sys
from collections import defaultdict
from sklearn import datasets
# load data
digits = datasets.load_digits()
# prepare training sets
N, D_in, H, D_out = 100, 64, 50, 10  # batch size, input, hidden, and output dimensions
k = 0.9  # fraction of the data used for training
learning_rate = 1e-6  # (the PyTorch sections below use 1e-1)
L = len(digits.data)
l = int(L * k)
print(L, l)
1797 1617
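Since matplotlib is already imported, it can help to eyeball a few samples first; a small optional sketch:

fig, axes = plt.subplots(1, 5, figsize=(10, 2))
for ax, image, label in zip(axes, digits.images, digits.target):
    ax.imshow(image, cmap='gray_r')  # show the 8x8 image with its label
    ax.set_title(label)
    ax.axis('off')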
Batches = {}
M = 200  # number of batches
for j in range(M):
    index = list(np.random.randint(l, size=N))  # randomly sample N training indices (with replacement)
    y = np.zeros((N, 10))
    y[np.arange(N), list(digits.target[index])] = 1  # one-hot encode the sampled labels
    x = digits.data[index]
    Batches[j] = [x, y]
# softmax
def softmax(x):
    e_x = np.exp(x - np.max(x))  # subtract the max to avoid overflow
    return e_x / e_x.sum(axis=0)

def softmaxByRow(x):
    e_x = np.exp(x - x.max(axis=1, keepdims=True))
    return e_x / e_x.sum(axis=1, keepdims=True)
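A quick sanity check of softmaxByRow: each row should sum to 1, even for inputs large enough to overflow a naive exponential (a small illustrative example):

z = np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 1000.0]])
print(softmaxByRow(z))              # second row -> [0., 0., 1.], no overflow
print(softmaxByRow(z).sum(axis=1))  # -> [1. 1.]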
# flush print
def flushPrint(d):
    sys.stdout.write('\r')
    sys.stdout.write(str(d))
    sys.stdout.flush()
w1 = np.random.randn(D_in, H) / H
w2 = np.random.randn(H, D_out) / H
w1c = w1.copy()  # keep copies of the initial weights for comparison in visualization
w2c = w2.copy()
Loss = defaultdict(list)
# training
for t in range(200):  # number of epochs
    flushPrint('epoch = ' + str(t))
    for j in Batches:
        x, y = Batches[j]
        # Forward pass
        h = x.dot(w1)
        h_relu = np.maximum(h, 0)
        y_pred = h_relu.dot(w2)
        y_pred_soft = softmaxByRow(y_pred)
        # Loss: sum of squared errors against the one-hot targets
        loss = np.square(y_pred_soft - y).sum()
        Loss[j].append([t, loss])
        # Backprop (note: the softmax Jacobian is ignored here; the squared-error
        # gradient is pushed straight through to the pre-softmax scores)
        grad_y_pred = 2.0 * (y_pred_soft - y)
        grad_w2 = h_relu.T.dot(grad_y_pred)
        grad_h_relu = grad_y_pred.dot(w2.T)
        grad_h = grad_h_relu.copy()
        grad_h[h < 0] = 0
        grad_w1 = x.T.dot(grad_h)
        # Update weights
        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
epoch = 199
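The Loss dictionary recorded above is not plotted in the original cell; a possible sketch using the already-imported matplotlib, showing a handful of batches' loss trajectories:

for j in list(Loss)[:5]:
    ts, ls = zip(*Loss[j])   # unpack (epoch, loss) pairs for batch j
    plt.plot(ts, ls, alpha=0.6)
plt.xlabel('epoch')
plt.ylabel('sum-of-squares loss')
plt.show()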
For comparison, here is the same two-layer pattern written with raw PyTorch tensors, adapted from https://pytorch.org/tutorials/beginner/pytorch_with_examples.html#pytorch-tensors (x, y, w1, and w2 are assumed to be torch tensors; see the sketch after the loop):

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)
    # Compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)
    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)
    # Update weights using gradient descent
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
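The snippet above assumes x, y, w1, and w2 already exist as torch tensors; one possible setup (an assumption, reusing the first numpy batch and the dimensions defined earlier):

import torch
# assumption: reuse the first numpy batch from the section above
x = torch.tensor(Batches[0][0], dtype=torch.float32)
y = torch.tensor(Batches[0][1], dtype=torch.float32)
w1 = torch.randn(D_in, H) / H
w2 = torch.randn(H, D_out) / H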
# Test on the held-out data (the last L - l examples)
TestData = digits.data[-(L-l):]
PredictData = np.maximum(TestData.dot(w1), 0).dot(w2)  # forward pass with the trained weights
compare = np.argmax(PredictData, axis=1) - digits.target[-(L-l):]
Accuracy = list(compare).count(0) / float(len(compare))  # fraction of exact matches
Accuracy
0.9388888888888889
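To see which digits are confused with which, a confusion matrix can be drawn with the already-imported seaborn (an optional sketch):

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(digits.target[-(L-l):], np.argmax(PredictData, axis=1))
sns.heatmap(cm, annot=True, fmt='d', cbar=False)  # counts per (true, predicted) pair
plt.xlabel('predicted')
plt.ylabel('true')
plt.show()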
Recognizing Hand-Written Digits Using PyTorch
- ReLU is applied only once, after the first linear layer.
- learning rate = 0.1
- MSELoss is used as the loss function.
- y_batch is converted from a class index such as 1 to a one-hot vector such as [0, 1, 0, 0, 0, 0, 0, 0, 0, 0].
from sklearn.model_selection import train_test_split
import torch
from torch import nn, optim, from_numpy
import numpy as np
Xtrain, Xtest, ytrain, ytest = train_test_split(digits.data, digits.target,
train_size = 0.9, test_size = 0.1, random_state=1)
Xtrain = torch.tensor(Xtrain, dtype = torch.float32)
ytrain = torch.tensor(ytrain, dtype = torch.int64)
Xtest = torch.tensor(Xtest, dtype = torch.float32)
ytest = torch.tensor(ytest, dtype = torch.int64)
batch_size = 100
train = torch.utils.data.TensorDataset(Xtrain, ytrain)
train_loader = torch.utils.data.DataLoader(dataset=train,
                                           batch_size=batch_size,
                                           shuffle=True)
test = torch.utils.data.TensorDataset(Xtest, ytest)
test_loader = torch.utils.data.DataLoader(dataset=test,
                                          batch_size=batch_size,
                                          shuffle=True)
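A quick way to confirm the loaders produce batches of the expected shapes (an optional check):

xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)  # expected: torch.Size([100, 64]) torch.Size([100])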
from torch.nn import functional as F

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.l1 = nn.Linear(64, 50)
        self.l2 = nn.Linear(50, 10)

    def forward(self, x):
        out = F.relu(self.l1(x))
        out = self.l2(out)
        y_pred = F.softmax(out, dim=-1)
        return y_pred
# our model
model = Model()
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=1e-1) # learning rate is very sensitive
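Softmax outputs combined with MSELoss do work here, but the more common PyTorch pattern is to return raw logits and use nn.CrossEntropyLoss, which consumes integer class labels directly, so no one-hot conversion is needed. A sketch of that alternative (LogitModel is a hypothetical name, not part of the original notebook):

# hypothetical alternative, not used in this notebook
class LogitModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(64, 50)
        self.l2 = nn.Linear(50, 10)
    def forward(self, x):
        # return raw logits; CrossEntropyLoss applies log-softmax internally
        return self.l2(F.relu(self.l1(x)))

# criterion = nn.CrossEntropyLoss()  # would take y_batch as integer labels, no get_prob needed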
def get_prob(y_batch):
    # one-hot encode a batch of integer class labels
    y = np.zeros((len(y_batch), 10))
    y[np.arange(len(y_batch)), list(y_batch)] = 1
    y = torch.tensor(y, dtype=torch.float32)
    return y
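For example, a batch of labels [1, 3] becomes two one-hot rows (illustrative call):

get_prob(torch.tensor([1, 3]))
# expected:
# tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
#         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]])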
# Training loop
num_epoch = 100
for k, epoch in enumerate(range(num_epoch)):
    for x_batch, y_batch in train_loader:
        # Forward pass: compute predicted y by passing x to the model
        y_pred = model(x_batch)
        y_batch = get_prob(y_batch)  # one-hot encode the targets for MSELoss
        # Compute and print loss
        loss = criterion(y_pred, y_batch)
        # loss = criterion(torch.max(y_pred, 1)[1], y_batch)
        if k % 100 == 0:
            print(f'Epoch: {epoch}/{num_epoch} | Loss: {loss.item():.4f}')
        # Zero gradients, perform a backward pass, and update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Epoch: 0/100 | Loss: 0.1011
Epoch: 0/100 | Loss: 0.1043
Epoch: 0/100 | Loss: 0.0998
Epoch: 0/100 | Loss: 0.0977
Epoch: 0/100 | Loss: 0.0963
Epoch: 0/100 | Loss: 0.0921
Epoch: 0/100 | Loss: 0.0955
Epoch: 0/100 | Loss: 0.0981
Epoch: 0/100 | Loss: 0.0964
Epoch: 0/100 | Loss: 0.0915
Epoch: 0/100 | Loss: 0.0840
Epoch: 0/100 | Loss: 0.0882
Epoch: 0/100 | Loss: 0.0974
Epoch: 0/100 | Loss: 0.0893
Epoch: 0/100 | Loss: 0.0843
Epoch: 0/100 | Loss: 0.0885
Epoch: 0/100 | Loss: 0.0624
Model Validation
# Test the Model
model.eval()  # change model to 'eval' mode
correct = 0
total = 0
for xval, yval in test_loader:
    outputs = model(xval)
    _, predicted = torch.max(outputs.data, 1)
    total += yval.size(0)
    correct += (predicted == yval).sum()
print('Test Accuracy: %d %%' % (100 * correct / total))
Test Accuracy: 96 %
Recognizing Hand-Written Digits with CNN Using PyTorch
from sklearn.model_selection import train_test_split
import torch
from torch import nn, optim, from_numpy
import numpy as np
Xtrain, Xtest, ytrain, ytest = train_test_split(digits.data, digits.target,
train_size = 0.9, test_size = 0.1, random_state=1)
# reshape each length-64 vector into an 8x8 matrix
Xtrain = np.array([i.reshape(8, 8) for i in Xtrain])
Xtest = np.array([i.reshape(8, 8) for i in Xtest])
Xtrain = torch.tensor(Xtrain, dtype = torch.float32)
ytrain = torch.tensor(ytrain, dtype = torch.int64)
Xtest = torch.tensor(Xtest, dtype = torch.float32)
ytest = torch.tensor(ytest, dtype = torch.int64)
batch_size = 100
train = torch.utils.data.TensorDataset(Xtrain, ytrain)
train_loader = torch.utils.data.DataLoader(dataset=train,
                                           batch_size=batch_size,
                                           shuffle=True)
test = torch.utils.data.TensorDataset(Xtest, ytest)
test_loader = torch.utils.data.DataLoader(dataset=test,
                                          batch_size=batch_size,
                                          shuffle=True)
Xtrain.shape
torch.Size([1617, 8, 8])
Xtest.shape
torch.Size([180, 8, 8])
Xtrain[0]
tensor([[ 0., 0., 2., 13., 16., 16., 7., 0.],
[ 0., 0., 12., 15., 12., 16., 10., 0.],
[ 0., 0., 16., 9., 0., 14., 6., 0.],
[ 0., 0., 3., 0., 4., 16., 1., 0.],
[ 0., 0., 0., 10., 14., 16., 6., 0.],
[ 0., 0., 3., 16., 16., 11., 2., 0.],
[ 0., 0., 0., 9., 14., 0., 0., 0.],
[ 0., 0., 2., 15., 6., 0., 0., 0.]])
Xtest[0]
tensor([[ 0., 0., 6., 16., 11., 0., 0., 0.],
[ 0., 0., 9., 16., 16., 5., 0., 0.],
[ 0., 0., 8., 16., 16., 4., 0., 0.],
[ 0., 0., 10., 16., 13., 0., 0., 0.],
[ 0., 0., 13., 16., 12., 0., 0., 0.],
[ 0., 0., 10., 16., 9., 0., 0., 0.],
[ 0., 0., 9., 16., 10., 0., 0., 0.],
[ 0., 0., 4., 15., 16., 3., 0., 0.]])
# CNN Model (2 conv layers)
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=2),  # in_channels=1, out_channels=32, kernel_size=3
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(3*3*64, 10)  # feature maps are 3x3 with 64 channels after the two blocks

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)  # flatten to (batch, 3*3*64)
        out = self.fc(out)
        return out
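To see where the 3*3*64 in self.fc comes from, one can trace a dummy batch through the layers (an illustrative check; the shapes follow from kernel_size=3, padding=2, and 2x2 max-pooling):

dummy = torch.zeros(1, 1, 8, 8)
out = nn.Conv2d(1, 32, kernel_size=3, padding=2)(dummy)  # -> (1, 32, 10, 10)
out = nn.MaxPool2d(2)(out)                               # -> (1, 32, 5, 5)
out = nn.Conv2d(32, 64, kernel_size=3, padding=2)(out)   # -> (1, 64, 7, 7)
out = nn.MaxPool2d(2)(out)                               # -> (1, 64, 3, 3), which flattens to 3*3*64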
cnn = CNN()
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=1e-1)
# Train the Model
num_epoch = 100
for epoch in range(num_epoch):
    for i, (images, labels) in enumerate(train_loader):
        # add a channel dimension: (batch, 1, 8, 8)
        images = images.view(len(images), 1, 8, 8)
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if (i+1) % 10 == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                  % (epoch+1, num_epoch, i+1, len(train)//batch_size, loss.item()))
Epoch [1/100], Iter [10/16] Loss: 2.5073
Epoch [2/100], Iter [10/16] Loss: 2.3236
Epoch [3/100], Iter [10/16] Loss: 2.2836
Epoch [4/100], Iter [10/16] Loss: 1.9957
Epoch [5/100], Iter [10/16] Loss: 1.4108
Epoch [6/100], Iter [10/16] Loss: 0.5429
Epoch [7/100], Iter [10/16] Loss: 0.3105
Epoch [8/100], Iter [10/16] Loss: 0.1456
Epoch [9/100], Iter [10/16] Loss: 0.2635
Epoch [10/100], Iter [10/16] Loss: 0.0572
Epoch [11/100], Iter [10/16] Loss: 0.0202
Epoch [12/100], Iter [10/16] Loss: 0.0370
Epoch [13/100], Iter [10/16] Loss: 0.0279
Epoch [14/100], Iter [10/16] Loss: 0.0654
Epoch [15/100], Iter [10/16] Loss: 0.0075
Epoch [16/100], Iter [10/16] Loss: 0.0093
Epoch [17/100], Iter [10/16] Loss: 0.0052
Epoch [18/100], Iter [10/16] Loss: 0.0049
Epoch [19/100], Iter [10/16] Loss: 0.0021
Epoch [20/100], Iter [10/16] Loss: 0.0009
Epoch [21/100], Iter [10/16] Loss: 0.0025
Epoch [22/100], Iter [10/16] Loss: 0.0005
Epoch [23/100], Iter [10/16] Loss: 0.0009
Epoch [24/100], Iter [10/16] Loss: 0.0011
Epoch [25/100], Iter [10/16] Loss: 0.0002
Epoch [26/100], Iter [10/16] Loss: 0.0004
Epoch [27/100], Iter [10/16] Loss: 0.0003
Epoch [28/100], Iter [10/16] Loss: 0.0003
Epoch [29/100], Iter [10/16] Loss: 0.0002
Epoch [30/100], Iter [10/16] Loss: 0.0001
Epoch [31/100], Iter [10/16] Loss: 0.0002
Epoch [32/100], Iter [10/16] Loss: 0.0002
Epoch [33/100], Iter [10/16] Loss: 0.0003
Epoch [34/100], Iter [10/16] Loss: 0.0003
Epoch [35/100], Iter [10/16] Loss: 0.0002
Epoch [36/100], Iter [10/16] Loss: 0.0002
Epoch [37/100], Iter [10/16] Loss: 0.0002
Epoch [38/100], Iter [10/16] Loss: 0.0001
Epoch [39/100], Iter [10/16] Loss: 0.0002
Epoch [40/100], Iter [10/16] Loss: 0.0001
Epoch [41/100], Iter [10/16] Loss: 0.0003
Epoch [42/100], Iter [10/16] Loss: 0.0001
Epoch [43/100], Iter [10/16] Loss: 0.0002
Epoch [44/100], Iter [10/16] Loss: 0.0001
Epoch [45/100], Iter [10/16] Loss: 0.0000
Epoch [46/100], Iter [10/16] Loss: 0.0002
Epoch [47/100], Iter [10/16] Loss: 0.0001
Epoch [48/100], Iter [10/16] Loss: 0.0002
Epoch [49/100], Iter [10/16] Loss: 0.0001
Epoch [50/100], Iter [10/16] Loss: 0.0001
Epoch [51/100], Iter [10/16] Loss: 0.0002
Epoch [52/100], Iter [10/16] Loss: 0.0000
Epoch [53/100], Iter [10/16] Loss: 0.0001
Epoch [54/100], Iter [10/16] Loss: 0.0001
Epoch [55/100], Iter [10/16] Loss: 0.0000
Epoch [56/100], Iter [10/16] Loss: 0.0000
Epoch [57/100], Iter [10/16] Loss: 0.0001
Epoch [58/100], Iter [10/16] Loss: 0.0001
Epoch [59/100], Iter [10/16] Loss: 0.0001
Epoch [60/100], Iter [10/16] Loss: 0.0001
Epoch [61/100], Iter [10/16] Loss: 0.0001
Epoch [62/100], Iter [10/16] Loss: 0.0001
Epoch [63/100], Iter [10/16] Loss: 0.0001
Epoch [64/100], Iter [10/16] Loss: 0.0001
Epoch [65/100], Iter [10/16] Loss: 0.0000
Epoch [66/100], Iter [10/16] Loss: 0.0000
Epoch [67/100], Iter [10/16] Loss: 0.0000
Epoch [68/100], Iter [10/16] Loss: 0.0000
Epoch [69/100], Iter [10/16] Loss: 0.0001
Epoch [70/100], Iter [10/16] Loss: 0.0001
Epoch [71/100], Iter [10/16] Loss: 0.0001
Epoch [72/100], Iter [10/16] Loss: 0.0000
Epoch [73/100], Iter [10/16] Loss: 0.0001
Epoch [74/100], Iter [10/16] Loss: 0.0001
Epoch [75/100], Iter [10/16] Loss: 0.0001
Epoch [76/100], Iter [10/16] Loss: 0.0000
Epoch [77/100], Iter [10/16] Loss: 0.0001
Epoch [78/100], Iter [10/16] Loss: 0.0001
Epoch [79/100], Iter [10/16] Loss: 0.0001
Epoch [80/100], Iter [10/16] Loss: 0.0000
Epoch [81/100], Iter [10/16] Loss: 0.0000
Epoch [82/100], Iter [10/16] Loss: 0.0000
Epoch [83/100], Iter [10/16] Loss: 0.0000
Epoch [84/100], Iter [10/16] Loss: 0.0000
Epoch [85/100], Iter [10/16] Loss: 0.0000
Epoch [86/100], Iter [10/16] Loss: 0.0000
Epoch [87/100], Iter [10/16] Loss: 0.0000
Epoch [88/100], Iter [10/16] Loss: 0.0000
Epoch [89/100], Iter [10/16] Loss: 0.0000
Epoch [90/100], Iter [10/16] Loss: 0.0000
Epoch [91/100], Iter [10/16] Loss: 0.0000
Epoch [92/100], Iter [10/16] Loss: 0.0000
Epoch [93/100], Iter [10/16] Loss: 0.0000
Epoch [94/100], Iter [10/16] Loss: 0.0001
Epoch [95/100], Iter [10/16] Loss: 0.0000
Epoch [96/100], Iter [10/16] Loss: 0.0000
Epoch [97/100], Iter [10/16] Loss: 0.0000
Epoch [98/100], Iter [10/16] Loss: 0.0000
Epoch [99/100], Iter [10/16] Loss: 0.0000
Epoch [100/100], Iter [10/16] Loss: 0.0000
# Test the Model
cnn.eval()  # change the CNN to 'eval' mode (affects BatchNorm)
correct = 0
total = 0
for xval, yval in test_loader:
    # add a channel dimension: (batch, 1, 8, 8)
    xval = xval.view(len(xval), 1, 8, 8)
    outputs = cnn(xval)
    _, predicted = torch.max(outputs.data, 1)
    total += yval.size(0)
    correct += (predicted == yval).sum()
print('Test Accuracy: %d %%' % (100 * correct / total))
Test Accuracy: 98 %
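To inspect what the CNN still gets wrong, the variables left over from the last test batch can be reused (an optional sketch; it shows up to three misclassified digits):

# reuses predicted, xval, yval from the last batch of the test loop above
wrong = (predicted != yval).nonzero(as_tuple=True)[0]
for idx in wrong[:3]:
    plt.imshow(xval[idx].view(8, 8), cmap='gray_r')
    plt.title('true %d, predicted %d' % (yval[idx].item(), predicted[idx].item()))
    plt.show()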