Building a convolutional network to classify MNIST digits¶

In [1]:
import numpy as np
import torch.nn as nn
import torch
import torch.optim as optim
In [2]:
with open('mnist.npy','rb') as f:
    pics=np.load(f)
    labels=np.load(f)
print(f'Pics shape: {pics.shape}')
print(f'labels shape: {labels.shape}')
Pics shape: (60000, 28, 28)
labels shape: (60000,)

In past work with MNIST, we've flattened each data point into a single row of pixel values, throwing away the 28x28 picture structure. Here, we're keeping each data point as-is, so we can use our convolutions.

Our model¶

Here's a very simple convolutional network. Notice it's made up of two sequences, each consisting of a 2D convolution, followed by a ReLU, followed by a MaxPool.

In the first sequence, the convolution takes in one channel (because the images are merely grayscale - were they color pictures, this would be 3) and applies 16 different convolutional filters, each of which is 5x5. Each filter "strides" along one pixel at a time (so the stride is 1) and has a padding of 2, so each of the 16 filters outputs a 28x28 grid of values. The maxpool then takes the maximum value in every 2x2 block of each channel, which gives us a 16x14x14 block (14 being half of 28, resulting from the 2x2 max pool).

In the second, the convolution takes in 16 channels (i.e., the same number that the previous one outputs), creates 32 channels, and again uses 5x5 filters with a stride of 1 and a padding of 2. This gives us an output from the Conv2d (and ReLU) of 32x14x14, which the MaxPool downsamples to 32x7x7.

These 32x7x7 values are then flattened into a single vector of length 1568 (32*7*7), and linearly combined into 10 output logits.
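
If you want to check that shape arithmetic yourself, the output size of a convolution (or pooling) layer along one dimension is floor((input + 2*padding - kernel) / stride) + 1. Here's a quick sketch of that bookkeeping (conv_out_size is just a throwaway helper for illustration, not part of PyTorch):

def conv_out_size(size, kernel, stride=1, padding=0):
    # standard output-size formula: floor((size + 2*padding - kernel) / stride) + 1
    return (size + 2 * padding - kernel) // stride + 1

after_conv1 = conv_out_size(28, kernel=5, stride=1, padding=2)           # 28
after_pool1 = conv_out_size(after_conv1, kernel=2, stride=2)             # 14
after_conv2 = conv_out_size(after_pool1, kernel=5, stride=1, padding=2)  # 14
after_pool2 = conv_out_size(after_conv2, kernel=2, stride=2)             # 7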

In [3]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(   # takes in a 1x28x28
            nn.Conv2d(
                in_channels=1,              
                out_channels=16,            
                kernel_size=5,              
                stride=1,                   
                padding=2,                  
            ),                              
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),    
        ) # outputs a 16x14x14
        self.conv2 = nn.Sequential(  # takes in a 16x14x14
            nn.Conv2d(16, 32, 5, 1, 2),     
            nn.ReLU(),                      
            nn.MaxPool2d(2),                
        ) # outputs a 32x7x7
        self.out = nn.Linear(32 * 7 * 7, 10)
    def forward(self, x):
        x = self.conv1(x) # 16x14x14
        x = self.conv2(x) # 32x7x7
        x = x.view(x.size(0), -1) # flatten this into a single vector of size 32*7*7=1568
        output = self.out(x) # combine into 10 output logits
        return output
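
As a quick sanity check on the shapes noted in the comments, we can push a dummy batch through an untrained copy of the model (just a sketch; nothing here is trained):

dummy = torch.zeros(1, 1, 28, 28)        # a batch of one fake grayscale image
m = CNN()
print(m.conv1(dummy).shape)              # torch.Size([1, 16, 14, 14])
print(m.conv2(m.conv1(dummy)).shape)     # torch.Size([1, 32, 7, 7])
print(m(dummy).shape)                    # torch.Size([1, 10])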

Train/test split!¶

In [4]:
from sklearn.model_selection import train_test_split
Xtr,Xte,ytr,yte=train_test_split(pics,labels)
ntr,k,_=Xtr.shape
nte,k,_=Xte.shape

Reshaping our data¶

Our data is n x 28 x 28. PyTorch expects this to be in n x channels x height x width order, so we need to make it n x 1 x 28 x 28. This is what the call to reshape() does. The rest turns the data into tensors and moves them onto the GPU.

In [5]:
Xtr = torch.tensor(Xtr, dtype=torch.float32).reshape(ntr,1,k,k).to('cuda')
ytr = torch.tensor(ytr).to('cuda')
Xte = torch.tensor(Xte, dtype=torch.float32).reshape(nte,1,k,k).to('cuda')
yte = torch.tensor(yte).to('cuda')

Build the model, move it to the GPU, and print it out just so we can see it.

In [6]:
c=CNN().to('cuda')
print(c)
CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)

Training!¶

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(c.parameters(), lr=.01)

EPOCHS = 600

for epoch in range(EPOCHS):
    predictions = c(Xtr)
    loss = criterion(predictions, ytr)
        
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
        
    if epoch % 100 == 0:
        with torch.no_grad():
            pred = c(Xte)
            test_loss = criterion(pred, yte)
        print(f'Training Loss: {loss}, Testing Loss: {test_loss}')
Training Loss: 25.252174377441406, Testing Loss: 174.01025390625
Training Loss: 0.16313998401165009, Testing Loss: 0.17874178290367126
Training Loss: 0.09560582786798477, Testing Loss: 0.12043824791908264
Training Loss: 0.07425129413604736, Testing Loss: 0.10984784364700317
Training Loss: 0.060848090797662735, Testing Loss: 0.10514787584543228
Training Loss: 0.050651900470256805, Testing Loss: 0.10732713341712952

Saving the model¶

Here I save the model, so I don't have to train it again in my next session. torch.load() will load it back up again.

In [8]:
torch.save(c,'myMnistConv')
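
As a sketch of what the next session might look like: because we saved the entire model object (not just a state_dict), the CNN class definition has to be available when loading, and on recent PyTorch versions you may need to pass weights_only=False.

c = torch.load('myMnistConv')   # on newer PyTorch: torch.load('myMnistConv', weights_only=False)
c = c.to('cuda')
c.eval()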

Looking at the results¶

In [11]:
from sklearn.metrics import confusion_matrix
def build_confusion(predictions,truth,title):
    cm=confusion_matrix(truth,predictions)  # sklearn expects (y_true, y_pred): rows are actual, columns are predicted
    import plotly.graph_objects as go
    # Create a Plotly heatmap
    fig = go.Figure(data=go.Heatmap(
        z=cm,
        x=[f'Predicted {i}' for i in range(cm.shape[0])],
        y=[f'Actual {i}' for i in range(cm.shape[0])],
        colorscale='Viridis',  # You can choose any colorscale you like
        colorbar=dict(title='Count')
    ))
    
    # Update layout
    fig.update_layout(
        title=title,
        xaxis_title='Predicted label',
        yaxis_title='Actual label',
    )
    
    # Show plot
    fig.show()
trainPred=np.argmax(c(Xtr).detach().cpu().numpy(),axis=1)
trainTruth=ytr.cpu().numpy()
build_confusion(trainPred,trainTruth,'Training CM')
testPred=np.argmax(c(Xte).detach().cpu().numpy(),axis=1)
testTruth=yte.cpu().numpy()
build_confusion(testPred,testTruth,'Testing CM')
In [12]:
from sklearn.metrics import accuracy_score

print(f'Training accuracy is {accuracy_score(trainTruth,trainPred)}.')
print(f'Testing accuracy is {accuracy_score(testTruth,testPred)}.')
Training accuracy is 0.9875333333333334.
Testing accuracy is 0.9682.

Image Augmentation¶

When we start getting into images, the dataset gets a lot larger in terms of necessary storage. It may not be possible or efficient to load all data into memory, so you might want to retrieve a random subset ("batch") to optimize on.

Additionally, it is common to perform dataset augmentation, where a picture of a 4 can be slightly rotated, or zoomed, or cropped, to make a new picture, which is also of a 4, thereby artificially increasing the size of your dataset. This usually allows you to squeeze a few extra percentage points of accuracy out of your dataset.

In PyTorch, implementing both of these things involves making a Dataset and a DataLoader. The Dataset defines, well, the dataset, and the DataLoader defines how data points are loaded and manipulated before being trained on.

Here we've made an example PyTorch Dataset. It's made up of three things - a constructor, where you create whatever fields you need; __len__, which returns the number of elements in the dataset; and __getitem__, which defines how to get the idx-th element of the dataset.

This is a very simple example, which still requires pulling the full dataset into memory to store into self.X and self.y. Suppose our dataset was a whole lot of pictures, and this was unrealistic. Our constructor might store a file directory, __len__ might count the number of images in the directory, and __getitem__ might load and return the idx-th picture in the directory.
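
Here's a hedged sketch of what that might look like. The directory layout, the label-in-filename convention, and the use of torchvision.io.read_image are all assumptions made for illustration; they aren't part of what we actually do below.

import os
from torch.utils.data import Dataset
from torchvision.io import read_image

class FolderDataset(Dataset):
    # hypothetical dataset that loads one image file per item from a directory
    def __init__(self, directory, transforms=None):
        self.directory = directory
        # assume every .png in the directory is named like '<label>_<anything>.png'
        self.files = sorted(f for f in os.listdir(directory) if f.endswith('.png'))
        self.transforms = transforms

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        img = read_image(os.path.join(self.directory, fname)).float()  # only this image is read from disk
        label = int(fname.split('_')[0])
        if self.transforms:
            img = self.transforms(img)
        return img, label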

I do two transformations - a random rotation of up to 20 degrees in either direction, and a random sharpness adjustment. There's not a lot of good reason for the second one; I just wanted to show you how to use multiple transformations.

We do this only to the training set! Our goal with the testing set is to test them as-is, and transforming them would do nothing to combat overfitting anyway since we're not training on them.

You can find illustrations of all the transformations in the torchvision transforms documentation.

In [19]:
from torch.utils.data import Dataset
from torchvision.transforms import v2

class MnistDataset(Dataset):
    def __init__(self,X,y,transforms=None):
        self.X=X
        self.y=y
        self.transforms=transforms

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self,idx):
        theX=self.X[idx,:,:]
        they=self.y[idx]

        if self.transforms:
            theX=self.transforms(theX)
        return theX, they

trainTransforms=v2.Compose([
    v2.RandomRotation(degrees=20),
    v2.RandomAdjustSharpness(sharpness_factor=2)
])
trainingSet=MnistDataset(Xtr.cpu(),ytr.cpu(),transforms=trainTransforms)
testingSet=MnistDataset(Xte.cpu(),yte.cpu())
In [20]:
from torch.utils.data import DataLoader

trainLoader=DataLoader(trainingSet,batch_size=1000,shuffle=True,num_workers=4)
testLoader=DataLoader(testingSet,batch_size=1000,shuffle=False,num_workers=4)
In [21]:
model=CNN().to('cuda')

EPOCHS=101

criterion=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters(),lr=.01)

for epoch in range(EPOCHS):
    totalloss=0
    for batch, (X,y) in enumerate(trainLoader):
        X=X.to('cuda')
        y=y.to('cuda')
        predictions=model(X)
        loss=criterion(predictions,y)
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        totalloss+=loss.item()


    if epoch%20==0:
        totalloss/=len(trainLoader)
        print('Train error',totalloss,epoch)
        test_loss=0
        
        with torch.no_grad():
            for X,y in testLoader:
                X=X.to('cuda')
                y=y.to('cuda')
                pred=model(X)
                test_loss+=criterion(pred,y).item()
            test_loss/=len(testLoader)
        print('  Test error',test_loss,epoch)
Train error 5.906119389004178 0
  Test error 2.1022006193796794 0
Train error 0.1787058432896932 20
  Test error 0.12685690422852833 20
Train error 0.1483608391549852 40
  Test error 0.11265955716371537 40
Train error 0.14338245474629932 60
  Test error 0.10846348355213802 60
Train error 0.14501166525814269 80
  Test error 0.13462353845437366 80
Train error 0.14703123503261142 100
  Test error 0.13238660196463267 100

The above is considerably slower per epoch, as it's doing multiple backward passes per epoch (one for each minibatch), rather than just one, as well as performing the transformations. However, it's not much slower per backward pass.

Notice the training loss is much higher than the previous version. Due to the data augmentation, fitting the training data has gotten harder - this is a good thing! It makes it much harder to overfit.

It probably needed to train longer, but here's the performance.

In [22]:
trainPred=np.argmax(model(Xtr).detach().cpu().numpy(),axis=1)
trainTruth=ytr.cpu()
build_confusion(trainPred,trainTruth,'Training CM')
testPred=np.argmax(model(Xte).detach().cpu().numpy(),axis=1)
testTruth=yte.cpu()
build_confusion(testPred,testTruth,'Testing CM')
print(f'Training accuracy is {accuracy_score(trainTruth,trainPred)}.')
print(f'Testing accuracy is {accuracy_score(testTruth,testPred)}.')
Training accuracy is 0.9641555555555555.
Testing accuracy is 0.9602666666666667.