from fastai.vision.all import *

Try manual convolution for MNIST_SAMPLE!

# 3x3 kernel with -1 in the top row, 0 in the middle row and 1 in the bottom row.
top_edge = tensor([[-1,-1,-1],
                   [0,0,0],
                   [1,1,1]]).float()
# Fetch the MNIST_SAMPLE dataset through fastai's untar_data and keep its root path.
path = untar_data(URLs.MNIST_SAMPLE)
# Use the dataset root as the base path (the listing below shows paths relative to it).
Path.BASE_PATH = path
path.ls()
(#3) [Path('train'),Path('labels.csv'),Path('valid')]
im3 = Image.open(path/'train'/'3'/'12.png')
show_image(im3)
im3_t = tensor(im3)
im3_t[:3,:3] * top_edge
tensor([[-0., -0., -0.],
        [0., 0., 0.],
        [0., 0., 0.]])
im3_t[:3,:3] * top_edge.sum()
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
df = pd.DataFrame(im3_t[:10,:20])
df
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 12 99 91 142 155 246 182 155 155 155 155 131 52 0 0 0 0
6 0 0 0 138 254 254 254 254 254 254 254 254 254 254 254 252 210 122 33 0
7 0 0 0 220 254 254 254 235 189 189 189 189 150 189 205 254 254 254 75 0
8 0 0 0 35 74 35 35 25 0 0 0 0 0 0 13 224 254 254 153 0
9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 90 254 254 247 53 0
df.style.background_gradient('Greys')
  0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 12 99 91 142 155 246 182 155 155 155 155 131 52 0 0 0 0
6 0 0 0 138 254 254 254 254 254 254 254 254 254 254 254 252 210 122 33 0
7 0 0 0 220 254 254 254 235 189 189 189 189 150 189 205 254 254 254 75 0
8 0 0 0 35 74 35 35 25 0 0 0 0 0 0 13 224 254 254 153 0
9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 90 254 254 247 53 0
df.style.set_properties(**{'font-size':'6pt'}).background_gradient('Greys')
  0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 12 99 91 142 155 246 182 155 155 155 155 131 52 0 0 0 0
6 0 0 0 138 254 254 254 254 254 254 254 254 254 254 254 252 210 122 33 0
7 0 0 0 220 254 254 254 235 189 189 189 189 150 189 205 254 254 254 75 0
8 0 0 0 35 74 35 35 25 0 0 0 0 0 0 13 224 254 254 153 0
9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 90 254 254 247 53 0
(im3_t[4:7,6:9]*top_edge).sum()
tensor(762.)
(im3_t[5:8,6:9]*top_edge).sum()
tensor(135.)
(im3_t[7:10,17:20]*top_edge).sum()
tensor(-29.)
def apply_kernel(row, col, kernel):
    """Multiply the patch `im3_t[row-1:row+2, col-1:col+2]` elementwise by `kernel` and return the sum."""
    patch = im3_t[row - 1:row + 2, col - 1:col + 2]
    weighted = patch * kernel
    return weighted.sum()
apply_kernel(5,7, top_edge)
tensor(762.)
# Build a 4x4 nested list of (i, j) index pairs with an explicit loop;
# i and j both run over 1..4.
tmp = []
for i in range(1,5):
    l = [(i,j) for j in range(1,5)]
    tmp.append(l)
tmp
[[(1, 1), (1, 2), (1, 3), (1, 4)],
 [(2, 1), (2, 2), (2, 3), (2, 4)],
 [(3, 1), (3, 2), (3, 3), (3, 4)],
 [(4, 1), (4, 2), (4, 3), (4, 4)]]
[[(i,j) for j in range(1,5)] for i in range(1,5)]
[[(1, 1), (1, 2), (1, 3), (1, 4)],
 [(2, 1), (2, 2), (2, 3), (2, 4)],
 [(3, 1), (3, 2), (3, 3), (3, 4)],
 [(4, 1), (4, 2), (4, 3), (4, 4)]]
def apply_kernel_(k):
    """Apply kernel `k` with `apply_kernel` at every (row, col) whose indices both run over 1..26.

    Returns the 26x26 grid of results as a tensor.
    """
    centres = range(1, 27)
    grid = [[apply_kernel(r, c, k) for c in centres] for r in centres]
    return tensor(grid)

show_image(apply_kernel_(top_edge))
<AxesSubplot:>
# 3x3 kernel with -1 in the left column, 1 in the middle column and 0 in the right column.
left_edge = tensor([[-1,1,0],
                    [-1,1,0],
                    [-1,1,0],]).float()
show_image(apply_kernel_(left_edge))
<AxesSubplot:>
# Start from the 3x3 identity flipped along dim 0 (ones on the anti-diagonal),
# then set entries [1,0] and [0,1] to -1.
diag1_edge = torch.eye(3).flip(0)
diag1_edge[1,0] = -1.
diag1_edge[0,1] = -1.
diag1_edge
tensor([[ 0., -1.,  1.],
        [-1.,  1.,  0.],
        [ 1.,  0.,  0.]])
diag2_edge = diag1_edge.flip(1);diag2_edge
tensor([[ 1., -1.,  0.],
        [ 0.,  1., -1.],
        [ 0.,  0.,  1.]])
# Stack the four 3x3 kernels along a new leading dimension, giving shape (4, 3, 3).
edge_kernels = torch.stack([
    left_edge,
    top_edge,
    diag1_edge,
    diag2_edge,
])
edge_kernels.shape
torch.Size([4, 3, 3])
# DataLoaders over the image files under `path`: inputs are PILImageBW images,
# targets are categories taken from each file's parent folder name.
# GrandparentSplitter() is called with its defaults — presumably the
# 'train'/'valid' folders listed by path.ls() above; verify against fastai docs.
dls = DataBlock(
    (ImageBlock(cls=PILImageBW), CategoryBlock),
    get_items=get_image_files,
    splitter=GrandparentSplitter(),
    get_y=parent_label).dataloaders(path)
# Grab the first validation batch to experiment with.
xb, yb = first(dls.valid)
xb.shape, yb.shape
(torch.Size([64, 1, 28, 28]), torch.Size([64]))
xb, yb = to_cpu(xb), to_cpu(yb)

The kernel (weight) shape is $(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}}, kH, kW)$, where $\text{groups}$ splits the $\text{in\_channels}$ input channels into that many groups, and each group is convolved with its own share of the $\text{out\_channels}$ filters.

edge_kernels.shape,edge_kernels.unsqueeze(1).shape,edge_kernels[:,None,:,:].shape
(torch.Size([4, 3, 3]), torch.Size([4, 1, 3, 3]), torch.Size([4, 1, 3, 3]))
# Insert a singleton dimension at index 1 so the kernels have shape (4, 1, 3, 3).
edge_kernels = edge_kernels.unsqueeze(1)
# Convolve the batch with all four kernels at once; no padding argument is
# passed, and the 28x28 inputs come out as 26x26 feature maps.
batch_features = F.conv2d(xb, edge_kernels)
batch_features.shape
torch.Size([64, 4, 26, 26])
show_image(batch_features[0,0])
show_image(batch_features[0,1])
show_image(batch_features[1,0])
show_image(batch_features[1,1])
<AxesSubplot:>
def new_len(n, ks, stride):
    """Return the length after a Conv2d over `n` inputs with kernel size `ks`, padding `ks//2` and `stride`."""
    padding = ks // 2
    padded_len = n + 2 * padding
    return (padded_len - ks) // stride + 1
new_len(28, 3, 1)
28
# Fully connected baseline: 28*28 = 784 inputs -> 30 hidden units (ReLU) -> 1 output.
simple_net = nn.Sequential(
    nn.Linear(28*28, 30),
    nn.ReLU(),
    nn.Linear(30, 1)
)
simple_net
Sequential(
  (0): Linear(in_features=784, out_features=30, bias=True)
  (1): ReLU()
  (2): Linear(in_features=30, out_features=1, bias=True)
)
# Same layout as simple_net but with 3x3 convolutions (padding=1) in place of
# the linear layers. The default stride of 1 with padding=1 keeps the 28x28
# spatial size, so the output is a 1-channel 28x28 map per image (see the
# shape printed below) rather than a single value.
broken_cnn = nn.Sequential(
    nn.Conv2d(1, 30, 3, padding=1),
    nn.ReLU(),
    nn.Conv2d(30, 1, 3, padding=1)
)
broken_cnn
Sequential(
  (0): Conv2d(1, 30, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(30, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
broken_cnn(xb).shape
torch.Size([64, 1, 28, 28])
def conv(ni, nf, ks=3, act=True):
    """Build a stride-2 convolution from `ni` to `nf` channels.

    ni: number of input channels.
    nf: number of output channels.
    ks: kernel size; padding is set to `ks//2`.
    act: when true, return the convolution followed by a ReLU in an
        `nn.Sequential`; otherwise return the bare `nn.Conv2d`.
    """
    layer = nn.Conv2d(ni, nf, kernel_size=ks, stride=2, padding=ks // 2)
    if not act:
        return layer
    return nn.Sequential(layer, nn.ReLU())
# Stack of stride-2 convolutions; the spatial sizes in the comments are the
# ones reported by the Learner summary below for a 28x28 input.
simple_cnn = nn.Sequential(
    conv(1, 4),              # 14x14
    conv(4, 8),              # 7x7
    conv(8, 16),             # 4x4
    conv(16,32),             # 2x2
    conv(32, 2, act=False),  # 1x1, two output channels and no ReLU
    Flatten(),               # 64 x 2 x 1 x 1 -> 64 x 2
)
Learner(dls, simple_cnn).summary()
Sequential (Input shape: 64 x 1 x 28 x 28)
============================================================================
Layer (type)         Output Shape         Param #    Trainable 
============================================================================
                     64 x 4 x 14 x 14    
Conv2d                                    40         True      
ReLU                                                           
____________________________________________________________________________
                     64 x 8 x 7 x 7      
Conv2d                                    296        True      
ReLU                                                           
____________________________________________________________________________
                     64 x 16 x 4 x 4     
Conv2d                                    1168       True      
ReLU                                                           
____________________________________________________________________________
                     64 x 32 x 2 x 2     
Conv2d                                    4640       True      
ReLU                                                           
____________________________________________________________________________
                     64 x 2 x 1 x 1      
Conv2d                                    578        True      
____________________________________________________________________________
                     64 x 2              
Flatten                                                        
____________________________________________________________________________

Total params: 6,722
Total trainable params: 6,722
Total non-trainable params: 0

Optimizer used: <function Adam at 0x7fc89882f040>
Loss function: FlattenedLoss of CrossEntropyLoss()

Callbacks:
  - TrainEvalCallback
  - CastToTensor
  - Recorder
  - ProgressCallback
# Move the batch to the device used by the DataLoaders instead of assuming
# CUDA is available; the hard-coded 'cuda' fails on CPU-only machines.
# NOTE(review): relies on the Learner having placed `simple_cnn` on
# `dls.device` during `.summary()` above — confirm against fastai.
xb, yb = xb.to(dls.device), yb.to(dls.device)
simple_cnn(xb).shape
torch.Size([64, 2])
learn = Learner(dls, simple_cnn, loss_func=F.cross_entropy, metrics=accuracy)
learn.summary()
Sequential (Input shape: 64 x 1 x 28 x 28)
============================================================================
Layer (type)         Output Shape         Param #    Trainable 
============================================================================
                     64 x 4 x 14 x 14    
Conv2d                                    40         True      
ReLU                                                           
____________________________________________________________________________
                     64 x 8 x 7 x 7      
Conv2d                                    296        True      
ReLU                                                           
____________________________________________________________________________
                     64 x 16 x 4 x 4     
Conv2d                                    1168       True      
ReLU                                                           
____________________________________________________________________________
                     64 x 32 x 2 x 2     
Conv2d                                    4640       True      
ReLU                                                           
____________________________________________________________________________
                     64 x 2 x 1 x 1      
Conv2d                                    578        True      
____________________________________________________________________________
                     64 x 2              
Flatten                                                        
____________________________________________________________________________

Total params: 6,722
Total trainable params: 6,722
Total non-trainable params: 0

Optimizer used: <function Adam at 0x7fc89882f040>
Loss function: <function cross_entropy at 0x7fc8bead5ca0>

Callbacks:
  - TrainEvalCallback
  - CastToTensor
  - Recorder
  - ProgressCallback
learn.fit_one_cycle(2, 0.01)
epoch train_loss valid_loss accuracy time
0 0.056674 0.038932 0.988714 00:07
1 0.022213 0.023022 0.992149 00:03
m = learn.model[0]
m
Sequential(
  (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (1): ReLU()
)
m[0].weight.shape, m[0].bias.shape, 
(torch.Size([4, 1, 3, 3]), torch.Size([4]))
path = untar_data(URLs.MNIST)
path.ls()
(#2) [Path('/root/.fastai/data/mnist_png/training'),Path('/root/.fastai/data/mnist_png/testing')]
def get_dls(bs=64):
    """Build DataLoaders over `path` with batch size `bs`.

    Inputs are PILImageBW images and targets are categories read from the
    parent folder name; the split uses the 'training' and 'testing' folders,
    and `Normalize()` is applied as a batch transform.
    """
    mnist_block = DataBlock(
        blocks=(ImageBlock(cls=PILImageBW), CategoryBlock),
        get_items=get_image_files,
        splitter=GrandparentSplitter('training', 'testing'),
        get_y=parent_label,
        batch_tfms=Normalize(),
    )
    return mnist_block.dataloaders(path, bs=bs)
dls = get_dls()
dls.show_batch(figsize=(4,4))
def simple_cnn():
    """Return a new five-convolution network ending in 10 output channels and a Flatten."""
    layers = [
        conv(1, 8, ks=5),
        conv(8, 16),
        conv(16, 32),
        conv(32, 64),
        conv(64, 10, act=False),  # final layer: no activation
        Flatten(),
    ]
    return nn.Sequential(*layers)
Learner(dls, simple_cnn()).summary()
Sequential (Input shape: 64 x 1 x 28 x 28)
============================================================================
Layer (type)         Output Shape         Param #    Trainable 
============================================================================
                     64 x 8 x 14 x 14    
Conv2d                                    208        True      
ReLU                                                           
____________________________________________________________________________
                     64 x 16 x 7 x 7     
Conv2d                                    1168       True      
ReLU                                                           
____________________________________________________________________________
                     64 x 32 x 4 x 4     
Conv2d                                    4640       True      
ReLU                                                           
____________________________________________________________________________
                     64 x 64 x 2 x 2     
Conv2d                                    18496      True      
ReLU                                                           
____________________________________________________________________________
                     64 x 10 x 1 x 1     
Conv2d                                    5770       True      
____________________________________________________________________________
                     64 x 10             
Flatten                                                        
____________________________________________________________________________

Total params: 30,282
Total trainable params: 30,282
Total non-trainable params: 0

Optimizer used: <function Adam at 0x7fc89882f040>
Loss function: FlattenedLoss of CrossEntropyLoss()

Callbacks:
  - TrainEvalCallback
  - CastToTensor
  - Recorder
  - ProgressCallback
from fastai.callback.hook import *

def fit(epochs=1):
    """Train a new `simple_cnn()` on `dls` with `Learner.fit` at learning rate 0.06.

    epochs: number of epochs to train.
    Returns the Learner, which carries an `ActivationStats(with_hist=True)` callback.
    """
    model = simple_cnn()
    stats_cb = ActivationStats(with_hist=True)
    learn = Learner(
        dls,
        model,
        loss_func=F.cross_entropy,
        metrics=accuracy,
        cbs=stats_cb,
    )
    learn.fit(epochs, 0.06)
    return learn

learn = fit()
/root/mambaforge/lib/python3.9/site-packages/fastai/callback/core.py:72: UserWarning: You are shadowing an attribute (modules) that exists in the learner. Use `self.learn.modules` to avoid this
  warn(f"You are shadowing an attribute ({name}) that exists in the learner. Use `self.learn.{name}` to avoid this")
epoch train_loss valid_loss accuracy time
0 2.306356 2.308944 0.100900 01:06
learn.activation_stats.plot_layer_stats(0)
learn.activation_stats.plot_layer_stats(-2)
dls = get_dls(512)
learn = fit()
epoch train_loss valid_loss accuracy time
0 0.557525 0.320446 0.896600 00:49
learn.activation_stats.plot_layer_stats(-2)
from fastai.callback.hook import *

def fit(epochs=1):
    """Train a new `simple_cnn()` on `dls` with `fit_one_cycle` at learning rate 0.06.

    epochs: number of epochs to train.
    Returns the Learner, which carries an `ActivationStats(with_hist=True)` callback.
    """
    model = simple_cnn()
    stats_cb = ActivationStats(with_hist=True)
    learn = Learner(
        dls,
        model,
        loss_func=F.cross_entropy,
        metrics=accuracy,
        cbs=stats_cb,
    )
    learn.fit_one_cycle(epochs, 0.06)
    return learn

learn = fit()
epoch train_loss valid_loss accuracy time
0 0.220707 0.074086 0.976800 00:41
learn.recorder.plot_sched()
learn.activation_stats.plot_layer_stats(-2)
learn.activation_stats.color_dim(-2)
def conv(ni, nf, ks=3, act=True):
    """Build a stride-2 convolution followed by batch normalisation.

    ni: number of input channels.
    nf: number of output channels (also the BatchNorm2d feature count).
    ks: kernel size; padding is set to `ks//2`.
    act: when true, wrap the (conv, batchnorm) pair and a ReLU in an outer
        `nn.Sequential`; otherwise return the (conv, batchnorm) pair alone.
    """
    conv_bn = nn.Sequential(
        nn.Conv2d(ni, nf, kernel_size=ks, stride=2, padding=ks // 2),
        nn.BatchNorm2d(nf),
    )
    # Keep the nested Sequential layout: the ReLU sits beside the pair, not inside it.
    return nn.Sequential(conv_bn, nn.ReLU()) if act else conv_bn

learn = fit()
epoch train_loss valid_loss accuracy time
0 0.136115 0.058227 0.985200 00:47
learn.activation_stats.color_dim(-2)
def fit(epochs=1, lr=0.06):
    """Train a new `simple_cnn()` on `dls` with `fit_one_cycle` and return the Learner.

    epochs: number of epochs passed to `fit_one_cycle`.
    lr: learning rate passed to `fit_one_cycle` (default 0.06).
    Returns the Learner, which carries an `ActivationStats(with_hist=True)` callback.
    """
    learn = Learner(dls, simple_cnn(), loss_func=F.cross_entropy,
                    metrics=accuracy, cbs=ActivationStats(with_hist=True))
    # Forward the caller's learning rate. It used to be hard-coded to 0.06,
    # so a call such as fit(5, lr=0.1) silently trained at 0.06.
    learn.fit_one_cycle(epochs, lr)
    return learn

learn = fit(5, lr=0.1)
learn = fit(5, lr=0.1)
epoch train_loss valid_loss accuracy time
0 0.216674 0.145344 0.955200 00:55
1 0.082331 0.063296 0.981300 00:48
2 0.048664 0.046084 0.985700 00:47
3 0.031943 0.029971 0.990000 00:46
4 0.015729 0.025128 0.992100 00:52
epoch train_loss valid_loss accuracy time
0 0.213008 0.103855 0.967300 00:45
1 0.080208 0.058328 0.981600 00:50
2 0.050969 0.041169 0.987600 00:53
3 0.032378 0.029437 0.991200 00:46
4 0.016578 0.023753 0.992100 00:47