from fastai.vision.all import *

# Toy single-layer model that produces the score matrix `y` used by every
# loss experiment below.
batch_size = 5   # number of samples in the batch (rows of X and y)
n_classes = 3    # number of classes (columns of W and y)
n_features = 9   # input dimension, named so W and X always agree on it

W = torch.randn(n_features, n_classes)
# NOTE(review): a single scalar bias is broadcast over every class column;
# a per-class bias would be torch.randn(n_classes) -- confirm intent.
b = torch.randn(1)
X = torch.randn(batch_size, n_features)
# ReLU zeroes out negative scores, so y is a non-negative
# (batch_size, n_classes) tensor.
y = F.relu(X @ W + b)
y.shape, y

(torch.Size([5, 3]),
 tensor([[1.6193, 0.2690, 0.0000],
         [0.0000, 2.2364, 0.0000],
         [6.1974, 0.0000, 3.0223],
         [0.0000, 0.0000, 3.4538],
         [2.7525, 0.0000, 2.7033]]))

The targets are integer class indices in {0, 1, 2}, one per sample.

# Draw one integer class label per sample, uniformly from [0, n_classes).
# torch.randint builds the tensor in a single call instead of allocating
# uninitialised memory and overwriting it in place.
t = torch.randint(0, n_classes, (batch_size,), dtype=torch.long)
t

tensor([2, 2, 2, 1, 2])

# Reference value: PyTorch's built-in cross-entropy criterion applied to the
# score matrix y and the class-index targets t (default settings).
criterion = nn.CrossEntropyLoss()
loss = criterion(y, t)
loss

tensor(2.3818)

# Cross-entropy decomposed into its two steps: log-softmax over the class
# dimension (dim=1), then negative log-likelihood against the targets t.
# The printed value matches the nn.CrossEntropyLoss result.
F.nll_loss(F.log_softmax(y, dim=1), t)

tensor(2.3818)

# Softmax along dim=1 turns each row of y into a probability distribution
# over the classes.
y.softmax(dim=1)

tensor([[0.6863, 0.1778, 0.1359],
        [0.0880, 0.8239, 0.0880],
        [0.9580, 0.0019, 0.0400],
        [0.0297, 0.0297, 0.9405],
        [0.4961, 0.0316, 0.4723]])

# Sanity check: the softmax probabilities of every row sum to 1.
y.softmax(dim=1).sum(dim=1)

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000])

# Per-row sum of log-probabilities, using the fused log_softmax. This is the
# reference for comparing against the explicit softmax-then-log form.
y.log_softmax(dim=1).sum(dim=1)

tensor([-4.0992, -5.0538, -9.5012, -7.0915, -4.9047])

# Same quantity as y.log_softmax(dim=1).sum(dim=1), but computed as an
# explicit softmax followed by log; the printed values agree.
y.softmax(dim=1).log().sum(dim=1)

tensor([-4.0992, -5.0538, -9.5012, -7.0915, -4.9047])

# NLL applied to hand-built log-probabilities (softmax, then log) gives the
# same value as nn.CrossEntropyLoss on the raw scores.
F.nll_loss(y.softmax(dim=1).log(), t)

tensor(2.3818)

# Cross-entropy from first principles, written as one pipeline:
#   1. softmax over classes -> per-row probabilities
#   2. log                  -> per-row log-probabilities
#   3. pick, for each row i, the log-probability of its target class t[i]
#   4. negate the mean of those picked values
y0 = -(
    y.softmax(dim=1)
    .log()[range(batch_size), t.tolist()]
    .mean()
)
y0

tensor(2.3818)

# Built-in cross-entropy once more, to compare directly with the manual
# computation stored in y0.
nn.CrossEntropyLoss()(y, t)

tensor(2.3818)