Dive into Deep Learning (PyTorch ver.): Post-Reading Exercises
- Dive into Deep Learning, original repository: https://github.com/d2l-ai/d2l-zh
- Dive into Deep Learning (PyTorch ver.): https://tangshusen.me/Dive-into-DL-PyTorch/#/
Knowledge structure:
The main purpose of this post is to take notes while reading. In machine learning, c7w has tried to get started from several subfields and has trained a few models, but still lacks systematic, structured study; hopefully reading this book will bring more gains.
Compared with some of the earlier notes, this post leans more toward practice, i.e., toward genuinely improving coding ability.
This part covers:
- {Finished} [5-5] Implement and train a CNN-style network for facial-expression recognition; the focus is on building a small machine-learning training framework and then evaluating the model's accuracy.
- {Finished} [5-11, 5-12] Implement ResNet and DenseNet, paying attention to what it takes to make the tensor dimensions match.
[5-5] CNN
This exercise is mainly about writing out the model-training boilerplate once, partly by copying and partly from scratch.
train.py
from toolkit.dataset import *
import torch
from toolkit.utils import get_device
from toolkit.net import *
from toolkit.procedure import *
# By c7w, created on 2022/1/27.
'''
Usage:
+ Define your model in toolkit/net.py
+ Define your dataset in toolkit/dataset.py
+ Define configuration in main.py
'''
device = get_device()
device = 'cpu'  # override: force CPU for this run; remove this line to use the device returned by get_device()
print(f"Now on {device}")
# Configuration Here
config = {
'epochs': 10000,
'batch_size': 16,
# 'optimizer': in training stage
'early_stop': 20,
'save_path': 'save/model2-rms.pth'
}
if __name__ == "__main__":
# Data Preparation Stage
tr_data = Data('train')
vd_data = Data('valid')
tt_data = Data('test')
tr_set = DataLoader(tr_data, config['batch_size'], shuffle=True, drop_last=False)
vd_set = DataLoader(vd_data, config['batch_size'], shuffle=False, drop_last=False)
tt_set = DataLoader(tt_data, config['batch_size'], shuffle=False, drop_last=False)
# Training Stage
model = LeNet().to(device)
# config['optimizer'] = torch.optim.Adam(model.parameters())
config['optimizer'] = torch.optim.RMSprop(model.parameters())
model_loss, model_loss_record = train(tr_set, vd_set, model, config, device)
# Test Stage
del model
model = LeNet().to(device)
    model.load_state_dict(torch.load(config['save_path'], map_location=device))
acc = test(tt_set, model, device)
print(acc)
toolkit/dataset.py
import torch
import random
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from icecream import ic
from PIL import Image
class Data(Dataset):
def __init__(self, mode):
self.mode = mode
data = pd.read_csv("./project/data.csv")
usage = "Test" if mode == "test" else "Training"
data = data.loc[data.Usage == usage]
features = []
labels = []
for r, row in data.iterrows():
labels.append( int(row['emotion']) )
feature = [int(number) / 255 for number in row['pixels'].split(" ")]
features.append(torch.Tensor(feature).view(1, 48, 48))
self.data = list(zip(features, labels))
        random.seed(0)  # fixed seed so Data('train') and Data('valid') shuffle identically and their slices stay disjoint
        random.shuffle(self.data)
# im = Image.fromarray((self.data[0][0].view(48, 48) * 255).numpy())
# im = im.convert('L')
# ic(self.data[0][0], self.data[0][1])
# im.show()
l = len(self.data) // 10
if mode == 'valid':
self.data = self.data[-l : ]
else:
self.data = self.data[ : len(self.data) - l]
print(f"Reading {mode} set finished with {len(self.data)} samples in total.")
print("Example:")
print(self.data[0])
print("\n")
def __getitem__(self, index):
return self.data[index]
def __len__(self):
return len(self.data)
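The Data class above assumes a FER2013-style CSV: an emotion column with the integer label, a pixels column holding 48 x 48 = 2304 space-separated grayscale values, and a Usage column that is either "Training" or "Test". A minimal smoke test under that assumption (the tiny synthetic data.csv below is hypothetical; only the ./project/data.csv path comes from the code above):

import os
import random
import pandas as pd
from toolkit.dataset import Data

# Build a tiny synthetic data.csv with the assumed column layout.
os.makedirs("project", exist_ok=True)
rows = []
for i in range(20):
    pixels = " ".join(str(random.randint(0, 255)) for _ in range(48 * 48))
    rows.append({"emotion": i % 7,
                 "Usage": "Training" if i < 15 else "Test",
                 "pixels": pixels})
pd.DataFrame(rows).to_csv("project/data.csv", index=False)

tr, vd, tt = Data('train'), Data('valid'), Data('test')
x, y = tr[0]
print(x.shape, y)  # expected: torch.Size([1, 48, 48]) and an integer label in [0, 7)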
toolkit/net.py
import torch.nn as nn
class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
self.net = nn.Sequential(
# Conv Layer
nn.Conv2d(1, 16, kernel_size=9, padding=4),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(16, 32, kernel_size=5, padding=2),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(32, 64, kernel_size=5, padding=2),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Flatten(),
# FC Layer
nn.Linear(64*6*6, 84),
nn.BatchNorm1d(84),
nn.ReLU(),
nn.Linear(84, 7)
)
self.Loss = nn.CrossEntropyLoss()
def forward(self, x):
return self.net(x)
def loss(self, pred, y):
return self.Loss(pred, y).mean()
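The 64*6*6 fed into the first linear layer comes from the 48x48 input: the padded convolutions keep the spatial size, and each of the three stride-2 max-pools halves it (48 -> 24 -> 12 -> 6), leaving 64 channels of 6x6 feature maps. A quick shape check, as a sketch (assuming a 48x48 grayscale input as in the dataset above):

import torch
from toolkit.net import LeNet

net = LeNet()
net.eval()  # eval() so the BatchNorm layers accept a batch of size 1
x = torch.randn(1, 1, 48, 48)  # (batch, channels, height, width)
feat = net.net[:-4](x)         # run everything up to and including nn.Flatten()
print(feat.shape)              # expected: torch.Size([1, 2304]), i.e. 64 * 6 * 6
print(net(x).shape)            # expected: torch.Size([1, 7]) class logits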
toolkit/procedure.py
import torch
import numpy as np
def train(tr_set, vd_set, model, config, device):
max_epoch_count = config['epochs']
optimizer = config['optimizer']
loss_record = {'train': [], 'valid': []}
    curr_min_loss = float('inf')  # best (lowest) validation loss seen so far
early_stop_cnt = 0
epoch = 0
while epoch < max_epoch_count:
model.train()
for x, y in tr_set:
optimizer.zero_grad()
x, y = x.to(device), y.to(device)
pred = model(x)
l = model.loss(pred, y)
l.backward()
optimizer.step()
loss_record['train'].append(l.detach().cpu().item())
valid_mse = validate(vd_set, model, device)
        print('Epoch {:4d} completed, tr_loss = {:.4f}'.format(epoch + 1, valid_mse))  # note: despite the "tr_loss" label, this is the validation loss from validate()
if valid_mse < curr_min_loss:
# Save model if model improved
curr_min_loss = valid_mse
print('Saving model (epoch = {:4d}, loss = {:.4f})'.format(epoch + 1, curr_min_loss))
torch.save(model.state_dict(), config['save_path']) # Save model to specified path
early_stop_cnt = 0
else:
early_stop_cnt += 1
epoch += 1
loss_record['valid'].append(valid_mse)
if early_stop_cnt > config['early_stop']:
# Stop training if your model stops improving for "config['early_stop']" epochs.
break
print(f'Finished training after {epoch} epochs.')
return curr_min_loss, loss_record
def validate(vd_set, model, device):
model.eval()
total_loss = 0
for x, y in vd_set:
x, y = x.to(device), y.to(device)
with torch.no_grad():
pred = model(x)
vd_loss = model.loss(pred, y)
total_loss += vd_loss.detach().cpu().item() * len(x)
total_loss = total_loss / len(vd_set.dataset) # compute averaged loss
return total_loss
def test(tt_set, model, device):
model.eval()
total_right = 0
for x, y in tt_set:
x, y = x.to(device), y.to(device)
with torch.no_grad():
pred = model(x).detach().cpu().numpy()
for i, logit in enumerate(pred):
if np.argmax(logit) == y[i]: total_right += 1
    acc = total_right / len(tt_set.dataset)  # compute accuracy over the test set
return acc
def predict(tt_set, model, device):
model.eval()
preds = []
for x in tt_set:
x = x.to(device)
with torch.no_grad():
pred = model(x)
preds.append(pred.detach().cpu())
preds = torch.cat(preds, dim=0).numpy()
return preds
toolkit/utils.py
import torch
def get_device():
return 'cuda' if torch.cuda.is_available() else 'cpu'
def get_one_hot(k, dim):
t = torch.zeros(size=(dim, ))
t[k] = 1
return t
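get_one_hot is not used by the scripts above; for reference, a quick usage check:

from toolkit.utils import get_device, get_one_hot

print(get_device())       # 'cuda' if a GPU is visible, otherwise 'cpu'
print(get_one_hot(3, 7))  # tensor([0., 0., 0., 1., 0., 0., 0.])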
- Training results (the final line below is the test-set accuracy returned by test()):
Now on cpu
Reading train set finished with 25839 samples in total.
Example:
(tensor([[[0.2000, 0.1922, 0.2118, ..., 0.1804, 0.2392, 0.2863],
[0.1922, 0.1843, 0.1647, ..., 0.2392, 0.2078, 0.2588],
[0.1961, 0.1804, 0.1608, ..., 0.2039, 0.1569, 0.2000],
...,
[0.3098, 0.1961, 0.2353, ..., 0.2980, 0.3255, 0.3412],
[0.2706, 0.1647, 0.2471, ..., 0.3098, 0.3294, 0.3373],
[0.2392, 0.1686, 0.2157, ..., 0.3255, 0.3333, 0.3490]]]), 3)
Reading valid set finished with 2870 samples in total.
Example:
(tensor([[[0.1725, 0.1333, 0.1451, ..., 0.3176, 0.3216, 0.4510],
[0.1333, 0.1333, 0.1255, ..., 0.3137, 0.3216, 0.4471],
[0.1333, 0.1216, 0.1137, ..., 0.3137, 0.3216, 0.4471],
...,
[0.3922, 0.3255, 0.4039, ..., 0.4000, 0.4510, 0.5137],
[0.3882, 0.3098, 0.3961, ..., 0.3843, 0.4549, 0.5333],
[0.4000, 0.2745, 0.3725, ..., 0.3961, 0.4392, 0.5137]]]), 3)
Reading test set finished with 6461 samples in total.
Example:
(tensor([[[0.7373, 0.7608, 0.7255, ..., 0.8549, 0.8157, 0.8275],
[0.7529, 0.7608, 0.7137, ..., 0.8588, 0.8314, 0.8157],
[0.7804, 0.7451, 0.7137, ..., 0.8510, 0.8353, 0.8157],
...,
[0.4784, 0.5765, 0.5804, ..., 0.6863, 0.5451, 0.4235],
[0.2549, 0.3373, 0.4588, ..., 0.5804, 0.5373, 0.5608],
[0.3608, 0.3961, 0.6275, ..., 0.5569, 0.3176, 0.6314]]]), 3)
Epoch 1 completed, tr_loss = 1.4529
Saving model (epoch = 1, loss = 1.4529)
Epoch 2 completed, tr_loss = 1.2995
Saving model (epoch = 2, loss = 1.2995)
Epoch 3 completed, tr_loss = 1.1131
Saving model (epoch = 3, loss = 1.1131)
Epoch 4 completed, tr_loss = 1.0275
Saving model (epoch = 4, loss = 1.0275)
Epoch 5 completed, tr_loss = 1.1062
Epoch 6 completed, tr_loss = 0.8805
Saving model (epoch = 6, loss = 0.8805)
Epoch 7 completed, tr_loss = 0.7520
Saving model (epoch = 7, loss = 0.7520)
Epoch 8 completed, tr_loss = 0.8390
Epoch 9 completed, tr_loss = 0.8715
Epoch 10 completed, tr_loss = 0.6758
Saving model (epoch = 10, loss = 0.6758)
Epoch 11 completed, tr_loss = 0.6634
Saving model (epoch = 11, loss = 0.6634)
Epoch 12 completed, tr_loss = 0.5063
Saving model (epoch = 12, loss = 0.5063)
Epoch 13 completed, tr_loss = 0.5055
Saving model (epoch = 13, loss = 0.5055)
Epoch 14 completed, tr_loss = 0.6266
Epoch 15 completed, tr_loss = 0.4653
Saving model (epoch = 15, loss = 0.4653)
Epoch 16 completed, tr_loss = 0.4373
Saving model (epoch = 16, loss = 0.4373)
Epoch 17 completed, tr_loss = 0.3892
Saving model (epoch = 17, loss = 0.3892)
Epoch 18 completed, tr_loss = 0.4048
Epoch 19 completed, tr_loss = 0.4376
Epoch 20 completed, tr_loss = 0.3657
Saving model (epoch = 20, loss = 0.3657)
Epoch 21 completed, tr_loss = 0.3765
Epoch 22 completed, tr_loss = 0.3329
Saving model (epoch = 22, loss = 0.3329)
Epoch 23 completed, tr_loss = 0.3969
Epoch 24 completed, tr_loss = 0.3382
Epoch 25 completed, tr_loss = 0.3283
Saving model (epoch = 25, loss = 0.3283)
Epoch 26 completed, tr_loss = 0.3192
Saving model (epoch = 26, loss = 0.3192)
Epoch 27 completed, tr_loss = 0.3671
Epoch 28 completed, tr_loss = 0.3457
Epoch 29 completed, tr_loss = 0.3352
Epoch 30 completed, tr_loss = 0.3461
Epoch 31 completed, tr_loss = 0.3258
Epoch 32 completed, tr_loss = 0.3097
Saving model (epoch = 32, loss = 0.3097)
Epoch 33 completed, tr_loss = 0.3976
Epoch 34 completed, tr_loss = 0.3364
Epoch 35 completed, tr_loss = 0.3275
Epoch 36 completed, tr_loss = 0.3179
Epoch 37 completed, tr_loss = 0.3415
Epoch 38 completed, tr_loss = 0.3471
Epoch 39 completed, tr_loss = 0.3302
Epoch 40 completed, tr_loss = 0.3407
Epoch 41 completed, tr_loss = 0.4045
Epoch 42 completed, tr_loss = 0.3310
Epoch 43 completed, tr_loss = 0.3626
Epoch 44 completed, tr_loss = 0.3288
Epoch 45 completed, tr_loss = 0.3600
Epoch 46 completed, tr_loss = 0.3866
Epoch 47 completed, tr_loss = 0.3613
Epoch 48 completed, tr_loss = 0.3402
Epoch 49 completed, tr_loss = 0.3562
Epoch 50 completed, tr_loss = 0.3674
Epoch 51 completed, tr_loss = 0.3733
Epoch 52 completed, tr_loss = 0.3461
Epoch 53 completed, tr_loss = 0.3542
Finished training after 53 epochs.
0.5304132487231079
[5-11, 5-12] ResNet & DenseNet
main.py
Randomly generate data simulating batch_size = 4, input_channels = 3, and pic_size = 96x96, then feed it through the implemented networks and inspect the result; if no error occurs, the dimensions line up correctly.
import torch
from ResNet import ResNet
from DenseNet import DenseNet
from icecream import ic as print
data = torch.randn(size=(4, 3, 96, 96))
# print(data)
# net = ResNet(3)
net = DenseNet(3)
print(net)
# print(net(data))
print(net(data).shape)
ResNet.py
This file implements ResNet-18.
import torch.nn as nn
# Implementation of ResNet-18
class Residual(nn.Module):
    # stride controls the height/width of the output feature map
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
# If in_channels != out_channels
# Then use 1x1 conv layer to change channel size
self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride) if in_channels != out_channels else None
self.bn = nn.BatchNorm2d(out_channels)
self.bn2 = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU()
def forward(self, x):
y = self.conv1(x)
y = self.bn(y)
y = self.relu(y)
y = self.conv2(y)
y = self.bn2(y)
if self.conv3:
x = self.conv3(x)
return self.relu(x + y)
def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
blk = []
for i in range(num_residuals):
if i == 0 and not first_block:
blk.append(Residual(in_channels, out_channels, 2))
else:
blk.append(Residual(out_channels, out_channels))
return nn.Sequential(*blk)
class ResNet(nn.Module):
def __init__(self, in_channels):
super().__init__()
self.start = nn.Sequential(
nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
)
self.residual = nn.ModuleList([
resnet_block(64, 64, 2, True),
resnet_block(64, 128, 2),
resnet_block(128, 256, 2),
resnet_block(256, 512, 2),
])
self.output = nn.Sequential(
nn.Flatten(),
nn.Linear(512, 10),
)
def forward(self, x):
old_shape = x.shape
x = self.start(x)
for rb in self.residual:
x = rb(x)
        x = x.mean(dim=(2, 3)).view(old_shape[0], -1, 1, 1)  # global average pooling; the (1, 1) view is undone by nn.Flatten below
x = self.output(x)
return x
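For the 96x96 input used in main.py, the spatial size shrinks as 96 -> 48 (stride-2 conv) -> 24 (max-pool) -> 24 -> 12 -> 6 -> 3 across the four residual stages, while the channels grow 64 -> 64 -> 128 -> 256 -> 512; averaging over dims (2, 3) then leaves a (batch, 512) tensor for the linear head. A sketch that traces these intermediate shapes (assuming the ResNet class above):

import torch
from ResNet import ResNet

net = ResNet(3)
net.eval()
x = torch.randn(4, 3, 96, 96)
x = net.start(x)
print(x.shape)              # expected: torch.Size([4, 64, 24, 24])
for rb in net.residual:
    x = rb(x)
    print(x.shape)          # expected: [4,64,24,24] -> [4,128,12,12] -> [4,256,6,6] -> [4,512,3,3]
pooled = x.mean(dim=(2, 3))
print(pooled.shape)         # expected: torch.Size([4, 512]), matching nn.Linear(512, 10)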
ic| net: ResNet(
(start): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
(residual): ModuleList(
(0): Sequential(
(0): Residual(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
(1): Residual(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
)
(1): Sequential(
(0): Residual(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv3): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
(1): Residual(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
)
(2): Sequential(
(0): Residual(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
(1): Residual(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
)
(3): Sequential(
(0): Residual(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
(1): Residual(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
)
)
)
(output): Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=512, out_features=10, bias=True)
)
)
ic| net(data).shape: torch.Size([4, 10])
DenseNet.py
This file implements DenseNet.
import torch
import torch.nn as nn
# Implementation of DenseNet
def conv_block(in_channels, out_channels):
return nn.Sequential(
nn.BatchNorm2d(in_channels),
nn.ReLU(),
nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
)
class DenseBlock(nn.Module):
    # out_channels is the growth rate, i.e. the number of channels each conv block adds
def __init__(self, num_convs, in_channels, out_channels):
super().__init__()
net = []
for i in range(num_convs):
in_c = in_channels + i * out_channels
net.append(conv_block(in_c, out_channels))
self.net = nn.ModuleList(net)
self.out_channels = in_channels + num_convs * out_channels
def forward(self, x):
for blk in self.net:
y = blk(x)
x = torch.cat((x, y), dim = 1)
return x
def transition_block(in_channels, out_channels):
return nn.Sequential(
nn.BatchNorm2d(in_channels),
nn.ReLU(),
nn.Conv2d(in_channels, out_channels, kernel_size=1),
nn.AvgPool2d(kernel_size=2, stride=2)
)
class DenseNet(nn.Module):
def __init__(self, in_channels):
super().__init__()
self.start = nn.Sequential(
nn.Conv2d(in_channels, 64, kernel_size=7, padding=3, stride=2),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, padding=1, stride=2)
)
dense_list = []
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
for i, num_convs in enumerate(num_convs_in_dense_blocks):
DB = DenseBlock(num_convs, num_channels, growth_rate)
num_channels = DB.out_channels
dense_list.append(DB)
if i != len(num_convs_in_dense_blocks) - 1:
dense_list.append( transition_block(num_channels, num_channels // 2))
num_channels = num_channels // 2
self.dense = nn.ModuleList(dense_list)
self.output = nn.Sequential(
nn.BatchNorm2d(num_channels),
nn.ReLU()
)
self.fc = nn.Sequential(
nn.Flatten(),
nn.Linear(num_channels, 10)
)
def forward(self, x):
x = self.start(x)
for layer in self.dense:
x = layer(x)
x = self.output(x)
x = x.mean(dim=(2, 3))
x = self.fc(x)
return x
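The 248 channels seen by the final BatchNorm2d and Linear in the printout below can be tracked by hand: starting from 64, each dense block adds 4 x 32 = 128 channels and each transition block halves the count, so 64 -> 192 -> 96 -> 224 -> 112 -> 240 -> 120 -> 248. A sketch that reproduces this bookkeeping and confirms it with a forward pass (assuming the DenseNet class above):

import torch
from DenseNet import DenseNet

# Channel bookkeeping mirroring DenseNet.__init__
num_channels, growth_rate = 64, 32
for i, num_convs in enumerate([4, 4, 4, 4]):
    num_channels += num_convs * growth_rate  # dense block: concatenation grows the channel count
    if i != 3:
        num_channels //= 2                   # transition block halves it
print(num_channels)                          # expected: 248

net = DenseNet(3)
net.eval()
x = torch.randn(4, 3, 96, 96)
feat = net.start(x)
for layer in net.dense:
    feat = layer(feat)
print(feat.shape)        # expected: torch.Size([4, 248, 3, 3])
print(net(x).shape)      # expected: torch.Size([4, 10])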
ic| net: DenseNet(
(start): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
(dense): ModuleList(
(0): DenseBlock(
(net): ModuleList(
(0): Sequential(
(0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(1): Sequential(
(0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(96, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(2): Sequential(
(0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(3): Sequential(
(0): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(160, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
(1): Sequential(
(0): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(2): DenseBlock(
(net): ModuleList(
(0): Sequential(
(0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(96, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(1): Sequential(
(0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(2): Sequential(
(0): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(160, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(3): Sequential(
(0): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(192, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
(3): Sequential(
(0): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(224, 112, kernel_size=(1, 1), stride=(1, 1))
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(4): DenseBlock(
(net): ModuleList(
(0): Sequential(
(0): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(112, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(1): Sequential(
(0): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(144, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(2): Sequential(
(0): BatchNorm2d(176, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(176, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(3): Sequential(
(0): BatchNorm2d(208, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(208, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
(5): Sequential(
(0): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(240, 120, kernel_size=(1, 1), stride=(1, 1))
(3): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(6): DenseBlock(
(net): ModuleList(
(0): Sequential(
(0): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(120, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(1): Sequential(
(0): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(152, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(2): Sequential(
(0): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(184, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(3): Sequential(
(0): BatchNorm2d(216, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
(2): Conv2d(216, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
)
(output): Sequential(
(0): BatchNorm2d(248, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): ReLU()
)
(fc): Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=248, out_features=10, bias=True)
)
)
ic| net(data).shape: torch.Size([4, 10])