
CNN Architecture Evolution: From LeNet to EfficientNet

1. Technical Analysis

1.1 The Evolution of CNN Architectures

Convolutional neural networks have evolved from simple to increasingly sophisticated designs:

LeNet (1998) → AlexNet (2012) → VGG (2014) → ResNet (2015) → EfficientNet (2019)

1.2 Comparison of Classic CNN Architectures

| Model        | Layers | Params  | Top-1 Accuracy | Key Feature              |
|--------------|--------|---------|----------------|--------------------------|
| LeNet        | 5      | 60K     | 99% (MNIST)    | Classic baseline         |
| AlexNet      | 8      | 60M     | 83% (ImageNet) | ReLU + Dropout           |
| VGG          | 16/19  | 138M    | 92%            | Uniform 3x3 convolutions |
| ResNet       | 152    | 60M     | 96%            | Residual connections     |
| EfficientNet | B0-B7  | 5M-66M  | 77%-88%        | Compound scaling         |

1.3 Core CNN Components

  • Convolutional layers: feature extraction
  • Pooling layers: downsampling
  • Activation functions: non-linear transformation
  • Fully connected layers: classification
  • Residual connections: gradient propagation
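The spatial size after a convolution or pooling layer follows one formula: out = floor((in + 2·padding − kernel) / stride) + 1. As a rough illustration, the sketch below traces a 28x28 MNIST image through two 5x5 convolutions and two 2x2 max-pools (the layer sizes match the LeNet code in section 2.1):

```python
def conv_out(size, kernel, stride=1, padding=0):
    """Spatial output size of a conv or pooling layer."""
    return (size + 2 * padding - kernel) // stride + 1

size = 28                    # MNIST input
size = conv_out(size, 5)     # 5x5 conv, no padding -> 24
size = conv_out(size, 2, 2)  # 2x2 max-pool, stride 2 -> 12
size = conv_out(size, 5)     # second 5x5 conv -> 8
size = conv_out(size, 2, 2)  # second pool -> 4
print(size)  # 4, hence the 16 * 4 * 4 flatten in LeNet
```

The same arithmetic explains why padding=1 with a 3x3 kernel (as in ResNet below) preserves spatial size.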

2. Core Implementations

2.1 LeNet Implementation

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # For 28x28 inputs: conv1 -> 24x24, pool -> 12x12, conv2 -> 8x8, pool -> 4x4
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
```
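The table in section 1.2 quotes about 60K parameters for LeNet; the classic LeNet-5 (32x32 inputs, so fc1 takes 16·5·5 features) lands near 61K, while the leaner 28x28 variant above comes in lower. Tallying this implementation by hand (a conv layer has out_ch·(in_ch·k·k) + out_ch parameters, a linear layer n_in·n_out + n_out):

```python
def conv_params(in_ch, out_ch, k):
    return out_ch * (in_ch * k * k) + out_ch  # weights + biases

def linear_params(n_in, n_out):
    return n_in * n_out + n_out

total = (conv_params(1, 6, 5)              # conv1: 156
         + conv_params(6, 16, 5)           # conv2: 2,416
         + linear_params(16 * 4 * 4, 120)  # fc1: 30,840
         + linear_params(120, 84)          # fc2: 10,164
         + linear_params(84, 10))          # fc3: 850
print(total)  # 44426
```

Most of the budget sits in fc1, which is why later architectures replace large fully connected layers with global pooling.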

2.2 ResNet Implementation

```python
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Projection shortcut when the shape changes; identity otherwise
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        residual = self.shortcut(x)
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.bn2(self.conv2(x))
        x += residual
        return F.relu(x)

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_channels = 64
        # 3x3 stride-1 stem (CIFAR-style); ImageNet variants use a 7x7 stride-2 stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        # Only the first block of a stage may downsample
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = F.avg_pool2d(x, 4)  # 4x4 feature map remains for 32x32 inputs
        x = x.view(x.size(0), -1)
        return self.fc(x)

def ResNet18(num_classes=10):
    return ResNet(ResidualBlock, [2, 2, 2, 2], num_classes)

def ResNet34(num_classes=10):
    return ResNet(ResidualBlock, [3, 4, 6, 3], num_classes)
```
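The names ResNet-18 and ResNet-34 count weighted layers: the stem convolution, two convolutions per residual block, and the final fully connected layer. A quick sketch of that bookkeeping, using the per-stage block counts from the constructors above:

```python
def resnet_depth(num_blocks, convs_per_block=2):
    """Weighted-layer count: stem conv + block convs + final fc."""
    return 1 + convs_per_block * sum(num_blocks) + 1

print(resnet_depth([2, 2, 2, 2]))  # 18
print(resnet_depth([3, 4, 6, 3]))  # 34
```

Projection convolutions in the shortcuts are conventionally not counted toward the depth.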

2.3 EfficientNet Implementation

```python
class MBConv(nn.Module):
    def __init__(self, in_channels, out_channels, expansion_factor=6, stride=1):
        super().__init__()
        hidden_dim = in_channels * expansion_factor
        # The skip connection is only valid when input and output shapes match
        self.use_residual = stride == 1 and in_channels == out_channels
        self.conv = nn.Sequential(
            # 1x1 expansion
            nn.Conv2d(in_channels, hidden_dim, kernel_size=1),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(),
            # 3x3 depthwise convolution (groups=hidden_dim)
            nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, stride=stride,
                      padding=1, groups=hidden_dim),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(),
            # 1x1 projection, no activation
            nn.Conv2d(hidden_dim, out_channels, kernel_size=1),
            nn.BatchNorm2d(out_channels)
        )

    def forward(self, x):
        out = self.conv(x)
        if self.use_residual:
            out = out + x
        return out

class EfficientNet(nn.Module):
    def __init__(self, width_mult=1.0, depth_mult=1.0, num_classes=1000):
        super().__init__()
        base_channels = int(32 * width_mult)
        self.stem = nn.Sequential(
            nn.Conv2d(3, base_channels, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(base_channels),
            nn.ReLU6()
        )
        self.blocks = self._make_blocks(width_mult, depth_mult)
        head_channels = int(1280 * width_mult)
        self.head = nn.Sequential(
            nn.Conv2d(self._get_last_channels(width_mult), head_channels, kernel_size=1),
            nn.BatchNorm2d(head_channels),
            nn.ReLU6(),
            nn.AdaptiveAvgPool2d(1)
        )
        self.fc = nn.Linear(head_channels, num_classes)

    def _make_blocks(self, width_mult, depth_mult):
        blocks = []
        # (expansion, out_channels, repeats, kernel_size) per stage;
        # this simplified MBConv always uses a 3x3 depthwise kernel
        config = [
            (1, 16, 1, 3),
            (6, 24, 2, 3),
            (6, 40, 2, 5),
            (6, 80, 3, 3),
            (6, 112, 3, 5),
            (6, 192, 4, 5),
            (6, 320, 1, 3)
        ]
        in_channels = int(32 * width_mult)
        for exp_factor, out_channels, repeats, kernel_size in config:
            out_channels = int(out_channels * width_mult)
            repeats = max(1, int(repeats * depth_mult))  # never drop a stage entirely
            for i in range(repeats):
                # Downsample on the first block of a stage that changes channels
                stride = 2 if i == 0 and in_channels != out_channels else 1
                blocks.append(MBConv(in_channels, out_channels, exp_factor, stride))
                in_channels = out_channels
        return nn.Sequential(*blocks)

    def _get_last_channels(self, width_mult):
        return int(320 * width_mult)

    def forward(self, x):
        x = self.stem(x)
        x = self.blocks(x)
        x = self.head(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
```
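MBConv's depthwise convolution (groups=hidden_dim) is what keeps the parameter count low: a depthwise 3x3 filters each channel independently, so its cost scales with C rather than C². A rough parameter comparison for a single 3x3 layer on C channels (biases ignored; C=192 picked to match a later MBConv stage):

```python
def regular_conv_params(channels, k=3):
    # every output channel sees every input channel
    return channels * channels * k * k

def depthwise_conv_params(channels, k=3):
    # one k*k filter per channel
    return channels * k * k

c = 192
print(regular_conv_params(c))    # 331776
print(depthwise_conv_params(c))  # 1728, a 192x reduction
```

The savings ratio is exactly the channel count, which is why the reduction grows in the deeper, wider stages.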

3. Performance Comparison

3.1 CNN Model Comparison

| Model           | Params (M) | FLOPs (G) | Top-1 | Top-5 |
|-----------------|------------|-----------|-------|-------|
| VGG-16          | 138        | 15.5      | 71.5% | 90.1% |
| ResNet-50       | 25         | 4.1       | 76.1% | 92.8% |
| ResNet-152      | 60         | 11.6      | 78.5% | 94.1% |
| EfficientNet-B0 | 5.3        | 0.39      | 77.3% | 93.3% |
| EfficientNet-B7 | 66         | 37        | 84.4% | 97.1% |
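One way to read the comparison above is accuracy per million parameters. A quick calculation over three of the rows (numbers copied from the table) shows why EfficientNet-B0 is attractive for constrained deployments:

```python
models = {
    'VGG-16':          (138.0, 71.5),  # (params in M, Top-1 %)
    'ResNet-50':       (25.0, 76.1),
    'EfficientNet-B0': (5.3, 77.3),
}
for name, (params_m, top1) in models.items():
    print(f"{name}: {top1 / params_m:.2f} Top-1 points per M params")
# VGG-16 -> 0.52, ResNet-50 -> 3.04, EfficientNet-B0 -> 14.58
```

This metric is only a rough proxy, since accuracy does not scale linearly with parameters, but the order-of-magnitude gap is real.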

3.2 Effect of Model Scaling

| EfficientNet | Width | Depth | Resolution | Top-1 |
|--------------|-------|-------|------------|-------|
| B0           | 1.0   | 1.0   | 224        | 77.3% |
| B1           | 1.0   | 1.1   | 240        | 79.1% |
| B2           | 1.1   | 1.2   | 260        | 80.1% |
| B3           | 1.2   | 1.4   | 300        | 81.6% |
| B4           | 1.4   | 1.8   | 380        | 83.0% |
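The Width/Depth/Resolution columns follow EfficientNet's compound scaling rule: choose base coefficients α (depth), β (width), γ (resolution) under the constraint α·β²·γ² ≈ 2, then raise each to a shared exponent φ. Using the coefficients reported in the EfficientNet paper (α=1.2, β=1.1, γ=1.15), a sketch of the rule; note the published B1-B7 settings are hand-tuned and rounded, so the table above deviates slightly from the raw formula:

```python
ALPHA, BETA, GAMMA = 1.2, 1.1, 1.15  # depth, width, resolution coefficients

# Constraint: raising phi by 1 should roughly double FLOPs
print(round(ALPHA * BETA**2 * GAMMA**2, 2))  # 1.92, close to 2

def multipliers(phi):
    """Depth, width, resolution multipliers for compound coefficient phi."""
    return ALPHA**phi, BETA**phi, GAMMA**phi

d, w, r = multipliers(1)
print(round(d, 2), round(w, 2), round(224 * r))  # 1.2 1.1 258
```

FLOPs scale roughly linearly with depth and quadratically with width and resolution, which is where the β² and γ² in the constraint come from.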

3.3 Inference Speed Comparison

| Model           | Speed (imgs/s) | Memory (GB) |
|-----------------|----------------|-------------|
| ResNet-18       | 1000           | 1.2         |
| ResNet-50       | 600            | 2.0         |
| EfficientNet-B0 | 1500           | 0.8         |
| EfficientNet-B3 | 800            | 1.5         |

4. Best Practices

4.1 Choosing a CNN Model

```python
def select_cnn_model(task_type, constraints):
    # Small EfficientNet when speed matters, a large one when accuracy matters
    if constraints.get('speed', False):
        return EfficientNet(width_mult=1.0, depth_mult=1.0)
    elif constraints.get('accuracy', False):
        return EfficientNet(width_mult=1.8, depth_mult=2.6)
    else:
        # ResNet50 is assumed to be defined analogously to ResNet18/34 above
        return ResNet50()

class CNNFactory:
    @staticmethod
    def create(config):
        if config['type'] == 'resnet':
            return ResNet18(num_classes=config['num_classes'])
        elif config['type'] == 'efficientnet':
            return EfficientNet(
                width_mult=config.get('width_mult', 1.0),
                depth_mult=config.get('depth_mult', 1.0),
                num_classes=config['num_classes']
            )
```

4.2 CNN Training Loop

```python
class CNNTrainer:
    def __init__(self, model, optimizer, scheduler, loss_fn, device='cuda'):
        self.model = model.to(device)
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.loss_fn = loss_fn
        self.device = device

    def train_step(self, inputs, targets):
        self.optimizer.zero_grad()
        inputs = inputs.to(self.device)
        targets = targets.to(self.device)
        outputs = self.model(inputs)
        loss = self.loss_fn(outputs, targets)
        loss.backward()
        self.optimizer.step()
        # Per-batch scheduler step; suits OneCycleLR or cosine-style schedules
        self.scheduler.step()
        return loss.item()

    def evaluate(self, dataloader):
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in dataloader:
                inputs = inputs.to(self.device)
                targets = targets.to(self.device)
                outputs = self.model(inputs)
                predictions = torch.argmax(outputs, dim=1)
                correct += (predictions == targets).sum().item()
                total += targets.size(0)
        return correct / total
```
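The trainer above steps the scheduler once per batch. To make the effect concrete, here is the cosine-annealing formula many such schedulers implement, lr_t = lr_min + ½(lr_max − lr_min)(1 + cos(πt/T)), sketched in plain Python; the values lr_max=0.1, lr_min=0.001, T=1000 are illustrative, not taken from the article:

```python
import math

def cosine_lr(step, total_steps, lr_max=0.1, lr_min=0.001):
    """Cosine annealing from lr_max down to lr_min over total_steps."""
    cos_term = math.cos(math.pi * step / total_steps)
    return lr_min + 0.5 * (lr_max - lr_min) * (1 + cos_term)

print(cosine_lr(0, 1000))     # ~0.1 (starts at lr_max)
print(cosine_lr(500, 1000))   # ~0.0505 (midpoint)
print(cosine_lr(1000, 1000))  # ~0.001 (ends at lr_min)
```

The curve decays slowly at first and near the end, spending most of its steep descent in the middle of training.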

5. Summary

CNN architectures have evolved steadily, with each generation improving on the last:

  1. LeNet: the starting point of CNNs, laying the groundwork
  2. AlexNet: introduced ReLU and Dropout
  3. VGG: standardized on uniform 3x3 convolution kernels
  4. ResNet: residual connections to combat vanishing gradients
  5. EfficientNet: compound scaling strategy

Key takeaways from the comparison data:

  • EfficientNet-B7 reaches 84.4% Top-1 accuracy
  • Compound scaling is more effective than increasing depth or width alone
  • At equal parameter counts, EfficientNet is more accurate than ResNet
  • Choose the model scale that fits your resource budget