当前位置: 首页 > news >正文

DAY 40

# DAY 40 简单 CNN

知识回顾:

1. 数据增强

2. 卷积神经网络定义的写法

3. batch 归一化:调整一个批次的分布,常用于图像数据

4. 特征图:只有卷积操作输出的才叫特征图

5. 调度器:直接修改基础学习率


卷积操作常见流程如下:

- 输入 → 卷积层 → Batch 归一化层(可选)→ 池化层 → 激活函数 → 下一层

- Flatten → Dense (with Dropout, 可选) → Dense (Output)

作业:尝试手动修改不同的调度器和 CNN 的结构,观察训练的差异。

import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# ---------------------- 1. Data preparation ----------------------
# Preprocessing for CIFAR-10: convert to tensor, then normalize each channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    # Per-channel mean / std values used for CIFAR-10.
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Load the datasets (downloaded to ./data on first use).
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Device selection.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 20  # Same number of epochs for every experiment so the comparison is fair.


# ---------------------- 2. CNN architectures ----------------------
class CNN_Base(nn.Module):
    """Baseline CNN (control group): three conv blocks followed by two linear layers.

    Each conv block is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> MaxPool2d(2).
    The classifier expects a 128 x 4 x 4 feature map, i.e. 32 x 32 inputs.
    """

    def __init__(self):
        super(CNN_Base, self).__init__()
        # Conv block 1
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Conv block 2
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Conv block 3
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(2)
        # Fully connected classifier
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 4 * 4)
        # relu3 is reused here; ReLU is stateless so sharing the module is safe.
        x = self.dropout(self.relu3(self.fc1(x)))
        x = self.fc2(x)
        return x


class CNN_Light(nn.Module):
    """Variant 1: lightweight CNN with fewer channels and only two conv blocks.

    The classifier expects a 32 x 8 x 8 feature map (one pooling step fewer
    than ``CNN_Base``), i.e. 32 x 32 inputs.
    """

    def __init__(self):
        super(CNN_Light, self).__init__()
        # Conv block 1 (half the channels of the baseline)
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Conv block 2 (the baseline's third block is dropped)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Fully connected classifier (half the hidden units of the baseline)
        self.fc1 = nn.Linear(32 * 8 * 8, 256)  # 32 x 8 x 8 because there is one pooling step fewer
        self.dropout = nn.Dropout(0.3)  # Lower dropout rate than the baseline
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))  # -> 16 x 16 x 16
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))  # -> 32 x 8 x 8
        x = x.view(-1, 32 * 8 * 8)
        # relu1 is reused here; ReLU is stateless so sharing the module is safe.
        x = self.dropout(self.relu1(self.fc1(x)))
        x = self.fc2(x)
        return x


class CNN_Deep(nn.Module):
    """Variant 2: deeper CNN with a fourth conv block and up to 256 channels.

    The classifier expects a 256 x 2 x 2 feature map (one pooling step more
    than ``CNN_Base``), i.e. 32 x 32 inputs.
    """

    def __init__(self):
        super(CNN_Deep, self).__init__()
        # Conv block 1
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Conv block 2
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Conv block 3
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(2)
        # Conv block 4 (added on top of the baseline)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(2)  # Extra pooling step
        # Fully connected classifier (adapted to the smaller feature map)
        self.fc1 = nn.Linear(256 * 2 * 2, 512)  # 256 x 2 x 2 because there is one pooling step more
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        x = self.pool4(self.relu4(self.bn4(self.conv4(x))))  # Added conv block
        x = x.view(-1, 256 * 2 * 2)
        # relu3 is reused here; ReLU is stateless so sharing the module is safe.
        x = self.dropout(self.relu3(self.fc1(x)))
        x = self.fc2(x)
        return x


# ---------------------- 3. Training ----------------------
def _evaluate(model, data_loader, criterion, device):
    """Return ``(mean batch loss, accuracy in percent)`` of ``model`` on ``data_loader``.

    Switches the model to eval mode; the caller is responsible for switching
    back to train mode before it resumes training.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss_sum += criterion(output, target).item()
            _, predicted = output.max(1)
            n_seen += target.size(0)
            n_correct += predicted.eq(target).sum().item()
    return loss_sum / len(data_loader), 100. * n_correct / n_seen


def _step_scheduler(scheduler, metric):
    """Advance ``scheduler`` by one epoch using the call signature it expects.

    ``ReduceLROnPlateau.step`` takes the monitored metric. Epoch-based
    schedulers such as ``StepLR`` and ``CosineAnnealingLR`` take no metric:
    their only positional argument is the deprecated ``epoch``, so passing
    the loss there would be treated as an epoch index and break the schedule.
    """
    if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(metric)
    else:
        scheduler.step()


def train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs):
    """Train ``model`` for ``epochs`` epochs, evaluating on ``test_loader`` after each one.

    Per-iteration losses and per-epoch loss / accuracy are recorded and
    plotted once training finishes.

    Args:
        model: Network to train; must already be on ``device``.
        train_loader: Iterable of ``(data, target)`` training batches.
        test_loader: Iterable of ``(data, target)`` evaluation batches.
        criterion: Loss function called as ``criterion(output, target)``.
        optimizer: Optimizer over ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch. A
            ``ReduceLROnPlateau`` receives the epoch's test loss; any other
            scheduler is stepped without arguments.
        device: Device that batches are moved to.
        epochs: Number of epochs to run.

    Returns:
        Test accuracy (percent) after the final epoch.
    """
    all_iter_losses = []
    iter_indices = []
    train_acc_history = []
    test_acc_history = []
    train_loss_history = []
    test_loss_history = []

    for epoch in range(epochs):
        # Re-enter training mode every epoch: the evaluation pass at the end
        # of the previous epoch left the model in eval mode, which would
        # otherwise disable Dropout and freeze BatchNorm statistics.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Record the per-iteration loss with a 1-based global iteration index.
            iter_loss = loss.item()
            all_iter_losses.append(iter_loss)
            iter_indices.append(epoch * len(train_loader) + batch_idx + 1)

            # Accumulate training metrics.
            running_loss += iter_loss
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch: {epoch+1}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                      f'| 单Batch损失: {iter_loss:.4f} | 累计平均损失: {running_loss/(batch_idx+1):.4f}')

        # Training metrics for this epoch.
        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = 100. * correct / total
        train_acc_history.append(epoch_train_acc)
        train_loss_history.append(epoch_train_loss)

        # Evaluation pass.
        epoch_test_loss, epoch_test_acc = _evaluate(model, test_loader, criterion, device)
        test_acc_history.append(epoch_test_acc)
        test_loss_history.append(epoch_test_loss)

        # Update the learning rate; the call differs between metric-driven
        # and epoch-driven schedulers.
        # NOTE(review): the plateau scheduler is driven by the test loss, so
        # the test set influences training; use a validation split if the
        # final test accuracy must stay unbiased.
        _step_scheduler(scheduler, epoch_test_loss)

        print(f'Epoch {epoch+1}/{epochs} 完成 | 训练准确率: {epoch_train_acc:.2f}% | 测试准确率: {epoch_test_acc:.2f}%')

    # Plot the recorded curves.
    plot_iter_losses(all_iter_losses, iter_indices)
    plot_epoch_metrics(train_acc_history, test_acc_history, train_loss_history, test_loss_history)

    return epoch_test_acc


# ---------------------- 4. Plotting ----------------------
def plot_iter_losses(losses, indices):
    """Plot the training loss of every iteration against its iteration index."""
    plt.figure(figsize=(10, 4))
    plt.plot(indices, losses, 'b-', alpha=0.7, label='Iteration Loss')
    plt.xlabel('Iteration(Batch序号)')
    plt.ylabel('损失值')
    plt.title('每个 Iteration 的训练损失')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def plot_epoch_metrics(train_acc, test_acc, train_loss, test_loss):
    """Plot per-epoch train/test accuracy (left) and loss (right) side by side."""
    epoch_axis = range(1, len(train_acc) + 1)

    plt.figure(figsize=(12, 4))

    # Accuracy
    plt.subplot(1, 2, 1)
    plt.plot(epoch_axis, train_acc, 'b-', label='训练准确率')
    plt.plot(epoch_axis, test_acc, 'r-', label='测试准确率')
    plt.xlabel('Epoch')
    plt.ylabel('准确率 (%)')
    plt.title('训练和测试准确率')
    plt.legend()
    plt.grid(True)

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(epoch_axis, train_loss, 'b-', label='训练损失')
    plt.plot(epoch_axis, test_loss, 'r-', label='测试损失')
    plt.xlabel('Epoch')
    plt.ylabel('损失值')
    plt.title('训练和测试损失')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()


# ---------------------- 5. Comparison experiments ----------------------
criterion = nn.CrossEntropyLoss()  # Same loss function for every experiment.

# Experiment 1: baseline CNN + ReduceLROnPlateau (control group)
print("\n===== 实验1:原CNN + ReduceLROnPlateau =====")
model1 = CNN_Base().to(device)
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
scheduler1 = optim.lr_scheduler.ReduceLROnPlateau(optimizer1, mode='min', patience=3, factor=0.5)
acc1 = train(model1, train_loader, test_loader, criterion, optimizer1, scheduler1, device, epochs)

# Experiment 2: baseline CNN + StepLR (only the scheduler changes)
print("\n===== 实验2:原CNN + StepLR =====")
model2 = CNN_Base().to(device)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)
scheduler2 = optim.lr_scheduler.StepLR(optimizer2, step_size=5, gamma=0.5)  # Halve the LR every 5 epochs
acc2 = train(model2, train_loader, test_loader, criterion, optimizer2, scheduler2, device, epochs)

# Experiment 3: lightweight CNN + ReduceLROnPlateau (only the architecture changes)
print("\n===== 实验3:轻量化CNN + ReduceLROnPlateau =====")
model3 = CNN_Light().to(device)
optimizer3 = optim.Adam(model3.parameters(), lr=0.001)
scheduler3 = optim.lr_scheduler.ReduceLROnPlateau(optimizer3, mode='min', patience=3, factor=0.5)
acc3 = train(model3, train_loader, test_loader, criterion, optimizer3, scheduler3, device, epochs)

# Experiment 4: deeper CNN + CosineAnnealingLR (architecture and scheduler both change)
print("\n===== 实验4:加深CNN + CosineAnnealingLR =====")
model4 = CNN_Deep().to(device)
optimizer4 = optim.Adam(model4.parameters(), lr=0.001)
scheduler4 = optim.lr_scheduler.CosineAnnealingLR(optimizer4, T_max=10)  # T_max of 10 epochs
acc4 = train(model4, train_loader, test_loader, criterion, optimizer4, scheduler4, device, epochs)

# Print the final comparison.
print("\n===== 所有实验最终测试准确率 =====")
print(f"实验1(原CNN+ReduceLROnPlateau):{acc1:.2f}%")
print(f"实验2(原CNN+StepLR):{acc2:.2f}%")
print(f"实验3(轻量化CNN+ReduceLROnPlateau):{acc3:.2f}%")
print(f"实验4(加深CNN+CosineAnnealingLR):{acc4:.2f}%")

@浙大疏锦行

http://www.jsqmd.com/news/114525/

相关文章:

  • 2025年合肥口碑不错的装修设计品牌企业推荐,诚信的装修设计公司/企业全解析 - 工业推荐榜
  • 基于 Python 的股票数据可视化是金融数据分析领域的重要应用
  • Open-AutoGLM开机自启实战:systemd服务配置的8个核心要点
  • 2025合肥装修设计公司TOP5权威推荐:装修设计找哪家? - myqiye
  • 2025年比较好的卫浴缓冲隐藏轨/定制缓冲隐藏轨品牌厂家排行榜 - 品牌宣传支持者
  • 基于 Django 框架开发的青岛滨海学院增值性评价课程考核系统
  • 2025年上海股权转让个税合规处理/股权投融资/股东干股法律风险防范律师推荐 - myqiye
  • 2025年靠谱的记忆棉枕行业内知名厂家排行榜 - 品牌宣传支持者
  • oracle 12c查看执行过的sql及当前正在执行的sql - 实践
  • 基于 Django 框架开发的青岛滨海学院升学信息管理系统
  • 中国家装公司十强:环保与品质双驱,金煌家装以全产业链实力登顶 - 速递信息
  • Open-AutoGLM延迟高怎么办:3种紧急优化策略立即生效
  • 2025年市场可靠的冷却塔填料生产厂家排行榜,方形逆流冷却塔/闭式冷却塔/圆形逆流冷却塔/冷却塔填料/冷却塔填料工厂哪家权威 - 品牌推荐师
  • 探索机器视觉:从工业相机采集到模板匹配
  • 用户资料报协议(UDP)详解
  • 电力电缆生产厂家推荐2025年权威认证!涵中低压、低压、中压变频电缆生产厂家推荐(12月更新) - 品牌2026
  • 【Open-AutoGLM进程管理终极指南】:掌握高效自动化运维的5大核心技巧
  • 2025年合肥靠谱有实力的装修专业公司排行榜,口碑优选不错的装修专业公司推荐 - 工业推荐榜
  • 虾皮上新怎么增加流量
  • Open-AutoGLM启动报错怎么办:3步快速定位并解决90%常见故障
  • 医疗客服也能AI化?Linly-Talker在行业场景中的落地实践
  • 2025年靠谱的微晶铸石板厂家最新用户好评榜 - 品牌宣传支持者
  • Open-AutoGLM日志系统揭秘,掌握这6个参数才算真正入门
  • 大数据领域数据复制的性能优化策略
  • 2025年井口装置涂装推荐制造商排名,专业快速涂装服务商全解析 - mypinpai
  • 揭秘Open-AutoGLM定时任务配置难点:3步实现零误差任务调度
  • 2025合肥诚信装修企业TOP5推荐:售后完善的装修专业公司助力安心家装 - myqiye
  • Linly-Talker与HeyGen对比:谁才是中小企业的最佳选择?
  • 你真的会开日志吗?Open-AutoGLM运行日志开启的5个致命误区
  • Open-AutoGLM启动异常深度解析(专家级排障流程曝光)