当前位置：首页 > news >正文

目标检测：从 R-CNN 到 YOLOv8

news 2026/5/13 8:52:00

目标检测：从 R-CNN 到 YOLOv8

1. 技术分析

1.1 目标检测技术演进

目标检测经历了从两阶段到单阶段的演进：

目标检测技术路线：R-CNN (2014) → Fast R-CNN (2015) → Faster R-CNN (2015) → YOLO (2016) → YOLOv8 (2023)

1.2 检测方法对比

方法	类型	速度(fps)	mAP	特点
R-CNN	两阶段	5	66%	区域提议
Fast R-CNN	两阶段	15	70%	共享特征
Faster R-CNN	两阶段	50	73%	RPN
YOLOv1	单阶段	45	63%	端到端
YOLOv8	单阶段	100+	95%	最新版

1.3 目标检测指标

目标检测评估指标：mAP（平均精度均值）；IoU（交并比）；Precision/Recall（精确率/召回率）；FPS（每秒帧数）。

2. 核心功能实现

2.1 Faster R-CNN 实现

import torch
import torch.nn as nn
import torch.nn.functional as F


class RPN(nn.Module):
    """Region Proposal Network head.

    Applies a shared 3x3 convolution followed by two 1x1 convolutions that
    emit, for every spatial location and anchor, 2 classification logits and
    4 box-regression values.

    Args:
        in_channels: Channel count of the incoming feature map.
        mid_channels: Channel count of the shared 3x3 convolution.
        num_anchors: Number of anchors predicted per spatial location.
    """

    def __init__(self, in_channels, mid_channels=512, num_anchors=9):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1)
        self.cls_conv = nn.Conv2d(mid_channels, num_anchors * 2, kernel_size=1)
        self.reg_conv = nn.Conv2d(mid_channels, num_anchors * 4, kernel_size=1)

    def forward(self, x):
        """Return ``(cls_logits, reg_preds)`` for a feature map ``x``.

        For an input of shape ``(N, C, H, W)`` the outputs have shapes
        ``(N, H * W * num_anchors, 2)`` and ``(N, H * W * num_anchors, 4)``.
        """
        batch_size = x.size(0)
        hidden = F.relu(self.conv(x))

        # Move channels last so that the values belonging to one anchor are
        # adjacent in memory before collapsing the spatial/anchor dimensions.
        cls_logits = self.cls_conv(hidden).permute(0, 2, 3, 1).contiguous()
        reg_preds = self.reg_conv(hidden).permute(0, 2, 3, 1).contiguous()

        return cls_logits.view(batch_size, -1, 2), reg_preds.view(batch_size, -1, 4)


class FastRCNNHead(nn.Module):
    """Two-layer fully connected head that classifies and refines RoIs.

    Args:
        in_channels: Channel count of each pooled RoI feature.
        num_classes: Number of output classes; the regression branch emits
            4 values per class.
        pool_size: Spatial side length of each pooled RoI feature. Defaults
            to 7, the value that was previously hard-coded.
    """

    def __init__(self, in_channels, num_classes, pool_size=7):
        super().__init__()
        self.fc1 = nn.Linear(in_channels * pool_size * pool_size, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.cls_fc = nn.Linear(1024, num_classes)
        self.reg_fc = nn.Linear(1024, num_classes * 4)

    def forward(self, x):
        """Return ``(cls_logits, reg_preds)`` for a batch of RoI features."""
        # flatten() copies when required, so non-contiguous RoI features
        # (e.g. produced by permute/slicing) no longer raise as .view() did.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.cls_fc(x), self.reg_fc(x)


class FasterRCNN(nn.Module):
    """Skeleton that wires a backbone, an RPN and an RoI head together.

    Proposal generation, proposal sampling, RoI pooling and the loss are
    NOT implemented here: ``forward`` relies on the four ``_`` hook methods
    below, which a subclass has to override.

    Args:
        backbone: Module mapping images to a feature map.
        rpn: Module mapping the feature map to ``(cls_logits, reg_preds)``.
        roi_head: Module mapping pooled RoI features to
            ``(cls_output, reg_output)``.
        num_classes: Stored on the instance; not used by this class itself.
    """

    def __init__(self, backbone, rpn, roi_head, num_classes):
        super().__init__()
        self.backbone = backbone
        self.rpn = rpn
        self.roi_head = roi_head
        self.num_classes = num_classes

    def forward(self, images, targets=None):
        """Run detection.

        Returns the value of ``_compute_loss`` when ``targets`` is given,
        otherwise the ``(cls_output, reg_output)`` pair from the RoI head.
        """
        has_targets = targets is not None

        features = self.backbone(images)
        cls_logits, reg_preds = self.rpn(features)
        proposals = self._generate_proposals(cls_logits, reg_preds)

        if has_targets:
            sampled_proposals, labels, bbox_targets = self._sample_proposals(
                proposals, targets
            )
        else:
            sampled_proposals = proposals

        roi_features = self._roi_pooling(features, sampled_proposals)
        cls_output, reg_output = self.roi_head(roi_features)

        if has_targets:
            return self._compute_loss(cls_output, reg_output, labels, bbox_targets)
        return cls_output, reg_output

    # ------------------------------------------------------------------
    # Hooks required by forward(). They were referenced but never defined,
    # which surfaced as an opaque AttributeError; they now fail explicitly.
    # ------------------------------------------------------------------
    def _generate_proposals(self, cls_logits, reg_preds):
        """Turn RPN outputs into proposals. Must be overridden."""
        raise NotImplementedError(
            "FasterRCNN._generate_proposals must be implemented by a subclass"
        )

    def _sample_proposals(self, proposals, targets):
        """Return ``(sampled_proposals, labels, bbox_targets)``. Must be overridden."""
        raise NotImplementedError(
            "FasterRCNN._sample_proposals must be implemented by a subclass"
        )

    def _roi_pooling(self, features, proposals):
        """Return per-proposal features for the RoI head. Must be overridden."""
        raise NotImplementedError(
            "FasterRCNN._roi_pooling must be implemented by a subclass"
        )

    def _compute_loss(self, cls_output, reg_output, labels, bbox_targets):
        """Return the training loss. Must be overridden."""
        raise NotImplementedError(
            "FasterRCNN._compute_loss must be implemented by a subclass"
        )

2.2 YOLO 实现

class YOLOv1(nn.Module):
    """Simplified YOLOv1-style network.

    The head's first linear layer expects a ``1024 x 7 x 7`` feature map.
    The backbone downsamples by 64 in total (one stride-2 convolution and
    five 2x2 max-pools), so this corresponds to ``448 x 448`` inputs.

    Args:
        S: Grid size; the output has ``S x S`` cells.
        B: Number of boxes per cell (5 values each).
        C: Number of classes.

    NOTE(review): the backbone stacks convolutions with no activation in
    between. The layer list is kept exactly as published so that module
    indices / state-dict keys stay unchanged — confirm whether activations
    were intentionally omitted.
    """

    def __init__(self, S=7, B=2, C=20):
        super().__init__()
        self.S = S
        self.B = B
        self.C = C

        self.backbone = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(192, 128, kernel_size=1),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.Conv2d(256, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(512, 512, kernel_size=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(1024, 512, kernel_size=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.Conv2d(1024, 512, kernel_size=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
        )

        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, S * S * (B * 5 + C)),
        )

    def forward(self, x):
        """Return predictions shaped ``(N, S, S, B * 5 + C)``."""
        x = self.backbone(x)
        x = self.head(x)
        return x.view(-1, self.S, self.S, self.B * 5 + self.C)


class YOLOv8(nn.Module):
    """Simplified YOLOv8-style network: conv backbone, 1x1 neck, 1x1 head.

    Args:
        num_classes: Number of classes; the head emits ``num_classes + 5``
            channels per spatial location.
        model_size: One of ``'n'``, ``'s'``, ``'m'``, ``'l'``, ``'x'``.
            Selects a channel-width multiplier. The default ``'l'`` (1.0)
            reproduces the layer widths this class had before the parameter
            was introduced.

    Raises:
        ValueError: If ``model_size`` is not a known size.
    """

    # Channel-width multipliers per size, following the width scales used by
    # the YOLOv8 model family. Only width is scaled in this simplified model.
    _WIDTH_MULTIPLIERS = {'n': 0.25, 's': 0.5, 'm': 0.75, 'l': 1.0, 'x': 1.25}

    def __init__(self, num_classes=80, model_size='l'):
        super().__init__()
        if model_size not in self._WIDTH_MULTIPLIERS:
            raise ValueError(
                "model_size must be one of {}, got {!r}".format(
                    sorted(self._WIDTH_MULTIPLIERS), model_size
                )
            )
        self.model_size = model_size
        self._width = self._WIDTH_MULTIPLIERS[model_size]

        self.backbone = self._build_backbone()
        self.neck = self._build_neck()
        self.head = self._build_head(num_classes)

    def _scaled(self, channels):
        """Scale a base channel count by the selected width multiplier."""
        return int(channels * self._width)

    def _build_backbone(self):
        """Five stride-2 stages (total downsampling factor 32)."""
        c64, c128 = self._scaled(64), self._scaled(128)
        c256, c512, c1024 = self._scaled(256), self._scaled(512), self._scaled(1024)
        return nn.Sequential(
            nn.Conv2d(3, c64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(c64),
            nn.SiLU(),
            nn.Conv2d(c64, c128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(c128),
            nn.SiLU(),
            self._make_block(c128, c256),
            self._make_block(c256, c512),
            self._make_block(c512, c1024),
        )

    def _make_block(self, in_channels, out_channels):
        """Stride-2 3x3 convolution followed by BatchNorm and SiLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.SiLU(),
        )

    def _build_neck(self):
        """1x1 convolution halving the backbone's output channels."""
        neck_channels = self._scaled(512)
        return nn.Sequential(
            nn.Conv2d(self._scaled(1024), neck_channels, kernel_size=1),
            nn.BatchNorm2d(neck_channels),
            nn.SiLU(),
        )

    def _build_head(self, num_classes):
        """1x1 convolution emitting ``num_classes + 5`` channels."""
        return nn.Conv2d(self._scaled(512), num_classes + 5, kernel_size=1)

    def forward(self, x):
        """Return the raw head output, shaped ``(N, num_classes + 5, H/32, W/32)``."""
        x = self.backbone(x)
        x = self.neck(x)
        return self.head(x)

2.3 非极大值抑制

class NMS:
    """Greedy non-maximum suppression over ``(x1, y1, x2, y2)`` boxes.

    Args:
        iou_threshold: A remaining box is suppressed when its IoU with the
            currently selected box is not strictly below this value.
    """

    def __init__(self, iou_threshold=0.5):
        self.iou_threshold = iou_threshold

    def __call__(self, boxes, scores):
        """Return indices of the kept boxes, highest score first.

        Args:
            boxes: Tensor of boxes; columns 0..3 are read as x1, y1, x2, y2.
            scores: Tensor with one score per box.

        Returns:
            A list of Python ints indexing into the ORIGINAL ``boxes`` /
            ``scores``. The inputs are not modified.
        """
        if len(boxes) == 0:
            return []

        # Track positions in the caller's tensors. The previous version
        # recorded argmax positions inside the progressively filtered tensor,
        # so every index after the first one pointed at the wrong box.
        remaining = torch.arange(len(boxes), device=boxes.device)
        keep = []

        while remaining.numel() > 0:
            best_pos = torch.argmax(scores[remaining])
            best_idx = remaining[best_pos]
            keep.append(best_idx.item())

            ious = self._compute_iou(boxes[best_idx], boxes[remaining])
            survivors = ious < self.iou_threshold
            # Always drop the selected box itself; relying on IoU(self) >=
            # threshold would loop forever for thresholds above 1.
            survivors[best_pos] = False
            remaining = remaining[survivors]

        return keep

    def _compute_iou(self, box1, boxes):
        """Return the IoU of ``box1`` against every row of ``boxes``."""
        left = torch.max(box1[0], boxes[:, 0])
        top = torch.max(box1[1], boxes[:, 1])
        right = torch.min(box1[2], boxes[:, 2])
        bottom = torch.min(box1[3], boxes[:, 3])

        # Clamp so that non-overlapping boxes contribute zero intersection.
        intersection = torch.clamp(right - left, min=0) * torch.clamp(bottom - top, min=0)

        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union = area1 + area2 - intersection

        return intersection / union

3. 性能对比

3.1 目标检测模型对比

模型	mAP@0.5	Speed(fps)	Parameters(M)
Faster R-CNN	73%	50	134
YOLOv3	83%	83	61
YOLOv5s	92%	140	27
YOLOv8n	94%	200	3.2
YOLOv8x	97%	80	68

3.2 不同 YOLO 版本对比

版本	mAP	Speed	Size(MB)
YOLOv1	63%	45	250
YOLOv2	78%	67	200
YOLOv3	83%	83	236
YOLOv4	87%	60	244
YOLOv5	92%	140	140
YOLOv8	95%	200	60

3.3 两阶段 vs 单阶段

类型	优点	缺点	适用场景
两阶段	准确率高	速度慢	高精度需求
单阶段	速度快	准确率略低	实时检测

4. 最佳实践

4.1 目标检测模型选择

def select_detector(task_type, constraints):
    """Pick a YOLOv8 variant from a constraints mapping.

    Args:
        task_type: Currently unused; kept for interface compatibility.
        constraints: Mapping that may contain the truthy flags
            ``'real_time'`` and ``'accuracy'``. ``'real_time'`` takes
            precedence when both are set.

    Returns:
        ``YOLOv8(model_size='n')`` for real-time use, ``'x'`` when accuracy
        is requested, otherwise ``'m'``.

    NOTE(review): this relies on ``YOLOv8`` accepting a ``model_size``
    keyword — confirm against the ``YOLOv8`` definition in use.
    """
    if constraints.get('real_time', False):
        return YOLOv8(model_size='n')
    if constraints.get('accuracy', False):
        return YOLOv8(model_size='x')
    return YOLOv8(model_size='m')


class DetectorFactory:
    """Builds detector instances from a configuration mapping."""

    @staticmethod
    def create(config):
        """Create the detector described by ``config``.

        Args:
            config: Mapping with a required ``'type'`` key.
                For ``'faster_rcnn'``, ``config['params']`` is expanded as
                keyword arguments to ``FasterRCNN``. For ``'yolov8'``, the
                optional ``'model_size'`` key (default ``'m'``) is used.

        Raises:
            KeyError: If ``'type'`` (or ``'params'`` for Faster R-CNN) is
                missing.
            ValueError: If ``config['type']`` is not a supported detector.
                Previously this case silently returned ``None``.
        """
        detector_type = config['type']
        if detector_type == 'faster_rcnn':
            return FasterRCNN(**config['params'])
        if detector_type == 'yolov8':
            return YOLOv8(model_size=config.get('model_size', 'm'))
        raise ValueError(
            "Unsupported detector type: {!r} "
            "(expected 'faster_rcnn' or 'yolov8')".format(detector_type)
        )

4.2 目标检测训练流程

class DetectionTrainer:
    """Minimal training/evaluation loop for a detector.

    The model is expected to return a scalar loss tensor when called as
    ``model(images, targets)``.

    Args:
        model: The detector to train.
        optimizer: Optimizer over the model's parameters.
        scheduler: Learning-rate scheduler; stepped once per ``train_step``.
        loss_fn: Stored on the instance; not used by this class itself.
    """

    def __init__(self, model, optimizer, scheduler, loss_fn):
        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.loss_fn = loss_fn

    def train_step(self, images, targets):
        """Run one optimisation step and return the loss as a float."""
        # evaluate() leaves the model in eval mode; without switching back,
        # every step after an evaluation would train with BatchNorm/Dropout
        # in inference behaviour.
        self.model.train()

        self.optimizer.zero_grad()
        loss = self.model(images, targets)
        loss.backward()
        self.optimizer.step()
        self.scheduler.step()
        return loss.item()

    def evaluate(self, dataloader):
        """Return the mean per-batch loss over ``dataloader``.

        The model is switched to eval mode and left there.

        Raises:
            ZeroDivisionError: If ``dataloader`` yields no batches.
        """
        self.model.eval()

        total_loss = 0
        # Count batches while iterating instead of calling len(), so plain
        # iterables and loaders without a reliable length also work.
        num_batches = 0
        with torch.no_grad():
            for images, targets in dataloader:
                total_loss += self.model(images, targets).item()
                num_batches += 1

        return total_loss / num_batches