最近 YOLO26 出来了,我还没使用过 yolo ,用它来做一个测试。注意:直接用OCR 识别验证码更加方便。用YOLO 不太合适,我只是为了做测试!
codes.yaml 文件如下:
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco8/
# Example usage: yolo train data=coco8.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco8 ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: D:\learn\yolo\dataset\yolo_dataset_2026-04-28 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)

# Classes
names:
  0: "0"
  1: "1"
  2: "2"
  3: "3"
  4: "4"
  5: "5"
  6: "6"
  7: "7"
  8: "8"
  9: "9"
  10: "+"
  11: "-"
  12: "*"
  13: "/"
  14: "="
  15: "?"
yolo cli 命令如下
# 训练,由于只是测试,所以没配置其他训练参数
yolo detect train data="D:\learn\yolo\ultralytics\ultralytics\cfg\datasets\codes.yaml" model=yolo26n.pt epochs=100 imgsz=640 batch=0.5 workers=1

# 验证
yolo detect val model=codes.pt data="D:\learn\yolo\ultralytics\ultralytics\cfg\datasets\codes.yaml" nc=16

# 预测
yolo predict model=codes.pt source="D:\learn\yolo\ultralytics\ultralytics\test\codes_processed.png"

# 导出
yolo export model=codes.pt format=onnx opset=18
python 脚本如下:
import time
import requests
from ultralytics import YOLO
from PIL import Image
import numpy as np# Load a pretrained YOLO26n model
def calculate_overlap_ratio(box1, box2):
    """Return how much two boxes overlap, relative to the smaller box.

    Args:
        box1: Four numbers ``(x1, y1, x2, y2)`` describing the first box.
        box2: Four numbers ``(x1, y1, x2, y2)`` describing the second box.

    Returns:
        Intersection area divided by the area of the smaller box, or ``0.0``
        when the boxes do not intersect or the smaller box has zero area.
    """
    x1_1, y1_1, x2_1, y2_1 = box1
    x1_2, y1_2, x2_2, y2_2 = box2

    # Corners of the intersection rectangle.
    inter_x1 = max(x1_1, x1_2)
    inter_y1 = max(y1_1, y1_2)
    inter_x2 = min(x2_1, x2_2)
    inter_y2 = min(y2_1, y2_2)

    # An inverted rectangle means the boxes are disjoint.
    if inter_x2 < inter_x1 or inter_y2 < inter_y1:
        return 0.0

    inter_area = (inter_x2 - inter_x1) * (inter_y2 - inter_y1)
    area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)

    # Normalising by the smaller box makes a small box nested inside a
    # larger one count as fully overlapping.
    min_area = min(area1, area2)
    if min_area == 0:
        return 0.0
    return inter_area / min_area


def filter_overlapping_detections(detections, overlap_threshold=0.5):
    """Drop duplicate detections of the same class that overlap heavily.

    For every detection that is kept, each later detection with the same
    ``class_id`` whose overlap ratio exceeds ``overlap_threshold`` is
    skipped, and a message is printed for each skipped detection.

    Args:
        detections: List of dicts, each with at least ``'class_id'`` and
            ``'box'`` (``x1, y1, x2, y2``) keys. The order of the list decides
            which duplicate wins: the earlier entry is kept.
        overlap_threshold: Overlap ratio above which a later detection is
            treated as a duplicate.

    Returns:
        Indices into ``detections`` of the entries that were kept, in order.
    """
    kept = []
    suppressed = set()

    for i, current in enumerate(detections):
        if i in suppressed:
            continue
        kept.append(i)

        for j in range(i + 1, len(detections)):
            candidate = detections[j]
            # Only same-class boxes can be duplicates of each other.
            if j in suppressed or candidate['class_id'] != current['class_id']:
                continue

            overlap_ratio = calculate_overlap_ratio(current['box'], candidate['box'])
            if overlap_ratio > overlap_threshold:
                suppressed.add(j)
                print(f" [过滤] 目标{j + 1}与目标{i + 1}重叠{overlap_ratio:.2%},已跳过")

    return kept


def _collect_detections(boxes, names):
    """Turn the boxes of one result into dicts sorted from left to right."""
    class_ids = boxes.cls.cpu().numpy()
    confidences = boxes.conf.cpu().numpy()
    # xyxy format: x1, y1, x2, y2
    box_coords = boxes.xyxy.cpu().numpy()

    detections = []
    for i, (class_id, confidence, box) in enumerate(zip(class_ids, confidences, box_coords)):
        x1, _, x2, _ = box
        detections.append({
            'index': i,
            'class_id': int(class_id),
            'confidence': confidence,
            'class_name': names[int(class_id)],
            'box': box,
            'center_x': (x1 + x2) / 2,
            'x1': x1,
        })

    # Reading order of the captcha is left to right, so sort by box centre.
    detections.sort(key=lambda d: d['center_x'])
    return detections


def main():
    """Download one captcha image, run the detector and print the result."""
    model = YOLO("../codes2.pt")

    # The endpoint takes a random string; a millisecond timestamp is used.
    random_str = str(int(time.time() * 1000))
    url = f"https://sit-elderly.gdjywpt.cn:9000/auth/code/image?randomStr={random_str}"
    print("正在下载验证码图片...")
    print(f"请求URL: {url}")

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Save the image in the current working directory.
        with open("codes.png", "wb") as f:
            f.write(response.content)
        print("验证码图片下载成功: codes.png")
    except (requests.RequestException, OSError) as e:
        print(f"下载验证码图片失败: {e}")
        # SystemExit works without the interactive-only ``exit`` helper.
        raise SystemExit(1)

    results = model("codes.png")

    for result in results:
        boxes = result.boxes
        # An empty Boxes object is not None, so check its length as well.
        if boxes is None or len(boxes) == 0:
            print("未检测到任何目标")
            continue

        detections = _collect_detections(boxes, result.names)
        print(f"检测到 {len(detections)} 个目标(已按从左到右排序):")
        print("-" * 50)

        valid_indices = filter_overlapping_detections(detections, overlap_threshold=0.5)

        class_names = []
        for idx, det_idx in enumerate(valid_indices, 1):
            det = detections[det_idx]
            print(f"目标 {idx}:")
            print(f" 分类ID: {det['class_id']}")
            print(f" 分类名称: {det['class_name']}")
            print(f" 置信度: {det['confidence']:.4f}")
            print(f" 边界框: x1={det['box'][0]:.1f}, y1={det['box'][1]:.1f}, x2={det['box'][2]:.1f}, y2={det['box'][3]:.1f}")
            print(f" 中心点x坐标: {det['center_x']:.1f}")
            print("-" * 50)
            class_names.append(det['class_name'])

        # Each name is prefixed with a space, matching the original output.
        print("".join(f" {name}" for name in class_names))


if __name__ == "__main__":
    main()
不要问我python 什么意思,我也不懂。ai给我生成的!!!
效果图如下:

简述下遇到的问题:
1.训练图片数量比较少,准确率低
2.标注错了好多图片,导致准确率非常低
3.由于数字是空心的,导致部分数字被重复识别,例如 3+2=? 的图片会识别出 3 3 + 2 = ?。这种情况下,如果两个检测框的 class_id 相同且重叠超过 50%,我就丢弃后面重复的结果,得到 3 + 2 = ?
