当前位置：首页 > news >正文

面向生产环境：实时手机检测-通用API封装+批量图片检测脚本示例

news 2026/6/6 14:54:19

面向生产环境：实时手机检测-通用API封装+批量图片检测脚本示例

1. 项目概述与核心价值

实时手机检测是计算机视觉领域的一个实用应用，能够在图像或视频流中快速准确地识别和定位手机设备。这个技术在实际生产环境中有着广泛的应用场景，比如智能安防监控、公共场所的行为分析、智能零售的用户行为追踪等。

传统的手机检测方案往往面临两个主要问题：要么检测速度太慢无法满足实时需求，要么为了追求速度而牺牲了检测精度。本项目基于DAMO-YOLO框架，提供了一个兼顾精度和速度的解决方案。

核心优势：

高精度检测：基于先进的DAMO-YOLO架构，检测准确率超越传统YOLO系列
实时性能：优化后的模型推理速度快，满足生产环境实时需求
简单易用：提供清晰的API接口和批量处理脚本，降低使用门槛
生产就绪：封装完善的错误处理和性能优化，适合直接部署

2. 技术架构解析

2.1 DAMO-YOLO框架优势

DAMO-YOLO是一个专门为工业落地设计的目标检测框架，相比传统的YOLO系列方法，它在保持高速推理的同时显著提升了检测精度。框架采用"large neck, small head"的设计理念，通过更加充分的信息融合来提升检测效果。

架构组成：

Backbone (MAE-NAS)：负责特征提取，采用神经架构搜索技术优化
Neck (GFPN)：特征金字塔网络，增强多尺度特征融合能力
Head (ZeroHead)：检测头部分，输出最终的检测结果

这种设计使得模型能够更好地处理不同尺寸和姿态的手机，在各种复杂环境下都能保持稳定的检测性能。

2.2 模型性能对比

从提供的性能对比图可以看出，DAMO-YOLO在精度和速度的平衡上明显优于其他YOLO变体。这意味着在生产环境中，我们既能够获得准确的检测结果，又能够满足实时性的要求。

3. 环境搭建与快速部署

3.1 基础环境要求

在开始之前，确保你的系统满足以下要求：

# 系统要求
# Python 3.7+
# CUDA 11.0+ (如果使用GPU加速)
# 至少8GB内存（处理大批量图片时建议16GB+）

# 依赖安装
pip install torch torchvision
pip install modelscope
pip install gradio
pip install opencv-python
pip install numpy
pip install pillow

3.2 模型加载与初始化

使用ModelScope加载预训练的手机检测模型非常简单：

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks


def load_phone_detection_model():
    """Build the ModelScope pipeline used for phone detection.

    Returns:
        The constructed pipeline object, or ``None`` when construction
        raised. In both cases a status message is printed to stdout.
    """
    try:
        # Keep the argument lookup inside the ``try`` so that any failure
        # while assembling the call is reported the same way as a failure
        # inside ``pipeline`` itself.
        pipeline_kwargs = {
            'task': Tasks.domain_specific_object_detection,
            'model': 'damo/cv_tinynas_object-detection_damoyolo_phone',
        }
        phone_pipeline = pipeline(**pipeline_kwargs)
        print("模型加载成功！")
    except Exception as exc:
        print(f"模型加载失败: {str(exc)}")
        return None
    return phone_pipeline

4. 通用API封装设计

4.1 核心检测API

为了便于在生产环境中集成，我们封装了一个通用的检测API：

import cv2
import numpy as np
from typing import List, Dict, Any


class PhoneDetector:
    """Wrapper around the phone-detection pipeline with a dict-based API.

    Every public method reports failures through the returned dictionary
    (``success`` / ``error``) instead of raising.
    """

    def __init__(self, model_path: str = None):
        """Create the detector and load the underlying pipeline.

        :param model_path: Optional custom model path.
            NOTE(review): the value is accepted for interface compatibility
            but is not forwarded, because ``load_phone_detection_model``
            takes no arguments.
        """
        self.detector = load_phone_detection_model()
        self.class_names = ['phone']  # names of the detectable classes

    def detect_single_image(self, image_path: str,
                            confidence_threshold: float = 0.5) -> Dict[str, Any]:
        """Detect phones in a single image file.

        :param image_path: Path of the image to analyse.
        :param confidence_threshold: Detections scoring below this value
            are discarded.
        :return: A dict with ``success``, ``image_path``, ``detections`` and
            ``total_phones``; on failure it additionally carries ``error``
            and reports zero detections.
        """
        try:
            if self.detector is None:
                # The loader returns None when the model could not be built;
                # report that explicitly instead of a "not callable" error.
                raise RuntimeError("检测模型未加载")

            # Decode once purely to validate that the file is a readable
            # image; the pipeline itself is given the path.
            if cv2.imread(image_path) is None:
                raise ValueError(f"无法读取图片: {image_path}")

            result = self.detector(image_path)
            detections = self._parse_detection_result(result, confidence_threshold)

            return {
                'success': True,
                'image_path': image_path,
                'detections': detections,
                'total_phones': len(detections)
            }
        except Exception as e:
            # Same keys as the success case so callers can read
            # ``total_phones`` without checking ``success`` first.
            return {
                'success': False,
                'image_path': image_path,
                'error': str(e),
                'detections': [],
                'total_phones': 0
            }

    def _resolve_label(self, label):
        """Map a raw model label to a class name, or ``None`` if unknown.

        Both class-name strings and integer-like class indices are accepted.
        NOTE(review): which of the two the pipeline emits is not visible
        here — confirm against the model's output format.
        """
        if isinstance(label, str):
            return label if label in self.class_names else None
        try:
            index = int(label)
        except (TypeError, ValueError):
            return None
        # Reject negative values so they cannot wrap around to the last class.
        if 0 <= index < len(self.class_names):
            return self.class_names[index]
        return None

    def _parse_detection_result(self, result: Dict,
                                confidence_threshold: float) -> List[Dict]:
        """Convert the raw pipeline output into a list of detection dicts.

        :param result: Mapping expected to hold parallel ``boxes``,
            ``scores`` and ``labels`` sequences; anything else yields ``[]``.
        :param confidence_threshold: Minimum score for a detection to be kept.
        :return: One dict per kept detection. All numeric fields are plain
            Python floats so the result can be serialised with ``json``.
        """
        if result is None or any(key not in result for key in ('boxes', 'scores', 'labels')):
            return []

        detections = []
        rows = zip(result['boxes'], result['scores'], result['labels'])
        for i, (box, score, label) in enumerate(rows):
            if score < confidence_threshold:
                continue
            class_name = self._resolve_label(label)
            if class_name is None:
                continue

            # Convert once; deriving the geometry from these floats keeps
            # array scalar types out of the returned structure.
            bbox = [float(coord) for coord in box]  # [x1, y1, x2, y2]
            x1, y1, x2, y2 = bbox[:4]
            detections.append({
                'id': i,  # position in the raw model output, not in this list
                'bbox': bbox,
                'confidence': float(score),
                'label': class_name,
                'center_x': (x1 + x2) / 2,
                'center_y': (y1 + y2) / 2,
                'width': x2 - x1,
                'height': y2 - y1
            })
        return detections

4.2 批量处理API

对于生产环境中的大批量图片处理需求，我们提供了批量处理接口：

import os
import time
from concurrent.futures import ThreadPoolExecutor


class BatchPhoneDetector(PhoneDetector):
    """Phone detector that processes every image in a directory.

    Images are submitted to a thread pool; annotated copies can optionally
    be written to an output directory.
    """

    # File suffixes (compared case-insensitively) treated as images.
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp')

    def __init__(self, max_workers: int = 4):
        """Create the batch detector.

        :param max_workers: Maximum number of worker threads.
        """
        super().__init__()
        self.max_workers = max_workers

    def detect_batch_images(self, image_dir: str, output_dir: str = None,
                            confidence_threshold: float = 0.5) -> Dict[str, Any]:
        """Run detection on every image file directly inside ``image_dir``.

        :param image_dir: Directory to scan (not recursive).
        :param output_dir: When given, annotated images of successful
            detections are written here.
        :param confidence_threshold: Minimum score for a detection to be kept.
        :return: Summary dict. The statistic keys (``total_files``,
            ``successful_files``, ``failed_files``, ``total_phones_detected``,
            ``processing_time_seconds``, ``avg_time_per_image``,
            ``detailed_results``) are always present; ``success`` is False
            and ``error`` is set when no image was found.
        """
        start_time = time.time()

        image_files = self._get_image_files(image_dir)
        total_files = len(image_files)

        if total_files == 0:
            # Keep the original keys and add the regular statistics so
            # callers can read the summary without a special case.
            return {
                'success': False,
                'error': f'在目录 {image_dir} 中未找到图片文件',
                'processed_files': 0,
                'total_files': 0,
                'successful_files': 0,
                'failed_files': 0,
                'total_phones_detected': 0,
                'processing_time_seconds': time.time() - start_time,
                'avg_time_per_image': 0,
                'detailed_results': []
            }

        results = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            submitted = [
                (executor.submit(self.detect_single_image, img_path, confidence_threshold), img_path)
                for img_path in image_files
            ]

            # Collect in submission order so detailed_results follows the
            # order of image_files.
            for future, img_path in submitted:
                try:
                    result = future.result()
                except Exception as e:
                    results.append({
                        'success': False,
                        'image_path': img_path,
                        'error': str(e)
                    })
                    continue

                if output_dir and result['success']:
                    # A failed save must not create a second entry for the
                    # same file; record it on the existing result instead.
                    try:
                        if self._save_detection_result(result, output_dir) is False:
                            result['save_error'] = '无法保存标注图片'
                    except Exception as e:
                        result['save_error'] = str(e)
                results.append(result)

        processing_time = time.time() - start_time
        successful_detections = sum(1 for r in results if r['success'])
        total_phones = sum(r.get('total_phones', 0) for r in results if r['success'])

        return {
            'success': True,
            'total_files': total_files,
            'successful_files': successful_detections,
            'failed_files': total_files - successful_detections,
            'total_phones_detected': total_phones,
            'processing_time_seconds': processing_time,
            'avg_time_per_image': processing_time / total_files if total_files > 0 else 0,
            'detailed_results': results
        }

    def _get_image_files(self, directory: str) -> List[str]:
        """Return the sorted paths of the image files directly in ``directory``.

        Only regular files whose name ends with a known image suffix are
        returned; sorting makes the processing order reproducible.
        """
        image_files = []
        for name in sorted(os.listdir(directory)):
            path = os.path.join(directory, name)
            if name.lower().endswith(self._IMAGE_EXTENSIONS) and os.path.isfile(path):
                image_files.append(path)
        return image_files

    def _save_detection_result(self, result: Dict, output_dir: str):
        """Write a copy of the image with its detections drawn on it.

        :param result: A successful result of ``detect_single_image``.
        :param output_dir: Target directory; created if missing.
        :return: True when the annotated image was written, False when the
            source image could not be read or the write failed.
        """
        os.makedirs(output_dir, exist_ok=True)

        image = cv2.imread(result['image_path'])
        if image is None:
            return False

        for detection in result['detections']:
            x1, y1, x2, y2 = (int(v) for v in detection['bbox'][:4])
            confidence = detection['confidence']

            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            label = f"Phone: {confidence:.2f}"
            # Keep the text baseline inside the image when the box touches
            # the top edge.
            cv2.putText(image, label, (x1, max(y1 - 10, 15)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        filename = os.path.basename(result['image_path'])
        output_path = os.path.join(output_dir, f"detected_{filename}")
        return bool(cv2.imwrite(output_path, image))

5. 批量图片检测脚本示例

5.1 命令行工具实现

为了方便在生产环境中使用，我们提供了一个完整的命令行工具：

#!/usr/bin/env python3
"""
Command-line tool for batch phone detection.

Supports single-image detection, directory (batch) detection and exporting
the results as JSON and/or CSV.
"""
import argparse
import csv
import json
import os
import sys
from datetime import datetime
from typing import Dict


def main(argv=None):
    """Parse the command line, run detection and report the outcome.

    :param argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    :return: Process exit status — 0 on success, 1 when the input path is
        invalid, the single-image detection failed, or the directory holds
        no images.
    """
    parser = argparse.ArgumentParser(description='批量手机检测工具')
    parser.add_argument('--input', '-i', required=True,
                        help='输入图片路径或图片目录路径')
    parser.add_argument('--output', '-o', default='detection_results',
                        help='输出目录路径，默认: detection_results')
    parser.add_argument('--confidence', '-c', type=float, default=0.5,
                        help='检测置信度阈值，默认: 0.5')
    parser.add_argument('--workers', '-w', type=int, default=4,
                        help='并发工作线程数，默认: 4')
    parser.add_argument('--format', '-f', choices=['json', 'csv', 'both'], default='json',
                        help='结果输出格式，默认: json')
    parser.add_argument('--visualize', '-v', action='store_true',
                        help='是否生成带检测框的可视化图片')

    args = parser.parse_args(argv)

    # Validate the path before loading the model so that a typo does not
    # cost a full model initialisation.
    is_file = os.path.isfile(args.input)
    if not is_file and not os.path.isdir(args.input):
        print(f"错误: 输入路径 {args.input} 不存在或不可访问")
        return 1

    print("正在加载手机检测模型...")
    detector = BatchPhoneDetector(max_workers=args.workers)

    if is_file:
        print(f"检测单张图片: {args.input}")
        result = detector.detect_single_image(args.input, args.confidence)

        if not result['success']:
            print(f"检测失败: {result['error']}")
            return 1

        print(f"检测成功！发现 {result['total_phones']} 部手机")
        if args.visualize:
            os.makedirs(args.output, exist_ok=True)
            detector._save_detection_result(result, args.output)
            print(f"可视化结果已保存至: {args.output}")
        return 0

    print(f"批量检测目录: {args.input}")
    results = detector.detect_batch_images(
        args.input,
        args.output if args.visualize else None,
        args.confidence
    )

    # An error summary (e.g. no images found) may lack the statistic keys.
    if results.get('success') is False or 'total_files' not in results:
        print(f"错误: {results.get('error', '批量检测失败')}")
        return 1

    print(f"\n批量检测完成！")
    print(f"处理文件总数: {results['total_files']}")
    print(f"成功处理: {results['successful_files']}")
    print(f"失败文件: {results['failed_files']}")
    print(f"检测到手机总数: {results['total_phones_detected']}")
    print(f"总处理时间: {results['processing_time_seconds']:.2f} 秒")
    print(f"平均每张图片处理时间: {results['avg_time_per_image']:.2f} 秒")

    _export_results(results, args.output, args.format)
    return 0


def _json_default(value):
    """Serialise array-like values that ``json`` does not know.

    Objects exposing ``tolist()`` (NumPy scalars and arrays do) are
    converted through it; anything else is rejected as ``json`` would.
    """
    if hasattr(value, 'tolist'):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def _export_results(results: Dict, output_dir: str, format_type: str):
    """Write the batch summary to timestamped files in ``output_dir``.

    :param results: Summary returned by ``detect_batch_images``.
    :param output_dir: Target directory; created if missing.
    :param format_type: ``'json'``, ``'csv'`` or ``'both'``.
    """
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    if format_type in ('json', 'both'):
        json_path = os.path.join(output_dir, f"detection_results_{timestamp}.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False, default=_json_default)
        print(f"JSON结果已保存至: {json_path}")

    if format_type in ('csv', 'both'):
        csv_path = os.path.join(output_dir, f"detection_results_{timestamp}.csv")
        _export_to_csv(results, csv_path)
        print(f"CSV结果已保存至: {csv_path}")


def _export_to_csv(results: Dict, csv_path: str):
    """Write one CSV row per successfully processed image.

    Failed images are skipped. The columns are ``image_path``,
    ``detection_count`` and ``detection_details``.
    """
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['image_path', 'detection_count', 'detection_details']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for result in results['detailed_results']:
            if not result['success']:
                continue
            details = '; '.join(
                f"bbox[{d['bbox']}] conf[{d['confidence']:.2f}]"
                for d in result['detections']
            )
            writer.writerow({
                'image_path': result['image_path'],
                'detection_count': result['total_phones'],
                'detection_details': details
            })


if __name__ == "__main__":
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    sys.exit(main())

5.2 使用示例

# 单张图片检测
python phone_detector.py --input path/to/your/image.jpg --output results --visualize

# 批量图片检测
python phone_detector.py --input path/to/image/folder --output batch_results --workers 8 --format both

# 调整检测灵敏度
python phone_detector.py --input path/to/image.jpg --confidence 0.7

6. 生产环境部署建议

6.1 性能优化策略

在生产环境中部署手机检测服务时，考虑以下优化建议：

import torch


class OptimizedPhoneDetector(PhoneDetector):
    """Phone detector with optional GPU placement and reduced precision.

    NOTE(review): this class assumes ``self.detector.model`` is a
    ``torch.nn.Module`` that can be moved and called directly — confirm
    against the pipeline implementation in use.
    """

    def __init__(self, use_gpu: bool = True, model_precision: str = 'fp16'):
        """Create the detector and apply the requested optimisations.

        :param use_gpu: Move the model to CUDA when a device is available.
        :param model_precision: One of ``fp32``, ``fp16`` or ``int8``.
        """
        super().__init__()
        # Fall back to CPU instead of failing when CUDA is requested but
        # no device is available.
        self.use_gpu = bool(use_gpu) and torch.cuda.is_available()
        self.model_precision = model_precision
        self._optimize_model()

    def _optimize_model(self):
        """Apply device placement and precision settings to the model."""
        if self.detector is None:
            # The loader returns None when the model could not be built;
            # there is nothing to optimise in that case.
            return

        if self.use_gpu:
            self.detector.model = self.detector.model.cuda()

        if self.model_precision == 'fp16':
            # Half precision is only applied on GPU; on CPU the model is
            # left in its original precision.
            if self.use_gpu:
                self.detector.model = self.detector.model.half()
        elif self.model_precision == 'int8':
            self.detector.model = self._quantize_model(self.detector.model)

    def _quantize_model(self, model):
        """Hook for int8 quantisation; override to supply an implementation.

        :raises NotImplementedError: always — no quantisation scheme is
            provided by this class.
        """
        raise NotImplementedError(
            "int8 quantization is not implemented; override _quantize_model"
        )

    def warmup(self, warmup_iterations: int = 10):
        """Run dummy forward passes so the first real inference is not slow.

        :param warmup_iterations: Number of forward passes to run.
        """
        model = self.detector.model
        dummy_input = torch.randn(1, 3, 640, 640)

        # Match the device and dtype of the model's weights so a
        # half-precision or CUDA model is not fed a float32 CPU tensor.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            dummy_input = dummy_input.to(device=first_param.device, dtype=first_param.dtype)
        elif self.use_gpu:
            dummy_input = dummy_input.cuda()

        with torch.no_grad():
            for _ in range(warmup_iterations):
                _ = model(dummy_input)

6.2 监控与日志

在生产环境中，完善的监控和日志系统至关重要：

import logging
import time

from prometheus_client import Counter, Gauge, Histogram


class MonitoredPhoneDetector(PhoneDetector):
    """Phone detector that records Prometheus metrics and log entries."""

    # Metric objects shared by every instance. Each metric name is
    # registered only once, so creating them per instance would fail on
    # the second instance.
    _shared_metrics = None

    def __init__(self):
        """Load the detector, then attach metrics and logging."""
        super().__init__()
        self._setup_metrics()
        self._setup_logging()

    def _setup_metrics(self):
        """Create the metrics on first use and bind them to this instance."""
        owner = MonitoredPhoneDetector
        if owner._shared_metrics is None:
            owner._shared_metrics = {
                'detections': Counter('phone_detections_total', 'Total phone detections'),
                'errors': Counter('detection_errors_total', 'Total detection errors'),
                'duration': Histogram('detection_processing_seconds', 'Detection processing time'),
                'confidence': Gauge('detection_confidence', 'Detection confidence values'),
            }
        metrics = owner._shared_metrics
        self.detection_counter = metrics['detections']
        self.error_counter = metrics['errors']
        self.processing_time = metrics['duration']
        self.confidence_gauge = metrics['confidence']

    def _setup_logging(self):
        """Configure root logging (file + console) and create the logger."""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('phone_detection.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

    def detect_single_image(self, image_path: str, confidence_threshold: float = 0.5):
        """Run detection and record timing, counts and errors.

        :param image_path: Path of the image to analyse.
        :param confidence_threshold: Minimum score for a detection to be kept.
        :return: The result dict produced by the parent implementation,
            whether it reports success or failure.
        :raises Exception: re-raises anything the parent call raises, after
            counting and logging it.
        """
        start_time = time.time()
        try:
            result = super().detect_single_image(image_path, confidence_threshold)
        except Exception as e:
            self.error_counter.inc()
            self.logger.error(f"检测失败: {image_path}, 错误: {str(e)}")
            raise

        self.processing_time.observe(time.time() - start_time)

        # A failed detection comes back as a result dict, not an exception,
        # so it has to be counted here.
        if not result.get('success'):
            self.error_counter.inc()
            self.logger.error(f"检测失败: {image_path}, 错误: {result.get('error')}")
            return result

        total_phones = result.get('total_phones', 0)
        self.detection_counter.inc(total_phones)
        # The gauge keeps only the most recently set value.
        for detection in result['detections']:
            self.confidence_gauge.set(detection['confidence'])

        self.logger.info(f"检测完成: {image_path}, 发现 {total_phones} 部手机")
        return result