当前位置: 首页 > news > 正文

文字识别准确率

import easyocr
import cv2
import os
import numpy as np

# Language codes handed to easyocr.Reader (simplified Chinese + English
# according to the EasyOCR language list).
OCR_LANGUAGES = ['ch_sim', 'en']

# Image that main() processes when no explicit path is supplied.
DEFAULT_IMAGE_PATH = r"C:\Users\18306\Desktop\picture\test_image.jpg"


def calculate_ocr_accuracy(image_path, ground_truth_text=None, reader=None):
    """Run OCR on one image, print the results and optionally score them.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        ground_truth_text: Expected text. When it is not ``None`` (an empty
            string counts as a valid expectation) the recognized text is
            scored against it with ``calculate_text_accuracy``.
        reader: Optional, already constructed ``easyocr.Reader``. Passing one
            avoids rebuilding the reader on every call; when omitted a new
            reader for ``OCR_LANGUAGES`` is created.

    Returns:
        A dict with ``recognized_text``, ``avg_confidence`` and ``details``
        (the raw ``readtext`` results); ``ground_truth`` and ``accuracy`` are
        added when a ground truth was supplied. ``None`` is returned when
        reading or processing the image fails (the error is printed).
    """
    if reader is None:
        reader = easyocr.Reader(OCR_LANGUAGES)

    try:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"无法读取图片: {image_path}")

        # Each result is unpacked as a (bbox, text, confidence) triple.
        results = reader.readtext(image)
        recognized_text = " ".join(text for _, text, _ in results).strip()
        confidence_scores = [confidence for _, _, confidence in results]

        print("=" * 50)
        print("OCR识别结果:")
        print("=" * 50)
        for index, (_, text, confidence) in enumerate(results, start=1):
            print(f"文本块 {index}: '{text}' (置信度: {confidence:.4f})")
        print(f"\n完整识别文本: {recognized_text}")

        # np.mean of an empty list is NaN (with a warning), so guard it.
        if confidence_scores:
            avg_confidence = float(np.mean(confidence_scores))
        else:
            avg_confidence = 0.0
        print(f"\n平均置信度: {avg_confidence:.4f}")

        if ground_truth_text is None:
            return {
                'recognized_text': recognized_text,
                'avg_confidence': avg_confidence,
                'details': results,
            }

        accuracy = calculate_text_accuracy(ground_truth_text, recognized_text)
        print(f"文本准确率: {accuracy:.2f}%")
        return {
            'recognized_text': recognized_text,
            'ground_truth': ground_truth_text,
            'accuracy': accuracy,
            'avg_confidence': avg_confidence,
            'details': results,
        }
    except Exception as e:
        # Best-effort boundary: report the failure and let callers carry on
        # (the batch runner relies on a None result to skip the image).
        print(f"处理图片时出错: {e}")
        return None


def calculate_text_accuracy(ground_truth, recognized):
    """Return the similarity of two texts as a percentage in [0, 100].

    Both texts are compared after removing all whitespace and lower-casing.
    The score is ``(1 - edit_distance / max_len) * 100`` where ``max_len`` is
    the length of the longer normalized text; two empty texts score 100.0.
    """
    gt_clean = ''.join(ground_truth.split()).lower()
    rec_clean = ''.join(recognized.split()).lower()

    max_len = max(len(gt_clean), len(rec_clean))
    if max_len == 0:
        return 100.0

    distance = levenshtein_distance(gt_clean, rec_clean)
    return (1 - distance / max_len) * 100


def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.

    Counts the minimum number of single-character insertions, deletions and
    substitutions needed to turn one string into the other, keeping only two
    rows of the dynamic-programming table in memory.
    """
    # Keep the shorter string as s2 so the rows are as short as possible.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    if not s2:
        return len(s1)

    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]


def batch_ocr_accuracy_test(image_folder, ground_truths):
    """Score OCR accuracy over several images and print a summary.

    Args:
        image_folder: Directory that contains the images.
        ground_truths: Mapping of image file name to its expected text.

    Returns:
        A dict with ``count`` (number of images that were scored),
        ``avg_accuracy`` and ``avg_confidence``. Both averages are ``None``
        when no image could be scored.
    """
    total_accuracy = 0.0
    total_confidence = 0.0
    count = 0
    reader = None

    print("开始批量OCR准确率测试...")
    print("=" * 60)

    for filename, truth_text in ground_truths.items():
        image_path = os.path.join(image_folder, filename)
        if not os.path.exists(image_path):
            print(f"\n跳过不存在的图片: {filename}")
            continue

        # Build the reader once, and only when there is something to read.
        if reader is None:
            reader = easyocr.Reader(OCR_LANGUAGES)

        print(f"\n处理图片: {filename}")
        result = calculate_ocr_accuracy(image_path, truth_text, reader=reader)
        if result and 'accuracy' in result:
            total_accuracy += result['accuracy']
            total_confidence += result['avg_confidence']
            count += 1

    if count == 0:
        print("\n没有成功处理任何图片。")
        return {'count': 0, 'avg_accuracy': None, 'avg_confidence': None}

    avg_accuracy = total_accuracy / count
    avg_confidence = total_confidence / count
    print("\n" + "=" * 60)
    print("批量测试结果汇总:")
    print(f"测试图片数量: {count}")
    print(f"平均准确率: {avg_accuracy:.2f}%")
    print(f"平均置信度: {avg_confidence:.4f}")
    print("=" * 60)
    return {
        'count': count,
        'avg_accuracy': avg_accuracy,
        'avg_confidence': avg_confidence,
    }


def main(image_path=DEFAULT_IMAGE_PATH):
    """Recognize the text of a single image and report success or failure.

    Args:
        image_path: Image to process; defaults to ``DEFAULT_IMAGE_PATH``.
    """
    if not os.path.exists(image_path):
        print(f"错误: 图片文件不存在 - {image_path}")
        return

    print("开始OCR识别...")
    result = calculate_ocr_accuracy(image_path)
    if result:
        print("\n识别完成!")
    else:
        print("识别失败!")


if __name__ == "__main__":
    main()

image

http://www.jsqmd.com/news/39802/

相关文章:

  • 【d-bus】gdbus-codegen 使用教程
  • logstash配置和启动
  • 最近改论文的诡异经历…… - BUAA
  • newDay21
  • 2025广东封闭式管理学校最新TOP5评测:重塑少年言行,见证成长蜕变
  • 2025年广东军事化训练学校/机构最新TOP5权威评测:铸就坚毅品格,领航成长之路
  • 2025年广东青少年感恩教育学校/机构最新TOP5推荐:家庭教育、心理健康,科学评测
  • 2025广东法制教育机构/学校最新TOP5评测:心理健康、素质拓展、行为矫正全覆盖
  • 2025年广东青少年素质拓展训练学校最新TOP5实力榜:以规范养习惯,护航成长之路
  • 2025年广东青少年行为矫正学校TOP5权威评测:科学矫正护航成长未来
  • 高级程序语言设计第五次个人作业
  • 2025年贵州贵阳母婴护理机构最新TOP5评测:守护母婴健康的专业力量
  • gcc编译时满屏错误信息咋办?
  • 2025贵州贵阳月子会所最新TOP5评测:产后恢复优选,守护母婴健康
  • 2025年贵州贵阳月子中心最新TOP5专业评测:守护母婴健康新标杆
  • Excel VBA 自定义排序
  • 基于GWO灰狼优化的XGBoost序列预测算法matlab仿真
  • 2025广东住房公积金提取机构最新TOP5评测:因为正规,所以高效
  • 2025广东公积金提取代办中介最新TOP5评测:高效引领行业合规标准
  • 2025年深圳公积金提取最新TOP5评测:专业高效合规,引领行业标准
  • 《Chrome 开发者工具:前端调试必备》
  • 使用 vLLM 本地部署 Qwen3-Embedding-8B 模型并接入 Dify 完整指南 - yi
  • 《VS Code:高效编程的插件与配置》
  • 11.13 NOTE
  • 2025广州公积金提取服务最新TOP5权威评测:专业合规引领行业标杆
  • 用Rust 解析验证码:结合 Tesseract OCR 进行文本识别
  • 10.26 NOTE
  • 10.22 NOTE
  • 题解:CF2106D Flower Boy
  • 使用 Maven 内置的版本号(Version)统一控制功能