当前位置：首页 > news >正文

文字识别准确率

news 2026/7/5 12:24:09

import easyocr
import cv2
import os
import numpy as npdef calculate_ocr_accuracy(image_path, ground_truth_text=None):reader = easyocr.Reader(['ch_sim', 'en'])try:image = cv2.imread(image_path)if image is None:raise ValueError(f"无法读取图片: {image_path}")results = reader.readtext(image)recognized_text = ""confidence_scores = []for (bbox, text, confidence) in results:recognized_text += text + " "confidence_scores.append(confidence)recognized_text = recognized_text.strip()print("=" * 50)print("OCR识别结果:")print("=" * 50)for i, (bbox, text, confidence) in enumerate(results):print(f"文本块 {i+1}: '{text}' (置信度: {confidence:.4f})")print(f"\n完整识别文本: {recognized_text}")avg_confidence = np.mean(confidence_scores) if confidence_scores else 0print(f"\n平均置信度: {avg_confidence:.4f}")if ground_truth_text:accuracy = calculate_text_accuracy(ground_truth_text, recognized_text)print(f"文本准确率: {accuracy:.2f}%")return {'recognized_text': recognized_text,'ground_truth': ground_truth_text,'accuracy': accuracy,'avg_confidence': avg_confidence,'details': results}else:return {'recognized_text': recognized_text,'avg_confidence': avg_confidence,'details': results}except Exception as e:print(f"处理图片时出错: {e}")return Nonedef calculate_text_accuracy(ground_truth, recognized):gt_clean = ''.join(ground_truth.split()).lower()rec_clean = ''.join(recognized.split()).lower()distance = levenshtein_distance(gt_clean, rec_clean)max_len = max(len(gt_clean), len(rec_clean))if max_len == 0:return 100.0accuracy = (1 - distance / max_len) * 100return accuracydef levenshtein_distance(s1, s2):if len(s1) < len(s2):return levenshtein_distance(s2, s1)if len(s2) == 0:return len(s1)previous_row = range(len(s2) + 1)for i, c1 in enumerate(s1):current_row = [i + 1]for j, c2 in enumerate(s2):insertions = previous_row[j + 1] + 1deletions = current_row[j] + 1substitutions = previous_row[j] + (c1 != c2)current_row.append(min(insertions, deletions, substitutions))previous_row = current_rowreturn previous_row[-1]def batch_ocr_accuracy_test(image_folder, ground_truths):total_accuracy = 0total_confidence = 0count = 0print("开始批量OCR准确率测试...")print("=" * 60)for filename, truth_text in ground_truths.items():image_path = os.path.join(image_folder, filename)if os.path.exists(image_path):print(f"\n处理图片: {filename}")result = calculate_ocr_accuracy(image_path, truth_text)if result and 'accuracy' in result:total_accuracy += result['accuracy']total_confidence += result['avg_confidence']count += 1if count > 0:print("\n" + "=" * 60)print("批量测试结果汇总:")print(f"测试图片数量: {count}")print(f"平均准确率: {total_accuracy/count:.2f}%")print(f"平均置信度: {total_confidence/count:.4f}")print("=" * 60)def main():image_path = r"C:\Users\18306\Desktop\picture\test_image.jpg"if not os.path.exists(image_path):print(f"错误: 图片文件不存在 - {image_path}")returnprint("开始OCR识别...")result = calculate_ocr_accuracy(image_path)if result:print("\n识别完成！")else:print("识别失败！")if __name__ == "__main__":main()