# Hanzi (Chinese character) recognition demo: synthetic noisy dataset + small CNN.

import os
import random
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Work around duplicate OpenMP runtime conflict (common with PyTorch on Windows).

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
device = torch.device('cpu')

# Configuration parameters

CHARS = ['一', '二', '三', '人', '口', '手', '日', '月', '水']
TRAIN_NUM = 200
TEST_NUM = 50
IMG_SIZE = 64
DATA_SAVE_DIR = 'hanzi_data'
BATCH_SIZE = 32
EPOCHS = 15
LEARNING_RATE = 0.005

# Added: noise control parameters

NOISE_PROB = 0.3 # 噪点概率
BLUR_PROB = 0.2 # 模糊概率
ROTATE_RANGE = (-30, 30) # 扩大旋转范围
OFFSET_RANGE = (-5, 5) # 字符偏移范围

# -------------------------- Generate noisy hanzi images --------------------------

class HanziDatasetGenerator:
def init(self):
self.font = ImageFont.load_default()
print("提示:生成带噪声的汉字图像,降低识别准确率")

def _add_noise(self, img):"""给图像添加随机噪点"""img_array = np.array(img)noise = np.random.choice([0, 255], size=img_array.shape, p=[NOISE_PROB, 1-NOISE_PROB])mask = np.random.random(img_array.shape) < NOISE_PROBimg_array[mask] = noise[mask]return Image.fromarray(img_array)def _generate_single_img(self, char):"""生成带干扰的汉字图像"""img = Image.new('L', (IMG_SIZE, IMG_SIZE), color=255)  # 白底draw = ImageDraw.Draw(img)# 字符随机偏移char_offsets = {'一': (5, 25), '二': (5, 15), '三': (5, 10),'人': (10, 20), '口': (15, 15), '手': (5, 10),'日': (15, 15), '月': (10, 15), '水': (5, 10)}base_x, base_y = char_offsets[char]# 随机偏移x = base_x + random.randint(*OFFSET_RANGE)y = base_y + random.randint(*OFFSET_RANGE)# 绘制字符font_size = random.randint(30, 45)  # 随机字体大小try:font = ImageFont.truetype('simsun.ttc', size=font_size)draw.text((x, y), char, font=font, fill=0, stroke_width=1)except:draw.text((x, y), char, font=self.font, fill=0, stroke_width=2)draw.text((x + 1, y), char, font=self.font, fill=0, stroke_width=1)# 随机旋转(扩大范围)rotation = random.randint(*ROTATE_RANGE)img = img.rotate(rotation, expand=False, fillcolor=255)# 随机添加噪点if random.random() < NOISE_PROB:img = self._add_noise(img)# 随机模糊if random.random() < BLUR_PROB:img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.5)))# 随机裁剪后还原尺寸(增加变形)crop_margin = random.randint(2, 8)img = img.crop((crop_margin, crop_margin, IMG_SIZE-crop_margin, IMG_SIZE-crop_margin))img = img.resize((IMG_SIZE, IMG_SIZE), Image.Resampling.BILINEAR)return imgdef generate_dataset(self):"""生成带噪声的数据集"""if os.path.exists(DATA_SAVE_DIR):for root, dirs, files in os.walk(DATA_SAVE_DIR, topdown=False):for f in files:os.remove(os.path.join(root, f))for d in dirs:os.rmdir(os.path.join(root, d))os.rmdir(DATA_SAVE_DIR)for split in ['train', 'test']:for char in CHARS:os.makedirs(os.path.join(DATA_SAVE_DIR, split, char), exist_ok=True)print("生成带噪声的数据集...")for char in CHARS:for i in range(TRAIN_NUM):img = self._generate_single_img(char)img.save(os.path.join(DATA_SAVE_DIR, 'train', 
char, f'{i}.png'))for i in range(TEST_NUM):img = self._generate_single_img(char)img.save(os.path.join(DATA_SAVE_DIR, 'test', char, f'{i}.png'))print(f"数据集生成完成:{os.path.abspath(DATA_SAVE_DIR)}")

# -------------------------- Dataset loading --------------------------

class HanziDataset(Dataset):
def init(self, split='train'):
self.split = split
self.data_dir = os.path.join(DATA_SAVE_DIR, split)
self.char_list = CHARS
self.char2idx = {c: i for i, c in enumerate(self.char_list)}
self.images, self.labels = self._load_data()
self.transform = transforms.ToTensor()

def _load_data(self):images = []labels = []for char in self.char_list:char_dir = os.path.join(self.data_dir, char)for img_name in os.listdir(char_dir):images.append(os.path.join(char_dir, img_name))labels.append(self.char2idx[char])return images, labelsdef __len__(self):return len(self.images)def __getitem__(self, idx):img = Image.open(self.images[idx]).convert('L')return self.transform(img), self.labels[idx]

# -------------------------- Lightweight model (reduced capacity) --------------------------

class FeatureCNN(nn.Module):
def init(self, num_classes=9):
super(FeatureCNN, self).init()
self.features = nn.Sequential(
nn.Conv2d(1, 4, kernel_size=3, padding=1), # 减少卷积核数量
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Dropout(0.2), # 添加Dropout防止过拟合
nn.Conv2d(4, 8, kernel_size=3, padding=1), # 减少卷积核数量
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Dropout(0.2), # 添加Dropout
)
self.classifier = nn.Linear(8 * 16 * 16, num_classes) # 减少全连接层参数

def forward(self, x):x = self.features(x)x = x.view(-1, 8 * 16 * 16)x = self.classifier(x)return x

# -------------------------- Training and recognition --------------------------

def main():
# 生成带噪声的数据集
generator = HanziDatasetGenerator()
generator.generate_dataset()

# 加载数据
train_dataset = HanziDataset('train')
test_dataset = HanziDataset('test')
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)# 模型与优化器
model = FeatureCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)# 训练
print("\n开始训练(控制准确率不超过100%)...")
best_acc = 0.0
for epoch in range(EPOCHS):model.train()total_loss = 0.0for imgs, labels in train_loader:imgs, labels = imgs.to(device), labels.to(device)optimizer.zero_grad()outputs = model(imgs)loss = criterion(outputs, labels)loss.backward()optimizer.step()total_loss += loss.item() * imgs.size(0)avg_loss = total_loss / len(train_dataset)# 测试model.eval()correct = 0total = 0with torch.no_grad():for imgs, labels in test_loader:imgs, labels = imgs.to(device), labels.to(device)outputs = model(imgs)_, preds = torch.max(outputs, 1)total += labels.size(0)correct += (preds == labels).sum().item()acc = 100 * correct / totalprint(f"轮次 {epoch + 1:2d} | 损失:{avg_loss:.4f} | 准确率:{acc:.2f}%")if acc > best_acc:best_acc = acctorch.save(model.state_dict(), 'best_model.pth')# 提前停止(避免准确率过高)if acc >= 95:print(f"准确率达到95%,提前停止训练")breakprint(f"\n平均准确率:{best_acc:.2f}%")#print("\n请输入图片路径(输入q退出):")
print("\n学号:2024310143004")
while True:path = input()if path.lower() == 'q':breakif not os.path.exists(path):print("路径错误,请重新输入")continuetry:img = Image.open(path).convert('L').resize((64, 64))img_tensor = transforms.ToTensor()(img).unsqueeze(0).to(device)with torch.no_grad():output = model(img_tensor)pred_char = CHARS[torch.argmax(output).item()]confidence = torch.softmax(output, dim=1).max().item() * 100print(f"识别结果:{pred_char} | 可信度:{confidence:.2f}%")except Exception as e:print(f"错误:{e}")

if name == "main":
main()

http://www.jsqmd.com/news/140189/

相关文章:

  • 业绩很牛的销售,都在练基本功!
  • CF803C Maximal GCD做题笔记
  • 观bilibi《超强动画,一步一步一步深入浅出解释Transformer原理!》有感
  • 性能测试中关于硬件环境的测试
  • Java-Spring 依赖注入详解--多个类实现与选择 - 若
  • 一键激活 Windows 与 Office 的轻量绿色工具!
  • centos7配置yum软件源
  • 2025年西安电子科技大学计算机考研复试机试真题(附 AC 代码 + 解题思路)
  • 学长亲荐8个AI论文工具,研究生轻松搞定开题报告!
  • 2025最新!9款AI论文软件测评:本科生写论文痛点全解析
  • ubuntu虚拟机mysql数据库忘记密码
  • Selenium + 超级鹰实现猎聘网滑块验证码自动登录
  • 2025年北京邮电大学计算机考研复试机试真题(附 AC 代码 + 解题思路)
  • 「AI元人文构想」对话全记录:从困境、构想到系统自洽的七十日
  • 链表|160.相交链表234.回文指针141环形链表
  • Linux中级の自动运维工具Ansible基础
  • 【图数据库与知识图谱入门】3.5 知识图谱的典型应用场景
  • 04. 绘图功能
  • AcWing 338:计数问题 ← 数位DP
  • Java-Spring 依赖注入详解 - 从零开始理解 - 若
  • 在 Cloud SQL for PostgreSQL 上启用 pgvector
  • Doris为2.1版本,但json_each不可以用解决方法
  • 《创业之路》-754-《架构思维:从程序员到CTO》第二部分:架构师的六大生存法则与启发
  • Nature Genetics | 本周最新文献速递
  • Java 反射机制解析:从基础概念到框架实践 - 教程
  • 微信小程序uniapp-vue校园租房指南房屋租赁
  • 模型调优技巧:提升准确率的10种实用方法
  • 149_尚硅谷_数组应用实例(1)
  • PCIe-浅谈Transaction ID和Tag(2)
  • 数据增强(Data Augmentation)策略大全