.json标记转换.txt格式小工具
当使用自建数据集对YOLO等模型进行训练时,有时会遇到无法直接使用.json文件进行训练的问题,而labelme标注导出的往往是.json格式。这里提供一个工具脚本,在训练前先运行一遍,即可把.json标注转换为训练可用的.txt格式。该脚本原本用于YOLO-v8-seg模型训练,类别数和类别名称请根据自己的需要进行设置。
(该脚本只依赖标准库,不用pip一堆)
(适用范围说明:支持所有具备实例分割功能的模型(即名称带-seg的版本),不支持不具备实例分割功能的模型)
import os
import json
import random

# Extensions probed (in this order) when the annotation does not name a usable image.
_IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg', '.bmp')
# Chunk size used by the manual image copy so large images are not held in memory at once.
_COPY_CHUNK_SIZE = 1024 * 1024


def _resolve_image_name(data, json_file, img_dir):
    """Return the file name of the annotated image inside img_dir, or None if not found."""
    raw_path = data.get('imagePath')
    if raw_path:
        # imagePath may carry directory components, possibly with Windows
        # separators; only the bare file name is meaningful inside img_dir.
        candidate = os.path.basename(str(raw_path).replace('\\', '/'))
        if candidate and os.path.isfile(os.path.join(img_dir, candidate)):
            return candidate
    # Fall back to an image that shares the annotation's stem.
    stem = os.path.splitext(json_file)[0]
    for ext in _IMAGE_EXTENSIONS:
        if os.path.isfile(os.path.join(img_dir, stem + ext)):
            return stem + ext
    return None


def _copy_file(src, dst):
    """Copy src to dst in binary chunks (manual read/write; shutil is deliberately avoided)."""
    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        while True:
            chunk = fsrc.read(_COPY_CHUNK_SIZE)
            if not chunk:
                break
            fdst.write(chunk)


def _shape_to_line(shape, class_id, img_w, img_h):
    """Return the label line for one shape, or None when it has fewer than 3 points."""
    points = shape.get('points') or []
    # Rectangle (two corners) -> quadrilateral (four vertices).
    if shape.get('shape_type', 'polygon') == 'rectangle' and len(points) == 2:
        (x1, y1), (x2, y2) = points
        points = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
    if len(points) < 3:
        return None
    # Normalise to [0, 1]; coordinates outside the image are clamped to its border.
    coords = []
    for x, y in points:
        coords.append(max(0.0, min(1.0, x / img_w)))
        coords.append(max(0.0, min(1.0, y / img_h)))
    return f"{class_id} " + " ".join(f"{c:.6f}" for c in coords) + "\n"


def _convert_subset(file_list, subset, json_dir, img_dir, output_dir, class_to_id):
    """Convert every annotation in file_list into the given subset ('train' or 'val')."""
    for json_file in file_list:
        json_path = os.path.join(json_dir, json_file)
        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as err:
            # One broken annotation must not abort the whole conversion.
            print(f"跳过 {json_file}:JSON 解析失败({err})")
            continue
        if not isinstance(data, dict):
            print(f"跳过 {json_file}:不是有效的 labelme 标注")
            continue

        # Locate the image this annotation belongs to.
        img_name = _resolve_image_name(data, json_file, img_dir)
        if img_name is None:
            print(f"跳过 {json_file}:找不到图像")
            continue

        # Validate the size BEFORE copying, so a skipped annotation leaves no
        # unlabeled image behind; a zero size would also divide by zero below.
        img_w = data.get('imageWidth')
        img_h = data.get('imageHeight')
        if not img_w or not img_h:
            print(f"跳过 {json_file}:缺少图像尺寸")
            continue

        # Build all label lines first.
        lines = []
        for shape in data.get('shapes') or []:
            label = shape.get('label')
            if label not in class_to_id:
                continue
            line = _shape_to_line(shape, class_to_id[label], img_w, img_h)
            if line is None:
                print(f"警告:{json_file} 中 {label} 点数不足3,跳过")
                continue
            lines.append(line)

        # Copy the image.
        _copy_file(
            os.path.join(img_dir, img_name),
            os.path.join(output_dir, 'images', subset, img_name),
        )

        # The label file is named after the IMAGE stem (not the JSON stem) so
        # that images/<subset>/<stem>.* and labels/<subset>/<stem>.txt pair up.
        label_path = os.path.join(
            output_dir, 'labels', subset, os.path.splitext(img_name)[0] + '.txt'
        )
        if lines:
            with open(label_path, 'w', encoding='utf-8') as f:
                f.writelines(lines)
        else:
            # No valid annotation: make sure no (stale) label file remains.
            try:
                os.remove(label_path)
            except FileNotFoundError:
                pass
            print(f"注意:{json_file} 没有有效标注,已删除空标签文件")


def convert_labelme_to_yolo_seg(json_dir, img_dir, output_dir, train_ratio=0.8,
                                keep_classes=None, seed=None):
    """Convert labelme JSON annotations into a YOLO segmentation dataset layout.

    Creates ``images/{train,val}`` and ``labels/{train,val}`` under
    ``output_dir``, copies each annotated image, writes one ``.txt`` label
    file per image (one line per shape: class id followed by normalised
    x/y pairs) and finally writes ``dataset.yaml``.

    Args:
        json_dir: Directory containing the labelme ``.json`` files.
        img_dir: Directory containing the annotated images.
        output_dir: Root directory of the generated dataset.
        train_ratio: Fraction of annotation files assigned to the train split.
        keep_classes: Class names to export; the index in this sequence is the
            class id. Shapes with any other label are ignored. Defaults to
            four placeholder names.
        seed: Optional seed for a reproducible train/val split. With ``None``
            the module-level random generator is used.

    Returns:
        None. Progress and skipped files are reported with ``print``.
    """
    if keep_classes is None:
        keep_classes = ['您的分类类别1', '您的分类类别2', '您的分类类别3', '您的分类类别4']
    keep_classes = list(keep_classes)
    class_to_id = {name: idx for idx, name in enumerate(keep_classes)}
    num_classes = len(class_to_id)

    # Create output sub-directories.
    for sub in ['images/train', 'images/val', 'labels/train', 'labels/val']:
        os.makedirs(os.path.join(output_dir, sub), exist_ok=True)

    # Collect all JSON files; sorted so that a given seed always yields the
    # same split regardless of the directory listing order.
    json_files = sorted(f for f in os.listdir(json_dir) if f.endswith('.json'))
    if not json_files:
        print("错误:没有找到 JSON 文件")
        return

    rng = random if seed is None else random.Random(seed)
    rng.shuffle(json_files)
    split_idx = int(len(json_files) * train_ratio)
    train_files = json_files[:split_idx]
    val_files = json_files[split_idx:]

    print(f"训练集: {len(train_files)} 张")
    _convert_subset(train_files, 'train', json_dir, img_dir, output_dir, class_to_id)
    print(f"验证集: {len(val_files)} 张")
    _convert_subset(val_files, 'val', json_dir, img_dir, output_dir, class_to_id)

    # Generate dataset.yaml. UTF-8 is forced because class names may be
    # non-ASCII; names are emitted as a JSON list, which is also a valid YAML
    # flow sequence for any name (a Python repr is not).
    # NOTE(review): 'path: .' is relative; how it is resolved depends on the
    # training framework — confirm it points at output_dir in your setup.
    yaml_path = os.path.join(output_dir, 'dataset.yaml')
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write("path: .\n")
        f.write("train: images/train\n")
        f.write("val: images/val\n")
        f.write(f"nc: {num_classes}\n")
        f.write(f"names: {json.dumps(keep_classes, ensure_ascii=False)}\n")
    print(f"已生成 {yaml_path}")


if __name__ == '__main__':
    convert_labelme_to_yolo_seg(
        json_dir='data/training_data/annotations',
        img_dir='data/training_data/image',
        output_dir='data/yolo_dataset',
        train_ratio=0.8,
        keep_classes=['您的分类类别1', '您的分类类别2', '您的分类类别3', '您的分类类别4']
    )