
Deploying YOLOv8 Object Detection on RK3588, End to End (pt -> onnx -> rknn), with Detailed Code

Contents

I. Train a YOLOv8 model and obtain the best weights file best.pt

II. Convert pt to onnx (best.pt -> best11.onnx)

1. Modify the downloaded YOLOv8 code

2. Add model export code

3. Run the export command

III. Convert ONNX to RKNN

IV. Deploy on RK3588

1. Copy the rknn file onto the RK3588 board

2. Run rknnlite_inference.py


I. Train a YOLOv8 model and obtain the best weights file best.pt

The training process is omitted here.

II. Convert pt to onnx (best.pt -> best11.onnx)

1. Modify the downloaded YOLOv8 code

Location: ultralytics_yolov8_main/ultralytics/nn/modules/head.py

Add the following snippet to the Detect class:

```python
y = []
for i in range(self.nl):
    y.append(self.cv2[i](x[i]))
    cls = torch.sigmoid(self.cv3[i](x[i]))
    cls_sum = torch.clamp(cls.sum(1, keepdim=True), 0, 1)
    y.append(cls)
    y.append(cls_sum)
return y
```

As shown in the figure below:
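With this change the head returns the raw per-scale tensors instead of decoded predictions, which is what the RKNN post-processing later in this post expects. For a 640x640 input, the expected shapes can be sanity-checked without running the model (a minimal sketch; `num_classes = 3` matches the example classes used later):

```python
# Expected raw output shapes from the modified Detect head for a 640x640 input.
# Each scale yields: reg (4 box sides x 16 DFL bins = 64 channels), cls, and cls_sum.
strides = [8, 16, 32]
num_classes = 3
input_size = 640

for s in strides:
    fm = input_size // s                    # feature-map side length: 80, 40, 20
    reg_shape = (1, 64, fm, fm)             # DFL distribution per box side
    cls_shape = (1, num_classes, fm, fm)    # per-class scores (sigmoid already applied)
    sum_shape = (1, 1, fm, fm)              # clamped sum of class scores
    print(reg_shape, cls_shape, sum_shape)
```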

2. Add model export code

Add the following snippet to the _load function in model.py (location: ultralytics_yolov8-main\ultralytics\engine\model.py):

```python
print("=========== onnx =========== ")
import torch
self.model.fuse()
self.model.eval()
self.model.load_state_dict(torch.load('./model/best11.pt', map_location='cpu'), strict=False)

dummy_input = torch.randn(1, 3, 640, 640)
input_names = ["data"]
output_names = ["reg1", "cls1", "reg2", "cls2", "reg3", "cls3"]
torch.onnx.export(self.model, dummy_input, "./best111.onnx", verbose=False,
                  input_names=input_names, output_names=output_names, opset_version=12)
print("======================== convert onnx Finished! .... ")
```

3. Run the export command:

```python
from ultralytics import YOLO

model = YOLO('./model/best11.pt')
results = model(task='detect', mode='predict', source='C:/Users\lzy06\Desktop\zxq/relaticdata',
                line_thickness=3, show=False, save=True, device='cpu')
```

Once you have confirmed that the ONNX file was exported successfully, any errors reported afterwards can be ignored.

Open the onnx file at https://netron.app and check that the outputs match, as shown below:

III. Convert ONNX to RKNN

This environment must be set up on Linux. First install rknn_toolkit2; installation guides are widely available online.

To verify the installation, start python in a terminal and run from rknn.api import RKNN; if no error is raised, the install succeeded.
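The same check can be scripted so it reports the result instead of raising (a hypothetical helper, not part of the toolkit):

```python
# Report whether rknn-toolkit2 can be imported in the current environment.
import importlib.util

if importlib.util.find_spec("rknn") is not None:
    print("rknn-toolkit2 is installed")
else:
    print("rknn-toolkit2 is NOT installed")
```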

The conversion script onnx2rknn.py is as follows:

```python
import os
import urllib
import traceback
import time
import sys
import numpy as np
import cv2
from rknn.api import RKNN
from math import exp

ONNX_MODEL = './best111.onnx'
RKNN_MODEL = './best111.rknn'
DATASET = './dataset.txt'
QUANTIZE = False

CLASSES = ['broke', 'good', 'lose']

meshgrid = []

class_num = len(CLASSES)
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
nmsThresh = 0.5
objectThresh = 0.5

input_imgH = 640
input_imgW = 640


class DetectBox:
    def __init__(self, classId, score, xmin, ymin, xmax, ymax):
        self.classId = classId
        self.score = score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax


def GenerateMeshgrid():
    for index in range(headNum):
        for i in range(mapSize[index][0]):
            for j in range(mapSize[index][1]):
                meshgrid.append(j + 0.5)
                meshgrid.append(i + 0.5)


def IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2):
    xmin = max(xmin1, xmin2)
    ymin = max(ymin1, ymin2)
    xmax = min(xmax1, xmax2)
    ymax = min(ymax1, ymax2)

    innerWidth = xmax - xmin
    innerHeight = ymax - ymin
    innerWidth = innerWidth if innerWidth > 0 else 0
    innerHeight = innerHeight if innerHeight > 0 else 0

    innerArea = innerWidth * innerHeight
    area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
    area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
    total = area1 + area2 - innerArea
    return innerArea / total


def NMS(detectResult):
    predBoxs = []
    sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
    for i in range(len(sort_detectboxs)):
        xmin1 = sort_detectboxs[i].xmin
        ymin1 = sort_detectboxs[i].ymin
        xmax1 = sort_detectboxs[i].xmax
        ymax1 = sort_detectboxs[i].ymax
        classId = sort_detectboxs[i].classId
        if sort_detectboxs[i].classId != -1:
            predBoxs.append(sort_detectboxs[i])
            for j in range(i + 1, len(sort_detectboxs), 1):
                if classId == sort_detectboxs[j].classId:
                    xmin2 = sort_detectboxs[j].xmin
                    ymin2 = sort_detectboxs[j].ymin
                    xmax2 = sort_detectboxs[j].xmax
                    ymax2 = sort_detectboxs[j].ymax
                    iou = IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2)
                    if iou > nmsThresh:
                        sort_detectboxs[j].classId = -1
    return predBoxs


def sigmoid(x):
    return 1 / (1 + exp(-x))


def postprocess(out, img_h, img_w):
    print('postprocess ... ')
    detectResult = []
    output = []
    for i in range(len(out)):
        print(out[i].shape)
        output.append(out[i].reshape((-1)))

    scale_h = img_h / input_imgH
    scale_w = img_w / input_imgW

    gridIndex = -2
    cls_index = 0
    cls_max = 0

    for index in range(headNum):
        reg = output[index * 2 + 0]
        cls = output[index * 2 + 1]

        for h in range(mapSize[index][0]):
            for w in range(mapSize[index][1]):
                gridIndex += 2

                if 1 == class_num:
                    cls_max = sigmoid(cls[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                    cls_index = 0
                else:
                    for cl in range(class_num):
                        cls_val = cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]
                        if 0 == cl:
                            cls_max = cls_val
                            cls_index = cl
                        else:
                            if cls_val > cls_max:
                                cls_max = cls_val
                                cls_index = cl
                    cls_max = sigmoid(cls_max)

                if cls_max > objectThresh:
                    regdfl = []
                    for lc in range(4):
                        sfsum = 0
                        locval = 0
                        for df in range(16):
                            temp = exp(reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                            reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] = temp
                            sfsum += temp
                        for df in range(16):
                            sfval = reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] / sfsum
                            locval += sfval * df
                        regdfl.append(locval)

                    x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index]
                    y1 = (meshgrid[gridIndex + 1] - regdfl[1]) * strides[index]
                    x2 = (meshgrid[gridIndex + 0] + regdfl[2]) * strides[index]
                    y2 = (meshgrid[gridIndex + 1] + regdfl[3]) * strides[index]

                    xmin = x1 * scale_w
                    ymin = y1 * scale_h
                    xmax = x2 * scale_w
                    ymax = y2 * scale_h

                    xmin = xmin if xmin > 0 else 0
                    ymin = ymin if ymin > 0 else 0
                    xmax = xmax if xmax < img_w else img_w
                    ymax = ymax if ymax < img_h else img_h

                    box = DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax)
                    detectResult.append(box)

    # NMS
    print('detectResult:', len(detectResult))
    predBox = NMS(detectResult)
    return predBox


def export_rknn_inference(img):
    # Create RKNN object
    rknn = RKNN(verbose=False)

    # pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]],
                quantized_algorithm='normal', quantized_method='channel',
                target_platform='rk3588')
    print('done')

    # Load ONNX model
    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=QUANTIZE, dataset=DATASET, rknn_batch_size=1)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export RKNN model
    print('--> Export rknn model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    # ret = rknn.init_runtime(target='rk3566')
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Inference
    print('--> Running model')
    outputs = rknn.inference(inputs=[img])
    rknn.release()
    print('done')

    return outputs


if __name__ == '__main__':
    print('This is main ...')
    GenerateMeshgrid()

    img_path = './dataset/00003.png'
    orig_img = cv2.imread(img_path)
    img_h, img_w = orig_img.shape[:2]

    origimg = cv2.resize(orig_img, (input_imgW, input_imgH), interpolation=cv2.INTER_LINEAR)
    origimg = cv2.cvtColor(origimg, cv2.COLOR_BGR2RGB)

    img = np.expand_dims(origimg, 0)
    outputs = export_rknn_inference(img)

    out = []
    for i in range(len(outputs)):
        out.append(outputs[i])
    predbox = postprocess(out, img_h, img_w)

    print(len(predbox))

    for i in range(len(predbox)):
        xmin = int(predbox[i].xmin)
        ymin = int(predbox[i].ymin)
        xmax = int(predbox[i].xmax)
        ymax = int(predbox[i].ymax)
        classId = predbox[i].classId
        score = predbox[i].score

        cv2.rectangle(orig_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        ptext = (xmin, ymin)
        title = CLASSES[classId] + ":%.2f" % (score)
        cv2.putText(orig_img, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

    cv2.imwrite('./test_rknn_result.jpg', orig_img)
    # cv2.imshow("test", origimg)
    # cv2.waitKey(0)
```
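The build step references DATASET = './dataset.txt', which rknn.build() reads for calibration when do_quantization=True (one image path per line). A minimal sketch for generating it (`write_dataset_txt` is a hypothetical helper name, not part of the toolkit):

```python
import glob
import os

def write_dataset_txt(image_dir, out_file="dataset.txt", pattern="*.png"):
    """Write one calibration image path per line, the format rknn.build() expects."""
    paths = sorted(glob.glob(os.path.join(image_dir, pattern)))
    with open(out_file, "w") as f:
        for p in paths:
            f.write(p + "\n")
    return len(paths)

# Example: write_dataset_txt("./dataset")  # then set QUANTIZE = True and rebuild
```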

IV. Deploy on RK3588

1. Copy the rknn file onto the RK3588 board.

2. Run rknnlite_inference.py

(It is essentially the same as onnx2rknn.py; the only difference is that on the board RKNNLite is used.) I modified the code here to add test paths for both video files and image folders, as shown below:

```python
import glob
import os
import urllib
import traceback
import time
import sys
import numpy as np
import cv2
from rknnlite.api import RKNNLite
from math import exp

RKNN_MODEL = './model/detect_FQ.rknn'
dataset_file = './dataset.txt'
img_folder = "./dataset"
video_path = "00001.mp4"
video_inference = False
result_path = './detect_result'

CLASSES = ['broke', 'good', 'lose']

meshgrid = []

class_num = len(CLASSES)
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
nmsThresh = 0.5
objectThresh = 0.5

input_imgH = 640
input_imgW = 640


class DetectBox:
    def __init__(self, classId, score, xmin, ymin, xmax, ymax):
        self.classId = classId
        self.score = score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax


def GenerateMeshgrid():
    for index in range(headNum):
        for i in range(mapSize[index][0]):
            for j in range(mapSize[index][1]):
                meshgrid.append(j + 0.5)
                meshgrid.append(i + 0.5)


def IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2):
    xmin = max(xmin1, xmin2)
    ymin = max(ymin1, ymin2)
    xmax = min(xmax1, xmax2)
    ymax = min(ymax1, ymax2)

    innerWidth = xmax - xmin
    innerHeight = ymax - ymin
    innerWidth = innerWidth if innerWidth > 0 else 0
    innerHeight = innerHeight if innerHeight > 0 else 0

    innerArea = innerWidth * innerHeight
    area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
    area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
    total = area1 + area2 - innerArea
    return innerArea / total


def NMS(detectResult):
    predBoxs = []
    sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
    for i in range(len(sort_detectboxs)):
        xmin1 = sort_detectboxs[i].xmin
        ymin1 = sort_detectboxs[i].ymin
        xmax1 = sort_detectboxs[i].xmax
        ymax1 = sort_detectboxs[i].ymax
        classId = sort_detectboxs[i].classId
        if sort_detectboxs[i].classId != -1:
            predBoxs.append(sort_detectboxs[i])
            for j in range(i + 1, len(sort_detectboxs), 1):
                if classId == sort_detectboxs[j].classId:
                    xmin2 = sort_detectboxs[j].xmin
                    ymin2 = sort_detectboxs[j].ymin
                    xmax2 = sort_detectboxs[j].xmax
                    ymax2 = sort_detectboxs[j].ymax
                    iou = IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2)
                    if iou > nmsThresh:
                        sort_detectboxs[j].classId = -1
    return predBoxs


def sigmoid(x):
    return 1 / (1 + exp(-x))


def postprocess(out, img_h, img_w):
    print('postprocess ... ')
    detectResult = []
    output = []
    for i in range(len(out)):
        print(out[i].shape)
        output.append(out[i].reshape((-1)))

    scale_h = img_h / input_imgH
    scale_w = img_w / input_imgW

    gridIndex = -2
    cls_index = 0
    cls_max = 0

    for index in range(headNum):
        reg = output[index * 2 + 0]
        cls = output[index * 2 + 1]

        for h in range(mapSize[index][0]):
            for w in range(mapSize[index][1]):
                gridIndex += 2

                if 1 == class_num:
                    cls_max = sigmoid(cls[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                    cls_index = 0
                else:
                    for cl in range(class_num):
                        cls_val = cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]
                        if 0 == cl:
                            cls_max = cls_val
                            cls_index = cl
                        else:
                            if cls_val > cls_max:
                                cls_max = cls_val
                                cls_index = cl
                    cls_max = sigmoid(cls_max)

                if cls_max > objectThresh:
                    regdfl = []
                    for lc in range(4):
                        sfsum = 0
                        locval = 0
                        for df in range(16):
                            temp = exp(reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                            reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] = temp
                            sfsum += temp
                        for df in range(16):
                            sfval = reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] / sfsum
                            locval += sfval * df
                        regdfl.append(locval)

                    x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index]
                    y1 = (meshgrid[gridIndex + 1] - regdfl[1]) * strides[index]
                    x2 = (meshgrid[gridIndex + 0] + regdfl[2]) * strides[index]
                    y2 = (meshgrid[gridIndex + 1] + regdfl[3]) * strides[index]

                    xmin = x1 * scale_w
                    ymin = y1 * scale_h
                    xmax = x2 * scale_w
                    ymax = y2 * scale_h

                    xmin = xmin if xmin > 0 else 0
                    ymin = ymin if ymin > 0 else 0
                    xmax = xmax if xmax < img_w else img_w
                    ymax = ymax if ymax < img_h else img_h

                    box = DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax)
                    detectResult.append(box)

    # NMS
    print('detectResult:', len(detectResult))
    predBox = NMS(detectResult)
    return predBox


def export_rknnlite_inference(img):
    # Create RKNNLite object
    rknnlite = RKNNLite(verbose=False)

    # Load RKNN model
    print('--> Loading model')
    ret = rknnlite.load_rknn(RKNN_MODEL)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    # ret = rknnlite.init_runtime()
    ret = rknnlite.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Inference
    print('--> Running model')
    outputs = rknnlite.inference(inputs=[img])
    rknnlite.release()
    print('done')

    return outputs


def get_dataset_txt(dataset_path, dataset_savefile):
    # Write one image path per line.
    file_data = glob.glob(os.path.join(dataset_path, "*.png"))
    with open(dataset_savefile, "w") as f:
        for file in file_data:
            f.write(f"{file}\n")


if __name__ == '__main__':
    print('This is main ...')
    GenerateMeshgrid()

    isExist = os.path.exists(result_path)
    if not isExist:
        os.makedirs(result_path)

    if video_inference == False:
        print('--> image -----------------------------------------')
        img_names = os.listdir(img_folder)
        initime = time.time()
        num = 0
        for name in img_names:
            img_path = os.path.join(img_folder, name)
            num += 1
            start = time.time()
            orig_img = cv2.imread(img_path)
            img_h, img_w = orig_img.shape[:2]
            origimg = cv2.resize(orig_img, (input_imgW, input_imgH), interpolation=cv2.INTER_LINEAR)
            origimg = cv2.cvtColor(origimg, cv2.COLOR_BGR2RGB)
            img = np.expand_dims(origimg, 0)

            outputs = export_rknnlite_inference(img)

            out = []
            for i in range(len(outputs)):
                out.append(outputs[i])
            predbox = postprocess(out, img_h, img_w)
            print('detect:', len(predbox))
            fps = 1 / (time.time() - start)
            print('fps: ', fps, num / (time.time() - initime))

            for i in range(len(predbox)):
                xmin = int(predbox[i].xmin)
                ymin = int(predbox[i].ymin)
                xmax = int(predbox[i].xmax)
                ymax = int(predbox[i].ymax)
                classId = predbox[i].classId
                score = predbox[i].score

                cv2.rectangle(orig_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                ptext = (xmin, ymin)
                title = CLASSES[classId] + ":%.2f" % (score)
                cv2.putText(orig_img, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

            cv2.imwrite(f'./{result_path}/{name}', orig_img)
            cv2.imshow("test", orig_img)
            cv2.waitKey(1)
        end = time.time()
        print('avgTimes:', num / (end - initime), num, end - initime)
    else:
        print('--> video -----------------------------------------')
        cap = cv2.VideoCapture(video_path)
        initime = time.time()
        num = 0
        v = cv2.VideoWriter(f'./{result_path}/detect.avi', cv2.VideoWriter_fourcc(*'MJPG'), 30, (1920, 1080))
        while (cap.isOpened()):
            num += 1
            ret, frame = cap.read()
            print('ret:', ret)
            if not ret:
                break
            start = time.time()
            img_h, img_w = frame.shape[:2]
            frame = cv2.resize(frame, (input_imgW, input_imgH), interpolation=cv2.INTER_LINEAR)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = np.expand_dims(frame, 0)

            outputs = export_rknnlite_inference(img)

            out = []
            for i in range(len(outputs)):
                out.append(outputs[i])
            predbox = postprocess(out, img_h, img_w)
            print('detect:', len(predbox))
            fps = 1 / (time.time() - start)
            print('fps: ', fps, num / (time.time() - initime))

            for i in range(len(predbox)):
                xmin = int(predbox[i].xmin)
                ymin = int(predbox[i].ymin)
                xmax = int(predbox[i].xmax)
                ymax = int(predbox[i].ymax)
                classId = predbox[i].classId
                score = predbox[i].score
                print(f'point score :', CLASSES[classId], score)

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                ptext = (xmin, ymin)
                title = CLASSES[classId] + ":%.2f" % (score)
                cv2.putText(frame, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

            cv2.imshow("output", frame)
            # i = cv2.resize(frame, (640, 640))
            v.write(frame)
            cv2.imwrite(f'./{result_path}/test_rknn_result.jpg', frame)
            cv2.waitKey(1)
        end = time.time()
        print('avgTimes:', num / (end - initime), num, end - initime)
```
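The innermost loop of postprocess above is a distribution-focal-loss (DFL) decode: for each of the four box sides it softmaxes 16 bin logits and takes the expected bin index as the distance (in grid units) from the cell center. The same math, isolated for clarity (a standalone sketch):

```python
from math import exp

def dfl_decode(bin_logits):
    """Softmax over the 16 DFL bins, then return the expected bin index."""
    exps = [exp(v) for v in bin_logits]
    total = sum(exps)
    return sum(i * e / total for i, e in enumerate(exps))

# A distribution sharply peaked at bin 4 decodes to a distance close to 4 grid units;
# postprocess then scales it by the head's stride and offsets it from the cell center,
# e.g. x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index].
logits = [0.0] * 16
logits[4] = 8.0
print(dfl_decode(logits))
```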

All done! Writing this up took real effort, so please consider giving it a like.

