当前位置：首页 > news >正文

yolo-world开放词汇检测onnxruntime和tensorrt推理

news 2025/12/23 19:18:27

onnxruntime推理

导出onnx模型：

from ultralytics import YOLOWorld,YOLO
model = YOLOWorld('yolov8s-worldv2.pt')
model.set_classes(["glasses","shoes"])
model.save("yolo_world.pt")
model = YOLO('yolo_world.pt')
model.export(format="onnx", opset=12)

onnx模型结构如下图所示：
在这里插入图片描述

推理代码：

import cv2
import math
import numpy as np
import onnxruntimeclass_names = ["glasses","shoes"]  
input_shape = (640, 640) 
score_threshold = 0.1 
nms_threshold = 0.5def nms(boxes, scores, score_threshold, nms_threshold):x1 = boxes[:, 0]y1 = boxes[:, 1]x2 = boxes[:, 2]y2 = boxes[:, 3]areas = (y2 - y1 + 1) * (x2 - x1 + 1)keep = []index = scores.argsort()[::-1] while index.size > 0:i = index[0]keep.append(i)x11 = np.maximum(x1[i], x1[index[1:]]) y11 = np.maximum(y1[i], y1[index[1:]])x22 = np.minimum(x2[i], x2[index[1:]])y22 = np.minimum(y2[i], y2[index[1:]])w = np.maximum(0, x22 - x11 + 1)                              h = np.maximum(0, y22 - y11 + 1) overlaps = w * hious = overlaps / (areas[i] + areas[index[1:]] - overlaps)idx = np.where(ious <= nms_threshold)[0]index = index[idx + 1]return keepdef xywh2xyxy(x):y = np.copy(x)y[:, 0] = x[:, 0] - x[:, 2] / 2y[:, 1] = x[:, 1] - x[:, 3] / 2y[:, 2] = x[:, 0] + x[:, 2] / 2y[:, 3] = x[:, 1] + x[:, 3] / 2return ydef filter_box(outputs): #过滤掉无用的框    outputs = np.squeeze(outputs)boxes = []scores = []class_ids = []classes_scores = outputs[4:(4+len(class_names)), ...]  for i in range(outputs.shape[1]):              class_id = np.argmax(classes_scores[...,i])score = classes_scores[class_id][i]if score > score_threshold:boxes.append(np.concatenate([outputs[:4, i], np.array([score, class_id])]))scores.append(score)class_ids.append(class_id) boxes = np.array(boxes)boxes = xywh2xyxy(boxes)scores = np.array(scores)indices = nms(boxes, scores, score_threshold, nms_threshold) boxes = boxes[indices]return boxesdef letterbox(im, new_shape=(416, 416), color=(114, 114, 114)):# Resize and pad image while meeting stride-multiple constraintsshape = im.shape[:2]  # current shape [height, width]# Scale ratio (new / old)r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])# Compute paddingnew_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))    dw, dh = (new_shape[1] - new_unpad[0])/2, (new_shape[0] - new_unpad[1])/2  # wh padding top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))left, right = int(round(dw - 0.1)), int(round(dw + 0.1))if shape[::-1] != new_unpad:  # resizeim = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add borderreturn imdef scale_boxes(boxes, output_shape):# Rescale boxes (xyxy) from self.input_shape to shapegain = min(input_shape[0] / output_shape[0], input_shape[1] / output_shape[1])  # gain  = old / newpad = (input_shape[1] - output_shape[1] * gain) / 2, (input_shape[0] - output_shape[0] * gain) / 2  # wh paddingboxes[..., [0, 2]] -= pad[0]  # x paddingboxes[..., [1, 3]] -= pad[1]  # y paddingboxes[..., :4] /= gainboxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, output_shape[1])  # x1, x2boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, output_shape[0])  # y1, y2return boxesdef draw(image, box_data):box_data = scale_boxes(box_data, image.shape)boxes = box_data[...,:4].astype(np.int32) scores = box_data[...,4]classes = box_data[...,5].astype(np.int32)for box, score, cl in zip(boxes, scores, classes):top, left, right, bottom = boxcv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 1)cv2.putText(image, '{0} {1:.2f}'.format(class_names[cl], score), (top, left), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)if __name__=="__main__":image = cv2.imread('bus.jpg', -1)input = letterbox(image, input_shape)input = input[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)  #BGR2RGB和HWC2CHWinput = input / 255.0input_tensor = []input_tensor.append(input)onnx_session = onnxruntime.InferenceSession('yolo_world.onnx', providers=['CPUExecutionProvider', 'CUDAExecutionProvider'])input_name = []for node in onnx_session.get_inputs():input_name.append(node.name)output_name = []for node in onnx_session.get_outputs():output_name.append(node.name)inputs = {}for name in input_name:inputs[name] =  np.array(input_tensor)outputs = onnx_session.run(None, inputs)[0]boxes = filter_box(outputs)draw(image, boxes)cv2.imwrite('result.jpg', image)

tensorrt推理

通过trtexec转换onnx模型得到engine模型：

./trtexec.exe --onnx=yolo_world.onnx --saveEngine=yolo_world.engine

推理代码：

import cv2
import math
import numpy as np
import tensorrt as trt
import pycuda.autoinit 
import pycuda.driver as cuda class_names = ["glasses","shoes"]  
input_shape = (640, 640) 
score_threshold = 0.1 
nms_threshold = 0.5def nms(boxes, scores, score_threshold, nms_threshold):x1 = boxes[:, 0]y1 = boxes[:, 1]x2 = boxes[:, 2]y2 = boxes[:, 3]areas = (y2 - y1 + 1) * (x2 - x1 + 1)keep = []index = scores.argsort()[::-1] while index.size > 0:i = index[0]keep.append(i)x11 = np.maximum(x1[i], x1[index[1:]]) y11 = np.maximum(y1[i], y1[index[1:]])x22 = np.minimum(x2[i], x2[index[1:]])y22 = np.minimum(y2[i], y2[index[1:]])w = np.maximum(0, x22 - x11 + 1)                              h = np.maximum(0, y22 - y11 + 1) overlaps = w * hious = overlaps / (areas[i] + areas[index[1:]] - overlaps)idx = np.where(ious <= nms_threshold)[0]index = index[idx + 1]return keepdef xywh2xyxy(x):y = np.copy(x)y[:, 0] = x[:, 0] - x[:, 2] / 2y[:, 1] = x[:, 1] - x[:, 3] / 2y[:, 2] = x[:, 0] + x[:, 2] / 2y[:, 3] = x[:, 1] + x[:, 3] / 2return ydef filter_box(outputs): #过滤掉无用的框    outputs = np.squeeze(outputs)boxes = []scores = []class_ids = []classes_scores = outputs[4:(4+len(class_names)), ...]  for i in range(outputs.shape[1]):              class_id = np.argmax(classes_scores[...,i])score = classes_scores[class_id][i]if score > score_threshold:boxes.append(np.concatenate([outputs[:4, i], np.array([score, class_id])]))scores.append(score)class_ids.append(class_id) boxes = np.array(boxes)boxes = xywh2xyxy(boxes)scores = np.array(scores)indices = nms(boxes, scores, score_threshold, nms_threshold) boxes = boxes[indices]return boxesdef letterbox(im, new_shape=(416, 416), color=(114, 114, 114)):# Resize and pad image while meeting stride-multiple constraintsshape = im.shape[:2]  # current shape [height, width]# Scale ratio (new / old)r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])# Compute paddingnew_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))    dw, dh = (new_shape[1] - new_unpad[0])/2, (new_shape[0] - new_unpad[1])/2  # wh padding top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))left, right = int(round(dw - 0.1)), int(round(dw + 0.1))if shape[::-1] != new_unpad:  # resizeim = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add borderreturn imdef scale_boxes(boxes, output_shape):# Rescale boxes (xyxy) from self.input_shape to shapegain = min(input_shape[0] / output_shape[0], input_shape[1] / output_shape[1])  # gain  = old / newpad = (input_shape[1] - output_shape[1] * gain) / 2, (input_shape[0] - output_shape[0] * gain) / 2  # wh paddingboxes[..., [0, 2]] -= pad[0]  # x paddingboxes[..., [1, 3]] -= pad[1]  # y paddingboxes[..., :4] /= gainboxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, output_shape[1])  # x1, x2boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, output_shape[0])  # y1, y2return boxesdef draw(image, box_data):box_data = scale_boxes(box_data, image.shape)boxes = box_data[...,:4].astype(np.int32) scores = box_data[...,4]classes = box_data[...,5].astype(np.int32)for box, score, cl in zip(boxes, scores, classes):top, left, right, bottom = boxcv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 1)cv2.putText(image, '{0} {1:.2f}'.format(class_names[cl], score), (top, left), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)if __name__=="__main__":logger = trt.Logger(trt.Logger.WARNING)with open("yolo_world.engine", "rb") as f, trt.Runtime(logger) as runtime:engine = runtime.deserialize_cuda_engine(f.read())context = engine.create_execution_context()inputs_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)outputs_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)inputs_device = cuda.mem_alloc(inputs_host.nbytes)outputs_device = cuda.mem_alloc(outputs_host.nbytes)stream = cuda.Stream()image = cv2.imread('bus.jpg', -1)input = letterbox(image, input_shape)input = input[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32)  #BGR2RGB和HWC2CHWinput = input / 255.0input = np.expand_dims(input, axis=0)     np.copyto(inputs_host, input.ravel())with engine.create_execution_context() as context:cuda.memcpy_htod_async(inputs_device, inputs_host, stream)context.execute_async_v2(bindings=[int(inputs_device), int(outputs_device)], stream_handle=stream.handle)cuda.memcpy_dtoh_async(outputs_host, outputs_device, stream)stream.synchronize()  boxes = filter_box(outputs_host.reshape(context.get_binding_shape(1)))draw(image, boxes)cv2.imwrite('result.jpg', image)