TensorRT 使用的一些脚本
- 打印trt输入和输出的尺寸
- 打印trt模型的推理速度
- 打印onnx输入和输出的尺寸
- 将包含多个输入和输出的onnx模型转化为trt
- 合并两个onnx模型(并行合并)
打印trt输入和输出的尺寸
import tensorrt as trt
# Shared logger handed to the TensorRT runtime (INFO severity).
TRT_LOGGER = trt.Logger(trt.Logger.INFO)


def load_engine(engine_file_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    Args:
        engine_file_path: Path of the serialized engine file; it is opened
            in binary mode and read in full.

    Returns:
        Whatever ``trt.Runtime.deserialize_cuda_engine`` returns for the
        file contents.
        NOTE(review): the TensorRT docs say this is ``None`` when
        deserialization fails -- callers should confirm and check for it.
    """
    with open(engine_file_path, "rb") as f:
        serialized_engine = f.read()

    runtime = trt.Runtime(TRT_LOGGER)
    return runtime.deserialize_cuda_engine(serialized_engine)
def print_engine_info(engine):
    """Print the name, shape and dtype of every binding of an engine.

    The first line reports the number of bindings; one line per binding
    follows, labelled ``Input`` or ``Output`` according to
    ``engine.binding_is_input``.

    Args:
        engine: Object exposing ``num_bindings``, ``get_binding_name``,
            ``get_binding_shape``, ``get_binding_dtype`` and
            ``binding_is_input`` (index-based binding API).
    """
    print(f"Engine has {engine.num_bindings} bindings.")
    for i in range(engine.num_bindings):
        binding_name = engine.get_binding_name(i)
        binding_shape = engine.get_binding_shape(i)
        binding_dtype = engine.get_binding_dtype(i)
        # Inputs and outputs share one line format; only the label differs.
        kind = "Input" if engine.binding_is_input(i) else "Output"
        print(
            f"{kind} {i}: Name = {binding_name}, "
            f"Shape = {binding_shape}, DType = {binding_dtype}"
        )
# Load the serialized engine file "combined_1.engine" and print its bindings.
engine = load_engine("combined_1.engine")
print_engine_info(engine)
打印trt模型的推理速度
import tensorrt as trt
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import time

# Logger handed to the TensorRT runtime (ERROR severity).
TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
def load_engine(engine_file_path):
    """Read a serialized TensorRT engine file and deserialize it.

    Args:
        engine_file_path: Path of the serialized engine file; it is opened
            in binary mode and read in full.

    Returns:
        The result of ``trt.Runtime.deserialize_cuda_engine`` for the file
        contents.
        NOTE(review): the TensorRT docs say this is ``None`` when
        deserialization fails -- callers should confirm and check for it.
    """
    with open(engine_file_path, "rb") as f:
        # The runtime is built with the module-level TRT_LOGGER.
        runtime = trt.Runtime(TRT_LOGGER)
        engine = runtime.deserialize_cuda_engine(f.read())
    return engine
def infer_and_measure_speed(engine, input_data_list, iterations=1000):context = engine.create_execution_context()num_bindings = engine.num_bindingsnum_inputs = sum([engine.binding_is_input(i) for i in range(num_bindings)])num_outputs = num_bindings - num_inputsinput_names = [engine.get_tensor_name(i) for i in range(num_inputs)]output_names = [engine.get_tensor_name(i) for i in range(num_inputs, num_bindings)]for i, input_data in enumerate(input_data_list):context.set_binding_shape(i, input_data.shape)