YOLOv10部署教程，使用tensorRT部署，有轉化和推理代碼

目錄：
- 一、使用平臺
  - 1. 轉化onnx模型
  - 轉化trt模型
- 模型推理
- 全部的代碼

論文題目：YOLOv10: Real-Time End-to-End Object Detection
研究單位：清華大學
論文鏈接：http://arxiv.org/abs/2405.14458
代碼鏈接：https://github.com/THU-MIG/yolov10

作者提供的模型性能評價圖，如下：
（原文此處為模型性能評價圖，圖片未能保留。）
YOLOv10-N:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10n.pt
YOLOv10-S:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt
YOLOv10-M:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10m.pt
YOLOv10-B:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10b.pt
YOLOv10-L:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10l.pt
YOLOv10-X:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10x.pt
推理速度很快，最主要是不需要 NMS 後處理；缺點是網絡比較難訓練，PSA（部分自注意力）模塊會多佔用幾 GB 顯存，並且收斂較慢。

一、使用平臺

win10、TensorRT=8.6.1

1. 轉化onnx模型

git clone https://github.com/THU-MIG/yolov10.git
conda create -n YOLO python=3.9
conda activate YOLO
cd yolov10
pip install -r requirements.txt

下載pt模型
用下面代碼轉化

# -*- coding: utf-8 -*-
# @Time    : 2024/6/13 10:54
# @Site    :
# @File    : export.py
# @Comment : Export a YOLOv10 checkpoint to ONNX (input for the TensorRT build).
from ultralytics import YOLOv10

# Load an official pretrained checkpoint from the working directory.
model = YOLOv10(r"yolov10s.pt")

# Export to ONNX with a fixed batch of 2, opset 12 and FP16 weights.
# See the argument reference below for the meaning of each option.
model.export(format="onnx", device='0', batch=2, opset=12, half=True)

# Reference table of the export arguments (kept as a plain string, not executed).
"""
Argument	Type	Default	Description
format	str	'torchscript'	Target format for the exported model, such as 'onnx', 'torchscript', 'tensorflow', or others, defining compatibility with various deployment environments.
imgsz	int or tuple	640	Desired image size for the model input. Can be an integer for square images or a tuple (height, width) for specific dimensions.
keras	bool	False	Enables export to Keras format for TensorFlow SavedModel, providing compatibility with TensorFlow serving and APIs.
optimize	bool	False	Applies optimization for mobile devices when exporting to TorchScript, potentially reducing model size and improving performance.
half	bool	False	Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware.
int8	bool	False	Activates INT8 quantization, further compressing the model and speeding up inference with minimal accuracy loss, primarily for edge devices.
dynamic	bool	False	Allows dynamic input sizes for ONNX and TensorRT exports, enhancing flexibility in handling varying image dimensions.
simplify	bool	False	Simplifies the model graph for ONNX exports with onnxslim, potentially improving performance and compatibility.
opset	int	None	Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version.
workspace	float	4.0	Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance.
nms	bool	False	Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing.
batch	int	1	Specifies export model batch inference size or the max number of images the exported model will process concurrently in predict mode.
"""

轉化trt模型

import onnx
import tensorrt as trt
# import sys
# sys.setrecursionlimit(500000)


def onnx_export_engine(workspace, onnx_path, trt_path):
    """Build a serialized TensorRT engine from an ONNX file and write it to disk.

    Args:
        workspace: Builder workspace budget in GiB.
        onnx_path: Path of the ONNX model to parse.
        trt_path: Destination path of the serialized engine file.

    Raises:
        RuntimeError: If the ONNX file cannot be parsed, or if TensorRT
            fails to build the engine.
    """
    # Create the builder.
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)

    # Create the build configuration and cap the workspace memory.
    config = builder.create_builder_config()
    workspace_bytes = int(workspace * (1 << 30))
    if hasattr(config, 'set_memory_pool_limit'):
        # Preferred API; `max_workspace_size` is deprecated in newer releases.
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
    else:
        config.max_workspace_size = workspace_bytes

    # Create the network definition with an explicit batch dimension.
    flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(flag)

    # Import the ONNX model into the network.
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnx_path)):
        # Report the offending path (the original interpolated the `onnx` module).
        raise RuntimeError(f'failed to load ONNX file: {onnx_path}')

    # NOTE(review): the FP16 builder flag was commented out in the original and
    # is left disabled here; enable it only after confirming it is wanted:
    # config.set_flag(trt.BuilderFlag.FP16)

    serialized_engine = builder.build_serialized_network(network, config)
    if serialized_engine is None:
        # Fail loudly here rather than on the write below.
        raise RuntimeError(f'failed to build TensorRT engine from: {onnx_path}')

    with open(trt_path, 'wb') as engine_file:
        engine_file.write(serialized_engine)
    print('轉化完成')


if __name__ == '__main__':
    onnx_path = 'weights2/best.onnx'
    trt_path = 'end2end.engine'
    onnx_export_engine(4, onnx_path, trt_path)

模型推理

定義變量

from models import TRTModule  # isort:skip
import argparse
import cv2
from numpy import ndarray
import time
import random
import numpy as np
import os
import pickle
from collections import defaultdict, namedtuple
from pathlib import Path
from typing import List, Optional, Tuple, Union
import onnx
import tensorrt as trt
import torch

# Set CUDA_MODULE_LOADING to LAZY for this process.
os.environ['CUDA_MODULE_LOADING'] = 'LAZY'

# Fixed seed so the per-class colors below are the same on every run.
random.seed(0)

# Detection model classes.
CLASSES = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)
# Alternative three-class label set (disabled):
# CLASSES = (
#     'person', 'sports ball', 'car'
# )

# One random color triple (each channel in 0..255) per class name.
COLORS = {
    cls: [random.randint(0, 255) for _ in range(3)]
    for cls in CLASSES
}

# Accepted image file suffixes.
SUFFIXS = (
    '.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff',
    '.webp', '.pfm',
)

定義模型加載類

class TRTModule(torch.nn.Module):dtypeMapping = {trt.bool: torch.bool,trt.int8: torch.int8,trt.int32: torch.int32,trt.float16: torch.float16,trt.float32: torch.float32}def __init__(self, weight: Union[str, Path],device: Optional[torch.device]) -> None:super(TRTModule, self).__init__()self.weight = Path(weight) if isinstance(weight, str) else weightself.device = device if device is not None else torch.device('cuda:0')self.stream = torch.cuda.Stream(device=device)self.__init_engine()self.__init_bindings()def __init_engine(self) -> None:logger = trt.Logger(trt.Logger.WARNING)trt.init_libnvinfer_plugins(logger, namespace='')with trt.Runtime(logger) as runtime:model = runtime.deserialize_cuda_engine(self.weight.read_bytes())context = model.create_execution_context()num_bindings = model.num_bindingsnames = [model.get_binding_name(i) for i in range(num_bindings)]self.bindings: List[int] = [0] * num_bindingsnum_inputs, num_outputs = 0, 0for i in range(num_bindings):if model.binding_is_input(i):num_inputs += 1else:num_outputs += 1self.num_bindings = num_bindingsself.num_inputs = num_inputsself.num_outputs = num_outputsself.model = modelself.context = contextself.input_names = names[:num_inputs]self.output_names = names[num_inputs:]self.idx = list(range(self.num_outputs))def __init_bindings(self) -> None:idynamic = odynamic = FalseTensor = namedtuple('Tensor', ('name', 'dtype', 'shape'))inp_info = []out_info = []for i, name in enumerate(self.input_names):assert self.model.get_binding_name(i) == namedtype = self.dtypeMapping[self.model.get_binding_dtype(i)]shape = tuple(self.model.get_binding_shape(i))if -1 in shape:idynamic |= Trueinp_info.append(Tensor(name, dtype, shape))for i, name in enumerate(self.output_names):i += self.num_inputsassert self.model.get_binding_name(i) == namedtype = self.dtypeMapping[self.model.get_binding_dtype(i)]shape = tuple(self.model.get_binding_shape(i))if -1 in shape:odynamic |= Trueout_info.append(Tensor(name, dtype, shape))if not 
odynamic:self.output_tensor = [torch.empty(info.shape, dtype=info.dtype, device=self.device)for info in out_info]self.idynamic = idynamicself.odynamic = odynamicself.inp_info = inp_infoself.out_info