diff --git a/py/torch_tensorrt/_features.py b/py/torch_tensorrt/_features.py
index bee0c3dbf0..e1cffb5c3a 100644
--- a/py/torch_tensorrt/_features.py
+++ b/py/torch_tensorrt/_features.py
@@ -1,3 +1,4 @@
+import importlib.util
 import os
 import sys
 from collections import namedtuple
@@ -15,6 +16,7 @@
         "dynamo_frontend",
         "fx_frontend",
         "refit",
+        "qdp_plugin",
     ],
 )
 
@@ -39,14 +41,24 @@
 _FX_FE_AVAIL = True
 _REFIT_AVAIL = True
 
+if importlib.util.find_spec("tensorrt.plugin"):
+    _QDP_PLUGIN_AVAIL = True
+else:
+    _QDP_PLUGIN_AVAIL = False
+
 ENABLED_FEATURES = FeatureSet(
-    _TS_FE_AVAIL, _TORCHTRT_RT_AVAIL, _DYNAMO_FE_AVAIL, _FX_FE_AVAIL, _REFIT_AVAIL
+    _TS_FE_AVAIL,
+    _TORCHTRT_RT_AVAIL,
+    _DYNAMO_FE_AVAIL,
+    _FX_FE_AVAIL,
+    _REFIT_AVAIL,
+    _QDP_PLUGIN_AVAIL,
 )
 
 
 def _enabled_features_str() -> str:
     enabled = lambda x: "ENABLED" if x else "DISABLED"
-    out_str: str = f"Enabled Features:\n - Dynamo Frontend: {enabled(_DYNAMO_FE_AVAIL)}\n - Torch-TensorRT Runtime: {enabled(_TORCHTRT_RT_AVAIL)}\n - FX Frontend: {enabled(_FX_FE_AVAIL)}\n - TorchScript Frontend: {enabled(_TS_FE_AVAIL)}\n"  # type: ignore[no-untyped-call]
+    out_str: str = f"Enabled Features:\n - Dynamo Frontend: {enabled(_DYNAMO_FE_AVAIL)}\n - Torch-TensorRT Runtime: {enabled(_TORCHTRT_RT_AVAIL)}\n - FX Frontend: {enabled(_FX_FE_AVAIL)}\n - TorchScript Frontend: {enabled(_TS_FE_AVAIL)}\n - Refit: {enabled(_REFIT_AVAIL)}\n - QDP Plugin: {enabled(_QDP_PLUGIN_AVAIL)}\n"  # type: ignore[no-untyped-call]
     return out_str
 
 
@@ -64,6 +76,22 @@ def not_implemented(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
     return wrapper
 
 
+def needs_qdp_plugin(f: Callable[..., Any]) -> Callable[..., Any]:
+    def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
+        if ENABLED_FEATURES.qdp_plugin:
+            return f(*args, **kwargs)
+        else:
+
+            def not_implemented(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
+                raise NotImplementedError(
+                    "TensorRT QDP (Quick Deployable Plugins) is not available; TensorRT 10.7.0 or higher is required"
+                )
+
+            return not_implemented(*args, **kwargs)
+
+    return wrapper
+
+
 def needs_refit(f: Callable[..., Any]) -> Callable[..., Any]:
     def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
         if ENABLED_FEATURES.refit:
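The new qdp_plugin flag is gated the same way as the existing refit feature: availability is probed once at import time, and callers either branch on ENABLED_FEATURES or decorate an entry point with needs_qdp_plugin. A minimal usage sketch follows; register_my_plugin is a hypothetical user-side function, not part of this change:

    from torch_tensorrt._features import ENABLED_FEATURES, needs_qdp_plugin

    print(ENABLED_FEATURES)  # the FeatureSet namedtuple now carries a qdp_plugin field

    @needs_qdp_plugin
    def register_my_plugin() -> None:  # hypothetical entry point guarded by the decorator
        import tensorrt.plugin as trtp  # deferred import, only reached when QDP is available

        print(trtp)

    register_my_plugin()  # raises NotImplementedError when TensorRT < 10.7.0
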
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py
index 3dacc2fbe4..02ecf98bfe 100644
--- a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py
+++ b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py
@@ -1,14 +1,18 @@
-from typing import List, Optional, Sequence
+import logging
+from typing import List, Optional, Sequence, cast
 
 from torch.fx.node import Target
 from torch_tensorrt.dynamo._SourceIR import SourceIR
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
 from torch_tensorrt.dynamo.conversion.converter_utils import (
+    get_positive_dim,
     get_trt_tensor,
     set_layer_name,
 )
 from torch_tensorrt.dynamo.types import TRTTensor
 
+logger = logging.getLogger(__name__)
+
 
 def unsqueeze(
     ctx: ConversionContext,
@@ -18,12 +22,87 @@
     input: TRTTensor,
     dim: int,
 ) -> TRTTensor:
+    from importlib.metadata import version
+
+    if tuple(map(int, version("tensorrt").split(".")[:3])) < (10, 7, 0):  # numeric, not lexicographic, comparison
+        logger.warning(
+            f"IUnsqueezeLayer is only supported on TensorRT 10.7.0 and above; falling back to the old unsqueeze implementation for the installed TensorRT version: {version('tensorrt')}"
+        )
+        return unsqueeze_old(ctx, target, source_ir, name, input, dim)
     axes = get_trt_tensor(ctx, dim, f"{name}_axes")
     layer = ctx.net.add_unsqueeze(input, axes)
     set_layer_name(layer, target, name, source_ir)
     return layer.get_output(0)
 
 
+# Old implementation, kept for Jetson: IUnsqueezeLayer is not available prior to TensorRT 10.7.0
+def unsqueeze_old(
+    ctx: ConversionContext,
+    target: Target,
+    source_ir: Optional[SourceIR],
+    name: str,
+    input: TRTTensor,
+    dim: int,
+) -> TRTTensor:
+    input_val = get_trt_tensor(ctx, input, f"{name}_input")
+    if not isinstance(input_val, TRTTensor):
+        raise RuntimeError(
+            f"unsqueeze received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
+
+    dim = cast(int, dim)
+
+    input_shape_size = len(input_val.shape)
+    dim = get_positive_dim(dim, input_shape_size + 1)
+
+    intermediate_dim = 0
+    dynamic_shape_cnt = 0
+    # if unsqueezing the last dimension, we can directly append to the shape
+    if dim == input_shape_size:
+        intermediate_dim = dim
+    else:
+        # since at most one dimension may be specified as -1 in reshape_dims,
+        # find an intermediate_dim whose suffix contains at most one dynamic (-1) dimension,
+        # then add a transpose after the reshape if that is not the final position we want
+        for i, s in reversed(list(enumerate(input_val.shape))):
+            if i >= dim:
+                if s == -1:
+                    dynamic_shape_cnt += 1
+                if dynamic_shape_cnt > 1:
+                    intermediate_dim = i + 1
+                    break
+            if i == dim:
+                intermediate_dim = i
+                break
+    # calculate the new_shape for the shuffle layer's reshape_dims
+    new_shape = list(
+        tuple(input_val.shape)[:intermediate_dim]
+        + (1,)
+        + tuple(input_val.shape)[intermediate_dim:]
+    )
+    for i, s in enumerate(new_shape):
+        if i < intermediate_dim and s == -1:
+            new_shape[i] = 0
+    layer = ctx.net.add_shuffle(input_val)
+    layer.reshape_dims = tuple(new_shape)
+    # if intermediate_dim is not the final dim we want to unsqueeze, add a second_transpose after the reshape
+    if intermediate_dim != dim:
+        # calculate the second_transpose for the shuffle layer
+        permutation = [*range(0, len(new_shape))]
+        # for example: if the reshape_dims is (3, 3, 5, 1, 5) and the final shape we want is (3, 1, 3, 5, 5),
+        # here intermediate_dim=3 and dim=1, so we move intermediate_dim in front of the range [dim, intermediate_dim)
+        new_permutation = (
+            tuple(permutation[:dim])
+            + (intermediate_dim,)
+            + tuple(permutation[dim:intermediate_dim])
+            + tuple(permutation[intermediate_dim + 1 :])
+        )
+        layer.second_transpose = new_permutation
+    set_layer_name(layer, target, name, source_ir)
+    return layer.get_output(0)
+
+
 def broadcast_in_dim(
     ctx: ConversionContext,
     target: Target,
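The reshape-plus-transpose fallback is easiest to follow with the concrete shapes from the in-code comment. Below is a standalone sketch (plain Python, no TensorRT required) of the same shape bookkeeping unsqueeze_old performs: insert the new unit axis at intermediate_dim via reshape_dims, then move it to dim with second_transpose. Names and shapes are illustrative only:

    def unsqueeze_shape_math(shape, dim, intermediate_dim):
        # reshape_dims: insert the new unit axis at intermediate_dim
        new_shape = list(shape[:intermediate_dim]) + [1] + list(shape[intermediate_dim:])
        # second_transpose: move the axis from intermediate_dim back to dim
        permutation = list(range(len(new_shape)))
        second_transpose = (
            tuple(permutation[:dim])
            + (intermediate_dim,)
            + tuple(permutation[dim:intermediate_dim])
            + tuple(permutation[intermediate_dim + 1 :])
        )
        final_shape = tuple(new_shape[i] for i in second_transpose)
        return new_shape, second_transpose, final_shape

    # Example from the comment above: input (3, 3, 5, 5), dim=1, intermediate_dim=3
    # reshapes to (3, 3, 5, 1, 5) and then permutes to (3, 1, 3, 5, 5).
    print(unsqueeze_shape_math((3, 3, 5, 5), dim=1, intermediate_dim=3))
    # ([3, 3, 5, 1, 5], (0, 3, 1, 2, 4), (3, 1, 3, 5, 5))
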
diff --git a/py/torch_tensorrt/dynamo/conversion/plugins/_generate_plugin.py b/py/torch_tensorrt/dynamo/conversion/plugins/_generate_plugin.py
index 8f5f173a7b..b41e1460f5 100644
--- a/py/torch_tensorrt/dynamo/conversion/plugins/_generate_plugin.py
+++ b/py/torch_tensorrt/dynamo/conversion/plugins/_generate_plugin.py
@@ -3,12 +3,12 @@
 from types import FunctionType
 from typing import Any, Callable, Tuple
 
-import tensorrt.plugin as trtp
 import torch
 from sympy import lambdify
 from torch._dynamo.source import LocalSource
 from torch._subclasses.fake_tensor import FakeTensorMode
 from torch.fx.experimental.symbolic_shapes import DimDynamic, ShapeEnv
+from torch_tensorrt._features import needs_qdp_plugin
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
@@ -28,6 +28,13 @@ def mksym(
 
 
 def _generate_plugin(plugin_name: str) -> None:
+    try:
+        import tensorrt.plugin as trtp
+    except ImportError as e:
+        raise RuntimeError(
+            "Unable to import TensorRT plugin. TensorRT version must be 10.7.0 or higher to support Triton-based TensorRT plugins"
+        ) from e
+
     namespace, name = plugin_name.split("::")
 
     # retrieve the corresponding torch operation using the passed in string
@@ -211,6 +218,7 @@ def _generic_plugin_impl(
     trtp.impl(plugin_name)(plugin_impl)
 
 
+@needs_qdp_plugin
 def generate_plugin(plugin_name: str) -> None:
     """
     Generate the Plugin using external kernels and TensorRT Quick Deployable Plugin APIs.
diff --git a/py/torch_tensorrt/dynamo/conversion/plugins/_generate_plugin_converter.py b/py/torch_tensorrt/dynamo/conversion/plugins/_generate_plugin_converter.py
index 99ea3bc356..a16eaf7982 100644
--- a/py/torch_tensorrt/dynamo/conversion/plugins/_generate_plugin_converter.py
+++ b/py/torch_tensorrt/dynamo/conversion/plugins/_generate_plugin_converter.py
@@ -4,12 +4,9 @@
 
 import numpy as np
 import tensorrt as trt
-
-# Seems like a bug in TensorRT
-import tensorrt.plugin as trtp
 import torch
-from tensorrt.plugin._lib import QDP_REGISTRY
 from torch.fx.node import Argument, Node, Target
+from torch_tensorrt._features import needs_qdp_plugin
 from torch_tensorrt.dynamo._settings import CompilationSettings
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
 from torch_tensorrt.dynamo.conversion._ConverterRegistry import (
@@ -32,6 +29,15 @@ def _generate_plugin_converter(
     supports_dynamic_shapes: bool = False,
     requires_output_allocator: bool = False,
 ) -> DynamoConverterImplSignature:
+    try:
+        import tensorrt.plugin as trtp
+
+    except ImportError as e:
+        raise RuntimeError(
+            "Unable to import TensorRT plugin. TensorRT version must be 10.7.0 or higher to support Triton-based TensorRT plugins"
+        ) from e
+    from tensorrt.plugin._lib import QDP_REGISTRY
+
     torch_target = getattr(getattr(torch.ops, namespace), op_name)
     overload_str = overload if overload else ""
     overload_name = overload_str if overload else "default"
@@ -101,6 +107,7 @@
     return custom_kernel_converter
 
 
+@needs_qdp_plugin
 def generate_plugin_converter(
     plugin_id: str,
     capability_validator: Optional[Callable[[Node, CompilationSettings], bool]] = None,
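Finally, a hedged end-to-end sketch of the now-guarded entry points. The my_ops::my_kernel op is hypothetical and assumed to already be registered through torch.library with a Triton implementation, and the keyword argument is assumed to mirror the internal _generate_plugin_converter signature shown above:

    from torch_tensorrt.dynamo.conversion.plugins._generate_plugin import generate_plugin
    from torch_tensorrt.dynamo.conversion.plugins._generate_plugin_converter import (
        generate_plugin_converter,
    )

    # Auto-generate the QDP plugin and its converter for the custom op.
    # On TensorRT < 10.7.0 both calls now raise NotImplementedError from
    # needs_qdp_plugin instead of failing at module import time.
    generate_plugin("my_ops::my_kernel")
    generate_plugin_converter("my_ops::my_kernel", supports_dynamic_shapes=True)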