From 2f19666afddbdd4dcc43a39c9e5106227c4d269e Mon Sep 17 00:00:00 2001
From: Evan Li
Date: Thu, 24 Jul 2025 14:54:29 -0700
Subject: [PATCH 1/8] decompose prelu

---
 py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py b/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py
index 9d28ae70a5..bc897bc8fd 100644
--- a/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py
+++ b/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py
@@ -105,6 +105,7 @@
     aten.norm,
     aten.ones,
     aten.ones_like,
+    aten._prelu_kernel,
     aten._prelu_kernel_backward,
     aten._reshape_alias,
     aten.rad2deg,

From 79b3153f9ca0bd03b8cdd11f11cb516173b74b9a Mon Sep 17 00:00:00 2001
From: Evan Li
Date: Mon, 28 Jul 2025 13:55:42 -0700
Subject: [PATCH 2/8] revert decomp and disable forcing to insert ICast Layer

---
 py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py | 2 +-
 py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
index fe9a01b06c..71ee8d459c 100644
--- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
+++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
@@ -1094,7 +1094,7 @@ def aten_ops_clone_copy_dtype(
         name,
         args[0],
         kwargs.get("dtype", args[0].dtype),
-        force_layer=True,
+        force_layer=False,
     )
 
 
diff --git a/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py b/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py
index bc897bc8fd..9d28ae70a5 100644
--- a/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py
+++ b/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py
@@ -105,7 +105,6 @@
     aten.norm,
     aten.ones,
     aten.ones_like,
-    aten._prelu_kernel,
     aten._prelu_kernel_backward,
     aten._reshape_alias,
     aten.rad2deg,

From cdc31d70360da31e26bfc0792009f0bb6455d6ec Mon Sep 17 00:00:00 2001
From: Evan Li
Date: Mon, 28 Jul 2025 14:26:37 -0700
Subject: [PATCH 3/8] update prelu

---
 py/torch_tensorrt/dynamo/conversion/impl/prelu.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/py/torch_tensorrt/dynamo/conversion/impl/prelu.py b/py/torch_tensorrt/dynamo/conversion/impl/prelu.py
index 166ce16367..f489ccb503 100644
--- a/py/torch_tensorrt/dynamo/conversion/impl/prelu.py
+++ b/py/torch_tensorrt/dynamo/conversion/impl/prelu.py
@@ -2,6 +2,7 @@
 from torch.fx.node import Target
 from torch_tensorrt.dynamo._SourceIR import SourceIR
+from torch_tensorrt.dynamo.conversion import impl
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
 from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name
 from torch_tensorrt.dynamo.types import TRTTensor
@@ -15,6 +16,12 @@ def prelu(
     input: TRTTensor,
     weight: TRTTensor,
 ) -> TRTTensor:
-    layer = ctx.net.add_parametric_relu(input, weight)
+    # TRT requires that the slopes tensor must be unidirectional broadcastable to the input tensor:
+    # the rank of the two tensors must be the same, and all dimensions of the slopes tensor must
+    # either equal the input tensor or be 1. The output tensor has the same shape as the input tensor.
+    input, weight = impl.elementwise.broadcast(
+        ctx, input, weight, f"{name}_broadcast_input", f"{name}_broadcast_weight"
+    )
+    layer = ctx.net.add_parametric_relu(input, slopes=weight)
     set_layer_name(layer, target, name, source_ir)
     return layer.get_output(0)
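A minimal PyTorch sketch of the unidirectional broadcast rule that the comment
in PATCH 3 describes. The shapes here are hypothetical, and torch.where stands
in for TRT's IParametricReLULayer semantics; this illustrates the shape rule,
it is not the converter's code path:

    import torch

    # Hypothetical NCHW input with one PReLU slope per channel.
    x = torch.randn(2, 3, 4, 4)
    slopes = torch.randn(3)
    # Reshape so the ranks match and every slopes dimension is either equal
    # to the input's dimension or 1 -- the broadcastable form TRT expects.
    slopes = slopes.reshape(1, -1, 1, 1)  # (1, 3, 1, 1)
    out = torch.where(x >= 0, x, x * slopes)  # PReLU semantics
    assert out.shape == x.shape  # the output keeps the input's shape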
From d1d18b9c7b780293617225b45560b6c41d6e8ef6 Mon Sep 17 00:00:00 2001
From: Evan Li
Date: Tue, 29 Jul 2025 12:40:01 -0700
Subject: [PATCH 4/8] revert prelu impl

---
 py/torch_tensorrt/dynamo/conversion/impl/prelu.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/py/torch_tensorrt/dynamo/conversion/impl/prelu.py b/py/torch_tensorrt/dynamo/conversion/impl/prelu.py
index f489ccb503..166ce16367 100644
--- a/py/torch_tensorrt/dynamo/conversion/impl/prelu.py
+++ b/py/torch_tensorrt/dynamo/conversion/impl/prelu.py
@@ -2,7 +2,6 @@
 from torch.fx.node import Target
 from torch_tensorrt.dynamo._SourceIR import SourceIR
-from torch_tensorrt.dynamo.conversion import impl
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
 from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name
 from torch_tensorrt.dynamo.types import TRTTensor
@@ -16,12 +15,6 @@ def prelu(
     input: TRTTensor,
     weight: TRTTensor,
 ) -> TRTTensor:
-    # TRT requires that the slopes tensor must be unidirectional broadcastable to the input tensor:
-    # the rank of the two tensors must be the same, and all dimensions of the slopes tensor must
-    # either equal the input tensor or be 1. The output tensor has the same shape as the input tensor.
-    input, weight = impl.elementwise.broadcast(
-        ctx, input, weight, f"{name}_broadcast_input", f"{name}_broadcast_weight"
-    )
-    layer = ctx.net.add_parametric_relu(input, slopes=weight)
+    layer = ctx.net.add_parametric_relu(input, weight)
     set_layer_name(layer, target, name, source_ir)
     return layer.get_output(0)

From cf9e7bdee48aec7345927b6312bd4eab5f1c21f5 Mon Sep 17 00:00:00 2001
From: Evan Li
Date: Thu, 7 Aug 2025 19:04:24 -0700
Subject: [PATCH 5/8] fix identity issue

---
 py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py     | 13 ++++++++---
 py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py |  6 +++---
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
index b8d4994fca..a72e52c522 100644
--- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
+++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -440,7 +440,7 @@ def check_weight_equal(
         except Exception:
             return torch.all(sd_weight == network_weight)
 
-    @needs_refit
+    @needs_refit  # type: ignore[misc]
     def _save_weight_mapping(self) -> None:
         """
         Construct the weight name mapping from engine weight name to state_dict weight name.
@@ -577,7 +577,7 @@ def _save_weight_mapping(self) -> None:
         gc.collect()
         torch.cuda.empty_cache()
 
-    @needs_refit
+    @needs_refit  # type: ignore[misc]
     def _insert_engine_to_cache(self, hash_val: str, serialized_engine: bytes) -> None:
         # TODO: @Evan is waiting for TRT's feature to cache the weight-stripped engine
         # if not self.compilation_settings.strip_engine_weights:
@@ -605,7 +605,7 @@ def _insert_engine_to_cache(self, hash_val: str, serialized_engine: bytes) -> No
             ),
         )
 
-    @needs_refit
+    @needs_refit  # type: ignore[misc]
     def _pull_cached_engine(self, hash_val: str) -> Optional[TRTInterpreterResult]:
         # query the cached TRT engine
         cached_data = self.engine_cache.check(hash_val)  # type: ignore[union-attr]
@@ -941,7 +941,14 @@ def output(self, target: str, args: Any, kwargs: Any) -> List[Any]:
                 f"Specified output dtypes ({len(self.output_dtypes)}) differ from number of outputs ({len(outputs)})"
             )
 
+        marked_outputs_ids = []
         for i, output in enumerate(outputs):
+            # In some cases, the same output tensor may be marked multiple times, such as _to_oppy,
+            # so we skip marking if the output is already marked
+            if id(output) in marked_outputs_ids:
+                continue
+            marked_outputs_ids.append(id(output))
+
             name = f"output{i}"
             output_dtype = dtype.unknown
 
diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
index 71ee8d459c..c648a870f8 100644
--- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
+++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
@@ -1123,7 +1123,7 @@ def aten_ops_clone_copy_placeholder(
         name,
         args[0],
         kwargs.get("dtype", args[0].dtype),
-        force_layer=True,
+        force_layer=False,
     )
 
 
@@ -1226,7 +1226,7 @@ def aten_ops_sum(
             name,
             sum_,
             kwargs["output_dtype"],
-            force_layer=True,
+            force_layer=False,
         )
     else:
         return sum_
@@ -3229,7 +3229,7 @@ def aten_ops_copy(
         name,
         src,
         src.dtype,
-        force_layer=True,
+        force_layer=False,
     )
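A minimal sketch of the dedup pattern the output() hunk above adds -- plain
objects stand in for TRT tensors, and print stands in for marking a network
output (illustrative only, not the interpreter's code):

    # The repeated object is deliberate: it models one tensor returned twice.
    t0, t1 = object(), object()
    outputs = [t0, t1, t1]

    marked_outputs_ids = []
    for i, output in enumerate(outputs):
        if id(output) in marked_outputs_ids:
            continue  # already marked; marking the same tensor twice is invalid
        marked_outputs_ids.append(id(output))
        print(f"output{i} marked")  # stands in for marking the TRT output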
From 11550dffade920e895199a1a7bb903e2c215f3c9 Mon Sep 17 00:00:00 2001
From: Evan Li
Date: Fri, 8 Aug 2025 16:39:25 -0700
Subject: [PATCH 6/8] set force_layer=True for some aten ops

---
 py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
index c648a870f8..c4dd431818 100644
--- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
+++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
@@ -1123,7 +1123,7 @@ def aten_ops_clone_copy_placeholder(
         name,
         args[0],
         kwargs.get("dtype", args[0].dtype),
-        force_layer=False,
+        force_layer=True,
     )
 
 
@@ -3229,7 +3229,7 @@ def aten_ops_copy(
         name,
         src,
         src.dtype,
-        force_layer=False,
+        force_layer=True,
     )

From dbb5c78188a41148f23e7abbec533b345703c1c9 Mon Sep 17 00:00:00 2001
From: Evan Li
Date: Tue, 12 Aug 2025 10:14:49 -0700
Subject: [PATCH 7/8] fix typo

---
 py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
index a72e52c522..ed04d8792f 100644
--- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
+++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -943,7 +943,7 @@ def output(self, target: str, args: Any, kwargs: Any) -> List[Any]:
 
         marked_outputs_ids = []
         for i, output in enumerate(outputs):
-            # In some cases, the same output tensor may be marked multiple times, such as _to_oppy,
+            # In some cases, the same output tensor may be marked multiple times, such as _to_copy,
             # so we skip marking if the output is already marked
             if id(output) in marked_outputs_ids:
                 continue

From 9745a5352b528f818c4b3c3021b6f63d71f09c65 Mon Sep 17 00:00:00 2001
From: Evan Li
Date: Tue, 12 Aug 2025 11:39:58 -0700
Subject: [PATCH 8/8] add comments

---
 py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
index c4dd431818..3df1b19645 100644
--- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
+++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
@@ -1094,7 +1094,7 @@ def aten_ops_clone_copy_dtype(
         name,
         args[0],
         kwargs.get("dtype", args[0].dtype),
-        force_layer=False,
+        force_layer=False,  # force_layer=False results in better performance
     )
 
 
@@ -1226,7 +1226,7 @@ def aten_ops_sum(
             name,
             sum_,
             kwargs["output_dtype"],
-            force_layer=False,
+            force_layer=False,  # force_layer=False results in better performance
         )
     else:
         return sum_
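One way such duplicate outputs can arise, assuming a dtype-preserving _to_copy
lowers to an identity when force_layer=False (the module below is hypothetical,
written only to illustrate the aliasing):

    import torch

    class AliasedOutputs(torch.nn.Module):
        def forward(self, x: torch.Tensor):
            # For a float32 input this .to() is a no-op _to_copy, so x and y
            # can resolve to the same TRT tensor after conversion.
            y = x.to(torch.float32)
            return x, y

This is why the guard from PATCH 5 stays in place even though PATCH 6 restores
force_layer=True for the pure clone/copy converters and PATCH 8 keeps
force_layer=False where it gives better performance.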