Commit c4a5c3d

fix: store the traced symbolic shape expressions from compile time in the engine metadata so they can be reused during re-export. This also removes the need to access the real TensorRT engine during re-export.
1 parent 99d29d0 · commit c4a5c3d

File tree: 12 files changed, +294 / -299 lines

core/runtime/TRTEngine.cpp

Lines changed: 4 additions & 0 deletions
@@ -325,6 +325,10 @@ std::string TRTEngine::get_engine_layer_info() {
   return inspector->getEngineInformation(nvinfer1::LayerInformationFormat::kJSON);
 }
 
+std::string TRTEngine::get_serialized_metadata() {
+  return this->serialized_metadata;
+}
+
 std::vector<at::Tensor> TRTEngine::infer_outputs(std::vector<std::vector<int64_t>> input_shapes) {
   std::vector<at::Tensor> outputs;
   TORCHTRT_CHECK(

core/runtime/TRTEngine.h

Lines changed: 1 addition & 0 deletions
@@ -158,6 +158,7 @@ struct TRTEngine : torch::CustomClassHolder {
   void set_profile_format(std::string profile_format);
   void disable_profiling();
   std::string get_engine_layer_info();
+  std::string get_serialized_metadata();
 
   void dump_engine_layer_info_to_file(const std::string& path);
   void dump_engine_layer_info();

core/runtime/register_jit_hooks.cpp

Lines changed: 1 addition & 0 deletions
@@ -88,6 +88,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
         .def("dump_engine_layer_info_to_file", &TRTEngine::dump_engine_layer_info_to_file)
         .def("dump_engine_layer_info", &TRTEngine::dump_engine_layer_info)
         .def("get_engine_layer_info", &TRTEngine::get_engine_layer_info)
+        .def("get_serialized_metadata", &TRTEngine::get_serialized_metadata)
         .def("infer_outputs", &TRTEngine::infer_outputs)
         .def("reset_captured_graph", &TRTEngine::reset_captured_graph)
         .def("set_output_tensors_as_unowned", &TRTEngine::set_output_tensors_as_unowned)

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 2 additions & 2 deletions
@@ -10,6 +10,7 @@
 import torch
 from torch.export import ExportedProgram
 from torch.fx.node import Target
+
 from torch_tensorrt._Device import Device
 from torch_tensorrt._enums import EngineCapability, dtype
 from torch_tensorrt._features import needs_cross_compile
@@ -564,7 +565,7 @@ def compile(
 
     if not kwargs.get("use_explicit_typing", False):
         warnings.warn(
-            "`use_explicit_typing` is deprecated. This setting will be removed and you should enable autocast instead.",
+            "`use_explicit_typing` is deprecated. use_explicit_types is now on by default, this setting will be removed and you should enable autocast to recover weak typing behavior.",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -1042,7 +1043,6 @@ def preserve_module_specs(
         trt_modules[name] = trt_module
 
     if _debugger_config:
-
         if _debugger_config.save_engine_profile:
             if settings.use_python_runtime:
                 if _debugger_config.profile_format != "cudagraph":

py/torch_tensorrt/dynamo/_exporter.py

Lines changed: 21 additions & 1 deletion
@@ -17,6 +17,7 @@
     OutputSpec,
     TensorArgument,
 )
+
 from torch_tensorrt.dynamo.runtime._TorchTensorRTModule import ENGINE_IDX, NAME_IDX
 
 
@@ -270,7 +271,26 @@ def inline_torch_modules(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
                 gm.graph.erase_node(gm_added_placeholder_inputs[idx])
 
             # Replace the pytorch submodule node (call_module) with the inlined subgraph output
-            gm_node.replace_all_uses_with(submodule_output)
+            # Special handling when submodule returns multiple outputs (tuple)
+            if isinstance(submodule_output, tuple):
+                # The fallback module has multiple outputs
+                # Find getitem nodes that extract from this module call and replace them directly
+                getitem_users = [
+                    user
+                    for user in list(gm_node.users.keys())
+                    if user.op == "call_function"
+                    and user.target is operator.getitem
+                ]
+                for user in getitem_users:
+                    # getitem extracts element idx from the tuple
+                    _, idx = user.args
+                    # Replace this getitem with the actual node from the tuple
+                    user.replace_all_uses_with(submodule_output[idx])
+                    # Erase the getitem node since it's no longer needed
+                    gm.graph.erase_node(user)
+            else:
+                # Single output - normal replacement
+                gm_node.replace_all_uses_with(submodule_output)
 
             # copy the attributes of the submodule into gm (graph_copy doesn't do this)
             copy_submodule_attributes(gm, submodule, gm_node.name)
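For context on the new branch above, a standalone sketch of the FX idiom it relies on: a node that produces a tuple is only consumed through operator.getitem nodes, so replacing that node means rerouting each getitem user to the matching replacement and erasing the getitem. The toy graph below is illustrative only and is not torch_tensorrt code:

import operator

import torch
from torch import fx


class Toy(torch.nn.Module):
    def forward(self, x):
        pair = torch.chunk(x, 2)  # one node whose consumers are getitem nodes
        return pair[0] + pair[1]


gm = fx.symbolic_trace(Toy())
chunk_node = next(n for n in gm.graph.nodes if n.target is torch.chunk)

# Build a replacement producer (torch.tensor_split, purely for illustration).
with gm.graph.inserting_after(chunk_node):
    new_node = gm.graph.call_function(torch.tensor_split, args=(chunk_node.args[0], 2))

# Reroute every getitem user of the old node to the replacement, then erase it.
for user in [u for u in list(chunk_node.users) if u.op == "call_function" and u.target is operator.getitem]:
    _, idx = user.args
    with gm.graph.inserting_after(new_node):
        replacement = gm.graph.call_function(operator.getitem, args=(new_node, idx))
    user.replace_all_uses_with(replacement)
    gm.graph.erase_node(user)

gm.graph.erase_node(chunk_node)
gm.recompile()

In the diff itself the replacement for each getitem is simply submodule_output[idx], the already-inlined output node of the fallback submodule, so no new getitem nodes need to be created.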

py/torch_tensorrt/dynamo/conversion/_conversion.py

Lines changed: 25 additions & 17 deletions
@@ -2,11 +2,9 @@
 
 import io
 import logging
-from typing import Any, List, NamedTuple, Optional, Sequence
+from typing import Any, Dict, List, NamedTuple, Optional, Sequence
 
-import tensorrt as trt
 import torch
-
 from torch_tensorrt._enums import dtype
 from torch_tensorrt._features import ENABLED_FEATURES
 from torch_tensorrt._Input import Input
@@ -27,6 +25,8 @@
 )
 from torch_tensorrt.logging import TRT_LOGGER
 
+import tensorrt as trt
+
 logger = logging.getLogger(__name__)
 
 
@@ -36,9 +36,7 @@ class SerializedInterpreterResult(NamedTuple):
     output_names: Sequence[str]
     weight_name_map: Optional[dict[Any, Any]]
     requires_output_allocator: bool
-    symbolic_shape_expressions: Optional[
-        str
-    ]  # Base64-encoded serialized symbolic shape mapping
+    symbolic_shape_expressions: List[Dict[str, Any]]
 
 
 def infer_module_output_dtypes(
@@ -108,6 +106,7 @@ def pull_cached_engine(
     engine_cache: BaseEngineCache,
     settings: CompilationSettings,
     inputs: Sequence[Input],
+    symbolic_shape_expressions: List[Dict[str, Any]],
 ) -> Optional[SerializedInterpreterResult]:
     if hash_val is None:
         logger.warning(
@@ -137,16 +136,16 @@ def pull_cached_engine(
         setting_compatiblity, incompattible_settings = settings_are_compatible(
             settings, cached_engine_compilation_settings
         )
-        assert setting_compatiblity, (
-            f"Attempted to refit a cached engine with incompatible settings: {incompattible_settings}, (old_settings: {cached_engine_compilation_settings}, new_settings: {settings})"
-        )
+        assert (
+            setting_compatiblity
+        ), f"Attempted to refit a cached engine with incompatible settings: {incompattible_settings}, (old_settings: {cached_engine_compilation_settings}, new_settings: {settings})"
 
         for i, e in enumerate(
             [Input.equivalent_spec(c, i) for c, i in zip(cached_engine_inputs, inputs)]
         ):
-            assert e, (
-                f"Attempted to refit a cached engine built for a different input size (input: {i}, cached size: {cached_engine_inputs[i]}, new size: {inputs[i]}"
-            )
+            assert (
+                e
+            ), f"Attempted to refit a cached engine built for a different input size (input: {i}, cached size: {cached_engine_inputs[i]}, new size: {inputs[i]}"
 
         logger.info(
             f"Found the cached engine with hash {hash_val} that corresponds to this graph. It is directly loaded."
@@ -190,6 +189,7 @@ def pull_cached_engine(
                 output_names=output_names,
                 weight_name_map=weight_name_map,
                 requires_output_allocator=requires_output_allocator,
+                symbolic_shape_expressions=symbolic_shape_expressions,
             )
     return None
 
@@ -210,6 +210,12 @@ def interpret_module_to_result(
        SerializedInterpreterResult
    """
 
+    symbolic_shape_expressions = extract_symbolic_shape_expressions(module)
+    if symbolic_shape_expressions is None:
+        raise RuntimeError(
+            "Failed to extract symbolic shape expressions from source FX graph partition"
+        )
+
    # engine_cache could be None if:
    # 1) engine_cache is not passed in when calling this function like convert_exported_program_to_serialized_trt_engine etc., or
    # 2) both cache_built_engines and reuse_cached_engines are False
@@ -242,7 +248,12 @@
            )
        else:
            serialized_interpreter_result = pull_cached_engine(
-                hash_val, module, engine_cache, settings, inputs
+                hash_val,
+                module,
+                engine_cache,
+                settings,
+                inputs,
+                symbolic_shape_expressions,
            )
        if serialized_interpreter_result is not None:  # hit the cache
            return serialized_interpreter_result
@@ -251,11 +262,8 @@
        module, truncate_double=settings.truncate_double
    )
 
-    # Extract symbolic shape expressions before interpretation
-    # This captures the symbolic relationship between input and output shapes
-    symbolic_shape_expressions = extract_symbolic_shape_expressions(module)
    logger.debug(
-        f"Extracted symbolic shape expressions: {len(symbolic_shape_expressions) if symbolic_shape_expressions else 0} bytes"
+        f"Extracted symbolic shape expressions: {len(symbolic_shape_expressions) if symbolic_shape_expressions else 0} outputs"
    )
 
    interpreter = TRTInterpreter(

py/torch_tensorrt/dynamo/conversion/_symbolic_shape_capture.py

Lines changed: 40 additions & 76 deletions
@@ -6,43 +6,29 @@
 output shapes without pattern matching.
 """
 
-import base64
-import pickle
-from typing import Any, Dict, List, Optional, Tuple
+import logging
+from typing import Any, Dict, List, Optional
 
 import torch
 
+logger = logging.getLogger(__name__)
+
 
 def extract_symbolic_shape_expressions(
     module: torch.fx.GraphModule,
-) -> Optional[str]:
+) -> Optional[List[Dict[str, Any]]]:
     """
-    Extract symbolic shape expressions from an FX graph and serialize them.
+    Extract symbolic shape expressions from an FX graph.
 
-    This captures the relationship between input placeholder shapes and output shapes,
-    storing the symbolic expressions (as sympy expressions) that can be deserialized
-    and evaluated in the meta kernel.
+    This captures the symbolic expressions (as sympy expressions) for output shapes
+    that can be applied to input fake tensors at runtime.
 
     Args:
         module: FX GraphModule with symbolic shapes in node metadata
 
     Returns:
-        Base64-encoded serialized mapping of {output_idx: {dim_idx: sympy_expr}} or None if no symbolic shapes
+        List of dicts containing shape_exprs and dtype for each output, or None if extraction fails
     """
-    # Find input placeholders (excluding parameters/buffers)
-    input_placeholders = []
-    for node in module.graph.nodes:
-        if node.op == "placeholder" and "val" in node.meta:
-            val = node.meta["val"]
-            # Skip parameters and buffers (they're also placeholders but not inputs)
-            if isinstance(val, torch.Tensor) and not node.name.startswith(
-                "_frozen_param"
-            ):
-                # Check if this is an actual input (not a parameter)
-                # Parameters typically have names like "p_weight", "p_bias"
-                if not node.name.startswith("p_"):
-                    input_placeholders.append(node)
-
     # Find output node
     output_nodes = [node for node in module.graph.nodes if node.op == "output"]
     if not output_nodes:
@@ -55,59 +41,37 @@ def extract_symbolic_shape_expressions(
     if not isinstance(output_args, (tuple, list)):
         output_args = (output_args,)
 
-    # Build mapping of output shapes
-    # Format: {output_idx: {dim_idx: sympy_expr}}
-    output_shape_mapping: Dict[int, Dict[int, Any]] = {}
-
-    has_symbolic_shapes = False
-
-    for out_idx, out_arg in enumerate(output_args):
+    # Collect shape expressions and dtypes for each output
+    output_info = []
+    for out_arg in output_args:
         if not hasattr(out_arg, "meta") or "val" not in out_arg.meta:
-            continue
+            logger.warning(
+                "When processing symbolic shapes for TensorRT engine, found no metadata in FX Graph"
+            )
+            return None
 
         out_val = out_arg.meta["val"]
-        if not hasattr(out_val, "shape"):
-            continue
-
-        dim_mapping = {}
-        for dim_idx, dim_size in enumerate(out_val.shape):
-            dim_mapping[dim_idx] = dim_size.node.expr
-
-        output_shape_mapping[out_idx] = dim_mapping
-
-    # Serialize the mapping and base64 encode it
-    # Note: We can pickle sympy expressions but not SymInt objects directly
-    # Base64 encoding is needed because the pickled data is binary and needs to be stored as a C++ std::string
-    try:
-        pickled = pickle.dumps(output_shape_mapping)
-        encoded = base64.b64encode(pickled).decode("utf-8")
-        return encoded
-    except Exception as e:
-        import logging
-
-        logger = logging.getLogger(__name__)
-        logger.warning(f"Failed to serialize symbolic shape expressions: {e}")
-        return None
-
-
-def deserialize_symbolic_shape_expressions(
-    serialized: str,
-) -> Optional[Dict[int, Dict[int, Any]]]:
-    """
-    Deserialize symbolic shape expressions.
-
-    Args:
-        serialized: Base64-encoded pickled mapping from extract_symbolic_shape_expressions
-
-    Returns:
-        Dictionary mapping {output_idx: {dim_idx: sympy_expr}}
-    """
-    try:
-        decoded = base64.b64decode(serialized.encode("utf-8"))
-        return pickle.loads(decoded)
-    except Exception as e:
-        import logging
-
-        logger = logging.getLogger(__name__)
-        logger.warning(f"Failed to deserialize symbolic shape expressions: {e}")
-        return None
+        if not isinstance(out_val, torch.Tensor):
+            logger.warning(
+                "When processing symbolic shapes for TensorRT engine, output is not a tensor"
+            )
+            return None
+
+        # Extract shape as sympy expressions (can be pickled)
+        shape_exprs = []
+        for dim_size in out_val.shape:
+            if isinstance(dim_size, torch.SymInt):
+                # Store the sympy expression, which can be pickled
+                shape_exprs.append(dim_size.node.expr)
+            else:
+                # Store concrete integer
+                shape_exprs.append(int(dim_size))
+
+        output_info.append(
+            {
+                "shape_exprs": shape_exprs,
+                "dtype": out_val.dtype,
+            }
+        )
+
+    return output_info if output_info else None
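The function now returns one entry per engine output, each carrying per-dimension sympy expressions (or plain ints) plus the output dtype. A hedged sketch of how a consumer, for example a meta kernel during re-export, might evaluate those expressions once concrete input sizes are known; the helper name and the symbol-to-value mapping are illustrative assumptions, not part of this commit:

import sympy
import torch


def resolve_output_shapes(output_info, symbol_values):
    """output_info: the list returned by extract_symbolic_shape_expressions.
    symbol_values: mapping of sympy.Symbol (e.g. s0) to a concrete int."""
    resolved = []
    for info in output_info:
        dims = []
        for expr in info["shape_exprs"]:
            if isinstance(expr, sympy.Expr):
                dims.append(int(expr.subs(symbol_values)))  # substitute symbols, then evaluate
            else:
                dims.append(int(expr))  # dimension was already concrete
        resolved.append((tuple(dims), info["dtype"]))
    return resolved


# Example: an output whose leading dimension is the symbolic batch size s0.
s0 = sympy.Symbol("s0")
example = [{"shape_exprs": [s0, 2 * s0, 16], "dtype": torch.float32}]
print(resolve_output_shapes(example, {s0: 4}))  # [((4, 8, 16), torch.float32)]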

py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py

Lines changed: 5 additions & 2 deletions
@@ -4,7 +4,6 @@
 from contextlib import nullcontext
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 
-import tensorrt as trt
 import torch
 import torch_tensorrt
 from torch.nn import Module
@@ -22,6 +21,8 @@
     multi_gpu_device_check,
 )
 
+import tensorrt as trt
+
 logger = logging.getLogger(__name__)
 
 
@@ -131,6 +132,7 @@ def __init__(
         settings: CompilationSettings = CompilationSettings(),
         weight_name_map: Optional[dict[Any, Any]] = None,
         requires_output_allocator: bool = False,
+        symbolic_shape_expressions: Optional[List[Dict[str, Any]]] = None,
         _debugger_config: Optional[DebuggerConfig] = None,
     ):
         """Takes a name, target device, serialized TensorRT engine, and binding names / order and constructs
@@ -146,6 +148,7 @@ def __init__(
             settings (torch_tensorrt.dynamo.CompilationSettings): Settings used to compile engine, assumes engine was built with default compilation settings if object not passed
             weight_name_map (dict): Mapping of engine weight name to state_dict weight name
             requires_output_allocator (bool): Boolean flag indicating if the converter creates operators which require an Output Allocator to run (e.g. data dependent operators)
+            symbolic_shape_expressions (List[str]): List of symbolic shape expressions for each output binding
 
         Example:
 
@@ -222,6 +225,7 @@ def __init__(
         self.cudagraphs_enabled = torch_tensorrt.runtime.get_cudagraphs_mode()
         # If the output tensor is not owned by the engine (output_tensors_are_unowned=True), we need to create a new output tensor in each forward pass
         self.output_tensors_are_unowned = False
+        self.symbolic_shape_expressions = symbolic_shape_expressions
         if self.serialized_engine is not None and not self.settings.lazy_engine_init:
             self.setup_engine()
 
@@ -462,7 +466,6 @@ def create_output_allocator(self) -> None:
         self.output_allocator = DynamicOutputAllocator(output_dtypes_dict)
 
     def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
-
        def run_standard_execution() -> torch.Tensor | Tuple[torch.Tensor, ...]:
            shape_changed = self.validate_input_shapes(contiguous_inputs)
            (
