Commit a8b54f9

Fridah-nv authored and lucaslie committed
Attention Pattern Matcher (closes NVIDIA#4404) (#88)
* WIP for attention matching: repeat_kv, eager_attention_matching
* works e2e with llama2 and llama3.1, eager and sdpa
* update for unit test test_attention_matcher
* minor
* minor
* unify into one transformation, update unit tests
* update hf_test to verify transformed output, update move_to_device to recompile graph
* update after rebase
* minor
* update docstring
* minor

Signed-off-by: Frida Hou <[email protected]>
1 parent d84ce33 commit a8b54f9
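The commit message above references matching a repeat_kv pattern. For orientation only, here is a minimal sketch of that pattern as it commonly appears in Hugging Face Llama-style models with grouped-query attention; the function below follows that public convention and is not copied from this commit's code.

```python
import torch


def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """Expand key/value heads so they match the number of query heads (GQA).

    Input shape:  [batch, num_kv_heads, seq_len, head_dim]
    Output shape: [batch, num_kv_heads * n_rep, seq_len, head_dim]
    """
    batch, num_kv_heads, seq_len, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    expanded = hidden_states[:, :, None, :, :].expand(
        batch, num_kv_heads, n_rep, seq_len, head_dim
    )
    return expanded.reshape(batch, num_kv_heads * n_rep, seq_len, head_dim)


# Example: 8 KV heads repeated 4x to line up with 32 query heads.
kv = torch.randn(2, 8, 16, 64)
assert repeat_kv(kv, 4).shape == (2, 32, 16, 64)
```

A matcher operating on the exported fx graph would presumably look for the unsqueeze/expand/reshape subgraph this produces and fold it, together with the surrounding eager or SDPA attention ops, into a fused attention op.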

6 files changed (+354, -1050 lines)


tensorrt_llm/_torch/auto_deploy/transformations/_graph.py

Lines changed: 5 additions & 4 deletions
@@ -96,23 +96,24 @@ def named_graphmodules(gm: fx.GraphModule) -> Iterator[Tuple[str, fx.GraphModule
         yield name, m
 
 
-def _move_single_gm_to_device(
-    gm: GraphModule, device: torch.device, recompile_graph: bool = False
-) -> None:
+def _move_single_gm_to_device(gm: GraphModule, device: torch.device) -> None:
     """Move one GraphModule and its nodes to the specified device in-place.
     Partially inspired by https://github.com/pytorch/pytorch/blob/05cb98f91d49df9eadfcb3fc29bbd1b621d88860/torch/export/passes/__init__.py#L11
     """
     # move state dict
     gm.to(device)
+    recompile_graph = False
 
     for node in gm.graph.nodes:
         # move all the nodes kwargs with burnt-in device
         if "device" in node.kwargs:
+            recompile_graph = True
             kwargs = node.kwargs.copy()
             kwargs["device"] = device
             node.kwargs = kwargs
 
         if is_op(node, torch.ops.aten.to.device):
+            recompile_graph = True
             args = list(node.args)
             args[1] = device
             node.args = tuple(args)
@@ -135,7 +136,7 @@ def move_to_device(gm: fx.GraphModule, device: DeviceLikeType) -> fx.GraphModule
 
     for _, subgm in reversed(list(named_graphmodules(gm))):
         # recompile graph to update self generated codes in subgraph
-        _move_single_gm_to_device(subgm, device, subgm is not gm)
+        _move_single_gm_to_device(subgm, device)
 
 
 def _is_impure_node(node: Node) -> bool:
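The change above drops the recompile_graph parameter and instead raises the flag inside the helper whenever a node with a burnt-in device is actually rewritten. A rough, self-contained illustration of that idea is sketched below; the hand-built toy graph and target device are assumptions for the example, not the project's code.

```python
import torch
import torch.fx as fx

# Build a tiny fx graph by hand so a node with a burnt-in "device" kwarg exists.
graph = fx.Graph()
x = graph.placeholder("x")
ones = graph.call_function(torch.ones, args=(4,), kwargs={"device": torch.device("cpu")})
out = graph.call_function(torch.add, args=(x, ones))
graph.output(out)
gm = fx.GraphModule(torch.nn.Module(), graph)

target = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Same idea as the patched helper: flag recompilation only when a device was rewritten.
recompile_graph = False
for node in gm.graph.nodes:
    if "device" in node.kwargs:
        recompile_graph = True
        kwargs = dict(node.kwargs)
        kwargs["device"] = target
        node.kwargs = kwargs

if recompile_graph:
    # Regenerate gm.code so the rewritten device shows up in the generated forward.
    gm.recompile()

print(gm.code)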
