Enable CUDA graph for LLMs for NvTensorRtRtx EP (microsoft#1645)

anujj · web-flow · commit a3cc24dd515d · 2025-08-18T12:34:04.000-07:00
- Enable CUDA graph for LLMs for the NvTensorRtRtx EP
- Rename the flag from nv_cuda_graph_enable to enable_cuda_graph
diff --git a/src/models/model.cpp b/src/models/model.cpp
@@ -584,7 +584,7 @@ DeviceInterface* SetProviderSessionOptions(OrtSessionOptions& session_options,
         bool is_multi_profile_enabled = IsMultiProfileEnabled(config.model.decoder.session_options);
         ConfigureNvTensorRtRTxProfile(config, session_options, is_multi_profile_enabled);
         if (IsGraphCaptureEnabled(config.model.decoder.session_options)) {
-          session_options.AddConfigEntry("ep.nvtensorrtrtxexecutionprovider.nv_cuda_graph_enable", "1");
+          session_options.AddConfigEntry("ep.nvtensorrtrtxexecutionprovider.enable_cuda_graph", "1");
         }
         p_device = GetDeviceInterface(DeviceType::NvTensorRtRtx);
       }
diff --git a/src/python/py/models/builder.py b/src/python/py/models/builder.py
@@ -94,7 +94,7 @@ def __init__(
             },
             "dml": {},
             "webgpu": {},
-            "NvTensorRtRtx": {},
+            "NvTensorRtRtx": {"enable_cuda_graph": "1"} if extra_options.get("enable_cuda_graph", False) else {},
         }
 
         # Map input names to their types and shapes

Original file line number	Diff line number	Diff line change
`@@ -584,7 +584,7 @@ DeviceInterface* SetProviderSessionOptions(OrtSessionOptions& session_options,`
`584`	`584`	`bool is_multi_profile_enabled = IsMultiProfileEnabled(config.model.decoder.session_options);`
`585`	`585`	`ConfigureNvTensorRtRTxProfile(config, session_options, is_multi_profile_enabled);`
`586`	`586`	`if (IsGraphCaptureEnabled(config.model.decoder.session_options)) {`
`587`		`- session_options.AddConfigEntry("ep.nvtensorrtrtxexecutionprovider.nv_cuda_graph_enable", "1");`
	`587`	`+ session_options.AddConfigEntry("ep.nvtensorrtrtxexecutionprovider.enable_cuda_graph", "1");`
`588`	`588`	`}`
`589`	`589`	`p_device = GetDeviceInterface(DeviceType::NvTensorRtRtx);`
`590`	`590`	`}`
Original file line number	Diff line number	Diff line change
`@@ -94,7 +94,7 @@ def __init__(`
`94`	`94`	`},`
`95`	`95`	`"dml": {},`
`96`	`96`	`"webgpu": {},`
`97`		`- "NvTensorRtRtx": {},`
	`97`	`+ "NvTensorRtRtx": {"enable_cuda_graph": "1"} if extra_options.get("enable_cuda_graph", False) else {},`
`98`	`98`	`}`
`99`	`99`
`100`	`100`	`# Map input names to their types and shapes`