From f1ea1d9993bfea9636ea8196c04804779942519f Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Fri, 26 Sep 2025 02:05:58 -0700 Subject: [PATCH 1/2] Simplify CUDAGraph creation logic Refactor CUDAGraph initialization to always pass pool_id to graphs.CUDAGraph; the ID stays None unless use_unique_memory_pool is configured. --- .../graph_optimization/cudagraph_piecewise_backend.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py b/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py index 79ff9ea0e1..e225d2b43f 100644 --- a/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py +++ b/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py @@ -20,7 +20,6 @@ import paddle.jit.dy2static.utils as jit_utils import paddle.nn.layer -from paddle.base.core import CUDAGraph from paddle.device.cuda import graphs from fastdeploy import envs @@ -92,7 +91,10 @@ def __init__(self, fd_config: FDConfig, runnable: Callable): self.cudagraph_capture_sizes = fd_config.graph_opt_config.cudagraph_capture_sizes self.warm_up_size = fd_config.graph_opt_config.cudagraph_num_of_warmups self.real_shape_to_captured_size = fd_config.graph_opt_config.real_shape_to_captured_size + self.unique_memory_pool_id = None if self.fd_config.graph_opt_config.use_unique_memory_pool: + from paddle.base.core import CUDAGraph + self.unique_memory_pool_id = CUDAGraph.gen_new_memory_pool_id() self._create_entry_dict() @@ -166,11 +168,7 @@ def __call__(self, **kwargs): input_addresses = [x.data_ptr() for (_, x) in kwargs.items() if isinstance(x, paddle.Tensor)] entry.input_addresses = input_addresses - new_grpah = ( - graphs.CUDAGraph(pool_id=self.unique_memory_pool_id) - if self.fd_config.graph_opt_config.use_unique_memory_pool - else graphs.CUDAGraph() - ) + new_grpah = graphs.CUDAGraph(pool_id=self.unique_memory_pool_id) paddle.device.synchronize() # Capture From 36820c5cd6a645e3adc64c224b505ba33d8ea230 Mon 
Sep 17 00:00:00 2001 From: Yuanle Liu Date: Fri, 26 Sep 2025 02:08:43 -0700 Subject: [PATCH 2/2] Conditionally import CUDAGraph based on CUDA compilation --- .../graph_optimization/cudagraph_piecewise_backend.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py b/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py index e225d2b43f..3465b60928 100644 --- a/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py +++ b/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py @@ -93,9 +93,10 @@ def __init__(self, fd_config: FDConfig, runnable: Callable): self.real_shape_to_captured_size = fd_config.graph_opt_config.real_shape_to_captured_size self.unique_memory_pool_id = None if self.fd_config.graph_opt_config.use_unique_memory_pool: - from paddle.base.core import CUDAGraph + if paddle.is_compiled_with_cuda(): + from paddle.base.core import CUDAGraph - self.unique_memory_pool_id = CUDAGraph.gen_new_memory_pool_id() + self.unique_memory_pool_id = CUDAGraph.gen_new_memory_pool_id() self._create_entry_dict() self.cuda_graph_manager = None