Skip to content

Commit ed5a051

Browse files
committed
Add execution_stream param to MambaHybridCacheManager.
Signed-off-by: SimengLiu-nv <simengl@nvidia.com>
1 parent ea1c136 commit ed5a051

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

tensorrt_llm/_torch/pyexecutor/mamba_cache_manager.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,7 @@ def __init__(
197197
dtype: DataType = DataType.HALF,
198198
spec_config: Optional["DecodingBaseConfig"] = None,
199199
is_estimating_kv_cache: bool = False,
200+
execution_stream: Optional[torch.cuda.Stream] = None,
200201
) -> None:
201202

202203
# mamba hybrid cache requires block reuse to be disabled in KV cache config
@@ -234,6 +235,7 @@ def __init__(
234235
spec_config=spec_config,
235236
layer_mask=layer_mask,
236237
is_estimating_kv_cache=is_estimating_kv_cache,
238+
execution_stream=execution_stream,
237239
)
238240

239241
def prepare_resources(self, scheduled_batch: ScheduledRequests):

0 commit comments

Comments (0)