Skip to content

Commit ed5a051

Browse files
committed
Add execution_stream param to MambaHybridCacheManager.
Signed-off-by: SimengLiu-nv <simengl@nvidia.com>
1 parent ea1c136 commit ed5a051

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

tensorrt_llm/_torch/pyexecutor/mamba_cache_manager.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,7 @@ def __init__(
197197
dtype: DataType = DataType.HALF,
198198
spec_config: Optional["DecodingBaseConfig"] = None,
199199
is_estimating_kv_cache: bool = False,
200+
execution_stream: Optional[torch.cuda.Stream] = None,
200201
) -> None:
201202

202203
# mamba hybrid cache requires block reuse to be disabled in KV cache config
@@ -234,6 +235,7 @@ def __init__(
234235
spec_config=spec_config,
235236
layer_mask=layer_mask,
236237
is_estimating_kv_cache=is_estimating_kv_cache,
238+
execution_stream=execution_stream,
237239
)
238240

239241
def prepare_resources(self, scheduled_batch: ScheduledRequests):

0 commit comments

Comments (0)