We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ea1c136 · commit ed5a051 (Copy full SHA for ed5a051)
tensorrt_llm/_torch/pyexecutor/mamba_cache_manager.py
@@ -197,6 +197,7 @@ def __init__(
197
dtype: DataType = DataType.HALF,
198
spec_config: Optional["DecodingBaseConfig"] = None,
199
is_estimating_kv_cache: bool = False,
200
+ execution_stream: Optional[torch.cuda.Stream] = None,
201
) -> None:
202
203
# mamba hybrid cache requires block reuse to be disabled in KV cache config
@@ -234,6 +235,7 @@ def __init__(
234
235
spec_config=spec_config,
236
layer_mask=layer_mask,
237
is_estimating_kv_cache=is_estimating_kv_cache,
238
+ execution_stream=execution_stream,
239
)
240
241
def prepare_resources(self, scheduled_batch: ScheduledRequests):
0 commit comments