We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 1b79934 commit 35ef38c Copy full SHA for 35ef38c
python/sglang/srt/server_args.py
@@ -1356,16 +1356,6 @@ def _handle_model_specific_adjustments(self):
1356
f"- Decode: {decode_attn_backend}\n"
1357
)
1358
1359
- if (
1360
- prefill_attn_backend == "trtllm_mha"
1361
- or decode_attn_backend == "trtllm_mha"
1362
- ):
1363
- # TODO: support swa kv indices translation for trtllm_mha attention backend
1364
- self.disable_hybrid_swa_memory = True
1365
- logger.warning(
1366
- "Disable hybrid SWA memory for GPT-OSS model with trtllm_mha attention backend."
1367
- )
1368
-
1369
quant_method = get_quantization_config(hf_config)
1370
is_mxfp4_quant_format = quant_method == "mxfp4"
1371
if is_blackwell_supported():
0 commit comments