
Commit 9b94d6e

py-andy-c and jeejeelee authored
Enable 4bit bnb prequant MOE (#21548)
Signed-off-by: Jee Jee Li <[email protected]>
Co-authored-by: Jee Jee Li <[email protected]>
1 parent 1891a26 commit 9b94d6e

2 files changed: +4 −8 lines changed


vllm/model_executor/model_loader/bitsandbytes_loader.py

Lines changed: 3 additions & 7 deletions
@@ -427,14 +427,10 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
             elif isinstance(module, FusedMoE) and hasattr(
                     module.quant_method, "quant_config"):
                 # TODO: support FusedMoE with prequant and 8bit.
-                if self.pre_quant:
+                if self.pre_quant and self.load_8bit:
                     raise ValueError(
-                        "Prequant BitsAndBytes models with FusedMoE is not "
-                        "supported yet.")
-                if self.load_8bit:
-                    raise ValueError(
-                        "BitsAndBytes 8bit quantization with FusedMoE is not "
-                        "supported yet.")
+                        "Prequant BitsAndBytes 8bit models with FusedMoE "
+                        "is not supported yet.")
                 # Get the corresponding weight name using module name and
                 # expert_params_mapping.
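
In plain terms, the hunk relaxes the loader's guard: pre-quantized 4-bit bitsandbytes checkpoints may now take the FusedMoE path, and only the pre-quantized 8-bit combination is still rejected. A minimal standalone sketch of the new guard behavior (the helper function and its flags are illustrative stand-ins for the loader's self.pre_quant / self.load_8bit attributes, not vLLM API):

def check_fused_moe_support(pre_quant: bool, load_8bit: bool) -> None:
    # Before this commit, any pre-quantized BnB checkpoint was rejected
    # on the FusedMoE path, and 8-bit was rejected separately. Now only
    # the pre-quantized 8-bit combination raises.
    if pre_quant and load_8bit:
        raise ValueError(
            "Prequant BitsAndBytes 8bit models with FusedMoE "
            "is not supported yet.")

check_fused_moe_support(pre_quant=True, load_8bit=False)  # 4-bit prequant: now allowed
# check_fused_moe_support(pre_quant=True, load_8bit=True)  # still raises ValueError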

vllm/model_executor/models/qwen3_moe.py

Lines changed: 1 addition & 1 deletion
@@ -684,4 +684,4 @@ def load_weights(self, weights: Iterable[tuple[str,
         return loader.load_weights(weights)
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return self.model.get_expert_mapping()
\ No newline at end of file
+        return self.model.get_expert_mapping()
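
For context, a usage sketch of what the relaxed guard enables: serving a MoE checkpoint that was pre-quantized to 4-bit with bitsandbytes. The model ID below is a placeholder, not a repo this commit references; any Hugging Face checkpoint whose quantization_config is BnB 4-bit should fit the pattern.

from vllm import LLM, SamplingParams

llm = LLM(
    model="some-org/qwen3-moe-bnb-4bit",  # placeholder: a pre-quantized 4-bit BnB MoE repo
    quantization="bitsandbytes",          # route through the BitsAndBytes loader
)

outputs = llm.generate(["Hello, world"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)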
