Commit 54febef

jeejeelee authored and aarnphm committed
[oss] Init gpt-oss bf16 support (vllm-project#22508)
Signed-off-by: Jee Jee Li <[email protected]>
1 parent a234cd0 commit 54febef

4 files changed (+342, -125 lines)

vllm/model_executor/layers/fused_moe/config.py

Lines changed: 5 additions & 1 deletion
@@ -324,6 +324,8 @@ class FusedMoEConfig:
 
     max_num_tokens: int = envs.VLLM_MOE_DP_CHUNK_SIZE
 
+    has_bias: bool = False
+
     def __post_init__(self):
         if self.dp_size > 1:
             logger.debug_once("Using FusedMoEConfig::max_num_tokens=%d",
@@ -413,7 +415,8 @@ def make(
         in_dtype: torch.dtype,
         max_num_tokens: int = envs.VLLM_MOE_DP_CHUNK_SIZE,
         quant_config: Optional[Union[FusedMoEQuantConfig,
-                                     QuantizationConfig]] = None
+                                     QuantizationConfig]] = None,
+        has_bias: bool = False,
     ) -> "FusedMoEConfig":
 
         _quant_config: Optional[FusedMoEQuantConfig] = None
@@ -482,4 +485,5 @@ def make(
             in_dtype=in_dtype,
             quant_config=_quant_config,
             max_num_tokens=max_num_tokens,
+            has_bias=has_bias,
         )
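
The hunks above thread a single has_bias flag from the FusedMoEConfig.make() factory into the config object, so downstream fused-MoE code can tell whether the expert weights carry bias terms (as the gpt-oss bf16 checkpoints do). Below is a minimal standalone sketch of that pattern; MoEConfigSketch and its fields are illustrative stand-ins, not the real vLLM FusedMoEConfig, which takes many more arguments than this diff shows.

from dataclasses import dataclass
from typing import Optional


@dataclass
class MoEConfigSketch:
    # Illustrative stand-in for vllm's FusedMoEConfig; the real class has
    # far more fields (parallelism settings, expert counts, dtype, ...).
    in_dtype: str
    max_num_tokens: int = 1024
    # New flag mirroring this commit: expert weights include bias tensors.
    has_bias: bool = False

    @staticmethod
    def make(
        in_dtype: str,
        max_num_tokens: int = 1024,
        quant_config: Optional[object] = None,
        has_bias: bool = False,
    ) -> "MoEConfigSketch":
        # quant_config handling is elided; the point is that has_bias is
        # forwarded from the factory into the constructed config.
        return MoEConfigSketch(
            in_dtype=in_dtype,
            max_num_tokens=max_num_tokens,
            has_bias=has_bias,
        )


if __name__ == "__main__":
    cfg = MoEConfigSketch.make(in_dtype="bfloat16", has_bias=True)
    print(cfg.has_bias)  # -> True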
