
Commit 1244948

[Log] Optimize Log for FP8MOE (#25709)
Signed-off-by: yewentao256 <[email protected]>
1 parent a73f649 commit 1244948

File tree

  • vllm/model_executor/layers/quantization/fp8.py

1 file changed: +4 -4 lines changed

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 4 additions & 4 deletions
@@ -467,7 +467,8 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
             logger.info_once("DeepGemm disabled: FlashInfer MOE is"
                              " enabled.")
         elif (is_deep_gemm_supported()):
-            logger.info_once("Using DeepGemm kernels for Fp8MoEMethod.")
+            logger.debug_once(
+                "DeepGemm kernels available for Fp8MoEMethod.")
             self.allow_deep_gemm = True
         else:
             logger.warning_once(
@@ -481,9 +482,8 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module):
         elif (current_platform.is_cuda()
               and current_platform.is_device_capability(100)
               and not self.flashinfer_moe_backend):
-            logger.info_once(
-                "Using CutlassBlockScaledGroupedGemm kernels for Fp8 MOE "
-                "on SM100.")
+            logger.debug_once(
+                "CutlassBlockScaledGroupedGemm available for Fp8MoEMethod.")
             self.allow_cutlass_block_scaled_grouped_gemm = True
 
     def create_weights(self, layer: Module, num_experts: int, hidden_size: int,
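For context: vLLM's logger exposes *_once variants (info_once, debug_once, warning_once) that emit a given message only the first time it is logged, so repeated construction of Fp8MoEMethod does not repeat the same line. This commit downgrades the two "kernels available" messages from info to debug, since they only report that a kernel path is available rather than which backend is actually used. Below is a minimal sketch of the one-shot logging idea; it assumes nothing about vLLM's actual implementation beyond the method names shown in the diff, and debug_once here is a hypothetical stand-in built on the standard library.

    import functools
    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("fp8_moe_example")

    @functools.lru_cache(maxsize=None)
    def debug_once(message: str) -> None:
        # Hypothetical stand-in for logger.debug_once: lru_cache ensures each
        # distinct message string is emitted at most once per process.
        logger.debug(message)

    # An MoE quant method is constructed once per MoE layer; with a *_once
    # helper the availability message appears a single time, not per layer.
    for _layer in range(8):
        debug_once("DeepGemm kernels available for Fp8MoEMethod.")

After this change the availability messages only show up when debug-level logging is enabled (for example by raising vLLM's logging level to DEBUG), which matches their purely informational nature.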
