@@ -232,12 +232,13 @@ def should_skip_trtllm(
232232 QuantAlgo.W4A16_MXFP4,
233233 QuantAlgo.W4A8_MXFP4_MXFP8,
234234 }
235-
236- if quant_algo not in trtllm_gen_quant_algos:
235+ # quant_algo == None (BF16 path) also falls through and must meet the should_skip_trtllm criteria
236+ if quant_algo is not None and quant_algo not in trtllm_gen_quant_algos:
237237 return None
238238
239239 num_experts = model_config.num_experts
240240 top_k = model_config.top_k
241+ hidden_size = model_config.hidden_size
241242 intermediate_size = model_config.intermediate_size
242243
243244 # Check: num_experts must be divisible by 4
@@ -255,11 +256,22 @@ def should_skip_trtllm(
255256 f"TRTLLMGenFusedMoE requires num_experts > top_k "
256257 f"(got num_experts={num_experts}, top_k={top_k})"
257258 )
259+
260+ if quant_algo is None:
261+ if swiglu_gptoss_style:
262+ return "TRTLLMGenFusedMoE BF16 path does not support bias/swiglu custom parameters."
263+
264+ if hidden_size % 128 != 0 or intermediate_size % 128 != 0:
265+ return (
266+ "TRTLLMGenFusedMoE BF16 path requires hidden_size and intermediate_size "
267+ f"to be multiples of 128 (got h={hidden_size}, i={intermediate_size})."
268+ )
269+ return None
270+
258271 # W4A8_MXFP4_MXFP8 with non-128-aligned hidden_size or intermediate_size
259272 # causes block_scale_interleave_reverse to fail with
260273 # "rows of Interleaved block scales should be multiple of 128".
261274 if quant_algo == QuantAlgo.W4A8_MXFP4_MXFP8:
262- hidden_size = model_config.hidden_size
263275 if hidden_size % 128 != 0 or intermediate_size % 128 != 0:
264276 return (
265277 f"TRTLLMGenFusedMoE W4A8_MXFP4_MXFP8 with non-128-aligned "
0 commit comments