Skip to content

Commit ba293b3

Browse files
committed
fix CI
Signed-off-by: Dylan Chen <[email protected]>
1 parent 8ddb4c5 commit ba293b3

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderXQAImplJIT/decoderXQAImplJIT.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,14 @@ bool DecoderXQAImplJIT::shouldUse(XQAParams const& umbrellaXQAParams, bool forCo
124 124
bool hasPerfGain = mayHavePerfGain(xqaParams);
125 125
if (!hasPerfGain)
126 126
{
127+
if (xqaParams.kv_cache_data_type == DATA_TYPE_E4M3
128+
&& (xqaParams.data_type == DATA_TYPE_BF16 || xqaParams.data_type == DATA_TYPE_FP16))
129+
{
130+
TLLM_LOG_DEBUG(
131+
"JIT XQA is selected in the generation phase for fp16/bf16 input and e4m3 kv cache because MMHA "
132+
"does not support this combination.");
133+
return true;
134+
}
127 135
TLLM_LOG_DEBUG("JIT XQA is not used: maybe no performance gain");
128 136
return false;
129 137
}

cpp/tensorrt_llm/kernels/xqaDispatcher.cpp

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -244,15 +244,6 @@ bool XqaDispatcher::shouldUse(XQAParams const& params)
244 244
return true;
245 245
}
246 246

247-
if (params.kv_cache_data_type == DATA_TYPE_E4M3
248-
&& (params.data_type == DATA_TYPE_BF16 || params.data_type == DATA_TYPE_FP16))
249-
{
250-
TLLM_LOG_DEBUG(
251-
"XQA kernels are selected in the generation phase for fp16/bf16 input and e4m3 kv cache because MMHA does "
252-
"not support this combination.");
253-
return true;
254-
}
255-
256 247
return mDecoderXqaRunner->shouldUse(params, /*forConfigurePlugin=*/false);
257 248
}
258 249

0 commit comments

Comments
 (0)