Skip to content

Commit c10bebc

Browse files
Use direct conversion from BF16 to F32 without intermediate F16. (#4492)
Fixes #4522. This also fixes the 'L0 error 0x78000011' failure of test_mxfp8_mxfp4_matmul.
1 parent ccd14a9 commit c10bebc

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

third_party/intel/lib/TritonIntelGPUToLLVM/ElementwiseOpToLLVM.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1043,6 +1043,14 @@ struct FpToFpOpConversion
10431043
return outVals;
10441044
}
10451045

1046+
if (srcElementType.isBF16() && dstElementType.isF32()) {
1047+
SmallVector<Value> outVals;
1048+
for (Value v : operands[0]) {
1049+
outVals.push_back(intel::convertBf16ToFp32(loc, rewriter, v));
1050+
}
1051+
return outVals;
1052+
}
1053+
10461054
bool useFP16IntermediateSrc = srcElementType.isF32();
10471055
bool isDstFP32 = dstElementType.isF32();
10481056
Type srcType = useFP16IntermediateSrc ? f16_ty : srcElementType;

0 commit comments

Comments
 (0)