Skip to content

Commit 9aefe92

Browse files
authored
Hot fix after #4811; don't create vector with only one element (#4831)
To avoid the following build failure:

```bash
inductor/test_flex_decoding.py::TestFlexDecodingXPU::test_builtin_score_mods_different_block_size_float32_score_mod1_BLOCK_SIZE2_xpu_float32
L0 build module failed. Log:
error: undefined reference to `_Z25__spirv_RoundFToTF32INTELDv1_f' in function: '__spirv_RoundFToTF32INTEL(float vector[1])' called by kernel: 'triton_tem_fused_0'
error: backend compiler failed build.
Error during Intel loadBinary: ZE_RESULT_ERROR_MODULE_BUILD_FAILURE
```

CI:
- Flex attn: https://github.com/intel/intel-xpu-backend-for-triton/actions/runs/16691809088 (4h38m, these issues no longer occur)
- Flex decoding: https://github.com/intel/intel-xpu-backend-for-triton/actions/runs/16691844598 (2h58m, these issues no longer occur)

Signed-off-by: Anatoly Myachev <[email protected]>
1 parent 1c11bc2 commit 9aefe92

File tree

1 file changed

+11
-13
lines changed
  • third_party/intel/lib/TritonIntelGPUToLLVM/DotOpToLLVM

1 file changed

+11
-13
lines changed

third_party/intel/lib/TritonIntelGPUToLLVM/DotOpToLLVM/DPAS.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -351,19 +351,17 @@ class DotOpDPASConversionHelper {
351351
for (int repOuter = 0; repOuter < repClusterOuter; ++repOuter) {
352352
for (int repInner = 0; repInner < repClusterInner; ++repInner) {
353353
Value matVal = rewriter.create<LLVM::UndefOp>(loc, dotOpTy);
354-
for (int k = 0; k < numElemsPerOperand; ++k) {
355-
matVal = tb.insert_element(dotOpTy, matVal, elems[offset++],
356-
tb.i32_val(k));
357-
}
358-
if (isFToTF32Enabled) {
359-
auto t32Val = rewriter.create<TritonGEN::FToTf32Op>(loc, matVal)
360-
.getResult();
361-
vals[{b, i * repClusterOuter + repOuter,
362-
j * repClusterInner + repInner}] = t32Val;
363-
} else {
364-
vals[{b, i * repClusterOuter + repOuter,
365-
j * repClusterInner + repInner}] = matVal;
366-
}
354+
if (numElemsPerOperand != 1)
355+
for (int k = 0; k < numElemsPerOperand; ++k)
356+
matVal = tb.insert_element(dotOpTy, matVal, elems[offset++],
357+
tb.i32_val(k));
358+
else
359+
matVal = elems[offset++];
360+
if (isFToTF32Enabled)
361+
matVal = rewriter.create<TritonGEN::FToTf32Op>(loc, matVal)
362+
.getResult();
363+
vals[{b, i * repClusterOuter + repOuter,
364+
j * repClusterInner + repInner}] = matVal;
367365
}
368366
}
369367
}

0 commit comments

Comments
 (0)