Skip to content

Commit 7f50dcf

Browse files
committed
Prevent UB in div/rem instructions during optimization
1 parent e7292af commit 7f50dcf

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

third_party/intel/triton_xpu.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "llvm/Passes/PassPlugin.h"
77
#include "llvm/Passes/StandardInstrumentations.h"
88
#include "llvm/Transforms/InstCombine/InstCombine.h"
9+
#include "llvm/Transforms/Scalar/DivRemPairs.h"
910

1011
#include "intel/include/Dialect/TritonGEN/IR/TritonGENDialect.h"
1112
#include "intel/include/Dialect/TritonIntelGPU/IR/Dialect.h"
@@ -204,6 +205,17 @@ void init_triton_intel(py::module &&m) {
204205
fpm.addPass(BreakStructPhiNodesPass());
205206
fpm.addPass(InstCombinePass());
206207
});
208+
pb.registerPeepholeEPCallback(
209+
[&](llvm::FunctionPassManager &fpm, llvm::OptimizationLevel level) {
210+
// The Triton masked load pattern can generate instances where the
211+
// mask-false path appears to cause undefined behavior during
212+
// computation. Even though the result of that behavior will never be
213+
// used, LLVM can choose to optimize away the false path, resulting in
214+
// an incorrect result for the kernel. Adding `DivRemPairsPass`
215+
// introduces freeze instructions, which prevent UB from leaking into
216+
// div/rem instructions.
217+
fpm.addPass(DivRemPairsPass());
218+
});
207219
mpm.addPass(pb.buildPerModuleDefaultPipeline(opt));
208220
mpm.run(*mod, mam);
209221
});

0 commit comments

Comments
 (0)