File tree Expand file tree Collapse file tree 1 file changed +12
-0
lines changed Expand file tree Collapse file tree 1 file changed +12
-0
lines changed Original file line number Diff line number Diff line change 66#include " llvm/Passes/PassPlugin.h"
77#include " llvm/Passes/StandardInstrumentations.h"
88#include " llvm/Transforms/InstCombine/InstCombine.h"
9+ #include " llvm/Transforms/Scalar/DivRemPairs.h"
910
1011#include " intel/include/Dialect/TritonGEN/IR/TritonGENDialect.h"
1112#include " intel/include/Dialect/TritonIntelGPU/IR/Dialect.h"
@@ -204,6 +205,17 @@ void init_triton_intel(py::module &&m) {
204205 fpm.addPass (BreakStructPhiNodesPass ());
205206 fpm.addPass (InstCombinePass ());
206207 });
208+ pb.registerPeepholeEPCallback (
209+ [&](llvm::FunctionPassManager &fpm, llvm::OptimizationLevel level) {
210+ // The Triton masked-load pattern can generate cases where the
211+ // mask-false path appears to trigger undefined behavior during
212+ // computation. Even though the result of that path is never used,
213+ // LLVM may optimize away the false path, producing an incorrect
214+ // result for the kernel. Adding `DivRemPairsPass` introduces freeze
215+ // instructions, which prevent UB from leaking into div/rem
216+ // instructions.
217+ fpm.addPass (DivRemPairsPass ());
218+ });
207219 mpm.addPass (pb.buildPerModuleDefaultPipeline (opt));
208220 mpm.run (*mod, mam);
209221 });
You can’t perform that action at this time.
0 commit comments