File tree Expand file tree Collapse file tree 1 file changed +12
-0
lines changed Expand file tree Collapse file tree 1 file changed +12
-0
lines changed Original file line number Diff line number Diff line change 66#include " llvm/Passes/PassPlugin.h"
77#include " llvm/Passes/StandardInstrumentations.h"
88#include " llvm/Transforms/InstCombine/InstCombine.h"
9+ #include " llvm/Transforms/Scalar/DivRemPairs.h"
910
1011#include " intel/include/Dialect/TritonGEN/IR/TritonGENDialect.h"
1112#include " intel/include/Dialect/TritonIntelGPU/IR/Dialect.h"
@@ -206,6 +207,17 @@ void init_triton_intel(py::module &&m) {
206207 fpm.addPass (BreakStructPhiNodesPass ());
207208 fpm.addPass (InstCombinePass ());
208209 });
210+ pb.registerPeepholeEPCallback (
211+ [&](llvm::FunctionPassManager &fpm, llvm::OptimizationLevel level) {
212+ // Triton's masked-load pattern can generate code in which the
213+ // mask-false path appears to invoke undefined behavior during
214+ // computation. Even though the result of that path is never used,
215+ // LLVM may optimize the mask-false path away, which produces an
216+ // incorrect result for the kernel. Adding `DivRemPairsPass`
217+ // introduces freeze instructions that prevent UB from leaking into
218+ // div/rem instructions.
219+ fpm.addPass (DivRemPairsPass ());
220+ });
209221 mpm.addPass (pb.buildPerModuleDefaultPipeline (opt));
210222 mpm.run (*mod, mam);
211223 });
You can’t perform that action at this time.
0 commit comments