Skip to content

Commit cc4cf05

Browse files
committed
LLVM freeze instruction between mask and div 2/?
1 parent 2b0fb50 commit cc4cf05

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

third_party/intel/lib/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
add_subdirectory(Analysis)
22
add_subdirectory(Dialect)
33
add_subdirectory(GPUToTritonGEN)
4+
add_subdirectory(LLVMIR)
45
add_subdirectory(Target)
56
add_subdirectory(TritonAnnotateModule)
67
add_subdirectory(TritonGENToLLVM)

third_party/intel/triton_xpu.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "intel/include/TritonAnnotateModule/Passes.h"
1818
#include "intel/include/TritonIntelGPUToLLVM/Passes.h"
1919
#include "intel/include/TritonToTritonGPUWarp/Passes.h"
20+
#include "intel/lib/LLVMIR/LLVMPasses.h"
2021

2122
#include "triton/Target/SPIRV/SPIRVTranslation.h"
2223
#include "triton/Tools/Sys/GetEnv.hpp"
@@ -205,6 +206,7 @@ void init_triton_intel(py::module &&m) {
205206
fpm.addPass(BreakStructPhiNodesPass());
206207
fpm.addPass(InstCombinePass());
207208
});
209+
#if 1
208210
pb.registerPeepholeEPCallback(
209211
[&](llvm::FunctionPassManager &fpm, llvm::OptimizationLevel level) {
210212
// The Triton masked load pattern can generate instances where the
@@ -214,8 +216,12 @@ void init_triton_intel(py::module &&m) {
214216
// an incorrect result for the kernel. Adding `DivRemPairsPass`
215217
// introduces freeze instructions which prevent UB from leaking into
216218
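// div/rem instructions.
// NOTE: this callback now adds `FreezeMaskedDivRemPass` in place of
// `DivRemPairsPass` (see the commented-out call below).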
217-
fpm.addPass(DivRemPairsPass());
219+
// fpm.addPass(DivRemPairsPass());
220+
fpm.addPass(FreezeMaskedDivRemPass());
218221
});
222+
#else
223+
mpm.addPass(createModuleToFunctionPassAdaptor(FreezeMaskedDivRemPass()));
224+
#endif
219225
mpm.addPass(pb.buildPerModuleDefaultPipeline(opt));
220226
mpm.run(*mod, mam);
221227
});

0 commit comments

Comments
 (0)