Skip to content

Commit c94bbe6

Browse files
committed
LLVM freeze instruction between mask and div 2/?
1 parent e9c317d commit c94bbe6

File tree

2 files changed: +8 −1 lines changed

third_party/intel/lib/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
add_subdirectory(Analysis)
22
add_subdirectory(Dialect)
33
add_subdirectory(GPUToTritonGEN)
4+
add_subdirectory(LLVMIR)
45
add_subdirectory(Target)
56
add_subdirectory(TritonAnnotateModule)
67
add_subdirectory(TritonGENToLLVM)

third_party/intel/triton_xpu.cc

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include "intel/include/TritonAnnotateModule/Passes.h"
1818
#include "intel/include/TritonIntelGPUToLLVM/Passes.h"
1919
#include "intel/include/TritonToTritonGPUWarp/Passes.h"
20+
#include "intel/lib/LLVMIR/LLVMPasses.h"
2021

2122
#include "triton/Target/SPIRV/SPIRVTranslation.h"
2223
#include "triton/Tools/Sys/GetEnv.hpp"
@@ -208,6 +209,7 @@ void init_triton_intel(py::module &&m) {
208209
fpm.addPass(BreakStructPhiNodesPass());
209210
fpm.addPass(InstCombinePass());
210211
});
212+
#if 1
211213
pb.registerPeepholeEPCallback(
212214
[&](llvm::FunctionPassManager &fpm, llvm::OptimizationLevel level) {
213215
// The Triton masked load pattern can generate instances where the
@@ -217,8 +219,12 @@ void init_triton_intel(py::module &&m) {
217219
// an incorrect result for the kernel. Adding `DivRemPairsPass`
218220
// introduces freeze instructions which prevent UB from leaking into
219221
// div/rem instructions.
220-
fpm.addPass(DivRemPairsPass());
222+
// fpm.addPass(DivRemPairsPass());
223+
fpm.addPass(FreezeMaskedDivRemPass());
221224
});
225+
#else
226+
mpm.addPass(createModuleToFunctionPassAdaptor(FreezeMaskedDivRemPass()));
227+
#endif
222228
mpm.addPass(pb.buildPerModuleDefaultPipeline(opt));
223229
mpm.run(*mod, mam);
224230
});

0 commit comments

Comments
 (0)