Skip to content

Commit 4392fde

Browse files
committed
LLVM freeze instruction between mask and div 2/?
1 parent a7839c0 commit 4392fde

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

third_party/intel/lib/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
add_subdirectory(Analysis)
22
add_subdirectory(Dialect)
33
add_subdirectory(GPUToTritonGEN)
4+
add_subdirectory(LLVMIR)
45
add_subdirectory(Target)
56
add_subdirectory(TritonAnnotateModule)
67
add_subdirectory(TritonGENToLLVM)

third_party/intel/triton_xpu.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "intel/include/TritonAnnotateModule/Passes.h"
1818
#include "intel/include/TritonIntelGPUToLLVM/Passes.h"
1919
#include "intel/include/TritonToTritonGPUWarp/Passes.h"
20+
#include "intel/lib/LLVMIR/LLVMPasses.h"
2021

2122
#include "triton/Target/SPIRV/SPIRVTranslation.h"
2223
#include "triton/Tools/Sys/GetEnv.hpp"
@@ -207,6 +208,7 @@ void init_triton_intel(py::module &&m) {
207208
fpm.addPass(BreakStructPhiNodesPass());
208209
fpm.addPass(InstCombinePass());
209210
});
211+
#if 1
210212
pb.registerPeepholeEPCallback(
211213
[&](llvm::FunctionPassManager &fpm, llvm::OptimizationLevel level) {
212214
// The Triton masked load pattern can generate instances where the
@@ -216,8 +218,12 @@ void init_triton_intel(py::module &&m) {
216218
// an incorrect result for the kernel. `DivRemPairsPass` was added here
217219
// because it introduces freeze instructions which prevent UB from leaking
218220
// into div/rem instructions; `FreezeMaskedDivRemPass` now replaces it.
219-
fpm.addPass(DivRemPairsPass());
221+
// fpm.addPass(DivRemPairsPass());
222+
fpm.addPass(FreezeMaskedDivRemPass());
220223
});
224+
#else
225+
mpm.addPass(createModuleToFunctionPassAdaptor(FreezeMaskedDivRemPass()));
226+
#endif
221227
mpm.addPass(pb.buildPerModuleDefaultPipeline(opt));
222228
mpm.run(*mod, mam);
223229
});

0 commit comments

Comments
 (0)