Skip to content

Commit 6fcd40f

Browse files
mshelegoigcbot
authored and committed
Prevent constant folding inside loops
When vector constants are loaded using wrregions only (without genx.constant intrinsics), they are immediately folded back into constants. Keep the ones that are inside loops unfolded so that LICM can later hoist them out of the loop.
1 parent 79467be commit 6fcd40f

File tree

4 files changed

+57
-9
lines changed

4 files changed

+57
-9
lines changed

IGC/VectorCompiler/include/GenXRegionUtils.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RdWrRegionSequence &RWS) {
166166

167167
Value *simplifyRegionInst(Instruction *Inst, const DataLayout *DL = nullptr,
168168
const GenXSubtarget *ST = nullptr,
169-
const DominatorTree *DT = nullptr);
169+
const DominatorTree *DT = nullptr,
170+
const Loop *L = nullptr);
170171
bool simplifyRegionInsts(Function *F, const DataLayout *DL = nullptr,
171172
const GenXSubtarget *ST = nullptr,
172-
const DominatorTree *DT = nullptr);
173+
const DominatorTree *DT = nullptr,
174+
const LoopInfo *LI = nullptr);
173175

174176
bool IsLinearVectorConstantInts(Value *v, int64_t &start, int64_t &stride);
175177

IGC/VectorCompiler/lib/GenXCodeGen/GenXPostLegalization.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2022 Intel Corporation
3+
Copyright (C) 2017-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -60,6 +60,7 @@ namespace {
6060
class GenXPostLegalization : public FunctionPass {
6161
const DataLayout *DL = nullptr;
6262
const GenXSubtarget *ST = nullptr;
63+
const LoopInfo *LI = nullptr;
6364
public:
6465
static char ID;
6566
explicit GenXPostLegalization() : FunctionPass(ID) { }
@@ -77,6 +78,7 @@ char GenXPostLegalization::ID = 0;
7778
namespace llvm { void initializeGenXPostLegalizationPass(PassRegistry &); }
7879
INITIALIZE_PASS_BEGIN(GenXPostLegalization, "GenXPostLegalization",
7980
"GenXPostLegalization", false, false)
81+
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
8082
INITIALIZE_PASS_END(GenXPostLegalization, "GenXPostLegalization",
8183
"GenXPostLegalization", false, false)
8284

@@ -87,6 +89,8 @@ FunctionPass *llvm::createGenXPostLegalizationPass() {
8789

8890
void GenXPostLegalization::getAnalysisUsage(AnalysisUsage &AU) const {
8991
AU.addRequired<TargetPassConfig>();
92+
AU.addRequired<LoopInfoWrapperPass>();
93+
AU.addPreserved<LoopInfoWrapperPass>();
9094
AU.setPreservesCFG();
9195
}
9296

@@ -99,6 +103,7 @@ bool GenXPostLegalization::runOnFunction(Function &F)
99103
ST = &getAnalysis<TargetPassConfig>()
100104
.getTM<GenXTargetMachine>()
101105
.getGenXSubtarget();
106+
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
102107

103108
VectorDecomposer VD(ST);
104109

@@ -140,7 +145,7 @@ bool GenXPostLegalization::runOnFunction(Function &F)
140145
// Run the vector decomposer for this function.
141146
Modified |= VD.run(*DL);
142147
// Cleanup region reads and writes.
143-
Modified |= simplifyRegionInsts(&F, DL, ST);
148+
Modified |= simplifyRegionInsts(&F, DL, ST, nullptr, LI);
144149
// Cleanup constant loads.
145150
std::vector<CallInst *> ConstList;
146151
Modified |= cleanupConstantLoads(&F, ConstList);

IGC/VectorCompiler/lib/GenXCodeGen/GenXRegionUtils.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2023 Intel Corporation
3+
Copyright (C) 2017-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -1132,7 +1132,7 @@ static Value *simplifyRegionRead(Instruction *Inst, const DataLayout *DL) {
11321132
// Simplify a region read or write.
11331133
Value *llvm::genx::simplifyRegionInst(Instruction *Inst, const DataLayout *DL,
11341134
const GenXSubtarget *ST,
1135-
const DominatorTree *DT) {
1135+
const DominatorTree *DT, const Loop *L) {
11361136
if (Inst->use_empty())
11371137
return nullptr;
11381138

@@ -1161,7 +1161,11 @@ Value *llvm::genx::simplifyRegionInst(Instruction *Inst, const DataLayout *DL,
11611161
}
11621162

11631163
if (Constant *C = ConstantFoldGenX(Inst, *DL))
1164-
return C;
1164+
// The instruction can be folded to a constant, so other simplifications are
1165+
// not relevant. If the instruction is inside a loop it shouldn't be
1166+
// substituted with a constant here so it could be moved out from the loop
1167+
// later.
1168+
return L ? nullptr : C;
11651169

11661170
if (auto *BCI = dyn_cast<BitCastInst>(Inst); BCI && DL && ST) {
11671171
NewVal = simplifyBitCastFromRegionRead(BCI, *DL, *ST);
@@ -1194,12 +1198,14 @@ Value *llvm::genx::simplifyRegionInst(Instruction *Inst, const DataLayout *DL,
11941198

11951199
bool llvm::genx::simplifyRegionInsts(Function *F, const DataLayout *DL,
11961200
const GenXSubtarget *ST,
1197-
const DominatorTree *DT) {
1201+
const DominatorTree *DT,
1202+
const LoopInfo *LI) {
11981203
bool Changed = false;
11991204
for (auto &BB : *F) {
1205+
auto *L = LI->getLoopFor(&BB);
12001206
for (auto I = BB.begin(); I != BB.end();) {
12011207
Instruction *Inst = &*I++;
1202-
if (auto V = simplifyRegionInst(Inst, DL, ST, DT)) {
1208+
if (auto V = simplifyRegionInst(Inst, DL, ST, DT, L)) {
12031209
if (isa<Instruction>(V) &&
12041210
!genx::isSafeToReplace_CheckAVLoadKillOrForbiddenUser(
12051211
Inst, cast<Instruction>(V), DT))
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: %opt %use_old_pass_manager% -GenXPostLegalization -licm -march=genx64 -mcpu=XeHPC -mtriple=spir64 -S < %s | FileCheck %s
10+
11+
define void @test(i64 %arg) {
12+
entry:
13+
; CHECK-LABEL: entry
14+
; CHECK-NEXT: [[ZEROES_0:%[^ ]+]] = call <64 x i64> @llvm.genx.wrregioni.v64i64.v16i64.i16.i1(<64 x i64> undef, <16 x i64> zeroinitializer, i32 16, i32 16, i32 1, i16 0, i32 undef, i1 true)
15+
; CHECK-NEXT: [[ZEROES_16:%[^ ]+]] = call <64 x i64> @llvm.genx.wrregioni.v64i64.v16i64.i16.i1(<64 x i64> [[ZEROES_0]], <16 x i64> zeroinitializer, i32 16, i32 16, i32 1, i16 128, i32 undef, i1 true)
16+
; CHECK-NEXT: [[ZEROES_32:%[^ ]+]] = call <64 x i64> @llvm.genx.wrregioni.v64i64.v16i64.i16.i1(<64 x i64> [[ZEROES_16]], <16 x i64> zeroinitializer, i32 16, i32 16, i32 1, i16 256, i32 undef, i1 true)
17+
; CHECK-NEXT: [[ZEROES_48:%[^ ]+]] = call <64 x i64> @llvm.genx.wrregioni.v64i64.v16i64.i16.i1(<64 x i64> [[ZEROES_32]], <16 x i64> zeroinitializer, i32 16, i32 16, i32 1, i16 384, i32 undef, i1 true)
18+
; CHECK-NEXT: br label %loop
19+
br label %loop
20+
21+
loop:
22+
%offset = phi i32 [ 0, %entry ], [ %offset.next, %loop ]
23+
%offset.zext = zext i32 %offset to i64
24+
%addr = add i64 %arg, %offset.zext
25+
; CHECK: tail call void @llvm.vc.internal.lsc.store.ugm.i1.v2i8.i64.v64i64(i1 true, i8 3, i8 4, i8 8, <2 x i8> zeroinitializer, i64 0, i64 %addr, i16 1, i32 0, <64 x i64> [[ZEROES_48]])
26+
tail call void @llvm.vc.internal.lsc.store.ugm.i1.v2i8.i64.v64i64(i1 true, i8 3, i8 4, i8 8, <2 x i8> zeroinitializer, i64 0, i64 %addr, i16 1, i32 0, <64 x i64> zeroinitializer)
27+
%offset.next = add nuw nsw i32 %offset, 512
28+
%cmp = icmp ult i32 %offset.next, 32256
29+
br i1 %cmp, label %loop, label %end
30+
31+
end:
32+
ret void
33+
}
34+
35+
declare void @llvm.vc.internal.lsc.store.ugm.i1.v2i8.i64.v64i64(i1, i8, i8, i8, <2 x i8>, i64, i64, i16, i32, <64 x i64>)

0 commit comments

Comments
 (0)