Skip to content

Commit 8338f43

Browse files
mmereckiigcbot
authored andcommitted
Add SLM fences before EOT to complete all pending SLM stores
The `AddRequiredMemoryFences` pass finds all SLM stores and atomic operations that are not followed by an SLM fence before the EOT. If any unfenced instructions are found, the pass inserts an LSC SLM fence in the nearest common post-dominator block.
1 parent 6a215e5 commit 8338f43

File tree

8 files changed

+494
-0
lines changed

8 files changed

+494
-0
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ SPDX-License-Identifier: MIT
7474
#include "Compiler/CISACodeGen/FPRoundingModeCoalescing.hpp"
7575

7676
#include "Compiler/CISACodeGen/SLMConstProp.hpp"
77+
#include "Compiler/Legalizer/AddRequiredMemoryFences.h"
7778
#include "Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.hpp"
7879
#include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryUsageAnalysis.hpp"
7980
#include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.hpp"
@@ -1116,6 +1117,14 @@ void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature
11161117
mpm.add(new RuntimeValueLegalizationPass());
11171118
}
11181119

1120+
if (ctx.m_instrTypes.hasLocalLoadStore &&
1121+
ctx.platform.hasLSC() &&
1122+
!ctx.platform.NeedsLSCFenceUGMBeforeEOT() && // VISA will add the fence
1123+
IGC_IS_FLAG_DISABLED(DisableAddRequiredMemoryFencesPass))
1124+
{
1125+
mpm.add(createAddRequiredMemoryFencesPass());
1126+
}
1127+
11191128
mpm.add(createInstSimplifyLegacyPass());
11201129
// This pass inserts bitcasts for vector loads/stores.
11211130
// This pass could be moved further toward EmitPass.
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#include "CISACodeGen/helper.h"
10+
#include "common/IGCIRBuilder.h"
11+
#include "Compiler/IGCPassSupport.h"
12+
#include "GenISAIntrinsics/GenIntrinsicInst.h"
13+
#include "GenISAIntrinsics/GenIntrinsics.h"
14+
#include "Probe/Assertion.h"
15+
#include "AddRequiredMemoryFences.h"
16+
17+
#include "common/LLVMWarningsPush.hpp"
18+
#include "llvm/ADT/PostOrderIterator.h"
19+
#include "llvm/Analysis/LoopInfo.h"
20+
#include "llvm/IR/CFG.h"
21+
#include "llvm/IR/Dominators.h"
22+
#include "common/LLVMWarningsPop.hpp"
23+
24+
using namespace llvm;
25+
26+
namespace IGC
27+
{
28+
////////////////////////////////////////////////////////////////////////////////
29+
// @brief This pass inserts SLM fences after the last SLM store or SLM atomic
30+
// instruction(s) in the function.
31+
class AddRequiredMemoryFences : public llvm::FunctionPass
32+
{
33+
public:
34+
static char ID;
35+
36+
AddRequiredMemoryFences();
37+
38+
void getAnalysisUsage(llvm::AnalysisUsage& AU) const
39+
{
40+
AU.setPreservesCFG();
41+
AU.addRequired<PostDominatorTreeWrapperPass>();
42+
AU.addPreserved<PostDominatorTreeWrapperPass>();
43+
AU.addRequired<LoopInfoWrapperPass>();
44+
AU.addPreserved<LoopInfoWrapperPass>();
45+
}
46+
47+
StringRef getPassName() const { return "AddRequiredMemoryFences"; }
48+
49+
bool runOnFunction(Function& F);
50+
};
51+
char AddRequiredMemoryFences::ID = 0;
52+
53+
// Register the pass so that it can be requested from igc-opt with the
// PASS_FLAG option. The two analyses listed as dependencies are the ones the
// pass requires in getAnalysisUsage(). The helper macros are local to this
// registration and are undefined again right after it.
54+
#define PASS_FLAG "igc-add-required-memory-fences"
55+
#define PASS_DESCRIPTION "Add memory fences required by the HW memory model"
56+
#define PASS_CFG_ONLY false
57+
#define PASS_ANALYSIS false
58+
IGC_INITIALIZE_PASS_BEGIN(AddRequiredMemoryFences, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
59+
IGC_INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
60+
IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
61+
IGC_INITIALIZE_PASS_END(AddRequiredMemoryFences, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
62+
#undef PASS_ANALYSIS
63+
#undef PASS_CFG_ONLY
64+
#undef PASS_DESCRIPTION
65+
#undef PASS_FLAG
66+
67+
////////////////////////////////////////////////////////////////////////////////
68+
// Constructs the pass and registers it with the global pass registry.
AddRequiredMemoryFences::AddRequiredMemoryFences()
    : FunctionPass(ID)
{
    PassRegistry& registry = *PassRegistry::getPassRegistry();
    initializeAddRequiredMemoryFencesPass(registry);
}
72+
73+
////////////////////////////////////////////////////////////////////////////////
74+
// Factory used by the pass pipeline: returns a freshly allocated instance of
// the pass. NOTE(review): the caller (presumably the pass manager the result
// is added to) is expected to take ownership of the returned object.
FunctionPass* createAddRequiredMemoryFencesPass()
{
    FunctionPass* const pass = new AddRequiredMemoryFences();
    return pass;
}
78+
79+
////////////////////////////////////////////////////////////////////////////////
80+
// Returns true when `inst` is a fence intrinsic that applies to SLM:
// - GenISA_LSCFence whose first operand selects the LSC_SLM SFID, or
// - GenISA_memoryfence whose operand 5 is false.
//   NOTE(review): operand 5 looks like the "global" flag of the legacy fence,
//   i.e. false would mean a local (SLM) fence - confirm against the intrinsic
//   definition.
// Any other instruction is not an SLM fence.
inline bool IsSlmFence(Instruction* inst)
{
    GenIntrinsicInst* const fence = dyn_cast<GenIntrinsicInst>(inst);
    if (!fence)
    {
        return false;
    }

    switch (fence->getIntrinsicID())
    {
    case GenISAIntrinsic::GenISA_LSCFence:
        return LSC_SFID::LSC_SLM == getImmValueEnum<LSC_SFID>(fence->getOperand(0));
    case GenISAIntrinsic::GenISA_memoryfence:
        return !getImmValueBool(fence->getOperand(5));
    default:
        return false;
    }
}
98+
99+
////////////////////////////////////////////////////////////////////////////////
100+
// Returns true when `inst` writes to SLM and may therefore need a fence:
// - a plain LLVM store,
// - a stateless memory store intrinsic, or
// - a stateless memory atomic intrinsic whose result has no uses,
// and the accessed pointer is in the local address space.
inline bool IsSlmStoreOrAtomic(Instruction* inst)
{
    Instruction* memWrite = nullptr;

    GenIntrinsicInst* const intrinsic = dyn_cast<GenIntrinsicInst>(inst);
    if (!intrinsic)
    {
        memWrite = dyn_cast<StoreInst>(inst);
    }
    else
    {
        // This pass assumes that the input shader is optimized: an atomic is
        // only treated as needing the fence when its result is unused.
        const GenISAIntrinsic::ID id = intrinsic->getIntrinsicID();
        const bool isStore = IsStatelessMemStoreIntrinsic(id);
        const bool isUnusedAtomic =
            !isStore &&
            intrinsic->getNumUses() == 0 &&
            IsStatelessMemAtomicIntrinsic(*intrinsic, id);
        if (isStore || isUnusedAtomic)
        {
            memWrite = intrinsic;
        }
    }

    if (!memWrite)
    {
        return false;
    }

    Value* const address = GetBufferOperand(memWrite);
    IGC_ASSERT(address && address->getType()->isPointerTy());
    return address &&
        address->getType()->getPointerAddressSpace() == ADDRESS_SPACE_LOCAL;
}
131+
132+
////////////////////////////////////////////////////////////////////////////////
133+
// @brief Inserts an LSC SLM fence for every function exit that can be reached
// by an SLM store or SLM atomic instruction without passing through an SLM
// fence.
// @param F function to process.
// @return true if at least one fence was inserted, false if F is unchanged.
bool AddRequiredMemoryFences::runOnFunction(Function& F)
{
    PostDominatorTree* const PDT = &getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    LoopInfo* const LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

    bool modified = false;

    // The high-level algorithm is:
    // for each function exit:
    //   - walk the CFG backwards from the exit, for each basic block check
    //     instructions, start from the last instruction:
    //     - stop at this block if it contains an SLM fence
    //     - or stop at this block if it contains an SLM store or SLM atomic
    //       instruction, remember the block
    //     - otherwise continue with the predecessors of the block
    //   - find the common post-dominator block for all unfenced SLM store
    //     or atomic instructions
    //   - if the common post-dominator block is in a loop find the outermost
    //     loop, and find the common post-dominator block for all loop exits
    //   - insert an SLM fence at the end of the common post-dominator block
    for (BasicBlock* rootBB : PDT->roots())
    {
        if (isa<UnreachableInst>(rootBB->getTerminator()))
        {
            continue;
        }
        // A block is marked as seen when it is queued, so that it is never
        // queued (and processed) more than once.
        SmallPtrSet<BasicBlock*, 16> seen{ rootBB };
        SmallVector<BasicBlock*, 16> worklist{ rootBB };
        SmallVector<BasicBlock*, 8> unfenced;
        while (!worklist.empty())
        {
            BasicBlock* BB = worklist.back();
            worklist.pop_back();

            // The classification is per basic block. It must not leak from
            // one block to the next: a fence found on one path says nothing
            // about the other paths, and a store found in one block does not
            // make the remaining blocks unfenced.
            bool hasUnfencedSlmStore = false;
            bool hasSlmFence = false;
            for (auto II = BB->rbegin(), IE = BB->rend(); II != IE; ++II)
            {
                if (IsSlmFence(&(*II)))
                {
                    hasSlmFence = true;
                    break;
                }
                else if (IsSlmStoreOrAtomic(&(*II)))
                {
                    hasUnfencedSlmStore = true;
                    break;
                }
            }
            if (hasUnfencedSlmStore)
            {
                unfenced.push_back(BB);
            }
            else if (!hasSlmFence)
            {
                // Nothing relevant in this block, keep walking backwards.
                for (BasicBlock* pred : predecessors(BB))
                {
                    if (seen.insert(pred).second)
                    {
                        worklist.push_back(pred);
                    }
                }
            }
        }
        if (!unfenced.empty())
        {
            // Lambda finds a common post-dominator block for a set of basic
            // blocks. Returns nullptr if the set is empty or if the only
            // common post-dominator is the virtual root of the tree (i.e.
            // there is no real block that post-dominates all the blocks).
            auto FindPostDominator = [PDT](const auto& blocks) -> BasicBlock*
            {
                BasicBlock* postDomBB = nullptr;
                for (BasicBlock* block : blocks)
                {
                    postDomBB = postDomBB ?
                        PDT->findNearestCommonDominator(postDomBB, block) :
                        block;
                    if (!postDomBB)
                    {
                        // A null block must not be fed back into the query.
                        break;
                    }
                }
                return postDomBB;
            };
            BasicBlock* postDomBB = FindPostDominator(unfenced);
            Loop* L = postDomBB ? LI->getLoopFor(postDomBB) : nullptr;
            if (L)
            {
                // Do not place the fence inside a loop; move the insertion
                // point past the exits of the outermost enclosing loop.
                while (!L->isOutermost())
                {
                    L = L->getParentLoop();
                }
                SmallVector<BasicBlock*, 4> exitBlocks;
                L->getUniqueExitBlocks(exitBlocks);
                postDomBB = FindPostDominator(exitBlocks);
            }
            if (!postDomBB)
            {
                // No real common post-dominator exists (possible when the
                // function has more than one exit). Every unfenced block was
                // found by walking backwards from rootBB, so a fence right
                // before the terminator of rootBB covers all of them on the
                // paths that end in this exit.
                postDomBB = rootBB;
            }
            IGC_ASSERT(postDomBB);
            IGCIRBuilder<> IRB(postDomBB->getTerminator());
            Function* fenceFuncPtr = GenISAIntrinsic::getDeclaration(
                F.getParent(),
                GenISAIntrinsic::GenISA_LSCFence);
            Value* args[] =
            {
                IRB.getInt32(LSC_SLM),
                IRB.getInt32(LSC_SCOPE_GROUP),
                IRB.getInt32(LSC_FENCE_OP_NONE)
            };
            IRB.CreateCall(fenceFuncPtr, args);
            modified = true;
        }
    }
    return modified;
}
238+
} // namespace IGC
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#pragma once
10+
#include "common/LLVMWarningsPush.hpp"
11+
#include "llvm/Pass.h"
12+
#include "llvm/PassRegistry.h"
13+
#include "common/LLVMWarningsPop.hpp"
14+
namespace IGC
15+
{
16+
// Returns a newly allocated AddRequiredMemoryFences pass, which inserts SLM
// fences after SLM stores/atomics that are not followed by an SLM fence.
llvm::FunctionPass* createAddRequiredMemoryFencesPass();
17+
// Registers the AddRequiredMemoryFences pass in the given pass registry.
// NOTE(review): the definition is presumably generated by the
// IGC_INITIALIZE_PASS_* macros in AddRequiredMemoryFences.cpp - confirm.
void initializeAddRequiredMemoryFencesPass(llvm::PassRegistry&);
18+
}

IGC/Compiler/Legalizer/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
1010

1111

1212
set(IGC_BUILD__SRC__Legalizer
13+
"${CMAKE_CURRENT_SOURCE_DIR}/AddRequiredMemoryFences.cpp"
1314
"${CMAKE_CURRENT_SOURCE_DIR}/InstLegalChecker.cpp"
1415
"${CMAKE_CURRENT_SOURCE_DIR}/InstPromoter.cpp"
1516
"${CMAKE_CURRENT_SOURCE_DIR}/TypeLegalizer.cpp"
@@ -18,6 +19,7 @@ set(IGC_BUILD__SRC__Legalizer
1819
set(IGC_BUILD__SRC__Compiler_Legalizer ${IGC_BUILD__SRC__Legalizer} PARENT_SCOPE)
1920

2021
set(IGC_BUILD__HDR__Legalizer
22+
"${CMAKE_CURRENT_SOURCE_DIR}/AddRequiredMemoryFences.h"
2123
"${CMAKE_CURRENT_SOURCE_DIR}/InstLegalChecker.h"
2224
"${CMAKE_CURRENT_SOURCE_DIR}/InstPromoter.h"
2325
"${CMAKE_CURRENT_SOURCE_DIR}/TypeLegalizer.h"
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
10+
; REQUIRES: llvm-14-plus
11+
; RUN: igc_opt --opaque-pointers %s -S --platformdg2 --inputcs --igc-add-required-memory-fences | FileCheck %s
12+
13+
14+
; Test that the pass adds an SLM fence right after the store, in the only block that contains an unfenced SLM store.
15+
define void @f2(ptr addrspace(3) %address, i1 %cond) {
16+
Label-0:
17+
br i1 %cond, label %Label-True, label %Label-End
18+
Label-True:
19+
store i32 0, ptr addrspace(3) %address
20+
br label %Label-End
21+
Label-End:
22+
ret void
23+
}
24+
; CHECK-LABEL: define void @f2
25+
; CHECK-LABEL: Label-True:
26+
; CHECK-NEXT: store i32 0, ptr addrspace(3) %address
27+
; CHECK-NEXT: call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
28+
; CHECK-NOT: call void @llvm.genx.GenISA.LSCFence({{.*}})
29+
; CHECK: ret void
30+
31+
; Test that the pass adds a single SLM fence in the last basic block (the common post-dominator of both stores).
32+
define void @f3(ptr addrspace(3) %address, i1 %cond) {
33+
Label-0:
34+
store i32 0, ptr addrspace(3) %address
35+
br i1 %cond, label %Label-True, label %Label-End
36+
Label-True:
37+
store i32 0, ptr addrspace(3) %address
38+
br label %Label-End
39+
Label-End:
40+
ret void
41+
}
42+
; CHECK-LABEL: define void @f3
43+
; CHECK-NOT: call void @llvm.genx.GenISA.LSCFence({{.*}})
44+
; CHECK-LABEL: Label-End:
45+
; CHECK-NEXT: call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
46+
; CHECK-NOT: call void @llvm.genx.GenISA.LSCFence({{.*}})
47+
; CHECK: ret void
48+
49+
; Test that the pass makes no changes when all stores are already fenced.
50+
define void @f4(ptr addrspace(3) %address, i1 %cond) {
51+
Label-0:
52+
br i1 %cond, label %Label-True, label %Label-End
53+
Label-True:
54+
store i32 0, ptr addrspace(3) %address
55+
br label %Label-End
56+
Label-End:
57+
call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
58+
ret void
59+
}
60+
; CHECK-LABEL: define void @f4
61+
; CHECK-LABEL: Label-True:
62+
; CHECK: store i32 0, ptr addrspace(3) %address
63+
; CHECK-NOT: call void @llvm.genx.GenISA.LSCFence({{.*}})
64+
; CHECK-LABEL: Label-End:
65+
; CHECK: call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
66+
; CHECK-NOT: call void @llvm.genx.GenISA.LSCFence({{.*}})
67+
; CHECK: ret void
68+
69+
; Function Attrs: convergent nounwind
70+
declare void @llvm.genx.GenISA.LSCFence(i32, i32, i32) #2
71+
; Function Attrs: argmemonly nounwind
72+
declare i32 @llvm.genx.GenISA.intatomicraw.i32.p3(ptr addrspace(3), i32, i32, i32) #3
73+
74+
attributes #1 = { "null-pointer-is-valid"="true" }
75+
attributes #2 = { convergent nounwind }
76+
attributes #3 = { argmemonly nounwind }
77+
attributes #4 = { nounwind }

0 commit comments

Comments
 (0)