Skip to content

Commit 02e8acf

Browse files
agrabezh authored and igcbot committed
Enable BCR for kernels with low register pressure
Adding IGCRegisterPressurePublisher pass to make register pressure estimation available in CISABuilder. Enable BCR for kernels with low register pressure.
1 parent 48d72f9 commit 02e8acf

File tree

8 files changed

+145
-5
lines changed

8 files changed

+145
-5
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4042,7 +4042,13 @@ void CEncoder::InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbor
40424042
if (IGC_IS_FLAG_ENABLED(EnableBCR)) {
40434043
SaveOption(vISA_enableBCR, true);
40444044
}
4045-
if (context->type == ShaderType::OPENCL_SHADER && m_program->m_Platform->limitedBCR()) {
4045+
4046+
auto funcInfoMD = context->getMetaDataUtils()->getFunctionsInfoItem(m_program->entry);
4047+
uint32_t MaxRegPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
4048+
uint32_t RegPressureThreshold = (uint32_t) (context->getNumGRFPerThread(true) * 0.6);
4049+
4050+
if (context->type == ShaderType::OPENCL_SHADER &&
4051+
(m_program->m_Platform->limitedBCR() || MaxRegPressure < RegPressureThreshold)) {
40464052
SaveOption(vISA_enableBCR, true);
40474053
}
40484054
if (context->type == ShaderType::OPENCL_SHADER && m_program->m_Platform->supportDpasInstruction()) {

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,3 +572,41 @@ bool IGCRegisterPressurePrinter::runOnFunction(llvm::Function &F) {
572572

573573
return false;
574574
}
575+
576+
577+
// Static pass identifier; it is handed to the FunctionPass base class by the
// IGCRegisterPressurePublisher constructor.
char IGCRegisterPressurePublisher::ID = 0;
578+
// Register pass to igc-opt
579+
// Command-line flag name for the pass (the lit test in this commit runs it as
// -igc-pressure-publisher).
#define PASS_FLAG3 "igc-pressure-publisher"
580+
#define PASS_DESCRIPTION3 "puts metadata with register pressure estimation"
581+
// NOTE(review): these two macros use the suffix 1 while the flag/description
// above use the suffix 3. If PASS_CFG_ONLY1 / PASS_ANALYSIS1 are already
// defined earlier in this file, these lines redefine them -- confirm.
#define PASS_CFG_ONLY1 false
582+
#define PASS_ANALYSIS1 false
583+
// Pass registration block. The dependencies listed here are WIAnalysis,
// CodeGenContextWrapper, IGCFunctionExternalRegPressureAnalysis and
// IGCLivenessAnalysis.
// NOTE(review): getAnalysisUsage() of this pass also requires
// MetaDataUtilsWrapper, which is not listed as a dependency here -- confirm
// whether that omission is intentional.
IGC_INITIALIZE_PASS_BEGIN(IGCRegisterPressurePublisher, PASS_FLAG3, PASS_DESCRIPTION3, PASS_CFG_ONLY1, PASS_ANALYSIS1)
584+
IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
585+
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
586+
IGC_INITIALIZE_PASS_DEPENDENCY(IGCFunctionExternalRegPressureAnalysis)
587+
IGC_INITIALIZE_PASS_DEPENDENCY(IGCLivenessAnalysis)
588+
IGC_INITIALIZE_PASS_END(IGCRegisterPressurePublisher, PASS_FLAG3, PASS_DESCRIPTION3, PASS_CFG_ONLY1, PASS_ANALYSIS1)
589+
590+
// Constructs the pass with its static ID and calls
// initializeIGCRegisterPressurePublisherPass on the global PassRegistry.
IGCRegisterPressurePublisher::IGCRegisterPressurePublisher() : FunctionPass(ID) {
591+
initializeIGCRegisterPressurePublisherPass(*PassRegistry::getPassRegistry());
592+
// NOTE(review): the ';' after the closing brace below is a redundant empty
// declaration.
};
593+
594+
// Publishes a register pressure estimate for F into its function metadata.
//
// Fetches the required analyses and, when checkPublishRegPressureMetadata(F)
// returns 0, computes the maximum register count for F and publishes that
// value plus ExternalPressure through the liveness analysis.
//
// Returns true on every path, including the one where nothing is published.
// NOTE(review): getAnalysisUsage() of this pass calls setPreservesAll();
// confirm that an unconditional `true` is the intended return value.
bool IGCRegisterPressurePublisher::runOnFunction(llvm::Function &F) {
595+
596+
// Value reported by getExternalPressureForFunction for F; it is added to F's
// own estimate when publishing below.
ExternalPressure = getAnalysis<IGCFunctionExternalRegPressureAnalysis>().getExternalPressureForFunction(&F);
597+
RPE = &getAnalysis<IGCLivenessAnalysis>();
598+
WI = &getAnalysis<WIAnalysis>();
599+
// NOTE(review): CGCtx is stored here but not read anywhere else in this
// function.
CGCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
600+
// Reset the per-function result so a value from a previous run is not reused.
MaxPressureInFunction = 0;
601+
602+
// Lane count for the SIMD size returned by bestGuessSIMDSize. It is computed
// before the "already published" check, although only the publishing branch
// uses it.
unsigned int SIMD = numLanes(RPE->bestGuessSIMDSize(&F));
603+
604+
// if we have some published metadata already don't do anything
605+
// A stored value of 0 is treated the same as "nothing published".
bool NotPublishedAlready = RPE->checkPublishRegPressureMetadata(F) == 0;
606+
607+
if (NotPublishedAlready) {
608+
MaxPressureInFunction = RPE->getMaxRegCountForFunction(F, SIMD, &WI->Runner);
609+
RPE->publishRegPressureMetadata(F, MaxPressureInFunction + ExternalPressure);
610+
}
611+
return true;
612+
}

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,19 @@ class IGCLivenessAnalysis : public llvm::FunctionPass, public IGCLivenessAnalysi
8888
if (MDUtils->findFunctionsInfoItem(&F) != MDUtils->end_FunctionsInfo()) {
8989
IGC::IGCMD::FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F);
9090
funcInfoMD->getMaxRegPressure()->setMaxPressure(MaxPressure);
91+
MDUtils->save(F.getContext());
9192
}
9293
}
9394

95+
// Reads the max register pressure currently recorded in F's function metadata.
//
// Returns the value of getMaxRegPressure()->getMaxPressure() for F when F has
// an entry in the FunctionsInfo metadata, and 0 when it has no entry. A result
// of 0 therefore does not distinguish "no entry" from "entry with value 0".
unsigned checkPublishRegPressureMetadata(llvm::Function &F) {
96+
unsigned Result = 0;
97+
// Look up first: only call getFunctionsInfoItem when an entry for F exists.
if (MDUtils->findFunctionsInfoItem(&F) != MDUtils->end_FunctionsInfo()) {
98+
IGC::IGCMD::FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F);
99+
Result = funcInfoMD->getMaxRegPressure()->getMaxPressure();
100+
}
101+
return Result;
102+
}
103+
94104
unsigned int getMaxRegCountForBB(llvm::BasicBlock &BB, unsigned int SIMD, WIAnalysisRunner *WI = nullptr) {
95105
InsideBlockPressureMap PressureMap;
96106
collectPressureForBB(BB, PressureMap, SIMD, WI);
@@ -269,6 +279,7 @@ class IGCRegisterPressurePrinter : public llvm::FunctionPass {
269279
virtual bool runOnFunction(llvm::Function &F) override;
270280
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
271281
AU.setPreservesAll();
282+
AU.addRequired<MetaDataUtilsWrapper>();
272283
AU.addRequired<IGCLivenessAnalysis>();
273284
AU.addRequired<CodeGenContextWrapper>();
274285
AU.addRequired<WIAnalysis>();
@@ -279,4 +290,30 @@ class IGCRegisterPressurePrinter : public llvm::FunctionPass {
279290
static char ID;
280291
};
281292

293+
294+
// Function pass that stores a register pressure estimate for each function in
// that function's metadata (see runOnFunction for the exact computation).
class IGCRegisterPressurePublisher : public llvm::FunctionPass {
295+
296+
// Analysis results cached by runOnFunction; null until it has run.
IGCLivenessAnalysis *RPE = nullptr;
297+
WIAnalysis *WI = nullptr;
298+
CodeGenContext *CGCtx = nullptr;
299+
300+
// Value obtained from IGCFunctionExternalRegPressureAnalysis for the function
// being processed; runOnFunction adds it to the published number.
unsigned int ExternalPressure = 0;
301+
// Result of getMaxRegCountForFunction for the function being processed;
// runOnFunction resets it to 0 at the start of each run.
unsigned int MaxPressureInFunction = 0;
302+
303+
public:
304+
llvm::StringRef getPassName() const override { return "IGCRegPressurePublisher"; }
305+
virtual ~IGCRegisterPressurePublisher() {}
306+
virtual bool runOnFunction(llvm::Function &F) override;
307+
// Declares all analyses as preserved and lists the analyses this pass
// requires.
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
308+
AU.setPreservesAll();
309+
AU.addRequired<MetaDataUtilsWrapper>();
310+
AU.addRequired<IGCLivenessAnalysis>();
311+
AU.addRequired<CodeGenContextWrapper>();
312+
AU.addRequired<WIAnalysis>();
313+
AU.addRequired<IGCFunctionExternalRegPressureAnalysis>();
314+
}
315+
IGCRegisterPressurePublisher();
316+
// Pass identifier, defined in the .cpp file and passed to FunctionPass by the
// constructor.
static char ID;
317+
};
318+
282319
}; // namespace IGC

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,12 @@ void AddAnalysisPasses(CodeGenContext &ctx, IGCPassManager &mpm) {
365365
mpm.add(createCleanPHINodePass());
366366
if (IGC_IS_FLAG_SET(DumpRegPressureEstimate))
367367
mpm.add(new IGCRegisterPressurePrinter("final"));
368+
369+
// save RPE results in metadata
370+
if (ctx.type == ShaderType::OPENCL_SHADER) {
371+
mpm.add(new IGCRegisterPressurePublisher());
372+
}
373+
368374
// Let Layout be the last pass before Emit Pass
369375
mpm.add(new Layout());
370376
if(IGC_IS_FLAG_ENABLED(EnableDropTargetBBs)) {

IGC/Compiler/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ void initializeRematChainsAnalysisPass(llvm::PassRegistry &);
270270
void initializeVectorShuffleAnalysisPass(llvm::PassRegistry &);
271271
void initializeIGCLivenessAnalysisPass(llvm::PassRegistry &);
272272
void initializeIGCRegisterPressurePrinterPass(llvm::PassRegistry &);
273+
void initializeIGCRegisterPressurePublisherPass(llvm::PassRegistry &);
273274
void initializeIGCVectorizerPass(llvm::PassRegistry &);
274275
void initializeIGCFunctionExternalRegPressureAnalysisPass(llvm::PassRegistry &);
275276
void initializePromoteConstantStructsPass(llvm::PassRegistry &);

IGC/Compiler/MetaDataApi/MetaDataApi.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ FunctionInfoMetaData::FunctionInfoMetaData(const llvm::MDNode *pNode, bool hasId
306306
m_ThreadGroupSize(new ThreadGroupSizeMetaData(getNamedNode(pNode, "thread_group_size"), true)),
307307
m_ThreadGroupSizeHint(new ThreadGroupSizeMetaData(getNamedNode(pNode, "thread_group_size_hint"), true)),
308308
m_SubGroupSize(new SubGroupSizeMetaData(getNamedNode(pNode, "sub_group_size"), true)),
309-
m_MaxRegPressure(new MaxRegPressureMetaData(getNamedNode(pNode, "max_reg_pressure"), true)),
309+
m_MaxRegPressure(new MaxRegPressureMetaDataHandle::ObjectType("max_reg_pressure")),
310310
m_OpenCLVectorTypeHint(new VectorTypeHintMetaData(getNamedNode(pNode, "opencl_vec_type_hint"), true)),
311311
m_pNode(pNode) {}
312312

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-14-plus, regkeys
10+
; RUN: igc_opt --opaque-pointers -igc-pressure-publisher -S %s | FileCheck %s
11+
; RUN: igc_opt --typed-pointers -igc-pressure-publisher -S %s | FileCheck %s
12+
13+
; Test checks that RegisterPressurePublisher writes max_reg_pressure in metadata
14+
15+
; CHECK: "max_reg_pressure", i32 {{[0-9]+}}
16+
17+
define spir_kernel void @testNoUnif(float addrspace(1)* %out, float addrspace(1)* %in, <8 x i32> %r0, <8 x i32> %payloadHeader, <3 x i32> %localSize, i16 %localIdX, i16 %localIdY, i16 %localIdZ, i32 %bufferOffset, i32 %bufferOffset1) {
18+
entry:
19+
%0 = extractelement <3 x i32> %localSize, i64 0
20+
%1 = extractelement <3 x i32> %localSize, i64 1
21+
%localIdZ2 = zext i16 %localIdZ to i32
22+
%mul.i.i = mul i32 %1, %localIdZ2
23+
%localIdY4 = zext i16 %localIdY to i32
24+
%add.i.i = add i32 %mul.i.i, %localIdY4
25+
%mul4.i.i = mul i32 %0, %add.i.i
26+
%localIdX6 = zext i16 %localIdX to i32
27+
%add6.i.i = add i32 %mul4.i.i, %localIdX6
28+
%conv.i = zext i32 %add6.i.i to i64
29+
%arrayidx = getelementptr inbounds float, float addrspace(1)* %in, i64 %conv.i
30+
%2 = load float, float addrspace(1)* %arrayidx, align 4
31+
%arrayidx1 = getelementptr inbounds float, float addrspace(1)* %out, i64 %conv.i
32+
store float %2, float addrspace(1)* %arrayidx1, align 4
33+
ret void
34+
}
35+
36+
!igc.functions = !{!1}
37+
38+
!1 = !{void (float addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, <3 x i32>, i16, i16, i16, i32, i32)* @testNoUnif, !2}
39+
!2 = !{!3, !4, !5}
40+
!3 = !{!"function_type", i32 0}
41+
!4 = !{!"implicit_arg_desc", !7, !8, !9, !10, !11, !12, !13, !15}
42+
!5 = !{!"thread_group_size", i32 16, i32 32, i32 32}
43+
!7 = !{i32 0}
44+
!8 = !{i32 1}
45+
!9 = !{i32 6}
46+
!10 = !{i32 8}
47+
!11 = !{i32 9}
48+
!12 = !{i32 10}
49+
!13 = !{i32 15, !14}
50+
!14 = !{!"explicit_arg_num", i32 0}
51+
!15 = !{i32 15, !16}
52+
!16 = !{!"explicit_arg_num", i32 1}

IGC/ocloc_tests/retry/final_test_pytorch3.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010
; RUN: ocloc compile -llvm_input -file %t.bc -device mtl -options "-igc_opts 'DisableCodeScheduling=1,VISAOptions=-asmToConsole'" &> %t_output.ll
1111
; RUN: FileCheck --input-file %t_output.ll %s
1212

13-
; CHECK://.kernel _ZTSN2at15AtenIpexTypeXPU4impl35FastGroupRadixSortImplKernelFunctorIbxLi1024ELb0ELb1EtLi32ELi4ENS0_19GroupRadixProcesserIbLi1024ELi32ELi4ELb0EttjLi4EEEbEE
14-
; CHECK://.spill size {{([5-6][0-9]{3})}}
15-
; CHECK: end of thread
13+
; This test checks that after kernel recompilation there are no more spills
1614

1715
; CHECK://.kernel _ZTSN2at15AtenIpexTypeXPU4impl35FastGroupRadixSortImplKernelFunctorIbxLi1024ELb0ELb1EtLi32ELi4ENS0_19GroupRadixProcesserIbLi1024ELi32ELi4ELb0EttjLi4EEEbEE
1816
; CHECK-NOT://.spill size
1917
; CHECK: end of thread
2018

19+
; CHECK: warning: [RetryManager] Start recompilation of the kernel
20+
2121
; ModuleID = 'reduced.ll'
2222
source_filename = "reduced.ll"
2323

0 commit comments

Comments
 (0)