Skip to content

Commit a034a5a

Browse files
ichenkai authored and igcbot committed
Extended unroll optimization to new core
Extended unroll optimization to new core.
1 parent 7e71fc9 commit a034a5a

File tree

6 files changed

+109
-88
lines changed

6 files changed

+109
-88
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 23 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7993,6 +7993,9 @@ void EmitPass::emitSampleInstruction(SampleIntrinsic* inst)
79937993
bool zeroLOD = m_currShader->m_Platform->supportSampleAndLd_lz() && inst->ZeroLOD() &&
79947994
!m_currShader->m_Platform->WaDisableSampleLz();
79957995

7996+
ModuleMetaData* modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();
7997+
auto& predicationMap = modMD->predicationMap;
7998+
79967999
ResourceLoop(resource, sampler, [&](CVariable* flag, CVariable*& destination,
79978000
ResourceDescriptor resource, bool needLoop) {
79988001

@@ -8003,13 +8006,20 @@ void EmitPass::emitSampleInstruction(SampleIntrinsic* inst)
80038006
destination->GetNumberElement() * 2, ISA_TYPE_HF, EALIGN_GRF, false, CName::NONE);
80048007
}
80058008

8006-
if (m_currShader->m_Platform->getWATable().Wa_22011157800 && !IGC_IS_FLAG_DISABLED(DiableWaSamplerNoMask))
8009+
if (m_currShader->m_Platform->needWaSamplerNoMask())
80078010
{
80088011
m_encoder->SetNoMask();
80098012
}
80108013
else
80118014
{
8012-
m_encoder->SetPredicate(flag);
8015+
if (predicationMap.count(inst))
8016+
{
8017+
m_encoder->SetPredicate(m_currShader->GetSymbol(cast<Instruction>(predicationMap[inst])));
8018+
}
8019+
else
8020+
{
8021+
m_encoder->SetPredicate(flag);
8022+
}
80138023
}
80148024
m_encoder->Sample(
80158025
opCode,
@@ -19082,6 +19092,9 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
1908219092

1908319093
eOffset = BroadcastIfUniform(eOffset);
1908419094

19095+
ModuleMetaData* modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();
19096+
auto& predicationMap = modMD->predicationMap;
19097+
1908519098
SamplerDescriptor sampler;
1908619099
ResourceLoop(resource, sampler, [&](CVariable* flag, CVariable*& destination,
1908719100
ResourceDescriptor resource, bool needLoop) {
@@ -19138,7 +19151,14 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
1913819151
dVisaTy, (uint16_t)eltOffBytes, (uint16_t)nbelts);
1913919152
}
1914019153

19141-
m_encoder->SetPredicate(IGC_IS_FLAG_ENABLED(UseVMaskPredicateForLoads) ? GetCombinedVMaskPred(flag) : flag);
19154+
if (predicationMap.count(inst))
19155+
{
19156+
m_encoder->SetPredicate(m_currShader->GetSymbol(cast<Instruction>(predicationMap[inst])));
19157+
}
19158+
else
19159+
{
19160+
m_encoder->SetPredicate(IGC_IS_FLAG_ENABLED(UseVMaskPredicateForLoads) ? GetCombinedVMaskPred(flag) : flag);
19161+
}
1914219162

1914319163
VectorMessage::MESSAGE_KIND messageType = VecMessInfo.insts[i].kind;
1914419164
IGC_ASSERT_MESSAGE(

IGC/Compiler/CISACodeGen/Platform.hpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1444,9 +1444,17 @@ bool hasBarrierControlFlowOpt() const
14441444
return enabled;
14451445
}
14461446

1447+
bool needWaSamplerNoMask() const
1448+
{
1449+
return m_WaTable.Wa_22011157800 && !IGC_IS_FLAG_DISABLED(DiableWaSamplerNoMask);
1450+
}
1451+
14471452
bool hasSlowSameSBIDLoad() const
14481453
{
1449-
return isCoreChildOf(IGFX_XE2_HPG_CORE);
1454+
bool bYes = false;
1455+
bYes = isCoreChildOf(IGFX_XE_HPG_CORE);
1456+
1457+
return bYes && !needWaSamplerNoMask();
14501458
}
14511459

14521460
bool canDoMultipleLineMOVOpt() const

IGC/Compiler/CISACodeGen/ResourceLoopUnroll.cpp

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
138138
LLVM3DBuilder<> builder(context, platform);
139139

140140
auto createResLoopIter = [&builder, this]
141-
(Instruction* inst, BasicBlock* checkBB, BasicBlock* sendBB, BasicBlock* nextBB, BasicBlock* exitBB)
141+
(Instruction* inst, BasicBlock* checkBB, BasicBlock* nextBB, BasicBlock* exitBB)
142142
{
143143
Value* resource = nullptr;
144144
Value* sampler = nullptr;
@@ -176,9 +176,8 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
176176
textureNew = texture;
177177
pairTextureNew = pairTexture;
178178

179-
// initially, they are true in CreateICmpEQ
180-
Value* textureCond = builder.getTrue();
181-
Value* samplerCond = builder.getTrue();
179+
Value* textureCond = nullptr;
180+
Value* samplerCond = nullptr;
182181

183182
// need care about pairTexture uniform???
184183
if (!m_WIAnalysis->isUniform(pairTexture))
@@ -211,20 +210,32 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
211210
samplerCond = builder.CreateICmpEQ(sampler, samplerNew);
212211
}
213212

214-
// if textureNew == uniform && samplerNew == uniform
215-
cond = builder.CreateAnd(textureCond, samplerCond);
213+
if (textureCond && samplerCond)
214+
{
215+
// if textureNew == uniform && samplerNew == uniform
216+
cond = builder.CreateAnd(textureCond, samplerCond);
217+
}
218+
else if (textureCond)
219+
{
220+
cond = textureCond;
221+
}
222+
else if (samplerCond)
223+
{
224+
cond = samplerCond;
225+
}
216226
}
217227

218-
builder.CreateCondBr(cond, sendBB, nextBB);
228+
llvm::Instruction* predSendInstr = inst->clone();
229+
SetResourceOperand(predSendInstr, resourceNew, pairTextureNew, textureNew, samplerNew);
230+
predSendInstr->setName("resLoopSubIterSend");
231+
builder.Insert(predSendInstr);
232+
233+
// add the cmp/instruction combo to our predication map
234+
m_pCodeGenContext->getModuleMetaData()->predicationMap[predSendInstr] = cond;
219235

220-
// Fill sendBB
221-
builder.SetInsertPoint(sendBB);
222-
llvm::Instruction* clonedSend = inst->clone();
223-
SetResourceOperand(clonedSend, resourceNew, pairTextureNew, textureNew, samplerNew);
224-
clonedSend->setName("resLoopSubIterSend");
225-
builder.Insert(clonedSend);
226-
builder.CreateBr(exitBB);
227-
return clonedSend;
236+
builder.CreateCondBr(cond, exitBB, nextBB);
237+
238+
return predSendInstr;
228239
};
229240

230241
//////////////////////////////////////////////////////////////////////////
@@ -243,13 +254,13 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
243254
{
244255
// Basicblocks for loop
245256
BasicBlock* partialCheckBB = BasicBlock::Create(context, "partial_check", BB->getParent(), before);
246-
BasicBlock* partialSendBB = BasicBlock::Create(context, "partial_send", BB->getParent(), before);
247257

248-
auto send = createResLoopIter(CI, partialCheckBB, partialSendBB, before, mergeBB);
258+
auto send = createResLoopIter(CI, partialCheckBB, before, mergeBB);
249259

250-
PN->addIncoming(send, partialSendBB);
260+
PN->addIncoming(send, partialCheckBB);
251261
before = partialCheckBB;
252262
}
263+
253264
// latch goes back to last created BB, which actually will be first BB due to ordering of creating and "before" poitner
254265
builder.SetInsertPoint(latch);
255266
builder.CreateBr(before);

IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedLsc.ll

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,47 +22,39 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
2222
; CHECK-NEXT: [[NONUNIFORM:%.*]] = zext i16 [[SVN]] to i32
2323
; CHECK-NEXT: [[NONUNIFORMRESOURCE:%.*]] = inttoptr i32 [[NONUNIFORM]] to <4 x float> addrspace(2621440)*
2424
; CHECK-NEXT: [[OFFSET:%.*]] = add i32 [[SRC1:%.*]], 1
25-
; CHECK-NEXT: br label [[PARTIAL_CHECK7:%.*]]
26-
; CHECK: partial_check7:
25+
; CHECK-NEXT: br label [[PARTIAL_CHECK5:%.*]]
26+
; CHECK: partial_check5:
2727
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
2828
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]])
29-
; CHECK-NEXT: [[FIRSTACTIVERES9:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP2]], i32 0)
30-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES9]]
31-
; CHECK-NEXT: br i1 [[TMP3]], label [[PARTIAL_SEND8:%.*]], label [[PARTIAL_CHECK4:%.*]]
32-
; CHECK: partial_send8:
33-
; CHECK-NEXT: [[TMP4:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES9]], i32 [[OFFSET]], i32 4, i1 false)
34-
; CHECK-NEXT: br label [[UNROLL_MERGE:%.*]]
35-
; CHECK: partial_check4:
29+
; CHECK-NEXT: [[FIRSTACTIVERES6:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP2]], i32 0)
30+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES6]]
31+
; CHECK-NEXT: [[TMP4:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES6]], i32 [[OFFSET]], i32 4, i1 false)
32+
; CHECK-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]]
33+
; CHECK: partial_check3:
3634
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
3735
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]])
38-
; CHECK-NEXT: [[FIRSTACTIVERES6:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP6]], i32 0)
39-
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES6]]
40-
; CHECK-NEXT: br i1 [[TMP7]], label [[PARTIAL_SEND5:%.*]], label [[PARTIAL_CHECK1:%.*]]
41-
; CHECK: partial_send5:
42-
; CHECK-NEXT: [[TMP8:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES6]], i32 [[OFFSET]], i32 4, i1 false)
43-
; CHECK-NEXT: br label [[UNROLL_MERGE]]
36+
; CHECK-NEXT: [[FIRSTACTIVERES4:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP6]], i32 0)
37+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES4]]
38+
; CHECK-NEXT: [[TMP8:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES4]], i32 [[OFFSET]], i32 4, i1 false)
39+
; CHECK-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK1:%.*]]
4440
; CHECK: partial_check1:
4541
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
4642
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]])
47-
; CHECK-NEXT: [[FIRSTACTIVERES3:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP10]], i32 0)
48-
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES3]]
49-
; CHECK-NEXT: br i1 [[TMP11]], label [[PARTIAL_SEND2:%.*]], label [[PARTIAL_CHECK:%.*]]
50-
; CHECK: partial_send2:
51-
; CHECK-NEXT: [[TMP12:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES3]], i32 [[OFFSET]], i32 4, i1 false)
52-
; CHECK-NEXT: br label [[UNROLL_MERGE]]
43+
; CHECK-NEXT: [[FIRSTACTIVERES2:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP10]], i32 0)
44+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES2]]
45+
; CHECK-NEXT: [[TMP12:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES2]], i32 [[OFFSET]], i32 4, i1 false)
46+
; CHECK-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK:%.*]]
5347
; CHECK: partial_check:
5448
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
5549
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]])
5650
; CHECK-NEXT: [[FIRSTACTIVERES:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP14]], i32 0)
5751
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES]]
58-
; CHECK-NEXT: br i1 [[TMP15]], label [[PARTIAL_SEND:%.*]], label [[LATCH:%.*]]
59-
; CHECK: partial_send:
6052
; CHECK-NEXT: [[TMP16:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES]], i32 [[OFFSET]], i32 4, i1 false)
61-
; CHECK-NEXT: br label [[UNROLL_MERGE]]
53+
; CHECK-NEXT: br i1 [[TMP15]], label [[UNROLL_MERGE]], label [[LATCH:%.*]]
6254
; CHECK: latch:
63-
; CHECK-NEXT: br label [[PARTIAL_CHECK7]]
55+
; CHECK-NEXT: br label [[PARTIAL_CHECK5]]
6456
; CHECK: unroll-merge:
65-
; CHECK-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[PARTIAL_SEND]] ], [ [[TMP12]], [[PARTIAL_SEND2]] ], [ [[TMP8]], [[PARTIAL_SEND5]] ], [ [[TMP4]], [[PARTIAL_SEND8]] ], !MyUniqueExclusiveLoadMetadata !24
57+
; CHECK-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[PARTIAL_CHECK]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
6658
; CHECK-NEXT: [[OUT:%.*]] = extractelement <3 x i32> [[TMP17]], i32 [[VAL:%.*]]
6759
; CHECK-NEXT: store i32 [[OUT]], i32 addrspace(1)* [[DST:%.*]], align 1
6860
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)