Skip to content

Commit 9e2fefd

Browse files
ichenkaiigcbot
authored and committed
Revert the swap of load and goto in the last loop of ResourceLoop
Swapping the load and the goto in the last loop of the ResourceLoop unroll appears to provide no benefit and could cause unexpected side effects.
1 parent e56ec07 commit 9e2fefd

File tree

3 files changed

+19
-51
lines changed

3 files changed

+19
-51
lines changed

IGC/Compiler/CISACodeGen/ResourceLoopUnroll.cpp

Lines changed: 5 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
138138
LLVM3DBuilder<> builder(context, platform);
139139

140140
auto createResLoopIter = [&builder, this]
141-
(Instruction* inst, BasicBlock* checkBB, BasicBlock* sendBB, BasicBlock* nextBB, BasicBlock* exitBB)
141+
(Instruction* inst, BasicBlock* checkBB, BasicBlock* nextBB, BasicBlock* exitBB)
142142
{
143143
Value* resource = nullptr;
144144
Value* sampler = nullptr;
@@ -223,38 +223,16 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
223223
IGC_ASSERT(0);
224224
}
225225

226-
// Here we swap the last loop load and goto, such as
227-
// From
228-
// (P89) lsc_load.ugm.ca.ca(M1, 16) V1395:d32x3 bss(firstActiveRes)[V1385] : a32 /// $1953
229-
// (!P89) goto (M1, 16) ___realTimePathTracingRayGeneration__YAXXZ_093_partial_check1736 /// $1954
230-
// To
231-
// (!P89) goto (M1, 16) ___realTimePathTracingRayGeneration__YAXXZ_093_partial_check1736 /// $1954
232-
// (P89) lsc_load.ugm.ca.ca(M1, 16) V1395:d32x3 bss(firstActiveRes)[V1385] : a32 /// $1953
233-
// However, as CreateCondBr is generating terminator, we put the last send into a BB.
234-
// Without swapping, each iteration, the load is loading some channels.
235-
if (sendBB)
236-
{
237-
builder.CreateCondBr(cond, sendBB, nextBB);
238-
builder.SetInsertPoint(sendBB);
239-
}
240-
241226
llvm::Instruction* predSendInstr = inst->clone();
242227
SetResourceOperand(predSendInstr, resourceNew, pairTextureNew, textureNew, samplerNew);
243228
predSendInstr->setName("resLoopSubIterSend");
244229
builder.Insert(predSendInstr);
245230

246-
if (sendBB)
247-
{
248-
builder.CreateBr(exitBB);
249-
}
250-
else
251-
{
252-
builder.CreateCondBr(cond, exitBB, nextBB);
253-
}
254-
255231
// add the cmp/instruction combo to our predication map
256232
m_pCodeGenContext->getModuleMetaData()->predicationMap[predSendInstr] = cond;
257233

234+
builder.CreateCondBr(cond, exitBB, nextBB);
235+
258236
return predSendInstr;
259237
};
260238

@@ -275,12 +253,10 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
275253
{
276254
// Basicblocks for loop
277255
BasicBlock* partialCheckBB = BasicBlock::Create(context, "partial_check", BB->getParent(), before);
278-
// Since it's created from the end, the i == 0 is the last loop
279-
BasicBlock* lastSendBB = (i == 0) ? BasicBlock::Create(context, "last_send", BB->getParent(), before) : nullptr;
280256

281-
send = createResLoopIter(CI, partialCheckBB, lastSendBB, before, mergeBB);
257+
send = createResLoopIter(CI, partialCheckBB, before, mergeBB);
282258

283-
PN->addIncoming(send, lastSendBB ? lastSendBB : partialCheckBB);
259+
PN->addIncoming(send, partialCheckBB);
284260
before = partialCheckBB;
285261
}
286262

IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedLsc.ll

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -49,14 +49,12 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
4949
; CHECK-LL-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]])
5050
; CHECK-LL-NEXT: [[FIRSTACTIVERES:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP14]], i32 0)
5151
; CHECK-LL-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES]]
52-
; CHECK-LL-NEXT: br i1 [[TMP15]], label [[LAST_SEND:%.*]], label [[LATCH:%.*]]
53-
; CHECK-LL: last_send:
5452
; CHECK-LL-NEXT: [[TMP16:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES]], i32 [[OFFSET]], i32 4, i1 false)
55-
; CHECK-LL-NEXT: br label [[UNROLL_MERGE]]
53+
; CHECK-LL-NEXT: br i1 [[TMP15]], label [[UNROLL_MERGE]], label [[LATCH:%.*]]
5654
; CHECK-LL: latch:
5755
; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5]]
5856
; CHECK-LL: unroll-merge:
59-
; CHECK-LL-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[LAST_SEND]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
57+
; CHECK-LL-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[PARTIAL_CHECK]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
6058
; CHECK-LL-NEXT: [[OUT:%.*]] = extractelement <3 x i32> [[TMP17]], i32 [[VAL:%.*]]
6159
; CHECK-LL-NEXT: store i32 [[OUT]], i32 addrspace(1)* [[DST:%.*]], align 1
6260
; CHECK-LL-NEXT: ret void
@@ -79,7 +77,7 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
7977
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes6(0,0)<1> r[A0(0),0]<0;1,0>:ud
8078
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 nonuniform_0(0,0)<1;1,0> firstActiveRes6(0,0)<0;1,0>
8179
; CHECK-VISAASM-NEXT: (P2) lsc_load.ugm.ca.ca (M1, 16) V0032:d32x3 bss(firstActiveRes6)[offset]:a32
82-
; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_007_unroll_merge
80+
; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_006_unroll_merge
8381
;
8482
; CHECK-VISAASM: _test1_002_partial_check3:
8583
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P3 0x0:ud
@@ -92,7 +90,7 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
9290
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes4(0,0)<1> r[A1(0),0]<0;1,0>:ud
9391
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 nonuniform_0(0,0)<1;1,0> firstActiveRes4(0,0)<0;1,0>
9492
; CHECK-VISAASM-NEXT: (P4) lsc_load.ugm.ca.ca (M1, 16) V0032:d32x3 bss(firstActiveRes4)[offset]:a32
95-
; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_007_unroll_merge
93+
; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_006_unroll_merge
9694
;
9795
; CHECK-VISAASM: _test1_003_partial_check1:
9896
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P5 0x0:ud
@@ -105,7 +103,7 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
105103
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes2(0,0)<1> r[A2(0),0]<0;1,0>:ud
106104
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 nonuniform_0(0,0)<1;1,0> firstActiveRes2(0,0)<0;1,0>
107105
; CHECK-VISAASM-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) V0032:d32x3 bss(firstActiveRes2)[offset]:a32
108-
; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_007_unroll_merge
106+
; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_006_unroll_merge
109107
;
110108
; CHECK-VISAASM: _test1_004_partial_check:
111109
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P7 0x0:ud
@@ -117,12 +115,10 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
117115
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform_0 ShuffleTmp_2(0,0)<0;1,0>
118116
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes(0,0)<1> r[A3(0),0]<0;1,0>:ud
119117
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 nonuniform_0(0,0)<1;1,0> firstActiveRes(0,0)<0;1,0>
120-
; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5
121-
;
122-
; CHECK-VISAASM: _test1_005_last_send:
123118
; CHECK-VISAASM-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) V0032:d32x3 bss(firstActiveRes)[offset]:a32
119+
; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5
124120
;
125-
; CHECK-VISAASM: _test1_007_unroll_merge:
121+
; CHECK-VISAASM: _test1_006_unroll_merge:
126122
; CHECK-VISAASM-NEXT: mul (M1_NM, 1) V0061(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw
127123
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A4(0)<1> &V0032 V0061(0,0)<0;1,0>
128124
; CHECK-VISAASM-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d

IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedSampler.ll

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -53,14 +53,12 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
5353
; CHECK-LL-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]])
5454
; CHECK-LL-NEXT: [[FIRSTACTIVESAMPLER:%.*]] = call <4 x float> addrspace(2752518)* @llvm.genx.GenISA.WaveShuffleIndex.p2752518v4f32(<4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], i32 [[TMP14]], i32 0)
5555
; CHECK-LL-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2752518)* [[NONUNIFORMSAMPLER]], [[FIRSTACTIVESAMPLER]]
56-
; CHECK-LL-NEXT: br i1 [[TMP15]], label [[LAST_SEND:%.*]], label [[LATCH:%.*]]
57-
; CHECK-LL: last_send:
5856
; CHECK-LL-NEXT: [[TMP16:%.*]] = tail call fast <4 x float> @llvm.genx.GenISA.sampleLptr.v4f32.f32.p2621443__2D_DIM_Resource.p2621443__2D_DIM_Resource.p2752518v4f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, %__2D_DIM_Resource.0 addrspace(2621450)* undef, %__2D_DIM_Resource.0 addrspace(2621450)* [[NONUNIFORMTEXTURE]], <4 x float> addrspace(2752518)* [[FIRSTACTIVESAMPLER]], i32 0, i32 0, i32 0)
59-
; CHECK-LL-NEXT: br label [[UNROLL_MERGE]]
57+
; CHECK-LL-NEXT: br i1 [[TMP15]], label [[UNROLL_MERGE]], label [[LATCH:%.*]]
6058
; CHECK-LL: latch:
6159
; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5]]
6260
; CHECK-LL: unroll-merge:
63-
; CHECK-LL-NEXT: [[TMP17:%.*]] = phi <4 x float> [ [[TMP16]], [[LAST_SEND]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
61+
; CHECK-LL-NEXT: [[TMP17:%.*]] = phi <4 x float> [ [[TMP16]], [[PARTIAL_CHECK]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
6462
; CHECK-LL-NEXT: [[OUT:%.*]] = extractelement <4 x float> [[TMP17]], i32 0
6563
; CHECK-LL-NEXT: store float [[OUT]], float addrspace(1)* [[DST:%.*]], align 4
6664
; CHECK-LL-NEXT: ret void
@@ -86,7 +84,7 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
8684
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler6(0,0)<0;1,0>
8785
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
8886
; CHECK-VISAASM-NEXT: (P2) sample_lz.RGBA (M1, 16) 0x0:uw S31 %bss V0032.0 %null.0 V0040.0
89-
; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_007_unroll_merge
87+
; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_006_unroll_merge
9088
;
9189
; CHECK-VISAASM: _test1_002_partial_check3:
9290
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P3 0x0:ud
@@ -102,7 +100,7 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
102100
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler4(0,0)<0;1,0>
103101
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
104102
; CHECK-VISAASM-NEXT: (P4) sample_lz.RGBA (M1, 16) 0x0:uw S31 %bss V0032.0 %null.0 V0048.0
105-
; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_007_unroll_merge
103+
; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_006_unroll_merge
106104
;
107105
; CHECK-VISAASM: _test1_003_partial_check1:
108106
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P5 0x0:ud
@@ -118,7 +116,7 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
118116
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler2(0,0)<0;1,0>
119117
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
120118
; CHECK-VISAASM-NEXT: (P6) sample_lz.RGBA (M1, 16) 0x0:uw S31 %bss V0032.0 %null.0 V0056.0
121-
; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_007_unroll_merge
119+
; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_006_unroll_merge
122120
;
123121
; CHECK-VISAASM: _test1_004_partial_check:
124122
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P7 0x0:ud
@@ -130,15 +128,13 @@ define spir_kernel void @test1(<64 x i32> %src, float addrspace(1)* %dst) {
130128
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &sampler_0 ShuffleTmp_2(0,0)<0;1,0>
131129
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveSampler(0,0)<1> r[A3(0),0]<0;1,0>:ud
132130
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 sampler_0(0,0)<1;1,0> firstActiveSampler(0,0)<0;1,0>
133-
; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5
134-
;
135-
; CHECK-VISAASM: _test1_005_last_send:
136131
; CHECK-VISAASM-NEXT: mov (M1, 16) V0064(0,0)<1> 0x0:f
137132
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) S31(0) firstActiveSampler(0,0)<0;1,0>
138133
; CHECK-VISAASM-NEXT: movs (M1_NM, 1) %bss(0) texture(0,0)<0;1,0>
139134
; CHECK-VISAASM-NEXT: (P8) sample_lz.RGBA (M1, 16) 0x0:uw S31 %bss V0032.0 %null.0 V0064.0
135+
; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5
140136
;
141-
; CHECK-VISAASM: _test1_007_unroll_merge:
137+
; CHECK-VISAASM: _test1_006_unroll_merge:
142138
; CHECK-VISAASM-NEXT: mov (M1, 16) out(0,0)<1> V0032(0,0)<1;1,0>
143139
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0>
144140
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0>

0 commit comments

Comments
 (0)