Skip to content

Commit f477776

Browse files
ichenkai authored and igcbot committed
Update resource loop nested lit tests
Update resource loop nested lit tests.
1 parent ec97e42 commit f477776

File tree

4 files changed

+283
-100
lines changed

4 files changed

+283
-100
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8117,6 +8117,7 @@ void EmitPass::emitSampleInstruction(SampleIntrinsic* inst)
81178117
if (predicationMap.count(inst))
81188118
{
81198119
m_encoder->SetPredicate(m_currShader->GetSymbol(cast<Instruction>(predicationMap[inst])));
8120+
m_encoder->Lifetime(LIFETIME_START, dst);
81208121
}
81218122
else
81228123
{
@@ -19481,6 +19482,7 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
1948119482
if (predicationMap.count(inst))
1948219483
{
1948319484
m_encoder->SetPredicate(m_currShader->GetSymbol(cast<Instruction>(predicationMap[inst])));
19485+
m_encoder->Lifetime(LIFETIME_START, destCVar);
1948419486
}
1948519487
else
1948619488
{

IGC/Compiler/CISACodeGen/ResourceLoopUnroll.cpp

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
138138
LLVM3DBuilder<> builder(context, platform);
139139

140140
auto createResLoopIter = [&builder, this]
141-
(Instruction* inst, BasicBlock* checkBB, BasicBlock* nextBB, BasicBlock* exitBB)
141+
(Instruction* inst, BasicBlock* checkBB, BasicBlock* sendBB, BasicBlock* nextBB, BasicBlock* exitBB)
142142
{
143143
Value* resource = nullptr;
144144
Value* sampler = nullptr;
@@ -225,16 +225,38 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
225225
}
226226
}
227227

228+
// Here we swap the order of the load and the goto in the last unrolled loop iteration, e.g.
229+
// From
230+
// (P89) lsc_load.ugm.ca.ca(M1, 16) V1395:d32x3 bss(firstActiveRes)[V1385] : a32 /// $1953
231+
// (!P89) goto (M1, 16) ___realTimePathTracingRayGeneration__YAXXZ_093_partial_check1736 /// $1954
232+
// To
233+
// (!P89) goto (M1, 16) ___realTimePathTracingRayGeneration__YAXXZ_093_partial_check1736 /// $1954
234+
// (P89) lsc_load.ugm.ca.ca(M1, 16) V1395:d32x3 bss(firstActiveRes)[V1385] : a32 /// $1953
235+
// However, since CreateCondBr generates a terminator, we put the last send into its own basic block.
236+
// Without the swap, the load is executed on every iteration, loading only some channels each time.
237+
if (sendBB)
238+
{
239+
builder.CreateCondBr(cond, sendBB, nextBB);
240+
builder.SetInsertPoint(sendBB);
241+
}
242+
228243
llvm::Instruction* predSendInstr = inst->clone();
229244
SetResourceOperand(predSendInstr, resourceNew, pairTextureNew, textureNew, samplerNew);
230245
predSendInstr->setName("resLoopSubIterSend");
231246
builder.Insert(predSendInstr);
232247

248+
if (sendBB)
249+
{
250+
builder.CreateBr(exitBB);
251+
}
252+
else
253+
{
254+
builder.CreateCondBr(cond, exitBB, nextBB);
255+
}
256+
233257
// add the cmp/instruction combo to our predication map
234258
m_pCodeGenContext->getModuleMetaData()->predicationMap[predSendInstr] = cond;
235259

236-
builder.CreateCondBr(cond, exitBB, nextBB);
237-
238260
return predSendInstr;
239261
};
240262

@@ -254,10 +276,12 @@ bool ResourceLoopUnroll::emitResourceLoop(llvm::CallInst* CI)
254276
{
255277
// Basicblocks for loop
256278
BasicBlock* partialCheckBB = BasicBlock::Create(context, "partial_check", BB->getParent(), before);
279+
// Since the blocks are created from the end, i == 0 is the last loop iteration
280+
BasicBlock* lastSendBB = (i == 0) ? BasicBlock::Create(context, "last_send", BB->getParent(), before) : nullptr;
257281

258-
auto send = createResLoopIter(CI, partialCheckBB, before, mergeBB);
282+
auto send = createResLoopIter(CI, partialCheckBB, lastSendBB, before, mergeBB);
259283

260-
PN->addIncoming(send, partialCheckBB);
284+
PN->addIncoming(send, lastSendBB ? lastSendBB : partialCheckBB);
261285
before = partialCheckBB;
262286
}
263287

IGC/Compiler/tests/ResourceLoopUnroll/ResourceloopUnrollNestedLsc.ll

Lines changed: 120 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -7,62 +7,138 @@
77
;
88
;============================ end_copyright_notice =============================
99
; REQUIRES: llvm-14-plus, regkeys
10-
; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -regkey ResourceLoopUnrollNested=4 -verify -S < %s | FileCheck %s
10+
; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -regkey ResourceLoopUnrollNested=4 -verify -S < %s | FileCheck %s --check-prefix=CHECK-LL
11+
; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -igc-emit-visa -simd-mode 16 -inputrt -regkey ResourceLoopUnrollNested=4 -regkey DumpVISAASMToConsole -S < %s | FileCheck %s --check-prefix=CHECK-VISAASM
1112
;
1213
; Test checks how we emit ResourceLoop
1314

14-
1515
@ThreadGroupSize_X = constant i32 64
1616
@ThreadGroupSize_Y = constant i32 1
1717
@ThreadGroupSize_Z = constant i32 1
1818

1919
define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
20-
; CHECK-LABEL: @test1(
21-
; CHECK-NEXT: [[SVN:%.*]] = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
22-
; CHECK-NEXT: [[NONUNIFORM:%.*]] = zext i16 [[SVN]] to i32
23-
; CHECK-NEXT: [[NONUNIFORMRESOURCE:%.*]] = inttoptr i32 [[NONUNIFORM]] to <4 x float> addrspace(2621440)*
24-
; CHECK-NEXT: [[OFFSET:%.*]] = add i32 [[SRC1:%.*]], 1
25-
; CHECK-NEXT: br label [[PARTIAL_CHECK5:%.*]]
26-
; CHECK: partial_check5:
27-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
28-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]])
29-
; CHECK-NEXT: [[FIRSTACTIVERES6:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP2]], i32 0)
30-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES6]]
31-
; CHECK-NEXT: [[TMP4:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES6]], i32 [[OFFSET]], i32 4, i1 false)
32-
; CHECK-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]]
33-
; CHECK: partial_check3:
34-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
35-
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]])
36-
; CHECK-NEXT: [[FIRSTACTIVERES4:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP6]], i32 0)
37-
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES4]]
38-
; CHECK-NEXT: [[TMP8:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES4]], i32 [[OFFSET]], i32 4, i1 false)
39-
; CHECK-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK1:%.*]]
40-
; CHECK: partial_check1:
41-
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
42-
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]])
43-
; CHECK-NEXT: [[FIRSTACTIVERES2:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP10]], i32 0)
44-
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES2]]
45-
; CHECK-NEXT: [[TMP12:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES2]], i32 [[OFFSET]], i32 4, i1 false)
46-
; CHECK-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK:%.*]]
47-
; CHECK: partial_check:
48-
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
49-
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]])
50-
; CHECK-NEXT: [[FIRSTACTIVERES:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP14]], i32 0)
51-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES]]
52-
; CHECK-NEXT: [[TMP16:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES]], i32 [[OFFSET]], i32 4, i1 false)
53-
; CHECK-NEXT: br i1 [[TMP15]], label [[UNROLL_MERGE]], label [[LATCH:%.*]]
54-
; CHECK: latch:
55-
; CHECK-NEXT: br label [[PARTIAL_CHECK5]]
56-
; CHECK: unroll-merge:
57-
; CHECK-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[PARTIAL_CHECK]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
58-
; CHECK-NEXT: [[OUT:%.*]] = extractelement <3 x i32> [[TMP17]], i32 [[VAL:%.*]]
59-
; CHECK-NEXT: store i32 [[OUT]], i32 addrspace(1)* [[DST:%.*]], align 1
60-
; CHECK-NEXT: ret void
20+
; CHECK-LL-LABEL: @test1(
21+
; CHECK-LL: [[SVN:%.*]] = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
22+
; CHECK-LL-NEXT: [[NONUNIFORM:%.*]] = zext i16 [[SVN]] to i32
23+
; CHECK-LL-NEXT: [[NONUNIFORMRESOURCE:%.*]] = inttoptr i32 [[NONUNIFORM]] to <4 x float> addrspace(2621440)*
24+
; CHECK-LL-NEXT: [[OFFSET:%.*]] = add i32 [[SRC1:%.*]], %nonuniform
25+
; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5:%.*]]
26+
; CHECK-LL: partial_check5:
27+
; CHECK-LL-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
28+
; CHECK-LL-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]])
29+
; CHECK-LL-NEXT: [[FIRSTACTIVERES6:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP2]], i32 0)
30+
; CHECK-LL-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES6]]
31+
; CHECK-LL-NEXT: [[TMP4:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES6]], i32 [[OFFSET]], i32 4, i1 false)
32+
; CHECK-LL-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]]
33+
; CHECK-LL: partial_check3:
34+
; CHECK-LL-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
35+
; CHECK-LL-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]])
36+
; CHECK-LL-NEXT: [[FIRSTACTIVERES4:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP6]], i32 0)
37+
; CHECK-LL-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES4]]
38+
; CHECK-LL-NEXT: [[TMP8:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES4]], i32 [[OFFSET]], i32 4, i1 false)
39+
; CHECK-LL-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK1:%.*]]
40+
; CHECK-LL: partial_check1:
41+
; CHECK-LL-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
42+
; CHECK-LL-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]])
43+
; CHECK-LL-NEXT: [[FIRSTACTIVERES2:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP10]], i32 0)
44+
; CHECK-LL-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES2]]
45+
; CHECK-LL-NEXT: [[TMP12:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES2]], i32 [[OFFSET]], i32 4, i1 false)
46+
; CHECK-LL-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK:%.*]]
47+
; CHECK-LL: partial_check:
48+
; CHECK-LL-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
49+
; CHECK-LL-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]])
50+
; CHECK-LL-NEXT: [[FIRSTACTIVERES:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP14]], i32 0)
51+
; CHECK-LL-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES]]
52+
; CHECK-LL-NEXT: br i1 [[TMP15]], label [[LAST_SEND:%.*]], label [[LATCH:%.*]]
53+
; CHECK-LL: last_send:
54+
; CHECK-LL-NEXT: [[TMP16:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES]], i32 [[OFFSET]], i32 4, i1 false)
55+
; CHECK-LL-NEXT: br label [[UNROLL_MERGE]]
56+
; CHECK-LL: latch:
57+
; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5]]
58+
; CHECK-LL: unroll-merge:
59+
; CHECK-LL-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[LAST_SEND]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
60+
; CHECK-LL-NEXT: [[OUT:%.*]] = extractelement <3 x i32> [[TMP17]], i32 [[VAL:%.*]]
61+
; CHECK-LL-NEXT: store i32 [[OUT]], i32 addrspace(1)* [[DST:%.*]], align 1
62+
; CHECK-LL-NEXT: ret void
63+
;
64+
; COM: check predicate load and lifetime.start
65+
; CHECK-VISAASM: _main_0:
66+
; CHECK-VISAASM-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
67+
; CHECK-VISAASM-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0>
68+
; CHECK-VISAASM-NEXT: add (M1, 16) offset(0,0)<1> src1(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
69+
;
70+
; CHECK-VISAASM: _test1_001_partial_check5:
71+
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P1 0x0:ud
72+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P1 V0034(0,0)<0;1,0> V0034(0,0)<0;1,0>
73+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0035(0,0)<1> P1
74+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0033(0,0)<1> V0035(0,0)<0;1,0>
75+
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0>
76+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp(0,0)<1> V0038(0,0)<0;1,0> 0x2:uw
77+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform_0 ShuffleTmp(0,0)<0;1,0>
78+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes6(0,0)<1> r[A0(0),0]<0;1,0>:ud
79+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 nonuniform_0(0,0)<1;1,0> firstActiveRes6(0,0)<0;1,0>
80+
; CHECK-VISAASM-NEXT: lifetime.start V0039
81+
; CHECK-VISAASM-NEXT: (P2) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes6)[offset]:a32
82+
; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_007_unroll_merge
83+
;
84+
; CHECK-VISAASM: _test1_002_partial_check3:
85+
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P3 0x0:ud
86+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P3 V0042(0,0)<0;1,0> V0042(0,0)<0;1,0>
87+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0043(0,0)<1> P3
88+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0041(0,0)<1> V0043(0,0)<0;1,0>
89+
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0045(0,0)<1> V0041(0,0)<0;1,0>
90+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_0(0,0)<1> V0046(0,0)<0;1,0> 0x2:uw
91+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform_0 ShuffleTmp_0(0,0)<0;1,0>
92+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes4(0,0)<1> r[A1(0),0]<0;1,0>:ud
93+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 nonuniform_0(0,0)<1;1,0> firstActiveRes4(0,0)<0;1,0>
94+
; CHECK-VISAASM-NEXT: lifetime.start V0039
95+
; CHECK-VISAASM-NEXT: (P4) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes4)[offset]:a32
96+
; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_007_unroll_merge
97+
;
98+
; CHECK-VISAASM: _test1_003_partial_check1:
99+
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P5 0x0:ud
100+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P5 V0049(0,0)<0;1,0> V0049(0,0)<0;1,0>
101+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0050(0,0)<1> P5
102+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0048(0,0)<1> V0050(0,0)<0;1,0>
103+
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0052(0,0)<1> V0048(0,0)<0;1,0>
104+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_1(0,0)<1> V0053(0,0)<0;1,0> 0x2:uw
105+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform_0 ShuffleTmp_1(0,0)<0;1,0>
106+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes2(0,0)<1> r[A2(0),0]<0;1,0>:ud
107+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 nonuniform_0(0,0)<1;1,0> firstActiveRes2(0,0)<0;1,0>
108+
; CHECK-VISAASM-NEXT: lifetime.start V0039
109+
; CHECK-VISAASM-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes2)[offset]:a32
110+
; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_007_unroll_merge
111+
;
112+
; CHECK-VISAASM: _test1_004_partial_check:
113+
; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P7 0x0:ud
114+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P7 V0056(0,0)<0;1,0> V0056(0,0)<0;1,0>
115+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0057(0,0)<1> P7
116+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0055(0,0)<1> V0057(0,0)<0;1,0>
117+
; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0059(0,0)<1> V0055(0,0)<0;1,0>
118+
; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_2(0,0)<1> V0060(0,0)<0;1,0> 0x2:uw
119+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform_0 ShuffleTmp_2(0,0)<0;1,0>
120+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes(0,0)<1> r[A3(0),0]<0;1,0>:ud
121+
; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 nonuniform_0(0,0)<1;1,0> firstActiveRes(0,0)<0;1,0>
122+
; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5
61123
;
124+
; CHECK-VISAASM: _test1_005_last_send:
125+
; CHECK-VISAASM-NEXT: lifetime.start V0039
126+
; CHECK-VISAASM-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes)[offset]:a32
127+
;
128+
; CHECK-VISAASM: _test1_007_unroll_merge:
129+
; CHECK-VISAASM-NEXT: mul (M1_NM, 1) V0061(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw
130+
; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A4(0)<1> &V0039 V0061(0,0)<0;1,0>
131+
; CHECK-VISAASM-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d
132+
; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0>
133+
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0>
134+
; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0>
135+
; CHECK-VISAASM-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32
136+
; CHECK-VISAASM-NEXT: ret (M1, 1)
137+
62138
%svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
63139
%nonuniform = zext i16 %svn to i32
64140
%NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)*
65-
%offset = add i32 %src1, 1
141+
%offset = add i32 %src1, %nonuniform
66142

67143
%call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false)
68144

@@ -71,8 +147,6 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
71147
ret void
72148
}
73149

74-
75-
76150
declare <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)*, i32, i32, i1) #4
77151

78152
declare i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32) #1
@@ -84,7 +158,6 @@ declare i32 @llvm.genx.GenISA.firstbitLo(i32)
84158

85159
attributes #4 = { argmemonly nounwind readonly }
86160

87-
88161
!IGCMetadata = !{!0}
89162
!igc.functions = !{!21}
90163

@@ -112,4 +185,3 @@ attributes #4 = { argmemonly nounwind readonly }
112185
!21 = !{void (i32, i32, i32 addrspace(1)*)* @test1, !22}
113186
!22 = !{!23}
114187
!23 = !{!"function_type", i32 0}
115-

0 commit comments

Comments
 (0)