77;
88;============================ end_copyright_notice =============================
99; REQUIRES: llvm-14-plus, regkeys
10- ; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -regkey ResourceLoopUnrollNested=4 -verify -S < %s | FileCheck %s
10+ ; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -regkey ResourceLoopUnrollNested=4 -verify -S < %s | FileCheck %s --check-prefix=CHECK-LL
11+ ; RUN: igc_opt -platformbmg -igc-resource-loop-unroll -igc-emit-visa -simd-mode 16 -inputrt -regkey ResourceLoopUnrollNested=4 -regkey DumpVISAASMToConsole -S < %s | FileCheck %s --check-prefix=CHECK-VISAASM
1112;
1213; Test checks both the LLVM IR and the vISA assembly emitted for an unrolled ResourceLoop (ResourceLoopUnrollNested=4).
1314
14-
1515@ThreadGroupSize_X = constant i32 64
1616@ThreadGroupSize_Y = constant i32 1
1717@ThreadGroupSize_Z = constant i32 1
1818
1919define spir_kernel void @test1 (i32 %src1 , i32 %val , i32 addrspace (1 )* %dst ) {
20- ; CHECK-LABEL: @test1(
21- ; CHECK-NEXT: [[SVN:%.*]] = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
22- ; CHECK-NEXT: [[NONUNIFORM:%.*]] = zext i16 [[SVN]] to i32
23- ; CHECK-NEXT: [[NONUNIFORMRESOURCE:%.*]] = inttoptr i32 [[NONUNIFORM]] to <4 x float> addrspace(2621440)*
24- ; CHECK-NEXT: [[OFFSET:%.*]] = add i32 [[SRC1:%.*]], 1
25- ; CHECK-NEXT: br label [[PARTIAL_CHECK5:%.*]]
26- ; CHECK: partial_check5:
27- ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
28- ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]])
29- ; CHECK-NEXT: [[FIRSTACTIVERES6:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP2]], i32 0)
30- ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES6]]
31- ; CHECK-NEXT: [[TMP4:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES6]], i32 [[OFFSET]], i32 4, i1 false)
32- ; CHECK-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]]
33- ; CHECK: partial_check3:
34- ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
35- ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]])
36- ; CHECK-NEXT: [[FIRSTACTIVERES4:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP6]], i32 0)
37- ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES4]]
38- ; CHECK-NEXT: [[TMP8:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES4]], i32 [[OFFSET]], i32 4, i1 false)
39- ; CHECK-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK1:%.*]]
40- ; CHECK: partial_check1:
41- ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
42- ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]])
43- ; CHECK-NEXT: [[FIRSTACTIVERES2:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP10]], i32 0)
44- ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES2]]
45- ; CHECK-NEXT: [[TMP12:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES2]], i32 [[OFFSET]], i32 4, i1 false)
46- ; CHECK-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK:%.*]]
47- ; CHECK: partial_check:
48- ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
49- ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]])
50- ; CHECK-NEXT: [[FIRSTACTIVERES:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP14]], i32 0)
51- ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES]]
52- ; CHECK-NEXT: [[TMP16:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES]], i32 [[OFFSET]], i32 4, i1 false)
53- ; CHECK-NEXT: br i1 [[TMP15]], label [[UNROLL_MERGE]], label [[LATCH:%.*]]
54- ; CHECK: latch:
55- ; CHECK-NEXT: br label [[PARTIAL_CHECK5]]
56- ; CHECK: unroll-merge:
57- ; CHECK-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[PARTIAL_CHECK]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
58- ; CHECK-NEXT: [[OUT:%.*]] = extractelement <3 x i32> [[TMP17]], i32 [[VAL:%.*]]
59- ; CHECK-NEXT: store i32 [[OUT]], i32 addrspace(1)* [[DST:%.*]], align 1
60- ; CHECK-NEXT: ret void
20+ ; CHECK-LL-LABEL: @test1(
21+ ; CHECK-LL: [[SVN:%.*]] = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
22+ ; CHECK-LL-NEXT: [[NONUNIFORM:%.*]] = zext i16 [[SVN]] to i32
23+ ; CHECK-LL-NEXT: [[NONUNIFORMRESOURCE:%.*]] = inttoptr i32 [[NONUNIFORM]] to <4 x float> addrspace(2621440)*
24+ ; CHECK-LL-NEXT: [[OFFSET:%.*]] = add i32 [[SRC1:%.*]], [[NONUNIFORM]]
25+ ; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5:%.*]]
26+ ; CHECK-LL: partial_check5:
27+ ; CHECK-LL-NEXT: [[TMP1:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
28+ ; CHECK-LL-NEXT: [[TMP2:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP1]])
29+ ; CHECK-LL-NEXT: [[FIRSTACTIVERES6:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP2]], i32 0)
30+ ; CHECK-LL-NEXT: [[TMP3:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES6]]
31+ ; CHECK-LL-NEXT: [[TMP4:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES6]], i32 [[OFFSET]], i32 4, i1 false)
32+ ; CHECK-LL-NEXT: br i1 [[TMP3]], label [[UNROLL_MERGE:%.*]], label [[PARTIAL_CHECK3:%.*]]
33+ ; CHECK-LL: partial_check3:
34+ ; CHECK-LL-NEXT: [[TMP5:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
35+ ; CHECK-LL-NEXT: [[TMP6:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP5]])
36+ ; CHECK-LL-NEXT: [[FIRSTACTIVERES4:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP6]], i32 0)
37+ ; CHECK-LL-NEXT: [[TMP7:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES4]]
38+ ; CHECK-LL-NEXT: [[TMP8:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES4]], i32 [[OFFSET]], i32 4, i1 false)
39+ ; CHECK-LL-NEXT: br i1 [[TMP7]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK1:%.*]]
40+ ; CHECK-LL: partial_check1:
41+ ; CHECK-LL-NEXT: [[TMP9:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
42+ ; CHECK-LL-NEXT: [[TMP10:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP9]])
43+ ; CHECK-LL-NEXT: [[FIRSTACTIVERES2:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP10]], i32 0)
44+ ; CHECK-LL-NEXT: [[TMP11:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES2]]
45+ ; CHECK-LL-NEXT: [[TMP12:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES2]], i32 [[OFFSET]], i32 4, i1 false)
46+ ; CHECK-LL-NEXT: br i1 [[TMP11]], label [[UNROLL_MERGE]], label [[PARTIAL_CHECK:%.*]]
47+ ; CHECK-LL: partial_check:
48+ ; CHECK-LL-NEXT: [[TMP13:%.*]] = call i32 @llvm.genx.GenISA.WaveBallot(i1 true, i32 0)
49+ ; CHECK-LL-NEXT: [[TMP14:%.*]] = call i32 @llvm.genx.GenISA.firstbitLo(i32 [[TMP13]])
50+ ; CHECK-LL-NEXT: [[FIRSTACTIVERES:%.*]] = call <4 x float> addrspace(2621440)* @llvm.genx.GenISA.WaveShuffleIndex.p2621440v4f32(<4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], i32 [[TMP14]], i32 0)
51+ ; CHECK-LL-NEXT: [[TMP15:%.*]] = icmp eq <4 x float> addrspace(2621440)* [[NONUNIFORMRESOURCE]], [[FIRSTACTIVERES]]
52+ ; CHECK-LL-NEXT: br i1 [[TMP15]], label [[LAST_SEND:%.*]], label [[LATCH:%.*]]
53+ ; CHECK-LL: last_send:
54+ ; CHECK-LL-NEXT: [[TMP16:%.*]] = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* [[FIRSTACTIVERES]], i32 [[OFFSET]], i32 4, i1 false)
55+ ; CHECK-LL-NEXT: br label [[UNROLL_MERGE]]
56+ ; CHECK-LL: latch:
57+ ; CHECK-LL-NEXT: br label [[PARTIAL_CHECK5]]
58+ ; CHECK-LL: unroll-merge:
59+ ; CHECK-LL-NEXT: [[TMP17:%.*]] = phi <3 x i32> [ [[TMP16]], [[LAST_SEND]] ], [ [[TMP12]], [[PARTIAL_CHECK1]] ], [ [[TMP8]], [[PARTIAL_CHECK3]] ], [ [[TMP4]], [[PARTIAL_CHECK5]] ], !MyUniqueExclusiveLoadMetadata !24
60+ ; CHECK-LL-NEXT: [[OUT:%.*]] = extractelement <3 x i32> [[TMP17]], i32 [[VAL:%.*]]
61+ ; CHECK-LL-NEXT: store i32 [[OUT]], i32 addrspace(1)* [[DST:%.*]], align 1
62+ ; CHECK-LL-NEXT: ret void
63+ ;
64+ ; COM: Check that each unrolled load is predicated and is immediately preceded by lifetime.start for its destination.
65+ ; CHECK-VISAASM: _main_0:
66+ ; CHECK-VISAASM-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0>
67+ ; CHECK-VISAASM-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0>
68+ ; CHECK-VISAASM-NEXT: add (M1, 16) offset(0,0)<1> src1(0,0)<0;1,0> nonuniform(0,0)<1;1,0>
69+ ;
70+ ; CHECK-VISAASM: _test1_001_partial_check5:
71+ ; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P1 0x0:ud
72+ ; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P1 V0034(0,0)<0;1,0> V0034(0,0)<0;1,0>
73+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0035(0,0)<1> P1
74+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0033(0,0)<1> V0035(0,0)<0;1,0>
75+ ; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0>
76+ ; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp(0,0)<1> V0038(0,0)<0;1,0> 0x2:uw
77+ ; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform_0 ShuffleTmp(0,0)<0;1,0>
78+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes6(0,0)<1> r[A0(0),0]<0;1,0>:ud
79+ ; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P2 nonuniform_0(0,0)<1;1,0> firstActiveRes6(0,0)<0;1,0>
80+ ; CHECK-VISAASM-NEXT: lifetime.start V0039
81+ ; CHECK-VISAASM-NEXT: (P2) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes6)[offset]:a32
82+ ; CHECK-VISAASM-NEXT: (P2) goto (M1, 16) _test1_007_unroll_merge
83+ ;
84+ ; CHECK-VISAASM: _test1_002_partial_check3:
85+ ; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P3 0x0:ud
86+ ; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P3 V0042(0,0)<0;1,0> V0042(0,0)<0;1,0>
87+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0043(0,0)<1> P3
88+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0041(0,0)<1> V0043(0,0)<0;1,0>
89+ ; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0045(0,0)<1> V0041(0,0)<0;1,0>
90+ ; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_0(0,0)<1> V0046(0,0)<0;1,0> 0x2:uw
91+ ; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform_0 ShuffleTmp_0(0,0)<0;1,0>
92+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes4(0,0)<1> r[A1(0),0]<0;1,0>:ud
93+ ; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P4 nonuniform_0(0,0)<1;1,0> firstActiveRes4(0,0)<0;1,0>
94+ ; CHECK-VISAASM-NEXT: lifetime.start V0039
95+ ; CHECK-VISAASM-NEXT: (P4) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes4)[offset]:a32
96+ ; CHECK-VISAASM-NEXT: (P4) goto (M1, 16) _test1_007_unroll_merge
97+ ;
98+ ; CHECK-VISAASM: _test1_003_partial_check1:
99+ ; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P5 0x0:ud
100+ ; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P5 V0049(0,0)<0;1,0> V0049(0,0)<0;1,0>
101+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0050(0,0)<1> P5
102+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0048(0,0)<1> V0050(0,0)<0;1,0>
103+ ; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0052(0,0)<1> V0048(0,0)<0;1,0>
104+ ; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_1(0,0)<1> V0053(0,0)<0;1,0> 0x2:uw
105+ ; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform_0 ShuffleTmp_1(0,0)<0;1,0>
106+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes2(0,0)<1> r[A2(0),0]<0;1,0>:ud
107+ ; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P6 nonuniform_0(0,0)<1;1,0> firstActiveRes2(0,0)<0;1,0>
108+ ; CHECK-VISAASM-NEXT: lifetime.start V0039
109+ ; CHECK-VISAASM-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes2)[offset]:a32
110+ ; CHECK-VISAASM-NEXT: (P6) goto (M1, 16) _test1_007_unroll_merge
111+ ;
112+ ; CHECK-VISAASM: _test1_004_partial_check:
113+ ; CHECK-VISAASM-NEXT: setp (M1_NM, 16) P7 0x0:ud
114+ ; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P7 V0056(0,0)<0;1,0> V0056(0,0)<0;1,0>
115+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0057(0,0)<1> P7
116+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) V0055(0,0)<1> V0057(0,0)<0;1,0>
117+ ; CHECK-VISAASM-NEXT: fbl (M1_NM, 1) V0059(0,0)<1> V0055(0,0)<0;1,0>
118+ ; CHECK-VISAASM-NEXT: shl (M1_NM, 1) ShuffleTmp_2(0,0)<1> V0060(0,0)<0;1,0> 0x2:uw
119+ ; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform_0 ShuffleTmp_2(0,0)<0;1,0>
120+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) firstActiveRes(0,0)<1> r[A3(0),0]<0;1,0>:ud
121+ ; CHECK-VISAASM-NEXT: cmp.eq (M1, 16) P8 nonuniform_0(0,0)<1;1,0> firstActiveRes(0,0)<0;1,0>
122+ ; CHECK-VISAASM-NEXT: (!P8) goto (M1, 16) _test1_001_partial_check5
61123;
124+ ; CHECK-VISAASM: _test1_005_last_send:
125+ ; CHECK-VISAASM-NEXT: lifetime.start V0039
126+ ; CHECK-VISAASM-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) V0039:d32x3 bss(firstActiveRes)[offset]:a32
127+ ;
128+ ; CHECK-VISAASM: _test1_007_unroll_merge:
129+ ; CHECK-VISAASM-NEXT: mul (M1_NM, 1) V0061(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw
130+ ; CHECK-VISAASM-NEXT: addr_add (M1_NM, 1) A4(0)<1> &V0039 V0061(0,0)<0;1,0>
131+ ; CHECK-VISAASM-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d
132+ ; CHECK-VISAASM-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0>
133+ ; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0>
134+ ; CHECK-VISAASM-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0>
135+ ; CHECK-VISAASM-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32
136+ ; CHECK-VISAASM-NEXT: ret (M1, 1)
137+
62138 %svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16 (i32 17 )
63139 %nonuniform = zext i16 %svn to i32
64140 %NonUniformResource = inttoptr i32 %nonuniform to <4 x float > addrspace (2621440 )*
65- %offset = add i32 %src1 , 1
141+ %offset = add i32 %src1 , %nonuniform
66142
67143 %call = call <3 x i32 > @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32 (<4 x float > addrspace (2621440 )* %NonUniformResource , i32 %offset , i32 4 , i1 false )
68144
@@ -71,8 +147,6 @@ define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
71147 ret void
72148}
73149
74-
75-
76150declare <3 x i32 > @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32 (<4 x float > addrspace (2621440 )*, i32 , i32 , i1 ) #4
77151
78152declare i16 @llvm.genx.GenISA.DCL.SystemValue.i16 (i32 ) #1
@@ -84,7 +158,6 @@ declare i32 @llvm.genx.GenISA.firstbitLo(i32)
84158
85159attributes #4 = { argmemonly nounwind readonly }
86160
87-
88161!IGCMetadata = !{!0 }
89162!igc.functions = !{!21 }
90163
@@ -112,4 +185,3 @@ attributes #4 = { argmemonly nounwind readonly }
112185!21 = !{void (i32 , i32 , i32 addrspace (1 )*)* @test1 , !22 }
113186!22 = !{!23 }
114187!23 = !{!"function_type" , i32 0 }
115-
0 commit comments