|
17 | 17 | @ThreadGroupSize_Z = constant i32 1 |
18 | 18 |
|
19 | 19 | define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) { |
20 | | -; CHECK: _main_0: |
21 | | -; CHECK-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> |
22 | | -; CHECK-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> |
23 | | -; CHECK-NEXT: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w |
24 | | -; CHECK-NEXT: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0> |
25 | | -; CHECK-NEXT: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0> |
26 | | -; CHECK-NEXT: setp (M1_NM, 16) P1 0x0:ud |
27 | | -; CHECK-NEXT: setp (M1_NM, 16) P2 0x0:ud |
28 | | -; CHECK-NEXT: setp (M1_NM, 16) P3 0x0:ud |
29 | | -; CHECK-NEXT: lifetime.start call_ |
30 | | -; |
31 | | -; CHECK: _test1_001__opt_resource_loop: |
32 | | -; CHECK-NEXT: setp (M1_NM, 16) P4 0x0:ud |
33 | | -; CHECK-NEXT: setp (M1_NM, 16) P5 0x0:ud |
34 | | -; CHECK-NEXT: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0> |
35 | | -; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
36 | | -; CHECK-NEXT: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0> |
37 | | -; CHECK-NEXT: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w |
38 | | -; CHECK-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0> |
39 | | -; CHECK-NEXT: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d |
40 | | -; CHECK-NEXT: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
41 | | -; CHECK-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32 |
42 | | -; CHECK-NEXT: or (M1_NM, 16) P4 P4 P6 |
43 | | -; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P6 |
44 | | -; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
45 | | -; CHECK-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> |
46 | | -; CHECK-NEXT: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud |
47 | | -; CHECK-NEXT: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w |
48 | | -; CHECK-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0> |
49 | | -; CHECK-NEXT: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d |
50 | | -; CHECK-NEXT: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
51 | | -; CHECK-NEXT: and (M1_NM, 16) P7 P7 P5 |
52 | | -; CHECK-NEXT: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32 |
53 | | -; CHECK-NEXT: or (M1_NM, 16) P4 P4 P7 |
54 | | -; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P7 |
55 | | -; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
56 | | -; CHECK-NEXT: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0> |
57 | | -; CHECK-NEXT: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud |
58 | | -; CHECK-NEXT: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w |
59 | | -; CHECK-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0> |
60 | | -; CHECK-NEXT: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d |
61 | | -; CHECK-NEXT: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
62 | | -; CHECK-NEXT: and (M1_NM, 16) P8 P8 P5 |
63 | | -; CHECK-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32 |
64 | | -; CHECK-NEXT: or (M1_NM, 16) P4 P4 P8 |
65 | | -; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P8 |
66 | | -; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
67 | | -; CHECK-NEXT: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0> |
68 | | -; CHECK-NEXT: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud |
69 | | -; CHECK-NEXT: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w |
70 | | -; CHECK-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0> |
71 | | -; CHECK-NEXT: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d |
72 | | -; CHECK-NEXT: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
73 | | -; CHECK-NEXT: and (M1_NM, 16) P9 P9 P5 |
74 | | -; CHECK-NEXT: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32 |
75 | | -; CHECK-NEXT: or (M1_NM, 16) P4 P4 P9 |
76 | | -; CHECK-NEXT: (!P4) goto (M1, 16) _test1_001__opt_resource_loop |
77 | | -; CHECK-NEXT: mul (M1_NM, 1) V0046(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw |
78 | | -; CHECK-NEXT: addr_add (M1_NM, 1) A4(0)<1> &call_ V0046(0,0)<0;1,0> |
79 | | -; CHECK-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d |
80 | | -; CHECK-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0> |
81 | | -; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0> |
82 | | -; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0> |
83 | | -; CHECK-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32 |
84 | | -; CHECK-NEXT: ret (M1, 1) |
| 20 | +entry: |
| 21 | +; CHECK: _main_0: |
85 | 22 |
|
86 | 23 | %svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17) |
| 24 | +; CHECK: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> |
87 | 25 |
|
88 | 26 | %nonuniform = zext i16 %svn to i32 |
| 27 | +; CHECK: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> |
89 | 28 |
|
90 | 29 | %NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)* |
91 | 30 | %offset = add i32 %src1, 1 |
| 31 | +; CHECK: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w |
| 32 | +; CHECK: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0> |
| 33 | +; CHECK: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0> |
92 | 34 |
|
93 | 35 | %call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false) |
| 36 | +; CHECK: _test1_001__opt_resource_loop: |
| 37 | +; CHECK: setp (M1_NM, 16) P4 0x0:ud |
| 38 | +; CHECK: setp (M1_NM, 16) P5 0x0:ud |
| 39 | +; CHECK: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0> |
| 40 | +; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 41 | +; CHECK: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0> |
| 42 | +; CHECK: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w |
| 43 | +; CHECK: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0> |
| 44 | +; CHECK: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d |
| 45 | +; CHECK: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 46 | +; CHECK: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32 |
| 47 | +; CHECK: or (M1_NM, 16) P4 P4 P6 |
| 48 | +; CHECK: xor (M1_NM, 16) P5 P5 P6 |
| 49 | +; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 50 | +; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> |
| 51 | +; CHECK: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud |
| 52 | +; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w |
| 53 | +; CHECK: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0> |
| 54 | +; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d |
| 55 | +; CHECK: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 56 | +; CHECK: and (M1_NM, 16) P7 P7 P5 |
| 57 | +; CHECK: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32 |
| 58 | +; CHECK: or (M1_NM, 16) P4 P4 P7 |
| 59 | +; CHECK: xor (M1_NM, 16) P5 P5 P7 |
| 60 | +; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 61 | +; CHECK: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0> |
| 62 | +; CHECK: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud |
| 63 | +; CHECK: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w |
| 64 | +; CHECK: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0> |
| 65 | +; CHECK: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d |
| 66 | +; CHECK: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 67 | +; CHECK: and (M1_NM, 16) P8 P8 P5 |
| 68 | +; CHECK: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32 |
| 69 | +; CHECK: or (M1_NM, 16) P4 P4 P8 |
| 70 | +; CHECK: xor (M1_NM, 16) P5 P5 P8 |
| 71 | +; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 72 | +; CHECK: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0> |
| 73 | +; CHECK: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud |
| 74 | +; CHECK: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w |
| 75 | +; CHECK: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0> |
| 76 | +; CHECK: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d |
| 77 | +; CHECK: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 78 | +; CHECK: and (M1_NM, 16) P9 P9 P5 |
| 79 | +; CHECK: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32 |
| 80 | +; CHECK: or (M1_NM, 16) P4 P4 P9 |
| 81 | +; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop |
94 | 82 | %out = extractelement <3 x i32> %call, i32 %val |
95 | 83 | store i32 %out, i32 addrspace(1)* %dst, align 1 |
96 | 84 | ret void |
|
0 commit comments