Skip to content

Commit 5099306

Browse files
committed
Update test results
Signed-off-by: John Lu <[email protected]>
1 parent a0ec5b4 commit 5099306

File tree

1 file changed

+69
-71
lines changed

1 file changed

+69
-71
lines changed

llvm/test/CodeGen/AMDGPU/srl64_reduce.ll

Lines changed: 69 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,9 @@ define i64 @srl_metadata(i64 %arg0, ptr %arg1.ptr) {
1717
; CHECK-LABEL: srl_metadata:
1818
; CHECK: ; %bb.0:
1919
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20-
; CHECK-NEXT: flat_load_dword v2, v[2:3]
20+
; CHECK-NEXT: flat_load_dword v0, v[2:3]
2121
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
22-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
22+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v0, v1
2323
; CHECK-NEXT: v_mov_b32_e32 v1, 0
2424
; CHECK-NEXT: s_setpc_b64 s[30:31]
2525
%shift.amt = load i64, ptr %arg1.ptr, !range !0, !noundef !{}
@@ -30,9 +30,9 @@ define i64 @srl_metadata(i64 %arg0, ptr %arg1.ptr) {
3030
define amdgpu_ps i64 @srl_metadata_sgpr_return(i64 inreg %arg0, ptr addrspace(1) inreg %arg1.ptr) {
3131
; CHECK-LABEL: srl_metadata_sgpr_return:
3232
; CHECK: ; %bb.0:
33-
; CHECK-NEXT: s_load_dword s2, s[2:3], 0x0
33+
; CHECK-NEXT: s_load_dword s0, s[2:3], 0x0
3434
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
35-
; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
35+
; CHECK-NEXT: s_lshr_b32 s0, s1, s0
3636
; CHECK-NEXT: s_mov_b32 s1, 0
3737
; CHECK-NEXT: ; return to shader part epilog
3838
%shift.amt = load i64, ptr addrspace(1) %arg1.ptr, !range !0, !noundef !{}
@@ -59,9 +59,9 @@ define i64 @srl_metadata_two_ranges(i64 %arg0, ptr %arg1.ptr) {
5959
; CHECK-LABEL: srl_metadata_two_ranges:
6060
; CHECK: ; %bb.0:
6161
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62-
; CHECK-NEXT: flat_load_dword v2, v[2:3]
62+
; CHECK-NEXT: flat_load_dword v0, v[2:3]
6363
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
64-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
64+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v0, v1
6565
; CHECK-NEXT: v_mov_b32_e32 v1, 0
6666
; CHECK-NEXT: s_setpc_b64 s[30:31]
6767
%shift.amt = load i64, ptr %arg1.ptr, !range !1, !noundef !{}
@@ -106,8 +106,10 @@ define <2 x i64> @srl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
106106
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107107
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
108108
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
109-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v4, v[0:1]
110-
; CHECK-NEXT: v_lshrrev_b64 v[2:3], v6, v[2:3]
109+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v4, v1
110+
; CHECK-NEXT: v_lshrrev_b32_e32 v2, v6, v3
111+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
112+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
111113
; CHECK-NEXT: s_setpc_b64 s[30:31]
112114
%shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
113115
%srl = lshr <2 x i64> %arg0, %shift.amt
@@ -135,12 +137,15 @@ define <3 x i64> @srl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
135137
; CHECK-LABEL: srl_v3_metadata:
136138
; CHECK: ; %bb.0:
137139
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138-
; CHECK-NEXT: flat_load_dword v12, v[6:7] offset:16
140+
; CHECK-NEXT: flat_load_dword v0, v[6:7] offset:16
139141
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[6:7]
140142
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
141-
; CHECK-NEXT: v_lshrrev_b64 v[4:5], v12, v[4:5]
142-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v8, v[0:1]
143-
; CHECK-NEXT: v_lshrrev_b64 v[2:3], v10, v[2:3]
143+
; CHECK-NEXT: v_lshrrev_b32_e32 v4, v0, v5
144+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v8, v1
145+
; CHECK-NEXT: v_lshrrev_b32_e32 v2, v10, v3
146+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
147+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
148+
; CHECK-NEXT: v_mov_b32_e32 v5, 0
144149
; CHECK-NEXT: s_setpc_b64 s[30:31]
145150
%shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
146151
%srl = lshr <3 x i64> %arg0, %shift.amt
@@ -155,11 +160,15 @@ define <4 x i64> @srl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
155160
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
156161
; CHECK-NEXT: flat_load_dwordx4 v[13:16], v[8:9] offset:16
157162
; CHECK-NEXT: ; kill: killed $vgpr8 killed $vgpr9
158-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v10, v[0:1]
159-
; CHECK-NEXT: v_lshrrev_b64 v[2:3], v12, v[2:3]
163+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v10, v1
164+
; CHECK-NEXT: v_lshrrev_b32_e32 v2, v12, v3
160165
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
161-
; CHECK-NEXT: v_lshrrev_b64 v[4:5], v13, v[4:5]
162-
; CHECK-NEXT: v_lshrrev_b64 v[6:7], v15, v[6:7]
166+
; CHECK-NEXT: v_lshrrev_b32_e32 v4, v13, v5
167+
; CHECK-NEXT: v_lshrrev_b32_e32 v6, v15, v7
168+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
169+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
170+
; CHECK-NEXT: v_mov_b32_e32 v5, 0
171+
; CHECK-NEXT: v_mov_b32_e32 v7, 0
163172
; CHECK-NEXT: s_setpc_b64 s[30:31]
164173
%shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
165174
%srl = lshr <4 x i64> %arg0, %shift.amt
@@ -339,8 +348,7 @@ define i64 @srl_or32(i64 %arg0, i64 %shift_amt) {
339348
; CHECK-LABEL: srl_or32:
340349
; CHECK: ; %bb.0:
341350
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342-
; CHECK-NEXT: v_or_b32_e32 v2, 32, v2
343-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
351+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v2, v1
344352
; CHECK-NEXT: v_mov_b32_e32 v1, 0
345353
; CHECK-NEXT: s_setpc_b64 s[30:31]
346354
%or = or i64 %shift_amt, 32
@@ -352,10 +360,10 @@ define <2 x i64> @srl_v2_or32(<2 x i64> %arg0, <2 x i64> %shift_amt) {
352360
; CHECK-LABEL: srl_v2_or32:
353361
; CHECK: ; %bb.0:
354362
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355-
; CHECK-NEXT: v_or_b32_e32 v5, 32, v6
356-
; CHECK-NEXT: v_or_b32_e32 v4, 32, v4
357-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v4, v[0:1]
358-
; CHECK-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
363+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v4, v1
364+
; CHECK-NEXT: v_lshrrev_b32_e32 v2, v6, v3
365+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
366+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
359367
; CHECK-NEXT: s_setpc_b64 s[30:31]
360368
%or = or <2 x i64> %shift_amt, splat (i64 32)
361369
%srl = lshr <2 x i64> %arg0, %or
@@ -366,12 +374,12 @@ define <3 x i64> @srl_v3_or32(<3 x i64> %arg0, <3 x i64> %shift_amt) {
366374
; CHECK-LABEL: srl_v3_or32:
367375
; CHECK: ; %bb.0:
368376
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369-
; CHECK-NEXT: v_or_b32_e32 v7, 32, v10
370-
; CHECK-NEXT: v_or_b32_e32 v8, 32, v8
371-
; CHECK-NEXT: v_or_b32_e32 v6, 32, v6
372-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v6, v[0:1]
373-
; CHECK-NEXT: v_lshrrev_b64 v[2:3], v8, v[2:3]
374-
; CHECK-NEXT: v_lshrrev_b64 v[4:5], v7, v[4:5]
377+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v6, v1
378+
; CHECK-NEXT: v_lshrrev_b32_e32 v2, v8, v3
379+
; CHECK-NEXT: v_lshrrev_b32_e32 v4, v10, v5
380+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
381+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
382+
; CHECK-NEXT: v_mov_b32_e32 v5, 0
375383
; CHECK-NEXT: s_setpc_b64 s[30:31]
376384
%or = or <3 x i64> %shift_amt, splat (i64 32)
377385
%srl = lshr <3 x i64> %arg0, %or
@@ -382,14 +390,14 @@ define <4 x i64> @srl_v4_or32(<4 x i64> %arg0, <4 x i64> %shift_amt) {
382390
; CHECK-LABEL: srl_v4_or32:
383391
; CHECK: ; %bb.0:
384392
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385-
; CHECK-NEXT: v_or_b32_e32 v9, 32, v14
386-
; CHECK-NEXT: v_or_b32_e32 v11, 32, v12
387-
; CHECK-NEXT: v_or_b32_e32 v10, 32, v10
388-
; CHECK-NEXT: v_or_b32_e32 v8, 32, v8
389-
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v8, v[0:1]
390-
; CHECK-NEXT: v_lshrrev_b64 v[2:3], v10, v[2:3]
391-
; CHECK-NEXT: v_lshrrev_b64 v[4:5], v11, v[4:5]
392-
; CHECK-NEXT: v_lshrrev_b64 v[6:7], v9, v[6:7]
393+
; CHECK-NEXT: v_lshrrev_b32_e32 v0, v8, v1
394+
; CHECK-NEXT: v_lshrrev_b32_e32 v2, v10, v3
395+
; CHECK-NEXT: v_lshrrev_b32_e32 v4, v12, v5
396+
; CHECK-NEXT: v_lshrrev_b32_e32 v6, v14, v7
397+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
398+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
399+
; CHECK-NEXT: v_mov_b32_e32 v5, 0
400+
; CHECK-NEXT: v_mov_b32_e32 v7, 0
393401
; CHECK-NEXT: s_setpc_b64 s[30:31]
394402
%or = or <4 x i64> %shift_amt, splat (i64 32)
395403
%srl = lshr <4 x i64> %arg0, %or
@@ -402,8 +410,7 @@ define i64 @srl_or32_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
402410
; CHECK-LABEL: srl_or32_sgpr:
403411
; CHECK: ; %bb.0:
404412
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405-
; CHECK-NEXT: s_or_b32 s4, s18, 32
406-
; CHECK-NEXT: s_lshr_b64 s[4:5], s[16:17], s4
413+
; CHECK-NEXT: s_lshr_b32 s4, s17, s18
407414
; CHECK-NEXT: v_mov_b32_e32 v0, s4
408415
; CHECK-NEXT: v_mov_b32_e32 v1, 0
409416
; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -415,8 +422,7 @@ define i64 @srl_or32_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
415422
define amdgpu_ps i64 @srl_or32_sgpr_return(i64 inreg %arg0, i64 inreg %shift_amt) {
416423
; CHECK-LABEL: srl_or32_sgpr_return:
417424
; CHECK: ; %bb.0:
418-
; CHECK-NEXT: s_or_b32 s2, s2, 32
419-
; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
425+
; CHECK-NEXT: s_lshr_b32 s0, s1, s2
420426
; CHECK-NEXT: s_mov_b32 s1, 0
421427
; CHECK-NEXT: ; return to shader part epilog
422428
%or = or i64 %shift_amt, 32
@@ -428,14 +434,12 @@ define <2 x i64> @srl_v2_or32_sgpr(<2 x i64> inreg %arg0, <2 x i64> inreg %shift
428434
; CHECK-LABEL: srl_v2_or32_sgpr:
429435
; CHECK: ; %bb.0:
430436
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431-
; CHECK-NEXT: s_or_b32 s6, s22, 32
432-
; CHECK-NEXT: s_or_b32 s4, s20, 32
433-
; CHECK-NEXT: s_lshr_b64 s[4:5], s[16:17], s4
434-
; CHECK-NEXT: s_lshr_b64 s[6:7], s[18:19], s6
437+
; CHECK-NEXT: s_lshr_b32 s4, s17, s20
438+
; CHECK-NEXT: s_lshr_b32 s5, s19, s22
435439
; CHECK-NEXT: v_mov_b32_e32 v0, s4
436-
; CHECK-NEXT: v_mov_b32_e32 v1, s5
437-
; CHECK-NEXT: v_mov_b32_e32 v2, s6
438-
; CHECK-NEXT: v_mov_b32_e32 v3, s7
440+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
441+
; CHECK-NEXT: v_mov_b32_e32 v2, s5
442+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
439443
; CHECK-NEXT: s_setpc_b64 s[30:31]
440444
%or = or <2 x i64> %shift_amt, splat (i64 32)
441445
%srl = lshr <2 x i64> %arg0, %or
@@ -446,18 +450,15 @@ define <3 x i64> @srl_v3_or32_sgpr(<3 x i64> inreg %arg0, <3 x i64> inreg %shift
446450
; CHECK-LABEL: srl_v3_or32_sgpr:
447451
; CHECK: ; %bb.0:
448452
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449-
; CHECK-NEXT: s_or_b32 s8, s26, 32
450-
; CHECK-NEXT: s_or_b32 s6, s24, 32
451-
; CHECK-NEXT: s_or_b32 s4, s22, 32
452-
; CHECK-NEXT: s_lshr_b64 s[4:5], s[16:17], s4
453-
; CHECK-NEXT: s_lshr_b64 s[6:7], s[18:19], s6
454-
; CHECK-NEXT: s_lshr_b64 s[8:9], s[20:21], s8
453+
; CHECK-NEXT: s_lshr_b32 s4, s17, s22
454+
; CHECK-NEXT: s_lshr_b32 s5, s19, s24
455+
; CHECK-NEXT: s_lshr_b32 s6, s21, s26
455456
; CHECK-NEXT: v_mov_b32_e32 v0, s4
456-
; CHECK-NEXT: v_mov_b32_e32 v1, s5
457-
; CHECK-NEXT: v_mov_b32_e32 v2, s6
458-
; CHECK-NEXT: v_mov_b32_e32 v3, s7
459-
; CHECK-NEXT: v_mov_b32_e32 v4, s8
460-
; CHECK-NEXT: v_mov_b32_e32 v5, s9
457+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
458+
; CHECK-NEXT: v_mov_b32_e32 v2, s5
459+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
460+
; CHECK-NEXT: v_mov_b32_e32 v4, s6
461+
; CHECK-NEXT: v_mov_b32_e32 v5, 0
461462
; CHECK-NEXT: s_setpc_b64 s[30:31]
462463
%or = or <3 x i64> %shift_amt, splat (i64 32)
463464
%srl = lshr <3 x i64> %arg0, %or
@@ -468,20 +469,17 @@ define <4 x i64> @srl_v4_or32_sgpr(<4 x i64> inreg %arg0, <4 x i64> inreg %shift
468469
; CHECK-LABEL: srl_v4_or32_sgpr:
469470
; CHECK: ; %bb.0:
470471
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471-
; CHECK-NEXT: v_or_b32_e32 v0, 32, v0
472-
; CHECK-NEXT: s_or_b32 s8, s28, 32
473-
; CHECK-NEXT: s_or_b32 s6, s26, 32
474-
; CHECK-NEXT: s_or_b32 s4, s24, 32
475-
; CHECK-NEXT: s_lshr_b64 s[4:5], s[16:17], s4
476-
; CHECK-NEXT: s_lshr_b64 s[6:7], s[18:19], s6
477-
; CHECK-NEXT: s_lshr_b64 s[8:9], s[20:21], s8
478-
; CHECK-NEXT: v_lshrrev_b64 v[6:7], v0, s[22:23]
472+
; CHECK-NEXT: s_lshr_b32 s4, s17, s24
473+
; CHECK-NEXT: s_lshr_b32 s5, s19, s26
474+
; CHECK-NEXT: s_lshr_b32 s6, s21, s28
475+
; CHECK-NEXT: v_lshrrev_b32_e64 v6, v0, s23
479476
; CHECK-NEXT: v_mov_b32_e32 v0, s4
480-
; CHECK-NEXT: v_mov_b32_e32 v1, s5
481-
; CHECK-NEXT: v_mov_b32_e32 v2, s6
482-
; CHECK-NEXT: v_mov_b32_e32 v3, s7
483-
; CHECK-NEXT: v_mov_b32_e32 v4, s8
484-
; CHECK-NEXT: v_mov_b32_e32 v5, s9
477+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
478+
; CHECK-NEXT: v_mov_b32_e32 v2, s5
479+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
480+
; CHECK-NEXT: v_mov_b32_e32 v4, s6
481+
; CHECK-NEXT: v_mov_b32_e32 v5, 0
482+
; CHECK-NEXT: v_mov_b32_e32 v7, 0
485483
; CHECK-NEXT: s_setpc_b64 s[30:31]
486484
%or = or <4 x i64> %shift_amt, splat (i64 32)
487485
%srl = lshr <4 x i64> %arg0, %or

0 commit comments

Comments (0)