@@ -17,9 +17,9 @@ define i64 @srl_metadata(i64 %arg0, ptr %arg1.ptr) {
1717; CHECK-LABEL: srl_metadata:
1818; CHECK: ; %bb.0:
1919; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20- ; CHECK-NEXT: flat_load_dword v2 , v[2:3]
20+ ; CHECK-NEXT: flat_load_dword v0 , v[2:3]
2121; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
22- ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
22+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0, v0, v1
2323; CHECK-NEXT: v_mov_b32_e32 v1, 0
2424; CHECK-NEXT: s_setpc_b64 s[30:31]
2525 %shift.amt = load i64 , ptr %arg1.ptr , !range !0 , !noundef !{}
@@ -30,9 +30,9 @@ define i64 @srl_metadata(i64 %arg0, ptr %arg1.ptr) {
3030define amdgpu_ps i64 @srl_metadata_sgpr_return (i64 inreg %arg0 , ptr addrspace (1 ) inreg %arg1.ptr ) {
3131; CHECK-LABEL: srl_metadata_sgpr_return:
3232; CHECK: ; %bb.0:
33- ; CHECK-NEXT: s_load_dword s2 , s[2:3], 0x0
33+ ; CHECK-NEXT: s_load_dword s0 , s[2:3], 0x0
3434; CHECK-NEXT: s_waitcnt lgkmcnt(0)
35- ; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
35+ ; CHECK-NEXT: s_lshr_b32 s0, s1, s0
3636; CHECK-NEXT: s_mov_b32 s1, 0
3737; CHECK-NEXT: ; return to shader part epilog
3838 %shift.amt = load i64 , ptr addrspace (1 ) %arg1.ptr , !range !0 , !noundef !{}
@@ -59,9 +59,9 @@ define i64 @srl_metadata_two_ranges(i64 %arg0, ptr %arg1.ptr) {
5959; CHECK-LABEL: srl_metadata_two_ranges:
6060; CHECK: ; %bb.0:
6161; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62- ; CHECK-NEXT: flat_load_dword v2 , v[2:3]
62+ ; CHECK-NEXT: flat_load_dword v0 , v[2:3]
6363; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
64- ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
64+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0, v0, v1
6565; CHECK-NEXT: v_mov_b32_e32 v1, 0
6666; CHECK-NEXT: s_setpc_b64 s[30:31]
6767 %shift.amt = load i64 , ptr %arg1.ptr , !range !1 , !noundef !{}
@@ -106,8 +106,10 @@ define <2 x i64> @srl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
106106; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107107; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
108108; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
109- ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v4, v[0:1]
110- ; CHECK-NEXT: v_lshrrev_b64 v[2:3], v6, v[2:3]
109+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0, v4, v1
110+ ; CHECK-NEXT: v_lshrrev_b32_e32 v2, v6, v3
111+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
112+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
111113; CHECK-NEXT: s_setpc_b64 s[30:31]
112114 %shift.amt = load <2 x i64 >, ptr %arg1.ptr , !range !0 , !noundef !{}
113115 %srl = lshr <2 x i64 > %arg0 , %shift.amt
@@ -135,12 +137,15 @@ define <3 x i64> @srl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
135137; CHECK-LABEL: srl_v3_metadata:
136138; CHECK: ; %bb.0:
137139; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138- ; CHECK-NEXT: flat_load_dword v12 , v[6:7] offset:16
140+ ; CHECK-NEXT: flat_load_dword v0 , v[6:7] offset:16
139141; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[6:7]
140142; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
141- ; CHECK-NEXT: v_lshrrev_b64 v[4:5], v12, v[4:5]
142- ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v8, v[0:1]
143- ; CHECK-NEXT: v_lshrrev_b64 v[2:3], v10, v[2:3]
143+ ; CHECK-NEXT: v_lshrrev_b32_e32 v4, v0, v5
144+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0, v8, v1
145+ ; CHECK-NEXT: v_lshrrev_b32_e32 v2, v10, v3
146+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
147+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
148+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
144149; CHECK-NEXT: s_setpc_b64 s[30:31]
145150 %shift.amt = load <3 x i64 >, ptr %arg1.ptr , !range !0 , !noundef !{}
146151 %srl = lshr <3 x i64 > %arg0 , %shift.amt
@@ -155,11 +160,15 @@ define <4 x i64> @srl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
155160; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
156161; CHECK-NEXT: flat_load_dwordx4 v[13:16], v[8:9] offset:16
157162; CHECK-NEXT: ; kill: killed $vgpr8 killed $vgpr9
158- ; CHECK-NEXT: v_lshrrev_b64 v[0:1] , v10, v[0:1]
159- ; CHECK-NEXT: v_lshrrev_b64 v[2:3] , v12, v[2:3]
163+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0 , v10, v1
164+ ; CHECK-NEXT: v_lshrrev_b32_e32 v2 , v12, v3
160165; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
161- ; CHECK-NEXT: v_lshrrev_b64 v[4:5], v13, v[4:5]
162- ; CHECK-NEXT: v_lshrrev_b64 v[6:7], v15, v[6:7]
166+ ; CHECK-NEXT: v_lshrrev_b32_e32 v4, v13, v5
167+ ; CHECK-NEXT: v_lshrrev_b32_e32 v6, v15, v7
168+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
169+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
170+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
171+ ; CHECK-NEXT: v_mov_b32_e32 v7, 0
163172; CHECK-NEXT: s_setpc_b64 s[30:31]
164173 %shift.amt = load <4 x i64 >, ptr %arg1.ptr , !range !0 , !noundef !{}
165174 %srl = lshr <4 x i64 > %arg0 , %shift.amt
@@ -339,8 +348,7 @@ define i64 @srl_or32(i64 %arg0, i64 %shift_amt) {
339348; CHECK-LABEL: srl_or32:
340349; CHECK: ; %bb.0:
341350; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342- ; CHECK-NEXT: v_or_b32_e32 v2, 32, v2
343- ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
351+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0, v2, v1
344352; CHECK-NEXT: v_mov_b32_e32 v1, 0
345353; CHECK-NEXT: s_setpc_b64 s[30:31]
346354 %or = or i64 %shift_amt , 32
@@ -352,10 +360,10 @@ define <2 x i64> @srl_v2_or32(<2 x i64> %arg0, <2 x i64> %shift_amt) {
352360; CHECK-LABEL: srl_v2_or32:
353361; CHECK: ; %bb.0:
354362; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355- ; CHECK-NEXT: v_or_b32_e32 v5, 32, v6
356- ; CHECK-NEXT: v_or_b32_e32 v4, 32, v4
357- ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v4, v[0:1]
358- ; CHECK-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
363+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0, v4, v1
364+ ; CHECK-NEXT: v_lshrrev_b32_e32 v2, v6, v3
365+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
366+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
359367; CHECK-NEXT: s_setpc_b64 s[30:31]
360368 %or = or <2 x i64 > %shift_amt , splat (i64 32 )
361369 %srl = lshr <2 x i64 > %arg0 , %or
@@ -366,12 +374,12 @@ define <3 x i64> @srl_v3_or32(<3 x i64> %arg0, <3 x i64> %shift_amt) {
366374; CHECK-LABEL: srl_v3_or32:
367375; CHECK: ; %bb.0:
368376; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369- ; CHECK-NEXT: v_or_b32_e32 v7, 32, v10
370- ; CHECK-NEXT: v_or_b32_e32 v8, 32, v8
371- ; CHECK-NEXT: v_or_b32_e32 v6, 32, v6
372- ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v6, v[0:1]
373- ; CHECK-NEXT: v_lshrrev_b64 v[2:3], v8, v[2:3]
374- ; CHECK-NEXT: v_lshrrev_b64 v[4:5], v7, v[4:5]
377+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0, v6, v1
378+ ; CHECK-NEXT: v_lshrrev_b32_e32 v2, v8, v3
379+ ; CHECK-NEXT: v_lshrrev_b32_e32 v4, v10, v5
380+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
381+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
382+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
375383; CHECK-NEXT: s_setpc_b64 s[30:31]
376384 %or = or <3 x i64 > %shift_amt , splat (i64 32 )
377385 %srl = lshr <3 x i64 > %arg0 , %or
@@ -382,14 +390,14 @@ define <4 x i64> @srl_v4_or32(<4 x i64> %arg0, <4 x i64> %shift_amt) {
382390; CHECK-LABEL: srl_v4_or32:
383391; CHECK: ; %bb.0:
384392; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385- ; CHECK-NEXT: v_or_b32_e32 v9, 32, v14
386- ; CHECK-NEXT: v_or_b32_e32 v11, 32, v12
387- ; CHECK-NEXT: v_or_b32_e32 v10, 32, v10
388- ; CHECK-NEXT: v_or_b32_e32 v8, 32, v8
389- ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v8, v[0:1]
390- ; CHECK-NEXT: v_lshrrev_b64 v[2:3], v10, v[2:3]
391- ; CHECK-NEXT: v_lshrrev_b64 v[4:5], v11, v[4:5]
392- ; CHECK-NEXT: v_lshrrev_b64 v[6:7], v9, v[6:7]
393+ ; CHECK-NEXT: v_lshrrev_b32_e32 v0, v8, v1
394+ ; CHECK-NEXT: v_lshrrev_b32_e32 v2, v10, v3
395+ ; CHECK-NEXT: v_lshrrev_b32_e32 v4, v12, v5
396+ ; CHECK-NEXT: v_lshrrev_b32_e32 v6, v14, v7
397+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
398+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
399+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
400+ ; CHECK-NEXT: v_mov_b32_e32 v7, 0
393401; CHECK-NEXT: s_setpc_b64 s[30:31]
394402 %or = or <4 x i64 > %shift_amt , splat (i64 32 )
395403 %srl = lshr <4 x i64 > %arg0 , %or
@@ -402,8 +410,7 @@ define i64 @srl_or32_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
402410; CHECK-LABEL: srl_or32_sgpr:
403411; CHECK: ; %bb.0:
404412; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405- ; CHECK-NEXT: s_or_b32 s4, s18, 32
406- ; CHECK-NEXT: s_lshr_b64 s[4:5], s[16:17], s4
413+ ; CHECK-NEXT: s_lshr_b32 s4, s17, s18
407414; CHECK-NEXT: v_mov_b32_e32 v0, s4
408415; CHECK-NEXT: v_mov_b32_e32 v1, 0
409416; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -415,8 +422,7 @@ define i64 @srl_or32_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
415422define amdgpu_ps i64 @srl_or32_sgpr_return (i64 inreg %arg0 , i64 inreg %shift_amt ) {
416423; CHECK-LABEL: srl_or32_sgpr_return:
417424; CHECK: ; %bb.0:
418- ; CHECK-NEXT: s_or_b32 s2, s2, 32
419- ; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
425+ ; CHECK-NEXT: s_lshr_b32 s0, s1, s2
420426; CHECK-NEXT: s_mov_b32 s1, 0
421427; CHECK-NEXT: ; return to shader part epilog
422428 %or = or i64 %shift_amt , 32
@@ -428,14 +434,12 @@ define <2 x i64> @srl_v2_or32_sgpr(<2 x i64> inreg %arg0, <2 x i64> inreg %shift
428434; CHECK-LABEL: srl_v2_or32_sgpr:
429435; CHECK: ; %bb.0:
430436; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431- ; CHECK-NEXT: s_or_b32 s6, s22, 32
432- ; CHECK-NEXT: s_or_b32 s4, s20, 32
433- ; CHECK-NEXT: s_lshr_b64 s[4:5], s[16:17], s4
434- ; CHECK-NEXT: s_lshr_b64 s[6:7], s[18:19], s6
437+ ; CHECK-NEXT: s_lshr_b32 s4, s17, s20
438+ ; CHECK-NEXT: s_lshr_b32 s5, s19, s22
435439; CHECK-NEXT: v_mov_b32_e32 v0, s4
436- ; CHECK-NEXT: v_mov_b32_e32 v1, s5
437- ; CHECK-NEXT: v_mov_b32_e32 v2, s6
438- ; CHECK-NEXT: v_mov_b32_e32 v3, s7
440+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
441+ ; CHECK-NEXT: v_mov_b32_e32 v2, s5
442+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
439443; CHECK-NEXT: s_setpc_b64 s[30:31]
440444 %or = or <2 x i64 > %shift_amt , splat (i64 32 )
441445 %srl = lshr <2 x i64 > %arg0 , %or
@@ -446,18 +450,15 @@ define <3 x i64> @srl_v3_or32_sgpr(<3 x i64> inreg %arg0, <3 x i64> inreg %shift
446450; CHECK-LABEL: srl_v3_or32_sgpr:
447451; CHECK: ; %bb.0:
448452; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449- ; CHECK-NEXT: s_or_b32 s8, s26, 32
450- ; CHECK-NEXT: s_or_b32 s6, s24, 32
451- ; CHECK-NEXT: s_or_b32 s4, s22, 32
452- ; CHECK-NEXT: s_lshr_b64 s[4:5], s[16:17], s4
453- ; CHECK-NEXT: s_lshr_b64 s[6:7], s[18:19], s6
454- ; CHECK-NEXT: s_lshr_b64 s[8:9], s[20:21], s8
453+ ; CHECK-NEXT: s_lshr_b32 s4, s17, s22
454+ ; CHECK-NEXT: s_lshr_b32 s5, s19, s24
455+ ; CHECK-NEXT: s_lshr_b32 s6, s21, s26
455456; CHECK-NEXT: v_mov_b32_e32 v0, s4
456- ; CHECK-NEXT: v_mov_b32_e32 v1, s5
457- ; CHECK-NEXT: v_mov_b32_e32 v2, s6
458- ; CHECK-NEXT: v_mov_b32_e32 v3, s7
459- ; CHECK-NEXT: v_mov_b32_e32 v4, s8
460- ; CHECK-NEXT: v_mov_b32_e32 v5, s9
457+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
458+ ; CHECK-NEXT: v_mov_b32_e32 v2, s5
459+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
460+ ; CHECK-NEXT: v_mov_b32_e32 v4, s6
461+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
461462; CHECK-NEXT: s_setpc_b64 s[30:31]
462463 %or = or <3 x i64 > %shift_amt , splat (i64 32 )
463464 %srl = lshr <3 x i64 > %arg0 , %or
@@ -468,20 +469,17 @@ define <4 x i64> @srl_v4_or32_sgpr(<4 x i64> inreg %arg0, <4 x i64> inreg %shift
468469; CHECK-LABEL: srl_v4_or32_sgpr:
469470; CHECK: ; %bb.0:
470471; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471- ; CHECK-NEXT: v_or_b32_e32 v0, 32, v0
472- ; CHECK-NEXT: s_or_b32 s8, s28, 32
473- ; CHECK-NEXT: s_or_b32 s6, s26, 32
474- ; CHECK-NEXT: s_or_b32 s4, s24, 32
475- ; CHECK-NEXT: s_lshr_b64 s[4:5], s[16:17], s4
476- ; CHECK-NEXT: s_lshr_b64 s[6:7], s[18:19], s6
477- ; CHECK-NEXT: s_lshr_b64 s[8:9], s[20:21], s8
478- ; CHECK-NEXT: v_lshrrev_b64 v[6:7], v0, s[22:23]
472+ ; CHECK-NEXT: s_lshr_b32 s4, s17, s24
473+ ; CHECK-NEXT: s_lshr_b32 s5, s19, s26
474+ ; CHECK-NEXT: s_lshr_b32 s6, s21, s28
475+ ; CHECK-NEXT: v_lshrrev_b32_e64 v6, v0, s23
479476; CHECK-NEXT: v_mov_b32_e32 v0, s4
480- ; CHECK-NEXT: v_mov_b32_e32 v1, s5
481- ; CHECK-NEXT: v_mov_b32_e32 v2, s6
482- ; CHECK-NEXT: v_mov_b32_e32 v3, s7
483- ; CHECK-NEXT: v_mov_b32_e32 v4, s8
484- ; CHECK-NEXT: v_mov_b32_e32 v5, s9
477+ ; CHECK-NEXT: v_mov_b32_e32 v1, 0
478+ ; CHECK-NEXT: v_mov_b32_e32 v2, s5
479+ ; CHECK-NEXT: v_mov_b32_e32 v3, 0
480+ ; CHECK-NEXT: v_mov_b32_e32 v4, s6
481+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
482+ ; CHECK-NEXT: v_mov_b32_e32 v7, 0
485483; CHECK-NEXT: s_setpc_b64 s[30:31]
486484 %or = or <4 x i64 > %shift_amt , splat (i64 32 )
487485 %srl = lshr <4 x i64 > %arg0 , %or
0 commit comments