@@ -4934,17 +4934,15 @@ define amdgpu_ps i64 @s_fshl_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
49344934; GCN: ; %bb.0:
49354935; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
49364936; GCN-NEXT: s_lshr_b32 s2, s3, 27
4937- ; GCN-NEXT: s_mov_b32 s3, 0
4938- ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4937+ ; GCN-NEXT: s_or_b32 s0, s0, s2
49394938; GCN-NEXT: ; return to shader part epilog
49404939;
49414940; GFX11-LABEL: s_fshl_i64_5:
49424941; GFX11: ; %bb.0:
49434942; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
49444943; GFX11-NEXT: s_lshr_b32 s2, s3, 27
4945- ; GFX11-NEXT: s_mov_b32 s3, 0
49464944; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4947- ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4945+ ; GFX11-NEXT: s_or_b32 s0, s0, s2
49484946; GFX11-NEXT: ; return to shader part epilog
49494947 %result = call i64 @llvm.fshl.i64 (i64 %lhs , i64 %rhs , i64 5 )
49504948 ret i64 %result
@@ -4954,20 +4952,13 @@ define amdgpu_ps i64 @s_fshl_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
49544952; GCN-LABEL: s_fshl_i64_32:
49554953; GCN: ; %bb.0:
49564954; GCN-NEXT: s_mov_b32 s1, s0
4957- ; GCN-NEXT: s_mov_b32 s0, 0
4958- ; GCN-NEXT: s_mov_b32 s2, s3
4959- ; GCN-NEXT: s_mov_b32 s3, s0
4960- ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4955+ ; GCN-NEXT: s_mov_b32 s0, s3
49614956; GCN-NEXT: ; return to shader part epilog
49624957;
49634958; GFX11-LABEL: s_fshl_i64_32:
49644959; GFX11: ; %bb.0:
49654960; GFX11-NEXT: s_mov_b32 s1, s0
4966- ; GFX11-NEXT: s_mov_b32 s0, 0
4967- ; GFX11-NEXT: s_mov_b32 s2, s3
4968- ; GFX11-NEXT: s_mov_b32 s3, s0
4969- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4970- ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4961+ ; GFX11-NEXT: s_mov_b32 s0, s3
49714962; GFX11-NEXT: ; return to shader part epilog
49724963 %result = call i64 @llvm.fshl.i64 (i64 %lhs , i64 %rhs , i64 32 )
49734964 ret i64 %result
@@ -6823,56 +6814,50 @@ define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
68236814; GFX6: ; %bb.0:
68246815; GFX6-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
68256816; GFX6-NEXT: s_lshr_b32 s4, s5, 31
6826- ; GFX6-NEXT: s_mov_b32 s5, 0
68276817; GFX6-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6828- ; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6818+ ; GFX6-NEXT: s_or_b32 s0, s0, s4
68296819; GFX6-NEXT: s_lshr_b32 s4, s7, 31
6830- ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6820+ ; GFX6-NEXT: s_or_b32 s2, s2, s4
68316821; GFX6-NEXT: ; return to shader part epilog
68326822;
68336823; GFX8-LABEL: s_fshl_i128_65:
68346824; GFX8: ; %bb.0:
68356825; GFX8-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
68366826; GFX8-NEXT: s_lshr_b32 s4, s5, 31
6837- ; GFX8-NEXT: s_mov_b32 s5, 0
68386827; GFX8-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6839- ; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6828+ ; GFX8-NEXT: s_or_b32 s0, s0, s4
68406829; GFX8-NEXT: s_lshr_b32 s4, s7, 31
6841- ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6830+ ; GFX8-NEXT: s_or_b32 s2, s2, s4
68426831; GFX8-NEXT: ; return to shader part epilog
68436832;
68446833; GFX9-LABEL: s_fshl_i128_65:
68456834; GFX9: ; %bb.0:
68466835; GFX9-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
68476836; GFX9-NEXT: s_lshr_b32 s4, s5, 31
6848- ; GFX9-NEXT: s_mov_b32 s5, 0
68496837; GFX9-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6850- ; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6838+ ; GFX9-NEXT: s_or_b32 s0, s0, s4
68516839; GFX9-NEXT: s_lshr_b32 s4, s7, 31
6852- ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6840+ ; GFX9-NEXT: s_or_b32 s2, s2, s4
68536841; GFX9-NEXT: ; return to shader part epilog
68546842;
68556843; GFX10-LABEL: s_fshl_i128_65:
68566844; GFX10: ; %bb.0:
6857- ; GFX10-NEXT: s_lshr_b32 s2, s5, 31
6858- ; GFX10-NEXT: s_mov_b32 s3, 0
6859- ; GFX10-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6860- ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6861- ; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
6862- ; GFX10-NEXT: s_lshr_b32 s2, s7, 31
6863- ; GFX10-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
6845+ ; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6846+ ; GFX10-NEXT: s_lshr_b32 s4, s5, 31
6847+ ; GFX10-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6848+ ; GFX10-NEXT: s_lshr_b32 s5, s7, 31
6849+ ; GFX10-NEXT: s_or_b32 s0, s0, s4
6850+ ; GFX10-NEXT: s_or_b32 s2, s2, s5
68646851; GFX10-NEXT: ; return to shader part epilog
68656852;
68666853; GFX11-LABEL: s_fshl_i128_65:
68676854; GFX11: ; %bb.0:
6868- ; GFX11-NEXT: s_lshr_b32 s2, s5, 31
6869- ; GFX11-NEXT: s_mov_b32 s3, 0
6870- ; GFX11-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6871- ; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6872- ; GFX11-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
6873- ; GFX11-NEXT: s_lshr_b32 s2, s7, 31
6874- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6875- ; GFX11-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
6855+ ; GFX11-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6856+ ; GFX11-NEXT: s_lshr_b32 s4, s5, 31
6857+ ; GFX11-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6858+ ; GFX11-NEXT: s_lshr_b32 s5, s7, 31
6859+ ; GFX11-NEXT: s_or_b32 s0, s0, s4
6860+ ; GFX11-NEXT: s_or_b32 s2, s2, s5
68766861; GFX11-NEXT: ; return to shader part epilog
68776862 %result = call i128 @llvm.fshl.i128 (i128 %lhs , i128 %rhs , i128 65 )
68786863 ret i128 %result
@@ -6885,7 +6870,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
68856870; GFX6-NEXT: v_lshl_b64 v[2:3], v[0:1], 1
68866871; GFX6-NEXT: v_lshl_b64 v[0:1], v[6:7], 1
68876872; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6888- ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0
6873+ ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
68896874; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v7
68906875; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
68916876; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -6896,7 +6881,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
68966881; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
68976882; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
68986883; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6899- ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
6884+ ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
69006885; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v7
69016886; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
69026887; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -6907,7 +6892,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
69076892; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
69086893; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
69096894; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6910- ; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
6895+ ; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
69116896; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v7
69126897; GFX9-NEXT: v_or_b32_e32 v2, v2, v4
69136898; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -6919,7 +6904,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
69196904; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
69206905; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5
69216906; GFX10-NEXT: v_lshrrev_b32_e32 v5, 31, v7
6922- ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0
6907+ ; GFX10-NEXT: v_or_b32_e32 v0, v0, v4
69236908; GFX10-NEXT: v_or_b32_e32 v2, v2, v5
69246909; GFX10-NEXT: s_setpc_b64 s[30:31]
69256910;
@@ -6931,7 +6916,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
69316916; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v5
69326917; GFX11-NEXT: v_lshrrev_b32_e32 v5, 31, v7
69336918; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6934- ; GFX11-NEXT: v_or_b32_e32 v0, v4, v0
6919+ ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
69356920; GFX11-NEXT: v_or_b32_e32 v2, v2, v5
69366921; GFX11-NEXT: s_setpc_b64 s[30:31]
69376922 %result = call i128 @llvm.fshl.i128 (i128 %lhs , i128 %rhs , i128 65 )
0 commit comments