@@ -4959,17 +4959,15 @@ define amdgpu_ps i64 @s_fshl_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
49594959; GCN: ; %bb.0:
49604960; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
49614961; GCN-NEXT: s_lshr_b32 s2, s3, 27
4962- ; GCN-NEXT: s_mov_b32 s3, 0
4963- ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4962+ ; GCN-NEXT: s_or_b32 s0, s0, s2
49644963; GCN-NEXT: ; return to shader part epilog
49654964;
49664965; GFX11-LABEL: s_fshl_i64_5:
49674966; GFX11: ; %bb.0:
49684967; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
49694968; GFX11-NEXT: s_lshr_b32 s2, s3, 27
4970- ; GFX11-NEXT: s_mov_b32 s3, 0
49714969; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4972- ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4970+ ; GFX11-NEXT: s_or_b32 s0, s0, s2
49734971; GFX11-NEXT: ; return to shader part epilog
49744972 %result = call i64 @llvm.fshl.i64 (i64 %lhs , i64 %rhs , i64 5 )
49754973 ret i64 %result
@@ -4979,20 +4977,13 @@ define amdgpu_ps i64 @s_fshl_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
49794977; GCN-LABEL: s_fshl_i64_32:
49804978; GCN: ; %bb.0:
49814979; GCN-NEXT: s_mov_b32 s1, s0
4982- ; GCN-NEXT: s_mov_b32 s0, 0
4983- ; GCN-NEXT: s_mov_b32 s2, s3
4984- ; GCN-NEXT: s_mov_b32 s3, s0
4985- ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4980+ ; GCN-NEXT: s_mov_b32 s0, s3
49864981; GCN-NEXT: ; return to shader part epilog
49874982;
49884983; GFX11-LABEL: s_fshl_i64_32:
49894984; GFX11: ; %bb.0:
49904985; GFX11-NEXT: s_mov_b32 s1, s0
4991- ; GFX11-NEXT: s_mov_b32 s0, 0
4992- ; GFX11-NEXT: s_mov_b32 s2, s3
4993- ; GFX11-NEXT: s_mov_b32 s3, s0
4994- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4995- ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4986+ ; GFX11-NEXT: s_mov_b32 s0, s3
49964987; GFX11-NEXT: ; return to shader part epilog
49974988 %result = call i64 @llvm.fshl.i64 (i64 %lhs , i64 %rhs , i64 32 )
49984989 ret i64 %result
@@ -6877,56 +6868,50 @@ define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
68776868; GFX6: ; %bb.0:
68786869; GFX6-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
68796870; GFX6-NEXT: s_lshr_b32 s4, s5, 31
6880- ; GFX6-NEXT: s_mov_b32 s5, 0
68816871; GFX6-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6882- ; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6872+ ; GFX6-NEXT: s_or_b32 s0, s0, s4
68836873; GFX6-NEXT: s_lshr_b32 s4, s7, 31
6884- ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6874+ ; GFX6-NEXT: s_or_b32 s2, s2, s4
68856875; GFX6-NEXT: ; return to shader part epilog
68866876;
68876877; GFX8-LABEL: s_fshl_i128_65:
68886878; GFX8: ; %bb.0:
68896879; GFX8-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
68906880; GFX8-NEXT: s_lshr_b32 s4, s5, 31
6891- ; GFX8-NEXT: s_mov_b32 s5, 0
68926881; GFX8-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6893- ; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6882+ ; GFX8-NEXT: s_or_b32 s0, s0, s4
68946883; GFX8-NEXT: s_lshr_b32 s4, s7, 31
6895- ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6884+ ; GFX8-NEXT: s_or_b32 s2, s2, s4
68966885; GFX8-NEXT: ; return to shader part epilog
68976886;
68986887; GFX9-LABEL: s_fshl_i128_65:
68996888; GFX9: ; %bb.0:
69006889; GFX9-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
69016890; GFX9-NEXT: s_lshr_b32 s4, s5, 31
6902- ; GFX9-NEXT: s_mov_b32 s5, 0
69036891; GFX9-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6904- ; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6892+ ; GFX9-NEXT: s_or_b32 s0, s0, s4
69056893; GFX9-NEXT: s_lshr_b32 s4, s7, 31
6906- ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6894+ ; GFX9-NEXT: s_or_b32 s2, s2, s4
69076895; GFX9-NEXT: ; return to shader part epilog
69086896;
69096897; GFX10-LABEL: s_fshl_i128_65:
69106898; GFX10: ; %bb.0:
6911- ; GFX10-NEXT: s_lshr_b32 s2, s5, 31
6912- ; GFX10-NEXT: s_mov_b32 s3, 0
6913- ; GFX10-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6914- ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6915- ; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
6916- ; GFX10-NEXT: s_lshr_b32 s2, s7, 31
6917- ; GFX10-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
6899+ ; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6900+ ; GFX10-NEXT: s_lshr_b32 s4, s5, 31
6901+ ; GFX10-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6902+ ; GFX10-NEXT: s_lshr_b32 s5, s7, 31
6903+ ; GFX10-NEXT: s_or_b32 s0, s0, s4
6904+ ; GFX10-NEXT: s_or_b32 s2, s2, s5
69186905; GFX10-NEXT: ; return to shader part epilog
69196906;
69206907; GFX11-LABEL: s_fshl_i128_65:
69216908; GFX11: ; %bb.0:
6922- ; GFX11-NEXT: s_lshr_b32 s2, s5, 31
6923- ; GFX11-NEXT: s_mov_b32 s3, 0
6924- ; GFX11-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6925- ; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6926- ; GFX11-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
6927- ; GFX11-NEXT: s_lshr_b32 s2, s7, 31
6928- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6929- ; GFX11-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
6909+ ; GFX11-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6910+ ; GFX11-NEXT: s_lshr_b32 s4, s5, 31
6911+ ; GFX11-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6912+ ; GFX11-NEXT: s_lshr_b32 s5, s7, 31
6913+ ; GFX11-NEXT: s_or_b32 s0, s0, s4
6914+ ; GFX11-NEXT: s_or_b32 s2, s2, s5
69306915; GFX11-NEXT: ; return to shader part epilog
69316916 %result = call i128 @llvm.fshl.i128 (i128 %lhs , i128 %rhs , i128 65 )
69326917 ret i128 %result
@@ -6939,7 +6924,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
69396924; GFX6-NEXT: v_lshl_b64 v[2:3], v[0:1], 1
69406925; GFX6-NEXT: v_lshl_b64 v[0:1], v[6:7], 1
69416926; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6942- ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0
6927+ ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
69436928; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v7
69446929; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
69456930; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -6950,7 +6935,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
69506935; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
69516936; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
69526937; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6953- ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
6938+ ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
69546939; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v7
69556940; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
69566941; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -6961,7 +6946,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
69616946; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
69626947; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
69636948; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6964- ; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
6949+ ; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
69656950; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v7
69666951; GFX9-NEXT: v_or_b32_e32 v2, v2, v4
69676952; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -6973,7 +6958,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
69736958; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
69746959; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5
69756960; GFX10-NEXT: v_lshrrev_b32_e32 v5, 31, v7
6976- ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0
6961+ ; GFX10-NEXT: v_or_b32_e32 v0, v0, v4
69776962; GFX10-NEXT: v_or_b32_e32 v2, v2, v5
69786963; GFX10-NEXT: s_setpc_b64 s[30:31]
69796964;
@@ -6985,7 +6970,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
69856970; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v5
69866971; GFX11-NEXT: v_lshrrev_b32_e32 v5, 31, v7
69876972; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6988- ; GFX11-NEXT: v_or_b32_e32 v0, v4, v0
6973+ ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
69896974; GFX11-NEXT: v_or_b32_e32 v2, v2, v5
69906975; GFX11-NEXT: s_setpc_b64 s[30:31]
69916976 %result = call i128 @llvm.fshl.i128 (i128 %lhs , i128 %rhs , i128 65 )
0 commit comments