@@ -38,10 +38,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
38
38
; GFX10: ; %bb.0:
39
39
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40
40
; GFX10-NEXT: s_add_i32 s34, s4, -4
41
- ; GFX10-NEXT: s_min_u32 s34, s4, s34
42
- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
41
+ ; GFX10-NEXT: s_min_u32 s36, s4, s34
43
42
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
44
43
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
44
+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
45
45
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
46
46
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
47
47
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -50,10 +50,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
50
50
; GFX11: ; %bb.0:
51
51
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52
52
; GFX11-NEXT: s_add_i32 s0, s4, -4
53
- ; GFX11-NEXT: s_min_u32 s0, s4, s0
54
- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
53
+ ; GFX11-NEXT: s_min_u32 s2, s4, s0
55
54
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
56
55
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
56
+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
57
57
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
58
58
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
59
59
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -124,29 +124,29 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
124
124
;
125
125
; GFX10-LABEL: s_set_rounding_kernel:
126
126
; GFX10: ; %bb.0:
127
- ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x24
127
+ ; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24
128
+ ; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
129
+ ; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
128
130
; GFX10-NEXT: ;;#ASMSTART
129
131
; GFX10-NEXT: ;;#ASMEND
130
132
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
131
- ; GFX10-NEXT: s_add_i32 s1, s0, -4
132
- ; GFX10-NEXT: s_min_u32 s2, s0, s1
133
- ; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
134
- ; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
133
+ ; GFX10-NEXT: s_add_i32 s3, s2, -4
134
+ ; GFX10-NEXT: s_min_u32 s2, s2, s3
135
135
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
136
136
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
137
137
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
138
138
; GFX10-NEXT: s_endpgm
139
139
;
140
140
; GFX11-LABEL: s_set_rounding_kernel:
141
141
; GFX11: ; %bb.0:
142
- ; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
142
+ ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24
143
+ ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
144
+ ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
143
145
; GFX11-NEXT: ;;#ASMSTART
144
146
; GFX11-NEXT: ;;#ASMEND
145
147
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
146
- ; GFX11-NEXT: s_add_i32 s1, s0, -4
147
- ; GFX11-NEXT: s_min_u32 s2, s0, s1
148
- ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
149
- ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
148
+ ; GFX11-NEXT: s_add_i32 s3, s2, -4
149
+ ; GFX11-NEXT: s_min_u32 s2, s2, s3
150
150
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
151
151
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
152
152
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
@@ -294,10 +294,10 @@ define void @set_rounding_get_rounding() {
294
294
; GFX10-NEXT: s_cmp_lt_u32 s4, 4
295
295
; GFX10-NEXT: s_cselect_b32 s4, s4, s5
296
296
; GFX10-NEXT: s_add_i32 s5, s4, -4
297
- ; GFX10-NEXT: s_min_u32 s4, s4, s5
298
- ; GFX10-NEXT: s_lshl_b32 s6, s4, 2
297
+ ; GFX10-NEXT: s_min_u32 s6, s4, s5
299
298
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
300
299
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
300
+ ; GFX10-NEXT: s_lshl_b32 s6, s6, 2
301
301
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
302
302
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
303
303
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -315,10 +315,10 @@ define void @set_rounding_get_rounding() {
315
315
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
316
316
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
317
317
; GFX11-NEXT: s_add_i32 s1, s0, -4
318
- ; GFX11-NEXT: s_min_u32 s0, s0, s1
319
- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
318
+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
320
319
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
321
320
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
321
+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
322
322
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
323
323
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
324
324
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -974,10 +974,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
974
974
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975
975
; GFX10-NEXT: s_sext_i32_i16 s34, s4
976
976
; GFX10-NEXT: s_add_i32 s35, s34, -4
977
- ; GFX10-NEXT: s_min_u32 s34, s34, s35
978
- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
977
+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
979
978
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
980
979
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
980
+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
981
981
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
982
982
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
983
983
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -987,10 +987,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
987
987
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988
988
; GFX11-NEXT: s_sext_i32_i16 s0, s4
989
989
; GFX11-NEXT: s_add_i32 s1, s0, -4
990
- ; GFX11-NEXT: s_min_u32 s0, s0, s1
991
- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
990
+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
992
991
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
993
992
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
993
+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
994
994
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
995
995
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
996
996
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1055,10 +1055,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
1055
1055
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1056
1056
; GFX10-NEXT: s_sext_i32_i16 s34, s4
1057
1057
; GFX10-NEXT: s_add_i32 s35, s34, -4
1058
- ; GFX10-NEXT: s_min_u32 s34, s34, s35
1059
- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1058
+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
1060
1059
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
1061
1060
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1061
+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
1062
1062
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
1063
1063
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1064
1064
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1068,10 +1068,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
1068
1068
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069
1069
; GFX11-NEXT: s_sext_i32_i16 s0, s4
1070
1070
; GFX11-NEXT: s_add_i32 s1, s0, -4
1071
- ; GFX11-NEXT: s_min_u32 s0, s0, s1
1072
- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1071
+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
1073
1072
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
1074
1073
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1074
+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
1075
1075
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
1076
1076
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1077
1077
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1136,10 +1136,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
1136
1136
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137
1137
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
1138
1138
; GFX10-NEXT: s_add_i32 s35, s34, -4
1139
- ; GFX10-NEXT: s_min_u32 s34, s34, s35
1140
- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1139
+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
1141
1140
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
1142
1141
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1142
+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
1143
1143
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
1144
1144
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1145
1145
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1149,10 +1149,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
1149
1149
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150
1150
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
1151
1151
; GFX11-NEXT: s_add_i32 s1, s0, -4
1152
- ; GFX11-NEXT: s_min_u32 s0, s0, s1
1153
- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1152
+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
1154
1153
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
1155
1154
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1155
+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
1156
1156
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
1157
1157
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1158
1158
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1569,10 +1569,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
1569
1569
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
1570
1570
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
1571
1571
; GFX10-NEXT: s_add_i32 s35, s34, -4
1572
- ; GFX10-NEXT: s_min_u32 s34, s34, s35
1573
- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1572
+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
1574
1573
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
1575
1574
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1575
+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
1576
1576
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
1577
1577
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1578
1578
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1586,10 +1586,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
1586
1586
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1587
1587
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
1588
1588
; GFX11-NEXT: s_add_i32 s1, s0, -4
1589
- ; GFX11-NEXT: s_min_u32 s0, s0, s1
1590
- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1589
+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
1591
1590
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
1592
1591
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1592
+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
1593
1593
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
1594
1594
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1595
1595
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1634,10 +1634,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
1634
1634
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
1635
1635
; GFX10-NEXT: s_cselect_b32 s34, 3, 5
1636
1636
; GFX10-NEXT: s_add_i32 s35, s34, -4
1637
- ; GFX10-NEXT: s_min_u32 s34, s34, s35
1638
- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1637
+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
1639
1638
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
1640
1639
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1640
+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
1641
1641
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
1642
1642
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
1643
1643
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1648,10 +1648,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
1648
1648
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
1649
1649
; GFX11-NEXT: s_cselect_b32 s0, 3, 5
1650
1650
; GFX11-NEXT: s_add_i32 s1, s0, -4
1651
- ; GFX11-NEXT: s_min_u32 s0, s0, s1
1652
- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1651
+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
1653
1652
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
1654
1653
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1654
+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
1655
1655
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
1656
1656
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
1657
1657
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1747,13 +1747,13 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
1747
1747
; GFX10-LABEL: get_rounding_after_set_rounding_1:
1748
1748
; GFX10: ; %bb.0:
1749
1749
; GFX10-NEXT: s_round_mode 0x0
1750
- ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1751
- ; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1752
- ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1753
- ; GFX10-NEXT: s_lshl_b32 s2, s0, 2
1754
1750
; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71
1751
+ ; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
1755
1752
; GFX10-NEXT: s_mov_b32 s1, 0xc96f385
1753
+ ; GFX10-NEXT: s_lshl_b32 s2, s2, 2
1754
+ ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1756
1755
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
1756
+ ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1757
1757
; GFX10-NEXT: s_and_b32 s0, s0, 15
1758
1758
; GFX10-NEXT: s_add_i32 s1, s0, 4
1759
1759
; GFX10-NEXT: s_cmp_lt_u32 s0, 4
@@ -1766,11 +1766,11 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
1766
1766
; GFX11-LABEL: get_rounding_after_set_rounding_1:
1767
1767
; GFX11: ; %bb.0:
1768
1768
; GFX11-NEXT: s_round_mode 0x0
1769
- ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1770
- ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1771
- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1772
1769
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
1770
+ ; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
1773
1771
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
1772
+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
1773
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1774
1774
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
1775
1775
; GFX11-NEXT: s_and_b32 s0, s0, 15
1776
1776
; GFX11-NEXT: s_add_i32 s1, s0, 4
0 commit comments