Skip to content

Commit 89cb0ee

Browse files
authored
[AMDGPU] Move GCNPreRAOptimizations after MachineScheduler (#116211)
This is in preparation for adding a new optimization to the GCNPreRAOptimizations pass that depends on the order of instructions. The existing optimization is insensitive to instruction order, so moving the pass after the machine scheduler only causes minor codegen differences.
1 parent dc3156d commit 89cb0ee

File tree

7 files changed

+64
-64
lines changed

7 files changed

+64
-64
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1434,7 +1434,7 @@ void GCNPassConfig::addOptimizedRegAlloc() {
14341434
insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
14351435

14361436
if (isPassEnabled(EnablePreRAOptimizations))
1437-
insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID);
1437+
insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID);
14381438

14391439
// Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
14401440
// instructions that cause scheduling barriers.

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -655,8 +655,8 @@
655655
; GCN-O1-OPTS-NEXT: Register Coalescer
656656
; GCN-O1-OPTS-NEXT: Rename Disconnected Subregister Components
657657
; GCN-O1-OPTS-NEXT: Rewrite Partial Register Uses
658-
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations
659658
; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler
659+
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations
660660
; GCN-O1-OPTS-NEXT: SI Whole Quad Mode
661661
; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA
662662
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg
@@ -968,8 +968,8 @@
968968
; GCN-O2-NEXT: Register Coalescer
969969
; GCN-O2-NEXT: Rename Disconnected Subregister Components
970970
; GCN-O2-NEXT: Rewrite Partial Register Uses
971-
; GCN-O2-NEXT: AMDGPU Pre-RA optimizations
972971
; GCN-O2-NEXT: Machine Instruction Scheduler
972+
; GCN-O2-NEXT: AMDGPU Pre-RA optimizations
973973
; GCN-O2-NEXT: SI Whole Quad Mode
974974
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
975975
; GCN-O2-NEXT: SI Form memory clauses
@@ -1295,8 +1295,8 @@
12951295
; GCN-O3-NEXT: Register Coalescer
12961296
; GCN-O3-NEXT: Rename Disconnected Subregister Components
12971297
; GCN-O3-NEXT: Rewrite Partial Register Uses
1298-
; GCN-O3-NEXT: AMDGPU Pre-RA optimizations
12991298
; GCN-O3-NEXT: Machine Instruction Scheduler
1299+
; GCN-O3-NEXT: AMDGPU Pre-RA optimizations
13001300
; GCN-O3-NEXT: SI Whole Quad Mode
13011301
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
13021302
; GCN-O3-NEXT: SI Form memory clauses

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,10 +76,10 @@ define amdgpu_cs void @constant_mask_inverse_ballot(ptr addrspace(1) %out) {
7676
; SDAG-LABEL: constant_mask_inverse_ballot:
7777
; SDAG: ; %bb.0: ; %entry
7878
; SDAG-NEXT: s_mov_b32 s0, 0xf8010000
79-
; SDAG-NEXT: s_mov_b32 s2, 0
8079
; SDAG-NEXT: s_mov_b32 s1, 64
81-
; SDAG-NEXT: v_mov_b32_e32 v3, s2
80+
; SDAG-NEXT: s_mov_b32 s2, 0
8281
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
82+
; SDAG-NEXT: v_mov_b32_e32 v3, s2
8383
; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
8484
; SDAG-NEXT: s_endpgm
8585
entry:

llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,10 +42,10 @@ define i32 @func_rounding() {
4242
; GFX10-LABEL: func_rounding:
4343
; GFX10: ; %bb.0:
4444
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45-
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
46-
; GFX10-NEXT: s_lshl_b32 s6, s4, 2
45+
; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
4746
; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71
4847
; GFX10-NEXT: s_mov_b32 s5, 0xc96f385
48+
; GFX10-NEXT: s_lshl_b32 s6, s6, 2
4949
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
5050
; GFX10-NEXT: s_and_b32 s4, s4, 15
5151
; GFX10-NEXT: s_add_i32 s5, s4, 4
@@ -57,10 +57,10 @@ define i32 @func_rounding() {
5757
; GFX11-LABEL: func_rounding:
5858
; GFX11: ; %bb.0:
5959
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60-
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
61-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
60+
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
6261
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
6362
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
63+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
6464
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
6565
; GFX11-NEXT: s_and_b32 s0, s0, 15
6666
; GFX11-NEXT: s_add_i32 s1, s0, 4

llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll

Lines changed: 45 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -38,10 +38,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
3838
; GFX10: ; %bb.0:
3939
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4040
; GFX10-NEXT: s_add_i32 s34, s4, -4
41-
; GFX10-NEXT: s_min_u32 s34, s4, s34
42-
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
41+
; GFX10-NEXT: s_min_u32 s36, s4, s34
4342
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
4443
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
44+
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
4545
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
4646
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
4747
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -50,10 +50,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
5050
; GFX11: ; %bb.0:
5151
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5252
; GFX11-NEXT: s_add_i32 s0, s4, -4
53-
; GFX11-NEXT: s_min_u32 s0, s4, s0
54-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
53+
; GFX11-NEXT: s_min_u32 s2, s4, s0
5554
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
5655
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
56+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
5757
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5858
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
5959
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -124,29 +124,29 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
124124
;
125125
; GFX10-LABEL: s_set_rounding_kernel:
126126
; GFX10: ; %bb.0:
127-
; GFX10-NEXT: s_load_dword s0, s[4:5], 0x24
127+
; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24
128+
; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
129+
; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
128130
; GFX10-NEXT: ;;#ASMSTART
129131
; GFX10-NEXT: ;;#ASMEND
130132
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
131-
; GFX10-NEXT: s_add_i32 s1, s0, -4
132-
; GFX10-NEXT: s_min_u32 s2, s0, s1
133-
; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
134-
; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
133+
; GFX10-NEXT: s_add_i32 s3, s2, -4
134+
; GFX10-NEXT: s_min_u32 s2, s2, s3
135135
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
136136
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
137137
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
138138
; GFX10-NEXT: s_endpgm
139139
;
140140
; GFX11-LABEL: s_set_rounding_kernel:
141141
; GFX11: ; %bb.0:
142-
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
142+
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24
143+
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
144+
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
143145
; GFX11-NEXT: ;;#ASMSTART
144146
; GFX11-NEXT: ;;#ASMEND
145147
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
146-
; GFX11-NEXT: s_add_i32 s1, s0, -4
147-
; GFX11-NEXT: s_min_u32 s2, s0, s1
148-
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
149-
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
148+
; GFX11-NEXT: s_add_i32 s3, s2, -4
149+
; GFX11-NEXT: s_min_u32 s2, s2, s3
150150
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
151151
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
152152
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
@@ -294,10 +294,10 @@ define void @set_rounding_get_rounding() {
294294
; GFX10-NEXT: s_cmp_lt_u32 s4, 4
295295
; GFX10-NEXT: s_cselect_b32 s4, s4, s5
296296
; GFX10-NEXT: s_add_i32 s5, s4, -4
297-
; GFX10-NEXT: s_min_u32 s4, s4, s5
298-
; GFX10-NEXT: s_lshl_b32 s6, s4, 2
297+
; GFX10-NEXT: s_min_u32 s6, s4, s5
299298
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
300299
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
300+
; GFX10-NEXT: s_lshl_b32 s6, s6, 2
301301
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
302302
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
303303
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -315,10 +315,10 @@ define void @set_rounding_get_rounding() {
315315
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
316316
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
317317
; GFX11-NEXT: s_add_i32 s1, s0, -4
318-
; GFX11-NEXT: s_min_u32 s0, s0, s1
319-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
318+
; GFX11-NEXT: s_min_u32 s2, s0, s1
320319
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
321320
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
321+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
322322
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
323323
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
324324
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -974,10 +974,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
974974
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975975
; GFX10-NEXT: s_sext_i32_i16 s34, s4
976976
; GFX10-NEXT: s_add_i32 s35, s34, -4
977-
; GFX10-NEXT: s_min_u32 s34, s34, s35
978-
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
977+
; GFX10-NEXT: s_min_u32 s36, s34, s35
979978
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
980979
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
980+
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
981981
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
982982
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
983983
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -987,10 +987,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
987987
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988988
; GFX11-NEXT: s_sext_i32_i16 s0, s4
989989
; GFX11-NEXT: s_add_i32 s1, s0, -4
990-
; GFX11-NEXT: s_min_u32 s0, s0, s1
991-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
990+
; GFX11-NEXT: s_min_u32 s2, s0, s1
992991
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
993992
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
993+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
994994
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
995995
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
996996
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1055,10 +1055,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
10551055
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10561056
; GFX10-NEXT: s_sext_i32_i16 s34, s4
10571057
; GFX10-NEXT: s_add_i32 s35, s34, -4
1058-
; GFX10-NEXT: s_min_u32 s34, s34, s35
1059-
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1058+
; GFX10-NEXT: s_min_u32 s36, s34, s35
10601059
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
10611060
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1061+
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
10621062
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
10631063
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
10641064
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1068,10 +1068,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
10681068
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10691069
; GFX11-NEXT: s_sext_i32_i16 s0, s4
10701070
; GFX11-NEXT: s_add_i32 s1, s0, -4
1071-
; GFX11-NEXT: s_min_u32 s0, s0, s1
1072-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1071+
; GFX11-NEXT: s_min_u32 s2, s0, s1
10731072
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
10741073
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1074+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
10751075
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
10761076
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
10771077
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1136,10 +1136,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
11361136
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11371137
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
11381138
; GFX10-NEXT: s_add_i32 s35, s34, -4
1139-
; GFX10-NEXT: s_min_u32 s34, s34, s35
1140-
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1139+
; GFX10-NEXT: s_min_u32 s36, s34, s35
11411140
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
11421141
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1142+
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
11431143
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
11441144
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
11451145
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1149,10 +1149,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
11491149
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11501150
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
11511151
; GFX11-NEXT: s_add_i32 s1, s0, -4
1152-
; GFX11-NEXT: s_min_u32 s0, s0, s1
1153-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1152+
; GFX11-NEXT: s_min_u32 s2, s0, s1
11541153
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
11551154
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1155+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
11561156
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
11571157
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
11581158
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1569,10 +1569,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
15691569
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
15701570
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
15711571
; GFX10-NEXT: s_add_i32 s35, s34, -4
1572-
; GFX10-NEXT: s_min_u32 s34, s34, s35
1573-
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1572+
; GFX10-NEXT: s_min_u32 s36, s34, s35
15741573
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
15751574
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1575+
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
15761576
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
15771577
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
15781578
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1586,10 +1586,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
15861586
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
15871587
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
15881588
; GFX11-NEXT: s_add_i32 s1, s0, -4
1589-
; GFX11-NEXT: s_min_u32 s0, s0, s1
1590-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1589+
; GFX11-NEXT: s_min_u32 s2, s0, s1
15911590
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
15921591
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1592+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
15931593
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
15941594
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
15951595
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1634,10 +1634,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
16341634
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
16351635
; GFX10-NEXT: s_cselect_b32 s34, 3, 5
16361636
; GFX10-NEXT: s_add_i32 s35, s34, -4
1637-
; GFX10-NEXT: s_min_u32 s34, s34, s35
1638-
; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1637+
; GFX10-NEXT: s_min_u32 s36, s34, s35
16391638
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
16401639
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1640+
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
16411641
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
16421642
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
16431643
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1648,10 +1648,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
16481648
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
16491649
; GFX11-NEXT: s_cselect_b32 s0, 3, 5
16501650
; GFX11-NEXT: s_add_i32 s1, s0, -4
1651-
; GFX11-NEXT: s_min_u32 s0, s0, s1
1652-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1651+
; GFX11-NEXT: s_min_u32 s2, s0, s1
16531652
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
16541653
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1654+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
16551655
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
16561656
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
16571657
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1747,13 +1747,13 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
17471747
; GFX10-LABEL: get_rounding_after_set_rounding_1:
17481748
; GFX10: ; %bb.0:
17491749
; GFX10-NEXT: s_round_mode 0x0
1750-
; GFX10-NEXT: v_mov_b32_e32 v0, 0
1751-
; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1752-
; GFX10-NEXT: v_mov_b32_e32 v1, 0
1753-
; GFX10-NEXT: s_lshl_b32 s2, s0, 2
17541750
; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71
1751+
; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
17551752
; GFX10-NEXT: s_mov_b32 s1, 0xc96f385
1753+
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
1754+
; GFX10-NEXT: v_mov_b32_e32 v0, 0
17561755
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
1756+
; GFX10-NEXT: v_mov_b32_e32 v1, 0
17571757
; GFX10-NEXT: s_and_b32 s0, s0, 15
17581758
; GFX10-NEXT: s_add_i32 s1, s0, 4
17591759
; GFX10-NEXT: s_cmp_lt_u32 s0, 4
@@ -1766,11 +1766,11 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
17661766
; GFX11-LABEL: get_rounding_after_set_rounding_1:
17671767
; GFX11: ; %bb.0:
17681768
; GFX11-NEXT: s_round_mode 0x0
1769-
; GFX11-NEXT: v_mov_b32_e32 v0, 0
1770-
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1771-
; GFX11-NEXT: s_lshl_b32 s2, s0, 2
17721769
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
1770+
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
17731771
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
1772+
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
1773+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
17741774
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
17751775
; GFX11-NEXT: s_and_b32 s0, s0, 15
17761776
; GFX11-NEXT: s_add_i32 s1, s0, 4

llvm/test/CodeGen/AMDGPU/offset-split-global.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3185,8 +3185,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(p
31853185
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
31863186
; GFX12-SDAG: ; %bb.0:
31873187
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
3188-
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
31893188
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x7ff
3189+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
31903190
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
31913191
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
31923192
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3253,8 +3253,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(p
32533253
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
32543254
; GFX12-SDAG: ; %bb.0:
32553255
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
3256-
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
32573256
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x800
3257+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
32583258
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
32593259
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
32603260
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3321,8 +3321,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(p
33213321
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
33223322
; GFX12-SDAG: ; %bb.0:
33233323
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
3324-
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
33253324
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0xfff
3325+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
33263326
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
33273327
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
33283328
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3389,8 +3389,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(p
33893389
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
33903390
; GFX12-SDAG: ; %bb.0:
33913391
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
3392-
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
33933392
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x1000
3393+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
33943394
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
33953395
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
33963396
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3457,8 +3457,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(p
34573457
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
34583458
; GFX12-SDAG: ; %bb.0:
34593459
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
3460-
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
34613460
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x1fff
3461+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
34623462
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
34633463
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
34643464
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3525,8 +3525,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(p
35253525
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
35263526
; GFX12-SDAG: ; %bb.0:
35273527
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
3528-
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
35293528
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x2000
3529+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
35303530
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
35313531
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
35323532
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]

0 commit comments

Comments
 (0)