Skip to content

Commit 5f07ff3

Browse files
committed
Fix tests and remove arg from TII.rematerialize
1 parent b5f0950 commit 5f07ff3

File tree

6 files changed

+96
-56
lines changed

6 files changed

+96
-56
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2288,8 +2288,7 @@ void PreRARematStage::rematerialize(const RematReg &Remat,
22882288

22892289
// Rematerialize the register in the region where it is used.
22902290
MachineBasicBlock::iterator InsertPos = Remat.UseMI;
2291-
TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0, DefMI,
2292-
*DAG.TRI);
2291+
TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0, DefMI);
22932292
MachineInstr *RematMI = &*std::prev(InsertPos);
22942293
Remat.UseMI->substituteRegister(Reg, NewReg, 0, *DAG.TRI);
22952294
Remat.insertMI(Remat.UseRegion, RematMI, DAG);
@@ -2356,7 +2355,7 @@ void PreRARematStage::rollback(const RollbackInfo &Rollback,
23562355
// rematerialized exactly in the same position as originally within the
23572356
// region, but it should not matter much.
23582357
MachineBasicBlock::iterator InsertPos(DAG.Regions[Remat->DefRegion].second);
2359-
TII->reMaterialize(*MBB, InsertPos, NewReg, 0, *RematMI, *DAG.TRI);
2358+
TII->reMaterialize(*MBB, InsertPos, NewReg, 0, *RematMI);
23602359
MachineInstr *ReRematMI = &*std::prev(InsertPos);
23612360
REMAT_DEBUG(dbgs() << '[' << Remat->DefRegion << "] Re-rematerialized as "
23622361
<< *ReRematMI);

llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -450,8 +450,7 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
450450
; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v0
451451
; GISEL-GFX942-NEXT: v_add_u32_e32 v0, 0x100, v0
452452
; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
453-
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
454-
; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill
453+
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
455454
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
456455
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
457456
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16
@@ -992,8 +991,7 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
992991
; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v0
993992
; GISEL-GFX942-NEXT: v_add_u32_e32 v0, 0x100, v0
994993
; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
995-
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
996-
; GISEL-GFX942-NEXT: scratch_store_dwordx4 off, a[0:3], off ; 16-byte Folded Spill
994+
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
997995
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
998996
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
999997
; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16

llvm/test/CodeGen/AMDGPU/call-waitcnt.ll

Lines changed: 70 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -72,46 +72,82 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
7272

7373
; Should not wait after the call before memory
7474
define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #0 {
75-
; GCN-LABEL: call_no_wait_after_call:
76-
; GCN: ; %bb.0:
77-
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
78-
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
79-
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
80-
; GCN-NEXT: s_add_u32 s0, s0, s11
81-
; GCN-NEXT: s_addc_u32 s1, s1, 0
82-
; GCN-NEXT: s_getpc_b64 s[8:9]
83-
; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
84-
; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
85-
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
86-
; GCN-NEXT: v_mov_b32_e32 v0, 0
87-
; GCN-NEXT: s_mov_b32 s32, 0
88-
; GCN-NEXT: v_mov_b32_e32 v40, 0
89-
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
90-
; GCN-NEXT: global_store_dword v40, v40, s[34:35]
91-
; GCN-NEXT: s_endpgm
75+
; SDAG-LABEL: call_no_wait_after_call:
76+
; SDAG: ; %bb.0:
77+
; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11
78+
; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
79+
; SDAG-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
80+
; SDAG-NEXT: s_add_u32 s0, s0, s11
81+
; SDAG-NEXT: s_addc_u32 s1, s1, 0
82+
; SDAG-NEXT: s_getpc_b64 s[8:9]
83+
; SDAG-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
84+
; SDAG-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
85+
; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5]
86+
; SDAG-NEXT: v_mov_b32_e32 v0, 0
87+
; SDAG-NEXT: s_mov_b32 s32, 0
88+
; SDAG-NEXT: v_mov_b32_e32 v40, 0
89+
; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
90+
; SDAG-NEXT: global_store_dword v40, v40, s[34:35]
91+
; SDAG-NEXT: s_endpgm
92+
;
93+
; GISEL-LABEL: call_no_wait_after_call:
94+
; GISEL: ; %bb.0:
95+
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11
96+
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
97+
; GISEL-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
98+
; GISEL-NEXT: s_add_u32 s0, s0, s11
99+
; GISEL-NEXT: s_addc_u32 s1, s1, 0
100+
; GISEL-NEXT: s_getpc_b64 s[8:9]
101+
; GISEL-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
102+
; GISEL-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
103+
; GISEL-NEXT: v_mov_b32_e32 v0, 0
104+
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
105+
; GISEL-NEXT: s_mov_b32 s32, 0
106+
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
107+
; GISEL-NEXT: v_mov_b32_e32 v0, 0
108+
; GISEL-NEXT: global_store_dword v0, v0, s[34:35]
109+
; GISEL-NEXT: s_endpgm
92110
call void @func(i32 0)
93111
store i32 0, ptr addrspace(1) %ptr
94112
ret void
95113
}
96114

97115
define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %ptr, i32) #0 {
98-
; GCN-LABEL: call_no_wait_after_call_return_val:
99-
; GCN: ; %bb.0:
100-
; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
101-
; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
102-
; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
103-
; GCN-NEXT: s_add_u32 s0, s0, s11
104-
; GCN-NEXT: s_addc_u32 s1, s1, 0
105-
; GCN-NEXT: s_getpc_b64 s[8:9]
106-
; GCN-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
107-
; GCN-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
108-
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
109-
; GCN-NEXT: v_mov_b32_e32 v0, 0
110-
; GCN-NEXT: s_mov_b32 s32, 0
111-
; GCN-NEXT: v_mov_b32_e32 v40, 0
112-
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
113-
; GCN-NEXT: global_store_dword v40, v0, s[34:35]
114-
; GCN-NEXT: s_endpgm
116+
; SDAG-LABEL: call_no_wait_after_call_return_val:
117+
; SDAG: ; %bb.0:
118+
; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11
119+
; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
120+
; SDAG-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
121+
; SDAG-NEXT: s_add_u32 s0, s0, s11
122+
; SDAG-NEXT: s_addc_u32 s1, s1, 0
123+
; SDAG-NEXT: s_getpc_b64 s[8:9]
124+
; SDAG-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
125+
; SDAG-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
126+
; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5]
127+
; SDAG-NEXT: v_mov_b32_e32 v0, 0
128+
; SDAG-NEXT: s_mov_b32 s32, 0
129+
; SDAG-NEXT: v_mov_b32_e32 v40, 0
130+
; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
131+
; SDAG-NEXT: global_store_dword v40, v0, s[34:35]
132+
; SDAG-NEXT: s_endpgm
133+
;
134+
; GISEL-LABEL: call_no_wait_after_call_return_val:
135+
; GISEL: ; %bb.0:
136+
; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11
137+
; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
138+
; GISEL-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
139+
; GISEL-NEXT: s_add_u32 s0, s0, s11
140+
; GISEL-NEXT: s_addc_u32 s1, s1, 0
141+
; GISEL-NEXT: s_getpc_b64 s[8:9]
142+
; GISEL-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
143+
; GISEL-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
144+
; GISEL-NEXT: v_mov_b32_e32 v0, 0
145+
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
146+
; GISEL-NEXT: s_mov_b32 s32, 0
147+
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
148+
; GISEL-NEXT: v_mov_b32_e32 v1, 0
149+
; GISEL-NEXT: global_store_dword v1, v0, s[34:35]
150+
; GISEL-NEXT: s_endpgm
115151
%rv = call i32 @func.return(i32 0)
116152
store i32 %rv, ptr addrspace(1) %ptr
117153
ret void

llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1880,7 +1880,6 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
18801880
; GFX9-SDAG-NEXT: .LBB14_6: ; %bb.1
18811881
; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
18821882
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
1883-
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 2
18841883
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
18851884
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec
18861885
; GFX9-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1
@@ -1898,7 +1897,8 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
18981897
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1
18991898
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s33
19001899
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1901-
; GFX9-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s4
1900+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2
1901+
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4
19021902
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
19031903
; GFX9-SDAG-NEXT: s_mov_b32 s32, s34
19041904
; GFX9-SDAG-NEXT: s_mov_b32 s34, s14
@@ -2044,31 +2044,30 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
20442044
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
20452045
; GFX11-SDAG-NEXT: .LBB14_6: ; %bb.1
20462046
; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1
2047-
; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v0, 2, 15
2048-
; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2
2047+
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
20492048
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
2050-
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
2051-
; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
2049+
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2050+
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
20522051
; GFX11-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1
20532052
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
20542053
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2055-
; GFX11-SDAG-NEXT: v_readlane_b32 s3, v1, s2
2054+
; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
20562055
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
20572056
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
20582057
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0
20592058
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_7
20602059
; GFX11-SDAG-NEXT: ; %bb.8:
20612060
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
2062-
; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 1
2063-
; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s0, 5, s1
2064-
; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s33 dlc
2061+
; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
2062+
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
2063+
; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s33 dlc
20652064
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
2066-
; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc
2065+
; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s1 dlc
20672066
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
2068-
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
2067+
; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
2068+
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
20692069
; GFX11-SDAG-NEXT: s_mov_b32 s32, s34
20702070
; GFX11-SDAG-NEXT: s_mov_b32 s34, s8
2071-
; GFX11-SDAG-NEXT: s_mov_b32 s33, s7
20722071
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
20732072
;
20742073
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas:

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,16 @@
44
define amdgpu_gs i32 @main() {
55
; CHECK-LABEL: main:
66
; CHECK: ; %bb.0: ; %bb
7+
; CHECK-NEXT: s_bitcmp1_b32 0, 0
78
; CHECK-NEXT: s_mov_b32 s0, 0
8-
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
9+
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
10+
; CHECK-NEXT: s_or_saveexec_b32 s2, -1
11+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
12+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
13+
; CHECK-NEXT: v_readfirstlane_b32 s1, v0
14+
; CHECK-NEXT: s_mov_b32 exec_lo, s2
15+
; CHECK-NEXT: s_or_b32 s0, s0, s1
16+
; CHECK-NEXT: s_wait_alu 0xfffe
917
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
1018
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
1119
; CHECK-NEXT: s_wait_alu 0xfffe

llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,4 @@ entry:
3535
declare void @llvm.amdgcn.sched.barrier(i32 immarg) #0
3636

3737
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
38-
declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) #1
38+
declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) #1

0 commit comments

Comments
 (0)