Skip to content

Commit 736c9c6

Browse files
authored
AMDGPU: Add tests for atomics with AGPR operands (#155820)
The handling of AGPR vs. VGPR operand restrictions is broken and results in bugs like #155777 and missed optimizations. Add some baseline tests for future improvements.
1 parent c3c24be commit 736c9c6

File tree

5 files changed

+4689
-0
lines changed

5 files changed

+4689
-0
lines changed
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s
3+
4+
; Baseline test: cmpxchg through an addrspace(3) pointer where the compare
; value (%data0), the new value (%data1) and the consumer of the loaded result
; are all inline asm operands constrained with "^VA" (the "av" in the name).
; Name suffix scheme: ret_<data0 constraint>_<data1 constraint>__<result use>.
; The expected output keeps all three values in VGPRs (v1, v2, v0), with no
; v_accvgpr copies around the DS instruction.
; NOTE(review): "^VA" presumably allows either a VGPR or an AGPR - confirm
; against the AMDGPU inline asm constraint documentation.
define void @ds_atomic_cmpxchg_i32_ret_av_av__av(ptr addrspace(3) %ptr) #0 {
5+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__av:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; CHECK-NEXT: ;;#ASMSTART
9+
; CHECK-NEXT: ; def v1
10+
; CHECK-NEXT: ;;#ASMEND
11+
; CHECK-NEXT: ;;#ASMSTART
12+
; CHECK-NEXT: ; def v2
13+
; CHECK-NEXT: ;;#ASMEND
14+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
15+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
16+
; CHECK-NEXT: ;;#ASMSTART
17+
; CHECK-NEXT: ; use v0
18+
; CHECK-NEXT: ;;#ASMEND
19+
; CHECK-NEXT: s_setpc_b64 s[30:31]
20+
; Element 10 of [512 x i32] is byte offset 40; it appears as offset:40 on the
; expected ds_cmpst_rtn_b32 line.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
21+
; Empty inline asm bodies (only an asm comment) define the two data operands
; under the register constraint being tested.
%data0 = call i32 asm "; def $0", "=^VA"()
22+
%data1 = call i32 asm "; def $0", "=^VA"()
23+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
24+
; Only the loaded value (field 0) is consumed; the i1 success flag is unused.
%result = extractvalue { i32, i1 } %pair, 0
25+
call void asm "; use $0", "^VA"(i32 %result)
26+
ret void
27+
}
28+
29+
; Baseline test: cmpxchg through an addrspace(3) pointer where both data
; operands (%data0 = compare value, %data1 = new value) are defined by inline
; asm with the "^VA" constraint and the loaded result is consumed by inline asm
; with the "v" constraint.
; Name suffix scheme: ret_<data0 constraint>_<data1 constraint>__<result use>.
; The expected output uses VGPRs throughout (v1, v2 in; v0 out) with no
; v_accvgpr copies.
define void @ds_atomic_cmpxchg_i32_ret_av_av__v(ptr addrspace(3) %ptr) #0 {
30+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__v:
31+
; CHECK: ; %bb.0:
32+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33+
; CHECK-NEXT: ;;#ASMSTART
34+
; CHECK-NEXT: ; def v1
35+
; CHECK-NEXT: ;;#ASMEND
36+
; CHECK-NEXT: ;;#ASMSTART
37+
; CHECK-NEXT: ; def v2
38+
; CHECK-NEXT: ;;#ASMEND
39+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
40+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
41+
; CHECK-NEXT: ;;#ASMSTART
42+
; CHECK-NEXT: ; use v0
43+
; CHECK-NEXT: ;;#ASMEND
44+
; CHECK-NEXT: s_setpc_b64 s[30:31]
45+
; Index 10 into i32 elements gives the byte offset 40 seen as offset:40 in the
; expected DS instruction.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
46+
%data0 = call i32 asm "; def $0", "=^VA"()
47+
%data1 = call i32 asm "; def $0", "=^VA"()
48+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
49+
; Field 0 of the cmpxchg result is the loaded value; the success bit is dropped.
%result = extractvalue { i32, i1 } %pair, 0
50+
; The result consumer is restricted to the "v" constraint in this variant.
call void asm "; use $0", "v"(i32 %result)
51+
ret void
52+
}
53+
54+
; Baseline test: cmpxchg through an addrspace(3) pointer where both data
; operands (%data0 = compare value, %data1 = new value) are defined by inline
; asm with the "^VA" constraint and the loaded result is consumed by inline asm
; with the "a" constraint.
; Name suffix scheme: ret_<data0 constraint>_<data1 constraint>__<result use>.
; The expected output returns the DS result in v0 and then copies it into a0
; with v_accvgpr_write_b32 before the "use" asm, i.e. the instruction does not
; currently write the AGPR directly.
define void @ds_atomic_cmpxchg_i32_ret_av_av__a(ptr addrspace(3) %ptr) #0 {
55+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_av__a:
56+
; CHECK: ; %bb.0:
57+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58+
; CHECK-NEXT: ;;#ASMSTART
59+
; CHECK-NEXT: ; def v1
60+
; CHECK-NEXT: ;;#ASMEND
61+
; CHECK-NEXT: ;;#ASMSTART
62+
; CHECK-NEXT: ; def v2
63+
; CHECK-NEXT: ;;#ASMEND
64+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
65+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
66+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
67+
; CHECK-NEXT: ;;#ASMSTART
68+
; CHECK-NEXT: ; use a0
69+
; CHECK-NEXT: ;;#ASMEND
70+
; CHECK-NEXT: s_setpc_b64 s[30:31]
71+
; Index 10 into i32 elements gives the byte offset 40 seen as offset:40 in the
; expected DS instruction.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
72+
%data0 = call i32 asm "; def $0", "=^VA"()
73+
%data1 = call i32 asm "; def $0", "=^VA"()
74+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
75+
; Field 0 of the cmpxchg result is the loaded value; the success bit is dropped.
%result = extractvalue { i32, i1 } %pair, 0
76+
; The "a" constraint on the consumer is what produces the a0 use in the
; expected output.
call void asm "; use $0", "a"(i32 %result)
77+
ret void
78+
}
79+
80+
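; FIXME: Broken, so this case is commented out. Both cmpxchg data operands and
; the result use are constrained to "a" here; presumably this hits the AGPR vs.
; VGPR operand-restriction handling problem. Re-enable and regenerate the
; assertions once it compiles.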
81+
; define void @ds_atomic_cmpxchg_i32_ret_a_a__a(ptr addrspace(3) %ptr) #0 {
82+
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
83+
; %data0 = call i32 asm "; def $0", "=a"()
84+
; %data1 = call i32 asm "; def $0", "=a"()
85+
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
86+
; %result = extractvalue { i32, i1 } %pair, 0
87+
; call void asm "; use $0", "a"(i32 %result)
88+
; ret void
89+
; }
90+
91+
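; FIXME: Broken, so this case is commented out. Both cmpxchg data operands are
; constrained to "a" and the result use to "v"; presumably this hits the AGPR
; vs. VGPR operand-restriction handling problem. Re-enable and regenerate the
; assertions once it compiles.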
92+
; define void @ds_atomic_cmpxchg_i32_ret_a_a__v(ptr addrspace(3) %ptr) #0 {
93+
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
94+
; %data0 = call i32 asm "; def $0", "=a"()
95+
; %data1 = call i32 asm "; def $0", "=a"()
96+
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
97+
; %result = extractvalue { i32, i1 } %pair, 0
98+
; call void asm "; use $0", "v"(i32 %result)
99+
; ret void
100+
; }
101+
102+
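; FIXME: Broken, so this case is commented out. The new-value operand (%data1)
; is constrained to "a" while %data0 and the result use are "v"; presumably
; this hits the AGPR vs. VGPR operand-restriction handling problem. Re-enable
; and regenerate the assertions once it compiles.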
103+
; define void @ds_atomic_cmpxchg_i32_ret_v_a__v(ptr addrspace(3) %ptr) #0 {
104+
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
105+
; %data0 = call i32 asm "; def $0", "=v"()
106+
; %data1 = call i32 asm "; def $0", "=a"()
107+
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
108+
; %result = extractvalue { i32, i1 } %pair, 0
109+
; call void asm "; use $0", "v"(i32 %result)
110+
; ret void
111+
; }
112+
113+
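; FIXME: Broken, so this case is commented out. The compare-value operand
; (%data0) is constrained to "a" while %data1 and the result use are "v";
; presumably this hits the AGPR vs. VGPR operand-restriction handling problem.
; Re-enable and regenerate the assertions once it compiles.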
114+
; define void @ds_atomic_cmpxchg_i32_ret_a_v__v(ptr addrspace(3) %ptr) #0 {
115+
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
116+
; %data0 = call i32 asm "; def $0", "=a"()
117+
; %data1 = call i32 asm "; def $0", "=v"()
118+
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
119+
; %result = extractvalue { i32, i1 } %pair, 0
120+
; call void asm "; use $0", "v"(i32 %result)
121+
; ret void
122+
; }
123+
124+
; Baseline test: cmpxchg through an addrspace(3) pointer where both data
; operands (%data0 = compare value, %data1 = new value) are defined by inline
; asm with the "v" constraint and the loaded result is consumed by inline asm
; with the "a" constraint.
; Name suffix scheme: ret_<data0 constraint>_<data1 constraint>__<result use>.
; The expected output returns the DS result in v0 and then copies it into a0
; with v_accvgpr_write_b32 before the "use" asm.
define void @ds_atomic_cmpxchg_i32_ret_v_v__a(ptr addrspace(3) %ptr) #0 {
125+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_v__a:
126+
; CHECK: ; %bb.0:
127+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128+
; CHECK-NEXT: ;;#ASMSTART
129+
; CHECK-NEXT: ; def v1
130+
; CHECK-NEXT: ;;#ASMEND
131+
; CHECK-NEXT: ;;#ASMSTART
132+
; CHECK-NEXT: ; def v2
133+
; CHECK-NEXT: ;;#ASMEND
134+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
135+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
136+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
137+
; CHECK-NEXT: ;;#ASMSTART
138+
; CHECK-NEXT: ; use a0
139+
; CHECK-NEXT: ;;#ASMEND
140+
; CHECK-NEXT: s_setpc_b64 s[30:31]
141+
; Index 10 into i32 elements gives the byte offset 40 seen as offset:40 in the
; expected DS instruction.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
142+
%data0 = call i32 asm "; def $0", "=v"()
143+
%data1 = call i32 asm "; def $0", "=v"()
144+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
145+
; Field 0 of the cmpxchg result is the loaded value; the success bit is dropped.
%result = extractvalue { i32, i1 } %pair, 0
146+
; The "a" constraint on the consumer is what produces the a0 use in the
; expected output.
call void asm "; use $0", "a"(i32 %result)
147+
ret void
148+
}
149+
150+
; Baseline test: cmpxchg through an addrspace(3) pointer with mixed operand
; constraints: the compare value (%data0) is defined with "^VA", the new value
; (%data1) with "v", and the loaded result is consumed with "^VA".
; Name suffix scheme: ret_<data0 constraint>_<data1 constraint>__<result use>.
; The expected output uses VGPRs throughout (v1, v2 in; v0 out) with no
; v_accvgpr copies.
define void @ds_atomic_cmpxchg_i32_ret_av_v__av(ptr addrspace(3) %ptr) #0 {
151+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_av_v__av:
152+
; CHECK: ; %bb.0:
153+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154+
; CHECK-NEXT: ;;#ASMSTART
155+
; CHECK-NEXT: ; def v1
156+
; CHECK-NEXT: ;;#ASMEND
157+
; CHECK-NEXT: ;;#ASMSTART
158+
; CHECK-NEXT: ; def v2
159+
; CHECK-NEXT: ;;#ASMEND
160+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
161+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
162+
; CHECK-NEXT: ;;#ASMSTART
163+
; CHECK-NEXT: ; use v0
164+
; CHECK-NEXT: ;;#ASMEND
165+
; CHECK-NEXT: s_setpc_b64 s[30:31]
166+
; Index 10 into i32 elements gives the byte offset 40 seen as offset:40 in the
; expected DS instruction.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
167+
; Only the compare value uses "^VA"; the new value is pinned to "v".
%data0 = call i32 asm "; def $0", "=^VA"()
168+
%data1 = call i32 asm "; def $0", "=v"()
169+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
170+
; Field 0 of the cmpxchg result is the loaded value; the success bit is dropped.
%result = extractvalue { i32, i1 } %pair, 0
171+
call void asm "; use $0", "^VA"(i32 %result)
172+
ret void
173+
}
174+
175+
; Baseline test: cmpxchg through an addrspace(3) pointer with mixed operand
; constraints: the compare value (%data0) is defined with "v", the new value
; (%data1) with "^VA", and the loaded result is consumed with "^VA".
; Name suffix scheme: ret_<data0 constraint>_<data1 constraint>__<result use>.
; The expected output uses VGPRs throughout (v1, v2 in; v0 out) with no
; v_accvgpr copies.
define void @ds_atomic_cmpxchg_i32_ret_v_av__av(ptr addrspace(3) %ptr) #0 {
176+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_av__av:
177+
; CHECK: ; %bb.0:
178+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179+
; CHECK-NEXT: ;;#ASMSTART
180+
; CHECK-NEXT: ; def v1
181+
; CHECK-NEXT: ;;#ASMEND
182+
; CHECK-NEXT: ;;#ASMSTART
183+
; CHECK-NEXT: ; def v2
184+
; CHECK-NEXT: ;;#ASMEND
185+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
186+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
187+
; CHECK-NEXT: ;;#ASMSTART
188+
; CHECK-NEXT: ; use v0
189+
; CHECK-NEXT: ;;#ASMEND
190+
; CHECK-NEXT: s_setpc_b64 s[30:31]
191+
; Index 10 into i32 elements gives the byte offset 40 seen as offset:40 in the
; expected DS instruction.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
192+
; Only the new value uses "^VA"; the compare value is pinned to "v".
%data0 = call i32 asm "; def $0", "=v"()
193+
%data1 = call i32 asm "; def $0", "=^VA"()
194+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
195+
; Field 0 of the cmpxchg result is the loaded value; the success bit is dropped.
%result = extractvalue { i32, i1 } %pair, 0
196+
call void asm "; use $0", "^VA"(i32 %result)
197+
ret void
198+
}
199+
200+
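; FIXME: Broken, so this case is commented out. The new-value operand (%data1)
; is constrained to "a" while %data0 and the result use are "^VA"; presumably
; this hits the AGPR vs. VGPR operand-restriction handling problem. Re-enable
; and regenerate the assertions once it compiles.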
201+
; define void @ds_atomic_cmpxchg_i32_ret_av_a__av(ptr addrspace(3) %ptr) #0 {
202+
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
203+
; %data0 = call i32 asm "; def $0", "=^VA"()
204+
; %data1 = call i32 asm "; def $0", "=a"()
205+
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
206+
; %result = extractvalue { i32, i1 } %pair, 0
207+
; call void asm "; use $0", "^VA"(i32 %result)
208+
; ret void
209+
; }
210+
211+
; Baseline test: cmpxchg through an addrspace(3) pointer with mixed operand
; constraints: the compare value (%data0) is defined with "a", the new value
; (%data1) with "^VA", and the loaded result is consumed with "^VA".
; Name suffix scheme: ret_<data0 constraint>_<data1 constraint>__<result use>.
; The expected output defines %data0 in a0 and copies it to v2 with
; v_accvgpr_read_b32 before the DS instruction, i.e. the AGPR is not used as a
; direct operand of ds_cmpst_rtn_b32; %data1 and the result stay in VGPRs.
define void @ds_atomic_cmpxchg_i32_ret_a_av__av(ptr addrspace(3) %ptr) #0 {
212+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_av__av:
213+
; CHECK: ; %bb.0:
214+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215+
; CHECK-NEXT: ;;#ASMSTART
216+
; CHECK-NEXT: ; def a0
217+
; CHECK-NEXT: ;;#ASMEND
218+
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
219+
; CHECK-NEXT: ;;#ASMSTART
220+
; CHECK-NEXT: ; def v1
221+
; CHECK-NEXT: ;;#ASMEND
222+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v2, v1 offset:40
223+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
224+
; CHECK-NEXT: ;;#ASMSTART
225+
; CHECK-NEXT: ; use v0
226+
; CHECK-NEXT: ;;#ASMEND
227+
; CHECK-NEXT: s_setpc_b64 s[30:31]
228+
; Index 10 into i32 elements gives the byte offset 40 seen as offset:40 in the
; expected DS instruction.
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
229+
; The "=a" definition is what produces the a0 def and the following
; v_accvgpr_read_b32 in the expected output.
%data0 = call i32 asm "; def $0", "=a"()
230+
%data1 = call i32 asm "; def $0", "=^VA"()
231+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
232+
; Field 0 of the cmpxchg result is the loaded value; the success bit is dropped.
%result = extractvalue { i32, i1 } %pair, 0
233+
call void asm "; use $0", "^VA"(i32 %result)
234+
ret void
235+
}
236+
237+
; Attribute group #0 is attached to every function definition visible in this
; test.
; NOTE(review): "amdgpu-waves-per-eu"="10,10" presumably requests a min,max
; waves-per-EU range of exactly 10 - confirm against the AMDGPU usage docs.
attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" }

0 commit comments

Comments
 (0)