@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
110110 ret <1 x bfloat> %ret
111111}
112112
; Acquire atomic load of a <1 x i64> from %x, declared with only 4-byte
; alignment. Both sets of expected assembly lower it to a call to
; ___atomic_load with $8 in %edi, the incoming pointer copied to %rsi, the
; stack pointer (a slot created by the pushq) in %rdx and $2 in %ecx, and then
; read the result back from (%rsp) into %rax. The two expectation sets differ
; only in the order of the %edi / %rdx setup.
; NOTE(review): presumably the libcall is chosen because align 4 is below the
; 8-byte access size, and $2 encodes the acquire ordering -- confirm.
113+ define <1 x i64 > @atomic_vec1_i64 (ptr %x ) {
114+ ; CHECK-LABEL: atomic_vec1_i64:
115+ ; CHECK: ## %bb.0:
116+ ; CHECK-NEXT: pushq %rax
117+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
118+ ; CHECK-NEXT: movq %rdi, %rsi
119+ ; CHECK-NEXT: movq %rsp, %rdx
120+ ; CHECK-NEXT: movl $8, %edi
121+ ; CHECK-NEXT: movl $2, %ecx
122+ ; CHECK-NEXT: callq ___atomic_load
123+ ; CHECK-NEXT: movq (%rsp), %rax
124+ ; CHECK-NEXT: popq %rcx
125+ ; CHECK-NEXT: retq
126+ ;
127+ ; CHECK0-LABEL: atomic_vec1_i64:
128+ ; CHECK0: ## %bb.0:
129+ ; CHECK0-NEXT: pushq %rax
130+ ; CHECK0-NEXT: .cfi_def_cfa_offset 16
131+ ; CHECK0-NEXT: movq %rdi, %rsi
132+ ; CHECK0-NEXT: movl $8, %edi
133+ ; CHECK0-NEXT: movq %rsp, %rdx
134+ ; CHECK0-NEXT: movl $2, %ecx
135+ ; CHECK0-NEXT: callq ___atomic_load
136+ ; CHECK0-NEXT: movq (%rsp), %rax
137+ ; CHECK0-NEXT: popq %rcx
138+ ; CHECK0-NEXT: retq
139+ %ret = load atomic <1 x i64 >, ptr %x acquire , align 4
140+ ret <1 x i64 > %ret
141+ }
142+
; Acquire atomic load of a <1 x double> from %x, declared with only 4-byte
; alignment. Both sets of expected assembly call ___atomic_load with $8 in
; %edi, the incoming pointer copied to %rsi, the stack pointer in %rdx and $2
; in %ecx, and then use movsd to load the result from memory into the low
; element of %xmm0 (upper part zeroed, per the "mem[0],zero" annotation).
; The two expectation sets differ only in the order of the %edi / %rdx setup.
; NOTE(review): presumably the libcall is chosen because align 4 is below the
; 8-byte access size, and $2 encodes the acquire ordering -- confirm.
143+ define <1 x double > @atomic_vec1_double (ptr %x ) {
144+ ; CHECK-LABEL: atomic_vec1_double:
145+ ; CHECK: ## %bb.0:
146+ ; CHECK-NEXT: pushq %rax
147+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
148+ ; CHECK-NEXT: movq %rdi, %rsi
149+ ; CHECK-NEXT: movq %rsp, %rdx
150+ ; CHECK-NEXT: movl $8, %edi
151+ ; CHECK-NEXT: movl $2, %ecx
152+ ; CHECK-NEXT: callq ___atomic_load
153+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
154+ ; CHECK-NEXT: popq %rax
155+ ; CHECK-NEXT: retq
156+ ;
157+ ; CHECK0-LABEL: atomic_vec1_double:
158+ ; CHECK0: ## %bb.0:
159+ ; CHECK0-NEXT: pushq %rax
160+ ; CHECK0-NEXT: .cfi_def_cfa_offset 16
161+ ; CHECK0-NEXT: movq %rdi, %rsi
162+ ; CHECK0-NEXT: movl $8, %edi
163+ ; CHECK0-NEXT: movq %rsp, %rdx
164+ ; CHECK0-NEXT: movl $2, %ecx
165+ ; CHECK0-NEXT: callq ___atomic_load
166+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
167+ ; CHECK0-NEXT: popq %rax
168+ ; CHECK0-NEXT: retq
169+ %ret = load atomic <1 x double >, ptr %x acquire , align 4
170+ ret <1 x double > %ret
171+ }
172+
; Acquire atomic load of a <2 x i32> from %x, declared with only 4-byte
; alignment. Both sets of expected assembly call ___atomic_load with $8 in
; %edi, the incoming pointer copied to %rsi, the stack pointer in %rdx and $2
; in %ecx, and then load the 8-byte result from memory into the low half of
; %xmm0 (upper part zeroed, per the "mem[0],zero" annotation).
; Besides the order of the %edi / %rdx setup, the two expectation sets differ
; in the reload instruction: movsd in the first set, movq in the second.
; NOTE(review): presumably the libcall is chosen because align 4 is below the
; 8-byte access size, and $2 encodes the acquire ordering -- confirm.
173+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) {
174+ ; CHECK-LABEL: atomic_vec2_i32:
175+ ; CHECK: ## %bb.0:
176+ ; CHECK-NEXT: pushq %rax
177+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
178+ ; CHECK-NEXT: movq %rdi, %rsi
179+ ; CHECK-NEXT: movq %rsp, %rdx
180+ ; CHECK-NEXT: movl $8, %edi
181+ ; CHECK-NEXT: movl $2, %ecx
182+ ; CHECK-NEXT: callq ___atomic_load
183+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
184+ ; CHECK-NEXT: popq %rax
185+ ; CHECK-NEXT: retq
186+ ;
187+ ; CHECK0-LABEL: atomic_vec2_i32:
188+ ; CHECK0: ## %bb.0:
189+ ; CHECK0-NEXT: pushq %rax
190+ ; CHECK0-NEXT: .cfi_def_cfa_offset 16
191+ ; CHECK0-NEXT: movq %rdi, %rsi
192+ ; CHECK0-NEXT: movl $8, %edi
193+ ; CHECK0-NEXT: movq %rsp, %rdx
194+ ; CHECK0-NEXT: movl $2, %ecx
195+ ; CHECK0-NEXT: callq ___atomic_load
196+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
197+ ; CHECK0-NEXT: popq %rax
198+ ; CHECK0-NEXT: retq
199+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4
200+ ret <2 x i32 > %ret
201+ }
202+
; Acquire atomic load of a <4 x float> from %x, declared with only 4-byte
; alignment. Both sets of expected assembly reserve 24 bytes of stack, call
; ___atomic_load with $16 in %edi, the incoming pointer copied to %rsi, the
; stack pointer in %rdx and $2 in %ecx, and then reload the 16-byte result
; from (%rsp) into %xmm0 with movaps before releasing the stack.
; The two expectation sets differ only in the order of the %edi / %rdx setup.
; NOTE(review): presumably $2 encodes the acquire ordering, and the libcall is
; a consequence of the size/alignment of the access -- confirm.
203+ define <4 x float > @atomic_vec4_float (ptr %x ) {
204+ ; CHECK-LABEL: atomic_vec4_float:
205+ ; CHECK: ## %bb.0:
206+ ; CHECK-NEXT: subq $24, %rsp
207+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
208+ ; CHECK-NEXT: movq %rdi, %rsi
209+ ; CHECK-NEXT: movq %rsp, %rdx
210+ ; CHECK-NEXT: movl $16, %edi
211+ ; CHECK-NEXT: movl $2, %ecx
212+ ; CHECK-NEXT: callq ___atomic_load
213+ ; CHECK-NEXT: movaps (%rsp), %xmm0
214+ ; CHECK-NEXT: addq $24, %rsp
215+ ; CHECK-NEXT: retq
216+ ;
217+ ; CHECK0-LABEL: atomic_vec4_float:
218+ ; CHECK0: ## %bb.0:
219+ ; CHECK0-NEXT: subq $24, %rsp
220+ ; CHECK0-NEXT: .cfi_def_cfa_offset 32
221+ ; CHECK0-NEXT: movq %rdi, %rsi
222+ ; CHECK0-NEXT: movl $16, %edi
223+ ; CHECK0-NEXT: movq %rsp, %rdx
224+ ; CHECK0-NEXT: movl $2, %ecx
225+ ; CHECK0-NEXT: callq ___atomic_load
226+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
227+ ; CHECK0-NEXT: addq $24, %rsp
228+ ; CHECK0-NEXT: retq
229+ %ret = load atomic <4 x float >, ptr %x acquire , align 4
230+ ret <4 x float > %ret
231+ }
232+
; Acquire atomic load of an <8 x double> from %x, declared with only 4-byte
; alignment. Both sets of expected assembly reserve 72 bytes of stack, call
; ___atomic_load with $64 in %edi, the incoming pointer copied to %rsi, the
; stack pointer in %rdx and $2 in %ecx, and then reload the result from the
; stack as four 16-byte pieces into %xmm0..%xmm3 before releasing the stack.
; Besides the order of the %edi / %rdx setup, the two expectation sets differ
; in the reload instruction: movaps in the first set, movapd in the second.
; NOTE(review): presumably $2 encodes the acquire ordering, and the libcall is
; a consequence of the size/alignment of the access -- confirm.
233+ define <8 x double > @atomic_vec8_double (ptr %x ) {
234+ ; CHECK-LABEL: atomic_vec8_double:
235+ ; CHECK: ## %bb.0:
236+ ; CHECK-NEXT: subq $72, %rsp
237+ ; CHECK-NEXT: .cfi_def_cfa_offset 80
238+ ; CHECK-NEXT: movq %rdi, %rsi
239+ ; CHECK-NEXT: movq %rsp, %rdx
240+ ; CHECK-NEXT: movl $64, %edi
241+ ; CHECK-NEXT: movl $2, %ecx
242+ ; CHECK-NEXT: callq ___atomic_load
243+ ; CHECK-NEXT: movaps (%rsp), %xmm0
244+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
245+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
246+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
247+ ; CHECK-NEXT: addq $72, %rsp
248+ ; CHECK-NEXT: retq
249+ ;
250+ ; CHECK0-LABEL: atomic_vec8_double:
251+ ; CHECK0: ## %bb.0:
252+ ; CHECK0-NEXT: subq $72, %rsp
253+ ; CHECK0-NEXT: .cfi_def_cfa_offset 80
254+ ; CHECK0-NEXT: movq %rdi, %rsi
255+ ; CHECK0-NEXT: movl $64, %edi
256+ ; CHECK0-NEXT: movq %rsp, %rdx
257+ ; CHECK0-NEXT: movl $2, %ecx
258+ ; CHECK0-NEXT: callq ___atomic_load
259+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
260+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
261+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
262+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
263+ ; CHECK0-NEXT: addq $72, %rsp
264+ ; CHECK0-NEXT: retq
265+ %ret = load atomic <8 x double >, ptr %x acquire , align 4
266+ ret <8 x double > %ret
267+ }
268+
; Acquire atomic load of a <16 x bfloat> from %x, declared with only 4-byte
; alignment. Both sets of expected assembly reserve 40 bytes of stack, call
; ___atomic_load with $32 in %edi, the incoming pointer copied to %rsi, the
; stack pointer in %rdx and $2 in %ecx, and then reload the result from the
; stack as two 16-byte pieces into %xmm0 and %xmm1 with movaps before
; releasing the stack.
; The two expectation sets differ only in the order of the %edi / %rdx setup.
; NOTE(review): presumably $2 encodes the acquire ordering, and the libcall is
; a consequence of the size/alignment of the access -- confirm.
269+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) {
270+ ; CHECK-LABEL: atomic_vec16_bfloat:
271+ ; CHECK: ## %bb.0:
272+ ; CHECK-NEXT: subq $40, %rsp
273+ ; CHECK-NEXT: .cfi_def_cfa_offset 48
274+ ; CHECK-NEXT: movq %rdi, %rsi
275+ ; CHECK-NEXT: movq %rsp, %rdx
276+ ; CHECK-NEXT: movl $32, %edi
277+ ; CHECK-NEXT: movl $2, %ecx
278+ ; CHECK-NEXT: callq ___atomic_load
279+ ; CHECK-NEXT: movaps (%rsp), %xmm0
280+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
281+ ; CHECK-NEXT: addq $40, %rsp
282+ ; CHECK-NEXT: retq
283+ ;
284+ ; CHECK0-LABEL: atomic_vec16_bfloat:
285+ ; CHECK0: ## %bb.0:
286+ ; CHECK0-NEXT: subq $40, %rsp
287+ ; CHECK0-NEXT: .cfi_def_cfa_offset 48
288+ ; CHECK0-NEXT: movq %rdi, %rsi
289+ ; CHECK0-NEXT: movl $32, %edi
290+ ; CHECK0-NEXT: movq %rsp, %rdx
291+ ; CHECK0-NEXT: movl $2, %ecx
292+ ; CHECK0-NEXT: callq ___atomic_load
293+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
294+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
295+ ; CHECK0-NEXT: addq $40, %rsp
296+ ; CHECK0-NEXT: retq
297+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4
298+ ret <16 x bfloat> %ret
299+ }
300+
; Acquire atomic load of a <32 x half> from %x, declared with only 4-byte
; alignment. Both sets of expected assembly reserve 72 bytes of stack, call
; ___atomic_load with $64 in %edi, the incoming pointer copied to %rsi, the
; stack pointer in %rdx and $2 in %ecx, and then reload the result from the
; stack as four 16-byte pieces into %xmm0..%xmm3 with movaps before releasing
; the stack.
; The two expectation sets differ only in the order of the %edi / %rdx setup.
; NOTE(review): presumably $2 encodes the acquire ordering, and the libcall is
; a consequence of the size/alignment of the access -- confirm.
301+ define <32 x half > @atomic_vec32_half (ptr %x ) {
302+ ; CHECK-LABEL: atomic_vec32_half:
303+ ; CHECK: ## %bb.0:
304+ ; CHECK-NEXT: subq $72, %rsp
305+ ; CHECK-NEXT: .cfi_def_cfa_offset 80
306+ ; CHECK-NEXT: movq %rdi, %rsi
307+ ; CHECK-NEXT: movq %rsp, %rdx
308+ ; CHECK-NEXT: movl $64, %edi
309+ ; CHECK-NEXT: movl $2, %ecx
310+ ; CHECK-NEXT: callq ___atomic_load
311+ ; CHECK-NEXT: movaps (%rsp), %xmm0
312+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
313+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
314+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
315+ ; CHECK-NEXT: addq $72, %rsp
316+ ; CHECK-NEXT: retq
317+ ;
318+ ; CHECK0-LABEL: atomic_vec32_half:
319+ ; CHECK0: ## %bb.0:
320+ ; CHECK0-NEXT: subq $72, %rsp
321+ ; CHECK0-NEXT: .cfi_def_cfa_offset 80
322+ ; CHECK0-NEXT: movq %rdi, %rsi
323+ ; CHECK0-NEXT: movl $64, %edi
324+ ; CHECK0-NEXT: movq %rsp, %rdx
325+ ; CHECK0-NEXT: movl $2, %ecx
326+ ; CHECK0-NEXT: callq ___atomic_load
327+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
328+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
329+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
330+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
331+ ; CHECK0-NEXT: addq $72, %rsp
332+ ; CHECK0-NEXT: retq
333+ %ret = load atomic <32 x half >, ptr %x acquire , align 4
334+ ret <32 x half > %ret
335+ }
0 commit comments