@@ -127,6 +127,34 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
127127 ret <1 x bfloat> %ret
128128}
129129
; Atomic acquire load of a <1 x ptr> vector from %x, returned by value.
; The IR load is align 4, while the expected code passes $8 as the size in
; %edi. Both check prefixes expect a call to ___atomic_load with the source
; pointer moved into %rsi and the stack slot at %rsp passed in %rdx; the
; result is then reloaded from (%rsp) into %rax. The two prefixes differ only
; in the order of the %rdx and %edi setup moves.
; NOTE(review): the libcall is presumably selected because the alignment is
; smaller than the access size, and $2 in %ecx presumably encodes the acquire
; ordering -- confirm. The RUN lines that define the CHECK3 and CHECK0
; prefixes are not visible in this hunk.
130+ define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
131+ ; CHECK3-LABEL: atomic_vec1_ptr:
132+ ; CHECK3: ## %bb.0:
133+ ; CHECK3-NEXT: pushq %rax
134+ ; CHECK3-NEXT: movq %rdi, %rsi
135+ ; CHECK3-NEXT: movq %rsp, %rdx
136+ ; CHECK3-NEXT: movl $8, %edi
137+ ; CHECK3-NEXT: movl $2, %ecx
138+ ; CHECK3-NEXT: callq ___atomic_load
139+ ; CHECK3-NEXT: movq (%rsp), %rax
140+ ; CHECK3-NEXT: popq %rcx
141+ ; CHECK3-NEXT: retq
142+ ;
143+ ; CHECK0-LABEL: atomic_vec1_ptr:
144+ ; CHECK0: ## %bb.0:
145+ ; CHECK0-NEXT: pushq %rax
146+ ; CHECK0-NEXT: movq %rdi, %rsi
147+ ; CHECK0-NEXT: movl $8, %edi
148+ ; CHECK0-NEXT: movq %rsp, %rdx
149+ ; CHECK0-NEXT: movl $2, %ecx
150+ ; CHECK0-NEXT: callq ___atomic_load
151+ ; CHECK0-NEXT: movq (%rsp), %rax
152+ ; CHECK0-NEXT: popq %rcx
153+ ; CHECK0-NEXT: retq
154+ %ret = load atomic <1 x ptr >, ptr %x acquire , align 4
155+ ret <1 x ptr > %ret
156+ }
157+
130158define <1 x half > @atomic_vec1_half (ptr %x ) {
131159; CHECK3-LABEL: atomic_vec1_half:
132160; CHECK3: ## %bb.0:
@@ -153,3 +181,214 @@ define <1 x float> @atomic_vec1_float(ptr %x) {
153181 %ret = load atomic <1 x float >, ptr %x acquire , align 4
154182 ret <1 x float > %ret
155183}
184+
; Atomic acquire load of a <1 x i64> vector from %x, returned by value.
; The IR load is align 4, while the expected code passes $8 as the size in
; %edi. Both check prefixes expect a call to ___atomic_load with the source
; pointer in %rsi and the stack slot at %rsp in %rdx, followed by a reload of
; the result from (%rsp) into %rax. The pushq/popq pair provides and releases
; that 8-byte stack slot.
; NOTE(review): the libcall is presumably used because the alignment is below
; the access size; $2 in %ecx presumably encodes acquire -- confirm.
185+ define <1 x i64 > @atomic_vec1_i64 (ptr %x ) nounwind {
186+ ; CHECK3-LABEL: atomic_vec1_i64:
187+ ; CHECK3: ## %bb.0:
188+ ; CHECK3-NEXT: pushq %rax
189+ ; CHECK3-NEXT: movq %rdi, %rsi
190+ ; CHECK3-NEXT: movq %rsp, %rdx
191+ ; CHECK3-NEXT: movl $8, %edi
192+ ; CHECK3-NEXT: movl $2, %ecx
193+ ; CHECK3-NEXT: callq ___atomic_load
194+ ; CHECK3-NEXT: movq (%rsp), %rax
195+ ; CHECK3-NEXT: popq %rcx
196+ ; CHECK3-NEXT: retq
197+ ;
198+ ; CHECK0-LABEL: atomic_vec1_i64:
199+ ; CHECK0: ## %bb.0:
200+ ; CHECK0-NEXT: pushq %rax
201+ ; CHECK0-NEXT: movq %rdi, %rsi
202+ ; CHECK0-NEXT: movl $8, %edi
203+ ; CHECK0-NEXT: movq %rsp, %rdx
204+ ; CHECK0-NEXT: movl $2, %ecx
205+ ; CHECK0-NEXT: callq ___atomic_load
206+ ; CHECK0-NEXT: movq (%rsp), %rax
207+ ; CHECK0-NEXT: popq %rcx
208+ ; CHECK0-NEXT: retq
209+ %ret = load atomic <1 x i64 >, ptr %x acquire , align 4
210+ ret <1 x i64 > %ret
211+ }
212+
; Atomic acquire load of a <1 x double> vector from %x, returned by value.
; The IR load is align 4, while the expected code passes $8 as the size in
; %edi. Both check prefixes expect a call to ___atomic_load (source pointer in
; %rsi, stack slot at %rsp in %rdx) and then a movsd that fills xmm0 from
; memory; the regex {{.*#+}} skips the operand text and matches only the
; trailing "xmm0 = mem[0],zero" annotation. The stack slot is released with
; popq %rax since the result lives in xmm0.
; NOTE(review): the libcall is presumably used because the alignment is below
; the access size; $2 in %ecx presumably encodes acquire -- confirm.
213+ define <1 x double > @atomic_vec1_double (ptr %x ) nounwind {
214+ ; CHECK3-LABEL: atomic_vec1_double:
215+ ; CHECK3: ## %bb.0:
216+ ; CHECK3-NEXT: pushq %rax
217+ ; CHECK3-NEXT: movq %rdi, %rsi
218+ ; CHECK3-NEXT: movq %rsp, %rdx
219+ ; CHECK3-NEXT: movl $8, %edi
220+ ; CHECK3-NEXT: movl $2, %ecx
221+ ; CHECK3-NEXT: callq ___atomic_load
222+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
223+ ; CHECK3-NEXT: popq %rax
224+ ; CHECK3-NEXT: retq
225+ ;
226+ ; CHECK0-LABEL: atomic_vec1_double:
227+ ; CHECK0: ## %bb.0:
228+ ; CHECK0-NEXT: pushq %rax
229+ ; CHECK0-NEXT: movq %rdi, %rsi
230+ ; CHECK0-NEXT: movl $8, %edi
231+ ; CHECK0-NEXT: movq %rsp, %rdx
232+ ; CHECK0-NEXT: movl $2, %ecx
233+ ; CHECK0-NEXT: callq ___atomic_load
234+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
235+ ; CHECK0-NEXT: popq %rax
236+ ; CHECK0-NEXT: retq
237+ %ret = load atomic <1 x double >, ptr %x acquire , align 4
238+ ret <1 x double > %ret
239+ }
240+
; Atomic acquire load of a <2 x i32> vector from %x, returned by value.
; The IR load is align 4, while the expected code passes $8 as the size in
; %edi. Both check prefixes expect a call to ___atomic_load (source pointer in
; %rsi, stack slot at %rsp in %rdx) followed by a reload into xmm0. The
; prefixes differ in the reload mnemonic: the CHECK3 lines expect movsd and
; the CHECK0 lines expect movq, both annotated "xmm0 = mem[0],zero".
; NOTE(review): the libcall is presumably used because the alignment is below
; the access size; $2 in %ecx presumably encodes acquire -- confirm.
241+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) nounwind {
242+ ; CHECK3-LABEL: atomic_vec2_i32:
243+ ; CHECK3: ## %bb.0:
244+ ; CHECK3-NEXT: pushq %rax
245+ ; CHECK3-NEXT: movq %rdi, %rsi
246+ ; CHECK3-NEXT: movq %rsp, %rdx
247+ ; CHECK3-NEXT: movl $8, %edi
248+ ; CHECK3-NEXT: movl $2, %ecx
249+ ; CHECK3-NEXT: callq ___atomic_load
250+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
251+ ; CHECK3-NEXT: popq %rax
252+ ; CHECK3-NEXT: retq
253+ ;
254+ ; CHECK0-LABEL: atomic_vec2_i32:
255+ ; CHECK0: ## %bb.0:
256+ ; CHECK0-NEXT: pushq %rax
257+ ; CHECK0-NEXT: movq %rdi, %rsi
258+ ; CHECK0-NEXT: movl $8, %edi
259+ ; CHECK0-NEXT: movq %rsp, %rdx
260+ ; CHECK0-NEXT: movl $2, %ecx
261+ ; CHECK0-NEXT: callq ___atomic_load
262+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
263+ ; CHECK0-NEXT: popq %rax
264+ ; CHECK0-NEXT: retq
265+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4
266+ ret <2 x i32 > %ret
267+ }
268+
; Atomic acquire load of a <4 x float> vector from %x, returned by value.
; The IR load is align 4, while the expected code passes $16 as the size in
; %edi. Both check prefixes expect 24 bytes of stack to be reserved, a call to
; ___atomic_load (source pointer in %rsi, buffer at %rsp in %rdx), and a
; single movaps reload of the result from (%rsp) into xmm0 before the stack
; is restored.
; NOTE(review): the libcall is presumably used because the alignment is below
; the access size; $2 in %ecx presumably encodes acquire -- confirm.
269+ define <4 x float > @atomic_vec4_float (ptr %x ) nounwind {
270+ ; CHECK3-LABEL: atomic_vec4_float:
271+ ; CHECK3: ## %bb.0:
272+ ; CHECK3-NEXT: subq $24, %rsp
273+ ; CHECK3-NEXT: movq %rdi, %rsi
274+ ; CHECK3-NEXT: movq %rsp, %rdx
275+ ; CHECK3-NEXT: movl $16, %edi
276+ ; CHECK3-NEXT: movl $2, %ecx
277+ ; CHECK3-NEXT: callq ___atomic_load
278+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
279+ ; CHECK3-NEXT: addq $24, %rsp
280+ ; CHECK3-NEXT: retq
281+ ;
282+ ; CHECK0-LABEL: atomic_vec4_float:
283+ ; CHECK0: ## %bb.0:
284+ ; CHECK0-NEXT: subq $24, %rsp
285+ ; CHECK0-NEXT: movq %rdi, %rsi
286+ ; CHECK0-NEXT: movl $16, %edi
287+ ; CHECK0-NEXT: movq %rsp, %rdx
288+ ; CHECK0-NEXT: movl $2, %ecx
289+ ; CHECK0-NEXT: callq ___atomic_load
290+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
291+ ; CHECK0-NEXT: addq $24, %rsp
292+ ; CHECK0-NEXT: retq
293+ %ret = load atomic <4 x float >, ptr %x acquire , align 4
294+ ret <4 x float > %ret
295+ }
296+
; Atomic acquire load of an <8 x double> vector from %x, returned by value.
; The IR load is align 4, while the expected code passes $64 as the size in
; %edi. Both check prefixes expect 72 bytes of stack to be reserved, a call to
; ___atomic_load (source pointer in %rsi, buffer at %rsp in %rdx), and four
; reloads of the result from the stack buffer into xmm0..xmm3. The non-zero
; stack offsets are matched with the regex {{[0-9]+}} rather than literal
; values. The prefixes differ in the reload mnemonic: the CHECK3 lines expect
; movaps and the CHECK0 lines expect movapd.
; NOTE(review): the libcall is presumably used because the alignment is below
; the access size; $2 in %ecx presumably encodes acquire -- confirm.
297+ define <8 x double > @atomic_vec8_double (ptr %x ) nounwind {
298+ ; CHECK3-LABEL: atomic_vec8_double:
299+ ; CHECK3: ## %bb.0:
300+ ; CHECK3-NEXT: subq $72, %rsp
301+ ; CHECK3-NEXT: movq %rdi, %rsi
302+ ; CHECK3-NEXT: movq %rsp, %rdx
303+ ; CHECK3-NEXT: movl $64, %edi
304+ ; CHECK3-NEXT: movl $2, %ecx
305+ ; CHECK3-NEXT: callq ___atomic_load
306+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
307+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
308+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
309+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
310+ ; CHECK3-NEXT: addq $72, %rsp
311+ ; CHECK3-NEXT: retq
312+ ;
313+ ; CHECK0-LABEL: atomic_vec8_double:
314+ ; CHECK0: ## %bb.0:
315+ ; CHECK0-NEXT: subq $72, %rsp
316+ ; CHECK0-NEXT: movq %rdi, %rsi
317+ ; CHECK0-NEXT: movl $64, %edi
318+ ; CHECK0-NEXT: movq %rsp, %rdx
319+ ; CHECK0-NEXT: movl $2, %ecx
320+ ; CHECK0-NEXT: callq ___atomic_load
321+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
322+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
323+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
324+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
325+ ; CHECK0-NEXT: addq $72, %rsp
326+ ; CHECK0-NEXT: retq
327+ %ret = load atomic <8 x double >, ptr %x acquire , align 4
328+ ret <8 x double > %ret
329+ }
330+
; Atomic acquire load of a <16 x bfloat> vector from %x, returned by value.
; The IR load is align 4, while the expected code passes $32 as the size in
; %edi. Both check prefixes expect 40 bytes of stack to be reserved, a call to
; ___atomic_load (source pointer in %rsi, buffer at %rsp in %rdx), and two
; movaps reloads of the result from the stack buffer into xmm0 and xmm1. The
; non-zero stack offset is matched with the regex {{[0-9]+}}.
; NOTE(review): the libcall is presumably used because the alignment is below
; the access size; $2 in %ecx presumably encodes acquire -- confirm.
331+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) nounwind {
332+ ; CHECK3-LABEL: atomic_vec16_bfloat:
333+ ; CHECK3: ## %bb.0:
334+ ; CHECK3-NEXT: subq $40, %rsp
335+ ; CHECK3-NEXT: movq %rdi, %rsi
336+ ; CHECK3-NEXT: movq %rsp, %rdx
337+ ; CHECK3-NEXT: movl $32, %edi
338+ ; CHECK3-NEXT: movl $2, %ecx
339+ ; CHECK3-NEXT: callq ___atomic_load
340+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
341+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
342+ ; CHECK3-NEXT: addq $40, %rsp
343+ ; CHECK3-NEXT: retq
344+ ;
345+ ; CHECK0-LABEL: atomic_vec16_bfloat:
346+ ; CHECK0: ## %bb.0:
347+ ; CHECK0-NEXT: subq $40, %rsp
348+ ; CHECK0-NEXT: movq %rdi, %rsi
349+ ; CHECK0-NEXT: movl $32, %edi
350+ ; CHECK0-NEXT: movq %rsp, %rdx
351+ ; CHECK0-NEXT: movl $2, %ecx
352+ ; CHECK0-NEXT: callq ___atomic_load
353+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
354+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
355+ ; CHECK0-NEXT: addq $40, %rsp
356+ ; CHECK0-NEXT: retq
357+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4
358+ ret <16 x bfloat> %ret
359+ }
360+
; Atomic acquire load of a <32 x half> vector from %x, returned by value.
; The IR load is align 4, while the expected code passes $64 as the size in
; %edi. Both check prefixes expect 72 bytes of stack to be reserved, a call to
; ___atomic_load (source pointer in %rsi, buffer at %rsp in %rdx), and four
; movaps reloads of the result from the stack buffer into xmm0..xmm3. The
; non-zero stack offsets are matched with the regex {{[0-9]+}}.
; NOTE(review): the libcall is presumably used because the alignment is below
; the access size; $2 in %ecx presumably encodes acquire -- confirm.
361+ define <32 x half > @atomic_vec32_half (ptr %x ) nounwind {
362+ ; CHECK3-LABEL: atomic_vec32_half:
363+ ; CHECK3: ## %bb.0:
364+ ; CHECK3-NEXT: subq $72, %rsp
365+ ; CHECK3-NEXT: movq %rdi, %rsi
366+ ; CHECK3-NEXT: movq %rsp, %rdx
367+ ; CHECK3-NEXT: movl $64, %edi
368+ ; CHECK3-NEXT: movl $2, %ecx
369+ ; CHECK3-NEXT: callq ___atomic_load
370+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
371+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
372+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
373+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
374+ ; CHECK3-NEXT: addq $72, %rsp
375+ ; CHECK3-NEXT: retq
376+ ;
377+ ; CHECK0-LABEL: atomic_vec32_half:
378+ ; CHECK0: ## %bb.0:
379+ ; CHECK0-NEXT: subq $72, %rsp
380+ ; CHECK0-NEXT: movq %rdi, %rsi
381+ ; CHECK0-NEXT: movl $64, %edi
382+ ; CHECK0-NEXT: movq %rsp, %rdx
383+ ; CHECK0-NEXT: movl $2, %ecx
384+ ; CHECK0-NEXT: callq ___atomic_load
385+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
386+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
387+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
388+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
389+ ; CHECK0-NEXT: addq $72, %rsp
390+ ; CHECK0-NEXT: retq
391+ %ret = load atomic <32 x half >, ptr %x acquire , align 4
392+ ret <32 x half > %ret
393+ }
394+
0 commit comments