@@ -137,6 +137,34 @@ define <1 x ptr> @atomic_vec1_ptr_align8(ptr %x) nounwind {
137137 ret <1 x ptr > %ret
138138}
139139
; Acquire atomic load of a <1 x ptr> from %x with only 4-byte alignment.
; The assertions below expect the load to become a call to ___atomic_load
; with 8 in %edi, the source pointer in %rsi, a stack slot in %rdx and 2 in
; %ecx; the result is then read back from the stack slot into %rax.
; NOTE(review): the libcall is presumably chosen because align 4 is below the
; 8-byte access size, and 2 presumably encodes the acquire ordering - confirm.
140+ define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
141+ ; CHECK3-LABEL: atomic_vec1_ptr:
142+ ; CHECK3: ## %bb.0:
143+ ; CHECK3-NEXT: pushq %rax
144+ ; CHECK3-NEXT: movq %rdi, %rsi
145+ ; CHECK3-NEXT: movq %rsp, %rdx
146+ ; CHECK3-NEXT: movl $8, %edi
147+ ; CHECK3-NEXT: movl $2, %ecx
148+ ; CHECK3-NEXT: callq ___atomic_load
149+ ; CHECK3-NEXT: movq (%rsp), %rax
150+ ; CHECK3-NEXT: popq %rcx
151+ ; CHECK3-NEXT: retq
152+ ;
153+ ; CHECK0-LABEL: atomic_vec1_ptr:
154+ ; CHECK0: ## %bb.0:
155+ ; CHECK0-NEXT: pushq %rax
156+ ; CHECK0-NEXT: movq %rdi, %rsi
157+ ; CHECK0-NEXT: movl $8, %edi
158+ ; CHECK0-NEXT: movq %rsp, %rdx
159+ ; CHECK0-NEXT: movl $2, %ecx
160+ ; CHECK0-NEXT: callq ___atomic_load
161+ ; CHECK0-NEXT: movq (%rsp), %rax
162+ ; CHECK0-NEXT: popq %rcx
163+ ; CHECK0-NEXT: retq
164+ %ret = load atomic <1 x ptr >, ptr %x acquire , align 4
165+ ret <1 x ptr > %ret
166+ }
167+
140168define <1 x half > @atomic_vec1_half (ptr %x ) {
141169; CHECK3-LABEL: atomic_vec1_half:
142170; CHECK3: ## %bb.0:
@@ -164,3 +192,214 @@ define <1 x float> @atomic_vec1_float(ptr %x) {
164192 %ret = load atomic <1 x float >, ptr %x acquire , align 4
165193 ret <1 x float > %ret
166194}
195+
; Acquire atomic load of a <1 x i64> from %x with only 4-byte alignment.
; Both assertion groups expect a call to ___atomic_load (8 in %edi, source
; pointer in %rsi, stack slot in %rdx, 2 in %ecx) followed by a reload of the
; result from the stack slot into %rax. The two groups differ only in the
; order of the %edi and %rdx argument setup.
; NOTE(review): the libcall is presumably used because align 4 is below the
; 8-byte access size - confirm.
196+ define <1 x i64 > @atomic_vec1_i64 (ptr %x ) nounwind {
197+ ; CHECK3-LABEL: atomic_vec1_i64:
198+ ; CHECK3: ## %bb.0:
199+ ; CHECK3-NEXT: pushq %rax
200+ ; CHECK3-NEXT: movq %rdi, %rsi
201+ ; CHECK3-NEXT: movq %rsp, %rdx
202+ ; CHECK3-NEXT: movl $8, %edi
203+ ; CHECK3-NEXT: movl $2, %ecx
204+ ; CHECK3-NEXT: callq ___atomic_load
205+ ; CHECK3-NEXT: movq (%rsp), %rax
206+ ; CHECK3-NEXT: popq %rcx
207+ ; CHECK3-NEXT: retq
208+ ;
209+ ; CHECK0-LABEL: atomic_vec1_i64:
210+ ; CHECK0: ## %bb.0:
211+ ; CHECK0-NEXT: pushq %rax
212+ ; CHECK0-NEXT: movq %rdi, %rsi
213+ ; CHECK0-NEXT: movl $8, %edi
214+ ; CHECK0-NEXT: movq %rsp, %rdx
215+ ; CHECK0-NEXT: movl $2, %ecx
216+ ; CHECK0-NEXT: callq ___atomic_load
217+ ; CHECK0-NEXT: movq (%rsp), %rax
218+ ; CHECK0-NEXT: popq %rcx
219+ ; CHECK0-NEXT: retq
220+ %ret = load atomic <1 x i64 >, ptr %x acquire , align 4
221+ ret <1 x i64 > %ret
222+ }
223+
; Acquire atomic load of a <1 x double> from %x with only 4-byte alignment.
; Both assertion groups expect a call to ___atomic_load (8 in %edi, source
; pointer in %rsi, stack slot in %rdx, 2 in %ecx); the result is then loaded
; into %xmm0 with movsd rather than into a general-purpose register.
; NOTE(review): the libcall is presumably used because align 4 is below the
; 8-byte access size - confirm.
224+ define <1 x double > @atomic_vec1_double (ptr %x ) nounwind {
225+ ; CHECK3-LABEL: atomic_vec1_double:
226+ ; CHECK3: ## %bb.0:
227+ ; CHECK3-NEXT: pushq %rax
228+ ; CHECK3-NEXT: movq %rdi, %rsi
229+ ; CHECK3-NEXT: movq %rsp, %rdx
230+ ; CHECK3-NEXT: movl $8, %edi
231+ ; CHECK3-NEXT: movl $2, %ecx
232+ ; CHECK3-NEXT: callq ___atomic_load
233+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
234+ ; CHECK3-NEXT: popq %rax
235+ ; CHECK3-NEXT: retq
236+ ;
237+ ; CHECK0-LABEL: atomic_vec1_double:
238+ ; CHECK0: ## %bb.0:
239+ ; CHECK0-NEXT: pushq %rax
240+ ; CHECK0-NEXT: movq %rdi, %rsi
241+ ; CHECK0-NEXT: movl $8, %edi
242+ ; CHECK0-NEXT: movq %rsp, %rdx
243+ ; CHECK0-NEXT: movl $2, %ecx
244+ ; CHECK0-NEXT: callq ___atomic_load
245+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
246+ ; CHECK0-NEXT: popq %rax
247+ ; CHECK0-NEXT: retq
248+ %ret = load atomic <1 x double >, ptr %x acquire , align 4
249+ ret <1 x double > %ret
250+ }
251+
; Acquire atomic load of a <2 x i32> from %x with 4-byte alignment.
; Both assertion groups expect a call to ___atomic_load (8 in %edi, source
; pointer in %rsi, stack slot in %rdx, 2 in %ecx) and a reload of the low
; 64 bits into %xmm0. The reload instruction differs between the groups:
; the first expects movsd, the second expects movq.
; NOTE(review): the libcall is presumably used because align 4 is below the
; 8-byte total vector size - confirm.
252+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) nounwind {
253+ ; CHECK3-LABEL: atomic_vec2_i32:
254+ ; CHECK3: ## %bb.0:
255+ ; CHECK3-NEXT: pushq %rax
256+ ; CHECK3-NEXT: movq %rdi, %rsi
257+ ; CHECK3-NEXT: movq %rsp, %rdx
258+ ; CHECK3-NEXT: movl $8, %edi
259+ ; CHECK3-NEXT: movl $2, %ecx
260+ ; CHECK3-NEXT: callq ___atomic_load
261+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
262+ ; CHECK3-NEXT: popq %rax
263+ ; CHECK3-NEXT: retq
264+ ;
265+ ; CHECK0-LABEL: atomic_vec2_i32:
266+ ; CHECK0: ## %bb.0:
267+ ; CHECK0-NEXT: pushq %rax
268+ ; CHECK0-NEXT: movq %rdi, %rsi
269+ ; CHECK0-NEXT: movl $8, %edi
270+ ; CHECK0-NEXT: movq %rsp, %rdx
271+ ; CHECK0-NEXT: movl $2, %ecx
272+ ; CHECK0-NEXT: callq ___atomic_load
273+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
274+ ; CHECK0-NEXT: popq %rax
275+ ; CHECK0-NEXT: retq
276+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4
277+ ret <2 x i32 > %ret
278+ }
279+
; Acquire atomic load of a <4 x float> from %x with 4-byte alignment.
; Both assertion groups expect a 24-byte stack adjustment, a call to
; ___atomic_load (16 in %edi, source pointer in %rsi, %rsp as the destination
; in %rdx, 2 in %ecx), and a movaps reload of the result from (%rsp) into
; %xmm0.
; NOTE(review): 16 presumably is the vector size in bytes and 2 the acquire
; ordering passed to the libcall - confirm.
280+ define <4 x float > @atomic_vec4_float (ptr %x ) nounwind {
281+ ; CHECK3-LABEL: atomic_vec4_float:
282+ ; CHECK3: ## %bb.0:
283+ ; CHECK3-NEXT: subq $24, %rsp
284+ ; CHECK3-NEXT: movq %rdi, %rsi
285+ ; CHECK3-NEXT: movq %rsp, %rdx
286+ ; CHECK3-NEXT: movl $16, %edi
287+ ; CHECK3-NEXT: movl $2, %ecx
288+ ; CHECK3-NEXT: callq ___atomic_load
289+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
290+ ; CHECK3-NEXT: addq $24, %rsp
291+ ; CHECK3-NEXT: retq
292+ ;
293+ ; CHECK0-LABEL: atomic_vec4_float:
294+ ; CHECK0: ## %bb.0:
295+ ; CHECK0-NEXT: subq $24, %rsp
296+ ; CHECK0-NEXT: movq %rdi, %rsi
297+ ; CHECK0-NEXT: movl $16, %edi
298+ ; CHECK0-NEXT: movq %rsp, %rdx
299+ ; CHECK0-NEXT: movl $2, %ecx
300+ ; CHECK0-NEXT: callq ___atomic_load
301+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
302+ ; CHECK0-NEXT: addq $24, %rsp
303+ ; CHECK0-NEXT: retq
304+ %ret = load atomic <4 x float >, ptr %x acquire , align 4
305+ ret <4 x float > %ret
306+ }
307+
; Acquire atomic load of an <8 x double> from %x with 4-byte alignment.
; Both assertion groups expect a 72-byte stack adjustment and a call to
; ___atomic_load (64 in %edi, source pointer in %rsi, %rsp as the destination
; in %rdx, 2 in %ecx). The result is then reloaded from the stack into
; %xmm0-%xmm3; the first group expects movaps for those reloads, the second
; expects movapd.
; NOTE(review): 64 presumably is the vector size in bytes and 2 the acquire
; ordering passed to the libcall - confirm.
308+ define <8 x double > @atomic_vec8_double (ptr %x ) nounwind {
309+ ; CHECK3-LABEL: atomic_vec8_double:
310+ ; CHECK3: ## %bb.0:
311+ ; CHECK3-NEXT: subq $72, %rsp
312+ ; CHECK3-NEXT: movq %rdi, %rsi
313+ ; CHECK3-NEXT: movq %rsp, %rdx
314+ ; CHECK3-NEXT: movl $64, %edi
315+ ; CHECK3-NEXT: movl $2, %ecx
316+ ; CHECK3-NEXT: callq ___atomic_load
317+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
318+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
319+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
320+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
321+ ; CHECK3-NEXT: addq $72, %rsp
322+ ; CHECK3-NEXT: retq
323+ ;
324+ ; CHECK0-LABEL: atomic_vec8_double:
325+ ; CHECK0: ## %bb.0:
326+ ; CHECK0-NEXT: subq $72, %rsp
327+ ; CHECK0-NEXT: movq %rdi, %rsi
328+ ; CHECK0-NEXT: movl $64, %edi
329+ ; CHECK0-NEXT: movq %rsp, %rdx
330+ ; CHECK0-NEXT: movl $2, %ecx
331+ ; CHECK0-NEXT: callq ___atomic_load
332+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
333+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
334+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
335+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
336+ ; CHECK0-NEXT: addq $72, %rsp
337+ ; CHECK0-NEXT: retq
338+ %ret = load atomic <8 x double >, ptr %x acquire , align 4
339+ ret <8 x double > %ret
340+ }
341+
; Acquire atomic load of a <16 x bfloat> from %x with 4-byte alignment.
; Both assertion groups expect a 40-byte stack adjustment, a call to
; ___atomic_load (32 in %edi, source pointer in %rsi, %rsp as the destination
; in %rdx, 2 in %ecx), and movaps reloads of the result from the stack into
; %xmm0 and %xmm1.
; NOTE(review): 32 presumably is the vector size in bytes and 2 the acquire
; ordering passed to the libcall - confirm.
342+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) nounwind {
343+ ; CHECK3-LABEL: atomic_vec16_bfloat:
344+ ; CHECK3: ## %bb.0:
345+ ; CHECK3-NEXT: subq $40, %rsp
346+ ; CHECK3-NEXT: movq %rdi, %rsi
347+ ; CHECK3-NEXT: movq %rsp, %rdx
348+ ; CHECK3-NEXT: movl $32, %edi
349+ ; CHECK3-NEXT: movl $2, %ecx
350+ ; CHECK3-NEXT: callq ___atomic_load
351+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
352+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
353+ ; CHECK3-NEXT: addq $40, %rsp
354+ ; CHECK3-NEXT: retq
355+ ;
356+ ; CHECK0-LABEL: atomic_vec16_bfloat:
357+ ; CHECK0: ## %bb.0:
358+ ; CHECK0-NEXT: subq $40, %rsp
359+ ; CHECK0-NEXT: movq %rdi, %rsi
360+ ; CHECK0-NEXT: movl $32, %edi
361+ ; CHECK0-NEXT: movq %rsp, %rdx
362+ ; CHECK0-NEXT: movl $2, %ecx
363+ ; CHECK0-NEXT: callq ___atomic_load
364+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
365+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
366+ ; CHECK0-NEXT: addq $40, %rsp
367+ ; CHECK0-NEXT: retq
368+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4
369+ ret <16 x bfloat> %ret
370+ }
371+
; Acquire atomic load of a <32 x half> from %x with 4-byte alignment.
; Both assertion groups expect a 72-byte stack adjustment, a call to
; ___atomic_load (64 in %edi, source pointer in %rsi, %rsp as the destination
; in %rdx, 2 in %ecx), and movaps reloads of the result from the stack into
; %xmm0-%xmm3.
; NOTE(review): 64 presumably is the vector size in bytes and 2 the acquire
; ordering passed to the libcall - confirm.
372+ define <32 x half > @atomic_vec32_half (ptr %x ) nounwind {
373+ ; CHECK3-LABEL: atomic_vec32_half:
374+ ; CHECK3: ## %bb.0:
375+ ; CHECK3-NEXT: subq $72, %rsp
376+ ; CHECK3-NEXT: movq %rdi, %rsi
377+ ; CHECK3-NEXT: movq %rsp, %rdx
378+ ; CHECK3-NEXT: movl $64, %edi
379+ ; CHECK3-NEXT: movl $2, %ecx
380+ ; CHECK3-NEXT: callq ___atomic_load
381+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
382+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
383+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
384+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
385+ ; CHECK3-NEXT: addq $72, %rsp
386+ ; CHECK3-NEXT: retq
387+ ;
388+ ; CHECK0-LABEL: atomic_vec32_half:
389+ ; CHECK0: ## %bb.0:
390+ ; CHECK0-NEXT: subq $72, %rsp
391+ ; CHECK0-NEXT: movq %rdi, %rsi
392+ ; CHECK0-NEXT: movl $64, %edi
393+ ; CHECK0-NEXT: movq %rsp, %rdx
394+ ; CHECK0-NEXT: movl $2, %ecx
395+ ; CHECK0-NEXT: callq ___atomic_load
396+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
397+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
398+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
399+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
400+ ; CHECK0-NEXT: addq $72, %rsp
401+ ; CHECK0-NEXT: retq
402+ %ret = load atomic <32 x half >, ptr %x acquire , align 4
403+ ret <32 x half > %ret
404+ }
405+
0 commit comments