@@ -110,3 +110,196 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
110110 ret <1 x bfloat> %ret
111111}
112112
; Atomic acquire load of <1 x double> through %x, declared with align 4.
; Both check prefixes expect a call to ___atomic_load with:
;   %edi = 8            (byte size of the loaded value)
;   %rsi = incoming pointer argument (copied from %rdi)
;   %rdx = %rsp         (stack slot handed to the callee)
;   %ecx = 2            (presumably the acquire memory-order constant -- confirm)
; The value is then reloaded from memory into xmm0 with movsd.
; NOTE(review): presumably the libcall is used because align 4 is below the
; 8-byte access size -- confirm against the lowering. CHECK and CHECK0 differ
; only in the order of the %rdx / %edi setup; the RUN lines that define the two
; prefixes are not visible in this chunk.
113+ define <1 x double > @atomic_vec1_double (ptr %x ) {
114+ ; CHECK-LABEL: atomic_vec1_double:
115+ ; CHECK: ## %bb.0:
116+ ; CHECK-NEXT: pushq %rax
117+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
118+ ; CHECK-NEXT: movq %rdi, %rsi
119+ ; CHECK-NEXT: movq %rsp, %rdx
120+ ; CHECK-NEXT: movl $8, %edi
121+ ; CHECK-NEXT: movl $2, %ecx
122+ ; CHECK-NEXT: callq ___atomic_load
123+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
124+ ; CHECK-NEXT: popq %rax
125+ ; CHECK-NEXT: retq
126+ ;
127+ ; CHECK0-LABEL: atomic_vec1_double:
128+ ; CHECK0: ## %bb.0:
129+ ; CHECK0-NEXT: pushq %rax
130+ ; CHECK0-NEXT: .cfi_def_cfa_offset 16
131+ ; CHECK0-NEXT: movq %rdi, %rsi
132+ ; CHECK0-NEXT: movl $8, %edi
133+ ; CHECK0-NEXT: movq %rsp, %rdx
134+ ; CHECK0-NEXT: movl $2, %ecx
135+ ; CHECK0-NEXT: callq ___atomic_load
136+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
137+ ; CHECK0-NEXT: popq %rax
138+ ; CHECK0-NEXT: retq
139+ %ret = load atomic <1 x double >, ptr %x acquire , align 4
140+ ret <1 x double > %ret
141+ }
142+
; Atomic acquire load of <2 x i32> through %x, declared with align 4.
; Both check prefixes expect a call to ___atomic_load with size 8 in %edi, the
; incoming pointer copied from %rdi to %rsi, %rsp as the buffer argument in
; %rdx, and 2 in %ecx (presumably the acquire memory-order constant -- confirm).
; The prefixes differ in the reload of the result into xmm0: CHECK expects
; movsd, CHECK0 expects movq. They also differ in the order of the %rdx / %edi
; setup.
; NOTE(review): the RUN lines that define CHECK vs CHECK0 are not visible in
; this chunk; presumably CHECK0 is an unoptimized configuration -- confirm.
143+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) {
144+ ; CHECK-LABEL: atomic_vec2_i32:
145+ ; CHECK: ## %bb.0:
146+ ; CHECK-NEXT: pushq %rax
147+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
148+ ; CHECK-NEXT: movq %rdi, %rsi
149+ ; CHECK-NEXT: movq %rsp, %rdx
150+ ; CHECK-NEXT: movl $8, %edi
151+ ; CHECK-NEXT: movl $2, %ecx
152+ ; CHECK-NEXT: callq ___atomic_load
153+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
154+ ; CHECK-NEXT: popq %rax
155+ ; CHECK-NEXT: retq
156+ ;
157+ ; CHECK0-LABEL: atomic_vec2_i32:
158+ ; CHECK0: ## %bb.0:
159+ ; CHECK0-NEXT: pushq %rax
160+ ; CHECK0-NEXT: .cfi_def_cfa_offset 16
161+ ; CHECK0-NEXT: movq %rdi, %rsi
162+ ; CHECK0-NEXT: movl $8, %edi
163+ ; CHECK0-NEXT: movq %rsp, %rdx
164+ ; CHECK0-NEXT: movl $2, %ecx
165+ ; CHECK0-NEXT: callq ___atomic_load
166+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
167+ ; CHECK0-NEXT: popq %rax
168+ ; CHECK0-NEXT: retq
169+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4
170+ ret <2 x i32 > %ret
171+ }
172+
; Atomic acquire load of <4 x float> through %x, declared with align 4.
; Both check prefixes expect 24 bytes of stack to be reserved, then a call to
; ___atomic_load with size 16 in %edi, the incoming pointer copied from %rdi to
; %rsi, %rsp as the buffer argument in %rdx, and 2 in %ecx (presumably the
; acquire memory-order constant -- confirm). The result is reloaded from
; (%rsp) into xmm0 with movaps before the stack is released.
; CHECK and CHECK0 differ only in the order of the %rdx / %edi setup; the RUN
; lines that define the two prefixes are not visible in this chunk.
173+ define <4 x float > @atomic_vec4_float (ptr %x ) {
174+ ; CHECK-LABEL: atomic_vec4_float:
175+ ; CHECK: ## %bb.0:
176+ ; CHECK-NEXT: subq $24, %rsp
177+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
178+ ; CHECK-NEXT: movq %rdi, %rsi
179+ ; CHECK-NEXT: movq %rsp, %rdx
180+ ; CHECK-NEXT: movl $16, %edi
181+ ; CHECK-NEXT: movl $2, %ecx
182+ ; CHECK-NEXT: callq ___atomic_load
183+ ; CHECK-NEXT: movaps (%rsp), %xmm0
184+ ; CHECK-NEXT: addq $24, %rsp
185+ ; CHECK-NEXT: retq
186+ ;
187+ ; CHECK0-LABEL: atomic_vec4_float:
188+ ; CHECK0: ## %bb.0:
189+ ; CHECK0-NEXT: subq $24, %rsp
190+ ; CHECK0-NEXT: .cfi_def_cfa_offset 32
191+ ; CHECK0-NEXT: movq %rdi, %rsi
192+ ; CHECK0-NEXT: movl $16, %edi
193+ ; CHECK0-NEXT: movq %rsp, %rdx
194+ ; CHECK0-NEXT: movl $2, %ecx
195+ ; CHECK0-NEXT: callq ___atomic_load
196+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
197+ ; CHECK0-NEXT: addq $24, %rsp
198+ ; CHECK0-NEXT: retq
199+ %ret = load atomic <4 x float >, ptr %x acquire , align 4
200+ ret <4 x float > %ret
201+ }
202+
; Atomic acquire load of <8 x double> through %x, declared with align 4.
; Both check prefixes expect 72 bytes of stack to be reserved, then a call to
; ___atomic_load with size 64 in %edi, the incoming pointer copied from %rdi to
; %rsi, %rsp as the buffer argument in %rdx, and 2 in %ecx (presumably the
; acquire memory-order constant -- confirm). The result is then reloaded from
; the stack into xmm0..xmm3 with four vector loads.
; The prefixes differ in the reload instruction (CHECK expects movaps, CHECK0
; expects movapd) and in the order of the %rdx / %edi setup. The RUN lines that
; define the two prefixes are not visible in this chunk.
203+ define <8 x double > @atomic_vec8_double (ptr %x ) {
204+ ; CHECK-LABEL: atomic_vec8_double:
205+ ; CHECK: ## %bb.0:
206+ ; CHECK-NEXT: subq $72, %rsp
207+ ; CHECK-NEXT: .cfi_def_cfa_offset 80
208+ ; CHECK-NEXT: movq %rdi, %rsi
209+ ; CHECK-NEXT: movq %rsp, %rdx
210+ ; CHECK-NEXT: movl $64, %edi
211+ ; CHECK-NEXT: movl $2, %ecx
212+ ; CHECK-NEXT: callq ___atomic_load
213+ ; CHECK-NEXT: movaps (%rsp), %xmm0
214+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
215+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
216+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
217+ ; CHECK-NEXT: addq $72, %rsp
218+ ; CHECK-NEXT: retq
219+ ;
220+ ; CHECK0-LABEL: atomic_vec8_double:
221+ ; CHECK0: ## %bb.0:
222+ ; CHECK0-NEXT: subq $72, %rsp
223+ ; CHECK0-NEXT: .cfi_def_cfa_offset 80
224+ ; CHECK0-NEXT: movq %rdi, %rsi
225+ ; CHECK0-NEXT: movl $64, %edi
226+ ; CHECK0-NEXT: movq %rsp, %rdx
227+ ; CHECK0-NEXT: movl $2, %ecx
228+ ; CHECK0-NEXT: callq ___atomic_load
229+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
230+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
231+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
232+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
233+ ; CHECK0-NEXT: addq $72, %rsp
234+ ; CHECK0-NEXT: retq
235+ %ret = load atomic <8 x double >, ptr %x acquire , align 4
236+ ret <8 x double > %ret
237+ }
238+
; Atomic acquire load of <16 x bfloat> through %x, declared with align 4.
; Both check prefixes expect 40 bytes of stack to be reserved, then a call to
; ___atomic_load with size 32 in %edi, the incoming pointer copied from %rdi to
; %rsi, %rsp as the buffer argument in %rdx, and 2 in %ecx (presumably the
; acquire memory-order constant -- confirm). The result is then reloaded from
; the stack into xmm0 and xmm1 with two movaps loads.
; CHECK and CHECK0 differ only in the order of the %rdx / %edi setup; the RUN
; lines that define the two prefixes are not visible in this chunk.
239+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) {
240+ ; CHECK-LABEL: atomic_vec16_bfloat:
241+ ; CHECK: ## %bb.0:
242+ ; CHECK-NEXT: subq $40, %rsp
243+ ; CHECK-NEXT: .cfi_def_cfa_offset 48
244+ ; CHECK-NEXT: movq %rdi, %rsi
245+ ; CHECK-NEXT: movq %rsp, %rdx
246+ ; CHECK-NEXT: movl $32, %edi
247+ ; CHECK-NEXT: movl $2, %ecx
248+ ; CHECK-NEXT: callq ___atomic_load
249+ ; CHECK-NEXT: movaps (%rsp), %xmm0
250+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
251+ ; CHECK-NEXT: addq $40, %rsp
252+ ; CHECK-NEXT: retq
253+ ;
254+ ; CHECK0-LABEL: atomic_vec16_bfloat:
255+ ; CHECK0: ## %bb.0:
256+ ; CHECK0-NEXT: subq $40, %rsp
257+ ; CHECK0-NEXT: .cfi_def_cfa_offset 48
258+ ; CHECK0-NEXT: movq %rdi, %rsi
259+ ; CHECK0-NEXT: movl $32, %edi
260+ ; CHECK0-NEXT: movq %rsp, %rdx
261+ ; CHECK0-NEXT: movl $2, %ecx
262+ ; CHECK0-NEXT: callq ___atomic_load
263+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
264+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
265+ ; CHECK0-NEXT: addq $40, %rsp
266+ ; CHECK0-NEXT: retq
267+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4
268+ ret <16 x bfloat> %ret
269+ }
270+
; Atomic acquire load of <32 x half> through %x, declared with align 4.
; Both check prefixes expect 72 bytes of stack to be reserved, then a call to
; ___atomic_load with size 64 in %edi, the incoming pointer copied from %rdi to
; %rsi, %rsp as the buffer argument in %rdx, and 2 in %ecx (presumably the
; acquire memory-order constant -- confirm). The result is then reloaded from
; the stack into xmm0..xmm3 with four movaps loads.
; CHECK and CHECK0 differ only in the order of the %rdx / %edi setup; the RUN
; lines that define the two prefixes are not visible in this chunk.
271+ define <32 x half > @atomic_vec32_half (ptr %x ) {
272+ ; CHECK-LABEL: atomic_vec32_half:
273+ ; CHECK: ## %bb.0:
274+ ; CHECK-NEXT: subq $72, %rsp
275+ ; CHECK-NEXT: .cfi_def_cfa_offset 80
276+ ; CHECK-NEXT: movq %rdi, %rsi
277+ ; CHECK-NEXT: movq %rsp, %rdx
278+ ; CHECK-NEXT: movl $64, %edi
279+ ; CHECK-NEXT: movl $2, %ecx
280+ ; CHECK-NEXT: callq ___atomic_load
281+ ; CHECK-NEXT: movaps (%rsp), %xmm0
282+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
283+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
284+ ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
285+ ; CHECK-NEXT: addq $72, %rsp
286+ ; CHECK-NEXT: retq
287+ ;
288+ ; CHECK0-LABEL: atomic_vec32_half:
289+ ; CHECK0: ## %bb.0:
290+ ; CHECK0-NEXT: subq $72, %rsp
291+ ; CHECK0-NEXT: .cfi_def_cfa_offset 80
292+ ; CHECK0-NEXT: movq %rdi, %rsi
293+ ; CHECK0-NEXT: movl $64, %edi
294+ ; CHECK0-NEXT: movq %rsp, %rdx
295+ ; CHECK0-NEXT: movl $2, %ecx
296+ ; CHECK0-NEXT: callq ___atomic_load
297+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
298+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
299+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
300+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
301+ ; CHECK0-NEXT: addq $72, %rsp
302+ ; CHECK0-NEXT: retq
303+ %ret = load atomic <32 x half >, ptr %x acquire , align 4
304+ ret <32 x half > %ret
305+ }
0 commit comments