@@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
205205 ret <2 x float > %ret
206206}
207207
; Test: acquire atomic load of a <2 x half> vector (4-byte aligned) from %x,
; returned unchanged to the caller.
; Both check groups below expect a single 32-bit movl from (%rdi) as the only
; memory access; each 16-bit lane is then placed in its own xmm register with
; pinsrw (the upper lane after a shrl $16) and the two are interleaved with
; punpcklwd.
; NOTE(review): the check lines look script-generated and the lit run lines
; that define the two prefixes are outside this view; regenerate them rather
; than hand-editing, and confirm which llc configuration each prefix maps to.
208+ define <2 x half > @atomic_vec2_half (ptr %x ) {
209+ ; CHECK3-LABEL: atomic_vec2_half:
210+ ; CHECK3: ## %bb.0:
211+ ; CHECK3-NEXT: movl (%rdi), %eax
212+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
213+ ; CHECK3-NEXT: shrl $16, %eax
214+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
215+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
216+ ; CHECK3-NEXT: retq
217+ ;
218+ ; CHECK0-LABEL: atomic_vec2_half:
219+ ; CHECK0: ## %bb.0:
220+ ; CHECK0-NEXT: movl (%rdi), %eax
221+ ; CHECK0-NEXT: movl %eax, %ecx
222+ ; CHECK0-NEXT: shrl $16, %ecx
223+ ; CHECK0-NEXT: movw %cx, %dx
224+ ; CHECK0-NEXT: ## implicit-def: $ecx
225+ ; CHECK0-NEXT: movw %dx, %cx
226+ ; CHECK0-NEXT: ## implicit-def: $xmm1
227+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
228+ ; CHECK0-NEXT: movw %ax, %cx
229+ ; CHECK0-NEXT: ## implicit-def: $eax
230+ ; CHECK0-NEXT: movw %cx, %ax
231+ ; CHECK0-NEXT: ## implicit-def: $xmm0
232+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
233+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
234+ ; CHECK0-NEXT: retq
235+ %ret = load atomic <2 x half >, ptr %x acquire , align 4
236+ ret <2 x half > %ret
237+ }
238+
; Test: acquire atomic load of a <2 x bfloat> vector (4-byte aligned) from %x,
; returned unchanged to the caller.
; Both check groups below expect a single 32-bit movl from (%rdi) as the only
; memory access; the low 16 bits and the bits shifted down by shrl $16 are each
; inserted into an xmm register with pinsrw and then interleaved with
; punpcklwd.
; NOTE(review): the check lines look script-generated and the lit run lines
; that define the two prefixes are outside this view; regenerate them rather
; than hand-editing, and confirm which llc configuration each prefix maps to.
239+ define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
240+ ; CHECK3-LABEL: atomic_vec2_bfloat:
241+ ; CHECK3: ## %bb.0:
242+ ; CHECK3-NEXT: movl (%rdi), %eax
243+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
244+ ; CHECK3-NEXT: shrl $16, %eax
245+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
246+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
247+ ; CHECK3-NEXT: retq
248+ ;
249+ ; CHECK0-LABEL: atomic_vec2_bfloat:
250+ ; CHECK0: ## %bb.0:
251+ ; CHECK0-NEXT: movl (%rdi), %eax
252+ ; CHECK0-NEXT: movl %eax, %ecx
253+ ; CHECK0-NEXT: shrl $16, %ecx
254+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
255+ ; CHECK0-NEXT: movw %ax, %dx
256+ ; CHECK0-NEXT: ## implicit-def: $eax
257+ ; CHECK0-NEXT: movw %dx, %ax
258+ ; CHECK0-NEXT: ## implicit-def: $xmm0
259+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
260+ ; CHECK0-NEXT: ## implicit-def: $eax
261+ ; CHECK0-NEXT: movw %cx, %ax
262+ ; CHECK0-NEXT: ## implicit-def: $xmm1
263+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
264+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
265+ ; CHECK0-NEXT: retq
266+ %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
267+ ret <2 x bfloat> %ret
268+ }
269+
208270define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
209271; CHECK3-LABEL: atomic_vec1_ptr:
210272; CHECK3: ## %bb.0:
@@ -377,6 +439,115 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
377439 ret <4 x i16 > %ret
378440}
379441
; Test: acquire atomic load of a <4 x half> vector (8-byte aligned) from %x,
; returned unchanged to the caller; the function is marked nounwind.
; Both check groups below expect a single 64-bit movq from (%rdi) as the only
; memory access. The four 16-bit lanes are taken from that value (directly, and
; after shifts by 16, 32 and 48), inserted into separate xmm registers with
; pinsrw, paired with two punpcklwd, and finally merged with a 32-bit-element
; unpack (punpckldq in one group, unpcklps in the other).
; NOTE(review): the check lines look script-generated and the lit run lines
; that define the two prefixes are outside this view; regenerate them rather
; than hand-editing, and confirm which llc configuration each prefix maps to.
442+ define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
443+ ; CHECK3-LABEL: atomic_vec4_half:
444+ ; CHECK3: ## %bb.0:
445+ ; CHECK3-NEXT: movq (%rdi), %rax
446+ ; CHECK3-NEXT: movl %eax, %ecx
447+ ; CHECK3-NEXT: shrl $16, %ecx
448+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
449+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
450+ ; CHECK3-NEXT: movq %rax, %rcx
451+ ; CHECK3-NEXT: shrq $32, %rcx
452+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
453+ ; CHECK3-NEXT: shrq $48, %rax
454+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
455+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
456+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
457+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
458+ ; CHECK3-NEXT: retq
459+ ;
460+ ; CHECK0-LABEL: atomic_vec4_half:
461+ ; CHECK0: ## %bb.0:
462+ ; CHECK0-NEXT: movq (%rdi), %rax
463+ ; CHECK0-NEXT: movl %eax, %ecx
464+ ; CHECK0-NEXT: shrl $16, %ecx
465+ ; CHECK0-NEXT: movw %cx, %dx
466+ ; CHECK0-NEXT: ## implicit-def: $ecx
467+ ; CHECK0-NEXT: movw %dx, %cx
468+ ; CHECK0-NEXT: ## implicit-def: $xmm2
469+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
470+ ; CHECK0-NEXT: movw %ax, %dx
471+ ; CHECK0-NEXT: ## implicit-def: $ecx
472+ ; CHECK0-NEXT: movw %dx, %cx
473+ ; CHECK0-NEXT: ## implicit-def: $xmm0
474+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
475+ ; CHECK0-NEXT: movq %rax, %rcx
476+ ; CHECK0-NEXT: shrq $32, %rcx
477+ ; CHECK0-NEXT: movw %cx, %dx
478+ ; CHECK0-NEXT: ## implicit-def: $ecx
479+ ; CHECK0-NEXT: movw %dx, %cx
480+ ; CHECK0-NEXT: ## implicit-def: $xmm1
481+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
482+ ; CHECK0-NEXT: shrq $48, %rax
483+ ; CHECK0-NEXT: movw %ax, %cx
484+ ; CHECK0-NEXT: ## implicit-def: $eax
485+ ; CHECK0-NEXT: movw %cx, %ax
486+ ; CHECK0-NEXT: ## implicit-def: $xmm3
487+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
488+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
489+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
490+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
491+ ; CHECK0-NEXT: retq
492+ %ret = load atomic <4 x half >, ptr %x acquire , align 8
493+ ret <4 x half > %ret
494+ }
495+
; Test: acquire atomic load of a <4 x bfloat> vector (8-byte aligned) from %x,
; returned unchanged to the caller; the function is marked nounwind.
; Both check groups below expect a single 64-bit movq from (%rdi) as the only
; memory access. The four 16-bit lanes are taken from that value (directly, and
; after shifts by 16, 32 and 48), inserted into xmm registers with four pinsrw
; instructions, paired with two punpcklwd, and finally merged with a
; 32-bit-element unpack (punpckldq in one group, unpcklps in the other).
; NOTE(review): the check lines look script-generated and the lit run lines
; that define the two prefixes are outside this view; regenerate them rather
; than hand-editing, and confirm which llc configuration each prefix maps to.
496+ define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
497+ ; CHECK3-LABEL: atomic_vec4_bfloat:
498+ ; CHECK3: ## %bb.0:
499+ ; CHECK3-NEXT: movq (%rdi), %rax
500+ ; CHECK3-NEXT: movq %rax, %rcx
501+ ; CHECK3-NEXT: movq %rax, %rdx
502+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
503+ ; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
504+ ; CHECK3-NEXT: shrl $16, %eax
505+ ; CHECK3-NEXT: shrq $32, %rcx
506+ ; CHECK3-NEXT: shrq $48, %rdx
507+ ; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
508+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
509+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
510+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
512+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
513+ ; CHECK3-NEXT: retq
514+ ;
515+ ; CHECK0-LABEL: atomic_vec4_bfloat:
516+ ; CHECK0: ## %bb.0:
517+ ; CHECK0-NEXT: movq (%rdi), %rax
518+ ; CHECK0-NEXT: movl %eax, %ecx
519+ ; CHECK0-NEXT: shrl $16, %ecx
520+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
521+ ; CHECK0-NEXT: movw %ax, %dx
522+ ; CHECK0-NEXT: movq %rax, %rsi
523+ ; CHECK0-NEXT: shrq $32, %rsi
524+ ; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
525+ ; CHECK0-NEXT: shrq $48, %rax
526+ ; CHECK0-NEXT: movw %ax, %di
527+ ; CHECK0-NEXT: ## implicit-def: $eax
528+ ; CHECK0-NEXT: movw %di, %ax
529+ ; CHECK0-NEXT: ## implicit-def: $xmm0
530+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
531+ ; CHECK0-NEXT: ## implicit-def: $eax
532+ ; CHECK0-NEXT: movw %si, %ax
533+ ; CHECK0-NEXT: ## implicit-def: $xmm1
534+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
535+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
536+ ; CHECK0-NEXT: ## implicit-def: $eax
537+ ; CHECK0-NEXT: movw %dx, %ax
538+ ; CHECK0-NEXT: ## implicit-def: $xmm0
539+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
540+ ; CHECK0-NEXT: ## implicit-def: $eax
541+ ; CHECK0-NEXT: movw %cx, %ax
542+ ; CHECK0-NEXT: ## implicit-def: $xmm2
543+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
544+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
545+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
546+ ; CHECK0-NEXT: retq
547+ %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
548+ ret <4 x bfloat> %ret
549+ }
550+
380551define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
381552; CHECK-LABEL: atomic_vec4_float_align:
382553; CHECK: ## %bb.0:
0 commit comments