@@ -205,63 +205,19 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
205205}
206206
; Test: acquire atomic load of a <2 x half> vector (align 4) from %x; the
; loaded vector is returned unchanged.
; In this diff view, the '+' CHECK lines expect a single movss from memory
; into xmm0 followed by retq. The '-' CHECK3/CHECK0 lines expected a movl
; into eax, then shrl/pinsrw per element and a punpcklwd to merge them.
; NOTE(review): CHECK/CHECK0/CHECK3 are FileCheck prefixes defined by RUN
; lines that are not visible here -- confirm which configurations they map to.
207207define <2 x half > @atomic_vec2_half (ptr %x ) {
208- ; CHECK3-LABEL: atomic_vec2_half:
209- ; CHECK3: ## %bb.0:
210- ; CHECK3-NEXT: movl (%rdi), %eax
211- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
212- ; CHECK3-NEXT: shrl $16, %eax
213- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
214- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
215- ; CHECK3-NEXT: retq
216- ;
217- ; CHECK0-LABEL: atomic_vec2_half:
218- ; CHECK0: ## %bb.0:
219- ; CHECK0-NEXT: movl (%rdi), %eax
220- ; CHECK0-NEXT: movl %eax, %ecx
221- ; CHECK0-NEXT: shrl $16, %ecx
222- ; CHECK0-NEXT: movw %cx, %dx
223- ; CHECK0-NEXT: ## implicit-def: $ecx
224- ; CHECK0-NEXT: movw %dx, %cx
225- ; CHECK0-NEXT: ## implicit-def: $xmm1
226- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
227- ; CHECK0-NEXT: movw %ax, %cx
228- ; CHECK0-NEXT: ## implicit-def: $eax
229- ; CHECK0-NEXT: movw %cx, %ax
230- ; CHECK0-NEXT: ## implicit-def: $xmm0
231- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
232- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
233- ; CHECK0-NEXT: retq
208+ ; CHECK-LABEL: atomic_vec2_half:
209+ ; CHECK: ## %bb.0:
210+ ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
211+ ; CHECK-NEXT: retq
234212 %ret = load atomic <2 x half >, ptr %x acquire , align 4
235213 ret <2 x half > %ret
236214}
237215
; Test: acquire atomic load of a <2 x bfloat> vector (align 4) from %x; the
; loaded vector is returned unchanged.
; In this diff view, the '+' CHECK lines expect a single movss from memory
; into xmm0 followed by retq. The '-' CHECK3/CHECK0 lines expected a movl
; into eax, then shrl/pinsrw per element and a punpcklwd to merge them.
; NOTE(review): CHECK/CHECK0/CHECK3 are FileCheck prefixes defined by RUN
; lines that are not visible here -- confirm which configurations they map to.
238216define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
239- ; CHECK3-LABEL: atomic_vec2_bfloat:
240- ; CHECK3: ## %bb.0:
241- ; CHECK3-NEXT: movl (%rdi), %eax
242- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
243- ; CHECK3-NEXT: shrl $16, %eax
244- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
245- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
246- ; CHECK3-NEXT: retq
247- ;
248- ; CHECK0-LABEL: atomic_vec2_bfloat:
249- ; CHECK0: ## %bb.0:
250- ; CHECK0-NEXT: movl (%rdi), %eax
251- ; CHECK0-NEXT: movl %eax, %ecx
252- ; CHECK0-NEXT: shrl $16, %ecx
253- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
254- ; CHECK0-NEXT: movw %ax, %dx
255- ; CHECK0-NEXT: ## implicit-def: $eax
256- ; CHECK0-NEXT: movw %dx, %ax
257- ; CHECK0-NEXT: ## implicit-def: $xmm0
258- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
259- ; CHECK0-NEXT: ## implicit-def: $eax
260- ; CHECK0-NEXT: movw %cx, %ax
261- ; CHECK0-NEXT: ## implicit-def: $xmm1
262- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
263- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
264- ; CHECK0-NEXT: retq
217+ ; CHECK-LABEL: atomic_vec2_bfloat:
218+ ; CHECK: ## %bb.0:
219+ ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220+ ; CHECK-NEXT: retq
265221 %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
266222 ret <2 x bfloat> %ret
267223}
@@ -439,110 +395,19 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
439395}
440396
; Test: acquire atomic load of a <4 x half> vector (align 8) from %x; the
; loaded vector is returned unchanged. The function is marked nounwind.
; In this diff view, the '+' CHECK lines expect a single movsd from memory
; into xmm0 followed by retq. The '-' CHECK3/CHECK0 lines expected a movq
; into rax, then shifts and pinsrw per element, merged with punpcklwd and a
; final punpckldq (CHECK3) or unpcklps (CHECK0).
; NOTE(review): CHECK/CHECK0/CHECK3 are FileCheck prefixes defined by RUN
; lines that are not visible here -- confirm which configurations they map to.
441397define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
442- ; CHECK3-LABEL: atomic_vec4_half:
443- ; CHECK3: ## %bb.0:
444- ; CHECK3-NEXT: movq (%rdi), %rax
445- ; CHECK3-NEXT: movl %eax, %ecx
446- ; CHECK3-NEXT: shrl $16, %ecx
447- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
448- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
449- ; CHECK3-NEXT: movq %rax, %rcx
450- ; CHECK3-NEXT: shrq $32, %rcx
451- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
452- ; CHECK3-NEXT: shrq $48, %rax
453- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
454- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
455- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
456- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
457- ; CHECK3-NEXT: retq
458- ;
459- ; CHECK0-LABEL: atomic_vec4_half:
460- ; CHECK0: ## %bb.0:
461- ; CHECK0-NEXT: movq (%rdi), %rax
462- ; CHECK0-NEXT: movl %eax, %ecx
463- ; CHECK0-NEXT: shrl $16, %ecx
464- ; CHECK0-NEXT: movw %cx, %dx
465- ; CHECK0-NEXT: ## implicit-def: $ecx
466- ; CHECK0-NEXT: movw %dx, %cx
467- ; CHECK0-NEXT: ## implicit-def: $xmm2
468- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
469- ; CHECK0-NEXT: movw %ax, %dx
470- ; CHECK0-NEXT: ## implicit-def: $ecx
471- ; CHECK0-NEXT: movw %dx, %cx
472- ; CHECK0-NEXT: ## implicit-def: $xmm0
473- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
474- ; CHECK0-NEXT: movq %rax, %rcx
475- ; CHECK0-NEXT: shrq $32, %rcx
476- ; CHECK0-NEXT: movw %cx, %dx
477- ; CHECK0-NEXT: ## implicit-def: $ecx
478- ; CHECK0-NEXT: movw %dx, %cx
479- ; CHECK0-NEXT: ## implicit-def: $xmm1
480- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
481- ; CHECK0-NEXT: shrq $48, %rax
482- ; CHECK0-NEXT: movw %ax, %cx
483- ; CHECK0-NEXT: ## implicit-def: $eax
484- ; CHECK0-NEXT: movw %cx, %ax
485- ; CHECK0-NEXT: ## implicit-def: $xmm3
486- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
487- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
488- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
489- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
490- ; CHECK0-NEXT: retq
398+ ; CHECK-LABEL: atomic_vec4_half:
399+ ; CHECK: ## %bb.0:
400+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
401+ ; CHECK-NEXT: retq
491402 %ret = load atomic <4 x half >, ptr %x acquire , align 8
492403 ret <4 x half > %ret
493404}
494405
; Test: acquire atomic load of a <4 x bfloat> vector (align 8) from %x; the
; loaded vector is returned unchanged. The function is marked nounwind.
; In this diff view, the '+' CHECK lines expect a single movsd from memory
; into xmm0 followed by retq. The '-' CHECK3/CHECK0 lines expected a movq
; into rax, then shifts and pinsrw per element, merged with punpcklwd and a
; final punpckldq (CHECK3) or unpcklps (CHECK0).
; NOTE(review): CHECK/CHECK0/CHECK3 are FileCheck prefixes defined by RUN
; lines that are not visible here -- confirm which configurations they map to.
495406define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
496- ; CHECK3-LABEL: atomic_vec4_bfloat:
497- ; CHECK3: ## %bb.0:
498- ; CHECK3-NEXT: movq (%rdi), %rax
499- ; CHECK3-NEXT: movq %rax, %rcx
500- ; CHECK3-NEXT: movq %rax, %rdx
501- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
502- ; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
503- ; CHECK3-NEXT: shrl $16, %eax
504- ; CHECK3-NEXT: shrq $32, %rcx
505- ; CHECK3-NEXT: shrq $48, %rdx
506- ; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
507- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
508- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
509- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
510- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
511- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
512- ; CHECK3-NEXT: retq
513- ;
514- ; CHECK0-LABEL: atomic_vec4_bfloat:
515- ; CHECK0: ## %bb.0:
516- ; CHECK0-NEXT: movq (%rdi), %rax
517- ; CHECK0-NEXT: movl %eax, %ecx
518- ; CHECK0-NEXT: shrl $16, %ecx
519- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
520- ; CHECK0-NEXT: movw %ax, %dx
521- ; CHECK0-NEXT: movq %rax, %rsi
522- ; CHECK0-NEXT: shrq $32, %rsi
523- ; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
524- ; CHECK0-NEXT: shrq $48, %rax
525- ; CHECK0-NEXT: movw %ax, %di
526- ; CHECK0-NEXT: ## implicit-def: $eax
527- ; CHECK0-NEXT: movw %di, %ax
528- ; CHECK0-NEXT: ## implicit-def: $xmm0
529- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
530- ; CHECK0-NEXT: ## implicit-def: $eax
531- ; CHECK0-NEXT: movw %si, %ax
532- ; CHECK0-NEXT: ## implicit-def: $xmm1
533- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
534- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
535- ; CHECK0-NEXT: ## implicit-def: $eax
536- ; CHECK0-NEXT: movw %dx, %ax
537- ; CHECK0-NEXT: ## implicit-def: $xmm0
538- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
539- ; CHECK0-NEXT: ## implicit-def: $eax
540- ; CHECK0-NEXT: movw %cx, %ax
541- ; CHECK0-NEXT: ## implicit-def: $xmm2
542- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545- ; CHECK0-NEXT: retq
407+ ; CHECK-LABEL: atomic_vec4_bfloat:
408+ ; CHECK: ## %bb.0:
409+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
410+ ; CHECK-NEXT: retq
546411 %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
547412 ret <4 x bfloat> %ret
548413}
0 commit comments