@@ -59,12 +59,10 @@ define void @test_half_ceil(half %a0, ptr %p0) nounwind {
5959define void @test_half_copysign (half %a0 , half %a1 , ptr %p0 ) nounwind {
6060; F16C-LABEL: test_half_copysign:
6161; F16C: # %bb.0:
62- ; F16C-NEXT: vpextrw $0, %xmm1, %eax
63- ; F16C-NEXT: andl $32768, %eax # imm = 0x8000
64- ; F16C-NEXT: vpextrw $0, %xmm0, %ecx
65- ; F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
66- ; F16C-NEXT: orl %eax, %ecx
67- ; F16C-NEXT: movw %cx, (%rdi)
62+ ; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
63+ ; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
64+ ; F16C-NEXT: vpor %xmm1, %xmm0, %xmm0
65+ ; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
6866; F16C-NEXT: retq
6967;
7068; FP16-LABEL: test_half_copysign:
@@ -76,23 +74,23 @@ define void @test_half_copysign(half %a0, half %a1, ptr %p0) nounwind {
7674;
7775; X64-LABEL: test_half_copysign:
7876; X64: # %bb.0:
79- ; X64-NEXT: pextrw $0, %xmm1, %eax
80- ; X64-NEXT: andl $32768, %eax # imm = 0x8000
81- ; X64-NEXT: pextrw $0, %xmm0, %ecx
82- ; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
83- ; X64-NEXT: orl %eax, %ecx
84- ; X64-NEXT: movw %cx, (%rdi)
77+ ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
78+ ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
79+ ; X64-NEXT: por %xmm1, %xmm0
80+ ; X64-NEXT: pextrw $0, %xmm0, %eax
81+ ; X64-NEXT: movw %ax, (%rdi)
8582; X64-NEXT: retq
8683;
8784; X86-LABEL: test_half_copysign:
8885; X86: # %bb.0:
86+ ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
87+ ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
8988; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
90- ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
91- ; X86-NEXT: andl $32768, %ecx # imm = 0x8000
92- ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
93- ; X86-NEXT: andl $32767, %edx # imm = 0x7FFF
94- ; X86-NEXT: orl %ecx, %edx
95- ; X86-NEXT: movw %dx, (%eax)
89+ ; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
90+ ; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
91+ ; X86-NEXT: por %xmm1, %xmm0
92+ ; X86-NEXT: pextrw $0, %xmm0, %ecx
93+ ; X86-NEXT: movw %cx, (%eax)
9694; X86-NEXT: retl
9795 %res = call half @llvm.copysign.half (half %a0 , half %a1 )
9896 store half %res , ptr %p0 , align 2
@@ -334,9 +332,7 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
334332define void @test_half_fabs (half %a0 , ptr %p0 ) nounwind {
335333; F16C-LABEL: test_half_fabs:
336334; F16C: # %bb.0:
337- ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
338- ; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
339- ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
335+ ; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
340336; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
341337; F16C-NEXT: retq
342338;
@@ -349,14 +345,9 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
349345;
350346; X64-LABEL: test_half_fabs:
351347; X64: # %bb.0:
352- ; X64-NEXT: pushq %rbx
353- ; X64-NEXT: movq %rdi, %rbx
354- ; X64-NEXT: callq __extendhfsf2@PLT
355348; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
356- ; X64-NEXT: callq __truncsfhf2@PLT
357349; X64-NEXT: pextrw $0, %xmm0, %eax
358- ; X64-NEXT: movw %ax, (%rbx)
359- ; X64-NEXT: popq %rbx
350+ ; X64-NEXT: movw %ax, (%rdi)
360351; X64-NEXT: retq
361352;
362353; X86-LABEL: test_half_fabs:
@@ -514,9 +505,7 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
514505define void @test_half_fneg (half %a0 , ptr %p0 ) nounwind {
515506; F16C-LABEL: test_half_fneg:
516507; F16C: # %bb.0:
517- ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
518- ; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
519- ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
508+ ; F16C-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
520509; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
521510; F16C-NEXT: retq
522511;
@@ -529,14 +518,9 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind {
529518;
530519; X64-LABEL: test_half_fneg:
531520; X64: # %bb.0:
532- ; X64-NEXT: pushq %rbx
533- ; X64-NEXT: movq %rdi, %rbx
534- ; X64-NEXT: callq __extendhfsf2@PLT
535521; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
536- ; X64-NEXT: callq __truncsfhf2@PLT
537522; X64-NEXT: pextrw $0, %xmm0, %eax
538- ; X64-NEXT: movw %ax, (%rbx)
539- ; X64-NEXT: popq %rbx
523+ ; X64-NEXT: movw %ax, (%rdi)
540524; X64-NEXT: retq
541525;
542526; X86-LABEL: test_half_fneg:
0 commit comments