Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58574,6 +58574,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,

static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
SDLoc DL(N);
Expand Down Expand Up @@ -58641,6 +58642,16 @@ static SDValue combineSCALAR_TO_VECTOR(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, SrcOp);
}

if (VT == MVT::v4i32) {
SDValue HalfSrc;
// Fold (v4i32 (scalar_to_vector (i32 (anyext (bitcast (f16 X))))))
// --> (v4i32 (bitcast (v8f16 (scalar_to_vector X))))
// to remove XMM->GPR->XMM moves.
if (sd_match(Src, m_AnyExt(m_BitCast(
m_AllOf(m_SpecificVT(MVT::f16), m_Value(HalfSrc))))))
return DAG.getBitcast(
VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, HalfSrc));
}

// See if we're broadcasting the scalar value, in which case just reuse that.
// Ensure the same SDValue from the SDNode use is being used.
if (VT.getScalarType() == Src.getValueType())
Expand Down
49 changes: 22 additions & 27 deletions llvm/test/CodeGen/X86/bfloat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -708,10 +708,8 @@ define <2 x bfloat> @pr62997(bfloat %a, bfloat %b) {
;
; BF16-LABEL: pr62997:
; BF16: # %bb.0:
; BF16-NEXT: vpextrw $0, %xmm0, %eax
; BF16-NEXT: vpextrw $0, %xmm1, %ecx
; BF16-NEXT: vmovd %eax, %xmm0
; BF16-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
; BF16-NEXT: vpextrw $0, %xmm1, %eax
; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; BF16-NEXT: retq
;
; FP16-LABEL: pr62997:
Expand Down Expand Up @@ -1652,66 +1650,63 @@ define <8 x bfloat> @fptrunc_v8f64(<8 x double> %a) nounwind {
; AVXNC-NEXT: pushq %r12
; AVXNC-NEXT: pushq %rbx
; AVXNC-NEXT: subq $168, %rsp
; AVXNC-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVXNC-NEXT: vmovups %ymm1, (%rsp) # 32-byte Spill
; AVXNC-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVXNC-NEXT: vzeroupper
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; AVXNC-NEXT: # xmm0 = mem[1,0]
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vmovupd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; AVXNC-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVXNC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVXNC-NEXT: vzeroupper
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; AVXNC-NEXT: # xmm0 = mem[1,0]
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVXNC-NEXT: vzeroupper
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; AVXNC-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; AVXNC-NEXT: # xmm0 = mem[1,0]
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; AVXNC-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVXNC-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vzeroupper
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVXNC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; AVXNC-NEXT: # xmm0 = mem[1,0]
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vpextrw $0, %xmm0, %eax
; AVXNC-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vpextrw $0, %xmm0, %ebx
; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vpextrw $0, %xmm0, %ebp
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vpextrw $0, %xmm0, %r14d
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vpextrw $0, %xmm0, %r15d
; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vpextrw $0, %xmm0, %r12d
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vpextrw $0, %xmm0, %r13d
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vpextrw $0, %xmm0, %ebx
; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; AVXNC-NEXT: # xmm0 = mem[1,0]
; AVXNC-NEXT: callq __truncdfbf2@PLT
; AVXNC-NEXT: vpextrw $0, %xmm0, %eax
; AVXNC-NEXT: vmovd %ebx, %xmm0
; AVXNC-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVXNC-NEXT: vpinsrw $2, %r13d, %xmm0, %xmm0
; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVXNC-NEXT: vpinsrw $1, %r13d, %xmm0, %xmm0
; AVXNC-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVXNC-NEXT: vpinsrw $3, %r12d, %xmm0, %xmm0
; AVXNC-NEXT: vpinsrw $4, %r15d, %xmm0, %xmm0
; AVXNC-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0
; AVXNC-NEXT: vpinsrw $6, %ebp, %xmm0, %xmm0
; AVXNC-NEXT: vpinsrw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; AVXNC-NEXT: vpinsrw $7, %ebx, %xmm0, %xmm0
; AVXNC-NEXT: addq $168, %rsp
; AVXNC-NEXT: popq %rbx
; AVXNC-NEXT: popq %r12
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
;
; AVX512-LABEL: complex_canonicalize_fmul_half:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpextrw $0, %xmm1, %eax
; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vmovd %eax, %xmm1
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/cvt16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ define float @test2(ptr nocapture %src) nounwind {
;
; F16C-LABEL: test2:
; F16C: # %bb.0:
; F16C-NEXT: movzwl (%rdi), %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: retq
;
Expand Down Expand Up @@ -119,8 +118,7 @@ define double @test4(ptr nocapture %src) nounwind {
;
; F16C-LABEL: test4:
; F16C: # %bb.0:
; F16C-NEXT: movzwl (%rdi), %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; F16C-NEXT: retq
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/X86/fp-roundeven.ll
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ define half @roundeven_f16(half %h) {
;
; AVX512F-LABEL: roundeven_f16:
; AVX512F: ## %bb.0: ## %entry
; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
; AVX512F-NEXT: vmovd %eax, %xmm0
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512F-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down
46 changes: 0 additions & 46 deletions llvm/test/CodeGen/X86/fp16-libcalls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
define void @test_half_ceil(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_ceil:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -107,8 +105,6 @@ define void @test_half_cos(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq cosf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -168,8 +164,6 @@ define void @test_half_exp(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq expf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -229,8 +223,6 @@ define void @test_half_exp2(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq exp2f@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -290,8 +282,6 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq exp10f@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -349,8 +339,6 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
define void @test_half_fabs(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_fabs:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -392,8 +380,6 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
define void @test_half_floor(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_floor:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -447,14 +433,8 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm2, %eax
; F16C-NEXT: vpextrw $0, %xmm1, %ecx
; F16C-NEXT: vpextrw $0, %xmm0, %edx
; F16C-NEXT: vmovd %edx, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vmovd %ecx, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vmovd %eax, %xmm2
; F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; F16C-NEXT: callq fmaf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -542,8 +522,6 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
define void @test_half_fneg(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_fneg:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -587,8 +565,6 @@ define void @test_half_log(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq logf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -648,8 +624,6 @@ define void @test_half_log2(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq log2f@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -709,8 +683,6 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq log10f@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -768,8 +740,6 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind {
define void @test_half_nearbyint(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_nearbyint:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -823,11 +793,7 @@ define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm1, %eax
; F16C-NEXT: vpextrw $0, %xmm0, %ecx
; F16C-NEXT: vmovd %ecx, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vmovd %eax, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: callq powf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -907,8 +873,6 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rsi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq __powisf2@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -976,8 +940,6 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
define void @test_half_rint(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_rint:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -1031,8 +993,6 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq sinf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -1090,8 +1050,6 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind {
define void @test_half_sqrt(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_sqrt:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -1146,8 +1104,6 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind {
; F16C: # %bb.0:
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rdi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq tanf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down Expand Up @@ -1205,8 +1161,6 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind {
define void @test_half_trunc(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_trunc:
; F16C: # %bb.0:
; F16C-NEXT: vpextrw $0, %xmm0, %eax
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/half-darwin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ define float @extendhfsf(ptr %ptr) nounwind {
;
; CHECK-F16C-LABEL: extendhfsf:
; CHECK-F16C: ## %bb.0:
; CHECK-F16C-NEXT: movzwl (%rdi), %eax
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
; CHECK-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-F16C-NEXT: retq
;
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/half-fp80-darwin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ define void @extendhfxf(ptr %outptr, ptr %inptr) nounwind {
;
; CHECK-F16C-LABEL: extendhfxf:
; CHECK-F16C: ## %bb.0:
; CHECK-F16C-NEXT: movzwl (%rsi), %eax
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
; CHECK-F16C-NEXT: vpinsrw $0, (%rsi), %xmm0, %xmm0
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-F16C-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-F16C-NEXT: flds -{{[0-9]+}}(%rsp)
Expand Down
Loading
Loading