diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index ecaeff77fcb4b..0c71844e3a73e 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -333,6 +333,15 @@ def combine_mul_cmlt : GICombineRule<
   (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
+// Rewrites a G_FPTRUNC whose input was itself produced by truncating 64-bit
+// elements; the matching and rewriting live in matchFpTruncFpTrunc and
+// applyFpTruncFpTrunc.
+def lower_fptrunc_fptrunc: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_FPTRUNC):$root,
+        [{ return matchFpTruncFpTrunc(*${root}, MRI); }]),
+  (apply [{ applyFpTruncFpTrunc(*${root}, MRI, B); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -341,7 +350,7 @@ def AArch64PostLegalizerLowering
                         [shuffle_vector_lowering, vashr_vlshr_imm,
                          icmp_lowering, build_vector_lowering,
                          lower_vector_fcmp, form_truncstore, fconstant_to_constant,
-                         vector_sext_inreg_to_shift,
+                         vector_sext_inreg_to_shift, lower_fptrunc_fptrunc,
                          unmerge_ext_to_unmerge, lower_mulv2s64,
                          vector_unmerge_lowering, insertelt_nonconst,
                          unmerge_duplanes]> {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9e2d698e04ae7..fde86449a76a7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -817,14 +818,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .legalFor(
           {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
       .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
-      .clampNumElements(0, v4s16, v4s16)
-      .clampNumElements(0, v2s32, v2s32)
+      .moreElementsToNextPow2(1)
+      .customIf([](const LegalityQuery &Q) {
+        LLT DstTy = Q.Types[0];
+        LLT SrcTy = Q.Types[1];
+        return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
+               SrcTy.getScalarSizeInBits() == 64 &&
+               DstTy.getScalarSizeInBits() == 16;
+      })
+      // Clamp based on input
+      .clampNumElements(1, v4s32, v4s32)
+      .clampNumElements(1, v2s64, v2s64)
       .scalarize(0);
 
   getActionDefinitionsBuilder(G_FPEXT)
       .legalFor(
           {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
       .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
+      .moreElementsToNextPow2(0)
+      .customIf([](const LegalityQuery &Q) {
+        LLT DstTy = Q.Types[0];
+        LLT SrcTy = Q.Types[1];
+        return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
+               SrcTy.getScalarSizeInBits() == 16 &&
+               DstTy.getScalarSizeInBits() == 64;
+      })
       .clampNumElements(0, v4s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
       .scalarize(0);
@@ -1472,6 +1490,12 @@ bool AArch64LegalizerInfo::legalizeCustom(
     return legalizeICMP(MI, MRI, MIRBuilder);
   case TargetOpcode::G_BITCAST:
     return legalizeBitcast(MI, Helper);
+  case TargetOpcode::G_FPEXT:
+    // In order to vectorise f16 to f64 properly, we need to use f32 as an
+    // intermediary
+    return legalizeViaF32(MI, MIRBuilder, MRI, TargetOpcode::G_FPEXT);
+  case TargetOpcode::G_FPTRUNC:
+    return legalizeViaF32(MI, MIRBuilder, MRI, TargetOpcode::G_FPTRUNC);
   }
 
   llvm_unreachable("expected switch to return");
@@ -2396,3 +2420,42 @@ bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
   MI.eraseFromParent();
   return true;
 }
+
+/// Custom-legalizes a vector G_FPEXT or G_FPTRUNC between 16-bit and 64-bit
+/// elements by emitting two conversions of the same kind that go through a
+/// 32-bit element vector with the same number of elements.
+///
+/// \p Opcode selects the conversion to build and must be G_FPEXT or G_FPTRUNC.
+/// \returns true once \p MI has been replaced and erased, false if \p Opcode
+/// or the source type is not supported.
+bool AArch64LegalizerInfo::legalizeViaF32(MachineInstr &MI,
+                                          MachineIRBuilder &MIRBuilder,
+                                          MachineRegisterInfo &MRI,
+                                          unsigned Opcode) const {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT SrcTy = MRI.getType(Src);
+
+  // LLT::fixed_vector below needs a fixed element count, so anything that is
+  // not a fixed-width vector is reported as a legalization failure.
+  if (!SrcTy.isFixedVector())
+    return false;
+  LLT MidTy = LLT::fixed_vector(SrcTy.getNumElements(), LLT::scalar(32));
+
+  MIRBuilder.setInstrAndDebugLoc(MI);
+  switch (Opcode) {
+  default:
+    return false;
+  case TargetOpcode::G_FPEXT:
+    // Define Dst directly rather than creating a fresh register and rewriting
+    // every use of Dst afterwards.
+    MIRBuilder.buildFPExt(Dst, MIRBuilder.buildFPExt(MidTy, Src));
+    break;
+  case TargetOpcode::G_FPTRUNC:
+    MIRBuilder.buildFPTrunc(Dst, MIRBuilder.buildFPTrunc(MidTy, Src));
+    break;
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index bcb294326fa92..049808d66f983 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -67,6 +67,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
   bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
   bool legalizePrefetch(MachineInstr &MI, LegalizerHelper &Helper) const;
   bool legalizeBitcast(MachineInstr &MI, LegalizerHelper &Helper) const;
+  bool legalizeViaF32(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
+                      MachineRegisterInfo &MRI, unsigned Opcode) const;
   const AArch64Subtarget *ST;
 };
 } // End llvm namespace.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 23dcaea2ac1a4..e675fac0f13ac 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -901,6 +901,200 @@ unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
   return 0;
 }
 
+// Helper function for matchFpTruncFpTrunc.
+// Returns true if MaybeFpTrunc is a G_FPTRUNC whose source has 64-bit
+// elements and is not defined by an integer-to-FP conversion.
+bool checkTruncSrc(MachineRegisterInfo &MRI, MachineInstr *MaybeFpTrunc) {
+  if (!MaybeFpTrunc || MaybeFpTrunc->getOpcode() != TargetOpcode::G_FPTRUNC)
+    return false;
+
+  // Check the source is 64 bits as we only want to match a very specific
+  // pattern
+  Register FpTruncSrc = MaybeFpTrunc->getOperand(1).getReg();
+  LLT SrcTy = MRI.getType(FpTruncSrc);
+  if (SrcTy.getScalarSizeInBits() != 64)
+    return false;
+
+  // getDefIgnoringCopies can return null, so bail out instead of
+  // dereferencing it. Values converted from an integer are rejected because
+  // no rounding fix is necessary for them.
+  MachineInstr *FpTruncSrcDef = getDefIgnoringCopies(FpTruncSrc, MRI);
+  if (!FpTruncSrcDef)
+    return false;
+  unsigned SrcOpc = FpTruncSrcDef->getOpcode();
+  return SrcOpc != TargetOpcode::G_SITOFP && SrcOpc != TargetOpcode::G_UITOFP;
+}
+
+// To avoid double rounding issues we need to lower FPTRUNC(FPTRUNC) to an odd
+// rounding truncate and a normal truncate.
When
+// truncating an FP that came from an integer this is not a problem as the range
+// of values is lower in the int
+//
+// Only the shapes applyFpTruncFpTrunc can rewrite are accepted, so a successful
+// match always results in MI being replaced.
+bool matchFpTruncFpTrunc(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  if (MI.getOpcode() != TargetOpcode::G_FPTRUNC)
+    return false;
+
+  // Check the destination is 16 bits as we only want to match a very specific
+  // pattern
+  Register Dst = MI.getOperand(0).getReg();
+  if (MRI.getType(Dst).getScalarSizeInBits() != 16)
+    return false;
+
+  Register Src = MI.getOperand(1).getReg();
+  MachineInstr *ParentDef = getDefIgnoringCopies(Src, MRI);
+  if (!ParentDef)
+    return false;
+
+  switch (ParentDef->getOpcode()) {
+  default:
+    // This includes a plain G_FPTRUNC parent: applyFpTruncFpTrunc has no
+    // rewrite for it, so matching it would report a change that never happens.
+    return false;
+  case TargetOpcode::G_CONCAT_VECTORS: {
+    // Expecting exactly two FPTRUNCs
+    if (ParentDef->getNumOperands() != 3)
+      return false;
+
+    // All operands need to be FPTRUNC
+    for (unsigned OpIdx = 1, NumOperands = ParentDef->getNumOperands();
+         OpIdx != NumOperands; ++OpIdx) {
+      Register FpTruncDst = ParentDef->getOperand(OpIdx).getReg();
+      if (!checkTruncSrc(MRI, getDefIgnoringCopies(FpTruncDst, MRI)))
+        return false;
+    }
+    break;
+  }
+  // This is to match cases in which vectors are widened to a larger size
+  case TargetOpcode::G_INSERT_VECTOR_ELT: {
+    Register VecExtractDst = ParentDef->getOperand(2).getReg();
+    MachineInstr *VecExtractDef = getDefIgnoringCopies(VecExtractDst, MRI);
+
+    // The element's definition may be missing, or operand 1 may not be a
+    // register at all, so validate before reading it.
+    if (!VecExtractDef || VecExtractDef->getNumOperands() < 2 ||
+        !VecExtractDef->getOperand(1).isReg())
+      return false;
+
+    Register FpTruncDst = VecExtractDef->getOperand(1).getReg();
+    if (!checkTruncSrc(MRI, getDefIgnoringCopies(FpTruncDst, MRI)))
+      return false;
+    break;
+  }
+  }
+
+  return true;
+}
+
+// Replaces a G_FPTRUNC accepted by matchFpTruncFpTrunc. The 64-bit source of
+// the inner truncate(s) is narrowed to f32 with FCVTXN, the odd rounding
+// truncate, and the result is truncated to f16 by an ordinary G_FPTRUNC. MI
+// is erased and its users are redirected to the new value. Does nothing if
+// the definition feeding MI is not one of the matched shapes.
+void applyFpTruncFpTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &B) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+
+  LLT V2F32 = LLT::fixed_vector(2, LLT::scalar(32));
+  LLT V4F32 = LLT::fixed_vector(4, LLT::scalar(32));
+  LLT V4F16 = LLT::fixed_vector(4, LLT::scalar(16));
+
+  B.setInstrAndDebugLoc(MI);
+
+  MachineInstr *ParentDef = getDefIgnoringCopies(Src, MRI);
+  if (!ParentDef)
+    return;
+
+  switch (ParentDef->getOpcode()) {
+  default:
+    return;
+  case TargetOpcode::G_INSERT_VECTOR_ELT: {
+    // matchFpTruncFpTrunc has already validated this chain of definitions.
+    // NOTE(review): only one inserted element was inspected by the match; this
+    // assumes the insert chain rebuilds the whole truncated vector in order.
+    Register VecExtractDst = ParentDef->getOperand(2).getReg();
+    MachineInstr *VecExtractDef = getDefIgnoringCopies(VecExtractDst, MRI);
+
+    Register FpTruncDst = VecExtractDef->getOperand(1).getReg();
+    MachineInstr *FpTruncDef = getDefIgnoringCopies(FpTruncDst, MRI);
+
+    Register FpTruncSrc = FpTruncDef->getOperand(1).getReg();
+    MRI.setRegClass(FpTruncSrc, &AArch64::FPR128RegClass);
+
+    Register Fp32 = MRI.createGenericVirtualRegister(V2F32);
+    MRI.setRegClass(Fp32, &AArch64::FPR64RegClass);
+
+    B.buildInstr(AArch64::FCVTXNv2f32, {Fp32}, {FpTruncSrc});
+
+    // Only 4f32 -> 4f16 is legal so we need to mimic that situation
+    Register Fp32Padding = B.buildUndef(V2F32).getReg(0);
+    MRI.setRegClass(Fp32Padding, &AArch64::FPR64RegClass);
+
+    Register Fp32Full = MRI.createGenericVirtualRegister(V4F32);
+    MRI.setRegClass(Fp32Full, &AArch64::FPR128RegClass);
+    B.buildConcatVectors(Fp32Full, {Fp32, Fp32Padding});
+
+    Register Fp16 = MRI.createGenericVirtualRegister(V4F16);
+    MRI.setRegClass(Fp16, &AArch64::FPR64RegClass);
+    B.buildFPTrunc(Fp16, Fp32Full);
+
+    MRI.replaceRegWith(Dst, Fp16);
+    MI.eraseFromParent();
+    break;
+  }
+  case TargetOpcode::G_CONCAT_VECTORS: {
+    // Get the two FP Truncs that are being concatenated
+    Register FpTrunc1Dst = ParentDef->getOperand(1).getReg();
+    Register FpTrunc2Dst = ParentDef->getOperand(2).getReg();
+
+    MachineInstr *FpTrunc1Def = getDefIgnoringCopies(FpTrunc1Dst, MRI);
+    MachineInstr *FpTrunc2Def = getDefIgnoringCopies(FpTrunc2Dst, MRI);
+
+    // Make the registers 128bit to store the 2 doubles
+    Register LoFp64 = FpTrunc1Def->getOperand(1).getReg();
+    MRI.setRegClass(LoFp64, &AArch64::FPR128RegClass);
+    Register HiFp64 = FpTrunc2Def->getOperand(1).getReg();
+    MRI.setRegClass(HiFp64, &AArch64::FPR128RegClass);
+
+    // Convert the lower half
+    Register LoFp32 = MRI.createGenericVirtualRegister(V2F32);
+    MRI.setRegClass(LoFp32, &AArch64::FPR64RegClass);
+    B.buildInstr(AArch64::FCVTXNv2f32, {LoFp32}, {LoFp64});
+
+    // Create a register for the high half to use
+    Register AccUndef = MRI.createGenericVirtualRegister(V4F32);
+    MRI.setRegClass(AccUndef, &AArch64::FPR128RegClass);
+    B.buildUndef(AccUndef);
+
+    Register Acc = MRI.createGenericVirtualRegister(V4F32);
+    MRI.setRegClass(Acc, &AArch64::FPR128RegClass);
+    B.buildInstr(TargetOpcode::INSERT_SUBREG)
+        .addDef(Acc)
+        .addUse(AccUndef)
+        .addUse(LoFp32)
+        .addImm(AArch64::dsub);
+
+    // Convert the high half
+    Register AccOut = MRI.createGenericVirtualRegister(V4F32);
+    MRI.setRegClass(AccOut, &AArch64::FPR128RegClass);
+    B.buildInstr(AArch64::FCVTXNv4f32)
+        .addDef(AccOut)
+        .addUse(Acc)
+        .addUse(HiFp64);
+
+    Register Fp16 = MRI.createGenericVirtualRegister(V4F16);
+    MRI.setRegClass(Fp16, &AArch64::FPR64RegClass);
+    B.buildFPTrunc(Fp16, AccOut);
+
+    MRI.replaceRegWith(Dst, Fp16);
+    MI.eraseFromParent();
+    break;
+  }
+  }
+}
+
 /// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
 /// instruction \p MI.
 bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 896603d6eb20d..0561f91b6e015 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -555,11 +555,11 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: ..
imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. the first uncovered type index: 2, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. the first uncovered type index: 2, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FPTOSI (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll index 3e4b887fed55d..b8b8d20b9a17b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll @@ -1197,30 +1197,22 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) { ; ; CHECK-GI-LABEL: vec_round_f16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: sub sp, sp, #48 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: mov v2.d[0], x8 ; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: mov v2.d[1], x8 -; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __trunctfhf2 ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-GI-NEXT: str q0, [sp, #16] // 
16-byte Folded Spill ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: bl __trunctfhf2 +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: bl __trunctfhf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: bl __trunctfhf2 -; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: mov v1.h[1], v0.h[0] +; CHECK-GI-NEXT: fmov d0, d1 +; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: ret %dst = fptrunc <2 x fp128> %val to <2 x half> ret <2 x half> %dst diff --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll index a37aabb0b5384..12b6562b5cf0c 100644 --- a/llvm/test/CodeGen/AArch64/fmla.ll +++ b/llvm/test/CodeGen/AArch64/fmla.ll @@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h -; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4] ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5] -; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6] -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4] +; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], 
v2.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1] -; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1] +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2] @@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h -; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4] ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5] -; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6] -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4] +; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1] -; CHECK-GI-NOFP16-NEXT: 
fadd v1.4s, v1.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1] +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2] diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll index 6233ce743b706..760742a4efad7 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -170,47 +170,12 @@ define <4 x half> @s_to_h(<4 x float> %a) { } define <4 x half> @d_to_h(<4 x double> %a) { -; CHECK-CVT-SD-LABEL: d_to_h: -; CHECK-CVT-SD: // %bb.0: -; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d -; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d -; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-SD-NEXT: ret -; -; CHECK-FP16-SD-LABEL: d_to_h: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d -; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d -; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s -; CHECK-FP16-SD-NEXT: ret -; -; CHECK-CVT-GI-LABEL: d_to_h: -; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: mov d2, v0.d[1] -; CHECK-CVT-GI-NEXT: fcvt h0, d0 -; CHECK-CVT-GI-NEXT: mov d3, v1.d[1] -; CHECK-CVT-GI-NEXT: fcvt h1, d1 -; CHECK-CVT-GI-NEXT: fcvt h2, d2 -; CHECK-CVT-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-CVT-GI-NEXT: fcvt h2, d3 -; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0] -; CHECK-CVT-GI-NEXT: mov v0.h[3], v2.h[0] -; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-CVT-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: d_to_h: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: mov d2, v0.d[1] -; CHECK-FP16-GI-NEXT: fcvt h0, d0 -; CHECK-FP16-GI-NEXT: mov d3, v1.d[1] -; CHECK-FP16-GI-NEXT: fcvt h1, d1 -; 
CHECK-FP16-GI-NEXT: fcvt h2, d2 -; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-FP16-GI-NEXT: fcvt h2, d3 -; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[3], v2.h[0] -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-FP16-GI-NEXT: ret +; CHECK-LABEL: d_to_h: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret %1 = fptrunc <4 x double> %a to <4 x half> ret <4 x half> %1 } @@ -241,30 +206,16 @@ define <4 x double> @h_to_d(<4 x half> %a) { ; ; CHECK-CVT-GI-LABEL: h_to_d: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] -; CHECK-CVT-GI-NEXT: mov h2, v0.h[2] -; CHECK-CVT-GI-NEXT: mov h3, v0.h[3] -; CHECK-CVT-GI-NEXT: fcvt d0, h0 -; CHECK-CVT-GI-NEXT: fcvt d4, h1 -; CHECK-CVT-GI-NEXT: fcvt d1, h2 -; CHECK-CVT-GI-NEXT: fcvt d2, h3 -; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: h_to_d: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d4, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h2 -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-FP16-GI-NEXT: ret %1 = fpext <4 x half> %a to <4 x double> ret <4 x double> %1 diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll index 
86763eb5f9e3b..4d8505679c71c 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -198,48 +198,22 @@ define <8 x half> @d_to_h(<8 x double> %a) { ; ; CHECK-CVT-GI-LABEL: d_to_h: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: mov d4, v0.d[1] -; CHECK-CVT-GI-NEXT: fcvt h0, d0 -; CHECK-CVT-GI-NEXT: mov d5, v1.d[1] -; CHECK-CVT-GI-NEXT: fcvt h1, d1 -; CHECK-CVT-GI-NEXT: fcvt h4, d4 -; CHECK-CVT-GI-NEXT: mov v0.h[1], v4.h[0] -; CHECK-CVT-GI-NEXT: fcvt h4, d5 -; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0] -; CHECK-CVT-GI-NEXT: mov d1, v2.d[1] -; CHECK-CVT-GI-NEXT: fcvt h2, d2 -; CHECK-CVT-GI-NEXT: mov v0.h[3], v4.h[0] -; CHECK-CVT-GI-NEXT: fcvt h1, d1 -; CHECK-CVT-GI-NEXT: mov v0.h[4], v2.h[0] -; CHECK-CVT-GI-NEXT: mov d2, v3.d[1] -; CHECK-CVT-GI-NEXT: fcvt h3, d3 -; CHECK-CVT-GI-NEXT: mov v0.h[5], v1.h[0] -; CHECK-CVT-GI-NEXT: fcvt h1, d2 -; CHECK-CVT-GI-NEXT: mov v0.h[6], v3.h[0] -; CHECK-CVT-GI-NEXT: mov v0.h[7], v1.h[0] +; CHECK-CVT-GI-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-CVT-GI-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-CVT-GI-NEXT: fcvtxn v1.2s, v2.2d +; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-GI-NEXT: fcvtxn2 v1.4s, v3.2d +; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: d_to_h: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: mov d4, v0.d[1] -; CHECK-FP16-GI-NEXT: fcvt h0, d0 -; CHECK-FP16-GI-NEXT: mov d5, v1.d[1] -; CHECK-FP16-GI-NEXT: fcvt h1, d1 -; CHECK-FP16-GI-NEXT: fcvt h4, d4 -; CHECK-FP16-GI-NEXT: mov v0.h[1], v4.h[0] -; CHECK-FP16-GI-NEXT: fcvt h4, d5 -; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0] -; CHECK-FP16-GI-NEXT: mov d1, v2.d[1] -; CHECK-FP16-GI-NEXT: fcvt h2, d2 -; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0] -; CHECK-FP16-GI-NEXT: fcvt h1, d1 -; CHECK-FP16-GI-NEXT: mov v0.h[4], v2.h[0] -; CHECK-FP16-GI-NEXT: mov d2, v3.d[1] -; CHECK-FP16-GI-NEXT: fcvt h3, d3 -; CHECK-FP16-GI-NEXT: mov v0.h[5], v1.h[0] -; CHECK-FP16-GI-NEXT: fcvt h1, d2 -; 
CHECK-FP16-GI-NEXT: mov v0.h[6], v3.h[0] -; CHECK-FP16-GI-NEXT: mov v0.h[7], v1.h[0] +; CHECK-FP16-GI-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-FP16-GI-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-FP16-GI-NEXT: fcvtxn v1.2s, v2.2d +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-FP16-GI-NEXT: fcvtxn2 v1.4s, v3.2d +; CHECK-FP16-GI-NEXT: fcvtn2 v0.8h, v1.4s ; CHECK-FP16-GI-NEXT: ret %1 = fptrunc <8 x double> %a to <8 x half> ret <8 x half> %1 @@ -298,48 +272,22 @@ define <8 x double> @h_to_d(<8 x half> %a) { ; ; CHECK-CVT-GI-LABEL: h_to_d: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] -; CHECK-CVT-GI-NEXT: mov h2, v0.h[2] -; CHECK-CVT-GI-NEXT: mov h3, v0.h[3] -; CHECK-CVT-GI-NEXT: mov h4, v0.h[4] -; CHECK-CVT-GI-NEXT: mov h5, v0.h[5] -; CHECK-CVT-GI-NEXT: mov h6, v0.h[6] -; CHECK-CVT-GI-NEXT: mov h7, v0.h[7] -; CHECK-CVT-GI-NEXT: fcvt d0, h0 -; CHECK-CVT-GI-NEXT: fcvt d16, h1 -; CHECK-CVT-GI-NEXT: fcvt d1, h2 -; CHECK-CVT-GI-NEXT: fcvt d17, h3 -; CHECK-CVT-GI-NEXT: fcvt d2, h4 -; CHECK-CVT-GI-NEXT: fcvt d4, h5 -; CHECK-CVT-GI-NEXT: fcvt d3, h6 -; CHECK-CVT-GI-NEXT: fcvt d5, h7 -; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0] -; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0] -; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0] -; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0] +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: h_to_d: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d16, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h2 -; 
CHECK-FP16-GI-NEXT: fcvt d17, h3 -; CHECK-FP16-GI-NEXT: fcvt d2, h4 -; CHECK-FP16-GI-NEXT: fcvt d4, h5 -; CHECK-FP16-GI-NEXT: fcvt d3, h6 -; CHECK-FP16-GI-NEXT: fcvt d5, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0] -; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s ; CHECK-FP16-GI-NEXT: ret %1 = fpext <8 x half> %a to <8 x double> ret <8 x double> %1 diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index 637c02875b84e..b075a8b6f70ee 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -285,31 +285,24 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: stest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_1 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_1] ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v1.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; 
CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_0] -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -351,24 +344,17 @@ define <4 x i32> @utest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: utest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; 
CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -412,28 +398,21 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: ustest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-GI-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0 +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0 +; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b +; 
CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -2273,31 +2252,24 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: stest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI33_1 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_1] ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI33_0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v1.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; 
CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -2337,24 +2309,17 @@ define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: utest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -2397,28 +2362,21 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: ustest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl 
v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-GI-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0 +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0 +; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b +; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll index df90f9d5f0910..8980340a447de 100644 --- a/llvm/test/CodeGen/AArch64/fpext.ll +++ b/llvm/test/CodeGen/AArch64/fpext.ll @@ -82,11 +82,12 @@ define <3 x double> @fpext_v3f32_v3f64(<3 x float> %a) { ; ; CHECK-GI-LABEL: fpext_v3f32_v3f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[2] +; CHECK-GI-NEXT: mov v1.s[0], v0.s[2] ; CHECK-GI-NEXT: fcvtl v0.2d, v0.2s -; CHECK-GI-NEXT: fcvt d2, s1 +; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: // 
kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret entry: %c = fpext <3 x float> %a to <3 x double> @@ -320,20 +321,11 @@ entry: } define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) { -; CHECK-SD-LABEL: fpext_v2f16_v2f64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h -; CHECK-SD-NEXT: fcvtl v0.2d, v0.2s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fpext_v2f16_v2f64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: fcvt d1, h1 -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fpext_v2f16_v2f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: ret entry: %c = fpext <2 x half> %a to <2 x double> ret <2 x double> %c @@ -353,12 +345,12 @@ define <3 x double> @fpext_v3f16_v3f64(<3 x half> %a) { ; ; CHECK-GI-LABEL: fpext_v3f16_v3f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: fcvt d1, h1 -; CHECK-GI-NEXT: fcvt d2, h2 +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v1.4s +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = fpext <3 x half> %a to <3 x double> @@ -375,16 +367,9 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) { ; ; CHECK-GI-LABEL: fpext_v4f16_v4f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: mov h3, v0.h[3] -; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: fcvt d4, h1 -; CHECK-GI-NEXT: fcvt 
d1, h2 -; CHECK-GI-NEXT: fcvt d2, h3 -; CHECK-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-GI-NEXT: ret entry: %c = fpext <4 x half> %a to <4 x double> diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index f6053cee50dae..3dafabe0b69d7 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -4610,11 +4610,8 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v2f16_v2i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v0.2s ; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -4654,11 +4651,8 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v2f16_v2i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v0.2s ; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -4710,20 +4704,14 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v3f16_v3i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d1, h0 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: mov v0.d[1], 
v2.d[0] -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <3 x half> %a to <3 x i64> @@ -4774,20 +4762,14 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v3f16_v3i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d1, h0 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <3 x half> %a to <3 x i64> @@ -4842,17 +4824,10 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v4f16_v4i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 
def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -4908,17 +4883,10 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v4f16_v4i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -5005,29 +4973,16 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v8f16_v8i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; 
CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d +; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v4.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <8 x half> %a to <8 x i64> @@ -5113,29 +5068,16 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v8f16_v8i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; 
CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d +; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v4.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <8 x half> %a to <8 x i64> @@ -5285,52 +5227,26 @@ define <16 x i64> @fptos_v16f16_v16i64(<16 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v16f16_v16i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h3, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d2, h0 -; CHECK-FP16-GI-NEXT: mov h6, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h16, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h0, v0.h[7] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[2] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h22, v1.h[6] -; CHECK-FP16-GI-NEXT: mov h23, v1.h[7] -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: fcvt d16, h16 -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d24, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h17 -; CHECK-FP16-GI-NEXT: fcvt d17, h18 -; CHECK-FP16-GI-NEXT: fcvt d18, h19 -; CHECK-FP16-GI-NEXT: fcvt d19, h20 -; CHECK-FP16-GI-NEXT: fcvt d20, h21 -; CHECK-FP16-GI-NEXT: fcvt d21, h22 -; CHECK-FP16-GI-NEXT: fcvt d22, h23 -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], 
v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: mov v16.d[1], v0.d[0] -; CHECK-FP16-GI-NEXT: mov v24.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v17.d[1], v18.d[0] -; CHECK-FP16-GI-NEXT: mov v19.d[1], v20.d[0] -; CHECK-FP16-GI-NEXT: mov v21.d[1], v22.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v6.2d -; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v16.2d -; CHECK-FP16-GI-NEXT: fcvtzs v4.2d, v24.2d -; CHECK-FP16-GI-NEXT: fcvtzs v5.2d, v17.2d -; CHECK-FP16-GI-NEXT: fcvtzs v6.2d, v19.2d -; CHECK-FP16-GI-NEXT: fcvtzs v7.2d, v21.2d +; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-FP16-GI-NEXT: fcvtl v4.2d, v2.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v2.4s +; CHECK-FP16-GI-NEXT: fcvtl v5.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v6.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtl v7.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v16.2d, v3.4s +; CHECK-FP16-GI-NEXT: fcvtl v17.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v18.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v4.2d +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v5.2d +; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtzs v4.2d, v7.2d +; CHECK-FP16-GI-NEXT: fcvtzs v5.2d, v16.2d +; CHECK-FP16-GI-NEXT: fcvtzs v6.2d, v17.2d +; CHECK-FP16-GI-NEXT: fcvtzs v7.2d, v18.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <16 x half> %a to <16 x i64> @@ -5480,52 +5396,26 @@ define <16 x i64> @fptou_v16f16_v16i64(<16 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v16f16_v16i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h3, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d2, h0 -; CHECK-FP16-GI-NEXT: mov h6, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h16, v0.h[6] -; 
CHECK-FP16-GI-NEXT: mov h0, v0.h[7] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[2] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h22, v1.h[6] -; CHECK-FP16-GI-NEXT: mov h23, v1.h[7] -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: fcvt d16, h16 -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d24, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h17 -; CHECK-FP16-GI-NEXT: fcvt d17, h18 -; CHECK-FP16-GI-NEXT: fcvt d18, h19 -; CHECK-FP16-GI-NEXT: fcvt d19, h20 -; CHECK-FP16-GI-NEXT: fcvt d20, h21 -; CHECK-FP16-GI-NEXT: fcvt d21, h22 -; CHECK-FP16-GI-NEXT: fcvt d22, h23 -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: mov v16.d[1], v0.d[0] -; CHECK-FP16-GI-NEXT: mov v24.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v17.d[1], v18.d[0] -; CHECK-FP16-GI-NEXT: mov v19.d[1], v20.d[0] -; CHECK-FP16-GI-NEXT: mov v21.d[1], v22.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v6.2d -; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v16.2d -; CHECK-FP16-GI-NEXT: fcvtzu v4.2d, v24.2d -; CHECK-FP16-GI-NEXT: fcvtzu v5.2d, v17.2d -; CHECK-FP16-GI-NEXT: fcvtzu v6.2d, v19.2d -; CHECK-FP16-GI-NEXT: fcvtzu v7.2d, v21.2d +; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-FP16-GI-NEXT: fcvtl v4.2d, v2.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v2.4s +; CHECK-FP16-GI-NEXT: fcvtl v5.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v6.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtl v7.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v16.2d, v3.4s +; 
CHECK-FP16-GI-NEXT: fcvtl v17.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v18.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v4.2d +; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v5.2d +; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtzu v4.2d, v7.2d +; CHECK-FP16-GI-NEXT: fcvtzu v5.2d, v16.2d +; CHECK-FP16-GI-NEXT: fcvtzu v6.2d, v17.2d +; CHECK-FP16-GI-NEXT: fcvtzu v7.2d, v18.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <16 x half> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index c74112937ba53..f1b13f93fb7d8 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3083,30 +3083,14 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v1.d[1], x11 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl v1.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v2.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzs v0.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_signed_v4f16_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s +; 
CHECK-GI-NEXT: fcvtzs v0.2d, v1.2d +; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x } @@ -3792,46 +3776,19 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v3.d[1], x14 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-CVT-NEXT: fcvtl v2.2d, v1.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v1.2d, v1.4s -; CHECK-GI-CVT-NEXT: fcvtl v3.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v4.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzs v0.2d, v2.2d -; CHECK-GI-CVT-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-GI-CVT-NEXT: fcvtzs v3.2d, v4.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: fcvt d4, h4 -; CHECK-GI-FP16-NEXT: fcvt d5, h5 -; CHECK-GI-FP16-NEXT: fcvt d6, h6 -; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] -; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_signed_v8f16_v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h +; 
CHECK-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-GI-NEXT: fcvtzs v0.2d, v2.2d +; CHECK-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-NEXT: fcvtzs v2.2d, v3.2d +; CHECK-GI-NEXT: fcvtzs v3.2d, v4.2d +; CHECK-GI-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index efe0a1bedbc9e..b407b3c0e9940 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2501,30 +2501,14 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v1.d[1], x11 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl v1.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v2.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzu v0.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_unsigned_v4f16_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-GI-NEXT: fcvtzu v0.2d, v1.2d +; CHECK-GI-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-NEXT: 
ret %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x } @@ -3109,46 +3093,19 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v3.d[1], x14 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-CVT-NEXT: fcvtl v2.2d, v1.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v1.2d, v1.4s -; CHECK-GI-CVT-NEXT: fcvtl v3.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v4.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzu v0.2d, v2.2d -; CHECK-GI-CVT-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-GI-CVT-NEXT: fcvtzu v3.2d, v4.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: fcvt d4, h4 -; CHECK-GI-FP16-NEXT: fcvt d5, h5 -; CHECK-GI-FP16-NEXT: fcvt d6, h6 -; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] -; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_unsigned_v8f16_v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s +; 
CHECK-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-GI-NEXT: fcvtzu v0.2d, v2.2d +; CHECK-GI-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-NEXT: fcvtzu v2.2d, v3.2d +; CHECK-GI-NEXT: fcvtzu v3.2d, v4.2d +; CHECK-GI-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x } diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index 1f84c944d7c16..de780bf475138 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -112,30 +112,22 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) { ; ; CHECK-GI-LABEL: fptrunc_v2f128_v2f16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: sub sp, sp, #48 +; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: mov v2.d[0], x8 ; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: mov v2.d[1], x8 -; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __trunctfhf2 ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: bl __trunctfhf2 +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: bl __trunctfhf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: bl __trunctfhf2 -; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 
-; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-GI-NEXT: mov v1.h[1], v0.h[0] +; CHECK-GI-NEXT: fmov d0, d1 +; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: ret entry: %c = fptrunc <2 x fp128> %a to <2 x half> @@ -260,8 +252,9 @@ define <3 x float> @fptrunc_v3f64_v3f32(<3 x double> %a) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: fcvt s2, d2 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: fcvtn v2.2s, v2.2d ; CHECK-GI-NEXT: fcvtn v1.2s, v0.2d ; CHECK-GI-NEXT: mov v0.s[0], v1.s[0] ; CHECK-GI-NEXT: mov v0.s[1], v1.s[1] @@ -284,61 +277,49 @@ entry: } define <2 x half> @fptrunc_v2f64_v2f16(<2 x double> %a) { -; CHECK-SD-LABEL: fptrunc_v2f64_v2f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtxn v0.2s, v0.2d -; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptrunc_v2f64_v2f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d1, v0.d[1] -; CHECK-GI-NEXT: fcvt h0, d0 -; CHECK-GI-NEXT: fcvt h1, d1 -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptrunc_v2f64_v2f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret entry: %c = fptrunc <2 x double> %a to <2 x half> ret <2 x half> %c } define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) { -; CHECK-LABEL: fptrunc_v3f64_v3f16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvt h0, d0 -; CHECK-NEXT: fcvt h1, d1 -; CHECK-NEXT: fcvt h2, d2 -; CHECK-NEXT: mov v0.h[1], v1.h[0] -; CHECK-NEXT: mov v0.h[2], v2.h[0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fptrunc_v3f64_v3f16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fcvt h0, d0 +; CHECK-SD-NEXT: 
fcvt h1, d1 +; CHECK-SD-NEXT: fcvt h2, d2 +; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] +; CHECK-SD-NEXT: mov v0.h[2], v2.h[0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fptrunc_v3f64_v3f16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-GI-NEXT: fcvtxn2 v0.4s, v2.2d +; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NEXT: ret entry: %c = fptrunc <3 x double> %a to <3 x half> ret <3 x half> %c } define <4 x half> @fptrunc_v4f64_v4f16(<4 x double> %a) { -; CHECK-SD-LABEL: fptrunc_v4f64_v4f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtxn v0.2s, v0.2d -; CHECK-SD-NEXT: fcvtxn2 v0.4s, v1.2d -; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptrunc_v4f64_v4f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov d2, v0.d[1] -; CHECK-GI-NEXT: fcvt h0, d0 -; CHECK-GI-NEXT: mov d3, v1.d[1] -; CHECK-GI-NEXT: fcvt h1, d1 -; CHECK-GI-NEXT: fcvt h2, d2 -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: fcvt h2, d3 -; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] -; CHECK-GI-NEXT: mov v0.h[3], v2.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptrunc_v4f64_v4f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret entry: %c = fptrunc <4 x double> %a to <4 x half> ret <4 x half> %c