diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8891aedcb58e5..5f0ae5ce8614c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -16122,6 +16122,63 @@ of the argument. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +'``llvm.modf.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.modf`` on any floating-point +or vector of floating-point type. However, not all targets support all types. + +:: + + declare { float, float } @llvm.modf.f32(float %Val) + declare { double, double } @llvm.modf.f64(double %Val) + declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80 %Val) + declare { fp128, fp128 } @llvm.modf.f128(fp128 %Val) + declare { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %Val) + declare { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %Val) + +Overview: +""""""""" + +The '``llvm.modf.*``' intrinsics return the operand's integral and fractional +parts. + +Arguments: +"""""""""" + +The argument is a :ref:`floating-point <t_floating>` value or +:ref:`vector <t_vector>` of floating-point values. The result is a struct +containing two values of the same type as the argument. + +Semantics: +"""""""""" + +Returns the same values as the corresponding libm '``modf``' function, without +trapping or setting ``errno``. + +The first result is the fractional part of the operand and the second result is +the integral part of the operand. Both results have the same sign as the operand. + +Excluding exceptional inputs (listed below), ``llvm.modf.*`` is semantically +equivalent to:: + + %fp = frem %x, 1.0 ; Fractional part + %ip = fsub %x, %fp ; Integral part + +(assuming no floating-point rounding errors) + +If the argument is a zero, returns a zero with the same sign for both the +fractional and integral parts. + +If the argument is an infinity, returns a fractional part of zero with the same +sign as the argument, and an integral part of infinity with the same sign. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. + '``llvm.pow.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a76de251c7138..8468992ed4b7a 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2101,6 +2101,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { case Intrinsic::sincos: ISD = ISD::FSINCOS; break; + case Intrinsic::modf: + ISD = ISD::FMODF; + break; case Intrinsic::tan: ISD = ISD::FTAN; break; diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index fd8784a4c1003..046d9befd0e91 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1058,6 +1058,10 @@ enum NodeType { /// FSINCOS - Compute both fsin and fcos as a single operation. FSINCOS, + /// FMODF - Decomposes the operand into integral and fractional parts, each + /// having the same type and sign as the operand. + FMODF, + /// Gets the current floating-point environment. The first operand is a token /// chain. The results are FP environment, represented by an integer value, /// and a token chain.
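Note: the following is a hypothetical usage sketch, not part of the patch, illustrating the semantics documented above. For a finite operand the integral part is the operand truncated toward zero and the fractional part is the exact remainder, so adding the two results back together reproduces the operand; the function name is illustrative only.

  define double @modf_roundtrip(double %val) {
    %parts = call { double, double } @llvm.modf.f64(double %val)
    %fp = extractvalue { double, double } %parts, 0 ; fractional part
    %ip = extractvalue { double, double } %parts, 1 ; integral part
    %sum = fadd double %ip, %fp                     ; equals %val for finite inputs
    ret double %sum
  }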
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h index 045ec7d365311..59313520e0d83 100644 --- a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h +++ b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h @@ -66,6 +66,10 @@ Libcall getFREXP(EVT RetVT); /// UNKNOWN_LIBCALL if there is none. Libcall getFSINCOS(EVT RetVT); +/// getMODF - Return the MODF_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +Libcall getMODF(EVT RetVT); + /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or /// UNKNOWN_LIBCALL if there is none. Libcall getSYNC(unsigned Opc, MVT VT); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index ee877349a3314..2c22060237faa 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1063,6 +1063,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>], [llvm_anyfloat_ty]>; + def int_modf : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_anyfloat_ty]>; // Truncate a floating point number with a specific rounding mode def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 8153845b52c7a..dc69b1ae19769 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -354,6 +354,11 @@ HANDLE_LIBCALL(FREXP_F64, "frexp") HANDLE_LIBCALL(FREXP_F80, "frexpl") HANDLE_LIBCALL(FREXP_F128, "frexpl") HANDLE_LIBCALL(FREXP_PPCF128, "frexpl") +HANDLE_LIBCALL(MODF_F32, "modff") +HANDLE_LIBCALL(MODF_F64, "modf") +HANDLE_LIBCALL(MODF_F80, "modfl") +HANDLE_LIBCALL(MODF_F128, "modfl") +HANDLE_LIBCALL(MODF_PPCF128, "modfl") // Floating point environment HANDLE_LIBCALL(FEGETENV, "fegetenv") diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 6c9c96ceaa4ba..f61928a66eb3c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4609,12 +4609,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, RTLIB::LDEXP_F80, RTLIB::LDEXP_F128, RTLIB::LDEXP_PPCF128, Results); break; + case ISD::FMODF: case ISD::FFREXP: { - RTLIB::Libcall LC = RTLIB::getFREXP(Node->getValueType(0)); + EVT VT = Node->getValueType(0); + RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? 
RTLIB::getMODF(VT) + : RTLIB::getFREXP(VT); bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, /*CallRetResNo=*/0); if (!Expanded) - llvm_unreachable("Expected scalar FFREXP to expand to libcall!"); + llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); break; } case ISD::FPOWI: @@ -5503,9 +5506,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(Tmp2.getValue(1)); break; } + case ISD::FMODF: case ISD::FSINCOS: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); - Tmp2 = DAG.getNode(ISD::FSINCOS, dl, DAG.getVTList(NVT, NVT), Tmp1, + Tmp2 = DAG.getNode(Node->getOpcode(), dl, DAG.getVTList(NVT, NVT), Tmp1, Node->getFlags()); Tmp3 = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true); for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 71f100bfa0343..2a4eed1ed527a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2766,10 +2766,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break; case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break; + case ISD::FMODF: case ISD::FSINCOS: R = PromoteFloatRes_UnaryWithTwoFPResults(N); break; - case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; case ISD::STRICT_FP_ROUND: R = PromoteFloatRes_STRICT_FP_ROUND(N); @@ -3228,6 +3228,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break; + case ISD::FMODF: case ISD::FSINCOS: R = SoftPromoteHalfRes_UnaryWithTwoFPResults(N); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 6ad08bce44b0a..416da1bb7bfcf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -454,6 +454,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UMULO: case ISD::FCANONICALIZE: case ISD::FFREXP: + case ISD::FMODF: case ISD::FSINCOS: case ISD::SADDSAT: case ISD::UADDSAT: @@ -1223,6 +1224,14 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; break; } + case ISD::FMODF: { + RTLIB::Libcall LC = + RTLIB::getMODF(Node->getValueType(0).getVectorElementType()); + if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, + /*CallRetResNo=*/0)) + return; + break; + } case ISD::VECTOR_COMPRESS: Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG)); return; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1000235ab4061..adafbe7cdcaa6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -133,6 +133,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADDRSPACECAST: R = ScalarizeVecRes_ADDRSPACECAST(N); break; + case ISD::FMODF: case ISD::FFREXP: case ISD::FSINCOS: R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo); @@ -1261,6 +1262,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADDRSPACECAST: SplitVecRes_ADDRSPACECAST(N, Lo, Hi); break; + case ISD::FMODF: case ISD::FFREXP: case ISD::FSINCOS: SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi); @@ -4783,6 +4785,7 @@ void
DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FSHR: Res = WidenVecRes_Ternary(N); break; + case ISD::FMODF: case ISD::FFREXP: case ISD::FSINCOS: { if (!unrollExpandedOp()) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 428e7a316d247..6833f6c183d64 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6977,6 +6977,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; + case Intrinsic::modf: case Intrinsic::sincos: case Intrinsic::frexp: { unsigned Opcode; @@ -6986,6 +6987,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::sincos: Opcode = ISD::FSINCOS; break; + case Intrinsic::modf: + Opcode = ISD::FMODF; + break; case Intrinsic::frexp: Opcode = ISD::FFREXP; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index f63c8dd3df1c8..7b1a2d640a2bd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -219,6 +219,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCOS: return "fcos"; case ISD::STRICT_FCOS: return "strict_fcos"; case ISD::FSINCOS: return "fsincos"; + case ISD::FMODF: return "fmodf"; case ISD::FTAN: return "ftan"; case ISD::STRICT_FTAN: return "strict_ftan"; case ISD::FASIN: return "fasin"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9c56912aa6ba0..1f39ec205c517 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -407,6 +407,11 @@ RTLIB::Libcall RTLIB::getFSINCOS(EVT RetVT) { SINCOS_PPCF128); } +RTLIB::Libcall RTLIB::getMODF(EVT RetVT) { + return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128, + MODF_PPCF128); +} + RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4], AtomicOrdering Order, uint64_t MemSize) { @@ -775,9 +780,9 @@ void TargetLoweringBase::initActions() { setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); // These library functions default to expand. - setOperationAction( - {ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP, ISD::FSINCOS}, VT, - Expand); + setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP, + ISD::FSINCOS, ISD::FMODF}, + VT, Expand); // These operations default to expand for vector types. 
if (VT.isVector()) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8617377ffc55b..558a8b03bda97 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -735,19 +735,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote); } - for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, - ISD::FCOS, ISD::FSIN, ISD::FSINCOS, - ISD::FACOS, ISD::FASIN, ISD::FATAN, - ISD::FATAN2, ISD::FCOSH, ISD::FSINH, - ISD::FTANH, ISD::FTAN, ISD::FEXP, - ISD::FEXP2, ISD::FEXP10, ISD::FLOG, - ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM, - ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS, - ISD::STRICT_FSIN, ISD::STRICT_FACOS, ISD::STRICT_FASIN, - ISD::STRICT_FATAN, ISD::STRICT_FATAN2, ISD::STRICT_FCOSH, - ISD::STRICT_FSINH, ISD::STRICT_FTANH, ISD::STRICT_FEXP, - ISD::STRICT_FEXP2, ISD::STRICT_FLOG, ISD::STRICT_FLOG2, - ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) { + for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI, + ISD::FCOS, ISD::FSIN, ISD::FSINCOS, + ISD::FMODF, ISD::FACOS, ISD::FASIN, + ISD::FATAN, ISD::FATAN2, ISD::FCOSH, + ISD::FSINH, ISD::FTANH, ISD::FTAN, + ISD::FEXP, ISD::FEXP2, ISD::FEXP10, + ISD::FLOG, ISD::FLOG2, ISD::FLOG10, + ISD::STRICT_FREM, ISD::STRICT_FPOW, ISD::STRICT_FPOWI, + ISD::STRICT_FCOS, ISD::STRICT_FSIN, ISD::STRICT_FACOS, + ISD::STRICT_FASIN, ISD::STRICT_FATAN, ISD::STRICT_FATAN2, + ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH, + ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG, + ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) { setOperationAction(Op, MVT::f16, Promote); setOperationAction(Op, MVT::v4f16, Expand); setOperationAction(Op, MVT::v8f16, Expand); diff --git a/llvm/test/CodeGen/AArch64/llvm.modf.ll b/llvm/test/CodeGen/AArch64/llvm.modf.ll new file mode 100644 index 0000000000000..41fe796daca86 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/llvm.modf.ll @@ -0,0 +1,255 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s + +define { half, half } @test_modf_f16(half %a) { +; CHECK-LABEL: test_modf_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr s1, [sp, #12] +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: fcvt h1, s1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { half, half } @llvm.modf.f16(half %a) + ret { half, half } %result +} + +define half @test_modf_f16_only_use_fractional_part(half %a) { +; CHECK-LABEL: test_modf_f16_only_use_fractional_part: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: bl modff +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { half, half } @llvm.modf.f16(half %a) + %result.0 = extractvalue { half, half } %result, 0 + ret half %result.0 +} + +define half @test_modf_f16_only_use_integral_part(half %a) { +; CHECK-LABEL: test_modf_f16_only_use_integral_part: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr s0, [sp, #12] +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { half, half } @llvm.modf.f16(half %a) + %result.1 = extractvalue { half, half } %result, 1 + ret half %result.1 +} + +define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) { +; CHECK-LABEL: test_modf_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: add x0, sp, #40 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: add x0, sp, #56 +; CHECK-NEXT: mov h1, v1.h[2] +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.h[1], v1.h[0] +; CHECK-NEXT: str q2, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: add x0, sp, #60 +; CHECK-NEXT: mov h1, v1.h[3] +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v1.h[2], v2.h[0] +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldp s2, s1, [sp, #40] +; CHECK-NEXT: fcvt h4, s0 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: fcvt h3, s1 +; CHECK-NEXT: fcvt h1, s2 +; CHECK-NEXT: ldr s2, [sp, #56] +; CHECK-NEXT: mov v0.h[3], v4.h[0] +; CHECK-NEXT: fcvt h2, s2 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov v1.h[1], v3.h[0] +; CHECK-NEXT: ldr s3, [sp, #60] +; CHECK-NEXT: mov v1.h[2], v2.h[0] +; CHECK-NEXT: fcvt h2, s3 +; CHECK-NEXT: mov v1.h[3], v2.h[0] +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a) + ret { <2 x half>, <2 x half> } %result +} + +define { float, float } @test_modf_f32(float %a) { +; CHECK-LABEL: test_modf_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr s1, [sp, #12] +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { float, float } @llvm.modf.f32(float %a) + ret { float, float } %result +} + +define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) { +; CHECK-LABEL: test_modf_v3f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: add x0, sp, #56 +; CHECK-NEXT: add x19, sp, #56 +; CHECK-NEXT: bl modff +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: add x0, sp, #60 +; CHECK-NEXT: add x20, sp, #60 +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov s0, v0.s[2] +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr s1, [sp, #44] +; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v1.s }[1], [x19] +; CHECK-NEXT: mov v2.s[2], v0.s[0] +; CHECK-NEXT: ld1 { v1.s }[2], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret + %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a) + ret { <3 x float>, <3 x float> } %result +} + +define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) { +; CHECK-LABEL: test_modf_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #40 +; CHECK-NEXT: add x19, sp, #40 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl modff +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl modff +; CHECK-NEXT: ldr s1, [sp, #44] +; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: ld1 { v1.s }[1], [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], v2.s[0] +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a) + ret { <2 x float>, <2 x float> } %result +} 
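Note (sketch, assuming the standard libm contract): the scalar CHECK lines above all share one shape: the compiler passes a stack slot in x0, the library call returns the fractional part in the first FP register, and the integral part is reloaded from the slot. Expressed as hand-written IR rather than compiler output, the lowering performed by expandMultipleResultFPLibCall with /*CallRetResNo=*/0 is roughly the following (the function name is illustrative only):

  define { float, float } @modf_via_libcall(float %x) {
    %ip.slot = alloca float                           ; stack slot for the integral part
    %fp = call float @modff(float %x, ptr %ip.slot)   ; libm returns the fractional part
    %ip = load float, ptr %ip.slot
    %ret0 = insertvalue { float, float } poison, float %fp, 0
    %ret1 = insertvalue { float, float } %ret0, float %ip, 1
    ret { float, float } %ret1
  }

  declare float @modff(float, ptr)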
+ +define { double, double } @test_modf_f64(double %a) { +; CHECK-LABEL: test_modf_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: bl modf +; CHECK-NEXT: ldr d1, [sp, #8] +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { double, double } @llvm.modf.f64(double %a) + ret { double, double } %result +} + +define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_modf_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: add x0, sp, #32 +; CHECK-NEXT: add x19, sp, #32 +; CHECK-NEXT: bl modf +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #40 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl modf +; CHECK-NEXT: ldr d1, [sp, #40] +; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1 { v1.d }[1], [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} diff --git a/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll b/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll new file mode 100644 index 0000000000000..78dbc09a57afd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -check-prefix=SLEEF +; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s -check-prefix=ARMPL + +define <4 x float> @test_modf_v4f32(<4 x float> %x, ptr %out_integral) { +; SLEEF-LABEL: test_modf_v4f32: +; SLEEF: // %bb.0: +; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; SLEEF-NEXT: .cfi_def_cfa_offset 16 +; SLEEF-NEXT: .cfi_offset w30, -16 +; SLEEF-NEXT: bl _ZGVnN4vl4_modff +; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SLEEF-NEXT: ret +; +; ARMPL-LABEL: test_modf_v4f32: +; ARMPL: // %bb.0: +; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; ARMPL-NEXT: .cfi_def_cfa_offset 16 +; ARMPL-NEXT: .cfi_offset w30, -16 +; ARMPL-NEXT: bl armpl_vmodfq_f32 +; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; ARMPL-NEXT: ret + %result = call { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %x) + %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0 + %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1 + store <4 x float> %result.1, ptr %out_integral, align 4 + ret <4 x float> %result.0 +} + +define <2 x double> @test_modf_v2f64(<2 x double> %x, ptr %out_integral) { +; SLEEF-LABEL: test_modf_v2f64: +; SLEEF: // %bb.0: +; SLEEF-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; SLEEF-NEXT: .cfi_def_cfa_offset 16 +; SLEEF-NEXT: .cfi_offset w30, -16 +; SLEEF-NEXT: bl _ZGVnN2vl8_modf +; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SLEEF-NEXT: ret +; +; ARMPL-LABEL: test_modf_v2f64: +; ARMPL: // %bb.0: +; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; ARMPL-NEXT: .cfi_def_cfa_offset 16 +; ARMPL-NEXT: .cfi_offset w30, -16 +; ARMPL-NEXT: bl armpl_vmodfq_f64 +; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; ARMPL-NEXT: ret + %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %x) + %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 + %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 + store <2 x double> %result.1, ptr %out_integral, align 8 + ret <2 x double> %result.0 +} + +define <vscale x 4 x float> @test_modf_nxv4f32(<vscale x 4 x float> %x, ptr %out_integral) { +; SLEEF-LABEL: test_modf_nxv4f32: +; SLEEF: // %bb.0: +; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; SLEEF-NEXT: .cfi_def_cfa_offset 16 +; SLEEF-NEXT: .cfi_offset w30, -16 +; SLEEF-NEXT: bl _ZGVsNxvl4_modff +; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SLEEF-NEXT: ret +; +; ARMPL-LABEL: test_modf_nxv4f32: +; ARMPL: // %bb.0: +; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; ARMPL-NEXT: .cfi_def_cfa_offset 16 +; ARMPL-NEXT: .cfi_offset w30, -16 +; ARMPL-NEXT: ptrue p0.s +; ARMPL-NEXT: bl armpl_svmodf_f32_x +; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; ARMPL-NEXT: ret + %result = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.modf.nxv4f32(<vscale x 4 x float> %x) + %result.0 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 0 + %result.1 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 1 + store <vscale x 4 x float> %result.1, ptr %out_integral, align 4 + ret <vscale x 4 x float> %result.0 +} + +define <vscale x 2 x double> @test_modf_nxv2f64(<vscale x 2 x double> %x, ptr %out_integral) { +; SLEEF-LABEL: test_modf_nxv2f64: +; SLEEF: // %bb.0: +; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; SLEEF-NEXT: .cfi_def_cfa_offset 16 +; SLEEF-NEXT: .cfi_offset w30, -16 +; SLEEF-NEXT: bl _ZGVsNxvl8_modf +; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; SLEEF-NEXT: ret +; +; ARMPL-LABEL: test_modf_nxv2f64: +; ARMPL: // %bb.0: +; ARMPL-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill +; ARMPL-NEXT: .cfi_def_cfa_offset 16 +; ARMPL-NEXT: .cfi_offset w30, -16 +; ARMPL-NEXT: ptrue p0.d +; ARMPL-NEXT: bl armpl_svmodf_f64_x +; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; ARMPL-NEXT: ret + %result = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.modf.nxv2f64(<vscale x 2 x double> %x) + %result.0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 0 + %result.1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 1 + store <vscale x 2 x double> %result.1, ptr %out_integral, align 8 + ret <vscale x 2 x double> %result.0 +} + +define <4 x float> @modf_store_merging_load_before_store(<4 x float> %x, ptr %out_integral) { +; SLEEF-LABEL: modf_store_merging_load_before_store: +; SLEEF: // %bb.0: +; SLEEF-NEXT: sub sp, sp, #32 +; SLEEF-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; SLEEF-NEXT: .cfi_def_cfa_offset 32 +; SLEEF-NEXT: .cfi_offset w30, -16 +; SLEEF-NEXT: ldr q1, [x0] +; SLEEF-NEXT: str q1, [sp] // 16-byte Folded Spill +; SLEEF-NEXT: bl _ZGVnN4vl4_modff +; SLEEF-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; SLEEF-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; SLEEF-NEXT: fadd v0.4s, v1.4s, v0.4s +; SLEEF-NEXT: add sp, sp, #32 +; SLEEF-NEXT: ret +; +; ARMPL-LABEL: modf_store_merging_load_before_store: +; ARMPL: // %bb.0: +; ARMPL-NEXT: sub sp, sp, #32 +; ARMPL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; ARMPL-NEXT: .cfi_def_cfa_offset 32 +; ARMPL-NEXT: .cfi_offset w30, -16 +; ARMPL-NEXT: ldr q1, [x0] +; ARMPL-NEXT: str q1, [sp] // 16-byte Folded Spill +; ARMPL-NEXT: bl armpl_vmodfq_f32 +; ARMPL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; ARMPL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; ARMPL-NEXT: fadd v0.4s, v1.4s, v0.4s +; ARMPL-NEXT: add sp, sp, #32 +; ARMPL-NEXT: ret + %result = call { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %x) + %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0 + %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1 + %original_integral = load <4 x float>, ptr %out_integral, align 4 + store <4 x float> %result.1, ptr %out_integral, align 4 + %return = fadd <4 x float> %original_integral, %result.0 + ret <4 x float> %return +}
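Note: the vector-library tests above rely on the same out-pointer convention at vector width, which is why the vector expansion also passes /*CallRetResNo=*/0: the SLEEF and ArmPL variants return the fractional parts in the vector result and store the integral parts through the pointer operand. A hand-written sketch of the call shape, with the signature assumed from the CHECK lines rather than taken from a header:

  define <4 x float> @sleef_modf_call_shape(<4 x float> %x, ptr %out_integral) {
    ; fractional parts returned directly; integral parts stored to %out_integral
    %fp = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> %x, ptr %out_integral)
    ret <4 x float> %fp
  }

  declare <4 x float> @_ZGVnN4vl4_modff(<4 x float>, ptr)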