diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 009d69b2b9433..9df30638b1318 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -487,6 +487,17 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                      unsigned Width);
 
+  /// Matches a vector constant operand equal to 1 / 2^FBits with
+  /// 1 <= FBits <= FloatWidth and returns FBits in \p FixedPos. Template form
+  /// of the selector below with the width fixed at compile time.
+  template <unsigned FloatWidth>
+  bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
+    return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
+  }
+
+  bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
+                                        unsigned Width);
+
   bool SelectCMP_SWAP(SDNode *N);
 
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
@@ -3952,6 +3963,133 @@ static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
   return true;
 }
 
+/// Matches \p N as a vector floating-point constant whose lanes all hold the
+/// same power of two (or, when \p IsReciprocal is set, the exact reciprocal of
+/// one) and stores log2 of that power of two in \p FixedPos as an i32 target
+/// constant.
+///
+/// The constant is read through operand 0 of \p N, which must be an
+/// AArch64ISD::DUP, AArch64ISD::MOVIshift, ISD::BUILD_VECTOR,
+/// ISD::SPLAT_VECTOR or a scalar ISD::Constant. Only f16, f32 and f64 lanes are
+/// handled.
+///
+/// \returns true iff a constant was recognised and the number of fractional
+/// bits lies in [1, FloatWidth].
+static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG,
+                                                         SDValue N,
+                                                         SDValue &FixedPos,
+                                                         unsigned FloatWidth,
+                                                         bool IsReciprocal) {
+  if (N->getNumOperands() < 1)
+    return false;
+
+  // Choose the APFloat semantics first so that unsupported lane types are
+  // rejected before any work that depends on the lane width.
+  const EVT FloatEltVT = N.getValueType().getScalarType();
+  const fltSemantics *Semantics = nullptr;
+  if (FloatEltVT == MVT::f16)
+    Semantics = &APFloat::IEEEhalf();
+  else if (FloatEltVT == MVT::f32)
+    Semantics = &APFloat::IEEEsingle();
+  else if (FloatEltVT == MVT::f64)
+    Semantics = &APFloat::IEEEdouble();
+  else
+    return false;
+  const unsigned EltBits = FloatEltVT.getSizeInBits();
+
+  // NOTE(review): only operand 0 is inspected; the opcode of N itself is not
+  // restricted here - confirm that every producer of such a node is a plain
+  // reinterpretation/materialisation of that operand.
+  SDValue ImmediateNode = N.getOperand(0);
+  const unsigned ImmOpc = ImmediateNode.getOpcode();
+  const EVT ImmVT = ImmediateNode.getValueType();
+
+  // A cast whose source is a scalar does not describe a per-lane constant.
+  if ((N.getOpcode() == ISD::BITCAST || N.getOpcode() == AArch64ISD::NVCAST) &&
+      !ImmVT.isVector())
+    return false;
+
+  if (ImmOpc != AArch64ISD::DUP && ImmOpc != AArch64ISD::MOVIshift &&
+      ImmOpc != ISD::BUILD_VECTOR && ImmOpc != ISD::Constant &&
+      ImmOpc != ISD::SPLAT_VECTOR)
+    return false;
+
+  // The bits extracted below describe one lane of ImmediateNode. They describe
+  // one lane of N only if both vectors use the same lane width: e.g. a v2i32
+  // MOVIshift reinterpreted as v4f16 alternates between two different f16
+  // values and must not be treated as a splat.
+  if (ImmVT.isVector() && ImmVT.getScalarSizeInBits() != EltBits)
+    return false;
+
+  // Every vector form must carry an integer constant as its first operand.
+  if (ImmOpc != ISD::Constant &&
+      !isa<ConstantSDNode>(ImmediateNode.getOperand(0)))
+    return false;
+
+  if (ImmOpc == ISD::BUILD_VECTOR) {
+    // Operand 0 only stands for the whole vector if the elements form a
+    // constant splat that repeats within a single lane. A wider repeating
+    // pattern (e.g. <a, b, a, b>) is also reported as a splat, so check the
+    // splat width as well.
+    auto *BVN = cast<BuildVectorSDNode>(ImmediateNode.getNode());
+    APInt SplatValue;
+    APInt SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                              HasAnyUndefs) ||
+        SplatBitSize > EltBits)
+      return false;
+  }
+
+  // Extract the raw bits of one lane.
+  APInt Imm;
+  if (ImmOpc == AArch64ISD::MOVIshift) {
+    // Operand 0 is the immediate and operand 1 the left-shift amount.
+    Imm = APInt(EltBits, ImmediateNode.getConstantOperandVal(0)
+                             << ImmediateNode.getConstantOperandVal(1));
+  } else if (ImmOpc == ISD::Constant) {
+    // A scalar operand is decoded as an AdvSIMD modified-immediate type 11
+    // encoding, truncated to the lane width.
+    // NOTE(review): presumably this is the operand of an FMOV-style node and
+    // type 11 is only the right encoding for f32 lanes - confirm for f16/f64.
+    const auto Encoded = static_cast<uint8_t>(
+        cast<ConstantSDNode>(ImmediateNode)->getZExtValue());
+    Imm = APInt(64, AArch64_AM::decodeAdvSIMDModImmType11(Encoded))
+              .zextOrTrunc(EltBits);
+  } else {
+    // DUP / SPLAT_VECTOR / BUILD_VECTOR: the scalar operand may be wider than
+    // the lane (e.g. an i32 operand for i16 lanes), and APFloat requires the
+    // bit width to match its semantics exactly.
+    Imm = cast<ConstantSDNode>(ImmediateNode.getOperand(0))
+              ->getAPIntValue()
+              .zextOrTrunc(EltBits);
+  }
+
+  // Reinterpret the lane bits as a floating-point value.
+  APFloat FVal(*Semantics, Imm);
+
+  // getExactInverse() fails unless the reciprocal is exactly representable.
+  if (IsReciprocal && !FVal.getExactInverse(&FVal))
+    return false;
+
+  // One extra bit so that 2^EltBits is still representable.
+  bool IsExact = false;
+  APSInt IntVal(EltBits + 1, /*isUnsigned=*/true);
+  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+  if (!IsExact || !IntVal.isPowerOf2())
+    return false;
+
+  // FBits == 0 would mean no scaling at all, and more than FloatWidth
+  // fractional bits are rejected; FloatWidth is supplied by the caller.
+  const unsigned FBits = IntVal.logBase2();
+  if (FBits == 0 || FBits > FloatWidth)
+    return false;
+
+  FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
+  return true;
+}
+
 bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
                                                    unsigned RegWidth) {
   return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
@@ -3965,6 +4103,12 @@ bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
                                             true);
 }
 
+bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(
+    SDValue N, SDValue &FixedPos, unsigned FloatWidth) {
+  return checkCVTFixedPointOperandWithFBitsForVectors(CurDAG, N, FixedPos,
+                                                      FloatWidth, true);
+}
+
 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
 // of the string and obtains the integer values from them and combines these
 // into a single value to be used in the MRS/MSR instruction.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f90f12b5ac3c7..5613128d0e9fd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8473,6 +8473,56 @@ def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))
             (SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>;
 }
 
+// Select fmul(sitofp(x), C) where C is a constant reciprocal of a power of two.
+// For both scalar and vector inputs, if we have sitofp(X) * C (where C is
+// 1/2^N), this can be optimized to scvtf(X, #N), i.e. N fractional bits.
+class fixedpoint_recip_vec_i16<ValueType FloatVT>
+    : ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<16>", []>;
+class fixedpoint_recip_vec_i32<ValueType FloatVT>
+    : ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<32>", []>;
+class fixedpoint_recip_vec_i64<ValueType FloatVT>
+    : ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<64>", []>;
+// NOTE(review): the template arguments of this SDNodeXForm (operand node and
+// code fragment) are missing from the patch as received and cannot be
+// recovered from context; they must be restored before this can be parsed.
+def fixedpoint_recip_vec_xform : SDNodeXForm;
+
+def fixedpoint_recip_v2f32_v2i32 : fixedpoint_recip_vec_i32<v2f32>;
+def fixedpoint_recip_v4f32_v4i32 : fixedpoint_recip_vec_i32<v4f32>;
+def fixedpoint_recip_v2f64_v2i64 : fixedpoint_recip_vec_i64<v2f64>;
+
+def fixedpoint_recip_v4f16_v4i16 : fixedpoint_recip_vec_i16<v4f16>;
+def fixedpoint_recip_v8f16_v8i16 : fixedpoint_recip_vec_i16<v8f16>;
+
+let Predicates = [HasNEON] in {
+  def : Pat<(v2f32(fmul(sint_to_fp(v2i32 V64:$Rn)),
+            fixedpoint_recip_v2f32_v2i32:$scale)),
+            (v2f32(SCVTFv2i32_shift(v2i32 V64:$Rn),
+            (fixedpoint_recip_vec_xform fixedpoint_recip_v2f32_v2i32:$scale)))>;
+
+  def : Pat<(v4f32(fmul(sint_to_fp(v4i32 FPR128:$Rn)),
+            fixedpoint_recip_v4f32_v4i32:$scale)),
+            (v4f32(SCVTFv4i32_shift(v4i32 FPR128:$Rn),
+            (fixedpoint_recip_vec_xform fixedpoint_recip_v4f32_v4i32:$scale)))>;
+
+  def : Pat<(v2f64(fmul(sint_to_fp(v2i64 FPR128:$Rn)),
+            fixedpoint_recip_v2f64_v2i64:$scale)),
+            (v2f64(SCVTFv2i64_shift(v2i64 FPR128:$Rn),
+            (fixedpoint_recip_vec_xform fixedpoint_recip_v2f64_v2i64:$scale)))>;
+}
+
+let Predicates = [HasNEON, HasFullFP16] in {
+  def : Pat<(v4f16(fmul(sint_to_fp(v4i16 V64:$Rn)),
+            fixedpoint_recip_v4f16_v4i16:$scale)),
+            (v4f16(SCVTFv4i16_shift(v4i16 V64:$Rn),
+            (fixedpoint_recip_vec_xform fixedpoint_recip_v4f16_v4i16:$scale)))>;
+
+  def : Pat<(v8f16(fmul(sint_to_fp(v8i16 FPR128:$Rn)),
+            fixedpoint_recip_v8f16_v8i16:$scale)),
+            (v8f16(SCVTFv8i16_shift(v8i16 FPR128:$Rn),
+            (fixedpoint_recip_vec_xform fixedpoint_recip_v8f16_v8i16:$scale)))>;
+}
+
 // X << 1 ==> X + X
 class SHLToADDPat<ValueType ty, RegisterClass regtype>
   : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
diff --git a/llvm/test/CodeGen/AArch64/scvtf-div-mul-combine.ll b/llvm/test/CodeGen/AArch64/scvtf-div-mul-combine.ll
new file mode 100644
index 0000000000000..ed967cf1f13f5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/scvtf-div-mul-combine.ll
@@ -0,0 +1,487 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -mattr=+fullfp16 -o - %s | FileCheck %s
+
+; This test file verifies that fdiv(sitofp(x), C),
+; where C is a constant power of two,
+; is optimized to scvtf(X, shift_amount).
+; This typically involves an implicit fdiv -> fmul_reciprocal transformation.
+
+; Scalar f32 (from i32)
+define float @test_f32_div_const(i32 %in) {
+; CHECK-LABEL: test_f32_div_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #4
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp i32 %in to float
+  %div.i = fdiv float %vcvt.i, 16.0
+  ret float %div.i
+}
+
+; Scalar f64 (from i64)
+define double @test_f64_div_const(i64 %in) {
+; CHECK-LABEL: test_f64_div_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #4
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp i64 %in to double
+  %div.i = fdiv double %vcvt.i, 16.0
+  ret double %div.i
+}
+
+; Vector v2f32 (from v2i32)
+define <2 x float> @test_v2f32_div_const_2(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 2.0, float 2.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_4(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #2
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 4.0, float 4.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_8(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #3
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 8.0, float 8.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_16(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #4
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 16.0, float 16.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_32(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #5
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 32.0, float 32.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_64(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #6
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 64.0, float 64.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_128(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #7
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 128.0, float 128.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_256(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #8
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 256.0, float 256.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_512(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_512:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #9
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 512.0, float 512.0>
+  ret <2 x float> %div.i
+}
+
+define <2 x float> @test_v2f32_div_const_1024(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_const_1024:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2s v0, v0, #10
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 1024.0, float 1024.0>
+  ret <2 x float> %div.i
+}
+
+; Vector v4f32 (from v4i32)
+define <4 x float> @test_v4f32_div_const_2(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float 2.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_4(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #2
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_8(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #3
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 8.0, float 8.0, float 8.0, float 8.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_16(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #4
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 16.0, float 16.0, float 16.0, float 16.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_32(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #5
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 32.0, float 32.0, float 32.0, float 32.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_64(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #6
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 64.0, float 64.0, float 64.0, float 64.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_128(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #7
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 128.0, float 128.0, float 128.0, float 128.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_256(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #8
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 256.0, float 256.0, float 256.0, float 256.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_512(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_512:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #9
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 512.0, float 512.0, float 512.0, float 512.0>
+  ret <4 x float> %div.i
+}
+
+define <4 x float> @test_v4f32_div_const_1024(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_const_1024:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4s v0, v0, #10
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 1024.0, float 1024.0, float 1024.0, float 1024.0>
+  ret <4 x float> %div.i
+}
+
+; Vector v2f64 (from v2i64)
+define <2 x double> @test_v2f64_div_const(<2 x i64> %in) {
+; CHECK-LABEL: test_v2f64_div_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.2d v0, v0, #4
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i64> %in to <2 x double>
+  %div.i = fdiv <2 x double> %vcvt.i, <double 16.0, double 16.0>
+  ret <2 x double> %div.i
+}
+
+; Vector v4f16 (from v4i16)
+define <4 x half> @test_v4f16_div_const_2(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4h v0, v0, #1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+  %div.i = fdiv <4 x half> %vcvt.i, <half 2.0, half 2.0, half 2.0, half 2.0>
+  ret <4 x half> %div.i
+}
+
+define <4 x half> @test_v4f16_div_const_4(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4h v0, v0, #2
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+  %div.i = fdiv <4 x half> %vcvt.i, <half 4.0, half 4.0, half 4.0, half 4.0>
+  ret <4 x half> %div.i
+}
+
+define <4 x half> @test_v4f16_div_const_8(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4h v0, v0, #3
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+  %div.i = fdiv <4 x half> %vcvt.i, <half 8.0, half 8.0, half 8.0, half 8.0>
+  ret <4 x half> %div.i
+}
+
+define <4 x half> @test_v4f16_div_const_16(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4h v0, v0, #4
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+  %div.i = fdiv <4 x half> %vcvt.i, <half 16.0, half 16.0, half 16.0, half 16.0>
+  ret <4 x half> %div.i
+}
+
+define <4 x half> @test_v4f16_div_const_32(<4 x i16> %in) {
+; CHECK-LABEL: test_v4f16_div_const_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.4h v0, v0, #5
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i16> %in to <4 x half>
+  %div.i = fdiv <4 x half> %vcvt.i, <half 32.0, half 32.0, half 32.0, half 32.0>
+  ret <4 x half> %div.i
+}
+
+; Vector v8f16 (from v8i16)
+define <8 x half> @test_v8f16_div_const_2(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.8h v0, v0, #1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+  %div.i = fdiv <8 x half> %vcvt.i, <half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0>
+  ret <8 x half> %div.i
+}
+
+define <8 x half> @test_v8f16_div_const_4(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.8h v0, v0, #2
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+  %div.i = fdiv <8 x half> %vcvt.i, <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
+  ret <8 x half> %div.i
+}
+
+define <8 x half> @test_v8f16_div_const_8(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.8h v0, v0, #3
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+  %div.i = fdiv <8 x half> %vcvt.i, <half 8.0, half 8.0, half 8.0, half 8.0, half 8.0, half 8.0, half 8.0, half 8.0>
+  ret <8 x half> %div.i
+}
+
+define <8 x half> @test_v8f16_div_const_16(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.8h v0, v0, #4
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+  %div.i = fdiv <8 x half> %vcvt.i, <half 16.0, half 16.0, half 16.0, half 16.0, half 16.0, half 16.0, half 16.0, half 16.0>
+  ret <8 x half> %div.i
+}
+
+define <8 x half> @test_v8f16_div_const_32(<8 x i16> %in) {
+; CHECK-LABEL: test_v8f16_div_const_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf.8h v0, v0, #5
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <8 x i16> %in to <8 x half>
+  %div.i = fdiv <8 x half> %vcvt.i, <half 32.0, half 32.0, half 32.0, half 32.0, half 32.0, half 32.0, half 32.0, half 32.0>
+  ret <8 x half> %div.i
+}
+
+; Vector v2f32 (from v2i32): the divisor is 2^33, out of range for 32-bit lanes, so the fmul must stay.
+define <2 x float> @test_v2f32_div_big_pow2_const(<2 x i32> %in) {
+; CHECK-LABEL: test_v2f32_div_big_pow2_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi.2s v1, #47, lsl #24
+; CHECK-NEXT:    scvtf.2s v0, v0
+; CHECK-NEXT:    fmul.2s v0, v0, v1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
+  ret <2 x float> %div.i
+}
+
+; Vector v4f32 (from v4i32): the divisor is 2^33, out of range for 32-bit lanes, so the fmul must stay.
+define <4 x float> @test_v4f32_div_big_pow2_const(<4 x i32> %in) {
+; CHECK-LABEL: test_v4f32_div_big_pow2_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi.4s v1, #47, lsl #24
+; CHECK-NEXT:    scvtf.4s v0, v0
+; CHECK-NEXT:    fmul.4s v0, v0, v1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000, float 0x4200000000000000, float 0x4200000000000000>
+  ret <4 x float> %div.i
+}
+
+; Vector v2f64 (from v2i64): the divisor is 2^65, out of range for 64-bit lanes, so the fmul must stay.
+define <2 x double> @test_v2f64_div_big_pow2_const(<2 x i64> %in) {
+; CHECK-LABEL: test_v2f64_div_big_pow2_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, #4314448443020935168 // =0x3be0000000000000
+; CHECK-NEXT:    scvtf.2d v0, v0
+; CHECK-NEXT:    dup.2d v1, x8
+; CHECK-NEXT:    fmul.2d v0, v0, v1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i64> %in to <2 x double>
+  %div.i = fdiv <2 x double> %vcvt.i, <double 0x4400000000000000, double 0x4400000000000000>
+  ret <2 x double> %div.i
+}
+
+; Scalar f32 with non-power-of-2 constant
+define float @negative_test_f32_mul_non_pow2_const(i32 %in) {
+; CHECK-LABEL: negative_test_f32_mul_non_pow2_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #36704 // =0x8f60
+; CHECK-NEXT:    scvtf s0, w0
+; CHECK-NEXT:    movk w8, #9666, lsl #16
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    fmul s0, s0, s1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp i32 %in to float
+  %mul.i = fmul float %vcvt.i, 0x3CB851EC00000000
+  ret float %mul.i
+}
+
+
+; Vector v2f32 with non-power-of-2 constant
+define <2 x float> @negative_test_v2f32_mul_non_pow2_const(<2 x i32> %in) {
+; CHECK-LABEL: negative_test_v2f32_mul_non_pow2_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #36704 // =0x8f60
+; CHECK-NEXT:    scvtf.2s v0, v0
+; CHECK-NEXT:    movk w8, #9666, lsl #16
+; CHECK-NEXT:    dup.2s v1, w8
+; CHECK-NEXT:    fmul.2s v0, v0, v1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %mul.i = fmul <2 x float> %vcvt.i, <float 0x3CB851EC00000000, float 0x3CB851EC00000000>
+  ret <2 x float> %mul.i
+}
+
+; Vector v4f32 with non-power-of-2 constant
+define <4 x float> @negative_test_v4f32_mul_non_pow2_const(<4 x i32> %in) {
+; CHECK-LABEL: negative_test_v4f32_mul_non_pow2_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #36704 // =0x8f60
+; CHECK-NEXT:    scvtf.4s v0, v0
+; CHECK-NEXT:    movk w8, #9666, lsl #16
+; CHECK-NEXT:    dup.4s v1, w8
+; CHECK-NEXT:    fmul.4s v0, v0, v1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %mul.i = fmul <4 x float> %vcvt.i, <float 0x3CB851EC00000000, float 0x3CB851EC00000000, float 0x3CB851EC00000000, float 0x3CB851EC00000000>
+  ret <4 x float> %mul.i
+}
+
+; Vector v2f64 with non-power-of-2 constant
+define <2 x double> @negative_test_v2f64_mul_non_pow2_const(<2 x i64> %in) {
+; CHECK-LABEL: negative_test_v2f64_mul_non_pow2_const:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, #5243 // =0x147b
+; CHECK-NEXT:    scvtf.2d v0, v0
+; CHECK-NEXT:    movk x8, #18350, lsl #16
+; CHECK-NEXT:    movk x8, #31457, lsl #32
+; CHECK-NEXT:    movk x8, #16276, lsl #48
+; CHECK-NEXT:    dup.2d v1, x8
+; CHECK-NEXT:    fmul.2d v0, v0, v1
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i64> %in to <2 x double>
+  %mul.i = fmul <2 x double> %vcvt.i, <double 0x3F947AE147AE147B, double 0x3F947AE147AE147B>
+  ret <2 x double> %mul.i
+}