diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 6344bc4664d3b..5d21bb611df4e 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1191,6 +1191,37 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(), ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE, CostKind); + case Intrinsic::vp_reduce_add: + case Intrinsic::vp_reduce_fadd: + case Intrinsic::vp_reduce_mul: + case Intrinsic::vp_reduce_fmul: + case Intrinsic::vp_reduce_and: + case Intrinsic::vp_reduce_or: + case Intrinsic::vp_reduce_xor: { + std::optional<Intrinsic::ID> RedID = + VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID()); + assert(RedID.has_value()); + unsigned RedOp = getArithmeticReductionInstruction(*RedID); + return getArithmeticReductionCost(RedOp, + cast<VectorType>(ICA.getArgTypes()[1]), + ICA.getFlags(), CostKind); + } + case Intrinsic::vp_reduce_smax: + case Intrinsic::vp_reduce_smin: + case Intrinsic::vp_reduce_umax: + case Intrinsic::vp_reduce_umin: + case Intrinsic::vp_reduce_fmax: + case Intrinsic::vp_reduce_fmaximum: + case Intrinsic::vp_reduce_fmin: + case Intrinsic::vp_reduce_fminimum: { + std::optional<Intrinsic::ID> RedID = + VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID()); + assert(RedID.has_value()); + Intrinsic::ID MinMaxID = getMinMaxReductionIntrinsicOp(*RedID); + return getMinMaxReductionCost(MinMaxID, + cast<VectorType>(ICA.getArgTypes()[1]), + ICA.getFlags(), CostKind); + } } if (ST->hasVInstructions() && RetTy->isVectorTy()) { diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll index 6032ae01aa718..5d5750b4e9c3e 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-add.ll @@ -14,6 +14,14 @@ define i32 @reduce_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i1' @@ -25,6 +33,14 @@ define i32 @reduce_i1(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 
@llvm.vector.reduce.add.v32i1(<32 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef) @@ -35,6 +51,15 @@ define i32 @reduce_i1(i32 %arg) { %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef) %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef) 
%V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef) + + %V1_vp = call i1 @llvm.vp.reduce.add.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i1 @llvm.vp.reduce.add.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i1 @llvm.vp.reduce.add.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i1 @llvm.vp.reduce.add.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i1 @llvm.vp.reduce.add.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i1 @llvm.vp.reduce.add.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i1 @llvm.vp.reduce.add.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i1 @llvm.vp.reduce.add.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -48,6 +73,14 @@ define i32 @reduce_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, 
i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i8' @@ -59,6 +92,14 @@ define i32 @reduce_i8(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) @@ -69,6 +110,15 @@ define i32 @reduce_i8(i32 %arg) { %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef) + + %V1_vp = call i8 @llvm.vp.reduce.add.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i8 @llvm.vp.reduce.add.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i8 @llvm.vp.reduce.add.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i8 @llvm.vp.reduce.add.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -82,6 +132,14 @@ define i32 @reduce_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 
@llvm.vector.reduce.add.v32i16(<32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i16' @@ -93,6 +151,14 @@ define i32 @reduce_i16(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 
@llvm.vector.reduce.add.v32i16(<32 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef) @@ -103,6 +169,15 @@ define i32 @reduce_i16(i32 %arg) { %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef) %V64 = call i16 
@llvm.vector.reduce.add.v64i16(<64 x i16> undef) %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef) + + %V1_vp = call i16 @llvm.vp.reduce.add.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i16 @llvm.vp.reduce.add.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i16 @llvm.vp.reduce.add.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i16 @llvm.vp.reduce.add.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i16 @llvm.vp.reduce.add.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i16 @llvm.vp.reduce.add.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i16 @llvm.vp.reduce.add.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i16 @llvm.vp.reduce.add.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -116,6 +191,14 @@ define i32 @reduce_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i32' @@ -127,6 +210,14 @@ define i32 @reduce_i32(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef) @@ -137,6 +228,15 @@ define i32 @reduce_i32(i32 %arg) { %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef) %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef) %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef) + + %V1_vp = call i32 @llvm.vp.reduce.add.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i32 @llvm.vp.reduce.add.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i32 @llvm.vp.reduce.add.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i32 @llvm.vp.reduce.add.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i32 @llvm.vp.reduce.add.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i32 @llvm.vp.reduce.add.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i32 @llvm.vp.reduce.add.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i32 @llvm.vp.reduce.add.v128i32(i32 
undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -150,6 +250,14 @@ define i32 @reduce_i64(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i64' @@ -161,6 +269,14 @@ define i32 @reduce_i64(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 
undef ; %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) @@ -171,6 +287,15 @@ define i32 @reduce_i64(i32 %arg) { %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef) %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef) %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef) + + %V1_vp = call i64 @llvm.vp.reduce.add.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i64 @llvm.vp.reduce.add.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i64 @llvm.vp.reduce.add.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i64 @llvm.vp.reduce.add.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ret i32 undef } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll index a8eb4e9a280dd..aa24013aa5615 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-and.ll @@ -17,6 +17,17 @@ define i32 @reduce_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 
@llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i1' @@ -31,6 +42,17 @@ define i32 @reduce_i1(i32 %arg) { ; SIZE-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512_vp 
= call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef) @@ -44,6 +66,18 @@ define i32 @reduce_i1(i32 %arg) { %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef) %V512 = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> undef) %V1024 = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> undef) + + %V1_vp = call i1 @llvm.vp.reduce.and.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i1 @llvm.vp.reduce.and.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i1 @llvm.vp.reduce.and.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i1 @llvm.vp.reduce.and.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i1 @llvm.vp.reduce.and.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i1 @llvm.vp.reduce.and.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i1 @llvm.vp.reduce.and.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i1 @llvm.vp.reduce.and.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) + %V256_vp = call i1 @llvm.vp.reduce.and.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef) + %V512_vp = call i1 @llvm.vp.reduce.and.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef) + %V1024_vp = call i1 @llvm.vp.reduce.and.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef) ret i32 undef } @@ -57,6 +91,14 @@ define i32 @reduce_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i8' @@ -68,6 +110,14 @@ define i32 @reduce_i8(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> undef) @@ -78,6 +128,15 @@ define i32 @reduce_i8(i32 %arg) { %V32 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) %V64 = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> undef) %V128 = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> undef) + + %V1_vp = call i8 
@llvm.vp.reduce.and.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i8 @llvm.vp.reduce.and.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i8 @llvm.vp.reduce.and.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i8 @llvm.vp.reduce.and.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i8 @llvm.vp.reduce.and.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i8 @llvm.vp.reduce.and.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i8 @llvm.vp.reduce.and.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i8 @llvm.vp.reduce.and.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -91,6 +150,14 @@ define i32 @reduce_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i16' @@ -102,6 +169,14 @@ define i32 @reduce_i16(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> undef) @@ -112,6 +187,15 @@ define i32 @reduce_i16(i32 %arg) { %V32 = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> undef) %V64 = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> undef) %V128 = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> undef) + + %V1_vp = call i16 @llvm.vp.reduce.and.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i16 @llvm.vp.reduce.and.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i16 @llvm.vp.reduce.and.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i16 @llvm.vp.reduce.and.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i16 @llvm.vp.reduce.and.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i16 @llvm.vp.reduce.and.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i16 @llvm.vp.reduce.and.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i16 @llvm.vp.reduce.and.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -125,6 +209,14 @@ define i32 @reduce_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost 
of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i32' @@ -136,6 +228,14 @@ define i32 @reduce_i32(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> undef) @@ -146,6 +246,15 @@ define i32 @reduce_i32(i32 %arg) { %V32 = call i32 @llvm.vector.reduce.and.v32i32(<32 x 
i32> undef) %V64 = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> undef) %V128 = call i32 @llvm.vector.reduce.and.v128i32(<128 x i32> undef) + + %V1_vp = call i32 @llvm.vp.reduce.and.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i32 @llvm.vp.reduce.and.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i32 @llvm.vp.reduce.and.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i32 @llvm.vp.reduce.and.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i32 @llvm.vp.reduce.and.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i32 @llvm.vp.reduce.and.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i32 @llvm.vp.reduce.and.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -159,6 +268,14 @@ define i32 @reduce_i64(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i64' @@ -170,6 +287,14 @@ define i32 @reduce_i64(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) @@ -180,6 +305,15 @@ define i32 @reduce_i64(i32 %arg) { %V32 = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> undef) %V64 = call i64 @llvm.vector.reduce.and.v64i64(<64 x i64> undef) %V128 = call i64 @llvm.vector.reduce.and.v128i64(<128 x i64> undef) + + %V1_vp = call i64 @llvm.vp.reduce.and.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i64 @llvm.vp.reduce.and.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i64 @llvm.vp.reduce.and.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i64 @llvm.vp.reduce.and.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i64 @llvm.vp.reduce.and.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i64 @llvm.vp.reduce.and.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i64 @llvm.vp.reduce.and.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i64 
@llvm.vp.reduce.and.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ret i32 undef } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll index 1762f701a9b2d..31c4988170f19 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll @@ -19,6 +19,20 @@ define void @reduce_fadd_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> 
undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_bfloat' @@ -36,6 +50,20 @@ define void @reduce_fadd_bfloat() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef) 
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef, <vscale x 32 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -52,6 +80,21 @@ define void @reduce_fadd_bfloat() { %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef) %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef) %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef) + + %V1_vp = call fast bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef) + %V2_vp = call fast bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef) + %V4_vp = call fast bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef) + %V8_vp = call fast bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef) + %V16_vp = call fast bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef) + %v32_vp = call fast bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef) + %V64_vp = call fast bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef) + %V128_vp = call fast bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef) + %NXV2_vp = call fast bfloat 
@llvm.vp.reduce.fadd.nxv2bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV4_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV8_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV16_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV32_vp = call fast bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0.0, undef, undef, i32 undef) ret void } @@ -71,6 +114,20 @@ define void @reduce_fadd_half() { ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost 
Model: Found an estimated cost of 7 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half' @@ -88,6 +145,20 @@ define void @reduce_fadd_half() { ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: 
Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, undef, undef, 
i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_half' @@ -105,6 +176,20 @@ define void @reduce_fadd_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 
undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void ; %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) @@ -121,6 +206,21 @@ define void @reduce_fadd_half() { %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, undef) %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, undef) %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, undef) + + %V1_vp = call fast half @llvm.vp.reduce.fadd.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef) + %V2_vp = call fast half @llvm.vp.reduce.fadd.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef) + %V4_vp = call fast half @llvm.vp.reduce.fadd.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef) + %V8_vp = call fast half @llvm.vp.reduce.fadd.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef) + %V16_vp = call fast half @llvm.vp.reduce.fadd.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef) + %v32_vp = call fast half @llvm.vp.reduce.fadd.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef) + %V64_vp = call fast half @llvm.vp.reduce.fadd.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef) + %V128_vp = call fast half @llvm.vp.reduce.fadd.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call fast half @llvm.vp.reduce.fadd.nxv1f16(half 0.0, undef, undef, i32 undef) + %NXV2_vp = call fast half @llvm.vp.reduce.fadd.nxv2f16(half 0.0, undef, undef, i32 undef) + %NXV4_vp = call fast half @llvm.vp.reduce.fadd.nxv4f16(half 0.0, undef, undef, i32 undef) + %NXV8_vp = call fast half @llvm.vp.reduce.fadd.nxv8f16(half 0.0, undef, undef, i32 undef) + %NXV16_vp = call fast half @llvm.vp.reduce.fadd.nxv16f16(half 0.0, undef, undef, i32 undef) + %NXV32_vp = call fast half @llvm.vp.reduce.fadd.nxv32f16(half 0.0, undef, undef, i32 undef) ret void } @@ -139,6 +239,20 @@ define void @reduce_fadd_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float 
@llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_float' @@ -155,6 +269,20 @@ define void @reduce_fadd_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast float 
@llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) @@ -170,6 +298,21 @@ define void @reduce_fadd_float() { %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, undef) %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, undef) %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, undef) + + %V1_vp = call fast float @llvm.vp.reduce.fadd.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef) + %V2_vp = call fast float @llvm.vp.reduce.fadd.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) + %V4_vp = call fast float @llvm.vp.reduce.fadd.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) + %V8_vp = call fast float @llvm.vp.reduce.fadd.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) + %V16_vp = call fast float @llvm.vp.reduce.fadd.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) + %v32_vp = call fast float @llvm.vp.reduce.fadd.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) + %V64_vp = call fast float @llvm.vp.reduce.fadd.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) + %V128_vp = call fast float @llvm.vp.reduce.fadd.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call fast float @llvm.vp.reduce.fadd.nxv1f32(float 0.0, undef, undef, i32 undef) + %NXV2_vp = call fast float @llvm.vp.reduce.fadd.nxv2f32(float 0.0, undef, undef, i32 undef) + %NXV4_vp = call fast float @llvm.vp.reduce.fadd.nxv4f32(float 0.0, undef, undef, i32 undef) + %NXV8_vp = call fast float @llvm.vp.reduce.fadd.nxv8f32(float 
0.0, undef, undef, i32 undef) + %NXV16_vp = call fast float @llvm.vp.reduce.fadd.nxv16f32(float 0.0, undef, undef, i32 undef) + %NXV32_vp = call fast float @llvm.vp.reduce.fadd.nxv32f32(float 0.0, undef, undef, i32 undef) ret void } @@ -187,6 +330,20 @@ define void @reduce_fadd_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost 
Model: Found an estimated cost of 30 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_double' @@ -202,6 +359,20 @@ define void @reduce_fadd_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 
0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) @@ -216,6 +387,21 @@ define void @reduce_fadd_double() { %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, undef) %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, undef) %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, undef) + + %V1_vp = call fast double @llvm.vp.reduce.fadd.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef) + %V2_vp = call fast double @llvm.vp.reduce.fadd.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) + %V4_vp = call fast double @llvm.vp.reduce.fadd.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) + %V8_vp = call fast double @llvm.vp.reduce.fadd.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) + %V16_vp = call fast double @llvm.vp.reduce.fadd.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) + %v32_vp = call fast double @llvm.vp.reduce.fadd.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) + %V64_vp = call fast double @llvm.vp.reduce.fadd.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, 
i32 undef) + %V128_vp = call fast double @llvm.vp.reduce.fadd.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call fast double @llvm.vp.reduce.fadd.nxv1f64(double 0.0, undef, undef, i32 undef) + %NXV2_vp = call fast double @llvm.vp.reduce.fadd.nxv2f64(double 0.0, undef, undef, i32 undef) + %NXV4_vp = call fast double @llvm.vp.reduce.fadd.nxv4f64(double 0.0, undef, undef, i32 undef) + %NXV8_vp = call fast double @llvm.vp.reduce.fadd.nxv8f64(double 0.0, undef, undef, i32 undef) + %NXV16_vp = call fast double @llvm.vp.reduce.fadd.nxv16f64(double 0.0, undef, undef, i32 undef) + %NXV32_vp = call fast double @llvm.vp.reduce.fadd.nxv32f64(double 0.0, undef, undef, i32 undef) ret void } @@ -235,6 +421,20 @@ define void @reduce_ordered_fadd_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found 
an estimated cost of 63 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fadd_bfloat' @@ -252,6 +452,20 @@ define void @reduce_ordered_fadd_bfloat() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, 
undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call 
bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -268,10 +482,26 @@ define void @reduce_ordered_fadd_bfloat() { %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, undef) %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, undef) %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, undef) + + %V1_vp = call bfloat @llvm.vp.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef) + %V2_vp = call bfloat @llvm.vp.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef) + %V4_vp = call bfloat @llvm.vp.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef) + %V8_vp = call bfloat @llvm.vp.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef) + %V16_vp = call bfloat @llvm.vp.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef) + %v32_vp = call bfloat @llvm.vp.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef) + %V64_vp = call bfloat @llvm.vp.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef) + %V128_vp = call bfloat @llvm.vp.reduce.fadd.v128bf16(bfloat 0.0, <128 x 
bfloat> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV2_vp = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV4_vp = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV8_vp = call bfloat @llvm.vp.reduce.fadd.nxv8bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV16_vp = call bfloat @llvm.vp.reduce.fadd.nxv16bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV32_vp = call bfloat @llvm.vp.reduce.fadd.nxv32bf16(bfloat 0.0, undef, undef, i32 undef) ret void } define void @reduce_ordered_fadd_half() { +; ; FP-REDUCE-ZVFH-LABEL: 'reduce_ordered_fadd_half' ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) @@ -287,6 +517,20 @@ define void @reduce_ordered_fadd_half() { ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost 
of 66 for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_ordered_fadd_half' @@ -304,6 +548,20 @@ define void @reduce_ordered_fadd_half() { ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = 
call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fadd_half' @@ -321,6 +579,20 @@ define void @reduce_ordered_fadd_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half 
@llvm.vp.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call half @llvm.vp.reduce.fadd.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) @@ -337,6 +609,21 @@ define void @reduce_ordered_fadd_half() { %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, undef) %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, undef) %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, undef) + + %V1_vp = call half @llvm.vp.reduce.fadd.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef) + %V2_vp = call half @llvm.vp.reduce.fadd.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef) + %V4_vp = call half @llvm.vp.reduce.fadd.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef) + %V8_vp = call half @llvm.vp.reduce.fadd.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef) + %V16_vp = call half @llvm.vp.reduce.fadd.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef) + %v32_vp = call half @llvm.vp.reduce.fadd.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef) + %V64_vp = call half @llvm.vp.reduce.fadd.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef) + %V128_vp = call half @llvm.vp.reduce.fadd.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half 0.0, undef, undef, i32 undef) + %NXV2_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half 0.0, undef, undef, i32 undef) + %NXV4_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half 0.0, undef, undef, i32 undef) + %NXV8_vp = call half @llvm.vp.reduce.fadd.nxv8f16(half 0.0, undef, undef, i32 undef) + %NXV16_vp = call half 
@llvm.vp.reduce.fadd.nxv16f16(half 0.0, undef, undef, i32 undef) + %NXV32_vp = call half @llvm.vp.reduce.fadd.nxv32f16(half 0.0, undef, undef, i32 undef) ret void } @@ -355,6 +642,20 @@ define void @reduce_ordered_fadd_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, 
<64 x float> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fadd_float' @@ -371,6 +672,20 @@ define void @reduce_ordered_fadd_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.000000e+00, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) @@ -386,6 +701,21 @@ define void @reduce_ordered_fadd_float() { %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, undef) %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, undef) %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, undef) + + %V1_vp = call float @llvm.vp.reduce.fadd.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef) + %V2_vp = call float @llvm.vp.reduce.fadd.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) + %V4_vp = call float @llvm.vp.reduce.fadd.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) + %V8_vp = call float @llvm.vp.reduce.fadd.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) + %V16_vp = call float @llvm.vp.reduce.fadd.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) + %v32_vp = call float @llvm.vp.reduce.fadd.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) + %V64_vp = call float @llvm.vp.reduce.fadd.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) + %V128_vp = call float @llvm.vp.reduce.fadd.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float 0.0, undef, undef, i32 undef) + %NXV2_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float 0.0, undef, undef, i32 undef) + %NXV4_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float 
0.0, undef, undef, i32 undef) + %NXV8_vp = call float @llvm.vp.reduce.fadd.nxv8f32(float 0.0, undef, undef, i32 undef) + %NXV16_vp = call float @llvm.vp.reduce.fadd.nxv16f32(float 0.0, undef, undef, i32 undef) + %NXV32_vp = call float @llvm.vp.reduce.fadd.nxv32f32(float 0.0, undef, undef, i32 undef) ret void } @@ -403,6 +733,20 @@ define void @reduce_ordered_fadd_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x 
i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fadd_double' @@ -418,6 +762,20 @@ define void @reduce_ordered_fadd_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.000000e+00, undef, undef, i32 undef) 
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.000000e+00, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) @@ -432,5 +790,20 @@ define void @reduce_ordered_fadd_double() { %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, undef) %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, undef) %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, undef) + + %V1_vp = call double @llvm.vp.reduce.fadd.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef) + %V2_vp = call double @llvm.vp.reduce.fadd.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) + %V4_vp = call double @llvm.vp.reduce.fadd.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) + %V8_vp = call double @llvm.vp.reduce.fadd.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) + %V16_vp = call double @llvm.vp.reduce.fadd.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) + %v32_vp = call double @llvm.vp.reduce.fadd.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) + %V64_vp = call double @llvm.vp.reduce.fadd.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) + %V128_vp = call double @llvm.vp.reduce.fadd.v128f64(double 0.0, <128 x double> undef, <128 x 
i1> undef, i32 undef) + %NXV1_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double 0.0, undef, undef, i32 undef) + %NXV2_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double 0.0, undef, undef, i32 undef) + %NXV4_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double 0.0, undef, undef, i32 undef) + %NXV8_vp = call double @llvm.vp.reduce.fadd.nxv8f64(double 0.0, undef, undef, i32 undef) + %NXV16_vp = call double @llvm.vp.reduce.fadd.nxv16f64(double 0.0, undef, undef, i32 undef) + %NXV32_vp = call double @llvm.vp.reduce.fadd.nxv32f64(double 0.0, undef, undef, i32 undef) ret void } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll index b14c60012077d..6b8f18a2d2df1 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmaximum.ll @@ -20,6 +20,34 @@ define float @reduce_fmaximum_f32(float %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %14 = call fast float 
@llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found 
an estimated cost of 6 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef ; ; SIZE-LABEL: 'reduce_fmaximum_f32' @@ -37,6 +65,34 @@ define float @reduce_fmaximum_f32(float %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 
for instruction: %V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call fast float @llvm.vp.reduce.fmaximum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast float 
@llvm.vp.reduce.fmaximum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %16 = call float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = call float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = call float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = call float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = call float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call fast float @llvm.vp.reduce.fmax.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call fast float @llvm.vp.reduce.fmax.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = call fast float @llvm.vp.reduce.fmax.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %25 = call fast float @llvm.vp.reduce.fmax.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = call fast float @llvm.vp.reduce.fmax.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %27 = call fast float @llvm.vp.reduce.fmax.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %28 = call fast float @llvm.vp.reduce.fmax.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef ; %V2 = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> undef) @@ -53,6 +109,36 @@ call fast float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> undef) call fast float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> undef) call fast float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> undef) call fast float @llvm.vector.reduce.fmaximum.v128f32(<128 x float> undef) + +%V2_vp = call float @llvm.vp.reduce.fmaximum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) +%V4_vp = call float @llvm.vp.reduce.fmaximum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) +%V8_vp = call float @llvm.vp.reduce.fmaximum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) +%V16_vp = call float @llvm.vp.reduce.fmaximum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) +%V32_vp = call float @llvm.vp.reduce.fmaximum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) +%V64_vp = call float @llvm.vp.reduce.fmaximum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) +%V128_vp = call float @llvm.vp.reduce.fmaximum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) +call fast float 
@llvm.vp.reduce.fmaximum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmaximum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmaximum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmaximum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmaximum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmaximum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmaximum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) + +call float @llvm.vp.reduce.fmax.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmax.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmax.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmax.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmax.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmax.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmax.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmax.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmax.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmax.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmax.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmax.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmax.v64f32(float 0.0, <64 x 
float> undef, <64 x i1> undef, i32 undef) +call fast float @llvm.vp.reduce.fmax.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) ret float undef } declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>) @@ -77,6 +163,34 @@ define double @reduce_fmaximum_f64(double %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %13 = call double 
@llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %23 = call double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 
x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 62 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef ; ; SIZE-LABEL: 'reduce_fmaximum_f64' @@ -92,6 +206,34 @@ define double @reduce_fmaximum_f64(double %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost 
of 7 for instruction: %13 = call double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call fast double @llvm.vp.reduce.fmaximum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = call double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = call double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = call double 
@llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = call double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = call fast double @llvm.vp.reduce.fmax.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %29 = call fast double @llvm.vp.reduce.fmax.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = call fast double @llvm.vp.reduce.fmax.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = call fast double @llvm.vp.reduce.fmax.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = call fast double @llvm.vp.reduce.fmax.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %33 = call fast double @llvm.vp.reduce.fmax.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) 
+; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %34 = call fast double @llvm.vp.reduce.fmax.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef ; %V2 = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> undef) @@ -106,6 +248,36 @@ call fast double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> undef) call fast double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> undef) call fast double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> undef) call fast double @llvm.vector.reduce.fmaximum.v64f64(<64 x double> undef) + +call double @llvm.vp.reduce.fmaximum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmaximum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmaximum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmaximum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmaximum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmaximum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmaximum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmaximum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmaximum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmaximum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmaximum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmaximum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) +call fast double 
@llvm.vp.reduce.fmaximum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmaximum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) + +call double @llvm.vp.reduce.fmax.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmax.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmax.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmax.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmax.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmax.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmax.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmax.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmax.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmax.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmax.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmax.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmax.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) +call fast double @llvm.vp.reduce.fmax.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) ret double undef } declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll index 2172a85bc46aa..763490f86b27a 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll +++ 
b/llvm/test/Analysis/CostModel/RISCV/reduce-fminimum.ll @@ -13,6 +13,20 @@ define float @reduce_fmaximum_f32(float %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = 
call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef ; ; SIZE-LABEL: 'reduce_fmaximum_f32' @@ -23,6 +37,20 @@ define float @reduce_fmaximum_f32(float %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.000000e+00, <2 x float> 
undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call float @llvm.vp.reduce.fmin.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call float @llvm.vp.reduce.fmin.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call float @llvm.vp.reduce.fmin.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call float @llvm.vp.reduce.fmin.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call 
float @llvm.vp.reduce.fmin.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = call float @llvm.vp.reduce.fmin.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call float @llvm.vp.reduce.fmin.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef ; %V2 = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> undef) @@ -32,6 +60,22 @@ define float @reduce_fmaximum_f32(float %arg) { %V32 = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> undef) %V64 = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> undef) %V128 = call float @llvm.vector.reduce.fminimum.v128f32(<128 x float> undef) + +%V2_vp = call float @llvm.vp.reduce.fminimum.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) +%V4_vp = call float @llvm.vp.reduce.fminimum.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) +%V8_vp = call float @llvm.vp.reduce.fminimum.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) +%V16_vp = call float @llvm.vp.reduce.fminimum.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) +%V32_vp = call float @llvm.vp.reduce.fminimum.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) +%V64_vp = call float @llvm.vp.reduce.fminimum.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) +%V128_vp = call float @llvm.vp.reduce.fminimum.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) + +call float @llvm.vp.reduce.fmin.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmin.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmin.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) 
+call float @llvm.vp.reduce.fmin.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmin.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmin.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) +call float @llvm.vp.reduce.fmin.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) ret float undef } declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>) @@ -50,6 +94,20 @@ define double @reduce_fmaximum_f64(double %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: 
Found an estimated cost of 16 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call double @llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double undef ; ; SIZE-LABEL: 'reduce_fmaximum_f64' @@ -59,6 +117,20 @@ define double @reduce_fmaximum_f64(double %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call double 
@llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call double @llvm.vp.reduce.fminimum.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call double @llvm.vp.reduce.fmin.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call double 
@llvm.vp.reduce.fmin.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call double @llvm.vp.reduce.fmin.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call double @llvm.vp.reduce.fmin.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = call double @llvm.vp.reduce.fmin.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %6 = call double @llvm.vp.reduce.fmin.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %7 = call double @llvm.vp.reduce.fmin.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double undef ; %V2 = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> undef) @@ -67,6 +139,22 @@ define double @reduce_fmaximum_f64(double %arg) { %V16 = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> undef) %V32 = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> undef) %V64 = call double @llvm.vector.reduce.fminimum.v64f64(<64 x double> undef) + +%V2_vp = call double @llvm.vp.reduce.fminimum.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) +%V4_vp = call double @llvm.vp.reduce.fminimum.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) +%V8_vp = call double @llvm.vp.reduce.fminimum.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) +%V16_vp = call double @llvm.vp.reduce.fminimum.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) +%V32_vp = call double 
@llvm.vp.reduce.fminimum.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) +%V64_vp = call double @llvm.vp.reduce.fminimum.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) +%V128_vp = call double @llvm.vp.reduce.fminimum.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) + +call double @llvm.vp.reduce.fmin.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmin.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmin.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmin.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmin.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmin.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) +call double @llvm.vp.reduce.fmin.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) ret double undef } declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll index 211bcb1343eea..daf80d0a6d733 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll @@ -19,6 +19,20 @@ define void @reduce_fmul_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat 
@llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid 
cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_bfloat' @@ -36,6 +50,20 @@ define void @reduce_fmul_bfloat() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -52,6 +80,21 @@ define void @reduce_fmul_bfloat() { %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, undef) %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, undef) %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, undef) + + %V1_vp = call 
fast bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef) + %V2_vp = call fast bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef) + %V4_vp = call fast bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef) + %V8_vp = call fast bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef) + %V16_vp = call fast bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> undef, i32 undef) + %v32_vp = call fast bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef) + %V64_vp = call fast bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef) + %V128_vp = call fast bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV2_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV4_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV8_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV16_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV32_vp = call fast bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0.0, undef, undef, i32 undef) ret void } @@ -71,6 +114,20 @@ define void @reduce_fmul_half() { ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, 
undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half 
@llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half' @@ -88,6 +145,20 @@ define void @reduce_fmul_half() { ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost 
Model: Found an estimated cost of 73 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_half' @@ -105,6 +176,20 @@ define void @reduce_fmul_half() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: 
%NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost 
Model: Invalid cost for instruction: %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef) @@ -121,6 +206,21 @@ define void @reduce_fmul_half() { %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, undef) %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, undef) %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, undef) + + %V1_vp = call fast half @llvm.vp.reduce.fmul.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef) + %V2_vp = call fast half @llvm.vp.reduce.fmul.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef) + %V4_vp = call fast half @llvm.vp.reduce.fmul.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef) + %V8_vp = call fast half @llvm.vp.reduce.fmul.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef) + %V16_vp = call fast half @llvm.vp.reduce.fmul.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef) + %v32_vp = call fast half @llvm.vp.reduce.fmul.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef) + %V64_vp = call fast half @llvm.vp.reduce.fmul.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef) + %V128_vp = call fast half 
@llvm.vp.reduce.fmul.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call fast half @llvm.vp.reduce.fmul.nxv1f16(half 0.0, undef, undef, i32 undef) + %NXV2_vp = call fast half @llvm.vp.reduce.fmul.nxv2f16(half 0.0, undef, undef, i32 undef) + %NXV4_vp = call fast half @llvm.vp.reduce.fmul.nxv4f16(half 0.0, undef, undef, i32 undef) + %NXV8_vp = call fast half @llvm.vp.reduce.fmul.nxv8f16(half 0.0, undef, undef, i32 undef) + %NXV16_vp = call fast half @llvm.vp.reduce.fmul.nxv16f16(half 0.0, undef, undef, i32 undef) + %NXV32_vp = call fast half @llvm.vp.reduce.fmul.nxv32f16(half 0.0, undef, undef, i32 undef) ret void } @@ -139,6 +239,20 @@ define void @reduce_fmul_float() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 121 for 
instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 451 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 483 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 547 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.000000e+00, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_float' @@ -155,6 +269,20 @@ define void @reduce_fmul_float() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float 
@llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64_vp = call fast float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast float 
@llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.000000e+00, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef) @@ -170,6 +298,21 @@ define void @reduce_fmul_float() { %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, undef) %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, undef) %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, undef) + + %V1_vp = call fast float @llvm.vp.reduce.fmul.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef) + %V2_vp = call fast float @llvm.vp.reduce.fmul.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) + %V4_vp = call fast float @llvm.vp.reduce.fmul.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) + %V8_vp = call fast float @llvm.vp.reduce.fmul.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) + %V16_vp = call fast float @llvm.vp.reduce.fmul.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) + %v32_vp = call fast float @llvm.vp.reduce.fmul.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) + %V64_vp = 
call fast float @llvm.vp.reduce.fmul.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) + %V128_vp = call fast float @llvm.vp.reduce.fmul.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call fast float @llvm.vp.reduce.fmul.nxv1f32(float 0.0, undef, undef, i32 undef) + %NXV2_vp = call fast float @llvm.vp.reduce.fmul.nxv2f32(float 0.0, undef, undef, i32 undef) + %NXV4_vp = call fast float @llvm.vp.reduce.fmul.nxv4f32(float 0.0, undef, undef, i32 undef) + %NXV8_vp = call fast float @llvm.vp.reduce.fmul.nxv8f32(float 0.0, undef, undef, i32 undef) + %NXV16_vp = call fast float @llvm.vp.reduce.fmul.nxv16f32(float 0.0, undef, undef, i32 undef) + %NXV32_vp = call fast float @llvm.vp.reduce.fmul.nxv32f32(float 0.0, undef, undef, i32 undef) ret void } @@ -187,6 +330,20 @@ define void @reduce_fmul_double() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V8_vp = call fast double 
@llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 361 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 457 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 585 for instruction: %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.000000e+00, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; 
SIZE-LABEL: 'reduce_fmul_double' @@ -202,6 +359,20 @@ define void @reduce_fmul_double() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16_vp = call fast double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128_vp = call fast double 
@llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.000000e+00, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef) @@ -216,6 +387,21 @@ define void @reduce_fmul_double() { %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, undef) %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, undef) %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, undef) + + %V1_vp = call fast double @llvm.vp.reduce.fmul.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, i32 undef) + %V2_vp = call fast double @llvm.vp.reduce.fmul.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) + %V4_vp = call fast double @llvm.vp.reduce.fmul.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) + %V8_vp = call fast double @llvm.vp.reduce.fmul.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) + %V16_vp = call fast 
double @llvm.vp.reduce.fmul.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) + %v32_vp = call fast double @llvm.vp.reduce.fmul.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) + %V64_vp = call fast double @llvm.vp.reduce.fmul.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) + %V128_vp = call fast double @llvm.vp.reduce.fmul.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call fast double @llvm.vp.reduce.fmul.nxv1f64(double 0.0, undef, undef, i32 undef) + %NXV2_vp = call fast double @llvm.vp.reduce.fmul.nxv2f64(double 0.0, undef, undef, i32 undef) + %NXV4_vp = call fast double @llvm.vp.reduce.fmul.nxv4f64(double 0.0, undef, undef, i32 undef) + %NXV8_vp = call fast double @llvm.vp.reduce.fmul.nxv8f64(double 0.0, undef, undef, i32 undef) + %NXV16_vp = call fast double @llvm.vp.reduce.fmul.nxv16f64(double 0.0, undef, undef, i32 undef) + %NXV32_vp = call fast double @llvm.vp.reduce.fmul.nxv32f64(double 0.0, undef, undef, i32 undef) ret void } @@ -235,6 +421,20 @@ define void @reduce_ordered_fmul_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call 
bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef, undef, i32 
undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat' @@ -252,6 +452,20 @@ define void @reduce_ordered_fmul_bfloat() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0xR0000, <1 x bfloat> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0xR0000, <2 x bfloat> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0xR0000, <4 x bfloat> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0xR0000, <8 x bfloat> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0xR0000, <16 x bfloat> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call 
bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -268,6 +482,21 @@ define void @reduce_ordered_fmul_bfloat() { %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, undef) %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, undef) %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, undef) + + %V1_vp = call bfloat @llvm.vp.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef, <1 x i1> undef, i32 undef) + %V2_vp = call bfloat @llvm.vp.reduce.fmul.v2bf16(bfloat 0.0, <2 x bfloat> undef, <2 x i1> undef, i32 undef) + %V4_vp = call bfloat @llvm.vp.reduce.fmul.v4bf16(bfloat 0.0, <4 x bfloat> undef, <4 x i1> undef, i32 undef) + %V8_vp = call bfloat @llvm.vp.reduce.fmul.v8bf16(bfloat 0.0, <8 x bfloat> undef, <8 x i1> undef, i32 undef) + %V16_vp = call bfloat @llvm.vp.reduce.fmul.v16bf16(bfloat 0.0, <16 x bfloat> undef, <16 x i1> 
undef, i32 undef) + %v32_vp = call bfloat @llvm.vp.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef, <32 x i1> undef, i32 undef) + %V64_vp = call bfloat @llvm.vp.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef, <64 x i1> undef, i32 undef) + %V128_vp = call bfloat @llvm.vp.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call bfloat @llvm.vp.reduce.fmul.nxv1bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV2_vp = call bfloat @llvm.vp.reduce.fmul.nxv2bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV4_vp = call bfloat @llvm.vp.reduce.fmul.nxv4bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV8_vp = call bfloat @llvm.vp.reduce.fmul.nxv8bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV16_vp = call bfloat @llvm.vp.reduce.fmul.nxv16bf16(bfloat 0.0, undef, undef, i32 undef) + %NXV32_vp = call bfloat @llvm.vp.reduce.fmul.nxv32bf16(bfloat 0.0, undef, undef, i32 undef) ret void } @@ -287,6 +516,20 @@ define void @reduce_ordered_fmul_half() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 
31 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_half' @@ -304,6 +547,20 @@ define void @reduce_ordered_fmul_half() { ; SIZE-NEXT: Cost Model: 
Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0xH0000, <1 x half> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call half @llvm.vp.reduce.fmul.nxv1f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for 
instruction: %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0xH0000, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0xH0000, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef) @@ -320,6 +577,21 @@ define void @reduce_ordered_fmul_half() { %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, undef) %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, undef) %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, undef) + + %V1_vp = call half @llvm.vp.reduce.fmul.v1f16(half 0.0, <1 x half> undef, <1 x i1> undef, i32 undef) + %V2_vp = call half @llvm.vp.reduce.fmul.v2f16(half 0.0, <2 x half> undef, <2 x i1> undef, i32 undef) + %V4_vp = call half @llvm.vp.reduce.fmul.v4f16(half 0.0, <4 x half> undef, <4 x i1> undef, i32 undef) + %V8_vp = call half @llvm.vp.reduce.fmul.v8f16(half 0.0, <8 x half> undef, <8 x i1> undef, i32 undef) + %V16_vp = call half @llvm.vp.reduce.fmul.v16f16(half 0.0, <16 x half> undef, <16 x i1> undef, i32 undef) + %v32_vp = call half @llvm.vp.reduce.fmul.v32f16(half 0.0, <32 x half> undef, <32 x i1> undef, i32 undef) + %V64_vp = call half @llvm.vp.reduce.fmul.v64f16(half 0.0, <64 x half> undef, <64 x i1> undef, i32 undef) + %V128_vp = call half @llvm.vp.reduce.fmul.v128f16(half 0.0, <128 x half> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call half 
@llvm.vp.reduce.fmul.nxv1f16(half 0.0, undef, undef, i32 undef) + %NXV2_vp = call half @llvm.vp.reduce.fmul.nxv2f16(half 0.0, undef, undef, i32 undef) + %NXV4_vp = call half @llvm.vp.reduce.fmul.nxv4f16(half 0.0, undef, undef, i32 undef) + %NXV8_vp = call half @llvm.vp.reduce.fmul.nxv8f16(half 0.0, undef, undef, i32 undef) + %NXV16_vp = call half @llvm.vp.reduce.fmul.nxv16f16(half 0.0, undef, undef, i32 undef) + %NXV32_vp = call half @llvm.vp.reduce.fmul.nxv32f16(half 0.0, undef, undef, i32 undef) ret void } @@ -338,6 +610,19 @@ define void @reduce_ordered_fmul_float() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an 
estimated cost of 127 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 254 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 508 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call float @llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_float' @@ -354,6 +639,19 @@ define void @reduce_ordered_fmul_float() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.000000e+00, <1 x float> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.000000e+00, <16 x float> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 380 for instruction: %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call float 
@llvm.vp.reduce.fmul.nxv8f32(float 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.000000e+00, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef) @@ -369,6 +667,20 @@ define void @reduce_ordered_fmul_float() { %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, undef) %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, undef) %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, undef) + + %V1_vp = call float @llvm.vp.reduce.fmul.v1f32(float 0.0, <1 x float> undef, <1 x i1> undef, i32 undef) + %V2_vp = call float @llvm.vp.reduce.fmul.v2f32(float 0.0, <2 x float> undef, <2 x i1> undef, i32 undef) + %V4_vp = call float @llvm.vp.reduce.fmul.v4f32(float 0.0, <4 x float> undef, <4 x i1> undef, i32 undef) + %V8_vp = call float @llvm.vp.reduce.fmul.v8f32(float 0.0, <8 x float> undef, <8 x i1> undef, i32 undef) + %V16_vp = call float @llvm.vp.reduce.fmul.v16f32(float 0.0, <16 x float> undef, <16 x i1> undef, i32 undef) + %v32_vp = call float @llvm.vp.reduce.fmul.v32f32(float 0.0, <32 x float> undef, <32 x i1> undef, i32 undef) + %V64_vp = call float @llvm.vp.reduce.fmul.v64f32(float 0.0, <64 x float> undef, <64 x i1> undef, i32 undef) + %V128_vp = call float @llvm.vp.reduce.fmul.v128f32(float 0.0, <128 x float> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call float @llvm.vp.reduce.fmul.nxv1f32(float 0.0, undef, undef, i32 undef) + %NXV2_vp = call float @llvm.vp.reduce.fmul.nxv2f32(float 0.0, undef, undef, i32 undef) + %NXV4_vp = call float @llvm.vp.reduce.fmul.nxv4f32(float 0.0, undef, undef, i32 undef) + %NXV8_vp = call float @llvm.vp.reduce.fmul.nxv8f32(float 0.0, undef, undef, i32 undef) + %NXV16_vp = call float @llvm.vp.reduce.fmul.nxv16f32(float 0.0, undef, undef, i32 undef) ret void } @@ 
-386,6 +698,18 @@ define void @reduce_ordered_fmul_double() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 
0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, undef, undef, i32 undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, undef, undef, i32 undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_double' @@ -401,6 +725,18 @@ define void @reduce_ordered_fmul_double() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.000000e+00, <1 x double> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 
0.000000e+00, <8 x double> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.000000e+00, <16 x double> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.000000e+00, undef, undef, i32 undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.000000e+00, undef, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef) @@ -415,5 +751,18 @@ define void @reduce_ordered_fmul_double() { %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, undef) %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, undef) %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, undef) + + %V1_vp = call double @llvm.vp.reduce.fmul.v1f64(double 0.0, <1 x double> undef, <1 x i1> undef, 
i32 undef) + %V2_vp = call double @llvm.vp.reduce.fmul.v2f64(double 0.0, <2 x double> undef, <2 x i1> undef, i32 undef) + %V4_vp = call double @llvm.vp.reduce.fmul.v4f64(double 0.0, <4 x double> undef, <4 x i1> undef, i32 undef) + %V8_vp = call double @llvm.vp.reduce.fmul.v8f64(double 0.0, <8 x double> undef, <8 x i1> undef, i32 undef) + %V16_vp = call double @llvm.vp.reduce.fmul.v16f64(double 0.0, <16 x double> undef, <16 x i1> undef, i32 undef) + %v32_vp = call double @llvm.vp.reduce.fmul.v32f64(double 0.0, <32 x double> undef, <32 x i1> undef, i32 undef) + %V64_vp = call double @llvm.vp.reduce.fmul.v64f64(double 0.0, <64 x double> undef, <64 x i1> undef, i32 undef) + %V128_vp = call double @llvm.vp.reduce.fmul.v128f64(double 0.0, <128 x double> undef, <128 x i1> undef, i32 undef) + %NXV1_vp = call double @llvm.vp.reduce.fmul.nxv1f64(double 0.0, undef, undef, i32 undef) + %NXV2_vp = call double @llvm.vp.reduce.fmul.nxv2f64(double 0.0, undef, undef, i32 undef) + %NXV4_vp = call double @llvm.vp.reduce.fmul.nxv4f64(double 0.0, undef, undef, i32 undef) + %NXV8_vp = call double @llvm.vp.reduce.fmul.nxv8f64(double 0.0, undef, undef, i32 undef) ret void } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll index c5d677e574c13..6090c287bf694 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-max.ll @@ -14,6 +14,14 @@ define i32 @reduce_umin_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 
@llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umin_i1' @@ -25,6 +33,14 @@ define i32 @reduce_umin_i1(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> 
undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> undef) @@ -35,6 +51,15 @@ define i32 @reduce_umin_i1(i32 %arg) { %V32 = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> undef) %V64 = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> undef) %V128 = call i1 @llvm.vector.reduce.umax.v128i1(<128 x i1> undef) + + %V1_vp = call i1 @llvm.vp.reduce.umax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i1 @llvm.vp.reduce.umax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i1 @llvm.vp.reduce.umax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i1 @llvm.vp.reduce.umax.v8i1(i1 undef, <8 x i1> undef, <8 x 
i1> undef, i32 undef) + %V16_vp = call i1 @llvm.vp.reduce.umax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i1 @llvm.vp.reduce.umax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i1 @llvm.vp.reduce.umax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i1 @llvm.vp.reduce.umax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -48,6 +73,14 @@ define i32 @reduce_umax_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umax_i8' @@ -59,6 +92,14 @@ define i32 @reduce_umax_i8(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 
@llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef) @@ -69,6 +110,15 @@ define i32 @reduce_umax_i8(i32 %arg) { %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) + + %V1_vp = call i8 @llvm.vp.reduce.umax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i8 @llvm.vp.reduce.umax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i8 @llvm.vp.reduce.umax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i8 @llvm.vp.reduce.umax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i8 @llvm.vp.reduce.umax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i8 @llvm.vp.reduce.umax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i8 @llvm.vp.reduce.umax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i8 @llvm.vp.reduce.umax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -82,6 +132,14 @@ define i32 @reduce_umax_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umax_i16' @@ -93,6 +151,14 @@ define i32 @reduce_umax_i16(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> undef) @@ -102,7 +168,16 @@ define i32 @reduce_umax_i16(i32 %arg) { %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) - %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef) + %V128 = call i16 @llvm.vector.reduce.umax.v128i16(<128 x i16> undef) + + %V1_vp = call i16 @llvm.vp.reduce.umax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> 
undef, i32 undef) + %V2_vp = call i16 @llvm.vp.reduce.umax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i16 @llvm.vp.reduce.umax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i16 @llvm.vp.reduce.umax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i16 @llvm.vp.reduce.umax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i16 @llvm.vp.reduce.umax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i16 @llvm.vp.reduce.umax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i16 @llvm.vp.reduce.umax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -116,6 +191,14 @@ define i32 @reduce_umax_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umax_i32' @@ -127,6 +210,14 @@ define i32 @reduce_umax_i32(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> undef) @@ -137,6 +228,15 @@ define i32 @reduce_umax_i32(i32 %arg) { %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) %V64 = call i32 @llvm.vector.reduce.umax.v64i32(<64 x i32> undef) %V128 = call i32 @llvm.vector.reduce.umax.v128i32(<128 x i32> undef) + + %V1_vp = call i32 @llvm.vp.reduce.umax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i32 @llvm.vp.reduce.umax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i32 @llvm.vp.reduce.umax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i32 @llvm.vp.reduce.umax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i32 @llvm.vp.reduce.umax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i32 @llvm.vp.reduce.umax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i32 @llvm.vp.reduce.umax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -150,6 +250,14 @@ define i32 @reduce_umax_i64(i32 %arg) { ; CHECK-NEXT: Cost 
Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umax_i64' @@ -161,6 +269,14 @@ define i32 @reduce_umax_i64(i32 %arg) { ; 
SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) @@ -171,6 +287,15 @@ define i32 
@reduce_umax_i64(i32 %arg) { %V32 = call i64 @llvm.vector.reduce.umax.v32i64(<32 x i64> undef) %V64 = call i64 @llvm.vector.reduce.umax.v64i64(<64 x i64> undef) %V128 = call i64 @llvm.vector.reduce.umax.v128i64(<128 x i64> undef) + + %V1_vp = call i64 @llvm.vp.reduce.umax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i64 @llvm.vp.reduce.umax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i64 @llvm.vp.reduce.umax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i64 @llvm.vp.reduce.umax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i64 @llvm.vp.reduce.umax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i64 @llvm.vp.reduce.umax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i64 @llvm.vp.reduce.umax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i64 @llvm.vp.reduce.umax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -184,6 +309,14 @@ define i32 @reduce_smin_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 
@llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smin_i1' @@ -195,6 +328,14 @@ define i32 @reduce_smin_i1(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> 
undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.smax.v1i1(<1 x i1> undef) @@ -205,6 +346,15 @@ define i32 @reduce_smin_i1(i32 %arg) { %V32 = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> undef) %V64 = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> undef) %V128 = call i1 @llvm.vector.reduce.smax.v128i1(<128 x i1> undef) + + %V1_vp = call i1 @llvm.vp.reduce.smax.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i1 @llvm.vp.reduce.smax.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i1 @llvm.vp.reduce.smax.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i1 @llvm.vp.reduce.smax.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i1 @llvm.vp.reduce.smax.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i1 @llvm.vp.reduce.smax.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i1 @llvm.vp.reduce.smax.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) + 
%V128_vp = call i1 @llvm.vp.reduce.smax.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -218,6 +368,14 @@ define i32 @reduce_smax_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smax_i8' @@ -229,6 +387,14 @@ define i32 @reduce_smax_i8(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; 
%V1 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef) @@ -239,6 +405,15 @@ define i32 @reduce_smax_i8(i32 %arg) { %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) + + %V1_vp = call i8 @llvm.vp.reduce.smax.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i8 @llvm.vp.reduce.smax.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i8 @llvm.vp.reduce.smax.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i8 @llvm.vp.reduce.smax.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i8 @llvm.vp.reduce.smax.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i8 @llvm.vp.reduce.smax.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i8 @llvm.vp.reduce.smax.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i8 @llvm.vp.reduce.smax.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -252,6 +427,14 @@ define i32 @reduce_smax_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smax_i16' @@ -263,6 +446,14 @@ define i32 @reduce_smax_i16(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i16 @llvm.vector.reduce.smax.v1i16(<1 x i16> undef) @@ -273,6 +464,15 @@ define i32 @reduce_smax_i16(i32 %arg) { %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) %V128 = call i16 @llvm.vector.reduce.smax.v128i16(<128 x i16> undef) + + %V1_vp = call i16 @llvm.vp.reduce.smax.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i16 @llvm.vp.reduce.smax.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i16 @llvm.vp.reduce.smax.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i16 @llvm.vp.reduce.smax.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i16 @llvm.vp.reduce.smax.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i16 @llvm.vp.reduce.smax.v32i16(i16 
undef, <32 x i16> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i16 @llvm.vp.reduce.smax.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i16 @llvm.vp.reduce.smax.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -286,6 +486,14 @@ define i32 @reduce_smax_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smax_i32' @@ -297,6 +505,14 @@ define i32 @reduce_smax_i32(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> undef) @@ -307,6 +523,15 @@ define i32 @reduce_smax_i32(i32 %arg) { %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) %V64 = call i32 @llvm.vector.reduce.smax.v64i32(<64 x i32> undef) %V128 = call i32 @llvm.vector.reduce.smax.v128i32(<128 x i32> undef) + + %V1_vp = call i32 @llvm.vp.reduce.smax.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i32 @llvm.vp.reduce.smax.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i32 @llvm.vp.reduce.smax.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i32 @llvm.vp.reduce.smax.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i32 @llvm.vp.reduce.smax.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i32 @llvm.vp.reduce.smax.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i32 @llvm.vp.reduce.smax.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -320,6 +545,14 @@ define i32 @reduce_smax_i64(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smax_i64' @@ -331,6 +564,14 @@ define i32 @reduce_smax_i64(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) @@ -341,6 +582,15 @@ define i32 @reduce_smax_i64(i32 %arg) { %V32 = call i64 @llvm.vector.reduce.smax.v32i64(<32 x i64> undef) %V64 = call i64 @llvm.vector.reduce.smax.v64i64(<64 x i64> undef) %V128 = call i64 @llvm.vector.reduce.smax.v128i64(<128 x i64> undef) + + %V1_vp = call i64 @llvm.vp.reduce.smax.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i64 @llvm.vp.reduce.smax.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i64 
@llvm.vp.reduce.smax.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i64 @llvm.vp.reduce.smax.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i64 @llvm.vp.reduce.smax.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i64 @llvm.vp.reduce.smax.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i64 @llvm.vp.reduce.smax.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i64 @llvm.vp.reduce.smax.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ret i32 undef } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll index 89bff38138415..7983452304c67 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-min.ll @@ -14,6 +14,14 @@ define i32 @reduce_umin_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x 
i1> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umin_i1' @@ -25,6 +33,14 @@ define i32 @reduce_umin_i1(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.umin.v1i1(<1 x i1> undef) @@ -35,6 +51,15 @@ define i32 @reduce_umin_i1(i32 %arg) { %V32 = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> undef) %V64 = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> undef) %V128 = call i1 @llvm.vector.reduce.umin.v128i1(<128 x i1> undef) + + %V1_vp = call i1 @llvm.vp.reduce.umin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i1 @llvm.vp.reduce.umin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i1 @llvm.vp.reduce.umin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i1 @llvm.vp.reduce.umin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i1 @llvm.vp.reduce.umin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i1 @llvm.vp.reduce.umin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i1 @llvm.vp.reduce.umin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i1 @llvm.vp.reduce.umin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -48,6 +73,14 @@ define i32 @reduce_umin_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found 
an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umin_i8' @@ -59,6 +92,14 @@ define i32 @reduce_umin_i8(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef) @@ -69,6 +110,15 @@ define i32 @reduce_umin_i8(i32 %arg) { %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) %V64 = call i8 
@llvm.vector.reduce.umin.v64i8(<64 x i8> undef) %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) + + %V1_vp = call i8 @llvm.vp.reduce.umin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i8 @llvm.vp.reduce.umin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i8 @llvm.vp.reduce.umin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i8 @llvm.vp.reduce.umin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i8 @llvm.vp.reduce.umin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i8 @llvm.vp.reduce.umin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i8 @llvm.vp.reduce.umin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i8 @llvm.vp.reduce.umin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -82,6 +132,14 @@ define i32 @reduce_umin_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for 
instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umin_i16' @@ -93,6 +151,14 @@ define i32 @reduce_umin_i16(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i16 @llvm.vector.reduce.umin.v1i16(<1 x i16> undef) @@ -103,6 +169,15 @@ define i32 @reduce_umin_i16(i32 %arg) { %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) %V128 = call i16 @llvm.vector.reduce.umin.v128i16(<128 x i16> undef) + + %V1_vp = call i16 @llvm.vp.reduce.umin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i16 @llvm.vp.reduce.umin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i16 @llvm.vp.reduce.umin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i16 @llvm.vp.reduce.umin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i16 @llvm.vp.reduce.umin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i16 @llvm.vp.reduce.umin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i16 @llvm.vp.reduce.umin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i16 
@llvm.vp.reduce.umin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -116,6 +191,14 @@ define i32 @reduce_umin_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umin_i32' @@ -127,6 +210,14 @@ define i32 @reduce_umin_i32(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; 
SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> undef) @@ -137,6 +228,15 @@ define i32 @reduce_umin_i32(i32 %arg) { %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) %V64 = call i32 @llvm.vector.reduce.umin.v64i32(<64 x i32> undef) %V128 = call i32 @llvm.vector.reduce.umin.v128i32(<128 x i32> undef) + + %V1_vp = call i32 @llvm.vp.reduce.umin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i32 @llvm.vp.reduce.umin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i32 @llvm.vp.reduce.umin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i32 @llvm.vp.reduce.umin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i32 @llvm.vp.reduce.umin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i32 @llvm.vp.reduce.umin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i32 @llvm.vp.reduce.umin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -150,6 +250,14 @@ define i32 @reduce_umin_i64(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = 
call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_umin_i64' @@ -161,6 +269,14 @@ define i32 @reduce_umin_i64(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp 
= call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) @@ -171,6 +287,15 @@ define i32 @reduce_umin_i64(i32 %arg) { %V32 = call i64 @llvm.vector.reduce.umin.v32i64(<32 x i64> undef) %V64 = call i64 @llvm.vector.reduce.umin.v64i64(<64 x i64> undef) %V128 = call i64 @llvm.vector.reduce.umin.v128i64(<128 x i64> undef) + + %V1_vp = call i64 @llvm.vp.reduce.umin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i64 @llvm.vp.reduce.umin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i64 @llvm.vp.reduce.umin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i64 @llvm.vp.reduce.umin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i64 
@llvm.vp.reduce.umin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i64 @llvm.vp.reduce.umin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i64 @llvm.vp.reduce.umin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i64 @llvm.vp.reduce.umin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -184,6 +309,14 @@ define i32 @reduce_smin_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = 
call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smin_i1' @@ -195,6 +328,14 @@ define i32 @reduce_smin_i1(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x 
i1> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.smin.v1i1(<1 x i1> undef) @@ -205,6 +346,15 @@ define i32 @reduce_smin_i1(i32 %arg) { %V32 = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> undef) %V64 = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> undef) %V128 = call i1 @llvm.vector.reduce.smin.v128i1(<128 x i1> undef) + + %V1_vp = call i1 @llvm.vp.reduce.smin.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i1 @llvm.vp.reduce.smin.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i1 @llvm.vp.reduce.smin.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i1 @llvm.vp.reduce.smin.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i1 @llvm.vp.reduce.smin.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i1 @llvm.vp.reduce.smin.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i1 @llvm.vp.reduce.smin.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i1 @llvm.vp.reduce.smin.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -218,6 +368,14 @@ define i32 @reduce_smin_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 
@llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smin_i8' @@ -229,6 +387,14 @@ define i32 @reduce_smin_i8(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> 
undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef) @@ -239,6 +405,15 @@ define i32 @reduce_smin_i8(i32 %arg) { %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) + + %V1_vp = call i8 @llvm.vp.reduce.smin.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i8 @llvm.vp.reduce.smin.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i8 @llvm.vp.reduce.smin.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i8 @llvm.vp.reduce.smin.v8i8(i8 undef, <8 x i8> undef, <8 
x i1> undef, i32 undef) + %V16_vp = call i8 @llvm.vp.reduce.smin.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i8 @llvm.vp.reduce.smin.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i8 @llvm.vp.reduce.smin.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i8 @llvm.vp.reduce.smin.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -252,6 +427,14 @@ define i32 @reduce_smin_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smin_i16' @@ -263,6 +446,14 @@ define i32 @reduce_smin_i16(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i16 @llvm.vector.reduce.smin.v1i16(<1 x i16> undef) @@ -273,6 +464,15 @@ define i32 @reduce_smin_i16(i32 %arg) { %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) %V128 = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> undef) + + %V1_vp = call i16 @llvm.vp.reduce.smin.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i16 @llvm.vp.reduce.smin.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i16 @llvm.vp.reduce.smin.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i16 @llvm.vp.reduce.smin.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i16 @llvm.vp.reduce.smin.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i16 @llvm.vp.reduce.smin.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i16 @llvm.vp.reduce.smin.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i16 @llvm.vp.reduce.smin.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -286,6 +486,14 @@ define i32 @reduce_smin_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for 
instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smin_i32' @@ -297,6 +505,14 @@ define i32 @reduce_smin_i32(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 
for instruction: %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> undef) @@ -307,6 +523,15 @@ define i32 @reduce_smin_i32(i32 %arg) { %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) %V64 = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> undef) %V128 = call i32 @llvm.vector.reduce.smin.v128i32(<128 x i32> undef) + + %V1_vp = call i32 @llvm.vp.reduce.smin.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) + 
%V2_vp = call i32 @llvm.vp.reduce.smin.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i32 @llvm.vp.reduce.smin.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i32 @llvm.vp.reduce.smin.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i32 @llvm.vp.reduce.smin.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i32 @llvm.vp.reduce.smin.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i32 @llvm.vp.reduce.smin.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -320,6 +545,14 @@ define i32 @reduce_smin_i64(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: 
%V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_smin_i64' @@ -331,6 +564,14 @@ define i32 @reduce_smin_i64(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) @@ -341,6 +582,15 @@ define i32 @reduce_smin_i64(i32 %arg) { %V32 = call i64 @llvm.vector.reduce.smin.v32i64(<32 x i64> undef) %V64 = call i64 @llvm.vector.reduce.smin.v64i64(<64 x i64> undef) %V128 = call i64 @llvm.vector.reduce.smin.v128i64(<128 x i64> undef) + + %V1_vp = call i64 @llvm.vp.reduce.smin.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i64 @llvm.vp.reduce.smin.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i64 @llvm.vp.reduce.smin.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i64 @llvm.vp.reduce.smin.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i64 @llvm.vp.reduce.smin.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i64 @llvm.vp.reduce.smin.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i64 @llvm.vp.reduce.smin.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i64 @llvm.vp.reduce.smin.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ret i32 undef } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll 
b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll index d75a95f3fadd9..f1c9ab603a521 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-or.ll @@ -17,6 +17,17 @@ define i32 @reduce_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x 
i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i1' @@ -31,6 +42,17 @@ define i32 @reduce_i1(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) @@ -44,6 +66,18 @@ define i32 @reduce_i1(i32 %arg) { %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef) %V512 = call i1 @llvm.vector.reduce.or.v512i1(<512 x i1> undef) %V1024 = call i1 @llvm.vector.reduce.or.v1024i1(<1024 x i1> undef) + + %V1_vp = call i1 @llvm.vp.reduce.or.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i1 @llvm.vp.reduce.or.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i1 @llvm.vp.reduce.or.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i1 @llvm.vp.reduce.or.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i1 @llvm.vp.reduce.or.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i1 @llvm.vp.reduce.or.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) + %V64_vp = 
call i1 @llvm.vp.reduce.or.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i1 @llvm.vp.reduce.or.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) + %V256_vp = call i1 @llvm.vp.reduce.or.v256i1(i1 undef, <256 x i1> undef, <256 x i1> undef, i32 undef) + %V512_vp = call i1 @llvm.vp.reduce.or.v512i1(i1 undef, <512 x i1> undef, <512 x i1> undef, i32 undef) + %V1024_vp = call i1 @llvm.vp.reduce.or.v1024i1(i1 undef, <1024 x i1> undef, <1024 x i1> undef, i32 undef) ret i32 undef } @@ -57,6 +91,14 @@ define i32 @reduce_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i8' @@ -68,6 +110,14 @@ define i32 @reduce_i8(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = 
call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> undef) @@ -78,6 +128,15 @@ define i32 @reduce_i8(i32 %arg) { %V32 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) %V64 = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> undef) %V128 = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> undef) + + %V1_vp = call i8 @llvm.vp.reduce.or.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i8 @llvm.vp.reduce.or.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i8 @llvm.vp.reduce.or.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i8 @llvm.vp.reduce.or.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i8 @llvm.vp.reduce.or.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i8 @llvm.vp.reduce.or.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i8 @llvm.vp.reduce.or.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i8 @llvm.vp.reduce.or.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -91,6 +150,14 @@ define i32 @reduce_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp 
= call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i16' @@ -102,6 +169,14 @@ define i32 @reduce_i16(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 
@llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i16 @llvm.vector.reduce.or.v1i16(<1 x i16> undef) @@ -112,6 +187,15 @@ define i32 @reduce_i16(i32 %arg) { %V32 = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> undef) %V64 = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> undef) %V128 = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> undef) + + %V1_vp = call i16 @llvm.vp.reduce.or.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i16 @llvm.vp.reduce.or.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i16 @llvm.vp.reduce.or.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) + %V8_vp 
= call i16 @llvm.vp.reduce.or.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i16 @llvm.vp.reduce.or.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i16 @llvm.vp.reduce.or.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i16 @llvm.vp.reduce.or.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i16 @llvm.vp.reduce.or.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -125,6 +209,14 @@ define i32 @reduce_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> 
undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i32' @@ -136,6 +228,14 @@ define i32 @reduce_i32(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) 
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i32 @llvm.vector.reduce.or.v1i32(<1 x i32> undef) @@ -146,6 +246,15 @@ define i32 @reduce_i32(i32 %arg) { %V32 = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> undef) %V64 = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> undef) %V128 = call i32 @llvm.vector.reduce.or.v128i32(<128 x i32> undef) + + %V1_vp = call i32 @llvm.vp.reduce.or.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i32 @llvm.vp.reduce.or.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i32 @llvm.vp.reduce.or.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i32 @llvm.vp.reduce.or.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i32 @llvm.vp.reduce.or.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i32 @llvm.vp.reduce.or.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i32 @llvm.vp.reduce.or.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -159,6 +268,14 @@ define i32 @reduce_i64(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = 
call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i64' @@ -170,6 +287,14 @@ define i32 @reduce_i64(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 
@llvm.vector.reduce.or.v128i64(<128 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) @@ -180,6 +305,15 @@ define i32 @reduce_i64(i32 %arg) { %V32 = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> undef) %V64 = call i64 @llvm.vector.reduce.or.v64i64(<64 x i64> undef) %V128 = call i64 @llvm.vector.reduce.or.v128i64(<128 x i64> undef) + + %V1_vp = call i64 @llvm.vp.reduce.or.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i64 @llvm.vp.reduce.or.v2i64(i64 undef, <2 x i64> 
undef, <2 x i1> undef, i32 undef) + %V4_vp = call i64 @llvm.vp.reduce.or.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i64 @llvm.vp.reduce.or.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i64 @llvm.vp.reduce.or.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i64 @llvm.vp.reduce.or.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i64 @llvm.vp.reduce.or.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i64 @llvm.vp.reduce.or.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ret i32 undef } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll index 4f6e0ba074ed8..9457d63f29f93 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll @@ -9,26 +9,32 @@ declare half @llvm.vector.reduce.fadd.nxv1f16(half, ) define half @vreduce_fadd_nxv1f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv1f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fadd_nxv1f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call reassoc half 
@llvm.vector.reduce.fadd.nxv1f16(half %s, %v) + %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, undef, i32 undef) ret half %red } define half @vreduce_ord_fadd_nxv1f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) + %red_vp = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, undef, i32 undef) ret half %red } @@ -37,26 +43,32 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, ) define half @vreduce_fadd_nxv2f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv2f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fadd_nxv2f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) + %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, undef, i32 undef) ret half %red } define half @vreduce_ord_fadd_nxv2f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) + %red_vp = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, undef, i32 undef) ret half %red } @@ -65,26 +77,32 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, ) define half @vreduce_fadd_nxv4f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv4f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 
'vreduce_fadd_nxv4f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) + %red_vp = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, undef, i32 undef) ret half %red } define half @vreduce_ord_fadd_nxv4f16( %v, half %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) + %red_vp = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, undef, i32 undef) ret half %red } @@ -93,26 +111,32 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, ) define float @vreduce_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv1f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fadd_nxv1f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) + %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ret float %red } define float @vreduce_ord_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) + %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ret float %red } @@ -120,15 +144,18 @@ define float @vreduce_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv1f32' ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fwadd_nxv1f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) + %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, %e, undef, i32 undef) ret float %red } @@ -136,15 +163,18 @@ define float @vreduce_ord_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) + %red_vp = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, %e, undef, i32 undef) ret float %red } @@ -153,26 +183,32 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, ) define float @vreduce_fadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv2f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fadd_nxv2f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) + %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, %v, undef, i32 undef) ret float %red } define float @vreduce_ord_fadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float 
@llvm.vp.reduce.fadd.nxv2f32(float %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) + %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, %v, undef, i32 undef) ret float %red } @@ -180,15 +216,18 @@ define float @vreduce_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv2f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fwadd_nxv2f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) + %red_vp = call reassoc float 
@llvm.vp.reduce.fadd.nxv2f32(float %s, %e, undef, i32 undef) ret float %red } @@ -196,15 +235,18 @@ define float @vreduce_ord_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to %red = call float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) + %red_vp = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, %e, undef, i32 undef) ret float %red } @@ -213,26 +255,32 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, ) define float @vreduce_fadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv4f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fadd_nxv4f32' ; SIZE-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) + %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, %v, undef, i32 undef) ret float %red } define float @vreduce_ord_fadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) + %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, %v, undef, i32 undef) ret float %red } @@ -240,15 +288,18 @@ define float @vreduce_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fwadd_nxv4f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %e = fpext %v to %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) + %red_vp = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, %e, undef, i32 undef) ret float %red } @@ -256,15 +307,18 @@ define float @vreduce_ord_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret float %red ; %e = fpext %v to %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) + %red_vp = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, %e, undef, i32 undef) ret float %red } @@ -273,26 +327,32 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, ) define double @vreduce_fadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv1f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fadd_nxv1f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) + %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, %v, undef, i32 undef) ret double %red } define double @vreduce_ord_fadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv1f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv1f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) + %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, %v, undef, i32 undef) ret double %red } @@ -300,15 +360,18 @@ define double @vreduce_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv1f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fwadd_nxv1f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) + %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, %e, undef, i32 undef) ret double %red } @@ -316,15 +379,18 @@ define double @vreduce_ord_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv1f64' ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv1f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to %red = call double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) + %red_vp = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, %e, undef, i32 undef) ret double %red } @@ -333,26 +399,32 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, ) define double @vreduce_fadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv2f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fadd_nxv2f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double 
@llvm.vp.reduce.fadd.nxv2f64(double %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) + %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, %v, undef, i32 undef) ret double %red } define double @vreduce_ord_fadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv2f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv2f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) + %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, %v, undef, i32 undef) ret double %red } @@ -360,15 +432,18 @@ define double @vreduce_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fwadd_nxv2f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) + %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, %e, undef, i32 undef) ret double %red } @@ -376,15 +451,18 @@ define double @vreduce_ord_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv2f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) + %red_vp = call double 
@llvm.vp.reduce.fadd.nxv2f64(double %s, %e, undef, i32 undef) ret double %red } @@ -393,26 +471,32 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, ) define double @vreduce_fadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fadd_nxv4f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fadd_nxv4f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) + %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, %v, undef, i32 undef) ret double %red } define double @vreduce_ord_fadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fadd_nxv4f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_ord_fadd_nxv4f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) + %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, %v, undef, i32 undef) ret double %red } @@ -420,15 +504,18 @@ define double @vreduce_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fwadd_nxv4f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) + %red_vp = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, %e, undef, i32 undef) ret double %red } @@ -436,15 +523,18 @@ define double @vreduce_ord_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call 
double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_ord_fwadd_nxv4f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %e = fpext %v to %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) + %red_vp = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, %e, undef, i32 undef) ret double %red } @@ -453,39 +543,48 @@ declare half @llvm.vector.reduce.fmin.nxv1f16() define half @vreduce_fmin_nxv1f16( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv1f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmin.nxv1f16( %v) + %red_vp = 
call half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 undef) ret half %red } define half @vreduce_fmin_nxv1f16_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16( %v) + %red_vp = call nnan half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 undef) ret half %red } define half @vreduce_fmin_nxv1f16_nonans_noinfs( %v) #1 { ; CHECK-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f16_nonans_noinfs' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 
undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16( %v) + %red_vp = call nnan ninf half @llvm.vp.reduce.fmin.nxv1f16(half undef, %v, undef, i32 undef) ret half %red } @@ -494,13 +593,16 @@ declare half @llvm.vector.reduce.fmin.nxv2f16() define half @vreduce_fmin_nxv2f16( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv2f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv2f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv2f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmin.nxv2f16( %v) + %red_vp = call half @llvm.vp.reduce.fmin.nxv2f16(half undef, %v, undef, i32 undef) ret half %red } @@ -509,13 +611,16 @@ declare half @llvm.vector.reduce.fmin.nxv4f16() define half @vreduce_fmin_nxv4f16( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv4f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv4f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv4f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half 
@llvm.vector.reduce.fmin.nxv4f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmin.nxv4f16( %v) + %red_vp = call half @llvm.vp.reduce.fmin.nxv4f16(half undef, %v, undef, i32 undef) ret half %red } @@ -524,13 +629,16 @@ declare half @llvm.vector.reduce.fmin.nxv64f16() define half @vreduce_fmin_nxv64f16( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv64f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv64f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmin.nxv64f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmin.nxv64f16( %v) + %red_vp = call half @llvm.vp.reduce.fmin.nxv64f16(half undef, %v, undef, i32 undef) ret half %red } @@ -539,39 +647,48 @@ declare float @llvm.vector.reduce.fmin.nxv1f32() define float @vreduce_fmin_nxv1f32( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv1f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmin.nxv1f32( %v) + %red_vp = call float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ret float %red } define float @vreduce_fmin_nxv1f32_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32( %v) + %red_vp = call nnan float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ret float %red } define float @vreduce_fmin_nxv1f32_nonans_noinfs( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f32_nonans_noinfs' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32( %v) + %red_vp = call nnan ninf float @llvm.vp.reduce.fmin.nxv1f32(float undef, %v, undef, i32 undef) ret float %red } @@ -580,13 +697,16 @@ declare float @llvm.vector.reduce.fmin.nxv2f32() define float @vreduce_fmin_nxv2f32( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv2f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv2f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv2f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmin.nxv2f32( %v) + %red_vp = call float @llvm.vp.reduce.fmin.nxv2f32(float undef, %v, undef, i32 undef) ret float %red } @@ -595,13 +715,16 @@ declare float @llvm.vector.reduce.fmin.nxv4f32() 
define float @vreduce_fmin_nxv4f32( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv4f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv4f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv4f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmin.nxv4f32( %v) + %red_vp = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, %v, undef, i32 undef) ret float %red } @@ -610,13 +733,16 @@ declare float @llvm.vector.reduce.fmin.nxv32f32() define float @vreduce_fmin_nxv32f32( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv32f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv32f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmin.nxv32f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float 
@llvm.vector.reduce.fmin.nxv32f32( %v) + %red_vp = call float @llvm.vp.reduce.fmin.nxv32f32(float undef, %v, undef, i32 undef) ret float %red } @@ -625,39 +751,48 @@ declare double @llvm.vector.reduce.fmin.nxv1f64() define double @vreduce_fmin_nxv1f64( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv1f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmin.nxv1f64( %v) + %red_vp = call double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ret double %red } define double @vreduce_fmin_nxv1f64_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = 
call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64( %v) + %red_vp = call nnan double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ret double %red } define double @vreduce_fmin_nxv1f64_nonans_noinfs( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv1f64_nonans_noinfs' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64( %v) + %red_vp = call nnan ninf double @llvm.vp.reduce.fmin.nxv1f64(double undef, %v, undef, i32 undef) ret double %red } @@ -666,13 +801,16 @@ declare double @llvm.vector.reduce.fmin.nxv2f64() define double @vreduce_fmin_nxv2f64( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv2f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret double %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv2f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv2f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmin.nxv2f64( %v) + %red_vp = call double @llvm.vp.reduce.fmin.nxv2f64(double undef, %v, undef, i32 undef) ret double %red } @@ -681,13 +819,16 @@ declare double @llvm.vector.reduce.fmin.nxv4f64() define double @vreduce_fmin_nxv4f64( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv4f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv4f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv4f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmin.nxv4f64( %v) + %red_vp = call double @llvm.vp.reduce.fmin.nxv4f64(double undef, %v, undef, i32 undef) ret double %red } @@ -696,13 +837,16 @@ declare double @llvm.vector.reduce.fmin.nxv16f64() define double @vreduce_fmin_nxv16f64( %v) { ; CHECK-LABEL: 'vreduce_fmin_nxv16f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64( %v) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmin_nxv16f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmin.nxv16f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmin.nxv16f64( %v) + %red_vp = call double @llvm.vp.reduce.fmin.nxv16f64(double undef, %v, undef, i32 undef) ret double %red } @@ -711,39 +855,48 @@ declare half @llvm.vector.reduce.fmax.nxv1f16() define half @vreduce_fmax_nxv1f16( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv1f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmax.nxv1f16( %v) + %red_vp = call half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ret half %red } define half @vreduce_fmax_nxv1f16_nonans( %v) { ; CHECK-LABEL: 
'vreduce_fmax_nxv1f16_nonans' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16( %v) + %red_vp = call nnan half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ret half %red } define half @vreduce_fmax_nxv1f16_nonans_noinfs( %v) #1 { ; CHECK-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f16_nonans_noinfs' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16( 
%v) + %red_vp = call nnan ninf half @llvm.vp.reduce.fmax.nxv1f16(half undef, %v, undef, i32 undef) ret half %red } @@ -752,13 +905,16 @@ declare half @llvm.vector.reduce.fmax.nxv2f16() define half @vreduce_fmax_nxv2f16( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv2f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv2f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv2f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmax.nxv2f16( %v) + %red_vp = call half @llvm.vp.reduce.fmax.nxv2f16(half undef, %v, undef, i32 undef) ret half %red } @@ -767,13 +923,16 @@ declare half @llvm.vector.reduce.fmax.nxv4f16() define half @vreduce_fmax_nxv4f16( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv4f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv4f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv4f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call half 
@llvm.vp.reduce.fmax.nxv4f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmax.nxv4f16( %v) + %red_vp = call half @llvm.vp.reduce.fmax.nxv4f16(half undef, %v, undef, i32 undef) ret half %red } @@ -782,13 +941,16 @@ declare half @llvm.vector.reduce.fmax.nxv64f16() define half @vreduce_fmax_nxv64f16( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv64f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv64f16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call half @llvm.vector.reduce.fmax.nxv64f16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret half %red ; %red = call half @llvm.vector.reduce.fmax.nxv64f16( %v) + %red_vp = call half @llvm.vp.reduce.fmax.nxv64f16(half undef, %v, undef, i32 undef) ret half %red } @@ -797,39 +959,48 @@ declare float @llvm.vector.reduce.fmax.nxv1f32() define float @vreduce_fmax_nxv1f32( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f32' ; SIZE-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv1f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmax.nxv1f32( %v) + %red_vp = call float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ret float %red } define float @vreduce_fmax_nxv1f32_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32( %v) + %red_vp = call nnan float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ret float %red } define float @vreduce_fmax_nxv1f32_nonans_noinfs( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f32_nonans_noinfs' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32( %v) + %red_vp = call nnan ninf float @llvm.vp.reduce.fmax.nxv1f32(float undef, %v, undef, i32 undef) ret float %red } @@ -838,13 +1009,16 @@ declare float @llvm.vector.reduce.fmax.nxv2f32() define float @vreduce_fmax_nxv2f32( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv2f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv2f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv2f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmax.nxv2f32( %v) + %red_vp = call float @llvm.vp.reduce.fmax.nxv2f32(float undef, %v, undef, i32 undef) ret float %red } @@ -853,13 +1027,16 @@ declare float @llvm.vector.reduce.fmax.nxv4f32() define float @vreduce_fmax_nxv4f32( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv4f32' ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv4f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv4f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmax.nxv4f32( %v) + %red_vp = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, %v, undef, i32 undef) ret float %red } @@ -868,13 +1045,16 @@ declare float @llvm.vector.reduce.fmax.nxv32f32() define float @vreduce_fmax_nxv32f32( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv32f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv32f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call float @llvm.vector.reduce.fmax.nxv32f32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call float @llvm.vector.reduce.fmax.nxv32f32( %v) + %red_vp = call float @llvm.vp.reduce.fmax.nxv32f32(float undef, %v, undef, i32 undef) 
ret float %red } @@ -883,39 +1063,48 @@ declare double @llvm.vector.reduce.fmax.nxv1f64() define double @vreduce_fmax_nxv1f64( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv1f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmax.nxv1f64( %v) + %red_vp = call double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ret double %red } define double @vreduce_fmax_nxv1f64_nonans( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: ret double %red ; %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64( %v) + %red_vp = call nnan double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ret double %red } define double @vreduce_fmax_nxv1f64_nonans_noinfs( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv1f64_nonans_noinfs' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64( %v) + %red_vp = call nnan ninf double @llvm.vp.reduce.fmax.nxv1f64(double undef, %v, undef, i32 undef) ret double %red } @@ -924,13 +1113,16 @@ declare double @llvm.vector.reduce.fmax.nxv2f64() define double @vreduce_fmax_nxv2f64( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv2f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv2f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv2f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%red = call double @llvm.vector.reduce.fmax.nxv2f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmax.nxv2f64( %v) + %red_vp = call double @llvm.vp.reduce.fmax.nxv2f64(double undef, %v, undef, i32 undef) ret double %red } @@ -939,13 +1131,16 @@ declare double @llvm.vector.reduce.fmax.nxv4f64() define double @vreduce_fmax_nxv4f64( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv4f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv4f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv4f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmax.nxv4f64( %v) + %red_vp = call double @llvm.vp.reduce.fmax.nxv4f64(double undef, %v, undef, i32 undef) ret double %red } @@ -954,25 +1149,31 @@ declare double @llvm.vector.reduce.fmax.nxv16f64() define double @vreduce_fmax_nxv16f64( %v) { ; CHECK-LABEL: 'vreduce_fmax_nxv16f64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double 
undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; ; SIZE-LABEL: 'vreduce_fmax_nxv16f64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret double %red ; %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) + %red_vp = call double @llvm.vp.reduce.fmax.nxv16f64(double undef, %v, undef, i32 undef) ret double %red } define float @vreduce_nsz_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_nsz_fadd_nxv1f32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; ; SIZE-LABEL: 'vreduce_nsz_fadd_nxv1f32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %red ; %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) + %red_vp = call reassoc nsz float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, undef, i32 undef) ret float %red } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll index 
2807f7526760f..5151181c663ff 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-int.ll @@ -9,13 +9,16 @@ declare i8 @llvm.vector.reduce.add.nxv1i8() define signext i8 @vreduce_add_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_add_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_add_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) + %red_vp = call i8 @llvm.vp.reduce.add.nxv1i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -24,13 +27,16 @@ declare i8 @llvm.vector.reduce.umax.nxv1i8() define signext i8 @vreduce_umax_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, 
%v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) + %red_vp = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -39,13 +45,16 @@ declare i8 @llvm.vector.reduce.smax.nxv1i8() define signext i8 @vreduce_smax_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) + %red_vp = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -54,13 +63,16 @@ declare i8 @llvm.vector.reduce.umin.nxv1i8() define signext i8 @vreduce_umin_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) +; SIZE-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) + %red_vp = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -69,13 +81,16 @@ declare i8 @llvm.vector.reduce.smin.nxv1i8() define signext i8 @vreduce_smin_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) + %red_vp = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -84,13 +99,16 @@ declare i8 @llvm.vector.reduce.and.nxv1i8() define signext i8 @vreduce_and_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_and_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_and_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) + %red_vp = call i8 @llvm.vp.reduce.and.nxv1i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -99,13 +117,16 @@ declare i8 @llvm.vector.reduce.or.nxv1i8() define signext i8 @vreduce_or_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_or_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_or_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) + %red_vp = call i8 @llvm.vp.reduce.or.nxv1i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -114,13 +135,16 @@ declare i8 @llvm.vector.reduce.xor.nxv1i8() define signext i8 @vreduce_xor_nxv1i8( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 
'vreduce_xor_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) + %red_vp = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -129,13 +153,16 @@ declare i8 @llvm.vector.reduce.add.nxv2i8() define signext i8 @vreduce_add_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_add_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_add_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) + %red_vp = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -144,13 +171,16 @@ declare i8 @llvm.vector.reduce.umax.nxv2i8() define signext i8 @vreduce_umax_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, %v, undef, i32 undef) ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) + %red_vp = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -159,13 +189,16 @@ declare i8 @llvm.vector.reduce.smax.nxv2i8() define signext i8 @vreduce_smax_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) + %red_vp = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -174,13 +207,16 @@ declare i8 @llvm.vector.reduce.umin.nxv2i8() define signext i8 @vreduce_umin_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) + %red_vp = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -189,13 +225,16 @@ declare i8 @llvm.vector.reduce.smin.nxv2i8() define signext i8 @vreduce_smin_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) + %red_vp = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -204,13 +243,16 @@ declare i8 @llvm.vector.reduce.and.nxv2i8() define signext i8 @vreduce_and_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_and_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_and_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) + %red_vp = call i8 @llvm.vp.reduce.and.nxv2i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -219,13 +261,16 @@ declare i8 @llvm.vector.reduce.or.nxv2i8() define signext i8 @vreduce_or_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_or_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_or_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) + %red_vp = call i8 @llvm.vp.reduce.or.nxv2i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -234,13 +279,16 @@ declare i8 @llvm.vector.reduce.xor.nxv2i8() define signext i8 @vreduce_xor_nxv2i8( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv2i8' 
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) + %red_vp = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -249,13 +297,16 @@ declare i8 @llvm.vector.reduce.add.nxv4i8() define signext i8 @vreduce_add_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_add_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_add_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) + %red_vp = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -264,13 +315,16 @@ declare i8 
@llvm.vector.reduce.umax.nxv4i8() define signext i8 @vreduce_umax_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) + %red_vp = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -279,13 +333,16 @@ declare i8 @llvm.vector.reduce.smax.nxv4i8() define signext i8 @vreduce_smax_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) + %red_vp = call i8 
@llvm.vp.reduce.smax.nxv4i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -294,13 +351,16 @@ declare i8 @llvm.vector.reduce.umin.nxv4i8() define signext i8 @vreduce_umin_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) + %red_vp = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -309,13 +369,16 @@ declare i8 @llvm.vector.reduce.smin.nxv4i8() define signext i8 @vreduce_smin_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) + %red_vp = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -324,13 +387,16 @@ declare i8 @llvm.vector.reduce.and.nxv4i8() define signext i8 @vreduce_and_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_and_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_and_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) + %red_vp = call i8 @llvm.vp.reduce.and.nxv4i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -339,13 +405,16 @@ declare i8 @llvm.vector.reduce.or.nxv4i8() define signext i8 @vreduce_or_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_or_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_or_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, 
%v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) + %red_vp = call i8 @llvm.vp.reduce.or.nxv4i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -354,13 +423,16 @@ declare i8 @llvm.vector.reduce.xor.nxv4i8() define signext i8 @vreduce_xor_nxv4i8( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %red ; %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) + %red_vp = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 undef, %v, undef, i32 undef) ret i8 %red } @@ -369,13 +441,16 @@ declare i16 @llvm.vector.reduce.add.nxv1i16() define signext i16 @vreduce_add_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_add_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_add_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) +; SIZE-NEXT: Cost Model: Found 
an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) + %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -383,15 +458,18 @@ define signext i16 @vwreduce_add_nxv1i8( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) + %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, %e, undef, i32 undef) ret i16 %red } @@ -399,15 +477,18 @@ define signext i16 @vwreduce_uadd_nxv1i8( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv1i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 
@llvm.vp.reduce.add.nxv1i16(i16 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv1i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to %red = call i16 @llvm.vector.reduce.add.nxv1i16( %e) + %red_vp = call i16 @llvm.vp.reduce.add.nxv1i16(i16 undef, %e, undef, i32 undef) ret i16 %red } @@ -416,13 +497,16 @@ declare i16 @llvm.vector.reduce.umax.nxv1i16() define signext i16 @vreduce_umax_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) + %red_vp = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -431,13 +515,16 @@ declare i16 @llvm.vector.reduce.smax.nxv1i16() define signext i16 @vreduce_smax_nxv1i16( %v) { ; 
CHECK-LABEL: 'vreduce_smax_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) + %red_vp = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -446,13 +533,16 @@ declare i16 @llvm.vector.reduce.umin.nxv1i16() define signext i16 @vreduce_umin_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) + %red_vp = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 undef, %v, 
undef, i32 undef) ret i16 %red } @@ -461,13 +551,16 @@ declare i16 @llvm.vector.reduce.smin.nxv1i16() define signext i16 @vreduce_smin_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) + %red_vp = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -476,13 +569,16 @@ declare i16 @llvm.vector.reduce.and.nxv1i16() define signext i16 @vreduce_and_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_and_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_and_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) + %red_vp = call i16 @llvm.vp.reduce.and.nxv1i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -491,13 +587,16 @@ declare i16 @llvm.vector.reduce.or.nxv1i16() define signext i16 @vreduce_or_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_or_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_or_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) + %red_vp = call i16 @llvm.vp.reduce.or.nxv1i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -506,13 +605,16 @@ declare i16 @llvm.vector.reduce.xor.nxv1i16() define signext i16 @vreduce_xor_nxv1i16( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call 
i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) + %red_vp = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -521,13 +623,16 @@ declare i16 @llvm.vector.reduce.add.nxv2i16() define signext i16 @vreduce_add_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_add_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_add_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) + %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -535,15 +640,18 @@ define signext i16 @vwreduce_add_nxv2i8( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv2i8' ; SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) + %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %e, undef, i32 undef) ret i16 %red } @@ -551,15 +659,18 @@ define signext i16 @vwreduce_uadd_nxv2i8( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv2i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv2i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to %red = call i16 @llvm.vector.reduce.add.nxv2i16( %e) + %red_vp = call i16 @llvm.vp.reduce.add.nxv2i16(i16 undef, %e, undef, i32 undef) ret i16 %red } @@ -568,13 +679,16 @@ declare i16 @llvm.vector.reduce.umax.nxv2i16() define signext i16 @vreduce_umax_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) + %red_vp = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -583,13 +697,16 @@ declare i16 @llvm.vector.reduce.smax.nxv2i16() define signext i16 @vreduce_smax_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) + %red_vp = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -598,13 +715,16 @@ declare i16 
@llvm.vector.reduce.umin.nxv2i16() define signext i16 @vreduce_umin_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) + %red_vp = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -613,13 +733,16 @@ declare i16 @llvm.vector.reduce.smin.nxv2i16() define signext i16 @vreduce_smin_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 
@llvm.vector.reduce.smin.nxv2i16( %v) + %red_vp = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -628,13 +751,16 @@ declare i16 @llvm.vector.reduce.and.nxv2i16() define signext i16 @vreduce_and_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_and_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_and_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) + %red_vp = call i16 @llvm.vp.reduce.and.nxv2i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -643,13 +769,16 @@ declare i16 @llvm.vector.reduce.or.nxv2i16() define signext i16 @vreduce_or_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_or_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_or_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, 
%v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) + %red_vp = call i16 @llvm.vp.reduce.or.nxv2i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -658,13 +787,16 @@ declare i16 @llvm.vector.reduce.xor.nxv2i16() define signext i16 @vreduce_xor_nxv2i16( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) + %red_vp = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -673,13 +805,16 @@ declare i16 @llvm.vector.reduce.add.nxv4i16() define signext i16 @vreduce_add_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_add_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_add_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %v) 
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.add.nxv4i16( %v) + %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -687,15 +822,18 @@ define signext i16 @vwreduce_add_nxv4i8( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) + %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %e, undef, i32 undef) ret i16 %red } @@ -703,15 +841,18 @@ define signext i16 @vwreduce_uadd_nxv4i8( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv4i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp 
= call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv4i8' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %e = sext %v to %red = call i16 @llvm.vector.reduce.add.nxv4i16( %e) + %red_vp = call i16 @llvm.vp.reduce.add.nxv4i16(i16 undef, %e, undef, i32 undef) ret i16 %red } @@ -720,13 +861,16 @@ declare i16 @llvm.vector.reduce.umax.nxv4i16() define signext i16 @vreduce_umax_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) + %red_vp = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -735,13 +879,16 @@ declare i16 @llvm.vector.reduce.smax.nxv4i16() define signext i16 @vreduce_smax_nxv4i16( 
%v) { ; CHECK-LABEL: 'vreduce_smax_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) + %red_vp = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -750,13 +897,16 @@ declare i16 @llvm.vector.reduce.umin.nxv4i16() define signext i16 @vreduce_umin_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) + %red_vp = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 
undef, %v, undef, i32 undef) ret i16 %red } @@ -765,13 +915,16 @@ declare i16 @llvm.vector.reduce.smin.nxv4i16() define signext i16 @vreduce_smin_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) + %red_vp = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -780,13 +933,16 @@ declare i16 @llvm.vector.reduce.and.nxv4i16() define signext i16 @vreduce_and_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_and_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_and_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) + %red_vp = call i16 @llvm.vp.reduce.and.nxv4i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -795,13 +951,16 @@ declare i16 @llvm.vector.reduce.or.nxv4i16() define signext i16 @vreduce_or_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_or_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_or_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) + %red_vp = call i16 @llvm.vp.reduce.or.nxv4i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -810,13 +969,16 @@ declare i16 @llvm.vector.reduce.xor.nxv4i16() define signext i16 @vreduce_xor_nxv4i16( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i16 @llvm.vector.reduce.xor.nxv4i16( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = 
call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %red ; %red = call i16 @llvm.vector.reduce.xor.nxv4i16( %v) + %red_vp = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 undef, %v, undef, i32 undef) ret i16 %red } @@ -825,13 +987,16 @@ declare i32 @llvm.vector.reduce.add.nxv1i32() define signext i32 @vreduce_add_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_add_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_add_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) + %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -839,15 +1004,18 @@ define signext i32 @vwreduce_add_nxv1i16( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv1i16' ; SIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = sext %v to %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) + %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %e, undef, i32 undef) ret i32 %red } @@ -855,15 +1023,18 @@ define signext i32 @vwreduce_uadd_nxv1i16( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv1i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv1i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = zext %v to %red = call i32 @llvm.vector.reduce.add.nxv1i32( %e) + %red_vp = call i32 @llvm.vp.reduce.add.nxv1i32(i32 undef, %e, undef, i32 undef) ret i32 %red } @@ -872,13 +1043,16 @@ declare i32 @llvm.vector.reduce.umax.nxv1i32() define signext i32 @vreduce_umax_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv1i32' ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) + %red_vp = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -887,13 +1061,16 @@ declare i32 @llvm.vector.reduce.smax.nxv1i32() define signext i32 @vreduce_smax_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) + %red_vp = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -902,13 +1079,16 @@ declare i32 
@llvm.vector.reduce.umin.nxv1i32() define signext i32 @vreduce_umin_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) + %red_vp = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -917,13 +1097,16 @@ declare i32 @llvm.vector.reduce.smin.nxv1i32() define signext i32 @vreduce_smin_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 
@llvm.vector.reduce.smin.nxv1i32( %v) + %red_vp = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -932,13 +1115,16 @@ declare i32 @llvm.vector.reduce.and.nxv1i32() define signext i32 @vreduce_and_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_and_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_and_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) + %red_vp = call i32 @llvm.vp.reduce.and.nxv1i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -947,13 +1133,16 @@ declare i32 @llvm.vector.reduce.or.nxv1i32() define signext i32 @vreduce_or_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_or_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_or_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 
undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) + %red_vp = call i32 @llvm.vp.reduce.or.nxv1i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -962,13 +1151,16 @@ declare i32 @llvm.vector.reduce.xor.nxv1i32() define signext i32 @vreduce_xor_nxv1i32( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) + %red_vp = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -977,13 +1169,16 @@ declare i32 @llvm.vector.reduce.add.nxv2i32() define signext i32 @vreduce_add_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_add_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_add_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32
@llvm.vector.reduce.add.nxv2i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.add.nxv2i32( %v) + %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -991,15 +1186,18 @@ define signext i32 @vwreduce_add_nxv2i16( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = sext %v to %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) + %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %e, undef, i32 undef) ret i32 %red } @@ -1007,15 +1205,18 @@ define signext i32 @vwreduce_uadd_nxv2i16( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv2i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) +; CHECK-NEXT: Cost Model: Found an
estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv2i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = zext %v to %red = call i32 @llvm.vector.reduce.add.nxv2i32( %e) + %red_vp = call i32 @llvm.vp.reduce.add.nxv2i32(i32 undef, %e, undef, i32 undef) ret i32 %red } @@ -1024,13 +1225,16 @@ declare i32 @llvm.vector.reduce.umax.nxv2i32() define signext i32 @vreduce_umax_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) + %red_vp = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1039,13 +1243,16 @@ declare i32
@llvm.vector.reduce.smax.nxv2i32() define signext i32 @vreduce_smax_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) + %red_vp = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1054,13 +1261,16 @@ declare i32 @llvm.vector.reduce.umin.nxv2i32() define signext i32 @vreduce_umin_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv2i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 
@llvm.vector.reduce.umin.nxv2i32( %v) + %red_vp = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1069,13 +1279,16 @@ declare i32 @llvm.vector.reduce.smin.nxv2i32() define signext i32 @vreduce_smin_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) + %red_vp = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1084,13 +1297,16 @@ declare i32 @llvm.vector.reduce.and.nxv2i32() define signext i32 @vreduce_and_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_and_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_and_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 
@llvm.vp.reduce.and.nxv2i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) + %red_vp = call i32 @llvm.vp.reduce.and.nxv2i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1099,13 +1315,16 @@ declare i32 @llvm.vector.reduce.or.nxv2i32() define signext i32 @vreduce_or_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_or_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_or_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) + %red_vp = call i32 @llvm.vp.reduce.or.nxv2i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1114,13 +1333,16 @@ declare i32 @llvm.vector.reduce.xor.nxv2i32() define signext i32 @vreduce_xor_nxv2i32( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv2i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 
@llvm.vector.reduce.xor.nxv2i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.xor.nxv2i32( %v) + %red_vp = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1129,13 +1351,16 @@ declare i32 @llvm.vector.reduce.add.nxv4i32() define signext i32 @vreduce_add_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_add_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_add_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) + %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1143,15 +1368,18 @@ define signext i32 @vwreduce_add_nxv4i16( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = sext %v to %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) + %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %e, undef, i32 undef) ret i32 %red } @@ -1159,15 +1387,18 @@ define signext i32 @vwreduce_uadd_nxv4i16( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv4i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv4i16' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %e = zext %v to %red = call i32 @llvm.vector.reduce.add.nxv4i32( %e) + %red_vp = call i32 @llvm.vp.reduce.add.nxv4i32(i32 undef, %e, undef, i32 undef) ret i32 %red } @@ -1176,13 +1407,16 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32() 
define signext i32 @vreduce_umax_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) + %red_vp = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1191,13 +1425,16 @@ declare i32 @llvm.vector.reduce.smax.nxv4i32() define signext i32 @vreduce_smax_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) + %red_vp = 
call i32 @llvm.vp.reduce.smax.nxv4i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1206,13 +1443,16 @@ declare i32 @llvm.vector.reduce.umin.nxv4i32() define signext i32 @vreduce_umin_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) + %red_vp = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1221,13 +1461,16 @@ declare i32 @llvm.vector.reduce.smin.nxv4i32() define signext i32 @vreduce_smin_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, %v, undef, i32 
undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) + %red_vp = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1236,13 +1479,16 @@ declare i32 @llvm.vector.reduce.and.nxv4i32() define signext i32 @vreduce_and_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_and_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_and_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) + %red_vp = call i32 @llvm.vp.reduce.and.nxv4i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1251,13 +1497,16 @@ declare i32 @llvm.vector.reduce.or.nxv4i32() define signext i32 @vreduce_or_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_or_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_or_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) +; SIZE-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) + %red_vp = call i32 @llvm.vp.reduce.or.nxv4i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1266,13 +1515,16 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32() define signext i32 @vreduce_xor_nxv4i32( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %red ; %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) + %red_vp = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 undef, %v, undef, i32 undef) ret i32 %red } @@ -1281,13 +1533,16 @@ declare i64 @llvm.vector.reduce.add.nxv1i64() define i64 @vreduce_add_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_add_nxv1i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_add_nxv1i64' ; SIZE-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.add.nxv1i64( %v) + %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1295,15 +1550,18 @@ define i64 @vwreduce_add_nxv1i32( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = sext %v to %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) + %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %e, undef, i32 undef) ret i64 %red } @@ -1311,15 +1569,18 @@ define i64 @vwreduce_uadd_nxv1i32( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv1i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 
@llvm.vector.reduce.add.nxv1i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv1i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = zext %v to %red = call i64 @llvm.vector.reduce.add.nxv1i64( %e) + %red_vp = call i64 @llvm.vp.reduce.add.nxv1i64(i64 undef, %e, undef, i32 undef) ret i64 %red } @@ -1328,13 +1589,16 @@ declare i64 @llvm.vector.reduce.umax.nxv1i64() define i64 @vreduce_umax_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv1i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv1i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) + %red_vp = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 undef, %v, undef, i32 undef) ret i64 %red } 
@@ -1343,13 +1607,16 @@ declare i64 @llvm.vector.reduce.smax.nxv1i64() define i64 @vreduce_smax_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv1i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv1i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) + %red_vp = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1358,13 +1625,16 @@ declare i64 @llvm.vector.reduce.umin.nxv1i64() define i64 @vreduce_umin_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv1i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv1i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call 
i64 @llvm.vector.reduce.umin.nxv1i64( %v) + %red_vp = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1373,13 +1643,16 @@ declare i64 @llvm.vector.reduce.smin.nxv1i64() define i64 @vreduce_smin_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv1i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv1i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) + %red_vp = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1388,13 +1661,16 @@ declare i64 @llvm.vector.reduce.and.nxv1i64() define i64 @vreduce_and_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_and_nxv1i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_and_nxv1i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 
@llvm.vp.reduce.and.nxv1i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) + %red_vp = call i64 @llvm.vp.reduce.and.nxv1i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1403,13 +1679,16 @@ declare i64 @llvm.vector.reduce.or.nxv1i64() define i64 @vreduce_or_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_or_nxv1i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_or_nxv1i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) + %red_vp = call i64 @llvm.vp.reduce.or.nxv1i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1418,13 +1697,16 @@ declare i64 @llvm.vector.reduce.xor.nxv1i64() define i64 @vreduce_xor_nxv1i64( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv1i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv1i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv1i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 
@llvm.vector.reduce.xor.nxv1i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.xor.nxv1i64( %v) + %red_vp = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1433,13 +1715,16 @@ declare i64 @llvm.vector.reduce.add.nxv2i64() define i64 @vreduce_add_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_add_nxv2i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_add_nxv2i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) + %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1447,15 +1732,18 @@ define i64 @vwreduce_add_nxv2i32( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = sext %v to %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) + %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %e, undef, i32 undef) ret i64 %red } @@ -1463,15 +1751,18 @@ define i64 @vwreduce_uadd_nxv2i32( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv2i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = zext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv2i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = zext %v to %red = call i64 @llvm.vector.reduce.add.nxv2i64( %e) + %red_vp = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, %e, undef, i32 undef) ret i64 %red } @@ -1480,13 +1771,16 @@ declare i64 @llvm.vector.reduce.umax.nxv2i64() define i64 
@vreduce_umax_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv2i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv2i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) + %red_vp = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1495,13 +1789,16 @@ declare i64 @llvm.vector.reduce.smax.nxv2i64() define i64 @vreduce_smax_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv2i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv2i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) + %red_vp = call i64 
@llvm.vp.reduce.smax.nxv2i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1510,13 +1807,16 @@ declare i64 @llvm.vector.reduce.umin.nxv2i64() define i64 @vreduce_umin_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv2i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv2i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) + %red_vp = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1525,13 +1825,16 @@ declare i64 @llvm.vector.reduce.smin.nxv2i64() define i64 @vreduce_smin_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv2i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv2i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) + %red_vp = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1540,13 +1843,16 @@ declare i64 @llvm.vector.reduce.and.nxv2i64() define i64 @vreduce_and_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_and_nxv2i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_and_nxv2i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) + %red_vp = call i64 @llvm.vp.reduce.and.nxv2i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1555,13 +1861,16 @@ declare i64 @llvm.vector.reduce.or.nxv2i64() define i64 @vreduce_or_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_or_nxv2i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_or_nxv2i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 
for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) + %red_vp = call i64 @llvm.vp.reduce.or.nxv2i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1570,13 +1879,16 @@ declare i64 @llvm.vector.reduce.xor.nxv2i64() define i64 @vreduce_xor_nxv2i64( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv2i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv2i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) + %red_vp = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1585,13 +1897,16 @@ declare i64 @llvm.vector.reduce.add.nxv4i64() define i64 @vreduce_add_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_add_nxv4i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_add_nxv4i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) + %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1599,15 +1914,18 @@ define i64 @vwreduce_add_nxv4i32( %v) { ; CHECK-LABEL: 'vwreduce_add_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = sext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vwreduce_add_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = sext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = sext %v to %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) + %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %e, undef, i32 undef) ret i64 %red } @@ -1615,15 +1933,18 @@ define i64 @vwreduce_uadd_nxv4i32( %v) { ; CHECK-LABEL: 'vwreduce_uadd_nxv4i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = zext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %e, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vwreduce_uadd_nxv4i32' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = zext %v to ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %e, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %e = zext %v to %red = call i64 @llvm.vector.reduce.add.nxv4i64( %e) + %red_vp = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, %e, undef, i32 undef) ret i64 %red } @@ -1632,13 +1953,16 @@ declare i64 @llvm.vector.reduce.umax.nxv4i64() define i64 @vreduce_umax_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_umax_nxv4i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_umax_nxv4i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) + %red_vp = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1647,13 +1971,16 @@ declare i64 
@llvm.vector.reduce.smax.nxv4i64() define i64 @vreduce_smax_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_smax_nxv4i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_smax_nxv4i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) + %red_vp = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1662,13 +1989,16 @@ declare i64 @llvm.vector.reduce.umin.nxv4i64() define i64 @vreduce_umin_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_umin_nxv4i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_umin_nxv4i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.umin.nxv4i64( 
%v) + %red_vp = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1677,13 +2007,16 @@ declare i64 @llvm.vector.reduce.smin.nxv4i64() define i64 @vreduce_smin_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_smin_nxv4i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_smin_nxv4i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) + %red_vp = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1692,13 +2025,16 @@ declare i64 @llvm.vector.reduce.and.nxv4i64() define i64 @vreduce_and_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_and_nxv4i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_and_nxv4i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, %v, undef, i32 undef) ; 
SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) + %red_vp = call i64 @llvm.vp.reduce.and.nxv4i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1707,13 +2043,16 @@ declare i64 @llvm.vector.reduce.or.nxv4i64() define i64 @vreduce_or_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_or_nxv4i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_or_nxv4i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) + %red_vp = call i64 @llvm.vp.reduce.or.nxv4i64(i64 undef, %v, undef, i32 undef) ret i64 %red } @@ -1722,12 +2061,15 @@ declare i64 @llvm.vector.reduce.xor.nxv4i64() define i64 @vreduce_xor_nxv4i64( %v) { ; CHECK-LABEL: 'vreduce_xor_nxv4i64' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64( %v) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, %v, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %red ; ; SIZE-LABEL: 'vreduce_xor_nxv4i64' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i64 @llvm.vector.reduce.xor.nxv4i64( %v) +; SIZE-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, %v, undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %red ; %red = call i64 @llvm.vector.reduce.xor.nxv4i64( %v) + %red_vp = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 undef, %v, undef, i32 undef) ret i64 %red } diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll index aa03b02895d5f..bac33bd908f6a 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-xor.ll @@ -14,6 +14,14 @@ define i32 @reduce_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call 
i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i1' @@ -25,6 +33,14 @@ define i32 @reduce_i1(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, 
i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef) @@ -35,6 +51,15 @@ define i32 @reduce_i1(i32 %arg) { %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef) %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef) %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef) + + %V1_vp = call i1 @llvm.vp.reduce.xor.v1i1(i1 undef, <1 x i1> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i1 @llvm.vp.reduce.xor.v2i1(i1 undef, <2 x i1> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i1 @llvm.vp.reduce.xor.v4i1(i1 undef, <4 x i1> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i1 @llvm.vp.reduce.xor.v8i1(i1 undef, <8 x i1> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i1 @llvm.vp.reduce.xor.v16i1(i1 undef, <16 x i1> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i1 @llvm.vp.reduce.xor.v32i1(i1 undef, <32 x i1> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i1 @llvm.vp.reduce.xor.v64i1(i1 undef, <64 x i1> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i1 @llvm.vp.reduce.xor.v128i1(i1 undef, <128 x i1> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -48,6 +73,14 @@ define i32 @reduce_i8(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 
@llvm.vector.reduce.xor.v128i8(<128 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i8' @@ -59,6 +92,14 @@ define i32 @reduce_i8(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef) +; SIZE-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> undef) @@ -69,6 +110,15 @@ define i32 @reduce_i8(i32 %arg) { %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef) %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef) + + %V1_vp = call i8 @llvm.vp.reduce.xor.v1i8(i8 undef, <1 x i8> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i8 @llvm.vp.reduce.xor.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i8 @llvm.vp.reduce.xor.v4i8(i8 undef, <4 x i8> undef, <4 
x i1> undef, i32 undef) + %V8_vp = call i8 @llvm.vp.reduce.xor.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i8 @llvm.vp.reduce.xor.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i8 @llvm.vp.reduce.xor.v32i8(i8 undef, <32 x i8> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i8 @llvm.vp.reduce.xor.v64i8(i8 undef, <64 x i8> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i8 @llvm.vp.reduce.xor.v128i8(i8 undef, <128 x i8> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -82,6 +132,14 @@ define i32 @reduce_i16(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i16 
@llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i16' @@ -93,6 +151,14 @@ define i32 @reduce_i16(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i16 
@llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i16 @llvm.vector.reduce.xor.v1i16(<1 x i16> undef) @@ -103,6 +169,15 @@ define i32 @reduce_i16(i32 %arg) { %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef) %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef) %V128 = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> undef) + + %V1_vp = call i16 @llvm.vp.reduce.xor.v1i16(i16 undef, <1 x i16> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i16 @llvm.vp.reduce.xor.v2i16(i16 undef, <2 x i16> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i16 @llvm.vp.reduce.xor.v4i16(i16 undef, <4 x i16> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i16 @llvm.vp.reduce.xor.v8i16(i16 undef, <8 x i16> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i16 @llvm.vp.reduce.xor.v16i16(i16 undef, <16 x i16> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i16 @llvm.vp.reduce.xor.v32i16(i16 undef, <32 x i16> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i16 @llvm.vp.reduce.xor.v64i16(i16 undef, <64 x i16> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i16 @llvm.vp.reduce.xor.v128i16(i16 undef, <128 x i16> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -116,6 +191,14 @@ define i32 @reduce_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 
@llvm.vector.reduce.xor.v64i32(<64 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i32' @@ -127,6 +210,14 @@ define i32 @reduce_i32(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 
@llvm.vector.reduce.xor.v64i32(<64 x i32> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> undef) @@ -137,6 +228,15 @@ define i32 @reduce_i32(i32 %arg) { %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef) %V64 = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> undef) %V128 = call i32 @llvm.vector.reduce.xor.v128i32(<128 x i32> undef) + + %V1_vp = call 
i32 @llvm.vp.reduce.xor.v1i32(i32 undef, <1 x i32> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i32 @llvm.vp.reduce.xor.v2i32(i32 undef, <2 x i32> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i32 @llvm.vp.reduce.xor.v8i32(i32 undef, <8 x i32> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i32 @llvm.vp.reduce.xor.v16i32(i32 undef, <16 x i32> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i32 @llvm.vp.reduce.xor.v32i32(i32 undef, <32 x i32> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i32 @llvm.vp.reduce.xor.v64i32(i32 undef, <64 x i32> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i32 @llvm.vp.reduce.xor.v128i32(i32 undef, <128 x i32> undef, <128 x i1> undef, i32 undef) ret i32 undef } @@ -150,6 +250,14 @@ define i32 @reduce_i64(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SIZE-LABEL: 'reduce_i64' @@ -161,6 +269,14 @@ define i32 @reduce_i64(i32 %arg) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; SIZE-NEXT: Cost 
Model: Found an estimated cost of 3 for instruction: %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef) @@ -171,6 +287,15 @@ define i32 @reduce_i64(i32 %arg) { %V32 = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> undef) %V64 = call i64 @llvm.vector.reduce.xor.v64i64(<64 x i64> undef) %V128 = call i64 @llvm.vector.reduce.xor.v128i64(<128 x i64> undef) + + %V1_vp = call i64 @llvm.vp.reduce.xor.v1i64(i64 undef, <1 x i64> undef, <1 x i1> undef, i32 undef) + %V2_vp = call i64 @llvm.vp.reduce.xor.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) + %V4_vp = call i64 @llvm.vp.reduce.xor.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) + %V8_vp = call i64 @llvm.vp.reduce.xor.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) + %V16_vp = call i64 @llvm.vp.reduce.xor.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) + %V32_vp = call i64 @llvm.vp.reduce.xor.v32i64(i64 undef, <32 x i64> undef, <32 x i1> undef, i32 undef) + %V64_vp = call i64 @llvm.vp.reduce.xor.v64i64(i64 undef, <64 x i64> undef, <64 x i1> undef, i32 undef) + %V128_vp = call i64 @llvm.vp.reduce.xor.v128i64(i64 undef, <128 x i64> undef, <128 x i1> undef, i32 undef) ret i32 undef } diff --git 
a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll index bb98508f239c1..869e51966e092 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll @@ -1218,37 +1218,37 @@ define void @reduce_add() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'reduce_add' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 
x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %14 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found 
an estimated cost of 6 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %22 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef) -; TYPEBASED-NEXT: Cost Model: Invalid
cost for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -1324,37 +1324,37 @@ define void @reduce_fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'reduce_fadd' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 12 for
instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %6 = call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %8 = call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> 
undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %12 = call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %14 = call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an 
estimated cost of 18 for instruction: %16 = call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vector.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %20 = call float @llvm.vector.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %22 = call float @llvm.vector.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for
instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vector.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef) -; TYPEBASED-NEXT: Cost Model: Invalid cost for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef) ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ;