From b8a2c2967b2d8364964a42ddd1cc8d012a3fd6a5 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 30 Oct 2024 23:33:50 +0800 Subject: [PATCH] [RISCV] Cost ordered bf16/f16 w/ zvfhmin reductions as invalid This moves the check earlier to fix this. This also uses BasicTTIImpl instead which now assigns a valid but expensive cost for fixed-length vectors, which reflects how codegen will actually scalarize them. --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 10 +- .../Analysis/CostModel/RISCV/reduce-fadd.ll | 151 ++++++++++-------- 2 files changed, 89 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index ac9d5ee3ebb9d..b84c94f718b28 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1583,6 +1583,11 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S}; break; case ISD::FADD: + // We can't promote f16/bf16 fadd reductions. + if ((LT.second.getVectorElementType() == MVT::f16 && + !ST->hasVInstructionsF16()) || + LT.second.getVectorElementType() == MVT::bf16) + return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); if (TTI::requiresOrderedReduction(FMF)) { Opcodes.push_back(RISCV::VFMV_S_F); for (unsigned i = 0; i < LT.first.getValue(); i++) @@ -1590,11 +1595,6 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Opcodes.push_back(RISCV::VFMV_F_S); return getRISCVInstructionCost(Opcodes, LT.second, CostKind); } - // We can't promote f16/bf16 fadd reductions. - if ((LT.second.getVectorElementType() == MVT::f16 && - !ST->hasVInstructionsF16()) || - LT.second.getVectorElementType() == MVT::bf16) - return InstructionCost::getInvalid(); SplitOp = RISCV::VFADD_VV; Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S}; break; diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll index 196e7376677a5..1762f701a9b2d 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll @@ -5,14 +5,14 @@ define void @reduce_fadd_bfloat() { ; FP-REDUCE-LABEL: 'reduce_fadd_bfloat' -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) @@ -22,14 +22,14 @@ define void @reduce_fadd_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_bfloat' -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) -; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) @@ -74,14 +74,14 @@ define void @reduce_fadd_half() { ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half' -; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) -; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) -; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) -; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) -; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) -; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) -; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) -; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) @@ -222,36 +222,36 @@ define void @reduce_fadd_double() { define void @reduce_ordered_fadd_bfloat() { ; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fadd_bfloat' -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) -; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -272,22 +272,39 @@ define void @reduce_ordered_fadd_bfloat() { } define void @reduce_ordered_fadd_half() { -; FP-REDUCE-LABEL: 'reduce_ordered_fadd_half' -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) -; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; FP-REDUCE-ZVFH-LABEL: 'reduce_ordered_fadd_half' +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_ordered_fadd_half' +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fadd_half' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)