|
18 | 18 | #include "llvm/CodeGen/BasicTTIImpl.h" |
19 | 19 | #include "llvm/CodeGen/CostTable.h" |
20 | 20 | #include "llvm/CodeGen/TargetLowering.h" |
| 21 | +#include "llvm/IR/DerivedTypes.h" |
21 | 22 | #include "llvm/IR/IntrinsicInst.h" |
22 | 23 | #include "llvm/IR/Intrinsics.h" |
23 | 24 | #include "llvm/IR/IntrinsicsAArch64.h" |
@@ -3616,17 +3617,26 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( |
3616 | 3617 | // When SVE is available, we get: |
3617 | 3618 | // smulh + lsr + add/sub + asr + add/sub. |
3618 | 3619 | if (Ty->isScalableTy() && ST->hasSVE()) |
3619 | | - return 2 * MulCost /*smulh cost*/ + 2 * AddCost + 2 * AsrCost; |
| 3620 | + return MulCost /*smulh cost*/ + 2 * AddCost + 2 * AsrCost; |
3620 | 3621 | return 2 * MulCost + AddCost /*uzp2 cost*/ + AsrCost + UsraCost; |
3621 | 3622 | } |
3622 | 3623 | } |
3623 | 3624 | } |
3624 | 3625 | if (Op2Info.isConstant() && !Op2Info.isUniform() && |
3625 | 3626 | LT.second.isFixedLengthVector()) { |
3626 | | - auto VT = TLI->getValueType(DL, Ty); |
3627 | | - return VT.getVectorNumElements() * |
3628 | | - getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind, |
3629 | | - Op1Info.getNoProps(), Op2Info.getNoProps()); |
| 3627 | + // FIXME: When the constant vector is non-uniform, this may result in |
| 3628 | + // loading the vector from the constant pool or, in some cases, in |
| 3629 | + // scalarization. For now, we approximate this with the |
| 3630 | + // scalarization cost. |
| 3631 | + auto ExtractCost = 2 * getVectorInstrCost(Instruction::ExtractElement, Ty, |
| 3632 | + CostKind, -1, nullptr, nullptr); |
| 3633 | + auto InsertCost = getVectorInstrCost(Instruction::InsertElement, Ty, |
| 3634 | + CostKind, -1, nullptr, nullptr); |
| 3635 | + unsigned NElts = cast<FixedVectorType>(Ty)->getNumElements(); |
| 3636 | + return ExtractCost + InsertCost + |
| 3637 | + NElts * getArithmeticInstrCost(Opcode, Ty->getScalarType(), |
| 3638 | + CostKind, Op1Info.getNoProps(), |
| 3639 | + Op2Info.getNoProps()); |
3630 | 3640 | } |
3631 | 3641 | [[fallthrough]]; |
3632 | 3642 | case ISD::UDIV: |
|
0 commit comments