@@ -213,6 +213,12 @@ typedef TargetTransformInfo TTI;
213213// / for IR-level transformations.
214214class TargetTransformInfo {
215215public:
216+ enum PartialReductionExtendKind { PR_None, PR_SignExtend, PR_ZeroExtend };
217+
218+ // / Get the kind of extension that an instruction represents.
219+ static PartialReductionExtendKind
220+ getPartialReductionExtendKind (Instruction *I);
221+
216222 // / Construct a TTI object using a type implementing the \c Concept
217223 // / API below.
218224 // /
@@ -1257,6 +1263,20 @@ class TargetTransformInfo {
12571263 // / \return if target want to issue a prefetch in address space \p AS.
12581264 bool shouldPrefetchAddressSpace (unsigned AS) const ;
12591265
1266+ // / \return The cost of a partial reduction, which is a reduction from a
1267+ // / vector to another vector with fewer elements of larger size. They are
1268+ // / represented by the llvm.experimental.partial.reduce.add intrinsic, which
1269+ // / takes an accumulator and a binary operation operand that itself is fed by
1270+ // / two extends. An example of an operation that uses a partial reduction is a
1271+ // / dot product, which reduces two vectors to another of 4 times fewer and 4
1272+ // / times larger elements.
1273+ InstructionCost
1274+ getPartialReductionCost (unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
1275+ Type *AccumType, ElementCount VF,
1276+ PartialReductionExtendKind OpAExtend,
1277+ PartialReductionExtendKind OpBExtend,
1278+ std::optional<unsigned > BinOp = std::nullopt ) const ;
1279+
12601280 // / \return The maximum interleave factor that any transform should try to
12611281 // / perform for this target. This number depends on the level of parallelism
12621282 // / and the number of execution units in the CPU.
@@ -2034,6 +2054,20 @@ class TargetTransformInfo::Concept {
20342054 // / \return if target want to issue a prefetch in address space \p AS.
20352055 virtual bool shouldPrefetchAddressSpace (unsigned AS) const = 0;
20362056
2057+ // / \return The cost of a partial reduction, which is a reduction from a
2058+ // / vector to another vector with fewer elements of larger size. They are
2059+ // / represented by the llvm.experimental.partial.reduce.add intrinsic, which
2060+ // / takes an accumulator and a binary operation operand that itself is fed by
2061+ // / two extends. An example of an operation that uses a partial reduction is a
2062+ // / dot product, which reduces two vectors to another of 4 times fewer and 4
2063+ // / times larger elements.
2064+ virtual InstructionCost
2065+ getPartialReductionCost (unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
2066+ Type *AccumType, ElementCount VF,
2067+ PartialReductionExtendKind OpAExtend,
2068+ PartialReductionExtendKind OpBExtend,
2069+ std::optional<unsigned > BinOp) const = 0 ;
2070+
20372071 virtual unsigned getMaxInterleaveFactor (ElementCount VF) = 0;
20382072 virtual InstructionCost getArithmeticInstrCost (
20392073 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
@@ -2669,6 +2703,16 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
26692703 return Impl.shouldPrefetchAddressSpace (AS);
26702704 }
26712705
2706+ InstructionCost getPartialReductionCost (
2707+ unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
2708+ ElementCount VF, PartialReductionExtendKind OpAExtend,
2709+ PartialReductionExtendKind OpBExtend,
2710+ std::optional<unsigned > BinOp = std::nullopt ) const override {
2711+ return Impl.getPartialReductionCost (Opcode, InputTypeA, InputTypeB,
2712+ AccumType, VF, OpAExtend, OpBExtend,
2713+ BinOp);
2714+ }
2715+
26722716 unsigned getMaxInterleaveFactor (ElementCount VF) override {
26732717 return Impl.getMaxInterleaveFactor (VF);
26742718 }
0 commit comments