Skip to content

Commit 026935f

Browse files
committed
Rebase and address review
1 parent 62490e4 commit 026935f

File tree

4 files changed

+97
-70
lines changed

4 files changed

+97
-70
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2869,15 +2869,16 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
28692869
unsigned Opcode = RecurrenceDescriptor::getOpcode(
28702870
cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind());
28712871
auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
2872-
if (isa<VPPartialReductionRecipe>(ExpressionRecipes.back())) {
2873-
return Ctx.TTI.getPartialReductionCost(
2874-
Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr, RedTy, VF,
2875-
TargetTransformInfo::getPartialReductionExtendKind(ExtR->getOpcode()),
2876-
TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind);
2877-
}
2878-
return Ctx.TTI.getExtendedReductionCost(
2879-
Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy, SrcVecTy,
2880-
std::nullopt, Ctx.CostKind);
2872+
return isa<VPPartialReductionRecipe>(ExpressionRecipes.back())
2873+
? Ctx.TTI.getPartialReductionCost(
2874+
Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr,
2875+
RedTy, VF,
2876+
TargetTransformInfo::getPartialReductionExtendKind(
2877+
ExtR->getOpcode()),
2878+
TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind)
2879+
: Ctx.TTI.getExtendedReductionCost(
2880+
Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy,
2881+
SrcVecTy, std::nullopt, Ctx.CostKind);
28812882
}
28822883
case ExpressionTypes::MulAccReduction:
28832884
return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -538,7 +538,7 @@ loop:
538538
%mul = mul i32 %conv, %conv
539539
%mul.ext = zext i32 %mul to i64
540540
%add = add i64 %res2, %mul.ext
541-
%or = or i32 %mul, %c
541+
%second_use = or i32 %mul, %c ; this value is otherwise unused, but that's sufficient for the test
542542
%load.ext = sext i16 %load to i32
543543
%load.ext.ext = sext i32 %load.ext to i64
544544
%exitcond740.not = icmp eq i64 %iv, %n

0 commit comments

Comments
 (0)