From 93dfdaf1a3badf1de801823623f90f40f43ced34 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 18 Aug 2025 16:46:55 +0100 Subject: [PATCH 1/3] [VPlan] Compute cost of replicating calls in VPlan. (NFCI) Implement computing the scalarization overhead for replicating calls in VPlan, matching the legacy cost model. Depends on https://github.com/llvm/llvm-project/pull/154126. --- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 44 +++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f8fde0500b77a..c3cfd15d9a6ea 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3002,13 +3002,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, // instruction cost. return 0; case Instruction::Call: { - if (!isSingleScalar()) { - // TODO: Handle remaining call costs here as well. - if (VF.isScalable()) - return InstructionCost::getInvalid(); - break; - } - auto *CalledFn = cast(getOperand(getNumOperands() - 1)->getLiveInIRValue()); if (CalledFn->isIntrinsic()) @@ -3017,8 +3010,43 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, SmallVector Tys; for (VPValue *ArgOp : drop_end(operands())) Tys.push_back(Ctx.Types.inferScalarType(ArgOp)); + Type *ResultTy = Ctx.Types.inferScalarType(this); - return Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind); + InstructionCost ScalarCallCost = + Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind); + if (isSingleScalar()) + return ScalarCallCost; + + if (VF.isScalable()) + return InstructionCost::getInvalid(); + + // Compute the cost of scalarizing the result and operands if needed. + InstructionCost ScalarizationCost = 0; + if (VF.isVector()) { + if (!ResultTy->isVoidTy()) { + for (Type *VectorTy : getContainedTypes(toVectorizedTy(ResultTy, VF))) { + ScalarizationCost += Ctx.TTI.getScalarizationOverhead( + cast(VectorTy), APInt::getAllOnes(VF.getFixedValue()), + /*Insert=*/true, + /*Extract=*/false, Ctx.CostKind); + } + } + // Skip operands that do not require extraction/scalarization and do not + // incur any overhead. + SmallVector Tys; + SmallPtrSet UniqueOperands; + for (auto *Op : drop_end(operands())) { + if (Op->isLiveIn() || isa(Op) || + !UniqueOperands.insert(Op).second) + continue; + Tys.push_back(toVectorizedTy(Ctx.Types.inferScalarType(Op), VF)); + } + ScalarizationCost += + Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind); + } + + return ScalarCallCost * (isSingleScalar() ? 1 : VF.getFixedValue()) + + ScalarizationCost; } case Instruction::Add: case Instruction::Sub: From 5fdb4e5d523e2675527880c10368486478276ef7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 26 Aug 2025 09:26:16 +0100 Subject: [PATCH 2/3] !fixup use clear instead of new smallvector. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 097f8c2b4d3bf..5bbf8ccca4c32 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3040,7 +3040,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, SmallVector Tys; for (VPValue *ArgOp : drop_end(operands())) Tys.push_back(Ctx.Types.inferScalarType(ArgOp)); - Type *ResultTy = Ctx.Types.inferScalarType(this); InstructionCost ScalarCallCost = Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind); @@ -3063,8 +3062,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, } // Skip operands that do not require extraction/scalarization and do not // incur any overhead. - SmallVector Tys; SmallPtrSet UniqueOperands; + Tys.clear(); for (auto *Op : drop_end(operands())) { if (Op->isLiveIn() || isa(Op) || !UniqueOperands.insert(Op).second) From ddc132b048c6503de4dda61986b466b908189b13 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 26 Aug 2025 12:43:52 +0100 Subject: [PATCH 3/3] !fixup use to_vector to fix stack-use-after-free. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f7517861f716e..86834ab1240c1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3068,7 +3068,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, InstructionCost ScalarizationCost = 0; if (VF.isVector()) { if (!ResultTy->isVoidTy()) { - for (Type *VectorTy : getContainedTypes(toVectorizedTy(ResultTy, VF))) { + for (Type *VectorTy : + to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) { ScalarizationCost += Ctx.TTI.getScalarizationOverhead( cast(VectorTy), APInt::getAllOnes(VF.getFixedValue()), /*Insert=*/true,