Commit db73e22

artagnon authored and dvbuka committed
[VPlan] Extend tryToFoldLiveIns to fold binary intrinsics (llvm#161703)
InstSimplifyFolder can fold binary intrinsics, so take the opportunity to unify the code with getOpcodeOrIntrinsicID and handle that case. The additional handling of WidenGEP is non-functional: as the included test shows, the GEP is simplified before it is widened.
1 parent 0e6bb88 commit db73e22
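
For context, here is a minimal standalone C++ sketch (not part of this commit; the function name and setup below are illustrative assumptions) of the InstSimplifyFolder capability the patch wires into VPlan: with both operands constant, FoldBinaryIntrinsic simplifies llvm.umax without an FMF source, the same fold the new constantfolder.ll test observes as "store i64 3".

    // Illustrative sketch only: fold umax(0, 3) through InstSimplifyFolder,
    // relying on the newly defaulted FMFSource argument.
    #include "llvm/Analysis/InstSimplifyFolder.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    // Returns the simplified value (expected: i64 3) or nullptr if no fold applies.
    static Value *foldConstantUMax(Module &M) {
      InstSimplifyFolder Folder(M.getDataLayout());
      Type *I64 = Type::getInt64Ty(M.getContext());
      Value *LHS = ConstantInt::get(I64, 0); // stands in for the folded xor %d, %d
      Value *RHS = ConstantInt::get(I64, 3);
      // FMFSource is omitted, which the new default argument allows.
      return Folder.FoldBinaryIntrinsic(Intrinsic::umax, LHS, RHS, I64);
    }

In the committed change, the equivalent call is made from tryToFoldLiveIns, passing the recipe's live-in operands and its inferred scalar type.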

File tree: 3 files changed, +157 -78 lines

llvm/include/llvm/Analysis/InstSimplifyFolder.h

Lines changed: 1 addition & 1 deletion
@@ -120,7 +120,7 @@ class LLVM_ABI InstSimplifyFolder final : public IRBuilderFolder {
   }
 
   Value *FoldBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, Type *Ty,
-                             Instruction *FMFSource) const override {
+                             Instruction *FMFSource = nullptr) const override {
     return simplifyBinaryIntrinsic(ID, Ty, LHS, RHS, SQ,
                                    dyn_cast_if_present<CallBase>(FMFSource));
   }

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 87 additions & 77 deletions
@@ -943,56 +943,98 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
   }
 }
 
+/// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
+/// Returns an optional pair, where the first element indicates whether it is
+/// an intrinsic ID.
+static std::optional<std::pair<bool, unsigned>>
+getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
+  return TypeSwitch<const VPSingleDefRecipe *,
+                    std::optional<std::pair<bool, unsigned>>>(R)
+      .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+            VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
+          [](auto *I) { return std::make_pair(false, I->getOpcode()); })
+      .Case<VPWidenIntrinsicRecipe>([](auto *I) {
+        return std::make_pair(true, I->getVectorIntrinsicID());
+      })
+      .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
+        // For recipes that do not directly map to LLVM IR instructions,
+        // assign opcodes after the last VPInstruction opcode (which is also
+        // after the last IR Instruction opcode), based on the VPDefID.
+        return std::make_pair(false,
+                              VPInstruction::OpsEnd + 1 + I->getVPDefID());
+      })
+      .Default([](auto *) { return std::nullopt; });
+}
+
 /// Try to fold \p R using InstSimplifyFolder. Will succeed and return a
-/// non-nullptr Value for a handled \p Opcode if corresponding \p Operands are
-/// foldable live-ins.
-static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
-                               ArrayRef<VPValue *> Operands,
-                               const DataLayout &DL, VPTypeAnalysis &TypeInfo) {
+/// non-nullptr VPValue for a handled opcode or intrinsic ID if corresponding \p
+/// Operands are foldable live-ins.
+static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
+                                 ArrayRef<VPValue *> Operands,
+                                 const DataLayout &DL,
+                                 VPTypeAnalysis &TypeInfo) {
+  auto OpcodeOrIID = getOpcodeOrIntrinsicID(&R);
+  if (!OpcodeOrIID)
+    return nullptr;
+
   SmallVector<Value *, 4> Ops;
   for (VPValue *Op : Operands) {
     if (!Op->isLiveIn() || !Op->getLiveInIRValue())
       return nullptr;
     Ops.push_back(Op->getLiveInIRValue());
   }
 
-  InstSimplifyFolder Folder(DL);
-  if (Instruction::isBinaryOp(Opcode))
-    return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
+  auto FoldToIRValue = [&]() -> Value * {
+    InstSimplifyFolder Folder(DL);
+    if (OpcodeOrIID->first) {
+      if (R.getNumOperands() != 2)
+        return nullptr;
+      unsigned ID = OpcodeOrIID->second;
+      return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
+                                        TypeInfo.inferScalarType(&R));
+    }
+    unsigned Opcode = OpcodeOrIID->second;
+    if (Instruction::isBinaryOp(Opcode))
+      return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
+                              Ops[0], Ops[1]);
+    if (Instruction::isCast(Opcode))
+      return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
+                             TypeInfo.inferScalarType(R.getVPSingleValue()));
+    switch (Opcode) {
+    case VPInstruction::LogicalAnd:
+      return Folder.FoldSelect(Ops[0], Ops[1],
+                               ConstantInt::getNullValue(Ops[1]->getType()));
+    case VPInstruction::Not:
+      return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
+                              Constant::getAllOnesValue(Ops[0]->getType()));
+    case Instruction::Select:
+      return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+      return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
                             Ops[1]);
-  if (Instruction::isCast(Opcode))
-    return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
-                           TypeInfo.inferScalarType(R.getVPSingleValue()));
-  switch (Opcode) {
-  case VPInstruction::LogicalAnd:
-    return Folder.FoldSelect(Ops[0], Ops[1],
-                             ConstantInt::getNullValue(Ops[1]->getType()));
-  case VPInstruction::Not:
-    return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
-                            Constant::getAllOnesValue(Ops[0]->getType()));
-  case Instruction::Select:
-    return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
-  case Instruction::ICmp:
-  case Instruction::FCmp:
-    return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
-                          Ops[1]);
-  case Instruction::GetElementPtr: {
-    auto &RFlags = cast<VPRecipeWithIRFlags>(R);
-    auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
-    return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
-                          RFlags.getGEPNoWrapFlags());
-  }
-  case VPInstruction::PtrAdd:
-  case VPInstruction::WidePtrAdd:
-    return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
-                          Ops[1],
-                          cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
-  // An extract of a live-in is an extract of a broadcast, so return the
-  // broadcasted element.
-  case Instruction::ExtractElement:
-    assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
-    return Ops[0];
-  }
+    case Instruction::GetElementPtr: {
+      auto &RFlags = cast<VPRecipeWithIRFlags>(R);
+      auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
+      return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
+                            drop_begin(Ops), RFlags.getGEPNoWrapFlags());
+    }
+    case VPInstruction::PtrAdd:
+    case VPInstruction::WidePtrAdd:
+      return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()),
+                            Ops[0], Ops[1],
+                            cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
+    // An extract of a live-in is an extract of a broadcast, so return the
+    // broadcasted element.
+    case Instruction::ExtractElement:
+      assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
+      return Ops[0];
+    }
+    return nullptr;
+  };
+
+  if (Value *V = FoldToIRValue())
+    return R.getParent()->getPlan()->getOrAddLiveIn(V);
   return nullptr;
 }

@@ -1006,19 +1048,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
 
   // Simplification of live-in IR values for SingleDef recipes using
   // InstSimplifyFolder.
-  if (TypeSwitch<VPRecipeBase *, bool>(&R)
-          .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
-                VPReplicateRecipe, VPWidenSelectRecipe>([&](auto *I) {
-            const DataLayout &DL =
-                Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
-            Value *V = tryToFoldLiveIns(*I, I->getOpcode(), I->operands(), DL,
-                                        TypeInfo);
-            if (V)
-              I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
-            return V;
-          })
-          .Default([](auto *) { return false; }))
-    return;
+  const DataLayout &DL =
+      Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
+  if (VPValue *V = tryToFoldLiveIns(*Def, Def->operands(), DL, TypeInfo))
+    return Def->replaceAllUsesWith(V);
 
   // Fold PredPHI LiveIn -> LiveIn.
   if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
@@ -1996,29 +2029,6 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
     return Def == getEmptyKey() || Def == getTombstoneKey();
   }
 
-  /// Get any instruction opcode or intrinsic ID data embedded in recipe \p R.
-  /// Returns an optional pair, where the first element indicates whether it is
-  /// an intrinsic ID.
-  static std::optional<std::pair<bool, unsigned>>
-  getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R) {
-    return TypeSwitch<const VPSingleDefRecipe *,
-                      std::optional<std::pair<bool, unsigned>>>(R)
-        .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
-              VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
-            [](auto *I) { return std::make_pair(false, I->getOpcode()); })
-        .Case<VPWidenIntrinsicRecipe>([](auto *I) {
-          return std::make_pair(true, I->getVectorIntrinsicID());
-        })
-        .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
-          // For recipes that do not directly map to LLVM IR instructions,
-          // assign opcodes after the last VPInstruction opcode (which is also
-          // after the last IR Instruction opcode), based on the VPDefID.
-          return std::make_pair(false,
-                                VPInstruction::OpsEnd + 1 + I->getVPDefID());
-        })
-        .Default([](auto *) { return std::nullopt; });
-  }
-
   /// If recipe \p R will lower to a GEP with a non-i8 source element type,
   /// return that source element type.
   static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {

llvm/test/Transforms/LoopVectorize/constantfolder.ll

Lines changed: 69 additions & 0 deletions
@@ -288,3 +288,72 @@
 exit:
   ret void
 }
+
+define void @const_fold_binaryintrinsic(ptr %dst, i64 %d) {
+; CHECK-LABEL: define void @const_fold_binaryintrinsic(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    store i64 3, ptr [[DST]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %const.0 = xor i64 %d, %d
+  %trunc = call i64 @llvm.umax.i64(i64 %const.0, i64 3)
+  store i64 %trunc, ptr %dst, align 2
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp ult i64 %iv.next, 100
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @const_fold_widegep(ptr noalias %A, ptr noalias %B, i64 %d) {
+; CHECK-LABEL: define void @const_fold_widegep(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[D:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    store ptr [[A]], ptr [[B]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %const.0 = xor i64 %d, %d
+  %gep.A = getelementptr i64, ptr %A, i64 %const.0
+  %gep.B = getelementptr i64, ptr %B, i64 %const.0
+  store ptr %gep.A, ptr %gep.B
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exit.cond = icmp ult i64 %iv.next, 100
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:
+  ret void
+}
