Skip to content

Commit 35bad22

Browse files
authored
[PredicateInfo] Use bitcast instead of ssa.copy (#151174)
PredicateInfo needs some no-op to which the predicate can be attached. Currently this is an ssa.copy intrinsic. This PR replaces it with a no-op bitcast. Using a bitcast is more efficient because we don't have the overhead of an overloaded intrinsic. It also makes things slightly simpler overall.
1 parent e8918c3 commit 35bad22

File tree

15 files changed

+237
-271
lines changed

15 files changed

+237
-271
lines changed

llvm/include/llvm/Transforms/Utils/PredicateInfo.h

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
/// %cmp = icmp eq i32, %x, 50
3131
/// br i1 %cmp, label %true, label %false
3232
/// true:
33-
/// %x.0 = call \@llvm.ssa_copy.i32(i32 %x)
33+
/// %x.0 = bitcast i32 %x to i32
3434
/// ret i32 %x.0
3535
/// false:
3636
/// ret i32 1
@@ -70,7 +70,7 @@ class raw_ostream;
7070
enum PredicateType { PT_Branch, PT_Assume, PT_Switch };
7171

7272
/// Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op
73-
/// is the value the constraint applies to (the ssa.copy result).
73+
/// is the value the constraint applies to (the bitcast result).
7474
struct PredicateConstraint {
7575
CmpInst::Predicate Predicate;
7676
Value *OtherOp;
@@ -177,7 +177,6 @@ class PredicateInfo {
177177
public:
178178
LLVM_ABI PredicateInfo(Function &, DominatorTree &, AssumptionCache &,
179179
BumpPtrAllocator &);
180-
LLVM_ABI ~PredicateInfo();
181180

182181
LLVM_ABI void verifyPredicateInfo() const;
183182

@@ -200,10 +199,6 @@ class PredicateInfo {
200199
// the Predicate Info, they belong to the ValueInfo structs in the ValueInfos
201200
// vector.
202201
DenseMap<const Value *, const PredicateBase *> PredicateMap;
203-
// The set of ssa_copy declarations we created with our custom mangling.
204-
SmallSet<AssertingVH<Function>, 20> CreatedDeclarations;
205-
// Cache of ssa.copy declaration for a given type.
206-
SmallDenseMap<Type *, Function *> DeclarationCache;
207202
};
208203

209204
/// Printer pass for \c PredicateInfo.

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -400,12 +400,6 @@ Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
400400
Constant *InstCostVisitor::visitCallBase(CallBase &I) {
401401
assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
402402

403-
// Look through calls to ssa_copy intrinsics.
404-
if (auto *II = dyn_cast<IntrinsicInst>(&I);
405-
II && II->getIntrinsicID() == Intrinsic::ssa_copy) {
406-
return LastVisited->second;
407-
}
408-
409403
Function *F = I.getCalledFunction();
410404
if (!F || !canConstantFoldCallTo(&I, F))
411405
return nullptr;
@@ -611,17 +605,15 @@ void FunctionSpecializer::promoteConstantStackValues(Function *F) {
611605
}
612606
}
613607

614-
// ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
615-
// interfere with the promoteConstantStackValues() optimization.
608+
// The SCCP solver inserts bitcasts for PredicateInfo. These interfere with the
609+
// promoteConstantStackValues() optimization.
616610
static void removeSSACopy(Function &F) {
617611
for (BasicBlock &BB : F) {
618612
for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
619-
auto *II = dyn_cast<IntrinsicInst>(&Inst);
620-
if (!II)
621-
continue;
622-
if (II->getIntrinsicID() != Intrinsic::ssa_copy)
613+
auto *BC = dyn_cast<BitCastInst>(&Inst);
614+
if (!BC || BC->getType() != BC->getOperand(0)->getType())
623615
continue;
624-
Inst.replaceAllUsesWith(II->getOperand(0));
616+
Inst.replaceAllUsesWith(BC->getOperand(0));
625617
Inst.eraseFromParent();
626618
}
627619
}

llvm/lib/Transforms/Scalar/NewGVN.cpp

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ class NewGVN {
651651
BitVector TouchedInstructions;
652652

653653
DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
654-
mutable DenseMap<const IntrinsicInst *, const Value *> PredicateSwapChoice;
654+
mutable DenseMap<const BitCastInst *, const Value *> PredicateSwapChoice;
655655

656656
#ifndef NDEBUG
657657
// Debugging for how many times each block and instruction got processed.
@@ -819,7 +819,7 @@ class NewGVN {
819819
BasicBlock *PHIBlock) const;
820820
const Expression *performSymbolicAggrValueEvaluation(Instruction *) const;
821821
ExprResult performSymbolicCmpEvaluation(Instruction *) const;
822-
ExprResult performSymbolicPredicateInfoEvaluation(IntrinsicInst *) const;
822+
ExprResult performSymbolicPredicateInfoEvaluation(BitCastInst *) const;
823823

824824
// Congruence finding.
825825
bool someEquivalentDominates(const Instruction *, const Instruction *) const;
@@ -841,7 +841,7 @@ class NewGVN {
841841
unsigned int getRank(const Value *) const;
842842
bool shouldSwapOperands(const Value *, const Value *) const;
843843
bool shouldSwapOperandsForPredicate(const Value *, const Value *,
844-
const IntrinsicInst *I) const;
844+
const BitCastInst *I) const;
845845

846846
// Reachability handling.
847847
void updateReachableEdge(BasicBlock *, BasicBlock *);
@@ -1013,9 +1013,9 @@ void NewGVN::deleteExpression(const Expression *E) const {
10131013

10141014
// If V is a predicateinfo copy, get the thing it is a copy of.
10151015
static Value *getCopyOf(const Value *V) {
1016-
if (auto *II = dyn_cast<IntrinsicInst>(V))
1017-
if (II->getIntrinsicID() == Intrinsic::ssa_copy)
1018-
return II->getOperand(0);
1016+
if (auto *BC = dyn_cast<BitCastInst>(V))
1017+
if (BC->getType() == BC->getOperand(0)->getType())
1018+
return BC->getOperand(0);
10191019
return nullptr;
10201020
}
10211021

@@ -1604,7 +1604,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
16041604
}
16051605

16061606
NewGVN::ExprResult
1607-
NewGVN::performSymbolicPredicateInfoEvaluation(IntrinsicInst *I) const {
1607+
NewGVN::performSymbolicPredicateInfoEvaluation(BitCastInst *I) const {
16081608
auto *PI = PredInfo->getPredicateInfoFor(I);
16091609
if (!PI)
16101610
return ExprResult::none();
@@ -1647,13 +1647,8 @@ NewGVN::performSymbolicPredicateInfoEvaluation(IntrinsicInst *I) const {
16471647
NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
16481648
auto *CI = cast<CallInst>(I);
16491649
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1650-
// Intrinsics with the returned attribute are copies of arguments.
1651-
if (auto *ReturnedValue = II->getReturnedArgOperand()) {
1652-
if (II->getIntrinsicID() == Intrinsic::ssa_copy)
1653-
if (auto Res = performSymbolicPredicateInfoEvaluation(II))
1654-
return Res;
1650+
if (auto *ReturnedValue = II->getReturnedArgOperand())
16551651
return ExprResult::some(createVariableOrConstant(ReturnedValue));
1656-
}
16571652
}
16581653

16591654
// FIXME: Currently the calls which may access the thread id may
@@ -2032,6 +2027,12 @@ NewGVN::performSymbolicEvaluation(Instruction *I,
20322027
E = performSymbolicLoadEvaluation(I);
20332028
break;
20342029
case Instruction::BitCast:
2030+
// A no-op (same-type) bitcast may be a PredicateInfo copy; try that first.
2031+
if (I->getType() == I->getOperand(0)->getType())
2032+
if (auto Res =
2033+
performSymbolicPredicateInfoEvaluation(cast<BitCastInst>(I)))
2034+
return Res;
2035+
[[fallthrough]];
20352036
case Instruction::AddrSpaceCast:
20362037
case Instruction::Freeze:
20372038
return createExpression(I);
@@ -4075,8 +4076,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
40754076
if (DominatingLeader != Def) {
40764077
// Even if the instruction is removed, we still need to update
40774078
// flags/metadata due to downstreams users of the leader.
4078-
if (!match(DefI, m_Intrinsic<Intrinsic::ssa_copy>()))
4079-
patchReplacementInstruction(DefI, DominatingLeader);
4079+
patchReplacementInstruction(DefI, DominatingLeader);
40804080

40814081
SmallVector<DbgVariableRecord *> DVRUsers;
40824082
findDbgUsers(DefI, DVRUsers);
@@ -4116,10 +4116,14 @@ bool NewGVN::eliminateInstructions(Function &F) {
41164116

41174117
Value *DominatingLeader = EliminationStack.back();
41184118

4119-
auto *II = dyn_cast<IntrinsicInst>(DominatingLeader);
4120-
bool isSSACopy = II && II->getIntrinsicID() == Intrinsic::ssa_copy;
4121-
if (isSSACopy)
4122-
DominatingLeader = II->getOperand(0);
4119+
Instruction *SSACopy = nullptr;
4120+
if (auto *BC = dyn_cast<BitCastInst>(DominatingLeader)) {
4121+
if (BC->getType() == BC->getOperand(0)->getType() &&
4122+
PredInfo->getPredicateInfoFor(DominatingLeader)) {
4123+
SSACopy = BC;
4124+
DominatingLeader = BC->getOperand(0);
4125+
}
4126+
}
41234127

41244128
// Don't replace our existing users with ourselves.
41254129
if (U->get() == DominatingLeader)
@@ -4145,12 +4149,12 @@ bool NewGVN::eliminateInstructions(Function &F) {
41454149
ProbablyDead.erase(cast<Instruction>(DominatingLeader));
41464150
// For copy instructions, we use their operand as a leader,
41474151
// which means we remove a user of the copy and it may become dead.
4148-
if (isSSACopy) {
4149-
auto It = UseCounts.find(II);
4152+
if (SSACopy) {
4153+
auto It = UseCounts.find(SSACopy);
41504154
if (It != UseCounts.end()) {
41514155
unsigned &IIUseCount = It->second;
41524156
if (--IIUseCount == 0)
4153-
ProbablyDead.insert(II);
4157+
ProbablyDead.insert(SSACopy);
41544158
}
41554159
}
41564160
++LeaderUseCount;
@@ -4251,7 +4255,7 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const {
42514255
}
42524256

42534257
bool NewGVN::shouldSwapOperandsForPredicate(const Value *A, const Value *B,
4254-
const IntrinsicInst *I) const {
4258+
const BitCastInst *I) const {
42554259
if (shouldSwapOperands(A, B)) {
42564260
PredicateSwapChoice[I] = B;
42574261
return true;

llvm/lib/Transforms/Utils/PredicateInfo.cpp

Lines changed: 12 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -506,33 +506,19 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
506506
ValInfo->RenamedOp = (RenameStack.end() - Start) == RenameStack.begin()
507507
? OrigOp
508508
: (RenameStack.end() - Start - 1)->Def;
509-
auto CreateSSACopy = [this](IRBuilderBase &B, Value *Op,
510-
const Twine &Name = "") {
511-
auto It = PI.DeclarationCache.try_emplace(Op->getType());
512-
if (It.second) {
513-
// The number of named values is used to detect if a new declaration
514-
// was added. If so, that declaration is tracked so that it can be
515-
// removed when the analysis is done. The corner case were a new
516-
// declaration results in a name clash and the old name being renamed
517-
// is not considered as that represents an invalid module.
518-
auto NumDecls = F.getParent()->getNumNamedValues();
519-
Function *IF = Intrinsic::getOrInsertDeclaration(
520-
F.getParent(), Intrinsic::ssa_copy, Op->getType());
521-
if (NumDecls != F.getParent()->getNumNamedValues())
522-
PI.CreatedDeclarations.insert(IF);
523-
It.first->second = IF;
524-
}
525-
return B.CreateCall(It.first->second, Op, Name);
509+
auto CreateSSACopy = [](Instruction *InsertPt, Value *Op,
510+
const Twine &Name = "") {
511+
// Use a no-op bitcast to represent ssa copy.
512+
return new BitCastInst(Op, Op->getType(), Name, InsertPt->getIterator());
526513
};
527514
// For edge predicates, we can just place the operand in the block before
528515
// the terminator. For assume, we have to place it right after the assume
529516
// to ensure we dominate all uses except assume itself. Always insert
530517
// right before the terminator or after the assume, so that we insert in
531518
// proper order in the case of multiple predicateinfo in the same block.
532519
if (isa<PredicateWithEdge>(ValInfo)) {
533-
IRBuilder<> B(getBranchTerminator(ValInfo));
534-
CallInst *PIC =
535-
CreateSSACopy(B, Op, Op->getName() + "." + Twine(Counter++));
520+
BitCastInst *PIC = CreateSSACopy(getBranchTerminator(ValInfo), Op,
521+
Op->getName() + "." + Twine(Counter++));
536522
PI.PredicateMap.insert({PIC, ValInfo});
537523
Result.Def = PIC;
538524
} else {
@@ -541,8 +527,7 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
541527
"Should not have gotten here without it being an assume");
542528
// Insert the predicate directly after the assume. While it also holds
543529
// directly before it, assume(i1 true) is not a useful fact.
544-
IRBuilder<> B(PAssume->AssumeInst->getNextNode());
545-
CallInst *PIC = CreateSSACopy(B, Op);
530+
BitCastInst *PIC = CreateSSACopy(PAssume->AssumeInst->getNextNode(), Op);
546531
PI.PredicateMap.insert({PIC, ValInfo});
547532
Result.Def = PIC;
548533
}
@@ -710,23 +695,6 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
710695
Builder.buildPredicateInfo();
711696
}
712697

713-
// Remove all declarations we created . The PredicateInfo consumers are
714-
// responsible for remove the ssa_copy calls created.
715-
PredicateInfo::~PredicateInfo() {
716-
// Collect function pointers in set first, as SmallSet uses a SmallVector
717-
// internally and we have to remove the asserting value handles first.
718-
SmallPtrSet<Function *, 20> FunctionPtrs;
719-
for (const auto &F : CreatedDeclarations)
720-
FunctionPtrs.insert(&*F);
721-
CreatedDeclarations.clear();
722-
723-
for (Function *F : FunctionPtrs) {
724-
assert(F->users().empty() &&
725-
"PredicateInfo consumer did not remove all SSA copies.");
726-
F->eraseFromParent();
727-
}
728-
}
729-
730698
std::optional<PredicateConstraint> PredicateBase::getConstraint() const {
731699
switch (Type) {
732700
case PT_Assume:
@@ -779,15 +747,16 @@ std::optional<PredicateConstraint> PredicateBase::getConstraint() const {
779747

780748
void PredicateInfo::verifyPredicateInfo() const {}
781749

782-
// Replace ssa_copy calls created by PredicateInfo with their operand.
750+
// Replace bitcasts created by PredicateInfo with their operand.
783751
static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
784752
for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
785753
const auto *PI = PredInfo.getPredicateInfoFor(&Inst);
786-
auto *II = dyn_cast<IntrinsicInst>(&Inst);
787-
if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
754+
if (!PI)
788755
continue;
789756

790-
Inst.replaceAllUsesWith(II->getOperand(0));
757+
assert(isa<BitCastInst>(Inst) &&
758+
Inst.getType() == Inst.getOperand(0)->getType());
759+
Inst.replaceAllUsesWith(Inst.getOperand(0));
791760
Inst.eraseFromParent();
792761
}
793762
}

llvm/lib/Transforms/Utils/SCCPSolver.cpp

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -777,10 +777,10 @@ class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
777777

778778
for (BasicBlock &BB : F) {
779779
for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
780-
if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
781-
if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
780+
if (auto *BC = dyn_cast<BitCastInst>(&Inst)) {
781+
if (BC->getType() == BC->getOperand(0)->getType()) {
782782
if (It->second->getPredicateInfoFor(&Inst)) {
783-
Value *Op = II->getOperand(0);
783+
Value *Op = BC->getOperand(0);
784784
Inst.replaceAllUsesWith(Op);
785785
Inst.eraseFromParent();
786786
}
@@ -1413,6 +1413,15 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
14131413
if (ValueState[&I].isOverdefined())
14141414
return;
14151415

1416+
if (auto *BC = dyn_cast<BitCastInst>(&I)) {
1417+
if (BC->getType() == BC->getOperand(0)->getType()) {
1418+
if (const PredicateBase *PI = getPredicateInfoFor(&I)) {
1419+
handlePredicate(&I, I.getOperand(0), PI);
1420+
return;
1421+
}
1422+
}
1423+
}
1424+
14161425
ValueLatticeElement OpSt = getValueState(I.getOperand(0));
14171426
if (OpSt.isUnknownOrUndef())
14181427
return;
@@ -2001,17 +2010,6 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
20012010
Function *F = CB.getCalledFunction();
20022011

20032012
if (auto *II = dyn_cast<IntrinsicInst>(&CB)) {
2004-
if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
2005-
if (ValueState[&CB].isOverdefined())
2006-
return;
2007-
2008-
Value *CopyOf = CB.getOperand(0);
2009-
const PredicateBase *PI = getPredicateInfoFor(&CB);
2010-
assert(PI && "Missing predicate info for ssa.copy");
2011-
handlePredicate(&CB, CopyOf, PI);
2012-
return;
2013-
}
2014-
20152013
if (II->getIntrinsicID() == Intrinsic::vscale) {
20162014
unsigned BitWidth = CB.getType()->getScalarSizeInBits();
20172015
const ConstantRange Result = getVScaleRange(II->getFunction(), BitWidth);

llvm/test/Other/debugcounter-predicateinfo.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ define fastcc void @barney() {
88
; CHECK-NEXT: br label [[BB22:%.*]]
99
; CHECK: bb22:
1010
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 undef, 2
11-
; CHECK: [[TMP23_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[TMP23]])
11+
; CHECK: [[TMP23_0:%.*]] = bitcast i1 [[TMP23]] to i1
1212
; CHECK-NEXT: br i1 [[TMP23]], label [[BB29:%.*]], label [[BB35:%.*]]
1313
; CHECK: bb29:
14-
; CHECK: [[TMP23_0_1:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[TMP23_0]])
14+
; CHECK: [[TMP23_0_1:%.*]] = bitcast i1 [[TMP23_0]] to i1
1515
; CHECK-NEXT: br i1 [[TMP23]], label [[BB33:%.*]], label [[BB35]]
1616
; CHECK: bb33:
1717
; CHECK-NEXT: br i1 [[TMP23_0_1]], label [[BB35]], label [[BB35]]

llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,17 @@ exit4:
5353
; PREDINF-NEXT: br label %[[BLOCK1:.*]]
5454
; PREDINF: [[BLOCK1]]:
5555
; PREDINF-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
56-
; PREDINF: [[CMP_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[CMP]])
57-
; PREDINF: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
58-
; PREDINF: [[X_4:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
56+
; PREDINF: [[CMP_0:%.*]] = bitcast i1 [[CMP]] to i1
57+
; PREDINF: [[X_0:%.*]] = bitcast i32 [[X]] to i32
58+
; PREDINF: [[X_4:%.*]] = bitcast i32 [[X]] to i32
5959
; PREDINF-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[EXIT1:.*]]
6060
; PREDINF: [[BLOCK2]]:
61-
; PREDINF: [[CMP_0_1:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[CMP_0]])
62-
; PREDINF: [[X_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
63-
; PREDINF: [[X_0_3:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
61+
; PREDINF: [[CMP_0_1:%.*]] = bitcast i1 [[CMP_0]] to i1
62+
; PREDINF: [[X_0_1:%.*]] = bitcast i32 [[X_0]] to i32
63+
; PREDINF: [[X_0_3:%.*]] = bitcast i32 [[X_0]] to i32
6464
; PREDINF-NEXT: br i1 [[CMP_0]], label %[[BLOCK3:.*]], label %[[EXIT2:.*]]
6565
; PREDINF: [[BLOCK3]]:
66-
; PREDINF: [[X_0_1_2:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0_1]])
66+
; PREDINF: [[X_0_1_2:%.*]] = bitcast i32 [[X_0_1]] to i32
6767
; PREDINF-NEXT: br i1 [[CMP_0_1]], label %[[EXIT4:.*]], label %[[EXIT3:.*]]
6868
; PREDINF: [[EXIT1]]:
6969
; PREDINF-NEXT: ret i32 [[X_4]]

0 commit comments

Comments
 (0)