-
Notifications
You must be signed in to change notification settings - Fork 15k
[PredicateInfo] Support existing PredicateType by adding PredicatePHI when needing introduction of phi nodes
#151132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Rajveer Singh Bharadwaj (Rajveer100) ChangesResolves #150606 Currently Full diff: https://github.com/llvm/llvm-project/pull/151132.diff 2 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index c243e236901d5..dfaeec04daff1 100644
--- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -67,7 +67,7 @@ class Value;
class IntrinsicInst;
class raw_ostream;
-enum PredicateType { PT_Branch, PT_Assume, PT_Switch };
+enum PredicateType { PT_Branch, PT_Assume, PT_Switch, PT_PHI };
/// Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op
/// is the value the constraint applies to (the ssa.copy result).
@@ -171,6 +171,18 @@ class PredicateSwitch : public PredicateWithEdge {
}
};
+class PredicatePHI : public PredicateBase {
+public:
+ BasicBlock *PHIBlock;
+ SmallVector<std::pair<BasicBlock *, PredicateBase *>, 4> IncomingPredicates;
+
+ PredicatePHI(Value *Op, BasicBlock *PHIBB)
+ : PredicateBase(PT_PHI, Op, nullptr), PHIBlock(PHIBB) {}
+ static bool classof(const PredicateBase *PB) { return PB->Type == PT_PHI; }
+
+ LLVM_ABI std::optional<PredicateConstraint> getConstraint() const;
+};
+
/// Encapsulates PredicateInfo, including all data associated with memory
/// accesses.
class PredicateInfo {
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index ac413c9b58152..6e5e97e97849c 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -214,6 +214,8 @@ class PredicateInfoBuilder {
// whether it returned a valid result.
DenseMap<Value *, unsigned int> ValueInfoNums;
+ DenseMap<BasicBlock *, SmallVector<Value *, 4>> PHICandidates;
+
BumpPtrAllocator &Allocator;
ValueInfo &getOrCreateValueInfo(Value *);
@@ -225,6 +227,13 @@ class PredicateInfoBuilder {
SmallVectorImpl<Value *> &OpsToRename);
void processSwitch(SwitchInst *, BasicBlock *,
SmallVectorImpl<Value *> &OpsToRename);
+ void identifyPHIInsertionPoints(SmallVectorImpl<Value *> &OpsToRename);
+ bool needsPHIForPredicateInfo(Value *Op, BasicBlock *BB);
+ void insertPredicatePHIs(Value *Op, BasicBlock *BB);
+ void computePredicateIDF(Value *Op,
+ const SmallPtrSet<BasicBlock *, 8> &DefiningBlocks,
+ SmallPtrSet<BasicBlock *, 8> &PHIBlocks);
+ void processPredicatePHIs(SmallVectorImpl<Value *> &OpsToRename);
void renameUses(SmallVectorImpl<Value *> &OpsToRename);
void addInfoFor(SmallVectorImpl<Value *> &OpsToRename, Value *Op,
PredicateBase *PB);
@@ -453,6 +462,155 @@ void PredicateInfoBuilder::processSwitch(
}
}
+void PredicateInfoBuilder::identifyPHIInsertionPoints(
+ SmallVectorImpl<Value *> &OpsToRename) {
+ for (Value *Op : OpsToRename) {
+ const auto &ValueInfo = getValueInfo(Op);
+ SmallPtrSet<BasicBlock *, 8> DefiningBlocks;
+
+ for (const auto *PInfo : ValueInfo.Infos) {
+ if (auto *PBranch = dyn_cast<PredicateBranch>(PInfo)) {
+ DefiningBlocks.insert(PBranch->To);
+ } else if (auto *PSwitch = dyn_cast<PredicateSwitch>(PInfo)) {
+ DefiningBlocks.insert(PSwitch->To);
+ } else if (auto *PAssume = dyn_cast<PredicateAssume>(PInfo)) {
+ DefiningBlocks.insert(PAssume->AssumeInst->getParent());
+ }
+ }
+
+ if (DefiningBlocks.size() > 1) {
+ SmallPtrSet<BasicBlock *, 8> PHIBlocks;
+ computePredicateIDF(Op, DefiningBlocks, PHIBlocks);
+
+ for (BasicBlock *PHIBlock : PHIBlocks) {
+ if (needsPHIForPredicateInfo(Op, PHIBlock)) {
+ PHICandidates[PHIBlock].push_back(Op);
+ }
+ }
+ }
+ }
+}
+
+void PredicateInfoBuilder::computePredicateIDF(
+ Value *Op, const SmallPtrSet<BasicBlock *, 8> &DefiningBlocks,
+ SmallPtrSet<BasicBlock *, 8> &PHIBlocks) {
+
+ SmallVector<BasicBlock *, 8> Worklist(DefiningBlocks.begin(),
+ DefiningBlocks.end());
+
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+
+ DomTreeNode *Node = DT.getNode(BB);
+ if (!Node)
+ continue;
+
+ for (BasicBlock *Succ : successors(BB)) {
+ if (!DT.dominates(BB, Succ)) {
+ BasicBlock *IDom = DT.getNode(BB)->getIDom()->getBlock();
+ if (DT.dominates(IDom, Succ)) {
+ if (PHIBlocks.insert(Succ).second) {
+ bool HasOpUse = false;
+ for (auto &I : *Succ) {
+ for (Use &U : I.uses()) {
+ if (U.get() == Op) {
+ HasOpUse = true;
+ break;
+ }
+ }
+ }
+ if (HasOpUse) {
+ Worklist.push_back(Succ);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+bool PredicateInfoBuilder::needsPHIForPredicateInfo(Value *Op, BasicBlock *BB) {
+ if (BB->getSinglePredecessor())
+ return false;
+
+ const auto &ValueInfo = getValueInfo(Op);
+ SmallDenseSet<PredicateBase *, 4> PredPredicates;
+
+ for (BasicBlock *Pred : predecessors(BB)) {
+ PredicateBase *PredInfo = nullptr;
+
+ for (const auto *PInfo : ValueInfo.Infos) {
+ if (auto *PBranch = dyn_cast<PredicateBranch>(PInfo)) {
+ if (PBranch->From == Pred && PBranch->To == BB) {
+ PredInfo = const_cast<PredicateBase *>(PInfo);
+ break;
+ }
+ } else if (auto *PSwitch = dyn_cast<PredicateSwitch>(PInfo)) {
+ if (PSwitch->From == Pred && PSwitch->To == BB) {
+ PredInfo = const_cast<PredicateBase *>(PInfo);
+ break;
+ }
+ }
+ }
+
+ if (PredInfo) {
+ PredPredicates.insert(PredInfo);
+ }
+ }
+
+ return PredPredicates.size() > 1 ||
+ (PredPredicates.size() == 1 && pred_size(BB) > 1);
+}
+
+void PredicateInfoBuilder::insertPredicatePHIs(Value *Op, BasicBlock *BB) {
+ IRBuilder<> Builder(&BB->front());
+ PHINode *PHI = Builder.CreatePHI(Op->getType(), pred_size(BB),
+ Op->getName() + ".predicate.phi");
+
+ auto *PPhi = new (Allocator) PredicatePHI(Op, BB);
+ PPhi->RenamedOp = PHI;
+
+ const auto &ValueInfo = getValueInfo(Op);
+ for (BasicBlock *Pred : predecessors(BB)) {
+ Value *IncomingValue = Op;
+
+ for (const auto *PInfo : ValueInfo.Infos) {
+ if (auto *PBranch = dyn_cast<PredicateBranch>(PInfo)) {
+ if (PBranch->From == Pred && PBranch->To == BB) {
+ PPhi->IncomingPredicates.push_back(
+ {Pred, const_cast<PredicateBase *>(PInfo)});
+ break;
+ }
+ } else if (auto *PSwitch = dyn_cast<PredicateSwitch>(PInfo)) {
+ if (PSwitch->From == Pred && PSwitch->To == BB) {
+ PPhi->IncomingPredicates.push_back(
+ {Pred, const_cast<PredicateBase *>(PInfo)});
+ break;
+ }
+ }
+ }
+
+ PHI->addIncoming(IncomingValue, Pred);
+ }
+
+ PI.PredicateMap.insert({PHI, PPhi});
+
+ auto &OperandInfo = getOrCreateValueInfo(Op);
+ OperandInfo.Infos.push_back(PPhi);
+}
+
+void PredicateInfoBuilder::processPredicatePHIs(
+ SmallVectorImpl<Value *> &OpsToRename) {
+
+ for (const auto &Entry : PHICandidates) {
+ BasicBlock *PHIBlock = Entry.first;
+
+ for (Value *Op : Entry.second) {
+ insertPredicatePHIs(Op, PHIBlock);
+ }
+ }
+}
+
// Build predicate info for our function
void PredicateInfoBuilder::buildPredicateInfo() {
DT.updateDFSNumbers();
@@ -479,6 +637,10 @@ void PredicateInfoBuilder::buildPredicateInfo() {
if (DT.isReachableFromEntry(II->getParent()))
processAssume(II, II->getParent(), OpsToRename);
}
+
+ identifyPHIInsertionPoints(OpsToRename);
+ processPredicatePHIs(OpsToRename);
+
// Now rename all our operations.
renameUses(OpsToRename);
}
@@ -774,10 +936,33 @@ std::optional<PredicateConstraint> PredicateBase::getConstraint() const {
}
return {{CmpInst::ICMP_EQ, cast<PredicateSwitch>(this)->CaseValue}};
+ case PT_PHI:
+ return cast<PredicatePHI>(this)->getConstraint();
}
llvm_unreachable("Unknown predicate type");
}
+std::optional<PredicateConstraint> PredicatePHI::getConstraint() const {
+ // For PHI predicates, find the common constraint across all incoming edges
+ if (IncomingPredicates.empty())
+ return std::nullopt;
+
+ auto FirstConstraint = IncomingPredicates[0].second->getConstraint();
+ if (!FirstConstraint)
+ return std::nullopt;
+
+ // Verify all incoming predicates have the same constraint
+ for (size_t I = 1; I < IncomingPredicates.size(); ++I) {
+ auto Constraint = IncomingPredicates[I].second->getConstraint();
+ if (!Constraint || Constraint->Predicate != FirstConstraint->Predicate ||
+ Constraint->OtherOp != FirstConstraint->OtherOp) {
+ return std::nullopt;
+ }
+ }
+
+ return FirstConstraint;
+}
+
void PredicateInfo::verifyPredicateInfo() const {}
// Replace ssa_copy calls created by PredicateInfo with their operand.
@@ -839,6 +1024,10 @@ class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
} else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) {
OS << "; assume predicate info {"
<< " Comparison:" << *PA->Condition;
+ } else if (const auto *PP = dyn_cast<PredicatePHI>(PI)) {
+ OS << "; phi predicate info { PHIBlock: ";
+ PP->PHIBlock->printAsOperand(OS);
+ OS << " IncomingEdges: " << PP->IncomingPredicates.size();
}
OS << ", RenamedOp: ";
PI->RenamedOp->printAsOperand(OS, false);
|
|
A little unsure about the approach, here's some debug info: Details
build/bin/opt -passes="print-predicateinfo,gvn,newgvn" -debug -S opt-const-store.ll -o opt-const-store-O3.ll
Args: build/bin/opt -passes=print-predicateinfo,gvn,newgvn -debug -S opt-const-store.ll -o opt-const-store-O3.ll
PredicateInfo for function: h5diff
Visiting %cond = icmp eq i32 %0, 0
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Found replacement %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond) for %cond = icmp eq i32 %0, 0 in %cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond, %3 ]
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Found replacement %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) for %cond = icmp eq i32 %0, 0 in %cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond.0, %3 ]
Visiting i32 %0
Rename Stack is empty
Current DFS numbers are (0,9)
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Found replacement %.0 = call i32 @llvm.ssa.copy.i32(i32 %0) for i32 %0 in %.predicate.phi = phi i32 [ %0, %5 ], [ %0, %4 ], [ %0, %3 ]
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Found replacement %.1 = call i32 @llvm.ssa.copy.i32(i32 %0) for i32 %0 in %.predicate.phi = phi i32 [ %0, %5 ], [ %0, %4 ], [ %.0, %3 ]
define noundef i32 @h5diff(i32 %0, i1 %1) local_unnamed_addr {
%cond = icmp eq i32 %0, 0
br i1 %1, label %3, label %4
3: ; preds = %2
store i32 1, ptr @p, align 4
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison: %cond = icmp eq i32 %0, 0 Edge: [label %3,label %common.ret], RenamedOp: %cond }
%cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison: %cond = icmp eq i32 %0, 0 Edge: [label %3,label %common.ret], RenamedOp: %0 }
%.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
br i1 %cond, label %5, label %common.ret
common.ret: ; preds = %5, %4, %3
; Has predicate info
; phi predicate info { PHIBlock: label %common.ret IncomingEdges: 2, RenamedOp: %.predicate.phi }
%.predicate.phi = phi i32 [ %0, %5 ], [ %.1, %4 ], [ %.0, %3 ]
; Has predicate info
; phi predicate info { PHIBlock: label %common.ret IncomingEdges: 2, RenamedOp: %cond.predicate.phi }
%cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond.1, %4 ], [ %cond.0, %3 ]
ret i32 0
4: ; preds = %2
store i32 2, ptr @p, align 4
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison: %cond = icmp eq i32 %0, 0 Edge: [label %4,label %common.ret], RenamedOp: %cond }
%cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison: %cond = icmp eq i32 %0, 0 Edge: [label %4,label %common.ret], RenamedOp: %0 }
%.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
br i1 %cond, label %5, label %common.ret
5: ; preds = %4, %3
store i32 %0, ptr @p, align 4
br label %common.ret
}
GVN iteration: 0
Replace dominated use of 'i1 %cond' with i1 false in %cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond, %3 ]
Replace dominated use of 'i1 %cond' with i1 false in %cond.predicate.phi = phi i1 [ %cond, %5 ], [ false, %4 ], [ %cond, %3 ]
GVN iteration: 1
Visiting %cond = icmp eq i32 %0, 0
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Found replacement %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond) for %cond = icmp eq i32 %0, 0 in %cond.predicate.phi1 = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond, %3 ]
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Found replacement %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) for %cond = icmp eq i32 %0, 0 in %cond.predicate.phi1 = phi i1 [ %cond, %5 ], [ %cond, %4 ], [ %cond.0, %3 ]
Visiting i32 %0
Rename Stack is empty
Current DFS numbers are (0,9)
Rename Stack is empty
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (1,2)
Found replacement %.0 = call i32 @llvm.ssa.copy.i32(i32 %0) for i32 %0 in %.predicate.phi = phi i32 [ %0, %5 ], [ %0, %4 ], [ %0, %3 ]
Rename Stack Top DFS numbers are (1,2)
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (3,4)
Rename Stack is empty
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Rename Stack Top DFS numbers are (7,8)
Current DFS numbers are (7,8)
Found replacement %.1 = call i32 @llvm.ssa.copy.i32(i32 %0) for i32 %0 in %.predicate.phi = phi i32 [ %0, %5 ], [ %0, %4 ], [ %.0, %3 ]
Skipping trivially dead instruction %.predicate.phi = phi i32 [ %0, %5 ], [ %.1, %4 ], [ %.0, %3 ]
Marking %.predicate.phi = phi i32 [ %0, %5 ], [ %.1, %4 ], [ %.0, %3 ] for deletion
Skipping trivially dead instruction %cond.predicate.phi1 = phi i1 [ %cond, %5 ], [ %cond.1, %4 ], [ %cond.0, %3 ]
Marking %cond.predicate.phi1 = phi i1 [ %cond, %5 ], [ %cond.1, %4 ], [ %cond.0, %3 ] for deletion
Skipping trivially dead instruction %cond.predicate.phi = phi i1 [ %cond, %5 ], [ false, %4 ], [ false, %3 ]
Marking %cond.predicate.phi = phi i1 [ %cond, %5 ], [ false, %4 ], [ false, %3 ] for deletion
Block %2 marked reachable
Processing instruction %cond = icmp eq i32 %0, 0
Created new congruence class for %cond = icmp eq i32 %0, 0 using expression { ExpressionTypeBasic, opcode = 13600, operands = {[0] = i32 0 [1] = i32 %0 } } at 4 and leader %cond = icmp eq i32 %0, 0
New class 4 for expression { ExpressionTypeBasic, opcode = 13600, operands = {[0] = i32 0 [1] = i32 %0 } }
Processing instruction br i1 %1, label %3, label %4
Block %3 marked reachable
Block %4 marked reachable
Processing instruction store i32 2, ptr @p, align 4
Created new congruence class for store i32 2, ptr @p, align 4 using expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p } represents Store store i32 2, ptr @p, align 4 with StoredValue i32 2 and MemoryLeader 2 = MemoryDef(liveOnEntry)} at 5 and leader store i32 2, ptr @p, align 4 and stored value i32 2
New class 5 for expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p } represents Store store i32 2, ptr @p, align 4 with StoredValue i32 2 and MemoryLeader 2 = MemoryDef(liveOnEntry)}
Memory class leader change for class 5 due to new memory instruction becoming leader
Setting 2 = MemoryDef(liveOnEntry) equivalent to congruence class 5 with current MemoryAccess leader 2 = MemoryDef(liveOnEntry)
Processing instruction %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Found predicate info from instruction !
Created new congruence class for %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) using expression { ExpressionTypeConstant, opcode = 5, constant = i1 false} at 6 and leader i1 false
New class 6 for expression { ExpressionTypeConstant, opcode = 5, constant = i1 false}
Processing instruction %.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
Found predicate info from instruction !
New class 2 for expression { ExpressionTypeVariable, opcode = 22, variable = i32 %0}
Processing instruction br i1 %cond, label %5, label %common.ret
Block %5 marked reachable
Block common.ret marked reachable
Processing instruction store i32 1, ptr @p, align 4
Created new congruence class for store i32 1, ptr @p, align 4 using expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p } represents Store store i32 1, ptr @p, align 4 with StoredValue i32 1 and MemoryLeader 1 = MemoryDef(liveOnEntry)} at 7 and leader store i32 1, ptr @p, align 4 and stored value i32 1
New class 7 for expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p } represents Store store i32 1, ptr @p, align 4 with StoredValue i32 1 and MemoryLeader 1 = MemoryDef(liveOnEntry)}
Memory class leader change for class 7 due to new memory instruction becoming leader
Setting 1 = MemoryDef(liveOnEntry) equivalent to congruence class 7 with current MemoryAccess leader 1 = MemoryDef(liveOnEntry)
Processing instruction %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Found predicate info from instruction !
New class 6 for expression { ExpressionTypeConstant, opcode = 5, constant = i1 false}
Processing instruction %.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
Found predicate info from instruction !
New class 2 for expression { ExpressionTypeVariable, opcode = 22, variable = i32 %0}
Processing instruction br i1 %cond, label %5, label %common.ret
Block %5 was reachable, but new edge {%3,%5} to it found
Block common.ret was reachable, but new edge {%3,common.ret} to it found
Processing MemoryPhi 4 = MemoryPhi({%3,1},{%4,2})
Memory Phi value numbered to itself
Setting 4 = MemoryPhi({%3,1},{%4,2}) equivalent to congruence class 8 with current MemoryAccess leader 4 = MemoryPhi({%3,1},{%4,2})
Processing instruction store i32 %0, ptr @p, align 4
Created new congruence class for store i32 %0, ptr @p, align 4 using expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p } represents Store store i32 %0, ptr @p, align 4 with StoredValue i32 %0 and MemoryLeader 3 = MemoryDef(4)} at 9 and leader store i32 %0, ptr @p, align 4 and stored value i32 %0
New class 9 for expression { ExpressionTypeStore, opcode = 0, operands = {[0] = ptr @p } represents Store store i32 %0, ptr @p, align 4 with StoredValue i32 %0 and MemoryLeader 3 = MemoryDef(4)}
Memory class leader change for class 9 due to new memory instruction becoming leader
Setting 3 = MemoryDef(4) equivalent to congruence class 9 with current MemoryAccess leader 3 = MemoryDef(4)
Processing instruction br label %common.ret
Block common.ret was reachable, but new edge {%5,common.ret} to it found
Processing MemoryPhi 5 = MemoryPhi({%3,1},{%5,3},{%4,2})
Memory Phi value numbered to itself
Setting 5 = MemoryPhi({%3,1},{%5,3},{%4,2}) equivalent to congruence class 10 with current MemoryAccess leader 5 = MemoryPhi({%3,1},{%5,3},{%4,2})
Processing instruction ret i32 0
Beginning iteration verification
Processing instruction %cond = icmp eq i32 %0, 0
Processing instruction br i1 %1, label %3, label %4
Processing instruction store i32 2, ptr @p, align 4
Processing instruction %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Found predicate info from instruction !
Processing instruction %.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
Found predicate info from instruction !
Processing instruction br i1 %cond, label %5, label %common.ret
Processing instruction store i32 1, ptr @p, align 4
Processing instruction %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Found predicate info from instruction !
Processing instruction %.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
Found predicate info from instruction !
Processing instruction br i1 %cond, label %5, label %common.ret
Processing MemoryPhi 4 = MemoryPhi({%3,1},{%4,2})
Memory Phi value numbered to itself
Setting 4 = MemoryPhi({%3,1},{%4,2}) equivalent to congruence class 8 with current MemoryAccess leader 4 = MemoryPhi({%3,1},{%4,2})
Processing instruction store i32 %0, ptr @p, align 4
Processing instruction br label %common.ret
Processing MemoryPhi 5 = MemoryPhi({%3,1},{%5,3},{%4,2})
Memory Phi value numbered to itself
Setting 5 = MemoryPhi({%3,1},{%5,3},{%4,2}) equivalent to congruence class 10 with current MemoryAccess leader 5 = MemoryPhi({%3,1},{%5,3},{%4,2})
Processing instruction ret i32 0
Eliminating in congruence class 10
Eliminating in congruence class 9
Eliminating in congruence class 8
Eliminating in congruence class 7
Eliminating in congruence class 6
Found replacement i1 false for %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Replacing %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) with i1 false
Marking %cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond) for deletion
Found replacement i1 false for %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
Replacing %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond) with i1 false
Marking %cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond) for deletion
Eliminating in congruence class 5
Eliminating in congruence class 4
Eliminating in congruence class 3
Eliminating in congruence class 2
Found replacement i32 %0 for %.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
Replacing %.1 = call i32 @llvm.ssa.copy.i32(i32 %0) with i32 %0
Marking %.1 = call i32 @llvm.ssa.copy.i32(i32 %0) for deletion
Found replacement i32 %0 for %.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
Replacing %.0 = call i32 @llvm.ssa.copy.i32(i32 %0) with i32 %0
Marking %.0 = call i32 @llvm.ssa.copy.i32(i32 %0) for deletion
Eliminating in congruence class 1
Eliminating in congruence class 0
Congruence class 0 has 3 members
Congruence class 1 has 0 members
Congruence class 2 has 1 members
Congruence class 3 has 1 members
Congruence class 4 has 1 members
Congruence class 5 has 1 members
Congruence class 6 has 0 members
Congruence class 7 has 1 members
Congruence class 8 has 0 members
Congruence class 9 has 1 members
Congruence class 10 has 0 members
The predicate information looks fine to me, but I think I am missing something that's not causing the store to change. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd expect you to place an actual phi node (which combines one predicated input and other non-predicated input), not a new type of predicate.
|
I do insert them if you look at the debug info. |
|
Can you please add a test that shows the generated IR? It's hard to comment in abstract. |
|
Below would be clearer: build/bin/opt -passes="print-predicateinfo" -S opt-const-store.ll -o opt-const-store-O3.llPredicateInfo for function: h5diff
define noundef i32 @h5diff(i32 %0, i1 %1) local_unnamed_addr {
%cond = icmp eq i32 %0, 0
br i1 %1, label %3, label %4
3: ; preds = %2
store i32 1, ptr @p, align 4
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison: %cond = icmp eq i32 %0, 0 Edge: [label %3,label %common.ret], RenamedOp: %cond }
%cond.0 = call i1 @llvm.ssa.copy.i1(i1 %cond)
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison: %cond = icmp eq i32 %0, 0 Edge: [label %3,label %common.ret], RenamedOp: %0 }
%.0 = call i32 @llvm.ssa.copy.i32(i32 %0)
br i1 %cond, label %5, label %common.ret
common.ret: ; preds = %5, %4, %3
; Has predicate info
; phi predicate info { PHIBlock: label %common.ret IncomingEdges: 2, RenamedOp: %.predicate.phi }
%.predicate.phi = phi i32 [ %0, %5 ], [ %.1, %4 ], [ %.0, %3 ]
; Has predicate info
; phi predicate info { PHIBlock: label %common.ret IncomingEdges: 2, RenamedOp: %cond.predicate.phi }
%cond.predicate.phi = phi i1 [ %cond, %5 ], [ %cond.1, %4 ], [ %cond.0, %3 ]
ret i32 0
4: ; preds = %2
store i32 2, ptr @p, align 4
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison: %cond = icmp eq i32 %0, 0 Edge: [label %4,label %common.ret], RenamedOp: %cond }
%cond.1 = call i1 @llvm.ssa.copy.i1(i1 %cond)
; Has predicate info
; branch predicate info { TrueEdge: 0 Comparison: %cond = icmp eq i32 %0, 0 Edge: [label %4,label %common.ret], RenamedOp: %0 }
%.1 = call i32 @llvm.ssa.copy.i32(i32 %0)
br i1 %cond, label %5, label %common.ret
5: ; preds = %4, %3
store i32 %0, ptr @p, align 4
br label %common.ret
} |
|
It looks like the store in |
b58b62c to
fa9177b
Compare
|
After debugging under lldb and looking further, I understood the issue. Re-implementing it :) |
|
I was just wondering if we could simply add a fold or case based optimization like what instcombine does by recognizing patterns rather than making changes in predicate info? From your point of view, do you think adding phi nodes covers many cases potentially apart from generalizing and using existing code? |
fa9177b to
0b13638
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So I managed to get the predicate information to work correctly and achieve the final output as well:
store i32 0, ptr @p, align 4More info below.
0b13638 to
5557a40
Compare
|
@nikic |
5557a40 to
b2e4347
Compare
|
@nikic |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs a rebase as use of ssa.copy has been removed.
The last time I tried this patch it failed to bootstrap clang: https://llvm-compile-time-tracker.com/show_error.php?commit=9257de61b46d07932842d91ee25304007f1cdf66 But I see you made more changes since, maybe it's fixed.
Interesting, I believe you are referring to: |
b2e4347 to
34b7bd9
Compare
|
I had some stale build files and weird behaviors for tests which showed as passed on my local machine and fail alternatively when running. Cleaning them and re-checking. |
34b7bd9 to
c07e0d7
Compare
|
Linux tests have passed, looking into the Windows CI, seems to be just a labelling issue. |
|
This test file: --- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
+++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
@@ -68,10 +68,10 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture
; CHECK-NEXT: [[ADD8]] = add nsw i32 [[ADD86]], [[ADD]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[J_113]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[INC]], [[ITR]]
-; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT7:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT6:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: for.inc11.loopexit.loopexit:
; CHECK-NEXT: br label [[FOR_INC11_LOOPEXIT:%.*]]
-; CHECK: for.inc11.loopexit.loopexit7:
+; CHECK: for.inc11.loopexit.loopexit6:
; CHECK-NEXT: [[ADD8_LCSSA:%.*]] = phi i32 [ [[ADD8]], [[FOR_BODY3]] ]
; CHECK-NEXT: store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope [[META2]]
; CHECK-NEXT: br label [[FOR_INC11_LOOPEXIT]]It's unpredictable, keeps switching between 6 and 7. It has no phi predicates, so unrelated to my change. |
|
All checks have passed. |
|
@nikic |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Rajveer100 could you add a test that shows an improvement for a transform?
The description mentions this resolves #150606, but it doesn't look like there's a test case that shows an improvement?
The NewGVN and LoopVersioningLICM changes seem to just be artifacts of running the test update scripts?
…PHI` when needing introduction of phi nodes Resolves llvm#150606 Currently `ssa.copy` is used mostly for straight line code, i.e, without joins or uses of phi nodes. With this, passes would be able to pick up the relevant info and further optimize the IR.
c07e0d7 to
f1692ac
Compare
Added. |
|
@fhahn |
|
@nikic |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I'm not convinced. Please remove PredicatePHI.
|
Alright, I will make the changes and we can continue review from there :) |
|
@nikic After removal: // For edge predicates, we can just place the operand in the block before
// the terminator. For assume, we have to place it right after the assume
// to ensure we dominate all uses except assume itself. Always insert
// right before the terminator or after the assume, so that we insert in
// proper order in the case of multiple predicateinfo in the same block.
if (isa<PredicateWithEdge>(ValInfo)) {
BitCastInst *PIC = CreateSSACopy(getBranchTerminator(ValInfo), Op,
Op->getName() + "." + Twine(Counter++));
PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
} else {
auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
assert(PAssume &&
"Should not have gotten here without it being an assume");
// Insert the predicate directly after the assume. While it also holds
// directly before it, assume(i1 true) is not a useful fact.
BitCastInst *PIC = CreateSSACopy(PAssume->AssumeInst->getNextNode(), Op);
PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
}
for (const auto &Entry : InsertedPHIBlocks) {
BasicBlock *PHIBlock = Entry.first;
Value *OpInBlock = Entry.second;
if (DT.dominates(...)) {
BitCastInst *PIC =
CreateSSACopy(&*PHIBlock->getFirstInsertionPt(), OpInBlock);
Result.Def = PIC;
}
}We would need some way to determine to dominated uses for this predicate which is an extra overhead to check for. For this, we would need to do it after we completely materialize the stack and then insert them. This would mean moving the lambda function for creating ssa copy as well extra checks. In fact, the rename use definition itself insists on this fact: // Instead of the standard SSA renaming algorithm, which is O(Number of
// instructions), and walks the entire dominator tree, we walk only the defs +
// uses. The standard SSA renaming algorithm does not really rely on the
// dominator tree except to order the stack push/pops of the renaming stacks, so
// that defs end up getting pushed before hitting the correct uses. This does
// not require the dominator tree, only the *order* of the dominator tree. The
// complete and correct ordering of the defs and uses, in dominator tree is
// contained in the DFS numbering of the dominator tree. So we sort the defs and
// uses into the DFS ordering, and then just use the renaming stack as per
// normal, pushing when we hit a def (which is a predicateinfo instruction),
// popping when we are out of the dfs scope for that def, and replacing any uses
// with top of stack if it exists. In order to handle liveness without
// propagating liveness info, we don't actually insert the predicateinfo
// instruction def until we see a use that it would dominate. Once we see such
// a use, we materialize the predicateinfo instruction in the right place and
// use it.
//
// TODO: Use this algorithm to perform fast single-variable renaming in
// promotememtoreg and memoryssa.Please try to look into it once again. |
|
I'm not sure I fully understand what you mean here. Is this about renaming uses so that the inserted phis get used? The phis do need to be integrated with renaming, but treating them like a plain predicate is not quite right. If you do that, then you'll introduce a bitcast (formerly ssa.copy). But we should not be inserting a bitcast, things should directly use the phi nodes. |
|
I think we're on the same page. So, from a performance standpoint what do you think is better? If I understand correctly, if we don't use bitcast we would need to replace all uses for the given Whereas, if we just let the predicate be, it doesn't need any further handling. |
for (Value *Op : OpsToRename) {
if (NewPHIs.count(Op)) {
Value *PHI = PHIs[Op];
SmallVector<Use *, 8> UsesToReplace;
for (Use &U : Op->uses()) {
if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
if (I->getParent() == PHI->getParent()) {
UsesToReplace.push_back(&U);
}
}
}
for (Use *U : UsesToReplace) {
U->set(PHI);
}
}
}This is what I am talking about, this is quite inefficient as compared to what I have done even though we introduced a new |
|
@nikic |
|
@nikic |
@fhahn |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Description still mentions ssa copy but that's not involved
Yeah, it should be changed to |
Resolves #150606
Currently
ssa.copyis used mostly for straight line code, i.e, without joins or uses of phi nodes. With this, passes would be able to pick up the relevant info and further optimize the IR.