Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Scalar/JumpThreading.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,11 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
/// if 'HasProfile' is true creates new instance through
/// FunctionAnalysisManager, otherwise nullptr.
BlockFrequencyInfo *getOrCreateBFI(bool Force = false);

// Internal overload of evaluateOnPredecessorEdge().
Constant *evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB,
Value *cond, const DataLayout &DL,
SmallPtrSet<Value *, 8> &Visited);
};

} // end namespace llvm
Expand Down
27 changes: 23 additions & 4 deletions llvm/lib/Transforms/Scalar/JumpThreading.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,13 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
BasicBlock *PredPredBB,
Value *V,
const DataLayout &DL) {
SmallPtrSet<Value *, 8> Visited({V});
return evaluateOnPredecessorEdge(BB, PredPredBB, V, DL, Visited);
}

Constant *JumpThreadingPass::evaluateOnPredecessorEdge(
BasicBlock *BB, BasicBlock *PredPredBB, Value *V, const DataLayout &DL,
SmallPtrSet<Value *, 8> &Visited) {
BasicBlock *PredBB = BB->getSinglePredecessor();
assert(PredBB && "Expected a single predecessor");

Expand All @@ -1519,12 +1526,24 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
}

// If we have a CmpInst, try to fold it for each incoming edge into PredBB.
// Note that during the execution of the pass, phi nodes may become constant
// and may be removed, which can lead to self-referencing instructions in
// code that becomes unreachable. Consequently, we need to handle those
// instructions in unreachable code and check before going into recursion.
if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
if (CondCmp->getParent() == BB) {
Constant *Op0 =
evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0), DL);
Constant *Op1 =
evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1), DL);
Constant *Op0 = nullptr;
Constant *Op1 = nullptr;
if (Value *V0 = CondCmp->getOperand(0); !Visited.contains(V0)) {
Visited.insert(V0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be cleaner to do these checks in evaluateOnPredecessorEdge(), not at each call to evaluateOnPredecessorEdge. Doesn't really matter right now, but will make sure this keeps getting handled if this code handles more than icmp in the future.

Please also combine the contains and insert calls by using the return value of insert.

Copy link
Contributor Author

@ro-i ro-i Apr 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be cleaner to do these checks in evaluateOnPredecessorEdge(), not at each call to evaluateOnPredecessorEdge. Doesn't really matter right now, but will make sure this keeps getting handled if this code handles more than icmp in the future.

I adapted the code to insert and double check at the start of evaluateOnPredecessorEdge, see my latest commit. However, there still needs to be a contains before each call because, otherwise, the caller doesn't know whether it's allowed/required to erase the Value after the call.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can also handle the erase in the same place, e.g. by adding a auto _ = make_scope_exit([V]() { Visited.erase(V); });

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't know make_scope_exit, thanks!

Op0 = evaluateOnPredecessorEdge(BB, PredPredBB, V0, DL, Visited);
Visited.erase(V0);
}
if (Value *V1 = CondCmp->getOperand(1); !Visited.contains(V1)) {
Visited.insert(V1);
Op1 = evaluateOnPredecessorEdge(BB, PredPredBB, V1, DL, Visited);
Visited.erase(V1);
}
if (Op0 && Op1) {
return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
Op1, DL);
Expand Down
117 changes: 117 additions & 0 deletions llvm/test/Transforms/JumpThreading/unreachable-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -180,4 +180,121 @@ cleanup2343.loopexit4: ; preds = %cleanup1491
unreachable
}

; This segfaults due to recursion in %C4. Reason: %L6 is identified to be a
; "partially redundant load" and is replaced by a PHI node. The PHI node is then
; simplified to be constant and is removed. This leads to %L6 being replaced by
; %C4, which makes %C4 invalid since it uses %L6.
; The test case has been generated by the AMD Fuzzing project and simplified
; manually and by llvm-reduce.

define i32 @constant_phi_leads_to_self_reference(ptr %ptr) {
; CHECK-LABEL: @constant_phi_leads_to_self_reference(
; CHECK-NEXT: [[A9:%.*]] = alloca i1, align 1
; CHECK-NEXT: br label [[F6:%.*]]
; CHECK: T3:
; CHECK-NEXT: br label [[BB5:%.*]]
; CHECK: BB5:
; CHECK-NEXT: [[L10:%.*]] = load i1, ptr [[A9]], align 1
; CHECK-NEXT: br i1 [[L10]], label [[BB6:%.*]], label [[F6]]
; CHECK: BB6:
; CHECK-NEXT: [[LGV3:%.*]] = load i1, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: [[C4:%.*]] = icmp sle i1 [[C4]], true
; CHECK-NEXT: store i1 [[C4]], ptr [[PTR]], align 1
; CHECK-NEXT: br i1 [[C4]], label [[F6]], label [[T3:%.*]]
; CHECK: F6:
; CHECK-NEXT: ret i32 0
; CHECK: F7:
; CHECK-NEXT: br label [[BB5]]
;
%A9 = alloca i1, align 1
br i1 false, label %BB4, label %F6

BB4: ; preds = %0
br i1 false, label %F6, label %F1

F1: ; preds = %BB4
br i1 false, label %T4, label %T3

T3: ; preds = %T4, %BB6, %F1
%L6 = load i1, ptr %ptr, align 1
br label %BB5

BB5: ; preds = %F7, %T3
%L10 = load i1, ptr %A9, align 1
br i1 %L10, label %BB6, label %F6

BB6: ; preds = %BB5
%LGV3 = load i1, ptr %ptr, align 1
%C4 = icmp sle i1 %L6, true
store i1 %C4, ptr %ptr, align 1
br i1 %L6, label %F6, label %T3

T4: ; preds = %F1
br label %T3

F6: ; preds = %BB6, %BB5, %BB4, %0
ret i32 0

F7: ; No predecessors!
br label %BB5
}

; Same as above, but with multiple icmps referencing the same PHI node.

define i32 @recursive_icmp_mult(ptr %ptr) {
; CHECK-LABEL: @recursive_icmp_mult(
; CHECK-NEXT: [[A9:%.*]] = alloca i1, align 1
; CHECK-NEXT: br label [[F6:%.*]]
; CHECK: T3:
; CHECK-NEXT: br label [[BB5:%.*]]
; CHECK: BB5:
; CHECK-NEXT: [[L10:%.*]] = load i1, ptr [[A9]], align 1
; CHECK-NEXT: br i1 [[L10]], label [[BB6:%.*]], label [[F6]]
; CHECK: BB6:
; CHECK-NEXT: [[LGV3:%.*]] = load i1, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: [[C4:%.*]] = icmp sle i1 [[C6:%.*]], true
; CHECK-NEXT: [[C5:%.*]] = icmp sle i1 [[C6]], false
; CHECK-NEXT: [[C6]] = icmp sle i1 [[C4]], [[C5]]
; CHECK-NEXT: store i1 [[C6]], ptr [[PTR]], align 1
; CHECK-NEXT: br i1 [[C6]], label [[F6]], label [[T3:%.*]]
; CHECK: F6:
; CHECK-NEXT: ret i32 0
; CHECK: F7:
; CHECK-NEXT: br label [[BB5]]
;
%A9 = alloca i1, align 1
br i1 false, label %BB4, label %F6

BB4: ; preds = %0
br i1 false, label %F6, label %F1

F1: ; preds = %BB4
br i1 false, label %T4, label %T3

T3: ; preds = %T4, %BB6, %F1
%L6 = load i1, ptr %ptr, align 1
br label %BB5

BB5: ; preds = %F7, %T3
%L10 = load i1, ptr %A9, align 1
br i1 %L10, label %BB6, label %F6

BB6: ; preds = %BB5
%LGV3 = load i1, ptr %ptr, align 1
%C4 = icmp sle i1 %L6, true
%C5 = icmp sle i1 %L6, false
%C6 = icmp sle i1 %C4, %C5
store i1 %C6, ptr %ptr, align 1
br i1 %L6, label %F6, label %T3

T4: ; preds = %F1
br label %T3

F6: ; preds = %BB6, %BB5, %BB4, %0
ret i32 0

F7: ; No predecessors!
br label %BB5
}

!0 = !{!"branch_weights", i32 2146410443, i32 1073205}