From 18e66796de21349a9cc6edfe63224e8d1fd41895 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Fri, 26 Sep 2025 05:50:33 -0700 Subject: [PATCH 1/2] [LoopInterchange] Also look at lcssa phis in outer loop latch block This deals with a corner case of LCSSA phi nodes in the outer loop latch block: the loop was in LCSSA form, some transformations can come along (e.g. unswitch) and create an empty block: BB4: br label %BB5 BB5: %old.cond.lcssa = phi i16 [ %cond, %BB4 ] br outer.header Interchange then brings it in LCSSA form again and we get: BB4: %new.cond.lcssa = phi i16 [ %cond, %BB3 ] br label %BB5 BB5: %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ] Which means that we have a chain of LCSSA phi nodes from %new.cond.lcssa to %old.cond.lcssa. The problem is that interchange can reoder blocks BB4 and BB5 placing the use before the def if we don't check this. The observation is that %old.cond.lcssa is unused, so instead of moving and renaming these phi nodes, just delete it if it's trivially dead. If it isn't trivially dead, it is handled elsewhere. The loop should still be in LCSSA form, and if it isn't, formLCSSARecursively is called after the interchange rewrite. Fixes #160068 --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 33 ++++++++ .../LoopInterchange/lcssa-phi-outer-latch.ll | 75 +++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 28ae4f0a0aad9..e42d82d1533e1 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -44,6 +44,7 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include #include #include @@ -1837,6 +1838,38 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, for (PHINode *P : LcssaInnerLatch) P->moveBefore(InnerExit->getFirstNonPHIIt()); + // This deals with a corner case of LCSSA phi nodes in the outer loop latch + // block: the loop was in LCSSA form, some transformations can come along + // (e.g. unswitch) and create an empty block: + // + // BB4: + // br label %BB5 + // BB5: + // %old.cond.lcssa = phi i16 [ %cond, %BB4 ] + // br outer.header + // + // Interchange then brings it in LCSSA form again and we get: + // + // BB4: + // %new.cond.lcssa = phi i16 [ %cond, %BB3 ] + // br label %BB5 + // BB5: + // %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ] + // + // Which means that we have a chain of LCSSA phi nodes from %new.cond.lcssa + // to %old.cond.lcssa. The problem is that interchange can reoder blocks BB4 + // and BB5 placing the use before the def if we don't check this. The + // observation is that %old.cond.lcssa is unused, so instead of moving and + // renaming these phi nodes, just delete it if it's trivially dead. If it + // isn't trivially dead, it is handled above. The loop should still be in + // LCSSA form, and if it isn't, formLCSSARecursively is called after the + // interchange rewrite. + SmallVector LcssaOuterLatch( + llvm::make_pointer_range(OuterLatch->phis())); + for (PHINode *P : LcssaOuterLatch) + if (isInstructionTriviallyDead(P)) + P->eraseFromParent(); + // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have // incoming values defined in the outer loop, we have to add a new PHI // in the inner loop latch, which became the exit block of the outer loop, diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll b/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll new file mode 100644 index 0000000000000..41846462e6b74 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name TEST --version 6 +; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s + +; This test is checking that blocks BB4 and BB5, where BB4 is the exit +; block of the inner loop and BB5 the latch of the outer loop, correctly +; deal with the phi-node use-def chain %new.cond.lcssa -> %old.cond.lcssa. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i16 @main() { +; CHECK-LABEL: define i16 @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[BB2_PREHEADER:.*]] +; CHECK: [[BB1_PREHEADER:.*]]: +; CHECK-NEXT: br label %[[TESTBB1:.*]] +; CHECK: [[TESTBB1]]: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[BB5:.*]] ], [ 1, %[[BB1_PREHEADER]] ] +; CHECK-NEXT: br label %[[BB2_SPLIT:.*]] +; CHECK: [[BB2_PREHEADER]]: +; CHECK-NEXT: br label %[[TESTBB2:.*]] +; CHECK: [[TESTBB2]]: +; CHECK-NEXT: [[J:%.*]] = phi i16 [ [[TMP1:%.*]], %[[BB3_SPLIT:.*]] ], [ 0, %[[BB2_PREHEADER]] ] +; CHECK-NEXT: br label %[[BB1_PREHEADER]] +; CHECK: [[BB2_SPLIT]]: +; CHECK-NEXT: [[ARRAYIDX_US_US:%.*]] = getelementptr i16, ptr null, i16 [[J]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_US_US]], align 1 +; CHECK-NEXT: [[COND:%.*]] = select i1 false, i16 0, i16 0 +; CHECK-NEXT: br label %[[TESTBB3:.*]] +; CHECK: [[TESTBB3]]: +; CHECK-NEXT: [[J_NEXT:%.*]] = add i16 [[J]], 1 +; CHECK-NEXT: br label %[[TESTBB4:.*]] +; CHECK: [[BB3_SPLIT]]: +; CHECK-NEXT: [[NEW_COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[BB5]] ] +; CHECK-NEXT: [[TMP1]] = add i16 [[J]], 1 +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[TESTBB2]] +; CHECK: [[TESTBB4]]: +; CHECK-NEXT: br label %[[BB5]] +; CHECK: [[BB5]]: +; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 +; CHECK-NEXT: [[CMP286_US:%.*]] = icmp ugt i64 [[I]], 0 +; CHECK-NEXT: br i1 [[CMP286_US]], label %[[TESTBB1]], label %[[BB3_SPLIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i16 0 +; +entry: + br label %BB1 + +BB1: + %i = phi i64 [ 1, %entry ], [ %i.next, %BB5 ] + br label %BB2 + +BB2: + %j = phi i16 [ 0, %BB1 ], [ %j.next, %BB3 ] + %arrayidx.us.us = getelementptr i16, ptr null, i16 %j + %0 = load i16, ptr %arrayidx.us.us, align 1 + %cond = select i1 false, i16 0, i16 0 + br label %BB3 + +BB3: + %j.next = add i16 %j, 1 + br i1 true, label %BB4, label %BB2 + +BB4: + %new.cond.lcssa = phi i16 [ %cond, %BB3 ] + br label %BB5 + +BB5: + %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ] + %i.next = add i64 %i, 1 + %cmp286.us = icmp ugt i64 %i, 0 + br i1 %cmp286.us, label %BB1, label %exit + +exit: + ret i16 0 +} From a4bb77f55c761cca6088bfb3505c162635ae57f4 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Wed, 8 Oct 2025 02:27:01 -0700 Subject: [PATCH 2/2] Create new helper simplifyLCSSAPhis that checks single input phis in outer loop latch blocks that are not exit blocks. --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 79 +++++++++------- .../LoopInterchange/lcssa-phi-outer-latch.ll | 89 ++++++++++--------- 2 files changed, 93 insertions(+), 75 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index e42d82d1533e1..b4003f8cd2163 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1838,38 +1838,6 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, for (PHINode *P : LcssaInnerLatch) P->moveBefore(InnerExit->getFirstNonPHIIt()); - // This deals with a corner case of LCSSA phi nodes in the outer loop latch - // block: the loop was in LCSSA form, some transformations can come along - // (e.g. unswitch) and create an empty block: - // - // BB4: - // br label %BB5 - // BB5: - // %old.cond.lcssa = phi i16 [ %cond, %BB4 ] - // br outer.header - // - // Interchange then brings it in LCSSA form again and we get: - // - // BB4: - // %new.cond.lcssa = phi i16 [ %cond, %BB3 ] - // br label %BB5 - // BB5: - // %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ] - // - // Which means that we have a chain of LCSSA phi nodes from %new.cond.lcssa - // to %old.cond.lcssa. The problem is that interchange can reoder blocks BB4 - // and BB5 placing the use before the def if we don't check this. The - // observation is that %old.cond.lcssa is unused, so instead of moving and - // renaming these phi nodes, just delete it if it's trivially dead. If it - // isn't trivially dead, it is handled above. The loop should still be in - // LCSSA form, and if it isn't, formLCSSARecursively is called after the - // interchange rewrite. - SmallVector LcssaOuterLatch( - llvm::make_pointer_range(OuterLatch->phis())); - for (PHINode *P : LcssaOuterLatch) - if (isInstructionTriviallyDead(P)) - P->eraseFromParent(); - // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have // incoming values defined in the outer loop, we have to add a new PHI // in the inner loop latch, which became the exit block of the outer loop, @@ -1905,6 +1873,50 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch); } +// This deals with a corner case when a LCSSA phi node appears in a non-exit +// block: the outer loop latch block does not need to be exit block of the +// inner loop. Consider a loop that was in LCSSA form, but then some +// transformation like loop-unswitch comes along and creates an empty block, +// where BB5 in this example is the outer loop latch block: +// +// BB4: +// br label %BB5 +// BB5: +// %old.cond.lcssa = phi i16 [ %cond, %BB4 ] +// br outer.header +// +// Interchange then brings it in LCSSA form again resulting in this chain of +// single-input phi nodes: +// +// BB4: +// %new.cond.lcssa = phi i16 [ %cond, %BB3 ] +// br label %BB5 +// BB5: +// %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ] +// +// The problem is that interchange can reoder blocks BB4 and BB5 placing the +// use before the def if we don't check this. +// +static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop) { + BasicBlock *InnerLoopExit = InnerLoop->getExitBlock(); + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + + // Do not modify lcssa phis where they actually belong, i.e. in exit blocks. + if (OuterLoopLatch == InnerLoopExit) + return; + + // Collect and remove phis in non-exit blocks if they have 1 input. + SmallVector Phis( + llvm::make_pointer_range(OuterLoopLatch->phis())); + for (PHINode *Phi : Phis) { + assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected"); + LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi + << "\n"); + Phi->replaceAllUsesWith(Phi->getIncomingValue(0)); + Phi->eraseFromParent(); + } +} + bool LoopInterchangeTransform::adjustLoopBranches() { LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n"); std::vector DTUpdates; @@ -1915,6 +1927,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() { assert(OuterLoopPreHeader != OuterLoop->getHeader() && InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && InnerLoopPreHeader && "Guaranteed by loop-simplify form"); + + simplifyLCSSAPhis(OuterLoop, InnerLoop); + // Ensure that both preheaders do not contain PHI nodes and have single // predecessors. This allows us to move them easily. We use // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll b/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll index 41846462e6b74..d9323c088b235 100644 --- a/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll +++ b/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll @@ -1,75 +1,78 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name TEST --version 6 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s -; This test is checking that blocks BB4 and BB5, where BB4 is the exit -; block of the inner loop and BB5 the latch of the outer loop, correctly -; deal with the phi-node use-def chain %new.cond.lcssa -> %old.cond.lcssa. +; This test is checking that blocks outer.body and outer.latch, where outer.body is the exit +; block of the inner loop and outer.latch the latch of the outer loop, correctly +; deal with the phi-node use-def chain %new.cond.lcssa -> %old.cond.lcssa. What we expect +; here is that block outer.latch does not contain a phi node, because it is a single input +; phi in a non-exit block. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i16 @main() { ; CHECK-LABEL: define i16 @main() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[BB2_PREHEADER:.*]] -; CHECK: [[BB1_PREHEADER:.*]]: -; CHECK-NEXT: br label %[[TESTBB1:.*]] -; CHECK: [[TESTBB1]]: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[BB5:.*]] ], [ 1, %[[BB1_PREHEADER]] ] -; CHECK-NEXT: br label %[[BB2_SPLIT:.*]] -; CHECK: [[BB2_PREHEADER]]: -; CHECK-NEXT: br label %[[TESTBB2:.*]] -; CHECK: [[TESTBB2]]: -; CHECK-NEXT: [[J:%.*]] = phi i16 [ [[TMP1:%.*]], %[[BB3_SPLIT:.*]] ], [ 0, %[[BB2_PREHEADER]] ] -; CHECK-NEXT: br label %[[BB1_PREHEADER]] -; CHECK: [[BB2_SPLIT]]: +; CHECK-NEXT: br label %[[INNER_HEADER_PREHEADER:.*]] +; CHECK: [[OUTER_HEADER_PREHEADER:.*]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ 1, %[[OUTER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[INNER_HEADER_SPLIT:.*]] +; CHECK: [[INNER_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[J:%.*]] = phi i16 [ [[TMP1:%.*]], %[[INNER_LATCH_SPLIT:.*]] ], [ 0, %[[INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: br label %[[OUTER_HEADER_PREHEADER]] +; CHECK: [[INNER_HEADER_SPLIT]]: ; CHECK-NEXT: [[ARRAYIDX_US_US:%.*]] = getelementptr i16, ptr null, i16 [[J]] ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_US_US]], align 1 ; CHECK-NEXT: [[COND:%.*]] = select i1 false, i16 0, i16 0 -; CHECK-NEXT: br label %[[TESTBB3:.*]] -; CHECK: [[TESTBB3]]: +; CHECK-NEXT: br label %[[INNER_LATCH:.*]] +; CHECK: [[INNER_LATCH]]: ; CHECK-NEXT: [[J_NEXT:%.*]] = add i16 [[J]], 1 -; CHECK-NEXT: br label %[[TESTBB4:.*]] -; CHECK: [[BB3_SPLIT]]: -; CHECK-NEXT: [[NEW_COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[BB5]] ] +; CHECK-NEXT: br label %[[OUTER_BODY:.*]] +; CHECK: [[INNER_LATCH_SPLIT]]: +; CHECK-NEXT: [[NEW_COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[OUTER_LATCH]] ] ; CHECK-NEXT: [[TMP1]] = add i16 [[J]], 1 -; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[TESTBB2]] -; CHECK: [[TESTBB4]]: -; CHECK-NEXT: br label %[[BB5]] -; CHECK: [[BB5]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_BODY]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 ; CHECK-NEXT: [[CMP286_US:%.*]] = icmp ugt i64 [[I]], 0 -; CHECK-NEXT: br i1 [[CMP286_US]], label %[[TESTBB1]], label %[[BB3_SPLIT]] +; CHECK-NEXT: br i1 [[CMP286_US]], label %[[OUTER_HEADER]], label %[[INNER_LATCH_SPLIT]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i16 0 +; CHECK-NEXT: [[OLD_COND_LCSSA_LCSSA:%.*]] = phi i16 [ [[NEW_COND_LCSSA]], %[[INNER_LATCH_SPLIT]] ] +; CHECK-NEXT: ret i16 [[OLD_COND_LCSSA_LCSSA]] ; entry: - br label %BB1 + br label %outer.header -BB1: - %i = phi i64 [ 1, %entry ], [ %i.next, %BB5 ] - br label %BB2 +outer.header: + %i = phi i64 [ 1, %entry ], [ %i.next, %outer.latch ] + br label %inner.header -BB2: - %j = phi i16 [ 0, %BB1 ], [ %j.next, %BB3 ] +inner.header: + %j = phi i16 [ 0, %outer.header ], [ %j.next, %inner.latch ] %arrayidx.us.us = getelementptr i16, ptr null, i16 %j %0 = load i16, ptr %arrayidx.us.us, align 1 %cond = select i1 false, i16 0, i16 0 - br label %BB3 + br label %inner.latch -BB3: +inner.latch: %j.next = add i16 %j, 1 - br i1 true, label %BB4, label %BB2 + br i1 true, label %outer.body, label %inner.header -BB4: - %new.cond.lcssa = phi i16 [ %cond, %BB3 ] - br label %BB5 +outer.body: + %new.cond.lcssa = phi i16 [ %cond, %inner.latch ] + br label %outer.latch -BB5: - %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ] +outer.latch: + %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %outer.body ] %i.next = add i64 %i, 1 %cmp286.us = icmp ugt i64 %i, 0 - br i1 %cmp286.us, label %BB1, label %exit + br i1 %cmp286.us, label %outer.header, label %exit exit: - ret i16 0 + ret i16 %old.cond.lcssa }