From cba73f82cb4cce790c9739ca228f8bc683b3e56a Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Tue, 24 Jun 2025 11:55:51 +0000 Subject: [PATCH 1/2] [Passes] Move LoopInterchange into optimization pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As mentioned in https://github.com/llvm/llvm-project/pull/145071, LoopInterchange should be part of the optimization pipeline rather than the simplification pipeline. This patch moves LoopInterchange into the optimization pipeline. More context: - By default, LoopInterchange attempts to improve data locality; however, it also takes vectorization opportunities into account. Given that, it is reasonable to run it as close to vectorization as possible. - I looked into previous changes related to the placement of LoopInterchange, but couldn’t find any strong motivation suggesting that it benefits other simplifications. - In the tests I ran (including llvm-test-suite), removing LoopInterchange from the simplification pipeline does not affect other simplifications. Therefore, there doesn't seem to be much value in keeping it there. --- llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index c83d2dc1f1514..98821bb1408a7 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -690,9 +690,6 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM2.addPass(LoopDeletionPass()); - if (PTO.LoopInterchange) - LPM2.addPass(LoopInterchangePass()); - // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile // inaccurate. 
The normal unroller doesn't pay attention to forced full unroll @@ -1547,6 +1544,10 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // this may need to be revisited once we run GVN before loop deletion // in the simplification pipeline. LPM.addPass(LoopDeletionPass()); + + if (PTO.LoopInterchange) + LPM.addPass(LoopInterchangePass()); + OptimizePM.addPass(createFunctionToLoopPassAdaptor( std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); From 11e2519e811efa07208d3d74236ccf34374b0221 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Thu, 26 Jun 2025 12:55:56 +0000 Subject: [PATCH 2/2] Add test to check the position of LoopInterchange in the pipeline --- .../LoopInterchange/position-in-pipeline.ll | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 llvm/test/Transforms/LoopInterchange/position-in-pipeline.ll diff --git a/llvm/test/Transforms/LoopInterchange/position-in-pipeline.ll b/llvm/test/Transforms/LoopInterchange/position-in-pipeline.ll new file mode 100644 index 0000000000000..d0b6447d92fe7 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/position-in-pipeline.ll @@ -0,0 +1,48 @@ +; RUN: opt -passes='default<O3>' -enable-loopinterchange -disable-output \ +; RUN: -disable-verify -verify-analysis-invalidation=0 \ +; RUN: -debug-pass-manager=quiet %s 2>&1 | FileCheck %s + +; Test the position of LoopInterchange in the pass pipeline. 
+ +; CHECK-NOT: Running pass: LoopInterchangePass +; CHECK: Running pass: ControlHeightReductionPass +; CHECK-NEXT: Running pass: LoopSimplifyPass +; CHECK-NEXT: Running pass: LCSSAPass +; CHECK-NEXT: Running pass: LoopRotatePass +; CHECK-NEXT: Running pass: LoopDeletionPass +; CHECK-NEXT: Running pass: LoopRotatePass +; CHECK-NEXT: Running pass: LoopDeletionPass +; CHECK-NEXT: Running pass: LoopInterchangePass +; CHECK-NEXT: Running pass: LoopDistributePass +; CHECK-NEXT: Running pass: InjectTLIMappings +; CHECK-NEXT: Running pass: LoopVectorizePass + + +define void @foo(ptr %a, i32 %n) { +entry: + br label %for.i.header + +for.i.header: + %i = phi i32 [ 0, %entry ], [ %i.next, %for.i.latch ] + br label %for.j + +for.j: + %j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ] + %tmp = mul i32 %i, %n + %offset = add i32 %tmp, %j + %idx = getelementptr inbounds i32, ptr %a, i32 %offset + %load = load i32, ptr %idx, align 4 + %inc = add i32 %load, 1 + store i32 %inc, ptr %idx, align 4 + %j.next = add i32 %j, 1 + %j.exit = icmp eq i32 %j.next, %n + br i1 %j.exit, label %for.i.latch, label %for.j + +for.i.latch: + %i.next = add i32 %i, 1 + %i.exit = icmp eq i32 %i.next, %n + br i1 %i.exit, label %for.i.header, label %exit + +exit: + ret void +}