[flang] add -floop-interchange and enable it with opt levels #140182

sebpop · 2025-05-16T03:40:35Z

Hi,

two patches enable the use of -floop-interchange from the flang driver and enable LLVM's loop interchange at levels -O2, -O3, -Ofast, and -Os.

llvmbot · 2025-05-16T03:41:06Z

@llvm/pr-subscribers-clang-driver
@llvm/pr-subscribers-flang-semantics

@llvm/pr-subscribers-clang

Author: Sebastian Pop (sebpop)

Changes

Hi,

two patches enable the use of -floop-interchange from the flang driver and enable LLVM's loop interchange at levels -O2, -O3, -Ofast, and -Os.

Full diff: https://github.com/llvm/llvm-project/pull/140182.diff

9 Files Affected:

(modified) clang/include/clang/Driver/Options.td (+2-2)
(modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+13)
(modified) clang/lib/Driver/ToolChains/CommonArgs.h (+4)
(modified) clang/lib/Driver/ToolChains/Flang.cpp (+1)
(modified) flang/include/flang/Frontend/CodeGenOptions.def (+1)
(modified) flang/lib/Frontend/CompilerInvocation.cpp (+3)
(modified) flang/lib/Frontend/FrontendActions.cpp (+1)
(modified) flang/lib/Semantics/expression.cpp (+2-1)
(added) flang/test/Driver/loop-interchange.f90 (+13)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 11677626dbf1f..287a00863bb35 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4141,9 +4141,9 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group<f_Group>,
   HelpText<"Issue call to specified function rather than a trap instruction">,
   MarshallingInfoString<CodeGenOpts<"TrapFuncName">>;
 def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>,
-  HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>;
+  HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
 def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
-  HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>;
+  HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
 def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
   HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
 def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index e4bad39f8332a..89f4ebd519ebf 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -3152,3 +3152,16 @@ void tools::handleVectorizeSLPArgs(const ArgList &Args,
                    options::OPT_fno_slp_vectorize, EnableSLPVec))
     CmdArgs.push_back("-vectorize-slp");
 }
+
+void tools::handleInterchangeLoopsArgs(const ArgList &Args,
+                                       ArgStringList &CmdArgs) {
+  // FIXME: instead of relying on shouldEnableVectorizerAtOLevel, we may want to
+  // implement a separate function to infer loop interchange from opt level.
+  // For now, enable loop-interchange at the same opt levels as loop-vectorize.
+  bool EnableInterch = shouldEnableVectorizerAtOLevel(Args, false);
+  OptSpecifier interchangeAliasOption =
+      EnableInterch ? options::OPT_O_Group : options::OPT_floop_interchange;
+  if (Args.hasFlag(options::OPT_floop_interchange, interchangeAliasOption,
+                   options::OPT_fno_loop_interchange, EnableInterch))
+    CmdArgs.push_back("-floop-interchange");
+}
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h
index 96bc0619dcbc0..6d36a0e8bf493 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -259,6 +259,10 @@ void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args,
 bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args,
                                     bool isSlpVec);
 
+/// Enable -floop-interchange based on the optimization level selected.
+void handleInterchangeLoopsArgs(const llvm::opt::ArgList &Args,
+                                llvm::opt::ArgStringList &CmdArgs);
+
 /// Enable -fvectorize based on the optimization level selected.
 void handleVectorizeLoopsArgs(const llvm::opt::ArgList &Args,
                               llvm::opt::ArgStringList &CmdArgs);
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index b1ca747e68b89..54176381b6e5b 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -152,6 +152,7 @@ void Flang::addCodegenOptions(const ArgList &Args,
       !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
     CmdArgs.push_back("-fstack-arrays");
 
+  handleInterchangeLoopsArgs(Args, CmdArgs);
   handleVectorizeLoopsArgs(Args, CmdArgs);
   handleVectorizeSLPArgs(Args, CmdArgs);
 
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index d9dbd274e83e5..7ced60f512219 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -35,6 +35,7 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
 CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
 CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
 CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization.
+CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange.
 CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
 CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
 CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 28f2f69f23baf..0bdbb616136f1 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -269,6 +269,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
                    clang::driver::options::OPT_fno_stack_arrays, false))
     opts.StackArrays = 1;
 
+  if (args.getLastArg(clang::driver::options::OPT_floop_interchange))
+    opts.InterchangeLoops = 1;
+
   if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
     opts.VectorizeLoop = 1;
 
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index c1f47b12abee2..7c936ee23009d 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -915,6 +915,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
   if (ci.isTimingEnabled())
     si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
   pto.LoopUnrolling = opts.UnrollLoops;
+  pto.LoopInterchange = opts.InterchangeLoops;
   pto.LoopInterleaving = opts.UnrollLoops;
   pto.LoopVectorization = opts.VectorizeLoop;
   pto.SLPVectorization = opts.VectorizeSLP;
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index e139bda7e4950..35eb7b61429fb 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -421,7 +421,8 @@ static void CheckSubscripts(
 
 static void CheckSubscripts(
     semantics::SemanticsContext &context, CoarrayRef &ref) {
-  const Symbol &coarraySymbol{ref.GetBase().GetLastSymbol()};
+  const auto &base = ref.GetBase();
+  const Symbol &coarraySymbol{base.GetLastSymbol()};
   Shape lb, ub;
   if (FoldSubscripts(context, coarraySymbol, ref.subscript(), lb, ub)) {
     ValidateSubscripts(context, coarraySymbol, ref.subscript(), lb, ub);
diff --git a/flang/test/Driver/loop-interchange.f90 b/flang/test/Driver/loop-interchange.f90
new file mode 100644
index 0000000000000..d5d62e9a777d2
--- /dev/null
+++ b/flang/test/Driver/loop-interchange.f90
@@ -0,0 +1,13 @@
+! RUN: %flang -### -S -floop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -fno-loop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! CHECK-LOOP-INTERCHANGE: "-floop-interchange"
+! CHECK-NO-LOOP-INTERCHANGE-NOT: "-floop-interchange"
+
+program test
+end program

llvmbot · 2025-05-16T03:41:06Z

@llvm/pr-subscribers-flang-driver

Author: Sebastian Pop (sebpop)

Changes

Hi,

two patches enable the use of -floop-interchange from the flang driver and enable LLVM's loop interchange at levels -O2, -O3, -Ofast, and -Os.

Full diff: https://github.com/llvm/llvm-project/pull/140182.diff

9 Files Affected:

(modified) clang/include/clang/Driver/Options.td (+2-2)
(modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+13)
(modified) clang/lib/Driver/ToolChains/CommonArgs.h (+4)
(modified) clang/lib/Driver/ToolChains/Flang.cpp (+1)
(modified) flang/include/flang/Frontend/CodeGenOptions.def (+1)
(modified) flang/lib/Frontend/CompilerInvocation.cpp (+3)
(modified) flang/lib/Frontend/FrontendActions.cpp (+1)
(modified) flang/lib/Semantics/expression.cpp (+2-1)
(added) flang/test/Driver/loop-interchange.f90 (+13)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 11677626dbf1f..287a00863bb35 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4141,9 +4141,9 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group<f_Group>,
   HelpText<"Issue call to specified function rather than a trap instruction">,
   MarshallingInfoString<CodeGenOpts<"TrapFuncName">>;
 def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>,
-  HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>;
+  HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
 def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
-  HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>;
+  HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
 def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
   HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
 def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index e4bad39f8332a..89f4ebd519ebf 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -3152,3 +3152,16 @@ void tools::handleVectorizeSLPArgs(const ArgList &Args,
                    options::OPT_fno_slp_vectorize, EnableSLPVec))
     CmdArgs.push_back("-vectorize-slp");
 }
+
+void tools::handleInterchangeLoopsArgs(const ArgList &Args,
+                                       ArgStringList &CmdArgs) {
+  // FIXME: instead of relying on shouldEnableVectorizerAtOLevel, we may want to
+  // implement a separate function to infer loop interchange from opt level.
+  // For now, enable loop-interchange at the same opt levels as loop-vectorize.
+  bool EnableInterch = shouldEnableVectorizerAtOLevel(Args, false);
+  OptSpecifier interchangeAliasOption =
+      EnableInterch ? options::OPT_O_Group : options::OPT_floop_interchange;
+  if (Args.hasFlag(options::OPT_floop_interchange, interchangeAliasOption,
+                   options::OPT_fno_loop_interchange, EnableInterch))
+    CmdArgs.push_back("-floop-interchange");
+}
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h
index 96bc0619dcbc0..6d36a0e8bf493 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -259,6 +259,10 @@ void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args,
 bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args,
                                     bool isSlpVec);
 
+/// Enable -floop-interchange based on the optimization level selected.
+void handleInterchangeLoopsArgs(const llvm::opt::ArgList &Args,
+                                llvm::opt::ArgStringList &CmdArgs);
+
 /// Enable -fvectorize based on the optimization level selected.
 void handleVectorizeLoopsArgs(const llvm::opt::ArgList &Args,
                               llvm::opt::ArgStringList &CmdArgs);
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index b1ca747e68b89..54176381b6e5b 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -152,6 +152,7 @@ void Flang::addCodegenOptions(const ArgList &Args,
       !stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
     CmdArgs.push_back("-fstack-arrays");
 
+  handleInterchangeLoopsArgs(Args, CmdArgs);
   handleVectorizeLoopsArgs(Args, CmdArgs);
   handleVectorizeSLPArgs(Args, CmdArgs);
 
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index d9dbd274e83e5..7ced60f512219 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -35,6 +35,7 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
 CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
 CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
 CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization.
+CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange.
 CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
 CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
 CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 28f2f69f23baf..0bdbb616136f1 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -269,6 +269,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
                    clang::driver::options::OPT_fno_stack_arrays, false))
     opts.StackArrays = 1;
 
+  if (args.getLastArg(clang::driver::options::OPT_floop_interchange))
+    opts.InterchangeLoops = 1;
+
   if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
     opts.VectorizeLoop = 1;
 
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index c1f47b12abee2..7c936ee23009d 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -915,6 +915,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
   if (ci.isTimingEnabled())
     si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
   pto.LoopUnrolling = opts.UnrollLoops;
+  pto.LoopInterchange = opts.InterchangeLoops;
   pto.LoopInterleaving = opts.UnrollLoops;
   pto.LoopVectorization = opts.VectorizeLoop;
   pto.SLPVectorization = opts.VectorizeSLP;
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index e139bda7e4950..35eb7b61429fb 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -421,7 +421,8 @@ static void CheckSubscripts(
 
 static void CheckSubscripts(
     semantics::SemanticsContext &context, CoarrayRef &ref) {
-  const Symbol &coarraySymbol{ref.GetBase().GetLastSymbol()};
+  const auto &base = ref.GetBase();
+  const Symbol &coarraySymbol{base.GetLastSymbol()};
   Shape lb, ub;
   if (FoldSubscripts(context, coarraySymbol, ref.subscript(), lb, ub)) {
     ValidateSubscripts(context, coarraySymbol, ref.subscript(), lb, ub);
diff --git a/flang/test/Driver/loop-interchange.f90 b/flang/test/Driver/loop-interchange.f90
new file mode 100644
index 0000000000000..d5d62e9a777d2
--- /dev/null
+++ b/flang/test/Driver/loop-interchange.f90
@@ -0,0 +1,13 @@
+! RUN: %flang -### -S -floop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -fno-loop-interchange %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-INTERCHANGE %s
+! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-INTERCHANGE %s
+! CHECK-LOOP-INTERCHANGE: "-floop-interchange"
+! CHECK-NO-LOOP-INTERCHANGE-NOT: "-floop-interchange"
+
+program test
+end program

sjoerdmeijer · 2025-05-16T09:35:20Z

For more context, this is part of our loop-interchange enablement story, see our RFC here: https://discourse.llvm.org/t/enabling-loop-interchange/82589.

We have fixed all the compile-time issues and loop-interchange issues that we are aware of, and would like to enable this in the C/C++ flow, see here: #124911.

As part of this work, we also promised to fix DependenceAnalysis. The last DA correctness corner-case that is being worked on is: #123436. This is a corner-case for C/C++ related to type punning and different offset sizes, which won't be a problem in Fortran. Therefore, we think that enabling interchange and dependence analysis for Fortran makes sense.

kiranchandramohan

Thanks for this PR. Do you have any compilation time and performance data?

kiranchandramohan · 2025-05-16T09:59:04Z

flang/lib/Semantics/expression.cpp

+  const auto &base = ref.GetBase();
+  const Symbol &coarraySymbol{base.GetLastSymbol()};


Nit: Is this an unrelated change?

This has been submitted separately #138793

Without this change I cannot build flang on arm64-linux ubuntu 24.04 machine.

I requested a review for #138793. It's probably best to proceed with this after that has been merged.

sebpop · 2025-05-16T13:31:50Z

Do you have any compilation time and performance data?

@madhur13490 made several changes to loop interchange to optimize the overall compilation time with the pass. I believe Madhur has only looked at C/C++ benchmarks and not at how loop interchange would impact flang. I think that if compilation time is good for C/C++, it should also be good for Fortran.

On the perf side, I was checking whether we can already catch swim from cpu2000, and that fails because there is not enough data to infer the number of iterations. I will be working on adding assume (N < 1335) based on analyzing array declarations, in order to infer loop bounds.

tarunprabhu

Could you add a test that ensures that the loop-interchange pass is added to the pipeline? Perhaps something like flang/test/Driver/slp-vectorize.f90.

tarunprabhu · 2025-05-16T15:36:21Z

flang/lib/Semantics/expression.cpp

+  const auto &base = ref.GetBase();
+  const Symbol &coarraySymbol{base.GetLastSymbol()};


I requested a review for #138793. It's probably best to proceed with this after that has been merged.

sjoerdmeijer · 2025-05-17T08:46:36Z

Thanks for this PR. Do you have any compilation time and performance data?

This information is a bit spread out over the other tickets that I linked earlier, so to summarise: compile times look really good, and the increases are very minimal after the work that Madhur did. In #124911, I wrote:

The compile-time increase with a geomean increase of 0.19% looks good (after committing #124247), I think:

stage1-O3:
Benchmark
kimwitu++        +0.10%
sqlite3          +0.14%
consumer-typeset +0.07%
Bullet           +0.06%
tramp3d-v4       +0.21%
mafft            +0.39%
ClamAVi          +0.06%
lencod           +0.61%
SPASS            +0.17%
7zip             +0.08%
geomean          +0.19%

Regarding performance, as I also wrote in that ticket, loop-interchange has a lot of potential. It triggers a lot of times e.g. in the LLVM test-suite, see this #124911 (comment).
It is now triggering slightly less than what I wrote in that comment because we made interchange more pessimistic to fix correctness issues, but we think that's okay because we consider getting interchange and DependenceAnalysis running by default as a first enablement step. Once we have achieved this, we are going to focus on performance and lift some of the restrictions (while maintaining correctness of course). With this first patch, interchange won't trigger on SPEC for example, but we plan to do that as follow up.

sjoerdmeijer

This probably deserves a mention in the Flang ReleaseNotes?

sjoerdmeijer · 2025-05-20T10:35:56Z

clang/lib/Driver/ToolChains/CommonArgs.cpp

Bikeshedding names: I would add 4 characters "ange" to the variable name: EnableInterch -> EnableInterchange :-)

sjoerdmeijer · 2025-05-20T10:36:46Z

clang/lib/Driver/ToolChains/CommonArgs.cpp

Nit: interchangeAliasOption -> InterchangeAliasOption?

This patch allows flang to recognize the flags -floop-interchange and -fno-loop-interchange. -floop-interchange adds the loop interchange pass to the pass pipeline.

tarunprabhu

LGTM. Thanks!

kasuga-fj · 2025-08-20T06:32:49Z

To Flang community:
After this PR was merged, lots of miscompilations were discovered in DependenceAnalysis, which LoopInterchange depends on. Resolving these issues may take some time, so I suggest we disable LoopInterchange by default for now. I'm not very familiar with Flang's policy, so, what does the community think about this?

Meinersbur · 2025-08-20T12:56:02Z

Flang's policy is not really different from Clang's, but I can raise it in the next community call

kasuga-fj · 2025-08-20T13:12:22Z

Flang's policy is not really different from Clang's, but I can raise it in the next community call

Thanks. I think this should be removed from LLVM 21.

tarunprabhu · 2025-08-20T16:13:38Z

Does this affect clang as well? If we do decide to disable it, will it only be in flang, or in both clang and flang? If the latter, we should check with the clang developers as well.

kasuga-fj · 2025-08-20T16:18:42Z

It is only enabled in flang, and not in clang at the moment. I don't think it affects clang.

sjoerdmeijer · 2025-08-21T11:02:59Z

To Flang community: After this PR was merged, lots of miscompilations were discovered in DependenceAnalysis, which LoopInterchange depends on. Resolving these issues may take some time, so I suggest we disable LoopInterchange by default for now. I'm not very familiar with Flang's policy, so, what does the community think about this?

You're pointing to 4 patches, one of them I raised which I found by using different fuzzers. In my fuzzing exercise, I found ~30 issues, and I found a lot more issues in the loop vectoriser than there are issues in DA. Are we going to disable the vectoriser because we raise issues against it of which some of them are still open?

That's a rhetorical question. My point is that this needs to be judged on a case-by-case basis; just pointing to a list of fewer than a handful of issues is not going to be useful.

kasuga-fj · 2025-08-21T13:08:05Z

I'm not trying to emphasize the number of issues raised on GitHub, the real concern is their substance. What I pointed out in the issues represents only a small portion of the overall problem. There are fundamental flaws in DA's implementation, particularly in how it handles wrapping of SCEVs. These issues aren't caught by assertions and can silently lead to incorrect transformations. I don't know how many miscompiles exist, but it would not be a small number.

The situation seems to differ from that of the vectorizer. DA was already broken when this PR was submitted. I believe it's premature to enable any passes that rely on DA by default. This would not directly reflect the severity of the issue, but it's the conclusion I reached after thoroughly examining DA for a month. There are simply too many bugs.

Also note that properly fixing these issues will take considerable time, as we (or at least I) still aren't sure what the best approach to solving them is.

sjoerdmeijer · 2025-08-21T13:31:50Z

There are fundamental flaws in DA's implementation, particularly in how it handles wrapping of SCEVs.

So the real question here is: how important or realistic is this for Fortran?

sebpop · 2025-08-21T13:55:06Z

DA was already broken when this PR was submitted.

This is wrong, we fixed all outstanding bugs reported against DA at the time when we enabled interchange in flang.

I believe it's premature to enable any passes that rely on DA by default.

If we don't enable interchange by default then we don't get more bugs reported against it because the pass is not getting used.

This would not directly reflect the severity of the issue, but it's the conclusion I reached after thoroughly examining DA for a month.
There are simply too many bugs.

too many is not substantiated.
Please provide a list of bugs reported against flang due to interchange.

After this PR was merged, lots of miscompilations were discovered in DependenceAnalysis, which LoopInterchange depends on.

How many of the current 4 bugs you point to are related to flang?
There is no mention about flang in any of the bug reports.

sebpop · 2025-08-21T14:10:07Z

Instead of spreading FUD around, please be more precise and open bug reports for the following:

There are fundamental flaws in DA's implementation, particularly in how it handles wrapping of SCEVs.

Tell me more about it in a separate bug, and let's see how we can work towards fixing it.

These issues aren't caught by assertions and can silently lead to incorrect transformations.

What are you speaking about?
Again this is FUD.

I don't know how many miscompiles exist, but it would not be a small number.

Please provide data.

kasuga-fj · 2025-08-21T15:10:36Z

This is wrong, we fixed all outstanding bugs reported against DA at the time when we enabled interchange in flang.

I’m referring to the bugs that existed at that time but were not reported. The issues that were raised do not represent all the defects.

If we don't enable interchange by default then we don't get more bugs reported against it because the pass is not getting used.

I don’t think enabling passes just to get more bug reports is a good strategy. At the very least, we should carefully check the existing code beforehand.

To be clear: all my bug reports are what I found by reading the code. I didn't use flang to find them.

How many of the current 4 bugs you point to are related to flang?
There is no mention about flang in any of the bug reports.

Are there any guarantees that Flang won’t generate the IR I mentioned in the issues? If so, that’s fine, but I don’t think any such guarantees exist.

Instead of spreading FUD around, please be more precise and open bug reports for the following:

At the very least I can't help feeling anxious, since there seem to be so many issues that it’s really hard to imagine what might happen.

Tell me more about it in a separate bug, and let's see how we can work towards fixing it.

Here are some examples that immediately come to my mind (not limited to them):

This is wrong, !isKnownNegative doesn't mean it's positive.
This seems incorrect, since we don't check the monotonicity of each subscript.
This is wrong, we don't check whether X is non-zero.
This is wrong. In general, the no-wrap flags aren't preserved.
In this function, we must ensure the multiplication doesn't overflow.

But the most serious issue is that DA doesn’t handle overflow at all.

These issues aren't caught by assertions and can silently lead to incorrect transformations.

What are you speaking about?

I'm saying that DA can miss dependency, and it can result in incorrect legality check in LoopInterchange.

Please provide data.

I don’t have any data, so I'm fine if the Flang community is okay with it. However, I strongly think these issues also affect Flang. In the end, there’s no data showing that these issues are unrelated to Flang.

sebpop · 2025-08-21T15:49:29Z

Thank you for raising these issues to my attention, I will be working on fixing these bugs.

I believe those bugs may not impact Fortran programs as generated by flang:

One data point is our internal runs of SPEC Cpu benchmarks on which interchange does trigger with flang, and there are no correctness issues.
Another data point is your earlier comment:

I've confirmed the result of Fujitsu Compiler Test Suite. The only correctness issue affected by this commit is https://github.com/fujitsu/compiler-test-suite/blob/main/Fortran/0347/0347_0240.f, which will be resolved by #140709.

So far I have not seen bugs reported against flang + interchange.

tblah · 2025-08-21T16:27:24Z

Weighing in with my personal opinion here:

If this can be shown to miscompile any known application then that's grounds for immediately disabling the pass until it can be fixed.
If this only breaks some very obscure Fortran formulation that a human is unlikely to write, then I guess we should weigh up the pros and cons. I would lean towards a revert but there have been exceptions made in the past for critical optimisations which are not known to break on any known production code (the example I'm thinking of was a theoretical correctness issue with the TBAA alias tags when a function is inlined into itself which we were confident didn't affect real code because classic Flang had the same issue).
If this is unlikely to be triggered from any fortran code and the pass is providing a noticeable speedup such that disabling it would constitute a measurable regression in Flang codegen then we may decide not to disable the pass, especially if DA is going to be fixed relatively soon.

In this particular case, the pass has been merged for a long time. In that time we have built a lot of applications and benchmarks at -O3 in our internal CI and not seen any issues known to arise from loop interchange. Presumably other organisations have been testing Flang during this time too.

@kasuga-fj thank you very much for reporting this. It is helpful for the whole community to know that there could be issues here so that we know where to look if any miscompilations are discovered. Your careful review of DA looks like a great step forward. If the reported issues break something you are doing with Flang then I will be happy to consider disabling the pass.

Meinersbur · 2025-08-21T17:50:21Z

There are fundamental flaws in DA's implementation, particularly in how it handles wrapping of SCEVs.

Tell me more about it in a separate bug, and let's see how we can work towards fixing it.

If you look over the code you can see that no attention was paid to integer wrapping behavior. For instance, x < y is implemented in multiple places (e.g. DependenceInfo::isKnownLessThan and DependenceInfo::isKnownPredicate) as

SE->isKnownNegative(getMinusSCEV(x,y))

isKnownNegative always assumes a signed interpretation (otherwise it could never be negative), but the code may use unsigned loop variables/pointer indices.
Even if it is signed, if e.g. x = 1, y = INT_MIN (SCEVConstants, x is obviously larger), x - y would wrap back into the negative and return true. Note that the intended expression x < y does not involve any wrapping behaviour.

There is ScalarEvolution::isKnownPredicate that allows to choose signedness and should handle large integers correctly.

Instead of spreading FUD around [...]

@kasuga-fj pointed out bugs in the current implementation. Whether those are sufficient to justify changing default behavior is subjective. Generally assume best intentions.

sebpop · 2025-08-21T18:36:53Z

Generally assume best intentions.

I agree, and I will keep this in mind.

These issues aren't caught by assertions and can silently lead to incorrect transformations.

For GCC, I adapted the Omega test (from Bill Pugh) to produce the same representation as the classic dependence tests. And we got more confidence that the dependence analysis implementation was correct. I removed the extra checks gcc-mirror/gcc@49b8fe6 10 years down the road.

Maybe we could do the same to build trust in the results provided by LLVM's DA.
We could adapt Polly to print distance and direction vectors and then diff DA against Polly.

kasuga-fj · 2025-08-22T02:01:41Z

@tblah Thank you for the comments! This is exactly what I wanted to know!
@Meinersbur Thank you for the additional input (I should have included that in my earlier comment).

First of all, I'm not strongly requesting to revert this PR. The primary purpose of my first comment is to share information, and of course, I think it's up to the Flang developers to make the final decision (especially since I don't know much about Flang myself).

In this particular case, the pass has been merged for a long time. In that time we have built a lot of applications and benchmarks at -O3 in our internal CI and not seen any issues known to arise from loop interchange. Presumably other organisations have been testing Flang during this time too.

This makes a lot of sense to me. Thanks for sharing the context.

Let me share a few more details in the hope that they will be helpful.

Maybe I've been a bit oversensitive lately, having encountered too many unexpected behaviors. Perhaps I've even lost sight of what qualifies as a "practical example"... Still, let me share one to illustrate why I believe this issue is serious. Please consider the following case:

; for (i = 0; i < 100; i++) for (int j = 0; j < 100; j++)
;   a[j & 1][i & 1] = 42;
define void @f(ptr %a) {
entry:
  br label %loop.i.header

loop.i.header:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop.i.latch ]
  %and.i = and i64 %i, 1
  br label %loop.j

loop.j:
  %j = phi i64 [ 0, %loop.i.header ], [ %j.next, %loop.j ]
  %and.j = and i64 %j, 1
  %idx = getelementptr [2 x [2 x i8]], ptr %a, i64 0, i64 %and.i, i64 %and.j
  store i8 42, ptr %idx
  %j.next = add i64 %j, 1
  %exitcond.j = icmp eq i64 %j.next, 100
  br i1 %exitcond.j, label %loop.i.latch, label %loop.j

loop.i.latch:
  %i.next = add i64 %i, 1
  %exitcond.i = icmp eq i64 %i.next, 100
  br i1 %exitcond.i, label %exit, label %loop.i.header

exit:
  ret void
}

The current DA result is "there are no loop-carried dependencies for the store", which is obviously incorrect (godbolt). Honestly, I'm no longer sure what's realistic here, or how many similar cases might exist... I believe resolving this will take some time.

The bugs I listed are relatively minor and might be fixed quickly (I haven't worked on them simply because writing tests felt tedious). However, the most serious issue is that the current implementation doesn't account for integer calculation wrapping during analysis, as mentioned in an earlier comment by @Meinersbur. Again, I believe this will take time to fix.
As for the minor issues, the ones I mentioned don't cover everything (I'm not going to go out of my way to list every single one here). That said, I believe most of them can be resolved relatively quickly.

sebpop · 2025-08-22T17:00:57Z

I posted a patch that fixes your above testcase.
I also added the above testcase as llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll
The output is

; CHECK-NEXT:    da analyze - output [* *]!

sebpop · 2025-08-22T17:02:45Z

I'm no longer sure what's realistic

TBH, nobody writes such code: a[j & 1][i & 1].
Anyway, we have a fix for all those.

nikic · 2025-08-22T17:11:02Z

To be honest, I am very surprised that this PR was submitted, let alone landed. We have explicitly not approved enabling LoopInterchange by default in LLVM due to outstanding issues. So instead you go ahead and enable it directly in Flang instead, without even notifying any of the people who were involved in the original discussions. That's not how things should work.

Please revert this patch ASAP and request an LLVM 21 backport.

Edit: It should also be fine to just disable loop interchange by default, while retaining the -floop-interchange option, to match Clang.

kasuga-fj · 2025-08-22T17:21:27Z

To be honest, I am very surprised that this PR was submitted, let alone landed. We have explicitly not approved enabling LoopInterchange by default in LLVM due to outstanding issues. So instead you go ahead and enable it directly in Flang instead, without even notifying any of the people who were involved in the original discussions. That's not how things should work.

Please revert this patch ASAP and request an LLVM 21 backport.

(Thank you very much for finding this PR...)

kasuga-fj · 2025-08-22T17:27:50Z

I’ll leave the revert up to the author.

sjoerdmeijer · 2025-08-23T08:21:53Z

@nikic: I feel that all nuance has been lost here in this discussion, so I would like to bring some of that back:

First of all, let's recognise that Fortran and C/C++ are really different, and that they are different users of DA. At the time of enablement here, the last known bug that is being worked on involved type-punning of base pointers in a loop body, and/or some variants of this. You can't write that in Fortran. And in C/C++ you won't find this in normal code (it's also non-portable code).
In a bug triage process, before any action is taken, first the trigger conditions should be determined, and then the impact to determine the severity. We don't have any of this information. I thus feel we have different standards for DA compared to other components that have problems (that may or may not have an impact). We should also recognise that new information appeared after this patch was merged, and very recently.
I haven't looked at all new bugs, but one of them is definitely another very weird corner case that I doubt can be triggered from Fortran. And whether the exact process has been followed for enablement I don't know, but now the fact is that this has been running for 3 months and no bugs have been raised against Flang or interchange.

If you feel this should be reverted based on the grounds of process or inclusion, okay, then it is what it is, fair enough.
Going forward though I would encourage a more constructive approach to deal with bugs before any conclusions are drawn.

nikic · 2025-08-25T15:58:17Z

@sjoerdmeijer Let me address two points separately.

The first is the original submission of this PR. There was an existing PR to enable loop interchange in #124911, and discussion for this enablement was consolidated there. I can see no evidence that reviewers at the time believed that loop interchange / dependence analysis is ready to be enabled. You could have posted a comment there asking whether people think it's okay to enable it just for Flang due to different language characteristics -- and I expect you'd have gotten a fairly clear "no" on making this language-dependent. But that did not happen. I guess this was just a miscommunication.

Now, regarding the issues in dependence analysis that have been found more recently: Yes, I believe these issues are quite severe and justify a revert. It's not that these issues are easy to trigger, but that they point to some rather fundamental issues in the dependence analysis implementation, which will likely require non-trivial changes to fully address. I don't want to backport all necessary changes to LLVM 21.

And yes, there is a certain double standard when it comes to issues in a newly enabled pass, and issues in a pass that has already been enabled for a very long time. In the latter case, it would take some rather extreme circumstances for us to disable the pass entirely, while in the former case this is the default response for non-trivial issues.

nikic · 2025-08-25T19:56:32Z

As this is time critical (needs to land prior to the LLVM 21 release tomorrow) I've submitted a PR myself: #155279

Disable loop interchange by default, while keeping the ability to explicitly enable using `-floop-interchange`. This matches Clang. See discussion on #140182.

Disable loop interchange by default, while keeping the ability to explicitly enable using `-floop-interchange`. This matches Clang. See discussion on llvm/llvm-project#140182.

Disable loop interchange by default, while keeping the ability to explicitly enable using `-floop-interchange`. This matches Clang. See discussion on llvm#140182. (cherry picked from commit 8849750)

Disable loop interchange by default, while keeping the ability to explicitly enable using `-floop-interchange`. This matches Clang. See discussion on llvm/llvm-project#140182. (cherry picked from commit 8849750)

sebpop requested review from sjoerdmeijer and vzakhari May 16, 2025 03:40

sebpop self-assigned this May 16, 2025

llvmbot added clang Clang issues not falling into any other category clang:driver 'clang' and 'clang++' user-facing binaries. Not 'clang-cl' flang:driver flang Flang issues not falling into any other category flang:semantics labels May 16, 2025

sjoerdmeijer requested a review from kiranchandramohan May 16, 2025 09:37

kiranchandramohan reviewed May 16, 2025

View reviewed changes

kiranchandramohan requested review from DanielCChen, DavidTruby, Meinersbur, kkwli, mjklemm, tarunprabhu and tblah May 16, 2025 10:03

tarunprabhu reviewed May 16, 2025

View reviewed changes

vzakhari requested a review from mcinally May 16, 2025 21:22

sjoerdmeijer reviewed May 20, 2025

View reviewed changes

sebpop added 3 commits May 20, 2025 19:02

[flang] add -floop-interchange to flang driver

46efee7

This patch allows flang to recognize the flags -floop-interchange and -fno-loop-interchange. -floop-interchange adds the loop interchange pass to the pass pipeline.

[flang] enable loop-interchange at O3, O2, and Os

9dc3774

test loop-interchange in pass pipeline

0b81d78

sebpop force-pushed the flang-interch branch from dd3f7b2 to 0b81d78 Compare May 20, 2025 20:02

tarunprabhu approved these changes May 20, 2025

View reviewed changes

nikic mentioned this pull request Aug 25, 2025

[flang] Disable loop interchange by default #155279

Merged

nikic added a commit that referenced this pull request Aug 26, 2025

[flang] Disable loop interchange by default (#155279)

8849750

Disable loop interchange by default, while keeping the ability to explicitly enable using `-floop-interchange`. This matches Clang. See discussion on #140182.

aemerson mentioned this pull request Aug 30, 2025

[NFC] [clangd] [Modules] remove dot in log #156207

Closed

		const auto &base = ref.GetBase();
		const Symbol &coarraySymbol{base.GetLastSymbol()};

[flang] add -floop-interchange and enable it with opt levels #140182

[flang] add -floop-interchange and enable it with opt levels #140182

Uh oh!

Conversation

sebpop commented May 16, 2025

Uh oh!

llvmbot commented May 16, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 16, 2025

Uh oh!

sjoerdmeijer commented May 16, 2025

Uh oh!

kiranchandramohan left a comment

Choose a reason for hiding this comment

Uh oh!

kiranchandramohan May 16, 2025

Choose a reason for hiding this comment

Uh oh!

sebpop May 16, 2025

Choose a reason for hiding this comment

Uh oh!

tarunprabhu May 16, 2025

Choose a reason for hiding this comment

Uh oh!

sebpop commented May 16, 2025

Uh oh!

tarunprabhu left a comment

Choose a reason for hiding this comment

Uh oh!

tarunprabhu May 16, 2025

Choose a reason for hiding this comment

Uh oh!

sjoerdmeijer commented May 17, 2025

Uh oh!

sjoerdmeijer left a comment

Choose a reason for hiding this comment

Uh oh!

sjoerdmeijer May 20, 2025

Choose a reason for hiding this comment

Uh oh!

sjoerdmeijer May 20, 2025

Choose a reason for hiding this comment

Uh oh!

tarunprabhu left a comment

Choose a reason for hiding this comment

Uh oh!

kasuga-fj commented Aug 20, 2025

Uh oh!

Meinersbur commented Aug 20, 2025

Uh oh!

kasuga-fj commented Aug 20, 2025

Uh oh!

tarunprabhu commented Aug 20, 2025

Uh oh!

kasuga-fj commented Aug 20, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

sjoerdmeijer commented Aug 21, 2025

Uh oh!

kasuga-fj commented Aug 21, 2025

Uh oh!

sjoerdmeijer commented Aug 21, 2025

Uh oh!

sebpop commented Aug 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

sebpop commented Aug 21, 2025

Uh oh!

kasuga-fj commented Aug 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

sebpop commented Aug 21, 2025

Uh oh!

tblah commented Aug 21, 2025

Uh oh!

Meinersbur commented Aug 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

sebpop commented Aug 21, 2025

llvmbot commented May 16, 2025 •

edited

Loading

kasuga-fj commented Aug 20, 2025 •

edited

Loading

sebpop commented Aug 21, 2025 •

edited

Loading

kasuga-fj commented Aug 21, 2025 •

edited

Loading

Meinersbur commented Aug 21, 2025 •

edited

Loading

nikic commented Aug 22, 2025 •

edited

Loading