-
Notifications
You must be signed in to change notification settings - Fork 14.8k
Reapply "Introduce -fexperimental-loop-fusion to clang and flang #158844
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-flang-driver Author: Madhur Amilkanthwar (madhur13490) ChangesThis PR is a reapplication of #142686 Full diff: https://github.com/llvm/llvm-project/pull/158844.diff 15 Files Affected:
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index fda0da99b60c0..872f73ebf3810 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -322,6 +322,7 @@ CODEGENOPT(TimeTrace , 1, 0, Benign) ///< Set when -ftime-trace is enabl
VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500, Benign) ///< Minimum time granularity (in microseconds),
///< traced by time profiler
CODEGENOPT(InterchangeLoops , 1, 0, Benign) ///< Run loop-interchange.
+CODEGENOPT(FuseLoops , 1, 0, Benign) ///< Run loop-fusion.
CODEGENOPT(UnrollLoops , 1, 0, Benign) ///< Control whether loops are unrolled.
CODEGENOPT(RerollLoops , 1, 0, Benign) ///< Control whether loops are rerolled.
CODEGENOPT(NoUseJumpTables , 1, 0, Benign) ///< Set when -fno-jump-tables is enabled.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 54cc1e986b601..def7c09d58cfb 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4304,6 +4304,10 @@ def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>,
HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
+defm experimental_loop_fusion
+ : OptInCC1FFlag<"experimental-loop-fusion", "Enable", "Disable",
+ "Enable the loop fusion pass",
+ [ClangOption, FlangOption, FC1Option]>;
def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 3f095c03397fd..8c99af2bdff83 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -896,6 +896,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PipelineTuningOptions PTO;
PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
PTO.LoopInterchange = CodeGenOpts.InterchangeLoops;
+ PTO.LoopFusion = CodeGenOpts.FuseLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
@@ -1331,6 +1332,7 @@ runThinLTOBackend(CompilerInstance &CI, ModuleSummaryIndex *CombinedIndex,
Conf.SampleProfile = std::move(SampleProfile);
Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops;
+ Conf.PTO.LoopFusion = CGOpts.FuseLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a72bdedd7317b..c9377e23086c5 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6861,6 +6861,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_unroll_loops);
Args.AddLastArg(CmdArgs, options::OPT_floop_interchange,
options::OPT_fno_loop_interchange);
+ Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_loop_fusion,
+ options::OPT_fno_experimental_loop_fusion);
Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 2118c67dedc54..12e510ab1562d 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -156,6 +156,9 @@ void Flang::addCodegenOptions(const ArgList &Args,
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
CmdArgs.push_back("-fstack-arrays");
+ Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_loop_fusion,
+ options::OPT_fno_experimental_loop_fusion);
+
handleInterchangeLoopsArgs(Args, CmdArgs);
handleVectorizeLoopsArgs(Args, CmdArgs);
handleVectorizeSLPArgs(Args, CmdArgs);
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 761310813f787..422375240bab6 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1680,6 +1680,9 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
else
GenerateArg(Consumer, OPT_fno_loop_interchange);
+ if (Opts.FuseLoops)
+ GenerateArg(Consumer, OPT_fexperimental_loop_fusion);
+
if (!Opts.BinutilsVersion.empty())
GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion);
@@ -2001,6 +2004,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
(Opts.OptimizationLevel > 1));
Opts.InterchangeLoops =
Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false);
+ Opts.FuseLoops = Args.hasFlag(OPT_fexperimental_loop_fusion,
+ OPT_fno_experimental_loop_fusion, false);
Opts.BinutilsVersion =
std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index ee7ded265769b..eb3994ddabcd3 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -52,6 +52,15 @@
// CHECK-INTERCHANGE-LOOPS: "-floop-interchange"
// CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange"
+// RUN: %clang -### -S -fexperimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s
+// CHECK-FUSE-LOOPS: "-fexperimental-loop-fusion"
+//
+// RUN: %clang -c -fexperimental-loop-fusion -mllvm -print-pipeline-passes -O3 %s 2>&1 | FileCheck --check-prefixes=LOOP-FUSION-ON %s
+// RUN: %clang -c -mllvm -print-pipeline-passes -O3 %s 2>&1 | FileCheck --check-prefixes=LOOP-FUSION-OFF %s
+
+// LOOP-FUSION-ON: loop-fusion
+// LOOP-FUSION-OFF-NOT: loop-fusion
+
// RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
// CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"
diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md
index c9623ea08c4e6..6a285f829053b 100644
--- a/flang/docs/ReleaseNotes.md
+++ b/flang/docs/ReleaseNotes.md
@@ -35,6 +35,8 @@ page](https://llvm.org/releases/).
## New Compiler Flags
+* -fexperimental-loop-fusion is now recognized by flang.
+
## Windows Support
## Fortran Language Changes in Flang
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index 22f778c5d3d38..f09159962883f 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -43,6 +43,7 @@ CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization.
CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange.
+CODEGENOPT(FuseLoops, 1, 0) ///< Enable loop fusion.
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index c7dbfc46f432c..a00e568bb4a54 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -280,6 +280,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
if (args.getLastArg(clang::driver::options::OPT_floop_interchange))
opts.InterchangeLoops = 1;
+ if (args.getLastArg(clang::driver::options::OPT_fexperimental_loop_fusion))
+ opts.FuseLoops = 1;
+
if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
opts.VectorizeLoop = 1;
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 3bef6b1c31825..23cc1e63e773d 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -958,6 +958,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
pto.LoopUnrolling = opts.UnrollLoops;
pto.LoopInterchange = opts.InterchangeLoops;
+ pto.LoopFusion = opts.FuseLoops;
pto.LoopInterleaving = opts.UnrollLoops;
pto.LoopVectorization = opts.VectorizeLoop;
pto.SLPVectorization = opts.VectorizeSLP;
diff --git a/flang/test/Driver/loop-fuse.f90 b/flang/test/Driver/loop-fuse.f90
new file mode 100644
index 0000000000000..ddfd9065e0fd4
--- /dev/null
+++ b/flang/test/Driver/loop-fuse.f90
@@ -0,0 +1,17 @@
+! RUN: %flang -### -S -fexperimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE %s
+! RUN: %flang -### -S -fno-experimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
+! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
+! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
+! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
+! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
+! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
+! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
+! CHECK-LOOP-FUSE: "-fexperimental-loop-fusion"
+! CHECK-NO-LOOP-FUSE-NOT: "-fexperimental-loop-fusion"
+! RUN: %flang_fc1 -emit-llvm -O2 -fexperimental-loop-fusion -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE-PASS %s
+! RUN: %flang_fc1 -emit-llvm -O2 -fno-experimental-loop-fusion -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE-PASS %s
+! CHECK-LOOP-FUSE-PASS: loop-fusion
+! CHECK-NO-LOOP-FUSE-PASS-NOT: loop-fusion
+
+program test
+end program
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 9cdb7ca7dbc9b..2742ec1b71b7e 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -65,6 +65,9 @@ class PipelineTuningOptions {
/// false.
bool LoopInterchange;
+ /// Tuning option to enable/disable loop fusion. Its default value is false.
+ bool LoopFusion;
+
/// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
/// is that of the flag: `-forget-scev-loop-unroll`.
bool ForgetAllSCEVInLoopUnroll;
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 98821bb1408a7..c3f35f0f5e7fa 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -104,6 +104,7 @@
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopFlatten.h"
+#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopInterchange.h"
@@ -313,6 +314,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
SLPVectorization = false;
LoopUnrolling = true;
LoopInterchange = EnableLoopInterchange;
+ LoopFusion = false;
ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
@@ -1551,6 +1553,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
+ // FIXME: This may not be the right place in the pipeline.
+ // We need to have the data to support the right place.
+ if (PTO.LoopFusion)
+ OptimizePM.addPass(LoopFusePass());
+
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
// currently only performed for loops marked with the metadata
@@ -2355,4 +2362,4 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
bool PassBuilder::isInstrumentedPGOUse() const {
return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
!UseCtxProfile.empty();
-}
\ No newline at end of file
+}
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index b9b8929a0f703..0c991b71a6b26 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -60,6 +60,9 @@ cl::opt<bool> VerifyEachDebugInfoPreserve(
cl::desc("Start each pass with collecting and end it with checking of "
"debug info preservation."));
+static cl::opt<bool> EnableLoopFusion("enable-loopfusion", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable the LoopFuse Pass"));
cl::opt<std::string>
VerifyDIPreserveExport("verify-di-preserve-export",
cl::desc("Export debug info preservation failures into "
@@ -446,6 +449,7 @@ bool llvm::runPassPipeline(
// option has been enabled.
PTO.LoopUnrolling = !DisableLoopUnrolling;
PTO.UnifiedLTO = UnifiedLTO;
+ PTO.LoopFusion = EnableLoopFusion;
PassBuilder PB(TM, PTO, P, &PIC);
registerEPCallbacks(PB);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(FWIW, it's helpful to keep the change since last land in a separate commit, otherwise it's hard to tell what exactly changed between the two.)
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/27531 Here is the relevant piece of the build log for the reference
|
This PR is a reapplication of #142686