Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ CODEGENOPT(TimeTrace , 1, 0, Benign) ///< Set when -ftime-trace is enabl
VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500, Benign) ///< Minimum time granularity (in microseconds),
///< traced by time profiler
CODEGENOPT(InterchangeLoops , 1, 0, Benign) ///< Run loop-interchange.
CODEGENOPT(FuseLoops , 1, 0, Benign) ///< Run loop-fusion.
CODEGENOPT(UnrollLoops , 1, 0, Benign) ///< Control whether loops are unrolled.
CODEGENOPT(RerollLoops , 1, 0, Benign) ///< Control whether loops are rerolled.
CODEGENOPT(NoUseJumpTables , 1, 0, Benign) ///< Set when -fno-jump-tables is enabled.
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4304,6 +4304,10 @@ def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>,
HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
defm experimental_loop_fusion
: OptInCC1FFlag<"experimental-loop-fusion", "Enable", "Disable",
"Enable the loop fusion pass",
[ClangOption, FlangOption, FC1Option]>;
def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PipelineTuningOptions PTO;
PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
PTO.LoopInterchange = CodeGenOpts.InterchangeLoops;
PTO.LoopFusion = CodeGenOpts.FuseLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
Expand Down Expand Up @@ -1331,6 +1332,7 @@ runThinLTOBackend(CompilerInstance &CI, ModuleSummaryIndex *CombinedIndex,
Conf.SampleProfile = std::move(SampleProfile);
Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops;
Conf.PTO.LoopFusion = CGOpts.FuseLoops;
// For historical reasons, loop interleaving is set to mirror setting for loop
// unrolling.
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6854,6 +6854,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_unroll_loops);
Args.AddLastArg(CmdArgs, options::OPT_floop_interchange,
options::OPT_fno_loop_interchange);
Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_loop_fusion,
options::OPT_fno_experimental_loop_fusion);

Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ void Flang::addCodegenOptions(const ArgList &Args,
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
CmdArgs.push_back("-fstack-arrays");

Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_loop_fusion,
options::OPT_fno_experimental_loop_fusion);

handleInterchangeLoopsArgs(Args, CmdArgs);
handleVectorizeLoopsArgs(Args, CmdArgs);
handleVectorizeSLPArgs(Args, CmdArgs);
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1680,6 +1680,9 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
else
GenerateArg(Consumer, OPT_fno_loop_interchange);

if (Opts.FuseLoops)
GenerateArg(Consumer, OPT_fexperimental_loop_fusion);

if (!Opts.BinutilsVersion.empty())
GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion);

Expand Down Expand Up @@ -2001,6 +2004,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
(Opts.OptimizationLevel > 1));
Opts.InterchangeLoops =
Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false);
Opts.FuseLoops = Args.hasFlag(OPT_fexperimental_loop_fusion,
OPT_fno_experimental_loop_fusion, false);
Opts.BinutilsVersion =
std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));

Expand Down
9 changes: 9 additions & 0 deletions clang/test/Driver/clang_f_opts.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@
// CHECK-INTERCHANGE-LOOPS: "-floop-interchange"
// CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange"

// RUN: %clang -### -S -fexperimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should check that -fno-experimental-loop-fusion is also recognized by clang.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is usually done or necessary. I checked other options, which are introduced by addOptInFlag, namely funique_internal_linkage_names, fseparate_named_sections, fatomic_ignore_denormal_mode, fsplit_stack, and many others, and I don't see any precedence of having a test which checks if the negative options are recognized.

Do we want to make an exception for this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine not to test -fno-experimental-loop-fusion. I also don't mind if the author adds it and adds a CHECK-NOT.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was partly basing this on the fact that the loop-interchange above does check for both, but if I have not checked if that uses addOptInFlag. If there is precedence for not testing it, that's fine too.

// CHECK-FUSE-LOOPS: "-fexperimental-loop-fusion"
//
// RUN: %clang -c -fexperimental-loop-fusion -mllvm -print-pipeline-passes -O3 %s 2>&1 | FileCheck --check-prefixes=LOOP-FUSION-ON %s
// RUN: %clang -c -mllvm -print-pipeline-passes -O3 %s 2>&1 | FileCheck --check-prefixes=LOOP-FUSION-OFF %s

// LOOP-FUSION-ON: loop-fusion
// LOOP-FUSION-OFF-NOT: loop-fusion

// RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
// CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"

Expand Down
2 changes: 2 additions & 0 deletions flang/docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ page](https://llvm.org/releases/).

## New Compiler Flags

* -fexperimental-loop-fusion is now recognized by flang.

## Windows Support

## Fortran Language Changes in Flang
Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Frontend/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization.
CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange.
CODEGENOPT(FuseLoops, 1, 0) ///< Enable loop fusion.
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
Expand Down
3 changes: 3 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
if (args.getLastArg(clang::driver::options::OPT_floop_interchange))
opts.InterchangeLoops = 1;

if (args.getLastArg(clang::driver::options::OPT_fexperimental_loop_fusion))
opts.FuseLoops = 1;

if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
opts.VectorizeLoop = 1;

Expand Down
1 change: 1 addition & 0 deletions flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
pto.LoopUnrolling = opts.UnrollLoops;
pto.LoopInterchange = opts.InterchangeLoops;
pto.LoopFusion = opts.FuseLoops;
pto.LoopInterleaving = opts.UnrollLoops;
pto.LoopVectorization = opts.VectorizeLoop;
pto.SLPVectorization = opts.VectorizeSLP;
Expand Down
17 changes: 17 additions & 0 deletions flang/test/Driver/loop-fuse.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
! RUN: %flang -### -S -fexperimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE %s
! RUN: %flang -### -S -fno-experimental-loop-fusion %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
! CHECK-LOOP-FUSE: "-fexperimental-loop-fusion"
! CHECK-NO-LOOP-FUSE-NOT: "-fexperimental-loop-fusion"
! RUN: %flang_fc1 -emit-llvm -O2 -fexperimental-loop-fusion -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE-PASS %s
! RUN: %flang_fc1 -emit-llvm -O2 -fno-experimental-loop-fusion -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE-PASS %s
! CHECK-LOOP-FUSE-PASS: loop-fusion
! CHECK-NO-LOOP-FUSE-PASS-NOT: loop-fusion

program test
end program
3 changes: 3 additions & 0 deletions llvm/include/llvm/Passes/PassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ class PipelineTuningOptions {
/// false.
bool LoopInterchange;

/// Tuning option to enable/disable loop fusion. Its default value is false.
bool LoopFusion;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, this default is never actually set anywhere. There is no assignment in the PipelineTuningOptions ctor. (Probably that ctor should be replaced with direct struct member initialization...)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In ThinLTOCodeGenerator.cpp, I need to do PTO.LoopFusion = False. Such change is also needed in mlir/lib/ExecutionEngine/OptUtils.cpp.

This is required at each site which is using PipelineTuningOptions. For LoopInterchange and others, initialization occurs in PassBuilderPipelines.cpp, but for LoopFusion, I can't do this because the definition of the LoopFusion variable is in NewPMDriver.cpp, as suggested by @kasuga-fj.

I am inclined to follow the way LoopInterchange is initialized (and thus move the definition of LoopFusion from NewPMDriver.cpp to PassBuilderPipelines.cpp)

What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is required at each site which is using PipelineTuningOptions. For LoopInterchange and others, initialization occurs in PassBuilderPipelines.cpp, but for LoopFusion, I can't do this because the definition of the LoopFusion variable is in NewPMDriver.cpp, as suggested by @kasuga-fj.

Why can't you do the initialization to false in PassBuilderPipelines? Isn't this just the default value that later gets overwritten by the code in NewPMDriver?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is another option. #158844. Can you please have a quick look?


/// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
/// is that of the flag: `-forget-scev-loop-unroll`.
bool ForgetAllSCEVInLoopUnroll;
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopFlatten.h"
#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopInterchange.h"
Expand Down Expand Up @@ -1551,6 +1552,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));

// FIXME: This may not be the right place in the pipeline.
// We need to have the data to support the right place.
if (PTO.LoopFusion)
OptimizePM.addPass(LoopFusePass());

// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
// currently only performed for loops marked with the metadata
Expand Down Expand Up @@ -2355,4 +2361,4 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
bool PassBuilder::isInstrumentedPGOUse() const {
return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
!UseCtxProfile.empty();
}
}
4 changes: 4 additions & 0 deletions llvm/tools/opt/NewPMDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ cl::opt<bool> VerifyEachDebugInfoPreserve(
cl::desc("Start each pass with collecting and end it with checking of "
"debug info preservation."));

static cl::opt<bool> EnableLoopFusion("enable-loopfusion", cl::init(false),
cl::Hidden,
cl::desc("Enable the LoopFuse Pass"));
cl::opt<std::string>
VerifyDIPreserveExport("verify-di-preserve-export",
cl::desc("Export debug info preservation failures into "
Expand Down Expand Up @@ -446,6 +449,7 @@ bool llvm::runPassPipeline(
// option has been enabled.
PTO.LoopUnrolling = !DisableLoopUnrolling;
PTO.UnifiedLTO = UnifiedLTO;
PTO.LoopFusion = EnableLoopFusion;
PassBuilder PB(TM, PTO, P, &PIC);
registerEPCallbacks(PB);

Expand Down
Loading