Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions flang/include/flang/Frontend/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ CODEGENOPT(Name, Bits, Default)
#endif

CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.
/// The -Os (==1) or -Oz (==2) option is specified.
CODEGENOPT(OptimizeSize, 2, 0)

CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
///< pass manager.
Expand Down
23 changes: 23 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,35 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,

assert(a->getOption().matches(clang::driver::options::OPT_O));

llvm::StringRef s(a->getValue());
if (s == "s" || s == "z")
return 2;

return getLastArgIntValue(args, clang::driver::options::OPT_O, defaultOpt,
diags);
}

return defaultOpt;
}

/// Determine the size-optimization level requested by \a args.
///
/// Only the last option of the -O group is inspected. The result is 1 when
/// that option is -O with a value starting with 's' (-Os), 2 when the value
/// starts with 'z' (-Oz), and 0 in every other case, including when no
/// option of the -O group is present.
static unsigned getOptimizationLevelSize(llvm::opt::ArgList &args) {
  llvm::opt::Arg *lastOptArg =
      args.getLastArg(clang::driver::options::OPT_O_Group);
  if (!lastOptArg ||
      !lastOptArg->getOption().matches(clang::driver::options::OPT_O))
    return 0;

  // Only the first character of the option value selects the size level.
  const char selector = lastOptArg->getValue()[0];
  if (selector == 's')
    return 1;
  if (selector == 'z')
    return 2;
  return 0;
}

bool Fortran::frontend::parseDiagnosticArgs(clang::DiagnosticOptions &opts,
llvm::opt::ArgList &args) {
opts.ShowColors = parseShowColorsArgs(args);
Expand Down Expand Up @@ -273,6 +295,7 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
llvm::opt::ArgList &args,
clang::DiagnosticsEngine &diags) {
opts.OptimizationLevel = getOptimizationLevel(args, diags);
opts.OptimizeSize = getOptimizationLevelSize(args);

if (args.hasFlag(clang::driver::options::OPT_fdebug_pass_manager,
clang::driver::options::OPT_fno_debug_pass_manager, false))
Expand Down
11 changes: 10 additions & 1 deletion flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,16 @@ mapToLevel(const Fortran::frontend::CodeGenOptions &opts) {
case 1:
return llvm::OptimizationLevel::O1;
case 2:
return llvm::OptimizationLevel::O2;
switch (opts.OptimizeSize) {
default:
llvm_unreachable("Invalid optimization level for size!");
case 0:
return llvm::OptimizationLevel::O2;
case 1:
return llvm::OptimizationLevel::Os;
case 2:
return llvm::OptimizationLevel::Oz;
}
case 3:
return llvm::OptimizationLevel::O3;
}
Expand Down
59 changes: 36 additions & 23 deletions flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void addCfgConversionPass(mlir::PassManager &pm,

/// Add the array-value-copy pass to \p pm, nested on mlir::func::FuncOp.
/// The pass is registered through addNestedPassConditionally together with
/// the disableFirAvc flag (presumably the pass is skipped when that flag is
/// set -- confirm against the helper's definition).
void addAVC(mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel) {
ArrayValueCopyOptions options;
// NOTE(review): optimizeConflicts is assigned twice in a row; only the second
// assignment takes effect, so the option ends up true at every level other
// than O0. The first assignment looks like the pre-change half of a diff
// hunk -- confirm before relying on it.
options.optimizeConflicts = optLevel.isOptimizingForSpeed();
options.optimizeConflicts = optLevel != llvm::OptimizationLevel::O0;
addNestedPassConditionally<mlir::func::FuncOp>(
pm, disableFirAvc, [&]() { return createArrayValueCopyPass(options); });
}
Expand Down Expand Up @@ -169,28 +169,32 @@ void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config) {
/// \param pm - MLIR pass manager that will hold the pipeline definition
void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
MLIRToLLVMPassPipelineConfig &pc) {
llvm::OptimizationLevel optLevel = pc.OptLevel;
unsigned speedupLevel = optLevel.getSpeedupLevel();
unsigned sizeLevel = optLevel.getSizeLevel();

// Early Optimizer EP Callback
pc.invokeFIROptEarlyEPCallbacks(pm, pc.OptLevel);
pc.invokeFIROptEarlyEPCallbacks(pm, optLevel);

// simplify the IR
mlir::GreedyRewriteConfig config;
config.setRegionSimplificationLevel(
mlir::GreedySimplifyRegionLevel::Disabled);
pm.addPass(mlir::createCSEPass());
fir::addAVC(pm, pc.OptLevel);
fir::addAVC(pm, optLevel);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createCharacterConversion);
pm.addPass(mlir::createCanonicalizerPass(config));
pm.addPass(fir::createSimplifyRegionLite());
if (pc.OptLevel.isOptimizingForSpeed()) {
if (speedupLevel && !sizeLevel) {
// These passes may increase code size.
pm.addPass(fir::createSimplifyIntrinsics());
pm.addPass(fir::createAlgebraicSimplificationPass(config));
if (enableConstantArgumentGlobalisation)
pm.addPass(fir::createConstantArgumentGlobalisationOpt());
}

if (pc.LoopVersioning)
if (pc.LoopVersioning && !sizeLevel)
pm.addPass(fir::createLoopVersioning());

pm.addPass(mlir::createCSEPass());
Expand All @@ -201,7 +205,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
fir::addMemoryAllocationOpt(pm);

// FIR Inliner Callback
pc.invokeFIRInlinerCallback(pm, pc.OptLevel);
pc.invokeFIRInlinerCallback(pm, optLevel);

pm.addPass(fir::createSimplifyRegionLite());
pm.addPass(mlir::createCSEPass());
Expand All @@ -212,13 +216,13 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,

// Optimize redundant array repacking operations,
// if the source is known to be contiguous.
if (pc.OptLevel.isOptimizingForSpeed())
if (speedupLevel)
pm.addPass(fir::createOptimizeArrayRepacking());
pm.addPass(fir::createLowerRepackArraysPass());
// Expand FIR operations that may use SCF dialect for their
// implementation. This is a mandatory pass.
pm.addPass(fir::createSimplifyFIROperations(
{/*preferInlineImplementation=*/pc.OptLevel.isOptimizingForSpeed()}));
bool preferInlineImplementation = speedupLevel && !sizeLevel;
pm.addPass(fir::createSimplifyFIROperations({preferInlineImplementation}));

addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createStackReclaim);
Expand All @@ -232,11 +236,11 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createConvertComplexPow());
pm.addPass(mlir::createCSEPass());

if (pc.OptLevel.isOptimizingForSpeed())
if (speedupLevel)
pm.addPass(fir::createSetRuntimeCallAttributes());

// Last Optimizer EP Callback
pc.invokeFIROptLastEPCallbacks(pm, pc.OptLevel);
pc.invokeFIROptLastEPCallbacks(pm, optLevel);
}

/// Create a pass pipeline for lowering from HLFIR to FIR
Expand All @@ -247,18 +251,24 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
EnableOpenMP enableOpenMP,
llvm::OptimizationLevel optLevel) {
if (optLevel.getSizeLevel() > 0 || optLevel.getSpeedupLevel() > 0) {
// if sizeLevel > 0 (this is the case when either -Os or -Oz is provided on
// the command line), the speedupLevel is guaranteed to be 2.
unsigned speedupLevel = optLevel.getSpeedupLevel();
unsigned sizeLevel = optLevel.getSizeLevel();

if (speedupLevel) {
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createExpressionSimplification);
}
if (optLevel.isOptimizingForSpeed()) {
if (speedupLevel) {
addCanonicalizerPassWithoutRegionSimplification(pm);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createSimplifyHLFIRIntrinsics);
}
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineElementals);
if (optLevel.isOptimizingForSpeed()) {
if (!sizeLevel)
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineElementals);
if (speedupLevel) {
addCanonicalizerPassWithoutRegionSimplification(pm);
pm.addPass(mlir::createCSEPass());
// Run SimplifyHLFIRIntrinsics pass late after CSE,
Expand All @@ -271,8 +281,9 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
pm, hlfir::createPropagateFortranVariableAttributes);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createOptimizedBufferization);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);
if (!sizeLevel)
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);

if (optLevel == llvm::OptimizationLevel::O3) {
addNestedPassToAllTopLevelOperations<PassConstructor>(
Expand All @@ -287,7 +298,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
// from hlfir.elemental lowering, if the result is an empty array.
// This helps to avoid long running loops for elementals with
// shapes like (0, HUGE).
if (optLevel.isOptimizingForSpeed())
if (speedupLevel)
bufferizeOptions.optimizeEmptyElementals = true;
pm.addPass(hlfir::createBufferizeHLFIR(bufferizeOptions));
// Run hlfir.assign inlining again after BufferizeHLFIR,
Expand All @@ -297,7 +308,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
// TODO: we can remove the previous InlineHLFIRAssign, when
// FIR AliasAnalysis is good enough to say that a temporary
// array does not alias with any user object.
if (optLevel.isOptimizingForSpeed())
if (speedupLevel && !sizeLevel)
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);
pm.addPass(hlfir::createConvertHLFIRtoFIR());
Expand Down Expand Up @@ -354,10 +365,12 @@ void createDebugPasses(mlir::PassManager &pm,
void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
MLIRToLLVMPassPipelineConfig config,
llvm::StringRef inputFilename) {
unsigned speedupLevel = config.OptLevel.getSpeedupLevel();

pm.addPass(fir::createMIFOpConversion());
fir::addBoxedProcedurePass(pm);
if (config.OptLevel.isOptimizingForSpeed() && config.AliasAnalysis &&
!disableFirAliasTags && !useOldAliasTags)
if (speedupLevel && config.AliasAnalysis && !disableFirAliasTags &&
!useOldAliasTags)
pm.addPass(fir::createAddAliasTags());
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createAbstractResultOpt);
Expand Down Expand Up @@ -389,7 +402,7 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
// TODO: re-enable setNoAlias by default (when optimizing for speed) once
// function specialization is fixed.
bool setNoAlias = forceNoAlias;
bool setNoCapture = config.OptLevel.isOptimizingForSpeed();
bool setNoCapture = speedupLevel;

pm.addPass(fir::createFunctionAttr(
{framePointerKind, config.InstrumentFunctionEntry,
Expand Down
14 changes: 13 additions & 1 deletion flang/test/Driver/default-optimization-pipelines.f90
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,16 @@
! RUN: %flang_fc1 -S -O2 %s -flto=full -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-LTO
! RUN: %flang_fc1 -S -O2 %s -flto=thin -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-THINLTO

! Verify that only the left-most `-O{n}` is used
! Verify that only the right-most `-O{n}` is used
! RUN: %flang -S -O2 -O0 %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
! RUN: %flang_fc1 -S -O2 -O0 %s -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0

! Verify that passing -Os/-Oz has the desired effect on the pass pipelines.
! RUN: %flang -S -Os %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
! RUN: | FileCheck %s --check-prefix=CHECK-OSIZE
! RUN: %flang -S -Oz %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
! RUN: | FileCheck %s --check-prefix=CHECK-OSIZE

! CHECK-O0-NOT: Running pass: SimplifyCFGPass on simple_loop_
! CHECK-O0: Running analysis: TargetLibraryAnalysis on simple_loop_
! CHECK-O0-ANYLTO: Running pass: CanonicalizeAliasesPass on [module]
Expand All @@ -33,6 +39,12 @@
! CHECK-O2-THINLTO: Running pass: CanonicalizeAliasesPass on [module]
! CHECK-O2-THINLTO: Running pass: NameAnonGlobalPass on [module]

! -Os/-Oz imply -O2, so check that a pass that runs on O2 is run. Then check
! that passes like LibShrinkWrap, that should not be run when optimizing for
! size, are not run (see llvm/lib/Passes/PassBuilderPipelines.cpp).
! CHECK-OSIZE: Running pass: SimplifyCFGPass on simple_loop_
! CHECK-OSIZE-NOT: Running pass: LibCallsShrinkWrapPass on simple_loop_

subroutine simple_loop
integer :: i
do i=1,5
Expand Down
Loading