diff --git a/OmaxLTO.cfg b/OmaxLTO.cfg index 5c029b91..f7a11a59 100644 --- a/OmaxLTO.cfg +++ b/OmaxLTO.cfg @@ -1,3 +1,4 @@ -flto=full \ -fvirtual-function-elimination \ --fwhole-program-vtables +-fwhole-program-vtables \ +-mllvm -extra-LTO-loop-unroll=true diff --git a/README.md b/README.md index 32e7afc1..ad7e865a 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,7 @@ and/or increased memory usage during linking. Some of the options in the config corresponding optimisation passes in the [LLVM project](https://github.com/llvm/llvm-project) to find out more. Users are also encouraged to create their own configs and tune their own flag parameters. +Information on LLVM Embedded Toolchain for Arm specific optimization flags is available in [Optimization Flags](https://github.com/ARM-software/LLVM-embedded-toolchain-for-Arm/blob/main/docs/optimization-flags.md). Binary releases of the LLVM Embedded Toolchain for Arm are based on release branches of the upstream LLVM Project, thus can safely be used with all tools diff --git a/docs/optimization-flags.md b/docs/optimization-flags.md new file mode 100644 index 00000000..f67f6412 --- /dev/null +++ b/docs/optimization-flags.md @@ -0,0 +1,9 @@ +Additional optimization flags +============================= + +## Additional loop unroll in the LTO pipeline +In some cases it is beneficial to perform an additional loop unroll pass so that extra information becomes available to later passes, e.g. SROA. +Use cases where this could be beneficial - multiple (N>=4) nested loops. 
+ +### Usage: + -mllvm -extra-LTO-loop-unroll=true/false diff --git a/patches/llvm-project-perf/0000-Placeholder-commit.patch b/patches/llvm-project-perf/0000-Placeholder-commit.patch deleted file mode 100644 index ff8059cb..00000000 --- a/patches/llvm-project-perf/0000-Placeholder-commit.patch +++ /dev/null @@ -1,22 +0,0 @@ -From e79697a54cba9bfc1a755ed048e42054d679de61 Mon Sep 17 00:00:00 2001 -From: David Candler -Date: Wed, 2 Oct 2024 14:13:31 +0100 -Subject: [PATCH] Placeholder commit - ---- - .gitignore | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/.gitignore b/.gitignore -index 0e7c6c790013..dfa0b8da0ccd 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -1,4 +1,4 @@ --#==============================================================================# -+#==============================================================================# - # This file specifies intentionally untracked files that git should ignore. - # See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html - # --- -2.34.1 - diff --git a/patches/llvm-project-perf/0001-LTOpasses-add-loop-unroll.patch b/patches/llvm-project-perf/0001-LTOpasses-add-loop-unroll.patch new file mode 100644 index 00000000..88c68109 --- /dev/null +++ b/patches/llvm-project-perf/0001-LTOpasses-add-loop-unroll.patch @@ -0,0 +1,55 @@ +From 4adfc5231d2c0182d6278b4aa75eec57648e5dd4 Mon Sep 17 00:00:00 2001 +From: Vladi Krapp +Date: Tue, 3 Sep 2024 14:12:48 +0100 +Subject: [Pipelines] Additional unrolling in LTO + +Some workloads require specific sequences of events to happen +to fully simplify. This adds an extra full unrolling pass to help these +cases on the cores with branch predictors. It helps produce simplified +loops, which can then be SROA'd allowing further simplification, which +can be important for performance. +Feature adds extra compile time to get extra performance and +is enabled by the opt flag 'extra-LTO-loop-unroll' (off by default). 
+ +Original patch by David Green (david.green@arm.com) +--- + llvm/lib/Passes/PassBuilderPipelines.cpp | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp +index 1184123c7710..6dc45d85927a 100644 +--- a/llvm/lib/Passes/PassBuilderPipelines.cpp ++++ b/llvm/lib/Passes/PassBuilderPipelines.cpp +@@ -332,6 +332,10 @@ namespace llvm { + extern cl::opt<unsigned> MaxDevirtIterations; + } // namespace llvm + ++static cl::opt<bool> LTOExtraLoopUnroll( ++ "extra-LTO-loop-unroll", cl::init(false), cl::Hidden, ++ cl::desc("Perform extra loop unrolling pass to assist SROA")); ++ + void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, + OptimizationLevel Level) { + for (auto &C : PeepholeEPCallbacks) +@@ -1940,6 +1944,18 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); + + FunctionPassManager FPM; ++ ++ if (LTOExtraLoopUnroll) { ++ LoopPassManager OmaxLPM; ++ OmaxLPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), ++ /* OnlyWhenForced= */ !PTO.LoopUnrolling, ++ PTO.ForgetAllSCEVInLoopUnroll)); ++ FPM.addPass( ++ createFunctionToLoopPassAdaptor(std::move(OmaxLPM), ++ /*UseMemorySSA=*/false, ++ /*UseBlockFrequencyInfo=*/true)); ++ } ++ + // The IPO Passes may leave cruft around. Clean up after them. + FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); +-- +2.34.1 +