|
21 | 21 | #include "llvm/Support/LineIterator.h" |
22 | 22 | #include "llvm/Support/Path.h" |
23 | 23 | #include "llvm/Support/Process.h" |
| 24 | +#include "llvm/Support/Threading.h" |
24 | 25 | #include "llvm/Support/VirtualFileSystem.h" |
25 | 26 | #include "llvm/TargetParser/Host.h" |
26 | 27 | #include <optional> |
@@ -630,8 +631,11 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
630 | 631 | getToolChain().AddFilePathLibArgs(Args, CmdArgs); |
631 | 632 | AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); |
632 | 633 | if (C.getDriver().isUsingLTO()) { |
633 | | - addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], |
634 | | - C.getDriver().getLTOMode() == LTOK_Thin); |
| 634 | + const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin); |
| 635 | + addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO); |
| 636 | + |
| 637 | + if (!ThinLTO) |
| 638 | + addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs); |
635 | 639 | } else if (Args.hasArg(options::OPT_mcpu_EQ)) { |
636 | 640 | CmdArgs.push_back(Args.MakeArgString( |
637 | 641 | "-plugin-opt=mcpu=" + |
@@ -708,6 +712,34 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, |
708 | 712 | options::OPT_m_amdgpu_Features_Group); |
709 | 713 | } |
710 | 714 |
|
| 715 | +static unsigned GetFullLTOPartitions(const Driver &D, const ArgList &Args) { |
| 716 | + const Arg *A = Args.getLastArg(options::OPT_flto_partitions_EQ); |
| 717 | + // In the absence of an option, use the number of available threads with a cap |
| 718 | + // at 16 partitions. More than 16 partitions rarely benefits code splitting |
| 719 | + // and can lead to more empty/small modules each with their own overhead. |
| 720 | + if (!A) |
| 721 | + return std::max(16u, llvm::hardware_concurrency().compute_thread_count()); |
| 722 | + int Value; |
| 723 | + if (StringRef(A->getValue()).getAsInteger(10, Value) || (Value < 1)) { |
| 724 | + D.Diag(diag::err_drv_invalid_int_value) |
| 725 | + << A->getAsString(Args) << A->getValue(); |
| 726 | + return 1; |
| 727 | + } |
| 728 | + |
| 729 | + return Value; |
| 730 | +} |
| 731 | + |
| 732 | +void amdgpu::addFullLTOPartitionOption(const Driver &D, |
| 733 | + const llvm::opt::ArgList &Args, |
| 734 | + llvm::opt::ArgStringList &CmdArgs) { |
| 735 | + // TODO: restrict to gpu-rdc only? |
| 736 | + |
| 737 | + if (unsigned NumParts = GetFullLTOPartitions(D, Args); NumParts > 1) { |
| 738 | + CmdArgs.push_back( |
| 739 | + Args.MakeArgString("--lto-partitions=" + std::to_string(NumParts))); |
| 740 | + } |
| 741 | +} |
| 742 | + |
711 | 743 | /// AMDGPU Toolchain |
712 | 744 | AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, |
713 | 745 | const ArgList &Args) |
|
0 commit comments