Skip to content

Commit 3a5421c

Browse files
author
Salinas, David
authored
(cherry-pick) Fixes and Improvements for -flto-partitions option (llvm#1416) (llvm#1890)
2 parents 70c627c + 9a98832 commit 3a5421c

File tree

8 files changed

+75
-98
lines changed

8 files changed

+75
-98
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,8 +1480,6 @@ def fhip_emit_relocatable : Flag<["-"], "fhip-emit-relocatable">,
14801480
HelpText<"Compile HIP source to relocatable">;
14811481
def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">,
14821482
HelpText<"Do not override toolchain to compile HIP source to relocatable">;
1483-
def flto_partitions_EQ : Joined<["--"], "flto-partitions=">, Group<hip_Group>,
1484-
HelpText<"Number of partitions to use for parallel full LTO codegen. Use 1 to disable partitioning.">;
14851483
}
14861484

14871485
// Clang specific/exclusive options for OpenACC.
@@ -3098,6 +3096,8 @@ defm fat_lto_objects : BoolFOption<"fat-lto-objects",
30983096
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable">,
30993097
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Disable">,
31003098
BothFlags<[], [ClangOption, CC1Option], " fat LTO object support">>;
3099+
def flto_partitions_EQ : Joined<["-"], "flto-partitions=">, Group<f_Group>,
3100+
HelpText<"Number of partitions to use for parallel full LTO codegen, ld.lld only.">;
31013101
def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
31023102
Group<f_Group>, Visibility<[ClangOption, CC1Option, CLOption]>,
31033103
HelpText<"Set the maximum number of entries to print in a macro expansion backtrace (0 = no limit)">,

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 26 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -626,42 +626,17 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
626626
CmdArgs.push_back("-shared");
627627
}
628628

629-
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
630-
Args.AddAllArgs(CmdArgs, options::OPT_L);
631-
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
632-
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
633629
if (C.getDriver().isUsingLTO()) {
634630
const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
635631
addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
636-
637-
if (!ThinLTO)
638-
addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
639-
} else if (Args.hasArg(options::OPT_mcpu_EQ)) {
632+
} else if (Args.hasArg(options::OPT_mcpu_EQ))
640633
CmdArgs.push_back(Args.MakeArgString(
641-
"-plugin-opt=mcpu=" +
642-
getProcessorFromTargetID(getToolChain().getTriple(),
643-
Args.getLastArgValue(options::OPT_mcpu_EQ))));
644-
}
634+
"-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ)));
645635

646-
// Always pass the target-id features to the LTO job.
647-
std::vector<StringRef> Features;
648-
getAMDGPUTargetFeatures(C.getDriver(), getToolChain().getTriple(), Args,
649-
Features);
650-
if (!Features.empty()) {
651-
CmdArgs.push_back(
652-
Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ",")));
653-
}
654-
655-
if (Args.hasArg(options::OPT_stdlib))
656-
CmdArgs.append({"-lc", "-lm"});
657-
if (Args.hasArg(options::OPT_startfiles)) {
658-
std::optional<std::string> IncludePath = getToolChain().getStdlibPath();
659-
if (!IncludePath)
660-
IncludePath = "/lib";
661-
SmallString<128> P(*IncludePath);
662-
llvm::sys::path::append(P, "crt1.o");
663-
CmdArgs.push_back(Args.MakeArgString(P));
664-
}
636+
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
637+
Args.AddAllArgs(CmdArgs, options::OPT_L);
638+
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
639+
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
665640

666641
CmdArgs.push_back("-o");
667642
CmdArgs.push_back(Output.getFilename());
@@ -670,6 +645,26 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
670645
CmdArgs, Inputs, Output));
671646
}
672647

648+
/// Determine the partition count requested via -flto-partitions=.
///
/// Reads the value of the last -flto-partitions= argument, using "8" when the
/// option is absent. If the value does not parse as a base-10 integer or is
/// less than 1, an err_drv_invalid_int_value diagnostic is emitted through
/// \p D and 1 is returned.
///
/// \param D    Driver used to report the diagnostic.
/// \param Args Argument list that is searched for the option.
/// \returns the parsed partition count, or 1 after reporting an invalid value.
static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
649+
int Value = 0;
650+
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
651+
// getAsInteger() yields true on a parse failure, so this branch handles both
// malformed and non-positive values.
if (A.getAsInteger(10, Value) || (Value < 1)) {
652+
// NOTE(review): the "8" default parses cleanly, so this branch presumably
// runs only when the option was given and getLastArg() is non-null -- confirm.
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
653+
D.Diag(diag::err_drv_invalid_int_value)
654+
<< Arg->getAsString(Args) << Arg->getValue();
655+
return 1;
656+
}
657+
658+
return Value;
659+
}
660+
661+
/// Append a "--lto-partitions=<N>" argument to \p CmdArgs.
///
/// <N> is the value computed by getFullLTOPartitions(). The argument is pushed
/// unconditionally, whatever that value is.
///
/// \param D       Driver forwarded to getFullLTOPartitions().
/// \param Args    Argument list forwarded to getFullLTOPartitions(); also used
///                to create the argument string via MakeArgString().
/// \param CmdArgs Command-line argument list that receives the new option.
void amdgpu::addFullLTOPartitionOption(const Driver &D,
662+
const llvm::opt::ArgList &Args,
663+
llvm::opt::ArgStringList &CmdArgs) {
664+
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" +
665+
Twine(getFullLTOPartitions(D, Args))));
666+
}
667+
673668
void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
674669
const llvm::Triple &Triple,
675670
const llvm::opt::ArgList &Args,
@@ -763,33 +758,6 @@ llvm::SmallVector<std::string, 12> amdgpu::dlr::getCommonDeviceLibNames(
763758
FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP);
764759
}
765760

766-
static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
767-
const Arg *A = Args.getLastArg(options::OPT_flto_partitions_EQ);
768-
// In the absence of an option, use 8 as the default.
769-
if (!A)
770-
return 8;
771-
int Value = 0;
772-
if (StringRef(A->getValue()).getAsInteger(10, Value) || (Value < 1)) {
773-
D.Diag(diag::err_drv_invalid_int_value)
774-
<< A->getAsString(Args) << A->getValue();
775-
return 1;
776-
}
777-
778-
return Value;
779-
}
780-
781-
void amdgpu::addFullLTOPartitionOption(const Driver &D,
782-
const llvm::opt::ArgList &Args,
783-
llvm::opt::ArgStringList &CmdArgs) {
784-
// TODO: Should this be restricted to fgpu-rdc only ? Currently we'll
785-
// also do it for non gpu-rdc LTO
786-
787-
if (unsigned NumParts = getFullLTOPartitions(D, Args); NumParts > 1) {
788-
CmdArgs.push_back(
789-
Args.MakeArgString("--lto-partitions=" + Twine(NumParts)));
790-
}
791-
}
792-
793761
/// AMDGPU Toolchain
794762
AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
795763
const ArgList &Args)

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,17 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
907907
// files
908908
if (IsFatLTO)
909909
CmdArgs.push_back("--fat-lto-objects");
910+
911+
if (Args.hasArg(options::OPT_flto_partitions_EQ)) {
912+
int Value = 0;
913+
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
914+
if (A.getAsInteger(10, Value) || (Value < 1)) {
915+
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
916+
D.Diag(diag::err_drv_invalid_int_value)
917+
<< Arg->getAsString(Args) << Arg->getValue();
918+
}
919+
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" + A));
920+
}
910921
}
911922

912923
const char *PluginOptPrefix = IsOSAIX ? "-bplugin_opt:" : "-plugin-opt=";

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,6 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
123123

124124
addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
125125

126-
amdgpu::addFullLTOPartitionOption(D, Args, LldArgs);
127-
128126
// Given that host and device linking happen in separate processes, the device
129127
// linker doesn't always have the visibility as to which device symbols are
130128
// needed by a program, especially for the device symbol dependencies that are
@@ -317,6 +315,10 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
317315
checkTargetID(*DAL);
318316
}
319317

318+
if (!Args.hasArg(options::OPT_flto_partitions_EQ))
319+
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ),
320+
"8");
321+
320322
return DAL;
321323
}
322324

clang/test/Driver/amdgpu-toolchain.c

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,37 +18,15 @@
1818
// AS_LINK_UR: "-cc1as"
1919
// AS_LINK_UR: ld.lld{{.*}} "--no-undefined"{{.*}} "--unresolved-symbols=ignore-all"
2020

21-
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
21+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
2222
// RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
23-
// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
24-
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions={{[0-9]+}}"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
23+
// LTO: clang{{.*}}"-flto=full"{{.*}}"-fconvergent-functions"
24+
// LTO: ld.lld{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}"{{.*}}
2525

26-
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
26+
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
2727
// RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
28-
// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
28+
// MCPU: ld.lld{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}
2929

3030
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
3131
// RUN: -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s
3232
// LD: ld.lld
33-
34-
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
35-
// RUN: -r %s 2>&1 | FileCheck -check-prefixes=RELO %s
36-
// RELO-NOT: -shared
37-
38-
// RUN: %clang -target amdgcn-amd-amdhsa -march=gfx90a -stdlib -startfiles \
39-
// RUN: -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=STARTUP %s
40-
// STARTUP: ld.lld{{.*}}"-lc" "-lm" "{{.*}}crt1.o"
41-
42-
// Check --flto-partitions
43-
44-
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
45-
// RUN: -L. -flto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
46-
// LTO_PARTS: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions=42"
47-
48-
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
49-
// RUN: -L. -flto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
50-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
51-
52-
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
53-
// RUN: -L. -flto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
54-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang -### --target=x86_64-linux-gnu \
2-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=42 \
2+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=42 \
33
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
44
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
55
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
@@ -10,26 +10,26 @@
1010
// FIXED-PARTS-NOT: ".*opt"
1111
// FIXED-PARTS-NOT: ".*llc"
1212
// FIXED-PARTS: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
13-
// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
1413
// FIXED-PARTS-SAME: "--lto-partitions=42"
14+
// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
1515
// FIXED-PARTS-SAME: "-o" "{{.*out}}" "{{.*bc}}"
1616

1717
// RUN: not %clang -### --target=x86_64-linux-gnu \
18-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=a \
18+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=a \
1919
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
2020
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
2121
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
2222
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
2323
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV0
2424

25-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
25+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
2626

2727
// RUN: not %clang -### --target=x86_64-linux-gnu \
28-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=0 \
28+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=0 \
2929
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
3030
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
3131
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
3232
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
3333
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV1
3434

35-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
35+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

clang/test/Driver/hip-toolchain-rdc-static-lib.hip

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@
4848
// CHECK-NOT: ".*opt"
4949
// CHECK-NOT: ".*llc"
5050
// CHECK: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
51-
// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
5251
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
52+
// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
5353
// CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[A_BC1]] [[B_BC1]]
5454

5555
// generate image for device side path on gfx900
@@ -77,8 +77,8 @@
7777
// CHECK-NOT: ".*opt"
7878
// CHECK-NOT: ".*llc"
7979
// CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
80-
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
8180
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
81+
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
8282
// CHECK-SAME: "--whole-archive"
8383
// CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]]
8484
// CHECK-SAME: "--no-whole-archive"

clang/test/Driver/hip-toolchain-rdc.hip

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@
146146
// CHECK-NOT: ".*opt"
147147
// CHECK-NOT: ".*llc"
148148
// CHECK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
149-
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
150149
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
150+
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
151151
// CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]]
152152

153153
// combine images generated into hip fat binary object
@@ -161,3 +161,21 @@
161161
// output the executable
162162
// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
163163
// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
164+
165+
// Check -flto-partitions
166+
167+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
168+
// RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
169+
// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
170+
171+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
172+
// RUN: -L. -foffload-lto -flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
173+
// LTO_PARTS: lld{{.*}}"--lto-partitions=42"
174+
175+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
176+
// RUN: -L. -foffload-lto -flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
177+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
178+
179+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
180+
// RUN: -L. -foffload-lto -flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
181+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

0 commit comments

Comments
 (0)