Skip to content

Commit cf66fd8

Browse files
jhuber6 authored and Pierre-vh committed
[Clang] Handle -flto-partitions generically and forward it properly (llvm#133283)
Summary: The llvm#128509 patch introduced `--flto-partitions`. It was marked as a HIP-only argument, and it was also spelled and handled incorrectly for an `-f` option. This patch makes the handling generic for `ld.lld` consumers. It also fixes an issue with the order in which the flags were emitted relative to the default arguments, which prevented users from overriding them. Finally, it forwards the option properly for the new driver so that this can be tested.
1 parent b3d463a commit cf66fd8

File tree

8 files changed

+45
-33
lines changed

8 files changed

+45
-33
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,8 +1458,6 @@ def fhip_emit_relocatable : Flag<["-"], "fhip-emit-relocatable">,
14581458
HelpText<"Compile HIP source to relocatable">;
14591459
def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">,
14601460
HelpText<"Do not override toolchain to compile HIP source to relocatable">;
1461-
def flto_partitions_EQ : Joined<["--"], "flto-partitions=">, Group<hip_Group>,
1462-
HelpText<"Number of partitions to use for parallel full LTO codegen. Use 1 to disable partitioning.">;
14631461
}
14641462

14651463
// Clang specific/exclusive options for OpenACC.
@@ -3002,6 +3000,8 @@ defm fat_lto_objects : BoolFOption<"fat-lto-objects",
30023000
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable">,
30033001
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Disable">,
30043002
BothFlags<[], [ClangOption, CC1Option], " fat LTO object support">>;
3003+
def flto_partitions_EQ : Joined<["-"], "flto-partitions=">, Group<f_Group>,
3004+
HelpText<"Number of partitions to use for parallel full LTO codegen, ld.lld only.">;
30053005
def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
30063006
Group<f_Group>, Visibility<[ClangOption, CC1Option, CLOption]>,
30073007
HelpText<"Set the maximum number of entries to print in a macro expansion backtrace (0 = no limit)">,

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -624,19 +624,18 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
624624
CmdArgs.push_back("--no-undefined");
625625
CmdArgs.push_back("-shared");
626626

627-
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
628-
Args.AddAllArgs(CmdArgs, options::OPT_L);
629-
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
630-
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
631627
if (C.getDriver().isUsingLTO()) {
632628
const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
633629
addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
634-
635-
if (!ThinLTO && JA.getOffloadingDeviceKind() == Action::OFK_HIP)
636-
addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
637630
} else if (Args.hasArg(options::OPT_mcpu_EQ))
638631
CmdArgs.push_back(Args.MakeArgString(
639632
"-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ)));
633+
634+
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
635+
Args.AddAllArgs(CmdArgs, options::OPT_L);
636+
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
637+
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
638+
640639
CmdArgs.push_back("-o");
641640
CmdArgs.push_back(Output.getFilename());
642641
C.addCommand(std::make_unique<Command>(

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,17 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
863863
// files
864864
if (IsFatLTO)
865865
CmdArgs.push_back("--fat-lto-objects");
866+
867+
if (Args.hasArg(options::OPT_flto_partitions_EQ)) {
868+
int Value = 0;
869+
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
870+
if (A.getAsInteger(10, Value) || (Value < 1)) {
871+
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
872+
D.Diag(diag::err_drv_invalid_int_value)
873+
<< Arg->getAsString(Args) << Arg->getValue();
874+
}
875+
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" + A));
876+
}
866877
}
867878

868879
const char *PluginOptPrefix = IsOSAIX ? "-bplugin_opt:" : "-plugin-opt=";

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,6 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
160160

161161
addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
162162

163-
amdgpu::addFullLTOPartitionOption(D, Args, LldArgs);
164-
165163
// Given that host and device linking happen in separate processes, the device
166164
// linker doesn't always have the visibility as to which device symbols are
167165
// needed by a program, especially for the device symbol dependencies that are
@@ -355,6 +353,10 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
355353
checkTargetID(*DAL);
356354
}
357355

356+
if (!Args.hasArg(options::OPT_flto_partitions_EQ))
357+
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ),
358+
"8");
359+
358360
return DAL;
359361
}
360362

clang/test/Driver/amdgpu-toolchain.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@
2020

2121
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
2222
// RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
23-
// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
24-
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}"
23+
// LTO: clang{{.*}}"-flto=full"{{.*}}"-fconvergent-functions"
24+
// LTO: ld.lld{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}"{{.*}}
2525

2626
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
2727
// RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
28-
// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
28+
// MCPU: ld.lld{{.*}}"-plugin-opt=mcpu=gfx906"{{.*}}
2929

3030
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
3131
// RUN: -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s
Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang -### --target=x86_64-linux-gnu \
2-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=42 \
2+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=42 \
33
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
44
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
55
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
@@ -10,26 +10,26 @@
1010
// FIXED-PARTS-NOT: ".*opt"
1111
// FIXED-PARTS-NOT: ".*llc"
1212
// FIXED-PARTS: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
13-
// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
1413
// FIXED-PARTS-SAME: "--lto-partitions=42"
14+
// FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
1515
// FIXED-PARTS-SAME: "-o" "{{.*out}}" "{{.*bc}}"
1616

1717
// RUN: not %clang -### --target=x86_64-linux-gnu \
18-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=a \
18+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=a \
1919
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
2020
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
2121
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
2222
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
2323
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV0
2424

25-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
25+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
2626

2727
// RUN: not %clang -### --target=x86_64-linux-gnu \
28-
// RUN: -x hip --cuda-gpu-arch=gfx803 --flto-partitions=0 \
28+
// RUN: -x hip --cuda-gpu-arch=gfx803 -flto-partitions=0 \
2929
// RUN: --no-offload-new-driver --emit-static-lib -nogpulib \
3030
// RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
3131
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
3232
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
3333
// RUN: 2>&1 | FileCheck %s --check-prefix=LTO_PARTS_INV1
3434

35-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
35+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

clang/test/Driver/hip-toolchain-rdc-static-lib.hip

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@
4848
// CHECK-NOT: ".*opt"
4949
// CHECK-NOT: ".*llc"
5050
// CHECK: [[LLD: ".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
51-
// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
5251
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
52+
// CHECK-SAME: "-plugin-opt=mcpu=gfx803"
5353
// CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[A_BC1]] [[B_BC1]]
5454

5555
// generate image for device side path on gfx900
@@ -77,8 +77,8 @@
7777
// CHECK-NOT: ".*opt"
7878
// CHECK-NOT: ".*llc"
7979
// CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
80-
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
8180
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
81+
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
8282
// CHECK-SAME: "--whole-archive"
8383
// CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]]
8484
// CHECK-SAME: "--no-whole-archive"

clang/test/Driver/hip-toolchain-rdc.hip

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@
146146
// CHECK-NOT: ".*opt"
147147
// CHECK-NOT: ".*llc"
148148
// CHECK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
149-
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
150149
// CHECK-SAME: "--lto-partitions={{[0-9]+}}"
150+
// CHECK-SAME: "-plugin-opt=mcpu=gfx900"
151151
// CHECK-SAME: "-o" "[[IMG_DEV2:.*.out]]" [[A_BC2]] [[B_BC2]]
152152

153153
// combine images generated into hip fat binary object
@@ -162,20 +162,20 @@
162162
// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
163163
// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
164164

165-
// Check --flto-partitions
165+
// Check -flto-partitions
166166

167-
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
167+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
168168
// RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
169169
// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
170170

171-
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
172-
// RUN: -L. -foffload-lto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
171+
// RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
172+
// RUN: -L. -foffload-lto -flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
173173
// LTO_PARTS: lld{{.*}}"--lto-partitions=42"
174174

175-
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
176-
// RUN: -L. -foffload-lto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
177-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
175+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
176+
// RUN: -L. -foffload-lto -flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
177+
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '-flto-partitions=a'
178178

179-
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc \
180-
// RUN: -L. -foffload-lto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
181-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'
179+
// RUN: not %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \
180+
// RUN: -L. -foffload-lto -flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
181+
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '-flto-partitions=0'

0 commit comments

Comments
 (0)