Skip to content

Commit 8c8885d

Browse files
committed
[Clang] Make --lto-partitions only default for HIP
Summary: The default behavior for LTO on other targets does not specify the number of LTO partitions. Recent changes made this default to 8 on AMDGPU which had some issues with the `libc` project. The option to disable this is HIP only so I think for now we should restrict this just to HIP. I'm definitely on board with getting some more parallelism here, but I think it should probably be restricted to just offloading languages. The new driver goes through the `--target=amdgcn-amd-amdhsa` for its output, which means we'd need to forward the default somehow.
1 parent 0106e5a commit 8c8885d

File tree

3 files changed

+11
-37
lines changed

3 files changed

+11
-37
lines changed

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
625625
CmdArgs.push_back("-shared");
626626
}
627627

628+
llvm::errs() << JA.getOffloadingDeviceKind() << "\n";
628629
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
629630
Args.AddAllArgs(CmdArgs, options::OPT_L);
630631
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
@@ -633,7 +634,7 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
633634
const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin);
634635
addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], ThinLTO);
635636

636-
if (!ThinLTO)
637+
if (!ThinLTO && JA.getOffloadingDeviceKind() == Action::OFK_HIP)
637638
addFullLTOPartitionOption(C.getDriver(), Args, CmdArgs);
638639
} else if (Args.hasArg(options::OPT_mcpu_EQ)) {
639640
CmdArgs.push_back(Args.MakeArgString(
@@ -712,14 +713,12 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
712713
}
713714

714715
static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
715-
const Arg *A = Args.getLastArg(options::OPT_flto_partitions_EQ);
716-
// In the absence of an option, use 8 as the default.
717-
if (!A)
718-
return 8;
719716
int Value = 0;
720-
if (StringRef(A->getValue()).getAsInteger(10, Value) || (Value < 1)) {
717+
StringRef A = Args.getLastArgValue(options::OPT_flto_partitions_EQ, "8");
718+
if (A.getAsInteger(10, Value) || (Value < 1)) {
719+
Arg *Arg = Args.getLastArg(options::OPT_flto_partitions_EQ);
721720
D.Diag(diag::err_drv_invalid_int_value)
722-
<< A->getAsString(Args) << A->getValue();
721+
<< Arg->getAsString(Args) << Arg->getValue();
723722
return 1;
724723
}
725724

@@ -729,13 +728,8 @@ static unsigned getFullLTOPartitions(const Driver &D, const ArgList &Args) {
729728
void amdgpu::addFullLTOPartitionOption(const Driver &D,
730729
const llvm::opt::ArgList &Args,
731730
llvm::opt::ArgStringList &CmdArgs) {
732-
// TODO: Should this be restricted to fgpu-rdc only ? Currently we'll
733-
// also do it for non gpu-rdc LTO
734-
735-
if (unsigned NumParts = getFullLTOPartitions(D, Args); NumParts > 1) {
736-
CmdArgs.push_back(
737-
Args.MakeArgString("--lto-partitions=" + Twine(NumParts)));
738-
}
731+
CmdArgs.push_back(Args.MakeArgString("--lto-partitions=" +
732+
Twine(getFullLTOPartitions(D, Args))));
739733
}
740734

741735
/// AMDGPU Toolchain

clang/test/Driver/amdgpu-toolchain.c

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
2222
// RUN: -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
2323
// LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
24-
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions={{[0-9]+}}"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
24+
// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"-plugin-opt=-mattr=-sramecc,+xnack"
2525

2626
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+:sramecc- -nogpulib \
2727
// RUN: -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
@@ -38,17 +38,3 @@
3838
// RUN: %clang -target amdgcn-amd-amdhsa -march=gfx90a -stdlib -startfiles \
3939
// RUN: -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=STARTUP %s
4040
// STARTUP: ld.lld{{.*}}"-lc" "-lm" "{{.*}}crt1.o"
41-
42-
// Check --flto-partitions
43-
44-
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
45-
// RUN: -L. -flto --flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s
46-
// LTO_PARTS: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx90a"{{.*}}"--lto-partitions=42"
47-
48-
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
49-
// RUN: -L. -flto --flto-partitions=a %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV0 %s
50-
// LTO_PARTS_INV0: clang: error: invalid integral value 'a' in '--flto-partitions=a'
51-
52-
// RUN: not %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib \
53-
// RUN: -L. -flto --flto-partitions=0 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS_INV1 %s
54-
// LTO_PARTS_INV1: clang: error: invalid integral value '0' in '--flto-partitions=0'

libc/startup/gpu/CMakeLists.txt

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,8 @@ function(add_startup_object name)
3333
set_target_properties(${fq_target_name}.exe PROPERTIES
3434
RUNTIME_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR}
3535
RUNTIME_OUTPUT_NAME ${name}.o)
36-
# FIXME: A bug in the AMDGPU LTO pass is incorrectly removing the kernels.
37-
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
38-
target_link_options(${fq_target_name}.exe PRIVATE
39-
"-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
40-
else()
41-
target_link_options(${fq_target_name}.exe PRIVATE
42-
"-r" "-nostdlib" "-Wl,--lto-emit-llvm")
43-
endif()
36+
target_link_options(${fq_target_name}.exe PRIVATE
37+
"-r" "-nostdlib" "-flto" "-Wl,--lto-emit-llvm")
4438
endif()
4539
endfunction()
4640

0 commit comments

Comments
 (0)