Skip to content

Commit 098e554

Browse files
committed
[LinkerWrapper] Extract device archives if symbol is used on the host
Summary: Standard archive linking semantics dictate that archive members are only extracted if they are used, i.e. define a currently undefined symbol. This poses some issues for offloading langues because there can be a kernel called `foo` in a device archive. The CPU archive is extracted but `foo` is never referenced on the device so it is not extracted. This patch augments this behavior to consider all symbols used by the offloading toolchain as 'undefined' so they will extract archives. We do this by getting a list of all the kernel names the runtime will call from the offloading entires list. Not that this does not bother checking if the *host* side will actually use the archive, so the case where a kernel is not used by the static library will still cause it to extract. I could fix this but it felt like a lot of effort for little gain. Previously this was solved by forcibly extracting anything with public visibility. This means that we can now link device code with externally visible symbols and don't need to worry about it always extracting as well. Depends on llvm#111890
1 parent c04b640 commit 098e554

File tree

3 files changed

+107
-70
lines changed

3 files changed

+107
-70
lines changed

clang/test/Driver/linker-wrapper-libs.c

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -51,30 +51,6 @@ int bar() { return weak; }
5151
// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
5252
// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
5353

54-
//
55-
// Check that we extract a static library that defines a global visibile to the
56-
// host.
57-
//
58-
// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc
59-
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc
60-
// RUN: clang-offload-packager -o %t-lib.out \
61-
// RUN: --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
62-
// RUN: --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
63-
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
64-
// RUN: llvm-ar rcs %t.a %t.o
65-
// RUN: clang-offload-packager -o %t.out \
66-
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
67-
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
68-
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
69-
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
70-
// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
71-
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
72-
// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \
73-
// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL
74-
75-
// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
76-
// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
77-
7854
//
7955
// Check that we do not extract a global symbol if the source file was not
8056
// created by an offloading language that expects there to be a host version of
@@ -142,29 +118,6 @@ int bar() { return weak; }
142118
// LIBRARY-HIDDEN-NOT: {{.*}}.o {{.*}}.o
143119
// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030
144120

145-
//
146-
// Check that we do not extract a static library that defines a global visibile
147-
// to the host that is already defined.
148-
//
149-
// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc
150-
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc
151-
// RUN: clang-offload-packager -o %t-lib.out \
152-
// RUN: --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
153-
// RUN: --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
154-
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
155-
// RUN: llvm-ar rcs %t.a %t.o
156-
// RUN: clang-offload-packager -o %t.out \
157-
// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
158-
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
159-
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
160-
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
161-
// RUN: --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \
162-
// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED
163-
164-
// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
165-
// LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}gfx1030{{.*}}.o
166-
// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
167-
168121
//
169122
// Check that we can use --[no-]whole-archive to control extraction.
170123
//

clang/test/Driver/linker-wrapper.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44

55
// REQUIRES: system-linux
66

7-
// An externally visible variable so static libraries extract.
8-
__attribute__((visibility("protected"), used)) int x;
9-
107
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
118
// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.nvptx.bc
129
// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc
@@ -169,7 +166,7 @@ __attribute__((visibility("protected"), used)) int x;
169166
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a:xnack-
170167
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-off.o -fembed-offload-object=%t-off.out
171168
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
172-
// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID
169+
// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o --whole-archive %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID
173170

174171
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
175172
// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
@@ -185,7 +182,7 @@ __attribute__((visibility("protected"), used)) int x;
185182
// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
186183
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t2.o -fembed-offload-object=%t2.out
187184
// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
188-
// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL
185+
// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o --whole-archive %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL
189186

190187
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
191188
// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 105 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1477,14 +1477,7 @@ Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
14771477
bool ResolvesStrongReference =
14781478
((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
14791479
!Sym.isUndefined());
1480-
// We will extract if it defines a new global symbol visible to the
1481-
// host. This is only necessary for code targeting an offloading
1482-
// language.
1483-
bool NewGlobalSymbol =
1484-
((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
1485-
!Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
1486-
(Sym.getVisibility() != GlobalValue::HiddenVisibility));
1487-
ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
1480+
ShouldExtract |= ResolvesStrongReference;
14881481

14891482
// Update this symbol in the "table" with the new information.
14901483
if (OldSym & Sym_Undefined && !Sym.isUndefined())
@@ -1533,15 +1526,7 @@ Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
15331526
bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
15341527
!(OldSym & Sym_Weak) &&
15351528
!(*FlagsOrErr & SymbolRef::SF_Undefined);
1536-
1537-
// We will extract if it defines a new global symbol visible to the
1538-
// host. This is only necessary for code targeting an offloading
1539-
// language.
1540-
bool NewGlobalSymbol =
1541-
((NewSymbol || (OldSym & Sym_Undefined)) &&
1542-
!(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
1543-
!(*FlagsOrErr & SymbolRef::SF_Hidden));
1544-
ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
1529+
ShouldExtract |= ResolvesStrongReference;
15451530

15461531
// Update this symbol in the "table" with the new information.
15471532
if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined))
@@ -1586,10 +1571,101 @@ Expected<bool> getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive,
15861571
}
15871572
}
15881573

1574+
Error getNeededDeviceGlobalsFromBitcode(MemoryBufferRef Buffer,
1575+
SmallVectorImpl<std::string> &Symbols) {
1576+
1577+
LLVMContext Context;
1578+
SMDiagnostic Err;
1579+
std::unique_ptr<Module> M = getLazyIRModule(
1580+
MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false), Err,
1581+
Context);
1582+
if (!M)
1583+
return createStringError(inconvertibleErrorCode(),
1584+
"Failed to create module");
1585+
1586+
// Extract offloading data from globals referenced by the
1587+
// `llvm.embedded.object` metadata with the `.llvm.offloading` section.
1588+
auto *MD = M->getNamedMetadata("llvm.offloading.symbols");
1589+
if (!MD)
1590+
return Error::success();
1591+
1592+
for (const MDNode *Op : MD->operands()) {
1593+
GlobalVariable *GV =
1594+
mdconst::dyn_extract_or_null<GlobalVariable>(Op->getOperand(0));
1595+
if (!GV)
1596+
continue;
1597+
1598+
auto *CDS = dyn_cast<ConstantDataSequential>(GV->getInitializer());
1599+
if (!CDS)
1600+
continue;
1601+
1602+
Symbols.emplace_back(CDS->getAsCString().str());
1603+
}
1604+
1605+
return Error::success();
1606+
}
1607+
1608+
Error getNeededDeviceGlobalsFromObject(const ObjectFile &Obj,
1609+
SmallVectorImpl<std::string> &Symbols) {
1610+
for (const auto &Sec : Obj.sections()) {
1611+
auto NameOrErr = Sec.getName();
1612+
if (!NameOrErr)
1613+
return NameOrErr.takeError();
1614+
1615+
if (*NameOrErr != ".llvm.rodata.offloading")
1616+
continue;
1617+
1618+
auto ContentsOrErr = Sec.getContents();
1619+
if (!ContentsOrErr)
1620+
return ContentsOrErr.takeError();
1621+
llvm::transform(llvm::split(ContentsOrErr->drop_back(), '\0'),
1622+
std::back_inserter(Symbols),
1623+
[](StringRef Str) { return Str.str(); });
1624+
}
1625+
return Error::success();
1626+
}
1627+
1628+
Error getNeededDeviceGlobals(MemoryBufferRef Buffer,
1629+
SmallVectorImpl<std::string> &Symbols) {
1630+
file_magic Type = identify_magic(Buffer.getBuffer());
1631+
switch (Type) {
1632+
case file_magic::bitcode:
1633+
return getNeededDeviceGlobalsFromBitcode(Buffer, Symbols);
1634+
case file_magic::elf_relocatable:
1635+
case file_magic::coff_object: {
1636+
Expected<std::unique_ptr<ObjectFile>> ObjFile =
1637+
ObjectFile::createObjectFile(Buffer, Type);
1638+
if (!ObjFile)
1639+
return ObjFile.takeError();
1640+
return getNeededDeviceGlobalsFromObject(*ObjFile->get(), Symbols);
1641+
}
1642+
case file_magic::archive: {
1643+
Expected<std::unique_ptr<llvm::object::Archive>> LibFile =
1644+
object::Archive::create(Buffer);
1645+
if (!LibFile)
1646+
return LibFile.takeError();
1647+
Error Err = Error::success();
1648+
for (auto Child : (*LibFile)->children(Err)) {
1649+
auto ChildBufferOrErr = Child.getMemoryBufferRef();
1650+
if (!ChildBufferOrErr)
1651+
return ChildBufferOrErr.takeError();
1652+
if (Error Err = getNeededDeviceGlobals(*ChildBufferOrErr, Symbols))
1653+
return Err;
1654+
}
1655+
if (Err)
1656+
return Err;
1657+
return Error::success();
1658+
}
1659+
default:
1660+
return Error::success();
1661+
}
1662+
}
1663+
15891664
/// Search the input files and libraries for embedded device offloading code
15901665
/// and add it to the list of files to be linked. Files coming from static
15911666
/// libraries are only added to the input if they are used by an existing
1592-
/// input file. Returns a list of input files intended for a single linking job.
1667+
/// input file. Returns a list of input files intended for a single linking
1668+
/// job.
15931669
Expected<SmallVector<SmallVector<OffloadFile>>>
15941670
getDeviceInput(const ArgList &Args) {
15951671
llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
@@ -1610,6 +1686,7 @@ getDeviceInput(const ArgList &Args) {
16101686
bool WholeArchive = Args.hasArg(OPT_wholearchive_flag) ? true : false;
16111687
SmallVector<OffloadFile> ObjectFilesToExtract;
16121688
SmallVector<OffloadFile> ArchiveFilesToExtract;
1689+
SmallVector<std::string> NeededSymbols;
16131690
for (const opt::Arg *Arg : Args.filtered(
16141691
OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) {
16151692
if (Arg->getOption().matches(OPT_whole_archive) ||
@@ -1640,6 +1717,9 @@ getDeviceInput(const ArgList &Args) {
16401717
if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object)
16411718
continue;
16421719

1720+
if (Error Err = getNeededDeviceGlobals(Buffer, NeededSymbols))
1721+
return std::move(Err);
1722+
16431723
SmallVector<OffloadFile> Binaries;
16441724
if (Error Err = extractOffloadBinaries(Buffer, Binaries))
16451725
return std::move(Err);
@@ -1666,6 +1746,13 @@ getDeviceInput(const ArgList &Args) {
16661746
CompatibleTargets.emplace_back(ID);
16671747

16681748
for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) {
1749+
// Initialize the symbol table with the device globals that host needs.
1750+
// This will force them to be extracted if they are defined in a library.
1751+
if (!Syms.contains(ID)) {
1752+
for (auto &Symbol : NeededSymbols)
1753+
auto &Val = Syms[ID][Symbol] = Sym_Undefined;
1754+
}
1755+
16691756
Expected<bool> ExtractOrErr = getSymbols(
16701757
Binary.getBinary()->getImage(), Binary.getBinary()->getOffloadKind(),
16711758
/*IsArchive=*/false, Saver, Syms[ID]);

0 commit comments

Comments
 (0)