diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c index cb5c7c137a0ba..2adf4cc83c5d3 100644 --- a/clang/test/Driver/linker-wrapper-libs.c +++ b/clang/test/Driver/linker-wrapper-libs.c @@ -51,30 +51,6 @@ int bar() { return weak; } // LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o // LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o -// -// Check that we extract a static library that defines a global visibile to the -// host. -// -// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc -// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc -// RUN: clang-offload-packager -o %t-lib.out \ -// RUN: --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out -// RUN: llvm-ar rcs %t.a %t.o -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \ -// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL - -// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o -// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o - // // Check that we do not extract a global symbol if the source file was not // created by an offloading language that expects there to be a host version of @@ -142,29 +118,6 @@ int bar() { return weak; } // LIBRARY-HIDDEN-NOT: {{.*}}.o {{.*}}.o // LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -// -// Check that we do not extract a static library that defines a global visibile -// to the host that is already defined. -// -// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -DGLOBAL -o %t.nvptx.global.bc -// RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -DGLOBAL -o %t.amdgpu.global.bc -// RUN: clang-offload-packager -o %t-lib.out \ -// RUN: --image=file=%t.nvptx.global.bc,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.amdgpu.global.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out -// RUN: llvm-ar rcs %t.a %t.o -// RUN: clang-offload-packager -o %t.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 -// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \ -// RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED - -// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o -// LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}gfx1030{{.*}}.o -// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o - // // Check that we can use --[no-]whole-archive to control extraction. // diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 470af4d5d70ca..640a3eb5c4453 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -4,9 +4,6 @@ // REQUIRES: system-linux -// An externally visible variable so static libraries extract. -__attribute__((visibility("protected"), used)) int x; - // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o // RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.nvptx.bc // RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc @@ -169,7 +166,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a:xnack- // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-off.o -fembed-offload-object=%t-off.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID +// RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o --whole-archive %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o @@ -185,7 +182,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t2.o -fembed-offload-object=%t2.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ -// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL +// RUN: --linker-path=/usr/bin/ld %t1.o %t2.o --whole-archive %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL // ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o // ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 9fcecaee318a7..1699946bdf6a2 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1479,14 +1479,7 @@ Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind, bool ResolvesStrongReference = ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) && !Sym.isUndefined()); - // We will extract if it defines a new global symbol visible to the - // host. This is only necessary for code targeting an offloading - // language. - bool NewGlobalSymbol = - ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() && - !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None && - (Sym.getVisibility() != GlobalValue::HiddenVisibility)); - ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol; + ShouldExtract |= ResolvesStrongReference; // Update this symbol in the "table" with the new information. if (OldSym & Sym_Undefined && !Sym.isUndefined()) @@ -1535,15 +1528,7 @@ Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind, bool ResolvesStrongReference = (OldSym & Sym_Undefined) && !(OldSym & Sym_Weak) && !(*FlagsOrErr & SymbolRef::SF_Undefined); - - // We will extract if it defines a new global symbol visible to the - // host. This is only necessary for code targeting an offloading - // language. - bool NewGlobalSymbol = - ((NewSymbol || (OldSym & Sym_Undefined)) && - !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None && - !(*FlagsOrErr & SymbolRef::SF_Hidden)); - ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol; + ShouldExtract |= ResolvesStrongReference; // Update this symbol in the "table" with the new information. if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined)) @@ -1588,10 +1573,101 @@ Expected getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive, } } +Error getNeededDeviceGlobalsFromBitcode(MemoryBufferRef Buffer, + SmallVectorImpl &Symbols) { + + LLVMContext Context; + SMDiagnostic Err; + std::unique_ptr M = getLazyIRModule( + MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false), Err, + Context); + if (!M) + return createStringError(inconvertibleErrorCode(), + "Failed to create module"); + + // Extract offloading data from globals referenced by the + // `llvm.embedded.object` metadata with the `.llvm.offloading` section. + auto *MD = M->getNamedMetadata("llvm.offloading.symbols"); + if (!MD) + return Error::success(); + + for (const MDNode *Op : MD->operands()) { + GlobalVariable *GV = + mdconst::dyn_extract_or_null(Op->getOperand(0)); + if (!GV) + continue; + + auto *CDS = dyn_cast(GV->getInitializer()); + if (!CDS) + continue; + + Symbols.emplace_back(CDS->getAsCString().str()); + } + + return Error::success(); +} + +Error getNeededDeviceGlobalsFromObject(const ObjectFile &Obj, + SmallVectorImpl &Symbols) { + for (const auto &Sec : Obj.sections()) { + auto NameOrErr = Sec.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + + if (*NameOrErr != ".llvm.rodata.offloading") + continue; + + auto ContentsOrErr = Sec.getContents(); + if (!ContentsOrErr) + return ContentsOrErr.takeError(); + llvm::transform(llvm::split(ContentsOrErr->drop_back(), '\0'), + std::back_inserter(Symbols), + [](StringRef Str) { return Str.str(); }); + } + return Error::success(); +} + +Error getNeededDeviceGlobals(MemoryBufferRef Buffer, + SmallVectorImpl &Symbols) { + file_magic Type = identify_magic(Buffer.getBuffer()); + switch (Type) { + case file_magic::bitcode: + return getNeededDeviceGlobalsFromBitcode(Buffer, Symbols); + case file_magic::elf_relocatable: + case file_magic::coff_object: { + Expected> ObjFile = + ObjectFile::createObjectFile(Buffer, Type); + if (!ObjFile) + return ObjFile.takeError(); + return getNeededDeviceGlobalsFromObject(*ObjFile->get(), Symbols); + } + case file_magic::archive: { + Expected> LibFile = + object::Archive::create(Buffer); + if (!LibFile) + return LibFile.takeError(); + Error Err = Error::success(); + for (auto Child : (*LibFile)->children(Err)) { + auto ChildBufferOrErr = Child.getMemoryBufferRef(); + if (!ChildBufferOrErr) + return ChildBufferOrErr.takeError(); + if (Error Err = getNeededDeviceGlobals(*ChildBufferOrErr, Symbols)) + return Err; + } + if (Err) + return Err; + return Error::success(); + } + default: + return Error::success(); + } +} + /// Search the input files and libraries for embedded device offloading code /// and add it to the list of files to be linked. Files coming from static /// libraries are only added to the input if they are used by an existing -/// input file. Returns a list of input files intended for a single linking job. +/// input file. Returns a list of input files intended for a single linking +/// job. Expected>> getDeviceInput(const ArgList &Args) { llvm::TimeTraceScope TimeScope("ExtractDeviceCode"); @@ -1612,6 +1688,7 @@ getDeviceInput(const ArgList &Args) { bool WholeArchive = Args.hasArg(OPT_wholearchive_flag) ? true : false; SmallVector ObjectFilesToExtract; SmallVector ArchiveFilesToExtract; + SmallVector NeededSymbols; for (const opt::Arg *Arg : Args.filtered( OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) { if (Arg->getOption().matches(OPT_whole_archive) || @@ -1642,6 +1719,9 @@ getDeviceInput(const ArgList &Args) { if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object) continue; + if (Error Err = getNeededDeviceGlobals(Buffer, NeededSymbols)) + return std::move(Err); + SmallVector Binaries; if (Error Err = extractOffloadBinaries(Buffer, Binaries)) return std::move(Err); @@ -1668,6 +1748,13 @@ getDeviceInput(const ArgList &Args) { CompatibleTargets.emplace_back(ID); for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { + // Initialize the symbol table with the device globals that host needs. + // This will force them to be extracted if they are defined in a library. + if (!Syms.contains(ID)) { + for (auto &Symbol : NeededSymbols) + auto &Val = Syms[ID][Symbol] = Sym_Undefined; + } + Expected ExtractOrErr = getSymbols( Binary.getBinary()->getImage(), Binary.getBinary()->getOffloadKind(), /*IsArchive=*/false, Saver, Syms[ID]);