Skip to content

Commit 167a643

Browse files
committed
Merge from '"main"' to '"sycl-web"' (1 commits)
CONFLICT (content): Merge conflict in clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp CONFLICT (content): Merge conflict in clang/lib/Driver/ToolChains/Clang.cpp
2 parents 2cfa1a6 + 4c41170 commit 167a643

File tree

10 files changed

+550
-18
lines changed

10 files changed

+550
-18
lines changed

clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
114114
}
115115
}
116116

117+
AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
118+
SubArchName,
119+
/* bitcode SDL?*/ true,
120+
/* PostClang Link? */ false);
117121
// Add an intermediate output file.
118122
CmdArgs.push_back("-o");
119123
const char *OutputFileName =

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8253,6 +8253,23 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
82538253
Triples += '-';
82548254
Triples += CurDep->getOffloadingArch();
82558255
}
8256+
8257+
// TODO: Replace parsing of -march flag. Can be done by storing GPUArch
8258+
// with each toolchain.
8259+
StringRef GPUArchName;
8260+
if (CurKind == Action::OFK_OpenMP) {
8261+
// Extract GPUArch from -march argument in TC argument list.
8262+
for (unsigned ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
8263+
auto ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
8264+
auto Arch = ArchStr.startswith_insensitive("-march=");
8265+
if (Arch) {
8266+
GPUArchName = ArchStr.substr(7);
8267+
Triples += "-";
8268+
break;
8269+
}
8270+
}
8271+
Triples += GPUArchName.str();
8272+
}
82568273
}
82578274
// If we see we are bundling for FPGA using -fintelfpga, add the
82588275
// dependency bundle
@@ -8412,6 +8429,22 @@ void OffloadBundler::ConstructJobMultipleOutputs(
84128429
Triples += '-';
84138430
Triples += Dep.DependentBoundArch;
84148431
}
8432+
// TODO: Replace parsing of -march flag. Can be done by storing GPUArch
8433+
// with each toolchain.
8434+
StringRef GPUArchName;
8435+
if (Dep.DependentOffloadKind == Action::OFK_OpenMP) {
8436+
// Extract GPUArch from -march argument in TC argument list.
8437+
for (uint ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
8438+
StringRef ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
8439+
auto Arch = ArchStr.startswith_insensitive("-march=");
8440+
if (Arch) {
8441+
GPUArchName = ArchStr.substr(7);
8442+
Triples += "-";
8443+
break;
8444+
}
8445+
}
8446+
Triples += GPUArchName.str();
8447+
}
84158448
}
84168449
if (IsFPGADepUnbundle || IsFPGADepLibUnbundle) {
84178450
// TODO - We are currently using the target triple inputs to slot a location

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "clang/Driver/Util.h"
3535
#include "clang/Driver/XRayArgs.h"
3636
#include "llvm/ADT/STLExtras.h"
37+
#include "llvm/ADT/SmallSet.h"
3738
#include "llvm/ADT/SmallString.h"
3839
#include "llvm/ADT/StringExtras.h"
3940
#include "llvm/ADT/StringSwitch.h"
@@ -1595,6 +1596,292 @@ void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args,
15951596
}
15961597
}
15971598

1599+
/// SDLSearch: Search for Static Device Library
1600+
/// The search for SDL bitcode files is consistent with how static host
1601+
/// libraries are discovered. That is, the -l option triggers a search for
1602+
/// files in a set of directories called the LINKPATH. The host library search
1603+
/// procedure looks for a specific filename in the LINKPATH. The filename for
1604+
/// a host library is lib<libname>.a or lib<libname>.so. For SDLs, there is an
1605+
/// ordered-set of filenames that are searched. We call this ordered-set of
1606+
/// filenames as SEARCH-ORDER. Since an SDL can either be device-type specific,
1607+
/// architecture specific, or generic across all architectures, a naming
1608+
/// convention and search order is used where the file name embeds the
1609+
/// architecture name <arch-name> (nvptx or amdgcn) and the GPU device type
1610+
/// <device-name> such as sm_30 and gfx906. <device-name> is absent in case of
1611+
/// device-independent SDLs. To reduce congestion in host library directories,
1612+
/// the search first looks for files in the “libdevice” subdirectory. SDLs that
1613+
/// are bc files begin with the prefix “lib”.
1614+
///
1615+
/// Machine-code SDLs can also be managed as an archive (*.a file). The
1616+
/// convention has been to use the prefix “lib”. To avoid confusion with host
1617+
/// archive libraries, we use prefix "libbc-" for the bitcode SDL archives.
1618+
///
1619+
bool tools::SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
                      llvm::opt::ArgStringList &CC1Args,
                      SmallVector<std::string, 8> LibraryPaths, std::string Lib,
                      StringRef Arch, StringRef Target, bool isBitCodeSDL,
                      bool postClangLink) {
  // File names to probe inside each library directory, kept in SEARCH-ORDER.
  // Every entry starts with '/' so it can be appended to a directory as-is.
  SmallVector<std::string, 12> Candidates;

  const std::string DeviceSubDir = "/libdevice";
  const std::string ArchivePrefix = "/libbc-";
  const std::string PlainPrefix = "/lib";

  // <libname> decorated from the most specific form to the least specific one.
  const std::string DeviceStem = Twine(Lib + "-" + Arch + "-" + Target).str();
  const std::string ArchStem = Twine(Lib + "-" + Arch).str();
  const std::string &GenericStem = Lib;

  // Queue the "libdevice/" flavour of a file name, then the plain flavour.
  auto QueuePair = [&](const std::string &Prefix, const std::string &Stem,
                       const char *Ext) {
    Candidates.push_back(DeviceSubDir + Prefix + Stem + Ext);
    Candidates.push_back(Prefix + Stem + Ext);
  };

  if (isBitCodeSDL) {
    // SEARCH-ORDER for Bitcode SDLs:
    //       libdevice/libbc-<libname>-<arch-name>-<device-type>.a
    //       libbc-<libname>-<arch-name>-<device-type>.a
    //       libdevice/libbc-<libname>-<arch-name>.a
    //       libbc-<libname>-<arch-name>.a
    //       libdevice/libbc-<libname>.a
    //       libbc-<libname>.a
    //       libdevice/lib<libname>-<arch-name>-<device-type>.bc
    //       lib<libname>-<arch-name>-<device-type>.bc
    //       libdevice/lib<libname>-<arch-name>.bc
    //       lib<libname>-<arch-name>.bc
    //       libdevice/lib<libname>.bc
    //       lib<libname>.bc
    QueuePair(ArchivePrefix, DeviceStem, ".a");
    QueuePair(ArchivePrefix, ArchStem, ".a");
    QueuePair(ArchivePrefix, GenericStem, ".a");
    QueuePair(PlainPrefix, DeviceStem, ".bc");
    QueuePair(PlainPrefix, ArchStem, ".bc");
    QueuePair(PlainPrefix, GenericStem, ".bc");
  } else {
    // SEARCH-ORDER for Machine-code SDLs:
    //    libdevice/lib<libname>-<arch-name>-<device-type>.a
    //    lib<libname>-<arch-name>-<device-type>.a
    //    libdevice/lib<libname>-<arch-name>.a
    //    lib<libname>-<arch-name>.a
    QueuePair(PlainPrefix, DeviceStem, ".a");
    QueuePair(PlainPrefix, ArchStem, ".a");
  }

  // The CUDA toolchain does not use a global device llvm-link before the LLVM
  // backend generates ptx. So currently, the use of bitcode SDL for nvptx is
  // only possible with post-clang-cc1 linking. Clang cc1 has a feature that
  // will link libraries after clang compilation while the LLVM IR is still in
  // memory. This utilizes a clang cc1 option called "-mlink-builtin-bitcode".
  // This is a clang -cc1 option that is generated by the clang driver. The
  // option value must be a full path to an existing file.
  //
  // Directories are visited in order; within a directory the candidates are
  // visited in SEARCH-ORDER. The first file that exists wins.
  for (const std::string &Dir : LibraryPaths) {
    for (const std::string &Candidate : Candidates) {
      const std::string FullPath = Dir + Candidate;
      if (!llvm::sys::fs::exists(FullPath))
        continue;

      if (postClangLink)
        CC1Args.push_back("-mlink-builtin-bitcode");
      CC1Args.push_back(DriverArgs.MakeArgString(FullPath));
      return true;
    }
  }
  return false;
}
1694+
1695+
/// Search if a user provided archive file lib<libname>.a exists in any of
1696+
/// the library paths. If so, add a new command to clang-offload-bundler to
1697+
/// unbundle this archive and create a temporary device specific archive. Name
1698+
/// of this SDL is passed to the llvm-link (for amdgcn) or to the
1699+
/// clang-nvlink-wrapper (for nvptx) commands by the driver.
1700+
bool tools::GetSDLFromOffloadArchive(
    Compilation &C, const Driver &D, const Tool &T, const JobAction &JA,
    const InputInfoList &Inputs, const llvm::opt::ArgList &DriverArgs,
    llvm::opt::ArgStringList &CC1Args, SmallVector<std::string, 8> LibraryPaths,
    StringRef Lib, StringRef Arch, StringRef Target, bool isBitCodeSDL,
    bool postClangLink) {

  // We don't support bitcode archive bundles for nvptx
  if (isBitCodeSDL && Arch.contains("nvptx"))
    return false;

  for (const std::string &LPath : LibraryPaths) {
    // Candidate archives for this directory only; "libdevice/" is preferred.
    // Keeping the list local to the iteration avoids re-probing the
    // candidates of directories that were already rejected.
    const std::string AOBFileNames[] = {
        Twine(LPath + "/libdevice/lib" + Lib + ".a").str(),
        Twine(LPath + "/lib" + Lib + ".a").str()};

    std::string ArchiveOfBundles;
    for (const std::string &AOB : AOBFileNames) {
      if (llvm::sys::fs::exists(AOB)) {
        ArchiveOfBundles = AOB;
        break;
      }
    }

    // Nothing usable in this directory; try the next one.
    if (ArchiveOfBundles.empty())
      continue;

    // Temporary device-specific archive that the unbundler will produce.
    StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib";
    std::string OutputLib = D.GetTemporaryPath(
        Twine(Prefix + Lib + "-" + Arch + "-" + Target).str(), "a");
    const char *OutputLibArg = C.getArgs().MakeArgString(OutputLib);

    C.addTempFile(OutputLibArg);

    // Bundle target: <offload-kind>-<normalized-triple>[-<device-type>].
    SmallString<128> DeviceTriple;
    DeviceTriple += Action::GetOffloadKindName(JA.getOffloadingDeviceKind());
    DeviceTriple += '-';
    std::string NormalizedTriple = T.getToolChain().getTriple().normalize();
    DeviceTriple += NormalizedTriple;
    if (!Target.empty()) {
      DeviceTriple += '-';
      DeviceTriple += Target;
    }

    const char *UBProgram = DriverArgs.MakeArgString(
        T.getToolChain().GetProgramPath("clang-offload-bundler"));

    ArgStringList UBArgs;
    UBArgs.push_back("-unbundle");
    UBArgs.push_back("-type=a");
    UBArgs.push_back(C.getArgs().MakeArgString("-inputs=" + ArchiveOfBundles));
    UBArgs.push_back(
        C.getArgs().MakeArgString("-targets=" + std::string(DeviceTriple)));
    UBArgs.push_back(C.getArgs().MakeArgString("-outputs=" + OutputLib));

    // Add this flag to not exit from clang-offload-bundler if no compatible
    // code object is found in heterogeneous archive library.
    UBArgs.push_back("-allow-missing-bundles");

    C.addCommand(std::make_unique<Command>(
        JA, T, ResponseFileSupport::AtFileCurCP(), UBProgram, UBArgs, Inputs,
        InputInfo(&JA, OutputLibArg)));

    // Hand the (yet to be created) device archive to the consumer command.
    if (postClangLink)
      CC1Args.push_back("-mlink-builtin-bitcode");
    CC1Args.push_back(DriverArgs.MakeArgString(OutputLib));
    return true;
  }

  return false;
}
1781+
1782+
// Wrapper function used by driver for adding SDLs during link phase.
1783+
void tools::AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
                                       const JobAction &JA,
                                       const InputInfoList &Inputs,
                                       const llvm::opt::ArgList &DriverArgs,
                                       llvm::opt::ArgStringList &CC1Args,
                                       StringRef Arch, StringRef Target,
                                       bool isBitCodeSDL, bool postClangLink) {
  // Link-phase entry point: the driver is taken from the compilation, and the
  // compilation, tool, job action and inputs are forwarded by address.
  const Driver &TheDriver = C.getDriver();
  AddStaticDeviceLibs(&C, &T, &JA, &Inputs, TheDriver, DriverArgs, CC1Args,
                      Arch, Target, isBitCodeSDL, postClangLink);
}
1793+
1794+
// Wrapper function used for post clang linking of bitcode SDLS for nvptx by
1795+
// the CUDA toolchain.
1796+
void tools::AddStaticDeviceLibsPostLinking(const Driver &D,
1797+
const llvm::opt::ArgList &DriverArgs,
1798+
llvm::opt::ArgStringList &CC1Args,
1799+
StringRef Arch, StringRef Target,
1800+
bool isBitCodeSDL, bool postClangLink) {
1801+
AddStaticDeviceLibs(nullptr, nullptr, nullptr, nullptr, D, DriverArgs,
1802+
CC1Args, Arch, Target, isBitCodeSDL, postClangLink);
1803+
}
1804+
1805+
// User defined Static Device Libraries(SDLs) can be passed to clang for
1806+
// offloading GPU compilers. Like static host libraries, the use of a SDL is
1807+
// specified with the -l command line option. The primary difference between
1808+
// host and SDLs is the filenames for SDLs (refer SEARCH-ORDER for Bitcode SDLs
1809+
// and SEARCH-ORDER for Machine-code SDLs for the naming convention).
1810+
// SDLs are of following types:
1811+
//
1812+
// * Bitcode SDLs: They can either be a *.bc file or an archive of *.bc files.
1813+
// For NVPTX, these libraries are post-clang linked following each
1814+
// compilation. For AMDGPU, these libraries are linked one time
1815+
// during the application link phase.
1816+
//
1817+
// * Machine-code SDLs: They are archive files. For NVPTX, the archive members
1818+
// contain cubin for Nvidia GPUs and are linked one time during the
1819+
// link phase by the CUDA SDK linker called nvlink. For AMDGPU, the
1820+
// process for machine code SDLs is still in development. But they
1821+
// will be linked by the LLVM tool lld.
1822+
//
1823+
// * Bundled objects that contain both host and device codes: Bundled objects
1824+
// may also contain library code compiled from source. For NVPTX, the
1825+
// bundle contains cubin. For AMDGPU, the bundle contains bitcode.
1826+
//
1827+
// For Bitcode and Machine-code SDLs, current compiler toolchains hardcode the
1828+
// inclusion of specific SDLs such as math libraries and the OpenMP device
1829+
// library libomptarget.
1830+
void tools::AddStaticDeviceLibs(Compilation *C, const Tool *T,
                                const JobAction *JA,
                                const InputInfoList *Inputs, const Driver &D,
                                const llvm::opt::ArgList &DriverArgs,
                                llvm::opt::ArgStringList &CC1Args,
                                StringRef Arch, StringRef Target,
                                bool isBitCodeSDL, bool postClangLink) {
  // C, T, JA and Inputs may be null (AddStaticDeviceLibsPostLinking passes
  // nullptr for all four). They are only needed to schedule an unbundling
  // command, so that fallback is skipped when any of them is missing.

  SmallVector<std::string, 8> LibraryPaths;
  // Add search directories from LIBRARY_PATH env variable
  llvm::Optional<std::string> LibPath =
      llvm::sys::Process::GetEnv("LIBRARY_PATH");
  if (LibPath) {
    SmallVector<StringRef, 8> Frags;
    const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
    llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
    for (StringRef Path : Frags)
      LibraryPaths.emplace_back(Path.trim());
  }

  // Add directories from user-specified -L options
  for (const std::string &Search_Dir :
       DriverArgs.getAllArgValues(options::OPT_L))
    LibraryPaths.emplace_back(Search_Dir);

  // Add path to lib-debug folders
  SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
  llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
  LibraryPaths.emplace_back(DefaultLibPath.c_str());

  // Build list of Static Device Libraries SDLs specified by -l option
  llvm::SmallSet<std::string, 16> SDLNames;
  static const StringRef HostOnlyArchives[] = {
      "omp", "cudart", "m", "gcc", "gcc_s", "pthread", "hip_hcc"};
  for (const std::string &SDLName :
       DriverArgs.getAllArgValues(options::OPT_l)) {
    // Compare against every entry of the table. `HostOnlyArchives->contains()`
    // would only inspect the first element ("omp") and do a substring match.
    if (!llvm::is_contained(HostOnlyArchives, StringRef(SDLName)))
      SDLNames.insert(SDLName);
  }

  // Unbundling schedules a new command and therefore needs the full job
  // context.
  const bool CanUnbundle = C && T && JA && Inputs;

  // The search stops as soon as an SDL file is found. The driver then provides
  // the full filename of the SDL to the llvm-link or clang-nvlink-wrapper
  // command. If no SDL is found after searching each LINKPATH with
  // SEARCH-ORDER, it is possible that an archive file lib<libname>.a exists
  // and may contain bundled object files.
  for (const std::string &SDLName : SDLNames) {
    // This is the only call to SDLSearch
    if (SDLSearch(D, DriverArgs, CC1Args, LibraryPaths, SDLName, Arch, Target,
                  isBitCodeSDL, postClangLink))
      continue;

    if (CanUnbundle)
      GetSDLFromOffloadArchive(*C, D, *T, *JA, *Inputs, DriverArgs, CC1Args,
                               LibraryPaths, SDLName, Arch, Target,
                               isBitCodeSDL, postClangLink);
  }
}
1884+
15981885
static llvm::opt::Arg *
15991886
getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
16001887
// The last of -mcode-object-v3, -mno-code-object-v3 and

clang/lib/Driver/ToolChains/CommonArgs.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,39 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
4949
llvm::opt::ArgStringList &CmdArgs,
5050
const llvm::opt::ArgList &Args);
5151

52+
/// Wrapper used by the driver for adding Static Device Libraries (SDLs) during
/// the link phase. Forwards to AddStaticDeviceLibs using the driver obtained
/// from \p C.
void AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
53+
const JobAction &JA,
54+
const InputInfoList &Inputs,
55+
const llvm::opt::ArgList &DriverArgs,
56+
llvm::opt::ArgStringList &CmdArgs,
57+
StringRef Arch, StringRef Target,
58+
bool isBitCodeSDL, bool postClangLink);
59+
/// Wrapper used for post-clang linking of bitcode SDLs for nvptx by the CUDA
/// toolchain. Forwards to AddStaticDeviceLibs with null Compilation, Tool,
/// JobAction and InputInfoList pointers.
void AddStaticDeviceLibsPostLinking(const Driver &D,
60+
const llvm::opt::ArgList &DriverArgs,
61+
llvm::opt::ArgStringList &CmdArgs,
62+
StringRef Arch, StringRef Target,
63+
bool isBitCodeSDL, bool postClangLink);
64+
/// Collects library search directories (LIBRARY_PATH, -L options and the
/// "lib" directory next to the driver's parent directory) and the library
/// names given with -l, then for each name runs SDLSearch and, if that finds
/// nothing, GetSDLFromOffloadArchive. \p C, \p T, \p JA and \p Inputs are
/// passed as pointers because one caller supplies nullptr for them.
void AddStaticDeviceLibs(Compilation *C, const Tool *T, const JobAction *JA,
65+
const InputInfoList *Inputs, const Driver &D,
66+
const llvm::opt::ArgList &DriverArgs,
67+
llvm::opt::ArgStringList &CmdArgs, StringRef Arch,
68+
StringRef Target, bool isBitCodeSDL,
69+
bool postClangLink);
70+
71+
/// Probes every directory in \p LibraryPaths for the first existing SDL file
/// for \p Lib, trying file names built from \p Arch and \p Target in a fixed
/// search order. On a hit the full path is appended to \p CmdArgs (preceded by
/// "-mlink-builtin-bitcode" when \p postClangLink is set).
/// \returns true if a file was found.
bool SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
72+
llvm::opt::ArgStringList &CmdArgs,
73+
SmallVector<std::string, 8> LibraryPaths, std::string Lib,
74+
StringRef Arch, StringRef Target, bool isBitCodeSDL,
75+
bool postClangLink);
76+
77+
/// Looks for libdevice/lib<Lib>.a or lib<Lib>.a in \p LibraryPaths. When one
/// exists, adds a clang-offload-bundler "-unbundle" command to \p C that
/// writes a temporary device-specific archive, and appends that archive to
/// \p CC1Args (preceded by "-mlink-builtin-bitcode" when \p postClangLink is
/// set). Returns false immediately for bitcode SDLs when \p Arch contains
/// "nvptx".
/// \returns true if an archive was found.
bool GetSDLFromOffloadArchive(Compilation &C, const Driver &D, const Tool &T,
78+
const JobAction &JA, const InputInfoList &Inputs,
79+
const llvm::opt::ArgList &DriverArgs,
80+
llvm::opt::ArgStringList &CC1Args,
81+
SmallVector<std::string, 8> LibraryPaths,
82+
StringRef Lib, StringRef Arch, StringRef Target,
83+
bool isBitCodeSDL, bool postClangLink);
84+
5285
const char *SplitDebugName(const JobAction &JA, const llvm::opt::ArgList &Args,
5386
const InputInfo &Input, const InputInfo &Output);
5487

0 commit comments

Comments
 (0)