|
14 | 14 | // |
15 | 15 | //===---------------------------------------------------------------------===// |
16 | 16 |
|
| 17 | +#include "clang/Basic/Cuda.h" |
17 | 18 | #include "clang/Basic/Version.h" |
18 | 19 | #include "llvm/ADT/MapVector.h" |
19 | 20 | #include "llvm/BinaryFormat/Magic.h" |
@@ -409,6 +410,46 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles, |
409 | 410 |
|
410 | 411 | return *TempFileOrErr; |
411 | 412 | } |
| 413 | + |
| 414 | +// ptxas binary |
| 415 | +Expected<StringRef> ptxas(StringRef InputFile, const ArgList &Args, |
| 416 | + StringRef Arch) { |
| 417 | + llvm::TimeTraceScope TimeScope("NVPTX ptxas"); |
| 418 | + // NVPTX uses the ptxas program to process assembly files. |
| 419 | + Expected<std::string> PtxasPath = |
| 420 | + findProgram("ptxas", {CudaBinaryPath + "/bin"}); |
| 421 | + if (!PtxasPath) |
| 422 | + return PtxasPath.takeError(); |
| 423 | + |
| 424 | + llvm::Triple Triple( |
| 425 | + Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); |
| 426 | + |
| 427 | + // Create a new file to write the output to. |
| 428 | + auto TempFileOrErr = |
| 429 | + createOutputFile(sys::path::filename(ExecutableName), "cubin"); |
| 430 | + if (!TempFileOrErr) |
| 431 | + return TempFileOrErr.takeError(); |
| 432 | + |
| 433 | + SmallVector<StringRef, 16> CmdArgs; |
| 434 | + CmdArgs.push_back(*PtxasPath); |
| 435 | + CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32"); |
| 436 | + // Pass -v to ptxas if it was passed to the driver. |
| 437 | + if (Args.hasArg(OPT_verbose)) |
| 438 | + CmdArgs.push_back("-v"); |
| 439 | + StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); |
| 440 | + if (Args.hasArg(OPT_debug)) |
| 441 | + CmdArgs.push_back("-g"); |
| 442 | + else |
| 443 | + CmdArgs.push_back(Args.MakeArgString("-" + OptLevel)); |
| 444 | + CmdArgs.push_back("--gpu-name"); |
| 445 | + CmdArgs.push_back(Arch); |
| 446 | + CmdArgs.push_back("--output-file"); |
| 447 | + CmdArgs.push_back(*TempFileOrErr); |
| 448 | + CmdArgs.push_back(InputFile); |
| 449 | + if (Error Err = executeCommands(*PtxasPath, CmdArgs)) |
| 450 | + return std::move(Err); |
| 451 | + return *TempFileOrErr; |
| 452 | +} |
412 | 453 | } // namespace nvptx |
413 | 454 |
|
414 | 455 | namespace amdgcn { |
@@ -1240,7 +1281,8 @@ static Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles, |
1240 | 1281 | } // namespace sycl |
1241 | 1282 |
|
1242 | 1283 | namespace generic { |
1243 | | -Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { |
| 1284 | +Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args, |
| 1285 | + bool IsSYCLKind = false) { |
1244 | 1286 | llvm::TimeTraceScope TimeScope("Clang"); |
1245 | 1287 | // Use `clang` to invoke the appropriate device tools. |
1246 | 1288 | Expected<std::string> ClangPath = |
@@ -1276,6 +1318,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { |
1276 | 1318 | if (!Triple.isNVPTX()) |
1277 | 1319 | CmdArgs.push_back("-Wl,--no-undefined"); |
1278 | 1320 |
|
| 1321 | + if (IsSYCLKind && Triple.isNVPTX()) |
| 1322 | + CmdArgs.push_back("-S"); |
1279 | 1323 | for (StringRef InputFile : InputFiles) |
1280 | 1324 | CmdArgs.push_back(InputFile); |
1281 | 1325 |
|
@@ -1369,7 +1413,7 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles, |
1369 | 1413 | case Triple::ppc64: |
1370 | 1414 | case Triple::ppc64le: |
1371 | 1415 | case Triple::systemz: |
1372 | | - return generic::clang(InputFiles, Args); |
| 1416 | + return generic::clang(InputFiles, Args, IsSYCLKind); |
1373 | 1417 | case Triple::spirv32: |
1374 | 1418 | case Triple::spirv64: |
1375 | 1419 | case Triple::spir: |
@@ -2078,14 +2122,40 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles( |
2078 | 2122 | return OutputFile.takeError(); |
2079 | 2123 | WrappedOutput.push_back(*OutputFile); |
2080 | 2124 | } |
2081 | | - |
2082 | 2125 | for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { |
2083 | 2126 | SmallVector<StringRef> Files = {SplitModules[I].ModuleFilePath}; |
2084 | | - auto LinkedFileFinalOrErr = |
| 2127 | + StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ); |
| 2128 | + if (Arch.empty()) |
| 2129 | + Arch = "native"; |
| 2130 | + SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles; |
| 2131 | + auto ClangOutputOrErr = |
2085 | 2132 | linkDevice(Files, LinkerArgs, true /* IsSYCLKind */); |
2086 | | - if (!LinkedFileFinalOrErr) |
2087 | | - return LinkedFileFinalOrErr.takeError(); |
2088 | | - SplitModules[I].ModuleFilePath = *LinkedFileFinalOrErr; |
| 2133 | + if (!ClangOutputOrErr) |
| 2134 | + return ClangOutputOrErr.takeError(); |
| 2135 | + if (Triple.isNVPTX()) { |
| 2136 | + auto VirtualArch = StringRef(clang::CudaArchToVirtualArchString( |
| 2137 | + clang::StringToCudaArch(Arch))); |
| 2138 | + auto PtxasOutputOrErr = |
| 2139 | + nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch); |
| 2140 | + if (!PtxasOutputOrErr) |
| 2141 | + return PtxasOutputOrErr.takeError(); |
| 2142 | + BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch); |
| 2143 | + BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch); |
| 2144 | + auto BundledFileOrErr = |
| 2145 | + nvptx::fatbinary(BundlerInputFiles, LinkerArgs); |
| 2146 | + if (!BundledFileOrErr) |
| 2147 | + return BundledFileOrErr.takeError(); |
| 2148 | + SplitModules[I].ModuleFilePath = *BundledFileOrErr; |
| 2149 | + } else if (Triple.isAMDGCN()) { |
| 2150 | + BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch); |
| 2151 | + auto BundledFileOrErr = |
| 2152 | + amdgcn::fatbinary(BundlerInputFiles, LinkerArgs); |
| 2153 | + if (!BundledFileOrErr) |
| 2154 | + return BundledFileOrErr.takeError(); |
| 2155 | + SplitModules[I].ModuleFilePath = *BundledFileOrErr; |
| 2156 | + } else { |
| 2157 | + SplitModules[I].ModuleFilePath = *ClangOutputOrErr; |
| 2158 | + } |
2089 | 2159 | } |
2090 | 2160 | // TODO(NOM7): Remove this call and use community flow for bundle/wrap |
2091 | 2161 | auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs); |
|
0 commit comments