Skip to content

Commit 4ce85c6

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into private/asachkov/detach-unittests-from-ur
2 parents 150ee47 + 95604ae commit 4ce85c6

File tree

127 files changed

+3846
-2527
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

127 files changed

+3846
-2527
lines changed

.github/CODEOWNERS

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,18 @@ sycl/doc/design/ @intel/llvm-reviewers-runtime
3434
sycl/doc/design/spirv-extensions/ @intel/dpcpp-spirv-doc-reviewers
3535
sycl/doc/extensions/ @intel/dpcpp-specification-reviewers
3636

37-
# Unified Runtime
38-
sycl/plugins @intel/unified-runtime-reviewers
37+
# Unified Runtime
38+
sycl/cmake/modules/FetchUnifiedRuntime.cmake @intel/unified-runtime-reviewers
39+
sycl/include/sycl/detail/ur.hpp @intel/unified-runtime-reviewers
40+
sycl/source/detail/posix_ur.cpp @intel/unified-runtime-reviewers
41+
sycl/source/detail/ur.cpp @intel/unified-runtime-reviewers
42+
sycl/source/detail/windows_ur.cpp @intel/unified-runtime-reviewers
3943
sycl/test-e2e/Plugin/ @intel/unified-runtime-reviewers
4044

4145
# Win Proxy Loader
4246
sycl/pi_win_proxy_loader @intel/llvm-reviewers-runtime
43-
sycl/plugins/common_win_pi_trace @intel/llvm-reviewers-runtime
4447
sycl/test-e2e/Plugin/dll-detach-order.cpp @intel/llvm-reviewers-runtime
4548

46-
# CUDA and HIP plugins
47-
sycl/plugins/**/cuda/ @intel/llvm-reviewers-cuda
48-
sycl/plugins/**/hip/ @intel/llvm-reviewers-cuda
49-
5049
# CUDA specific runtime implementations
5150
sycl/include/sycl/ext/oneapi/experimental/cuda/ @intel/llvm-reviewers-cuda
5251

@@ -149,7 +148,6 @@ sycl/include/syclcompat.hpp @intel/syclcompat-lib-reviewers
149148
sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @intel/bindless-images-reviewers
150149
sycl/include/sycl/ext/oneapi/bindless* @intel/bindless-images-reviewers
151150
sycl/source/detail/bindless* @intel/bindless-images-reviewers
152-
sycl/plugins/unified_runtime/ur/adapters/**/image.* @intel/bindless-images-reviewers
153151
sycl/test/check_device_code/extensions/bindless_images.cpp @intel/bindless-images-reviewers
154152
sycl/test-e2e/bindless_images/ @intel/bindless-images-reviewers
155153

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -317,12 +317,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
317317
bool shouldEmitStaticExternCAliases() const override;
318318
bool shouldEmitDWARFBitFieldSeparators() const override;
319319
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
320-
321-
private:
322-
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
323-
// resulting MDNode to the amdgcn.annotations MDNode.
324-
static void addAMDGCNMetadata(llvm::GlobalValue *GV, StringRef Name,
325-
int Operand);
326320
};
327321
}
328322

@@ -404,33 +398,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
404398
}
405399
}
406400

407-
/// Helper function for AMDGCN and NVVM targets, adds a NamedMDNode with GV,
408-
/// Name, and Operand as operands, and adds the resulting MDNode to the
409-
/// AnnotationName MDNode.
410-
static void addAMDGCOrNVVMMetadata(const char *AnnotationName,
411-
llvm::GlobalValue *GV, StringRef Name,
412-
int Operand) {
413-
llvm::Module *M = GV->getParent();
414-
llvm::LLVMContext &Ctx = M->getContext();
415-
416-
// Get annotations metadata node.
417-
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(AnnotationName);
418-
419-
llvm::Metadata *MDVals[] = {
420-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
421-
llvm::ConstantAsMetadata::get(
422-
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
423-
// Append metadata to annotations node.
424-
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
425-
}
426-
427-
428-
void AMDGPUTargetCodeGenInfo::addAMDGCNMetadata(llvm::GlobalValue *GV,
429-
StringRef Name, int Operand) {
430-
addAMDGCOrNVVMMetadata("amdgcn.annotations", GV, Name, Operand);
431-
}
432-
433-
434401
/// Emits control constants used to change per-architecture behaviour in the
435402
/// AMDGPU ROCm device libraries.
436403
void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
@@ -483,12 +450,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
483450
if (FD)
484451
setFunctionDeclAttributes(FD, F, M);
485452

486-
// Create !{<func-ref>, metadata !"kernel", i32 1} node for SYCL kernels.
487-
const bool IsSYCLKernel =
488-
FD && M.getLangOpts().SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>();
489-
if (IsSYCLKernel)
490-
addAMDGCNMetadata(F, "kernel", 1);
491-
492453
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
493454
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
494455

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11066,7 +11066,8 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1106611066
ArgStringList CmdArgs;
1106711067

1106811068
// Pass the CUDA path to the linker wrapper tool.
11069-
for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
11069+
for (Action::OffloadKind Kind :
11070+
{Action::OFK_Cuda, Action::OFK_OpenMP, Action::OFK_SYCL}) {
1107011071
auto TCRange = C.getOffloadToolChains(Kind);
1107111072
for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
1107211073
const ToolChain *TC = I.second;

clang/test/CodeGenSYCL/kernel-annotation.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,12 @@ class Functor {
2323
};
2424

2525
// CHECK-SPIR-NOT: annotations =
26+
// CHECK-AMDGCN-NOT: annotations =
2627

2728
// CHECK-NVPTX: nvvm.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
2829
// CHECK-NVPTX: [[FIRST]] = !{ptr @_ZTS7Functor, !"kernel", i32 1}
2930
// CHECK-NVPTX: [[SECOND]] = !{ptr @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E5foo_2, !"kernel", i32 1}
3031

31-
// CHECK-AMDGCN: amdgcn.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
32-
// CHECK-AMDGCN: [[FIRST]] = !{ptr @_ZTS7Functor, !"kernel", i32 1}
33-
// CHECK-AMDGCN: [[SECOND]] = !{ptr @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E5foo_2, !"kernel", i32 1}
34-
3532
int main() {
3633
sycl::queue q;
3734
q.submit([&](sycl::handler &cgh) {

clang/test/Driver/linker-wrapper-sycl-win.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,11 @@
9090
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}llvm-link.exe" -only-needed [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc --suppress-warnings
9191
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}sycl-post-link.exe"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
9292
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}clang.exe"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}}
93-
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: {{.*}}, output: [[WRAPPEROUT:.*]].bc
93+
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
94+
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
95+
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]].bc
9496
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}llc.exe" -filetype=obj -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
9597
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
96-
9798
/// Check for list of commands for standalone clang-linker-wrapper run for sycl (AOT for AMD)
9899
// -------
99100
// Generate .o file as linker wrapper input.
@@ -107,6 +108,7 @@
107108
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llvm-link.exe" [[FIRSTLLVMLINKIN:.*]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc --suppress-warnings
108109
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}sycl-post-link.exe"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[FIRSTLLVMLINKOUT]].bc
109110
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang.exe"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}}
110-
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: {{.*}}, output: [[WRAPPEROUT:.*]].bc
111+
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang-offload-bundler.exe"{{.*}} -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]]
112+
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT:.*]].bc
111113
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llc.exe" -filetype=obj -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
112114
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]].o HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o

clang/test/Driver/linker-wrapper-sycl.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@
108108
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}llvm-link" -only-needed [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc --suppress-warnings
109109
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}sycl-post-link"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
110110
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}}
111-
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]]
111+
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
112+
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
113+
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]]
112114
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT:.*]] [[WRAPPEROUT]]
113115
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
114116

@@ -125,7 +127,8 @@
125127
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llvm-link" [[FIRSTLLVMLINKIN]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc --suppress-warnings
126128
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}sycl-post-link"{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[FIRSTLLVMLINKOUT]].bc
127129
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}}
128-
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]]
130+
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}clang-offload-bundler"{{.*}} -targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx803 -input=/dev/null -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]]
131+
// CHK-CMDS-AOT-AMD-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT:.*]]
129132
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT:.*]] [[WRAPPEROUT]]
130133
// CHK-CMDS-AOT-AMD-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
131134

@@ -150,7 +153,9 @@
150153
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: {{.*}}.bc, output: [[WRAPPEROUT1:.*]]
151154
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT1:.*]] [[WRAPPEROUT1]]
152155
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=nvptx64-nvidia-cuda -march={{.*}}
153-
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT:.*]]
156+
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}ptxas"{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
157+
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}fatbinary"{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
158+
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: input: [[FATBINOUT]], output: [[WRAPPEROUT:.*]]
154159
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT2:.*]] [[WRAPPEROUT]]
155160
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
156161

@@ -169,6 +174,7 @@
169174
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: {{.*}}.bc, output: [[WRAPPEROUT1:.*]]
170175
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT1:.*]] [[WRAPPEROUT1]]
171176
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}clang"{{.*}} -o [[CLANGOUT:.*]] --target=amdgcn-amd-amdhsa -mcpu={{.*}}
172-
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: [[WRAPPERIN:.*]], output: [[WRAPPEROUT2:.*]]
177+
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}clang-offload-bundler"{{.*}} -input=[[CLANGOUT]] -output=[[BUNDLEROUT:.*]]
178+
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: offload-wrapper: input: [[BUNDLEROUT]], output: [[WRAPPEROUT2:.*]]
173179
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT2:.*]] [[WRAPPEROUT2]]
174180
// CHK-CMDS-AOT-AMD-EMBED-IR-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o

clang/test/Driver/sycl-offload-new-driver.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,3 +188,10 @@
188188
// RUN: -Xsycl-target-backend=spir64_gen "-device pvc,bdw" %s 2>&1 \
189189
// RUN: | FileCheck -check-prefix COMMA_FILE %s
190190
// COMMA_FILE: clang-offload-packager{{.*}} "--image=file={{.*}}pvc@bdw{{.*}},triple=spir64_gen-unknown-unknown,arch=pvc,bdw,kind=sycl"
191+
192+
/// Verify that --cuda-path is passed to clang-linker-wrapper for SYCL offload
193+
// RUN: %clangxx -fsycl -### -fsycl-targets=nvptx64-nvidia-cuda \
194+
// RUN: --cuda-gpu-arch=sm_20 --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s \
195+
// RUN: --offload-new-driver 2>&1 \
196+
// RUN: | FileCheck -check-prefix NVPTX_CUDA_PATH %s
197+
// NVPTX_CUDA_PATH: clang-linker-wrapper{{.*}} "--cuda-path={{.*}}Inputs/CUDA_80/usr/local/cuda"

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//
1515
//===---------------------------------------------------------------------===//
1616

17+
#include "clang/Basic/Cuda.h"
1718
#include "clang/Basic/Version.h"
1819
#include "llvm/ADT/MapVector.h"
1920
#include "llvm/BinaryFormat/Magic.h"
@@ -409,6 +410,46 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
409410

410411
return *TempFileOrErr;
411412
}
413+
414+
// ptxas binary
415+
Expected<StringRef> ptxas(StringRef InputFile, const ArgList &Args,
416+
StringRef Arch) {
417+
llvm::TimeTraceScope TimeScope("NVPTX ptxas");
418+
// NVPTX uses the ptxas program to process assembly files.
419+
Expected<std::string> PtxasPath =
420+
findProgram("ptxas", {CudaBinaryPath + "/bin"});
421+
if (!PtxasPath)
422+
return PtxasPath.takeError();
423+
424+
llvm::Triple Triple(
425+
Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
426+
427+
// Create a new file to write the output to.
428+
auto TempFileOrErr =
429+
createOutputFile(sys::path::filename(ExecutableName), "cubin");
430+
if (!TempFileOrErr)
431+
return TempFileOrErr.takeError();
432+
433+
SmallVector<StringRef, 16> CmdArgs;
434+
CmdArgs.push_back(*PtxasPath);
435+
CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32");
436+
// Pass -v to ptxas if it was passed to the driver.
437+
if (Args.hasArg(OPT_verbose))
438+
CmdArgs.push_back("-v");
439+
StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
440+
if (Args.hasArg(OPT_debug))
441+
CmdArgs.push_back("-g");
442+
else
443+
CmdArgs.push_back(Args.MakeArgString("-" + OptLevel));
444+
CmdArgs.push_back("--gpu-name");
445+
CmdArgs.push_back(Arch);
446+
CmdArgs.push_back("--output-file");
447+
CmdArgs.push_back(*TempFileOrErr);
448+
CmdArgs.push_back(InputFile);
449+
if (Error Err = executeCommands(*PtxasPath, CmdArgs))
450+
return std::move(Err);
451+
return *TempFileOrErr;
452+
}
412453
} // namespace nvptx
413454

414455
namespace amdgcn {
@@ -1240,7 +1281,8 @@ static Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
12401281
} // namespace sycl
12411282

12421283
namespace generic {
1243-
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
1284+
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
1285+
bool IsSYCLKind = false) {
12441286
llvm::TimeTraceScope TimeScope("Clang");
12451287
// Use `clang` to invoke the appropriate device tools.
12461288
Expected<std::string> ClangPath =
@@ -1276,6 +1318,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
12761318
if (!Triple.isNVPTX())
12771319
CmdArgs.push_back("-Wl,--no-undefined");
12781320

1321+
if (IsSYCLKind && Triple.isNVPTX())
1322+
CmdArgs.push_back("-S");
12791323
for (StringRef InputFile : InputFiles)
12801324
CmdArgs.push_back(InputFile);
12811325

@@ -1369,7 +1413,7 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
13691413
case Triple::ppc64:
13701414
case Triple::ppc64le:
13711415
case Triple::systemz:
1372-
return generic::clang(InputFiles, Args);
1416+
return generic::clang(InputFiles, Args, IsSYCLKind);
13731417
case Triple::spirv32:
13741418
case Triple::spirv64:
13751419
case Triple::spir:
@@ -2078,14 +2122,40 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
20782122
return OutputFile.takeError();
20792123
WrappedOutput.push_back(*OutputFile);
20802124
}
2081-
20822125
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
20832126
SmallVector<StringRef> Files = {SplitModules[I].ModuleFilePath};
2084-
auto LinkedFileFinalOrErr =
2127+
StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ);
2128+
if (Arch.empty())
2129+
Arch = "native";
2130+
SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles;
2131+
auto ClangOutputOrErr =
20852132
linkDevice(Files, LinkerArgs, true /* IsSYCLKind */);
2086-
if (!LinkedFileFinalOrErr)
2087-
return LinkedFileFinalOrErr.takeError();
2088-
SplitModules[I].ModuleFilePath = *LinkedFileFinalOrErr;
2133+
if (!ClangOutputOrErr)
2134+
return ClangOutputOrErr.takeError();
2135+
if (Triple.isNVPTX()) {
2136+
auto VirtualArch = StringRef(clang::CudaArchToVirtualArchString(
2137+
clang::StringToCudaArch(Arch)));
2138+
auto PtxasOutputOrErr =
2139+
nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch);
2140+
if (!PtxasOutputOrErr)
2141+
return PtxasOutputOrErr.takeError();
2142+
BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch);
2143+
BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch);
2144+
auto BundledFileOrErr =
2145+
nvptx::fatbinary(BundlerInputFiles, LinkerArgs);
2146+
if (!BundledFileOrErr)
2147+
return BundledFileOrErr.takeError();
2148+
SplitModules[I].ModuleFilePath = *BundledFileOrErr;
2149+
} else if (Triple.isAMDGCN()) {
2150+
BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch);
2151+
auto BundledFileOrErr =
2152+
amdgcn::fatbinary(BundlerInputFiles, LinkerArgs);
2153+
if (!BundledFileOrErr)
2154+
return BundledFileOrErr.takeError();
2155+
SplitModules[I].ModuleFilePath = *BundledFileOrErr;
2156+
} else {
2157+
SplitModules[I].ModuleFilePath = *ClangOutputOrErr;
2158+
}
20892159
}
20902160
// TODO(NOM7): Remove this call and use community flow for bundle/wrap
20912161
auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs);

devops/cts_exclude_filter_L0_GPU

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# These two take too much time
22
kernel_bundle
33
marray
4-
# No issue created yet
4+
# Fix: https://github.com/intel/llvm/pull/14622
55
optional_kernel_features
6+
# https://github.com/intel/llvm/issues/14819
7+
queue
8+
spec_constants

devops/cts_exclude_filter_OCL_CPU

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@ marray
55
math_builtin_api
66
# https://github.com/intel/llvm/issues/13574
77
hierarchical
8+
# https://github.com/intel/llvm/issues/14819
9+
queue
10+
spec_constants

0 commit comments

Comments
 (0)