Skip to content

Commit 802f3cc

Browse files
authored
[SYCL] Don't add ITT libdevice to the linking list for NV, AMD, and NativeCPU backends (#19603)
The ITT device libraries are used by -fsycl-instrument-device-code for Intel VTune support; they are not required by the NV, AMD, and NativeCPU backends. This PR removes the ITT device library files from the device linking list for these backends. With this change, the driver code no longer needs a special check for the NativeCPU backend to filter out all libraries other than "libsycl-nativecpu_utils". --------- Signed-off-by: jinge90 <[email protected]>
1 parent b752163 commit 802f3cc

File tree

8 files changed

+180
-175
lines changed

8 files changed

+180
-175
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5903,13 +5903,6 @@ class OffloadingActionBuilder final {
59035903
SmallString<128> LibName(LLCandidate);
59045904
llvm::sys::path::append(LibName, DeviceLib);
59055905
if (llvm::sys::fs::exists(LibName)) {
5906-
// NativeCPU currently only needs libsycl-nativecpu_utils and
5907-
// libclc, so temporarily skip other device libs in invocation.
5908-
// Todo: remove once NativeCPU tests the other libraries.
5909-
if (isNativeCPU &&
5910-
!LibName.str().contains("libsycl-nativecpu_utils"))
5911-
continue;
5912-
59135906
++NumOfDeviceLibLinked;
59145907
Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(),
59155908
Args.MakeArgString(LibName));
@@ -5934,7 +5927,7 @@ class OffloadingActionBuilder final {
59345927
}
59355928
}
59365929

5937-
if (!NumOfDeviceLibLinked)
5930+
if (!NumOfDeviceLibLinked && !TC->getTriple().isNVPTX())
59385931
return false;
59395932

59405933
// For NVPTX we need to also link libclc at the same stage that we link

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
434434

435435
// For NVPTX and AMDGCN we only use one single bitcode library and ignore
436436
// manually specified SYCL device libraries.
437-
bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN();
437+
// For NativeCPU, only native_utils devicelib is used.
438+
bool UseSingleLib = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN() ||
439+
TargetTriple.isNativeCPU();
440+
bool IgnoreSingleLib = false;
438441

439442
struct DeviceLibOptInfo {
440443
StringRef DeviceLibName;
@@ -474,8 +477,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
474477

475478
// Make sure that internal libraries are still linked against
476479
// when -fno-sycl-device-lib contains "all" and single libraries
477-
// should be ignored.
478-
IgnoreSingleLibs = IgnoreSingleLibs && !ExcludeDeviceLibs;
480+
// should be ignored. For NativeCPU, the native_cpu utils library
481+
// is always linked without '-only-needed' flag.
482+
IgnoreSingleLib =
483+
UseSingleLib && ExcludeDeviceLibs && !TargetTriple.isNativeCPU();
479484

480485
for (const auto &K : DeviceLibLinkInfo.keys())
481486
DeviceLibLinkInfo[K] = (K == "internal") || !ExcludeDeviceLibs;
@@ -490,21 +495,24 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
490495
<< A->getSpelling() << Val;
491496
}
492497
DeviceLibLinkInfo[Val] = !ExcludeDeviceLibs;
493-
PrintUnusedExcludeWarning = IgnoreSingleLibs && ExcludeDeviceLibs;
498+
PrintUnusedExcludeWarning = UseSingleLib && ExcludeDeviceLibs;
494499
}
495500
if (PrintUnusedExcludeWarning)
496501
C.getDriver().Diag(diag::warn_drv_unused_argument) << A->getSpelling();
497502
}
498503
}
499504

500-
if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
505+
if (TargetTriple.isNVPTX() && !IgnoreSingleLib)
501506
LibraryList.push_back(
502507
Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc"));
503508

504-
if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
509+
if (TargetTriple.isAMDGCN() && !IgnoreSingleLib)
505510
LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc"));
506511

507-
if (IgnoreSingleLibs)
512+
if (TargetTriple.isNativeCPU() && !IgnoreSingleLib)
513+
LibraryList.push_back(Args.MakeArgString("libsycl-nativecpu_utils.bc"));
514+
515+
if (UseSingleLib)
508516
return LibraryList;
509517

510518
using SYCLDeviceLibsList = SmallVector<DeviceLibOptInfo, 5>;

clang/test/Driver/sycl-device-lib-amdgcn.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,6 @@
44

55
// UNSUPPORTED: system-windows
66

7-
// Check if internal libraries are still linked against when linkage of all
8-
// device libs is manually excluded.
9-
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
10-
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 -fsycl-instrument-device-code %s 2>&1 \
11-
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
12-
13-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
14-
// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906)
15-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
16-
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906)
17-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
18-
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906)
19-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-amdgcn-amd-amdhsa.bc", ir, (device-sycl, gfx906)
20-
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906)
21-
227
// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
238
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
249
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \

clang/test/Driver/sycl-device-lib-nvptx.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,6 @@
44

55
// UNSUPPORTED: system-windows
66

7-
// Check if internal libraries are still linked against when linkage of all
8-
// device libs is manually excluded.
9-
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
10-
// RUN: -fsycl-targets=nvptx64-nvidia-cuda -fsycl-instrument-device-code %s 2>&1 \
11-
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
12-
13-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
14-
// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
15-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
16-
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
17-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
18-
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
19-
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib-nvptx64-nvidia-cuda.bc", ir, (device-sycl, sm_50)
20-
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, sm_50)
21-
227
// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
238
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
249
// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \

clang/test/Driver/sycl-offload-nvptx.cpp

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
/// Check phases w/out specifying a compute capability.
3838
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \
3939
// RUN: -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all \
40-
// RUN: -fsycl-instrument-device-code -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
40+
// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
4141
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/share/clc/remangled-l32-signed_char.libspirv-nvptx64-nvidia-cuda.bc \
4242
// RUN: --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda \
4343
// RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s
@@ -54,27 +54,24 @@
5454
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
5555
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
5656
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50)
57-
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
58-
// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
59-
// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
60-
// CHK-PHASES-NO-CC: 13: input, "{{.*}}libspirv-nvptx64{{.*}}", ir, (device-sycl, sm_50)
61-
// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
62-
// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50)
63-
// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50)
64-
// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50)
65-
// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50)
66-
// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50)
67-
// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50)
68-
// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50)
69-
// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50)
70-
// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50)
71-
// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object
72-
// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl)
57+
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libspirv-nvptx64{{.*}}", ir, (device-sycl, sm_50)
58+
// CHK-PHASES-NO-CC: 11: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
59+
// CHK-PHASES-NO-CC: 12: linker, {9, 10, 11}, ir, (device-sycl, sm_50)
60+
// CHK-PHASES-NO-CC: 13: sycl-post-link, {12}, ir, (device-sycl, sm_50)
61+
// CHK-PHASES-NO-CC: 14: file-table-tform, {13}, ir, (device-sycl, sm_50)
62+
// CHK-PHASES-NO-CC: 15: backend, {14}, assembler, (device-sycl, sm_50)
63+
// CHK-PHASES-NO-CC: 16: assembler, {15}, object, (device-sycl, sm_50)
64+
// CHK-PHASES-NO-CC: 17: linker, {15, 16}, cuda-fatbin, (device-sycl, sm_50)
65+
// CHK-PHASES-NO-CC: 18: foreach, {14, 17}, cuda-fatbin, (device-sycl, sm_50)
66+
// CHK-PHASES-NO-CC: 19: file-table-tform, {13, 18}, tempfiletable, (device-sycl, sm_50)
67+
// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, sm_50)
68+
// CHK-PHASES-NO-CC: 21: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {20}, object
69+
// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl)
7370
//
7471
/// Check phases specifying a compute capability.
7572
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \
7673
// RUN: -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all \
77-
// RUN: -fsycl-instrument-device-code -fsycl-targets=nvptx64-nvidia-cuda \
74+
// RUN: -fsycl-targets=nvptx64-nvidia-cuda \
7875
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/share/clc/remangled-l32-signed_char.libspirv-nvptx64-nvidia-cuda.bc \
7976
// RUN: --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda \
8077
// RUN: -Xsycl-target-backend "--cuda-gpu-arch=sm_35" %s 2>&1 \
@@ -92,22 +89,19 @@
9289
// CHK-PHASES: 7: backend, {6}, assembler, (host-sycl)
9390
// CHK-PHASES: 8: assembler, {7}, object, (host-sycl)
9491
// CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35)
95-
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35)
96-
// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35)
97-
// CHK-PHASES: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35)
98-
// CHK-PHASES: 13: input, "{{.*}}libspirv-nvptx64{{.*}}", ir, (device-sycl, sm_35)
99-
// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
100-
// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35)
101-
// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35)
102-
// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35)
103-
// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35)
104-
// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35)
105-
// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35)
106-
// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35)
107-
// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35)
108-
// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35)
109-
// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object
110-
// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl)
92+
// CHK-PHASES: 10: input, "{{.*}}libspirv-nvptx64{{.*}}", ir, (device-sycl, sm_35)
93+
// CHK-PHASES: 11: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
94+
// CHK-PHASES: 12: linker, {9, 10, 11}, ir, (device-sycl, sm_35)
95+
// CHK-PHASES: 13: sycl-post-link, {12}, ir, (device-sycl, sm_35)
96+
// CHK-PHASES: 14: file-table-tform, {13}, ir, (device-sycl, sm_35)
97+
// CHK-PHASES: 15: backend, {14}, assembler, (device-sycl, sm_35)
98+
// CHK-PHASES: 16: assembler, {15}, object, (device-sycl, sm_35)
99+
// CHK-PHASES: 17: linker, {15, 16}, cuda-fatbin, (device-sycl, sm_35)
100+
// CHK-PHASES: 18: foreach, {14, 17}, cuda-fatbin, (device-sycl, sm_35)
101+
// CHK-PHASES: 19: file-table-tform, {13, 18}, tempfiletable, (device-sycl, sm_35)
102+
// CHK-PHASES: 20: clang-offload-wrapper, {19}, object, (device-sycl, sm_35)
103+
// CHK-PHASES: 21: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {20}, object
104+
// CHK-PHASES: 22: linker, {8, 21}, image, (host-sycl)
111105

112106
/// Check calling preprocessor only
113107
// RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \

0 commit comments

Comments
 (0)