Skip to content

Commit d6b11f4

Browse files
committed
Explicitly export CUDA kernels in shared libraries
Mark CUDA kernels in shared libraries as having default visibility, so they can be used by other executables, libraries or plugins. The default ELF symbol visibility of CUDA kernels changed from default (public, exported) to hidden (private to the shared library) in CUDA 13.0.
1 parent a7e8f69 commit d6b11f4

File tree

3 files changed

+20
-18
lines changed

3 files changed

+20
-18
lines changed

HeterogeneousTest/CUDAKernel/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ kernels that call the device functions defined in the `HeterogeneousTest/CUDADev
1414
```c++
1515
namespace cms::cudatest {
1616

17-
__global__ void kernel_add_vectors_f(...);
18-
__global__ void kernel_add_vectors_d(...);
17+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_f(...);
18+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_d(...);
1919

2020
} // namespace cms::cudatest
2121
```

HeterogeneousTest/CUDAKernel/interface/DeviceAdditionKernel.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,17 @@
77

88
namespace cms::cudatest {
99

10-
__global__ void kernel_add_vectors_f(const float* __restrict__ in1,
11-
const float* __restrict__ in2,
12-
float* __restrict__ out,
13-
size_t size);
10+
// Mark the kernel with default visibility to export it as a public symbol for CUDA 12.8 and later (kernels are no longer exported by default as of CUDA 13.0)
11+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_f(const float* __restrict__ in1,
12+
const float* __restrict__ in2,
13+
float* __restrict__ out,
14+
size_t size);
1415

15-
__global__ void kernel_add_vectors_d(const double* __restrict__ in1,
16-
const double* __restrict__ in2,
17-
double* __restrict__ out,
18-
size_t size);
16+
// Mark the kernel with default visibility to export it as a public symbol for CUDA 12.8 and later (kernels are no longer exported by default as of CUDA 13.0)
17+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_d(const double* __restrict__ in1,
18+
const double* __restrict__ in2,
19+
double* __restrict__ out,
20+
size_t size);
1921

2022
} // namespace cms::cudatest
2123

HeterogeneousTest/CUDAKernel/src/DeviceAdditionKernel.cu

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@
77

88
namespace cms::cudatest {
99

10-
__global__ void kernel_add_vectors_f(const float* __restrict__ in1,
11-
const float* __restrict__ in2,
12-
float* __restrict__ out,
13-
size_t size) {
10+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_f(const float* __restrict__ in1,
11+
const float* __restrict__ in2,
12+
float* __restrict__ out,
13+
size_t size) {
1414
add_vectors_f(in1, in2, out, size);
1515
}
1616

17-
__global__ void kernel_add_vectors_d(const double* __restrict__ in1,
18-
const double* __restrict__ in2,
19-
double* __restrict__ out,
20-
size_t size) {
17+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_d(const double* __restrict__ in1,
18+
const double* __restrict__ in2,
19+
double* __restrict__ out,
20+
size_t size) {
2121
add_vectors_d(in1, in2, out, size);
2222
}
2323

0 commit comments

Comments
 (0)