Skip to content

Commit d3047ee

Browse files
authored
Merge pull request #49230 from fwyzard/from-CMSSW_16_0_X_2025-10-27-2300
Explicitly export CUDA kernels in shared libraries
2 parents: 2d7d317 + d6b11f4; commit: d3047ee

File tree

3 files changed

+20
-18
lines changed

3 files changed

+20
-18
lines changed

HeterogeneousTest/CUDAKernel/README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@ kernels that call the device functions defined in the `HeterogeneousTest/CUDADev
1414
```c++
1515
namespace cms::cudatest {
1616

17-
__global__ void kernel_add_vectors_f(...);
18-
__global__ void kernel_add_vectors_d(...);
17+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_f(...);
18+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_d(...);
1919

2020
} // namespace cms::cudatest
2121
```

HeterogeneousTest/CUDAKernel/interface/DeviceAdditionKernel.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,17 @@
77

88
namespace cms::cudatest {
99

10-
__global__ void kernel_add_vectors_f(const float* __restrict__ in1,
11-
const float* __restrict__ in2,
12-
float* __restrict__ out,
13-
size_t size);
10+
// Mark the kernel with default visibility to export it as a public symbol for CUDA 12.8 and later
11+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_f(const float* __restrict__ in1,
12+
const float* __restrict__ in2,
13+
float* __restrict__ out,
14+
size_t size);
1415

15-
__global__ void kernel_add_vectors_d(const double* __restrict__ in1,
16-
const double* __restrict__ in2,
17-
double* __restrict__ out,
18-
size_t size);
16+
// Mark the kernel with default visibility to export it as a public symbol for CUDA 12.8 and later
17+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_d(const double* __restrict__ in1,
18+
const double* __restrict__ in2,
19+
double* __restrict__ out,
20+
size_t size);
1921

2022
} // namespace cms::cudatest
2123

HeterogeneousTest/CUDAKernel/src/DeviceAdditionKernel.cu

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@
77

88
namespace cms::cudatest {
99

10-
__global__ void kernel_add_vectors_f(const float* __restrict__ in1,
11-
const float* __restrict__ in2,
12-
float* __restrict__ out,
13-
size_t size) {
10+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_f(const float* __restrict__ in1,
11+
const float* __restrict__ in2,
12+
float* __restrict__ out,
13+
size_t size) {
1414
add_vectors_f(in1, in2, out, size);
1515
}
1616

17-
__global__ void kernel_add_vectors_d(const double* __restrict__ in1,
18-
const double* __restrict__ in2,
19-
double* __restrict__ out,
20-
size_t size) {
17+
__global__ __attribute__((visibility("default"))) void kernel_add_vectors_d(const double* __restrict__ in1,
18+
const double* __restrict__ in2,
19+
double* __restrict__ out,
20+
size_t size) {
2121
add_vectors_d(in1, in2, out, size);
2222
}
2323

0 commit comments

Comments
 (0)