Skip to content

Commit 377769e

Browse files
authored
Merge pull request #51 from SeisSol/davschneller/cuda13
Make compile for CUDA 13
2 parents 6d39fb2 + c686147 commit 377769e

File tree

5 files changed

+45
-10
lines changed

5 files changed

+45
-10
lines changed

interfaces/cuda/Control.cu

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,9 @@ std::string ConcreteAPI::getDeviceInfoAsText(int deviceId) {
125125
info << "memPitch: " << property.memPitch << '\n';
126126
info << "maxThreadsPerBlock: " << property.maxThreadsPerBlock << '\n';
127127
info << "totalConstMem: " << property.totalConstMem << '\n';
128-
info << "clockRate: " << property.clockRate << '\n';
129128
info << "multiProcessorCount: " << property.multiProcessorCount << '\n';
130129
info << "integrated: " << property.integrated << '\n';
131130
info << "canMapHostMemory: " << property.canMapHostMemory << '\n';
132-
info << "computeMode: " << property.computeMode << '\n';
133131
info << "concurrentKernels: " << property.concurrentKernels << '\n';
134132
info << "pciBusID: " << property.pciBusID << '\n';
135133
info << "pciDeviceID: " << property.pciDeviceID << '\n';

interfaces/cuda/Copy.cu

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,28 @@ void ConcreteAPI::prefetchUnifiedMemTo(Destination type, const void *devPtr, siz
7171
void *streamPtr) {
7272
isFlagSet<InterfaceInitialized>(status);
7373
cudaStream_t stream = (streamPtr == nullptr) ? 0 : (static_cast<cudaStream_t>(streamPtr));
74+
75+
cudaMemLocation location{};
76+
if (type == Destination::Host) {
77+
location.id = cudaCpuDeviceId;
78+
#if CUDART_VERSION >= 13000
79+
location.type = cudaMemLocationTypeHost;
80+
#endif
81+
}
82+
else if (allowedConcurrentManagedAccess) {
83+
location.id = currentDeviceId;
84+
#if CUDART_VERSION >= 13000
85+
location.type = cudaMemLocationTypeDevice;
86+
#endif
87+
}
88+
7489
cudaMemPrefetchAsync(devPtr,
7590
count,
76-
type == Destination::CurrentDevice ? currentDeviceId : cudaCpuDeviceId,
91+
#if CUDART_VERSION >= 13000
92+
location, 0,
93+
#else
94+
location.id,
95+
#endif
7796
stream);
7897
CHECK_ERR;
7998
}

interfaces/cuda/Memory.cu

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,30 @@ void *ConcreteAPI::allocUnifiedMem(size_t size, bool compress, Destination hint)
9696
void *devPtr;
9797
cudaMallocManaged(&devPtr, size, cudaMemAttachGlobal);
9898
CHECK_ERR;
99+
100+
cudaMemLocation location{};
99101
if (hint == Destination::Host) {
100-
cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId);
101-
CHECK_ERR;
102+
location.id = cudaCpuDeviceId;
103+
#if CUDART_VERSION >= 13000
104+
location.type = cudaMemLocationTypeHost;
105+
#endif
102106
}
103107
else if (allowedConcurrentManagedAccess) {
104-
cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation, currentDeviceId);
105-
CHECK_ERR;
108+
location.id = currentDeviceId;
109+
#if CUDART_VERSION >= 13000
110+
location.type = cudaMemLocationTypeDevice;
111+
#endif
106112
}
113+
114+
cudaMemAdvise(devPtr, size, cudaMemAdviseSetPreferredLocation,
115+
#if CUDART_VERSION >= 13000
116+
location
117+
#else
118+
location.id
119+
#endif
120+
);
121+
CHECK_ERR;
122+
107123
statistics.allocatedMemBytes += size;
108124
statistics.allocatedUnifiedMemBytes += size;
109125
memToSizeMap[devPtr] = size;

interfaces/hip/Control.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,9 @@ std::string ConcreteAPI::getDeviceInfoAsText(int deviceId) {
134134
info << "memPitch: " << property.memPitch << '\n';
135135
info << "maxThreadsPerBlock: " << property.maxThreadsPerBlock << '\n';
136136
info << "totalConstMem: " << property.totalConstMem << '\n';
137-
info << "clockRate: " << property.clockRate << '\n';
138137
info << "multiProcessorCount: " << property.multiProcessorCount << '\n';
139138
info << "integrated: " << property.integrated << '\n';
140139
info << "canMapHostMemory: " << property.canMapHostMemory << '\n';
141-
info << "computeMode: " << property.computeMode << '\n';
142140
info << "concurrentKernels: " << property.concurrentKernels << '\n';
143141
info << "pciBusID: " << property.pciBusID << '\n';
144142
info << "pciDeviceID: " << property.pciDeviceID << '\n';

sycl.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,11 @@ if ((${DEVICE_BACKEND} STREQUAL "acpp") OR (${DEVICE_BACKEND} STREQUAL "hipsycl"
5151
find_package(AdaptiveCpp REQUIRED)
5252
find_package(OpenMP REQUIRED)
5353
target_compile_options(device PRIVATE -Wno-unknown-cuda-version)
54-
target_link_libraries(device PUBLIC ${OpenMP_CXX_FLAGS})
54+
55+
# We need to link against both the OpenMP::OpenMP_CXX imported target and the raw ${OpenMP_CXX_FLAGS} (otherwise some symbols won't be found).
56+
target_link_libraries(device PRIVATE OpenMP::OpenMP_CXX)
57+
target_link_libraries(device PRIVATE ${OpenMP_CXX_FLAGS})
58+
5559
add_sycl_to_target(TARGET device SOURCES ${DEVICE_SOURCE_FILES})
5660
else()
5761
find_package(DpcppFlags REQUIRED)

0 commit comments

Comments
 (0)