Skip to content

Commit a11d6f8

Browse files
committed
Fix Offload build on CUDA and detect correct targets in the CTS
1 parent 4e8a581 commit a11d6f8

File tree

4 files changed

+46
-31
lines changed

4 files changed

+46
-31
lines changed

source/adapters/offload/CMakeLists.txt

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ if (NOT TARGET cudadrv)
1818
add_library(cudadrv SHARED IMPORTED GLOBAL)
1919
set_target_properties(
2020
cudadrv PROPERTIES
21-
IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY}
22-
INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS}
21+
IMPORTED_LOCATION ${CUDA_cuda_driver_LIBRARY}
22+
INTERFACE_INCLUDE_DIRECTORIES ${CUDAToolkit_INCLUDE_DIRS}
2323
)
2424
endif()
2525

@@ -44,20 +44,22 @@ set_target_properties(${TARGET_NAME} PROPERTIES
4444
SOVERSION "${PROJECT_VERSION_MAJOR}"
4545
)
4646

47+
set(ADDITIONAL_LINK_LIBS "")
48+
if (CUDA_cuda_driver_LIBRARY)
49+
list(APPEND ADDITIONAL_LINK_LIBS
50+
cudadrv
51+
)
52+
target_compile_definitions(${TARGET_NAME} PRIVATE UR_CUDA_ENABLED)
53+
endif()
54+
4755
target_link_libraries(${TARGET_NAME} PRIVATE
4856
${PROJECT_NAME}::headers
4957
${PROJECT_NAME}::common
5058
${PROJECT_NAME}::umf
5159
${UR_OFFLOAD_INSTALL_DIR}/lib/libLLVMOffload.so
60+
${ADDITIONAL_LINK_LIBS}
5261
)
5362

54-
if (CUDA_CUDA_LIBRARY)
55-
target_link_libraries(${TARGET_NAME}
56-
cudadrv
57-
)
58-
target_compile_definitions(${TARGET_NAME} PRIVATE UR_CUDA_ENABLED=1)
59-
endif()
60-
6163
target_include_directories(${TARGET_NAME} PRIVATE
6264
"${UR_OFFLOAD_INCLUDE_DIR}/offload"
6365
"${CMAKE_CURRENT_SOURCE_DIR}/../../"

source/adapters/offload/device.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -119,15 +119,20 @@ urDevicePartition(ur_device_handle_t, const ur_device_partition_properties_t *,
119119
UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary(
120120
ur_device_handle_t hDevice, const ur_device_binary_t *pBinaries,
121121
uint32_t NumBinaries, uint32_t *pSelectedBinary) {
122-
std::ignore = hDevice;
123-
std::ignore = pBinaries;
124-
std::ignore = NumBinaries;
125-
std::ignore = pSelectedBinary;
126122

127-
// TODO: Don't hard code nvptx64!
128-
const char *image_target = UR_DEVICE_BINARY_TARGET_NVPTX64;
123+
ol_platform_backend_t Backend;
124+
olGetPlatformInfo(hDevice->Platform->OffloadPlatform,
125+
OL_PLATFORM_INFO_BACKEND, sizeof(Backend), &Backend);
126+
127+
const char *ImageTarget = UR_DEVICE_BINARY_TARGET_UNKNOWN;
128+
if (Backend == OL_PLATFORM_BACKEND_CUDA) {
129+
ImageTarget = UR_DEVICE_BINARY_TARGET_NVPTX64;
130+
} else if (Backend == OL_PLATFORM_BACKEND_AMDGPU) {
131+
ImageTarget = UR_DEVICE_BINARY_TARGET_AMDGCN;
132+
}
133+
129134
for (uint32_t i = 0; i < NumBinaries; ++i) {
130-
if (strcmp(pBinaries[i].pDeviceTargetSpec, image_target) == 0) {
135+
if (strcmp(pBinaries[i].pDeviceTargetSpec, ImageTarget) == 0) {
131136
*pSelectedBinary = i;
132137
return UR_RESULT_SUCCESS;
133138
}

source/adapters/offload/program.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#include "context.hpp"
66
#include "device.hpp"
7+
#include "platform.hpp"
78
#include "program.hpp"
89
#include "ur2offload.hpp"
910

@@ -31,15 +32,17 @@ ur_result_t ProgramCreateCudaWorkaround(ur_context_handle_t hContext,
3132
cuLinkComplete(State, &CuBin, &CuBinSize);
3233
RealBinary = (uint8_t *)CuBin;
3334
RealLength = CuBinSize;
35+
36+
#if 0
3437
fprintf(stderr, "Performed CUDA bin workaround (size = %lu)\n", RealLength);
38+
#endif
3539

3640
ur_program_handle_t Program = new ur_program_handle_t_();
3741
auto Res = olCreateProgram(hContext->Device->OffloadDevice, RealBinary,
3842
RealLength, &Program->OffloadProgram);
3943

4044
// Program owns the linked module now
4145
cuLinkDestroy(State);
42-
(void)State;
4346

4447
if (Res != OL_SUCCESS) {
4548
delete Program;
@@ -146,13 +149,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
146149
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
147150
}
148151

149-
ur_platform_handle_t DevicePlatform;
150-
urDeviceGetInfo(phDevices[0], UR_DEVICE_INFO_PLATFORM,
151-
sizeof(ur_platform_handle_t), &DevicePlatform, nullptr);
152-
ur_platform_backend_t PlatformBackend;
153-
urPlatformGetInfo(DevicePlatform, UR_PLATFORM_INFO_BACKEND,
154-
sizeof(ur_platform_backend_t), &PlatformBackend, nullptr);
155-
156152
auto *RealBinary = ppBinaries[0];
157153
size_t RealLength = pLengths[0];
158154

@@ -171,7 +167,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
171167
}
172168
}
173169

174-
if (PlatformBackend == UR_PLATFORM_BACKEND_CUDA) {
170+
ol_platform_backend_t Backend;
171+
olGetPlatformInfo(phDevices[0]->Platform->OffloadPlatform,
172+
OL_PLATFORM_INFO_BACKEND, sizeof(Backend), &Backend);
173+
if (Backend == OL_PLATFORM_BACKEND_CUDA) {
175174
return ProgramCreateCudaWorkaround(hContext, RealBinary, RealLength,
176175
phProgram);
177176
}

test/conformance/source/environment.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,14 +216,23 @@ std::string KernelsEnvironment::getTargetName(ur_platform_handle_t platform) {
216216
case UR_PLATFORM_BACKEND_HIP:
217217
return "amdgcn-amd-amdhsa";
218218
case UR_PLATFORM_BACKEND_OFFLOAD: {
219-
// TODO: In future this should use urDeviceSelectBinary
220-
auto result = ur_getenv("UR_OFFLOAD_TARGET_NAME");
221-
if (!result) {
222-
error = "For offload testing, please specify a target in "
223-
"`UR_OFFLOAD_TARGET_NAME`";
219+
// All Offload platforms report this backend; use the platform name to select
220+
// the actual underlying backend.
221+
std::vector<char> PlatformName;
222+
size_t PlatformNameSize = 0;
223+
urPlatformGetInfo(platform, UR_PLATFORM_INFO_NAME, 0, nullptr,
224+
&PlatformNameSize);
225+
PlatformName.resize(PlatformNameSize);
226+
urPlatformGetInfo(platform, UR_PLATFORM_INFO_NAME, PlatformNameSize,
227+
PlatformName.data(), nullptr);
228+
if (std::strcmp(PlatformName.data(), "CUDA") == 0) {
229+
return "nvptx64-nvidia-cuda";
230+
} else if (std::strcmp(PlatformName.data(), "AMDGPU") == 0) {
231+
return "amdgcn-amd-amdhsa";
232+
} else {
233+
error = "Could not detect target for Offload platform";
224234
return {};
225235
}
226-
return *result;
227236
}
228237
case UR_PLATFORM_BACKEND_NATIVE_CPU:
229238
error = "native_cpu doesn't support kernel tests yet";

0 commit comments

Comments
 (0)