Skip to content

Commit 8ef37de

Browse files
RossBrunton authored and callumfare committed
[UR][Offload] Support compiling on non-CUDA
This makes the dependency on cudadrv optional and, when cudadrv is not available, compiles out (via #ifdef) the CUBIN workaround. This isn't sufficient for HIP devices to compile kernels, but it does allow libur_adapter_offload to be built on non-CUDA hosts. In addition, an unused-variable error was fixed.
1 parent 49a18eb commit 8ef37de

File tree

4 files changed

+68
-32
lines changed

4 files changed

+68
-32
lines changed

source/adapters/offload/CMakeLists.txt

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ endif()
1414

1515
# For the PTX workaround we need to link with CUDA.
1616
if (NOT TARGET cudadrv)
17-
find_package(CUDA 10.1 REQUIRED)
17+
find_package(CUDA 10.1)
1818
add_library(cudadrv SHARED IMPORTED GLOBAL)
1919
set_target_properties(
2020
cudadrv PROPERTIES
@@ -49,9 +49,15 @@ target_link_libraries(${TARGET_NAME} PRIVATE
4949
${PROJECT_NAME}::common
5050
${PROJECT_NAME}::umf
5151
${UR_OFFLOAD_INSTALL_DIR}/lib/libLLVMOffload.so
52-
cudadrv
5352
)
5453

54+
if (CUDA_CUDA_LIBRARY)
55+
target_link_libraries(${TARGET_NAME}
56+
cudadrv
57+
)
58+
target_compile_definitions(${TARGET_NAME} PRIVATE UR_CUDA_ENABLED=1)
59+
endif()
60+
5561
target_include_directories(${TARGET_NAME} PRIVATE
5662
"${UR_OFFLOAD_INCLUDE_DIR}/offload"
5763
"${CMAKE_CURRENT_SOURCE_DIR}/../../"

source/adapters/offload/adapter.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,8 @@ ur_result_t ur_adapter_handle_t_::init() {
4646
},
4747
this);
4848

49+
(void)Res;
50+
4951
return UR_RESULT_SUCCESS;
5052
}
5153

source/adapters/offload/program.cpp

Lines changed: 57 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,63 @@
11
#include <OffloadAPI.h>
22
#include <ur/ur.hpp>
33
#include <ur_api.h>
4-
#include <cuda.h>
54

65
#include "context.hpp"
76
#include "program.hpp"
87
#include "ur2offload.hpp"
98

9+
#ifdef UR_CUDA_ENABLED
10+
#include <cuda.h>
11+
#endif
12+
13+
namespace {
14+
// Workaround for Offload not supporting PTX binaries. Force CUDA programs
15+
// to be linked so they end up as CUBIN.
16+
#ifdef UR_CUDA_ENABLED
17+
ur_result_t ProgramCreateCudaWorkaround(ur_context_handle_t hContext,
18+
const uint8_t *Binary, size_t Length,
19+
ur_program_handle_t *phProgram) {
20+
uint8_t *RealBinary;
21+
size_t RealLength;
22+
CUlinkState State;
23+
cuLinkCreate(0, nullptr, nullptr, &State);
24+
25+
cuLinkAddData(State, CU_JIT_INPUT_PTX, (char *)(Binary), Length, nullptr, 0,
26+
nullptr, nullptr);
27+
28+
void *CuBin = nullptr;
29+
size_t CuBinSize = 0;
30+
cuLinkComplete(State, &CuBin, &CuBinSize);
31+
RealBinary = (uint8_t *)CuBin;
32+
RealLength = CuBinSize;
33+
fprintf(stderr, "Performed CUDA bin workaround (size = %lu)\n", RealLength);
34+
35+
ur_program_handle_t Program = new ur_program_handle_t_();
36+
auto Res =
37+
olCreateProgram(reinterpret_cast<ol_device_handle_t>(hContext->Device),
38+
RealBinary, RealLength, &Program->OffloadProgram);
39+
40+
// Program owns the linked module now
41+
cuLinkDestroy(State);
42+
(void)State;
43+
44+
if (Res != OL_SUCCESS) {
45+
delete Program;
46+
return offloadResultToUR(Res);
47+
}
48+
49+
*phProgram = Program;
50+
51+
return UR_RESULT_SUCCESS;
52+
}
53+
#else
54+
ur_result_t ProgramCreateCudaWorkaround(ur_context_handle_t, const uint8_t *,
55+
size_t, ur_program_handle_t *) {
56+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
57+
}
58+
#endif
59+
} // namespace
60+
1061
UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
1162
ur_context_handle_t hContext, uint32_t numDevices,
1263
ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries,
@@ -15,45 +66,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
1566
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1667
}
1768

18-
// Workaround for Offload not supporting PTX binaries. Force CUDA programs
19-
// to be linked so they end up as CUBIN.
20-
uint8_t *RealBinary;
21-
size_t RealLength;
2269
ur_platform_handle_t DevicePlatform;
23-
bool DidLink = false;
24-
CUlinkState State;
2570
urDeviceGetInfo(phDevices[0], UR_DEVICE_INFO_PLATFORM,
2671
sizeof(ur_platform_handle_t), &DevicePlatform, nullptr);
2772
ur_platform_backend_t PlatformBackend;
2873
urPlatformGetInfo(DevicePlatform, UR_PLATFORM_INFO_BACKEND,
2974
sizeof(ur_platform_backend_t), &PlatformBackend, nullptr);
3075
if (PlatformBackend == UR_PLATFORM_BACKEND_CUDA) {
31-
cuLinkCreate(0, nullptr, nullptr, &State);
32-
33-
cuLinkAddData(State, CU_JIT_INPUT_PTX, (char *)(ppBinaries[0]), pLengths[0],
34-
nullptr, 0, nullptr, nullptr);
35-
36-
void *CuBin = nullptr;
37-
size_t CuBinSize = 0;
38-
cuLinkComplete(State, &CuBin, &CuBinSize);
39-
RealBinary = (uint8_t *)CuBin;
40-
RealLength = CuBinSize;
41-
DidLink = true;
42-
fprintf(stderr, "Performed CUDA bin workaround (size = %lu)\n", RealLength);
43-
} else {
44-
RealBinary = const_cast<uint8_t *>(ppBinaries[0]);
45-
RealLength = pLengths[0];
76+
return ProgramCreateCudaWorkaround(hContext, ppBinaries[0], pLengths[0],
77+
phProgram);
4678
}
4779

80+
auto *RealBinary = const_cast<uint8_t *>(ppBinaries[0]);
81+
4882
ur_program_handle_t Program = new ur_program_handle_t_();
4983
auto Res =
5084
olCreateProgram(reinterpret_cast<ol_device_handle_t>(hContext->Device),
51-
RealBinary, RealLength, &Program->OffloadProgram);
52-
53-
// Program owns the linked module now
54-
if (DidLink) {
55-
cuLinkDestroy(State);
56-
}
85+
RealBinary, pLengths[0], &Program->OffloadProgram);
5786

5887
if (Res != OL_SUCCESS) {
5988
delete Program;
@@ -80,7 +109,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(ur_program_handle_t,
80109
return UR_RESULT_SUCCESS;
81110
}
82111

83-
84112
UR_APIEXPORT ur_result_t UR_APICALL
85113
urProgramRetain(ur_program_handle_t hProgram) {
86114
hProgram->RefCount++;

test/conformance/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ if(UR_DPCXX)
131131
if(UR_BUILD_ADAPTER_CUDA OR UR_BUILD_ADAPTER_ALL)
132132
list(APPEND TARGET_TRIPLES "nvptx64-nvidia-cuda")
133133
endif()
134-
if(UR_BUILD_ADAPTER_HIP OR UR_BUILD_ADAPTER_ALL)
134+
if(UR_BUILD_ADAPTER_HIP OR UR_BUILD_ADAPTER_OFFLOAD OR UR_BUILD_ADAPTER_ALL)
135135
list(APPEND TARGET_TRIPLES "amdgcn-amd-amdhsa")
136136
endif()
137137
else()

0 commit comments

Comments
 (0)