Skip to content

Commit 738c067

Browse files
authored
Add hipSYCL scope_handle and host_task (#122)
* [cublas] Add hipSYCL scope_handle and host_task
1 parent b296922 commit 738c067

File tree

11 files changed

+253
-26
lines changed

11 files changed

+253
-26
lines changed

include/oneapi/mkl/types.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#define _ONEMKL_TYPES_HPP_
2222

2323
#include "oneapi/mkl/bfloat16.hpp"
24+
#include <CL/sycl.hpp>
2425

2526
namespace oneapi {
2627
namespace mkl {
@@ -107,4 +108,10 @@ enum class order : char {
107108
} //namespace mkl
108109
} //namespace oneapi
109110

111+
// Workaround for supporting ::half for hipSYCL
112+
// TODO: This should be removed after the interface is SYCL2020 conformant
113+
#ifdef __HIPSYCL__
114+
using ::cl::sycl::half;
115+
#endif
116+
110117
#endif //_ONEMKL_TYPES_HPP_

src/blas/backends/cublas/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ set(SOURCES cublas_level1.cpp
2525
cublas_level3.cpp
2626
cublas_batch.cpp
2727
cublas_extensions.cpp
28-
cublas_scope_handle.cpp
28+
$<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},dpc++>:cublas_scope_handle.cpp >
29+
$<$<STREQUAL:${ONEMKL_SYCL_IMPLEMENTATION},hipsycl>:cublas_scope_handle_hipsycl.cpp >
2930
$<$<BOOL:${BUILD_SHARED_LIBS}>: cublas_wrappers.cpp>)
3031
add_library(${LIB_NAME})
3132
add_library(${LIB_OBJ} OBJECT ${SOURCES})
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/***************************************************************************
2+
* Copyright (C) Codeplay Software Limited
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* For your convenience, a copy of the License has been included in this
10+
* repository.
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
**************************************************************************/
19+
#ifndef CUBLAS_HANDLE_HPP
20+
#define CUBLAS_HANDLE_HPP
21+
#include<atomic>
22+
#include<unordered_map>
23+
24+
namespace oneapi {
25+
namespace mkl {
26+
namespace blas {
27+
namespace cublas {
28+
29+
template<typename T>
30+
struct cublas_handle {
31+
using handle_container_t = std::unordered_map<T, std::atomic<cublasHandle_t> *>;
32+
handle_container_t cublas_handle_mapper_{};
33+
~cublas_handle() noexcept(false){
34+
for (auto &handle_pair : cublas_handle_mapper_) {
35+
cublasStatus_t err;
36+
if (handle_pair.second != nullptr) {
37+
auto handle = handle_pair.second->exchange(nullptr);
38+
if (handle != nullptr) {
39+
CUBLAS_ERROR_FUNC(cublasDestroy, err, handle);
40+
handle = nullptr;
41+
}
42+
delete handle_pair.second;
43+
handle_pair.second = nullptr;
44+
}
45+
}
46+
cublas_handle_mapper_.clear();
47+
}
48+
};
49+
50+
51+
} // namespace cublas
52+
} // namespace blas
53+
} // namespace mkl
54+
} // namespace oneapi
55+
56+
#endif // CUBLAS_HANDLE_HPP

src/blas/backends/cublas/cublas_scope_handle.cpp

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,29 +24,14 @@ namespace mkl {
2424
namespace blas {
2525
namespace cublas {
2626

27-
cublas_handle::~cublas_handle() noexcept(false) {
28-
for (auto &handle_pair : cublas_handle_mapper_) {
29-
cublasStatus_t err;
30-
if (handle_pair.second != nullptr) {
31-
auto handle = handle_pair.second->exchange(nullptr);
32-
if (handle != nullptr) {
33-
CUBLAS_ERROR_FUNC(cublasDestroy, err, handle);
34-
handle = nullptr;
35-
}
36-
delete handle_pair.second;
37-
handle_pair.second = nullptr;
38-
}
39-
}
40-
cublas_handle_mapper_.clear();
41-
}
4227
/**
4328
* Inserts a new element in the map if its key is unique. This new element
4429
* is constructed in place using args as the arguments for the construction
4530
* of a value_type (which is an object of a pair type). The insertion only
4631
* takes place if no other element in the container has a key equivalent to
4732
* the one being emplaced (keys in a map container are unique).
4833
*/
49-
thread_local cublas_handle CublasScopedContextHandler::handle_helper = cublas_handle{};
34+
thread_local cublas_handle<pi_context> CublasScopedContextHandler::handle_helper = cublas_handle<pi_context>{};
5035

5136
CublasScopedContextHandler::CublasScopedContextHandler(cl::sycl::queue queue,
5237
cl::sycl::interop_handler &ih)

src/blas/backends/cublas/cublas_scope_handle.hpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,13 @@
2727
#include <thread>
2828
#include <unordered_map>
2929
#include "cublas_helper.hpp"
30+
#include "cublas_handle.hpp"
31+
3032
namespace oneapi {
3133
namespace mkl {
3234
namespace blas {
3335
namespace cublas {
3436

35-
struct cublas_handle {
36-
using handle_container_t = std::unordered_map<pi_context, std::atomic<cublasHandle_t> *>;
37-
handle_container_t cublas_handle_mapper_{};
38-
~cublas_handle() noexcept(false);
39-
};
40-
4137
/**
4238
* @brief NVIDIA advise for handle creation:
4339
https://devtalk.nvidia.com/default/topic/838794/gpu-accelerated libraries/using-cublas-in-different-cuda-streams/
@@ -69,7 +65,7 @@ class CublasScopedContextHandler {
6965
cl::sycl::context placedContext_;
7066
bool needToRecover_;
7167
cl::sycl::interop_handler &ih;
72-
static thread_local cublas_handle handle_helper;
68+
static thread_local cublas_handle<pi_context> handle_helper;
7369
CUstream get_stream(const cl::sycl::queue &queue);
7470
cl::sycl::context get_context(const cl::sycl::queue &queue);
7571

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/***************************************************************************
2+
* Copyright (C) Codeplay Software Limited
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* For your convenience, a copy of the License has been included in this
10+
* repository.
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
**************************************************************************/
19+
#include "cublas_scope_handle_hipsycl.hpp"
20+
#include "cublas_handle.hpp"
21+
22+
namespace oneapi {
23+
namespace mkl {
24+
namespace blas {
25+
namespace cublas {
26+
27+
thread_local cublas_handle<int> CublasScopedContextHandler::handle_helper = cublas_handle<int>{};
28+
29+
CublasScopedContextHandler::CublasScopedContextHandler(cl::sycl::queue queue,
30+
cl::sycl::interop_handle &ih)
31+
: interop_h(ih) {}
32+
33+
cublasHandle_t CublasScopedContextHandler::get_handle(const cl::sycl::queue &queue) {
34+
cl::sycl::device device = queue.get_device();
35+
int current_device = interop_h.get_native_device<cl::sycl::backend::cuda>();
36+
CUstream streamId = get_stream(queue);
37+
cublasStatus_t err;
38+
auto it = handle_helper.cublas_handle_mapper_.find(current_device);
39+
if (it != handle_helper.cublas_handle_mapper_.end()) {
40+
if (it->second == nullptr) {
41+
handle_helper.cublas_handle_mapper_.erase(it);
42+
}
43+
else {
44+
auto handle = it->second->load();
45+
if (handle != nullptr) {
46+
cudaStream_t currentStreamId;
47+
CUBLAS_ERROR_FUNC(cublasGetStream, err, handle, &currentStreamId);
48+
if (currentStreamId != streamId) {
49+
CUBLAS_ERROR_FUNC(cublasSetStream, err, handle, streamId);
50+
}
51+
return handle;
52+
}
53+
else {
54+
handle_helper.cublas_handle_mapper_.erase(it);
55+
}
56+
}
57+
}
58+
cublasHandle_t handle;
59+
60+
CUBLAS_ERROR_FUNC(cublasCreate, err, &handle);
61+
CUBLAS_ERROR_FUNC(cublasSetStream, err, handle, streamId);
62+
63+
auto insert_iter = handle_helper.cublas_handle_mapper_.insert(
64+
std::make_pair(current_device, new std::atomic<cublasHandle_t>(handle)));
65+
return handle;
66+
}
67+
68+
CUstream CublasScopedContextHandler::get_stream(const cl::sycl::queue &queue) {
69+
return interop_h.get_native_queue<cl::sycl::backend::cuda>();
70+
}
71+
72+
} // namespace cublas
73+
} // namespace blas
74+
} // namespace mkl
75+
} // namespace oneapi
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/***************************************************************************
2+
* Copyright (C) Codeplay Software Limited
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* For your convenience, a copy of the License has been included in this
10+
* repository.
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
**************************************************************************/
19+
#ifndef CUBLAS_SCOPED_HANDLE_HIPSYCL_HPP
20+
#define CUBLAS_SCOPED_HANDLE_HIPSYCL_HPP
21+
#include <CL/sycl.hpp>
22+
#include <memory>
23+
#include <thread>
24+
#include <unordered_map>
25+
#include "cublas_helper.hpp"
26+
#include "cublas_handle.hpp"
27+
namespace oneapi {
28+
namespace mkl {
29+
namespace blas {
30+
namespace cublas {
31+
32+
/**
33+
* @brief NVIDIA advise for handle creation:
34+
https://devtalk.nvidia.com/default/topic/838794/gpu-accelerated libraries/using-cublas-in-different-cuda-streams/
35+
According to NVIDIA:
36+
1) It is required that different handles to be used for different devices:
37+
http://docs.nvidia.com/cuda/cublas/index.html#cublas-context
38+
2) It is recommended (but not required, if care is taken) that different handles be used for different host threads:
39+
http://docs.nvidia.com/cuda/cublas/index.html#thread-safety2changeme
40+
3) It is neither required nor recommended that different handles be used for different streams on the same device,
41+
using the same host thread.
42+
However, the 3 above advises are for using cuda runtime API. The NVIDIA runtime API creates a default context for users.
43+
The createHandle function in cuBLAS uses the context located on top of the stack for each thread. Then, the cuBLAS routine
44+
uses this context for resource allocation/access. Calling a cuBLAS function with a handle created for context A and
45+
memories/queue created for context B results in a segmentation fault. Thus we need to create one handle per context
46+
and per thread. A context can have multiple streams, so the important thing here is to have one cublasHandle per driver
47+
context and that cuBLAS handle can switch between multiple streams created for that context. Here, we are dealing with
48+
CUDA driver API, therefore, the SYCL-CUDA backend controls the context. If a queue(equivalent of CUDA stream) is associated
49+
with a context different from the one on top of the thread stack(can be any context which associated at any time by either
50+
the runtime or user for any specific reason), the context associated with the queue must be moved on top of the stack
51+
temporarily for the requested routine operations. However, after the cuBLAS routine execution, the original context must
52+
be restored to prevent intervening with the original user/runtime execution set up. Here, the RAII type context switch
53+
is used to guarantee to recover the original CUDA context. The cuBLAS handle allocates internal resources, therefore,
54+
the handle must be destroyed when the context goes out of scope. This will bind the life of cuBLAS handle to the SYCL context.
55+
**/
56+
57+
class CublasScopedContextHandler {
58+
cl::sycl::interop_handle interop_h;
59+
static thread_local cublas_handle<int> handle_helper;
60+
cl::sycl::context get_context(const cl::sycl::queue &queue);
61+
CUstream get_stream(const cl::sycl::queue &queue);
62+
63+
public:
64+
CublasScopedContextHandler(cl::sycl::queue queue, cl::sycl::interop_handle &ih);
65+
66+
cublasHandle_t get_handle(const cl::sycl::queue &queue);
67+
68+
// This is a work-around function for reinterpret_casting the memory. This
69+
// will be fixed when SYCL-2020 has been implemented for Pi backend.
70+
template <typename T, typename U>
71+
inline T get_mem(U acc) {
72+
return reinterpret_cast<T>(interop_h.get_native_mem<cl::sycl::backend::cuda>(acc));
73+
}
74+
};
75+
76+
} // namespace cublas
77+
} // namespace blas
78+
} // namespace mkl
79+
} // namespace oneapi
80+
#endif //CUBLAS_SCOPED_HANDLE_HIPSYCL_HPP

src/blas/backends/cublas/cublas_task.hpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,37 @@
55
#include <complex>
66
#include <CL/sycl.hpp>
77
#include "oneapi/mkl/types.hpp"
8+
#ifndef __HIPSYCL__
89
#include "cublas_scope_handle.hpp"
910
#include <CL/sycl/detail/pi.hpp>
10-
11+
#else
12+
#include "cublas_scope_handle_hipsycl.hpp"
13+
namespace cl::sycl {
14+
using interop_handler = cl::sycl::interop_handle;
15+
}
16+
#endif
1117
namespace oneapi {
1218
namespace mkl {
1319
namespace blas {
1420
namespace cublas {
1521

22+
#ifdef __HIPSYCL__
23+
template <typename H, typename F>
24+
static inline void host_task_internal(H &cgh, cl::sycl::queue queue, F f) {
25+
cgh.hipSYCL_enqueue_custom_operation([f, queue](cl::sycl::interop_handle ih) {
26+
auto sc = CublasScopedContextHandler(queue, ih);
27+
f(sc);
28+
});
29+
}
30+
#else
1631
template <typename H, typename F>
1732
static inline void host_task_internal(H &cgh, cl::sycl::queue queue, F f) {
1833
cgh.interop_task([f, queue](cl::sycl::interop_handler ih) {
1934
auto sc = CublasScopedContextHandler(queue, ih);
2035
f(sc);
2136
});
2237
}
23-
38+
#endif
2439
template <typename H, typename F>
2540
static inline void onemkl_cublas_host_task(H &cgh, cl::sycl::queue queue, F f) {
2641
(void)host_task_internal(cgh, queue, f);

src/blas/backends/mklcpu/mklcpu_common.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ static inline auto host_task_internal(H &cgh, F f, int) -> decltype(cgh.run_on_h
4747

4848
template <typename K, typename H, typename F>
4949
static inline void host_task_internal(H &cgh, F f, long) {
50+
#ifndef __SYCL_DEVICE_ONLY__
5051
cgh.template single_task<K>(f);
52+
#endif
5153
}
5254

5355
template <typename K, typename H, typename F>

tests/unit_tests/include/test_helper.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,4 +170,10 @@ static inline void free_shared(void *p, cl::sycl::context ctx) {
170170
} // namespace mkl
171171
} // namespace oneapi
172172

173+
// Workaround for supporting ::half for hipSYCL
174+
// TODO: This should be removed after the interface is SYCL2020 conformant
175+
#ifdef __HIPSYCL__
176+
using ::cl::sycl::half;
177+
#endif
178+
173179
#endif // _TEST_HELPER_HPP_

0 commit comments

Comments
 (0)