Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/oneapi/mkl/rng/engines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ namespace rng {
//
// Supported parallelization methods:
// skip_ahead
using namespace cl;
class philox4x32x10 {
public:
static constexpr std::uint64_t default_seed = 0;
Expand Down
24 changes: 21 additions & 3 deletions src/blas/backends/cublas/cublas_batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,14 @@ inline void gemm_batch(Func func, cl::sycl::queue &queue, transpose transa, tran
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType *>(ih, b_acc);
Expand Down Expand Up @@ -122,8 +128,14 @@ inline cl::sycl::event gemm_batch(Func func, cl::sycl::queue &queue, transpose t
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto b_ = reinterpret_cast<const cuDataType *>(b);
Expand Down Expand Up @@ -170,8 +182,14 @@ inline cl::sycl::event gemm_batch(Func func, cl::sycl::queue &queue, transpose *
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
int64_t offset = 0;
cublasStatus_t err;
Expand Down
222 changes: 194 additions & 28 deletions src/blas/backends/cublas/cublas_level1.cpp

Large diffs are not rendered by default.

387 changes: 341 additions & 46 deletions src/blas/backends/cublas/cublas_level2.cpp

Large diffs are not rendered by default.

153 changes: 134 additions & 19 deletions src/blas/backends/cublas/cublas_level3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,14 @@ inline void gemm(Func func, cl::sycl::queue &queue, transpose transa, transpose
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType *>(ih, b_acc);
Expand Down Expand Up @@ -85,8 +91,14 @@ inline void gemm(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C,
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType_A *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType_B *>(ih, b_acc);
Expand Down Expand Up @@ -126,8 +138,14 @@ inline void symm(Func func, cl::sycl::queue &queue, side left_right, uplo upper_
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType *>(ih, b_acc);
Expand Down Expand Up @@ -166,8 +184,14 @@ inline void hemm(Func func, cl::sycl::queue &queue, side left_right, uplo upper_
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType *>(ih, b_acc);
Expand Down Expand Up @@ -201,8 +225,14 @@ inline void syrk(Func func, cl::sycl::queue &queue, uplo upper_lower, transpose
queue.submit([&](cl::sycl::handler &cgh) {
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto c_ = sc.get_mem<cuDataType *>(ih, c_acc);
Expand Down Expand Up @@ -238,8 +268,14 @@ inline void herk(Func func, cl::sycl::queue &queue, uplo upper_lower, transpose
queue.submit([&](cl::sycl::handler &cgh) {
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto c_ = sc.get_mem<cuDataType *>(ih, c_acc);
Expand Down Expand Up @@ -274,8 +310,14 @@ inline void syr2k(Func func, cl::sycl::queue &queue, uplo upper_lower, transpose
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType *>(ih, b_acc);
Expand Down Expand Up @@ -315,8 +357,14 @@ inline void her2k(Func func, cl::sycl::queue &queue, uplo upper_lower, transpose
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read>(cgh);
auto c_acc = c.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType *>(ih, b_acc);
Expand Down Expand Up @@ -356,8 +404,14 @@ inline void trmm(Func func, cl::sycl::queue &queue, side left_right, uplo upper_
queue.submit([&](cl::sycl::handler &cgh) {
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType *>(ih, b_acc);
Expand Down Expand Up @@ -393,8 +447,14 @@ inline void trsm(Func func, cl::sycl::queue &queue, side left_right, uplo upper_
queue.submit([&](cl::sycl::handler &cgh) {
auto a_acc = a.template get_access<cl::sycl::access::mode::read>(cgh);
auto b_acc = b.template get_access<cl::sycl::access::mode::read_write>(cgh);
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = sc.get_mem<cuDataType *>(ih, a_acc);
auto b_ = sc.get_mem<cuDataType *>(ih, b_acc);
Expand Down Expand Up @@ -435,8 +495,14 @@ inline cl::sycl::event gemm(Func func, cl::sycl::queue &queue, transpose transa,
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto b_ = reinterpret_cast<const cuDataType *>(b);
Expand Down Expand Up @@ -485,8 +551,14 @@ inline cl::sycl::event symm(Func func, cl::sycl::queue &queue, side left_right,
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto b_ = reinterpret_cast<const cuDataType *>(b);
Expand Down Expand Up @@ -528,8 +600,14 @@ inline cl::sycl::event hemm(Func func, cl::sycl::queue &queue, side left_right,
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto b_ = reinterpret_cast<const cuDataType *>(b);
Expand Down Expand Up @@ -568,8 +646,14 @@ inline cl::sycl::event syrk(Func func, cl::sycl::queue &queue, uplo upper_lower,
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto c_ = reinterpret_cast<cuDataType *>(c);
Expand Down Expand Up @@ -611,8 +695,14 @@ inline cl::sycl::event herk(Func func, cl::sycl::queue &queue, uplo upper_lower,
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto c_ = reinterpret_cast<cuDataType *>(c);
Expand Down Expand Up @@ -651,8 +741,14 @@ inline cl::sycl::event syr2k(Func func, cl::sycl::queue &queue, uplo upper_lower
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto b_ = reinterpret_cast<const cuDataType *>(b);
Expand Down Expand Up @@ -695,8 +791,14 @@ inline cl::sycl::event her2k(Func func, cl::sycl::queue &queue, uplo upper_lower
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto b_ = reinterpret_cast<const cuDataType *>(b);
Expand Down Expand Up @@ -741,8 +843,14 @@ inline cl::sycl::event trmm(Func func, cl::sycl::queue &queue, side left_right,
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif

auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto b_ = reinterpret_cast<cuDataType *>(b);
Expand Down Expand Up @@ -783,8 +891,15 @@ inline cl::sycl::event trsm(Func func, cl::sycl::queue &queue, side left_right,
for (int64_t i = 0; i < num_events; i++) {
cgh.depends_on(dependencies[i]);
}
#ifdef __HIPSYCL__
cgh.hipSYCL_enqueue_custom_operation([=](cl::sycl::interop_handle ih) {
auto sc = CublasScopedContextHandler(queue,ih);
#else
cgh.interop_task([=](cl::sycl::interop_handler ih) {
auto sc = CublasScopedContextHandler(queue);
auto sc = CublasScopedContextHandler(queue);
#endif


auto handle = sc.get_handle(queue);
auto a_ = reinterpret_cast<const cuDataType *>(a);
auto b_ = reinterpret_cast<cuDataType *>(b);
Expand Down
1 change: 0 additions & 1 deletion src/blas/backends/mklcpu/mklcpu_batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

#include "mklcpu_common.hpp"
#include "oneapi/mkl/blas/detail/mklcpu/onemkl_blas_mklcpu.hpp"

namespace oneapi {
namespace mkl {
namespace blas {
Expand Down
Loading