Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ggml/src/ggml-sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ else()
target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl)
add_compile_definitions(GGML_SYCL_NVIDIA)
target_link_libraries(ggml-sycl PRIVATE sycl pthread m dl onemkl_blas_cublas)
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
if (NOT GGML_SYCL_DEVICE_ARCH)
message(ERROR "Can't enable SYCL hip backend, GGML_SYCL_DEVICE_ARCH has not been set.")
Expand Down
31 changes: 21 additions & 10 deletions ggml/src/ggml-sycl/dpct/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1690,8 +1690,12 @@ namespace dpct
auto data_b = get_memory<const Tb>(b);
auto data_c = get_memory<Tc>(c);
oneapi::mkl::blas::column_major::gemm(
q, a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
data_b, ldb, beta_value, data_c, ldc);
#ifdef GGML_SYCL_NVIDIA
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The macro makes the code hard to understand.
I suggest:

#ifdef GGML_SYCL_NVIDIA
        oneapi::mkl::blas::column_major::gemm(
                oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ q },
                a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
                data_b, ldb, beta_value, data_c, ldc);
        }
#else
        oneapi::mkl::blas::column_major::gemm(
                q, 
                a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
                data_b, ldb, beta_value, data_c, ldc);
        }
#endif

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we start adding support for Intel GPU as well I think it would make more sense to have a helper function that returns either a backend_selector or a queue based on the backend.
It would avoid duplicating the call to gemm which I think is a risk.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remember, the SYCL backend was initially created to support Intel GPUs. :)
Support for more vendor GPUs was added later.
The default code path should be optimized for Intel GPU.

It's OK to set a special queue for other vendor GPUs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code update for readability in f6e6fc4

oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ q },
#else
q,
#endif
a_trans, b_trans, m, n, k, alpha_value, data_a, lda, data_b, ldb, beta_value, data_c, ldc);
}

template <typename VecT, class BinaryOperation, class = void>
Expand Down Expand Up @@ -1755,12 +1759,15 @@ namespace dpct
matrix_info->groupsize_info = batch_size;

sycl::event e = oneapi::mkl::blas::column_major::gemm_batch(
q, matrix_info->transpose_info, matrix_info->transpose_info + 1,
matrix_info->size_info, matrix_info->size_info + 1,
matrix_info->size_info + 2, matrix_info->value_info,
reinterpret_cast<const Ta **>(a), matrix_info->ld_info,
reinterpret_cast<const Tb **>(b), matrix_info->ld_info + 1,
matrix_info->value_info + 1, reinterpret_cast<Tc **>(c),
#ifdef GGML_SYCL_NVIDIA
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ q },
#else
q,
#endif
matrix_info->transpose_info, matrix_info->transpose_info + 1, matrix_info->size_info,
matrix_info->size_info + 1, matrix_info->size_info + 2, matrix_info->value_info,
reinterpret_cast<const Ta **>(a), matrix_info->ld_info, reinterpret_cast<const Tb **>(b),
matrix_info->ld_info + 1, matrix_info->value_info + 1, reinterpret_cast<Tc **>(c),
matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info));

q.submit([&](sycl::handler &cgh)
Expand All @@ -1784,8 +1791,12 @@ namespace dpct
auto data_b = get_memory<const Tb>(b);
auto data_c = get_memory<Tc>(c);
oneapi::mkl::blas::column_major::gemm_batch(
q, a_trans, b_trans, m, n, k, alpha_value, data_a, lda,
stride_a, data_b, ldb, stride_b, beta_value,
#ifdef GGML_SYCL_NVIDIA
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ q },
#else
q,
#endif
a_trans, b_trans, m, n, k, alpha_value, data_a, lda, stride_a, data_b, ldb, stride_b, beta_value,
data_c, ldc, stride_c, batch_size);
}

Expand Down
11 changes: 7 additions & 4 deletions ggml/src/ggml-sycl/ggml-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2562,10 +2562,13 @@ inline void ggml_sycl_op_mul_mat_sycl(
const float beta = 0.0f;
#if !GGML_SYCL_DNNL
SYCL_CHECK(CHECK_TRY_ERROR(oneapi::mkl::blas::column_major::gemm(
*stream, oneapi::mkl::transpose::trans,
oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10,
dpct::get_value(&alpha, *stream), src0_ddf_i, ne00,
src1_ddf1_i, ne10, dpct::get_value(&beta, *stream),
# ifdef GGML_SYCL_NVIDIA
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ *stream },
# else
*stream,
# endif
oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, row_diff, src1_ncols, ne10,
dpct::get_value(&alpha, *stream), src0_ddf_i, ne00, src1_ddf1_i, ne10, dpct::get_value(&beta, *stream),
dst_dd_i, ldc)));
#else
auto dnnl_stream = ctx.stream_dnnl(stream);
Expand Down
16 changes: 8 additions & 8 deletions ggml/src/ggml-sycl/outprod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* sr

try {
// Perform matrix multiplication using oneMKL GEMM
oneapi::mkl::blas::column_major::gemm(*stream,
oneapi::mkl::transpose::nontrans, src1_op,
ne0, ne1, ne01,
alpha,
src0_d, ne00,
src1_d, ldb,
beta,
dst_d, ne0);
oneapi::mkl::blas::column_major::gemm(
#ifdef GGML_SYCL_NVIDIA
oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas>{ *stream },
#else
*stream,
#endif
oneapi::mkl::transpose::nontrans, src1_op, ne0, ne1, ne01, alpha, src0_d, ne00, src1_d, ldb, beta, dst_d,
ne0);
}
catch (sycl::exception const& exc) {
std::cerr << exc.what() << std::endl;
Expand Down
Loading