7 changes: 7 additions & 0 deletions dpnp/backend/extensions/blas/blas_py.cpp
@@ -127,6 +127,13 @@ PYBIND11_MODULE(_blas_impl, m)
py::arg("resultC"), py::arg("depends") = py::list());
}

{
    m.def("_is_lnl_bm_architecture", &blas_ns::_is_lnl_bm_architecture,
          "Return ``True`` if the SYCL device belongs to either the "
          "Lunar Lake or Battlemage G21 Intel GPU architecture",
          py::arg("device"));
}

{
m.def("_gemm_batch", &blas_ns::gemm_batch,
"Call `gemm_batch` from OneMKL BLAS library to compute "
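For reference, the new binding can be exercised directly from Python once the extension is built. A minimal sketch, assuming the `_blas_impl` module is importable under `dpnp.backend.extensions.blas` (the dispatch code later in this PR imports it as the `bi` alias) and a SYCL device is available:

import dpctl
import dpnp.backend.extensions.blas._blas_impl as bi

# Query the default SYCL device; the helper returns True only on
# Lunar Lake / Battlemage G21 Intel GPUs, and always False on
# cuBLAS builds, where the check is compiled out (see gemm.cpp below).
dev = dpctl.SyclDevice()
print(bi._is_lnl_bm_architecture(dev))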
19 changes: 19 additions & 0 deletions dpnp/backend/extensions/blas/gemm.cpp
@@ -323,6 +323,25 @@ std::tuple<sycl::event, sycl::event, bool>
return std::make_tuple(args_ev, gemm_ev, is_row_major);
}

bool _is_lnl_bm_architecture(sycl::device &dev)
{
#if !defined(USE_ONEMKL_CUBLAS)
    if (dev.ext_oneapi_architecture_is(
            sycl::ext::oneapi::experimental::architecture::
                intel_gpu_lnl_m)) /* Lunar Lake */
    {
        return true;
    }
    else if (dev.ext_oneapi_architecture_is(
                 sycl::ext::oneapi::experimental::architecture::
                     intel_gpu_bmg_g21)) /* Battlemage G21 */
    {
        return true;
    }
#endif // !defined(USE_ONEMKL_CUBLAS)
    return false;
}

template <typename fnT, typename Tab, typename Tc>
struct GemmContigFactory
{
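The preprocessor guard means cuBLAS-targeted builds always report `False`. As a rough illustration of what the check achieves, here is a hypothetical pure-Python approximation based on device-name matching; the SYCL architecture query used above is the robust approach, since the marketing names exposed via `dev.name` are not guaranteed to be stable:

import dpctl

def _looks_like_lnl_or_bmg(dev: dpctl.SyclDevice) -> bool:
    # Hypothetical name-based stand-in for the architecture check;
    # intel_gpu_lnl_m is Lunar Lake and intel_gpu_bmg_g21 is
    # Battlemage G21 in the SYCL experimental architecture enum.
    name = dev.name.lower()
    return "lunar lake" in name or "battlemage" in name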
2 changes: 2 additions & 0 deletions dpnp/backend/extensions/blas/gemm.hpp
@@ -39,6 +39,8 @@ extern std::tuple<sycl::event, sycl::event, bool>
const dpctl::tensor::usm_ndarray &resultC,
const std::vector<sycl::event> &depends);

extern bool _is_lnl_bm_architecture(sycl::device &dev);

extern std::tuple<sycl::event, sycl::event, bool>
gemm_batch(sycl::queue &exec_q,
const dpctl::tensor::usm_ndarray &matrixA,
26 changes: 26 additions & 0 deletions dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -894,6 +894,32 @@ def dpnp_matmul(
)
_manager.add_event_pair(ht_ev, gemv_ev)
elif call_flag == "gemm":
    # MKLD-17976: due to a known issue in OneMKL on the Lunar Lake
    # and Battlemage G21 Intel GPU architectures, we apply a
    # temporary workaround: make an extra copy of an input array
    # when it has a small size and a non-zero offset
    # TODO: remove the workaround once the OneMKL issue is resolved
    if bi._is_lnl_bm_architecture(exec_q.get_sycl_device()):

        def _need_to_copy(a):
            a_usm = dpnp.get_usm_ndarray(a)
            return a_usm._element_offset > 0 and a_usm.size < 16

        x1 = _copy_array(
            x1,
            copy_flag=_need_to_copy(x1),
            dtype=compute_dtype,
            order=res_order,
        )
        x2 = _copy_array(
            x2,
            copy_flag=_need_to_copy(x2),
            dtype=compute_dtype,
            order=res_order,
        )

result = _gemm_matmul(
exec_q,
x1,
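To see when `_need_to_copy` fires, note that a sliced view shares the parent's allocation and records where its own data starts as an element offset. A minimal sketch, assuming a default SYCL device is available and that `usm_ndarray._element_offset` behaves as used in the diff above:

import dpctl.tensor as dpt

a = dpt.ones((4, 20), dtype="f4")
b = a[1:3, 4:8]  # view into `a`: 8 elements starting 24 elements in

# b._element_offset == 24 (> 0) and b.size == 8 (< 16), so on an
# LNL/BMG device the workaround would copy `b` into a fresh,
# zero-offset allocation before calling gemm.
print(b._element_offset, b.size)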