diff --git a/CHANGELOG.md b/CHANGELOG.md
index bfc279dc03f5..917c04b2c362 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -128,6 +128,7 @@ In addition, this release completes implementation of `dpnp.fft` module and adds
 * Fixed a crash in `dpnp.choose` caused by missing control of releasing temporary allocated device memory [#2063](https://github.com/IntelPython/dpnp/pull/2063)
 * Resolved compilation warning and error while building in debug mode [#2066](https://github.com/IntelPython/dpnp/pull/2066)
 * Fixed an issue with asynchronous execution in `dpnp.fft` module [#2067](https://github.com/IntelPython/dpnp/pull/2067)
+* Added a workaround to fix the incorrect result from `dpnp.matmul` when computing on Lunar Lake or Arrow Lake Battlemage graphics [#2082](https://github.com/IntelPython/dpnp/pull/2082)
 
 ## [0.15.0] - 05/25/2024
 
diff --git a/dpnp/backend/extensions/blas/blas_py.cpp b/dpnp/backend/extensions/blas/blas_py.cpp
index aa5ef52be9e2..a17ff03563ae 100644
--- a/dpnp/backend/extensions/blas/blas_py.cpp
+++ b/dpnp/backend/extensions/blas/blas_py.cpp
@@ -127,6 +127,13 @@ PYBIND11_MODULE(_blas_impl, m)
             py::arg("resultC"), py::arg("depends") = py::list());
     }
 
+    {
+        m.def("_is_lnl_bm_architecture", &blas_ns::_is_lnl_bm_architecture,
+              "Return ``True`` if the SYCL device belongs to either Lunar Lake or "
+              "Battlemage G21 Intel GPU architecture",
+              py::arg("device"));
+    }
+
     {
         m.def("_gemm_batch", &blas_ns::gemm_batch,
               "Call `gemm_batch` from OneMKL BLAS library to compute "
diff --git a/dpnp/backend/extensions/blas/gemm.cpp b/dpnp/backend/extensions/blas/gemm.cpp
index e7043af59d25..42487eb9286a 100644
--- a/dpnp/backend/extensions/blas/gemm.cpp
+++ b/dpnp/backend/extensions/blas/gemm.cpp
@@ -323,6 +323,22 @@ std::tuple<sycl::event, sycl::event, bool>
     return std::make_tuple(args_ev, gemm_ev, is_row_major);
 }
 
+bool _is_lnl_bm_architecture(const sycl::device &dev)
+{
+#if !defined(USE_ONEMKL_CUBLAS)
+    namespace syclex = sycl::ext::oneapi::experimental;
+    const auto arch = dev.get_info<syclex::info::device::architecture>();
+    switch (arch) {
+    case syclex::architecture::intel_gpu_lnl_m:   /* Lunar Lake */
+    case syclex::architecture::intel_gpu_bmg_g21: /* Battlemage G21 */
+        return true;
+    default:
+        return false;
+    }
+#endif // !defined(USE_ONEMKL_CUBLAS)
+    return false;
+}
+
 template <typename fnT, typename Tab, typename Tc>
 struct GemmContigFactory
 {
diff --git a/dpnp/backend/extensions/blas/gemm.hpp b/dpnp/backend/extensions/blas/gemm.hpp
index ee14400ae254..fed42755753e 100644
--- a/dpnp/backend/extensions/blas/gemm.hpp
+++ b/dpnp/backend/extensions/blas/gemm.hpp
@@ -39,6 +39,8 @@ extern std::tuple<sycl::event, sycl::event, bool>
          const dpctl::tensor::usm_ndarray &resultC,
          const std::vector<sycl::event> &depends);
 
+extern bool _is_lnl_bm_architecture(const sycl::device &dev);
+
 extern std::tuple<sycl::event, sycl::event, bool>
     gemm_batch(sycl::queue &exec_q,
                const dpctl::tensor::usm_ndarray &matrixA,
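The pybind11 binding above exposes the C++ architecture gate to the Python layer, which queries it before deciding whether to copy gemm operands. A minimal sketch of exercising the binding directly (assumptions: a dpnp build that ships the `_blas_impl` extension and a system with an Intel GPU; the selector string and variable names are illustrative, not part of the diff):

```python
# Sketch: probe whether the gemm workaround would trigger on this device.
import dpctl
import dpnp.backend.extensions.blas._blas_impl as bi

q = dpctl.SyclQueue("gpu")  # queue on the default GPU device
# True only for Lunar Lake (intel_gpu_lnl_m) or Battlemage G21
# (intel_gpu_bmg_g21) architectures.
print(bi._is_lnl_bm_architecture(q.get_sycl_device()))
```

Note the `#if !defined(USE_ONEMKL_CUBLAS)` guard in `gemm.cpp`: in cuBLAS-backed builds the function unconditionally returns `false`, so the workaround never engages there.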
diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
index e15bd93d7bbd..f65ad3737b41 100644
--- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
+++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -894,6 +894,34 @@ def dpnp_matmul(
             )
             _manager.add_event_pair(ht_ev, gemv_ev)
         elif call_flag == "gemm":
+            # MKLD-17976: due to a known issue in OneMKL on Lunar Lake
+            # and Battlemage G21 Intel GPU architectures, a temporary
+            # workaround is implemented here that makes an extra copy
+            # of an input array when the array has a small size and a
+            # non-zero offset.
+            # The issue was detected by failing tests for eig/eigh.
+            # TODO: remove the workaround once the OneMKL issue is resolved
+            if bi._is_lnl_bm_architecture(exec_q.get_sycl_device()):
+
+                def _need_to_copy(a):
+                    a_usm = dpnp.get_usm_ndarray(a)
+                    if a_usm._element_offset > 0 and a_usm.size < 16:
+                        return True
+                    return False
+
+                x1 = _copy_array(
+                    x1,
+                    copy_flag=_need_to_copy(x1),
+                    dtype=compute_dtype,
+                    order=res_order,
+                )
+                x2 = _copy_array(
+                    x2,
+                    copy_flag=_need_to_copy(x2),
+                    dtype=compute_dtype,
+                    order=res_order,
+                )
+
             result = _gemm_matmul(
                 exec_q,
                 x1,
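For context on the `_need_to_copy` heuristic: the failing `eig`/`eigh` tests mentioned in the comment hand `dpnp.matmul` small views into larger buffers, that is, arrays with fewer than 16 elements and a non-zero element offset. A sketch of such an operand (the shape and slice are assumptions for illustration, not taken from the tests):

```python
# Sketch: a small view into a larger USM buffer -- the case the
# workaround copies before calling oneMKL gemm.
import dpnp

a = dpnp.arange(25, dtype=dpnp.float32).reshape(5, 5)
v = a[1:4, 1:4]  # 3x3 view (9 elements) starting past the buffer origin

v_usm = dpnp.get_usm_ndarray(v)
# Mirrors the _need_to_copy() check from the diff.
print(v_usm._element_offset > 0 and v_usm.size < 16)  # True
```

On such inputs the patch routes `x1`/`x2` through `_copy_array`, so the oneMKL `gemm` call receives freshly allocated, zero-offset buffers on the affected architectures.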