diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index 5ba1a7cc9b65..81231b5055af 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -44,6 +44,7 @@ namespace py = pybind11; namespace type_utils = dpctl::tensor::type_utils; typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue &, + const std::int64_t, const std::int64_t, char *, std::int64_t, @@ -56,6 +57,7 @@ static getrf_impl_fn_ptr_t getrf_dispatch_vector[dpctl_td_ns::num_types]; template static sycl::event getrf_impl(sycl::queue &exec_q, + const std::int64_t m, const std::int64_t n, char *in_a, std::int64_t lda, @@ -69,7 +71,7 @@ static sycl::event getrf_impl(sycl::queue &exec_q, T *a = reinterpret_cast(in_a); const std::int64_t scratchpad_size = - mkl_lapack::getrf_scratchpad_size(exec_q, n, n, lda); + mkl_lapack::getrf_scratchpad_size(exec_q, m, n, lda); T *scratchpad = nullptr; std::stringstream error_msg; @@ -82,13 +84,13 @@ static sycl::event getrf_impl(sycl::queue &exec_q, getrf_event = mkl_lapack::getrf( exec_q, - n, // The order of the square matrix A (0 ≤ n). + m, // The number of rows in the input matrix A (0 ≤ m). // It must be a non-negative integer. - n, // The number of columns in the square matrix A (0 ≤ n). + n, // The number of columns in the input matrix A (0 ≤ n). // It must be a non-negative integer. - a, // Pointer to the square matrix A (n x n). + a, // Pointer to the input matrix A (m x n). lda, // The leading dimension of matrix A. - // It must be at least max(1, n). + // It must be at least max(1, m). ipiv, // Pointer to the output array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. @@ -99,7 +101,7 @@ static sycl::event getrf_impl(sycl::queue &exec_q, if (info < 0) { error_msg << "Parameter number " << -info - << " had an illegal value."; + << " had an illegal value"; } else if (info == scratchpad_size && e.detail() != 0) { error_msg @@ -168,13 +170,13 @@ std::pair if (a_array_nd != 2) { throw py::value_error( "The input array has ndim=" + std::to_string(a_array_nd) + - ", but a 2-dimensional array is expected."); + ", but a 2-dimensional array is expected"); } if (ipiv_array_nd != 1) { throw py::value_error("The array of pivot indices has ndim=" + std::to_string(ipiv_array_nd) + - ", but a 1-dimensional array is expected."); + ", but a 1-dimensional array is expected"); } // check compatibility of execution queue and allocation queue @@ -190,10 +192,11 @@ std::pair } bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_a_array_f_contig = a_array.is_f_contiguous(); bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); - if (!is_a_array_c_contig) { + if (!is_a_array_c_contig && !is_a_array_f_contig) { throw py::value_error("The input array " - "must be C-contiguous"); + "must be contiguous"); } if (!is_ipiv_array_c_contig) { throw py::value_error("The array of pivot indices " @@ -208,7 +211,7 @@ std::pair if (getrf_fn == nullptr) { throw py::value_error( "No getrf implementation defined for the provided type " - "of the input matrix."); + "of the input matrix"); } auto ipiv_types = dpctl_td_ns::usm_ndarray_types(); @@ -216,19 +219,25 @@ std::pair ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { - throw py::value_error("The type of 'ipiv_array' must be int64."); + throw py::value_error("The type of 'ipiv_array' must be int64"); } - const std::int64_t n = a_array.get_shape_raw()[0]; + const py::ssize_t *a_array_shape = a_array.get_shape_raw(); + const std::int64_t m = a_array_shape[0]; + const std::int64_t n = a_array_shape[1]; + const std::int64_t lda = std::max(1UL, m); + + if (ipiv_array.get_size() != std::min(m, n)) { + throw py::value_error("The size of 'ipiv_array' must be min(m, n)"); + } char *a_array_data = a_array.get_data(); - const std::int64_t lda = std::max(1UL, n); char *ipiv_array_data = ipiv_array.get_data(); std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); std::vector host_task_events; - sycl::event getrf_ev = getrf_fn(exec_q, n, a_array_data, lda, d_ipiv, + sycl::event getrf_ev = getrf_fn(exec_q, m, n, a_array_data, lda, d_ipiv, dev_info, host_task_events, depends); sycl::event args_ev = dpctl::utils::keep_args_alive( diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp index 83d06816fce4..ec87c8b1f2ae 100644 --- a/dpnp/backend/extensions/lapack/getrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -221,10 +221,11 @@ std::pair } bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_a_array_f_contig = a_array.is_f_contiguous(); bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); - if (!is_a_array_c_contig) { + if (!is_a_array_c_contig && !is_a_array_f_contig) { throw py::value_error("The input array " - "must be C-contiguous"); + "must be must contiguous"); } if (!is_ipiv_array_c_contig) { throw py::value_error("The array of pivot indices " diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 4d5adfe09e4a..fb4dce4643b7 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -135,7 +135,7 @@ PYBIND11_MODULE(_lapack_impl, m) m.def("_getrf", &lapack_ext::getrf, "Call `getrf` from OneMKL LAPACK library to return " - "the LU factorization of a general n x n matrix", + "the LU factorization of a general m x n matrix", py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), py::arg("dev_info"), py::arg("depends") = py::list()); diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 07ed0078be28..55381c917203 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -56,6 +56,7 @@ dpnp_eigh, dpnp_inv, dpnp_lstsq, + dpnp_lu_factor, dpnp_matrix_power, dpnp_matrix_rank, dpnp_multi_dot, @@ -79,6 +80,7 @@ "eigvalsh", "inv", "lstsq", + "lu_factor", "matmul", "matrix_norm", "matrix_power", @@ -901,6 +903,68 @@ def lstsq(a, b, rcond=None): return dpnp_lstsq(a, b, rcond=rcond) +def lu_factor(a, overwrite_a=False, check_finite=True): + """ + Compute the pivoted LU decomposition of a matrix. + + The decomposition is:: + + A = P @ L @ U + + where `P` is a permutation matrix, `L` is lower triangular with unit + diagonal elements, and `U` is upper triangular. + + Parameters + ---------- + a : (M, N) {dpnp.ndarray, usm_ndarray} + Input array to decompose. + overwrite_a : {None, bool}, optional + Whether to overwrite data in `a` (may increase performance) + Default: ``False``. + check_finite : {None, bool}, optional + Whether to check that the input matrix contains only finite numbers. + Disabling may give a performance gain, but may result in problems + (crashes, non-termination) if the inputs do contain infinities or NaNs. + + Returns + ------- + lu :(M, N) dpnp.ndarray + Matrix containing U in its upper triangle, and L in its lower triangle. + The unit diagonal elements of L are not stored. + piv (K, ): dpnp.ndarray + Pivot indices representing the permutation matrix P: + row i of matrix was interchanged with row piv[i]. + ``K = min(M, N)``. + + Warning + ------- + This function synchronizes in order to validate array elements + when ``check_finite=True``. + + Limitations + ----------- + Only two-dimensional input matrices are supported. + Otherwise, the function raises ``NotImplementedError`` exception. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([[4., 3.], [6., 3.]]) + >>> lu, piv = np.linalg.lu_factor(a) + >>> lu + array([[6. , 3. ], + [0.66666667, 1. ]]) + >>> piv + array([1, 1]) + + """ + + dpnp.check_supported_arrays_type(a) + assert_stacked_2d(a) + + return dpnp_lu_factor(a, overwrite_a=overwrite_a, check_finite=check_finite) + + def matmul(x1, x2, /): """ Computes the matrix product. diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 55d140c5c88f..838daac66303 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -39,6 +39,7 @@ # pylint: disable=useless-import-alias from typing import NamedTuple +from warnings import warn import dpctl.tensor._tensor_impl as ti import dpctl.utils as dpu @@ -275,6 +276,126 @@ def _batched_inv(a, res_type): return a_h.reshape(orig_shape) +def _batched_lu_factor(a, res_type): + """Compute pivoted LU decomposition for a batch of matrices.""" + + # TODO: Find out at which array sizes the best performance is obtained + # getrf_batch implementation shows slow results with large arrays on GPU. + # Use getrf_batch only on CPU. + # On GPU call getrf for each two-dimensional array by loop + use_batch = a.sycl_device.has_aspect_cpu + + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + _manager = dpu.SequentialOrderManager[a_sycl_queue] + + n = a.shape[-2] + orig_shape = a.shape + # get 3d input arrays by reshape + a = dpnp.reshape(a, (-1, n, n)) + batch_size = a.shape[0] + a_usm_arr = dpnp.get_usm_ndarray(a) + + if use_batch: + # `a` must be copied because getrf_batch destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) + ipiv_h = dpnp.empty( + (batch_size, n), + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_h = [0] * batch_size + + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, + dst=a_h.get_array(), + sycl_queue=a_sycl_queue, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(ht_ev, copy_ev) + + ipiv_stride = n + a_stride = a_h.strides[0] + + # Call the LAPACK extension function _getrf_batch + # to perform LU decomposition of a batch of general matrices + ht_ev, getrf_ev = li._getrf_batch( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h, + n, + a_stride, + ipiv_stride, + batch_size, + depends=[copy_ev], + ) + _manager.add_event_pair(ht_ev, getrf_ev) + + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + + # Reshape the results back to their original shape + a_h = a_h.reshape(orig_shape) + ipiv_h = ipiv_h.reshape(orig_shape[:-1]) + dev_info_array = dev_info_array.reshape(orig_shape[:-2]) + + return (a_h, ipiv_h, dev_info_array) + + # Initialize lists for storing arrays and events for each batch + a_vecs = [None] * batch_size + ipiv_vecs = [None] * batch_size + dev_info_vecs = [None] * batch_size + + dep_evs = _manager.submitted_events + + # Process each batch + for i in range(batch_size): + # Copy each 2D slice to a new array because getrf will destroy + # the input matrix + a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) + + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=a_vecs[i].get_array(), + sycl_queue=a_sycl_queue, + depends=dep_evs, + ) + _manager.add_event_pair(ht_ev, copy_ev) + + ipiv_vecs[i] = dpnp.empty( + (n,), + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_vecs[i] = [0] + + # Call the LAPACK extension function _getrf + # to perform LU decomposition on each batch in 'a_vecs[i]' + ht_ev, getrf_ev = li._getrf( + a_sycl_queue, + a_vecs[i].get_array(), + ipiv_vecs[i].get_array(), + dev_info_vecs[i], + depends=[copy_ev], + ) + _manager.add_event_pair(ht_ev, getrf_ev) + + # Reshape the results back to their original shape + out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) + out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) + out_dev_info = dpnp.array( + dev_info_vecs, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ).reshape(orig_shape[:-2]) + + return (out_a, out_ipiv, out_dev_info) + + def _batched_solve(a, b, exec_q, res_usm_type, res_type): """ _batched_solve(a, b, exec_q, res_usm_type, res_type) @@ -866,6 +987,23 @@ def _hermitian_svd(a, compute_uv): return dpnp.sort(s)[..., ::-1] +def _is_copy_required(a, res_type): + """ + Determine if `a` needs to be copied before LU decomposition. + This matches SciPy behavior: copy is needed unless input is suitable + for in-place modification. + """ + + if a.dtype != res_type: + return True + if not a.flags["F_CONTIGUOUS"]: + return True + if not a.flags["WRITABLE"]: + return True + + return False + + def _is_empty_2d(arr): # check size first for efficiency return arr.size == 0 and numpy.prod(arr.shape[-2:]) == 0 @@ -901,125 +1039,15 @@ def _lu_factor(a, res_type): """ + if a.ndim > 2: + return _batched_lu_factor(a, res_type) + n = a.shape[-2] a_sycl_queue = a.sycl_queue a_usm_type = a.usm_type - - # TODO: Find out at which array sizes the best performance is obtained - # getrf_batch implementation shows slow results with large arrays on GPU. - # Use getrf_batch only on CPU. - # On GPU call getrf for each two-dimensional array by loop - use_batch = a.sycl_device.has_aspect_cpu - _manager = dpu.SequentialOrderManager[a_sycl_queue] - if a.ndim > 2: - orig_shape = a.shape - # get 3d input arrays by reshape - a = dpnp.reshape(a, (-1, n, n)) - batch_size = a.shape[0] - a_usm_arr = dpnp.get_usm_ndarray(a) - - if use_batch: - # `a` must be copied because getrf_batch destroys the input matrix - a_h = dpnp.empty_like(a, order="C", dtype=res_type) - ipiv_h = dpnp.empty( - (batch_size, n), - dtype=dpnp.int64, - order="C", - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - dev_info_h = [0] * batch_size - - ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr, - dst=a_h.get_array(), - sycl_queue=a_sycl_queue, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(ht_ev, copy_ev) - - ipiv_stride = n - a_stride = a_h.strides[0] - - # Call the LAPACK extension function _getrf_batch - # to perform LU decomposition of a batch of general matrices - ht_ev, getrf_ev = li._getrf_batch( - a_sycl_queue, - a_h.get_array(), - ipiv_h.get_array(), - dev_info_h, - n, - a_stride, - ipiv_stride, - batch_size, - depends=[copy_ev], - ) - _manager.add_event_pair(ht_ev, getrf_ev) - - dev_info_array = dpnp.array( - dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ) - - # Reshape the results back to their original shape - a_h = a_h.reshape(orig_shape) - ipiv_h = ipiv_h.reshape(orig_shape[:-1]) - dev_info_array = dev_info_array.reshape(orig_shape[:-2]) - - return (a_h, ipiv_h, dev_info_array) - - # Initialize lists for storing arrays and events for each batch - a_vecs = [None] * batch_size - ipiv_vecs = [None] * batch_size - dev_info_vecs = [None] * batch_size - - dep_evs = _manager.submitted_events - - # Process each batch - for i in range(batch_size): - # Copy each 2D slice to a new array because getrf will destroy - # the input matrix - a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) - - ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr[i], - dst=a_vecs[i].get_array(), - sycl_queue=a_sycl_queue, - depends=dep_evs, - ) - _manager.add_event_pair(ht_ev, copy_ev) - - ipiv_vecs[i] = dpnp.empty( - (n,), - dtype=dpnp.int64, - order="C", - usm_type=a_usm_type, - sycl_queue=a_sycl_queue, - ) - dev_info_vecs[i] = [0] - - # Call the LAPACK extension function _getrf - # to perform LU decomposition on each batch in 'a_vecs[i]' - ht_ev, getrf_ev = li._getrf( - a_sycl_queue, - a_vecs[i].get_array(), - ipiv_vecs[i].get_array(), - dev_info_vecs[i], - depends=[copy_ev], - ) - _manager.add_event_pair(ht_ev, getrf_ev) - - # Reshape the results back to their original shape - out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) - out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) - out_dev_info = dpnp.array( - dev_info_vecs, usm_type=a_usm_type, sycl_queue=a_sycl_queue - ).reshape(orig_shape[:-2]) - - return (out_a, out_ipiv, out_dev_info) - a_usm_arr = dpnp.get_usm_ndarray(a) # `a` must be copied because getrf destroys the input matrix @@ -2265,6 +2293,97 @@ def dpnp_lstsq(a, b, rcond=None): return x, resids, rank, s +def dpnp_lu_factor(a, overwrite_a=False, check_finite=True): + """ + dpnp_lu_factor(a, overwrite_a=False, check_finite=True) + + Compute pivoted LU decomposition (SciPy-compatible behavior). + + This function mimics the behavior of `scipy.linalg.lu_factor` including + support for `overwrite_a`, `check_finite` and 0-based pivot indexing. + + """ + + res_type = _common_type(a) + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + + # accommodate empty arrays + if a.size == 0: + lu = dpnp.empty_like(a) + piv = dpnp.arange( + 0, dtype=dpnp.int64, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + return lu, piv + + if check_finite: + if not dpnp.isfinite(a).all(): + raise ValueError("array must not contain infs or NaNs") + + if a.ndim > 2: + raise NotImplementedError("Batched matrices are not supported") + + _manager = dpu.SequentialOrderManager[a_sycl_queue] + a_usm_arr = dpnp.get_usm_ndarray(a) + + # SciPy-compatible behavior + # Copy is required if: + # - overwrite_a is False (always copy), + # - dtype mismatch, + # - not F-contiguous, + # - not writeable + if not overwrite_a or _is_copy_required(a, res_type): + a_h = dpnp.empty_like(a, order="F", dtype=res_type) + ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, + dst=a_h.get_array(), + sycl_queue=a_sycl_queue, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(ht_ev, copy_ev) + else: + # input is suitable for in-place modification + a_h = a + copy_ev = None + + m, n = a.shape + + ipiv_h = dpnp.empty( + min(m, n), + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_h = [0] + + # Call the LAPACK extension function _getrf + # to perform LU decomposition on the input matrix + ht_ev, getrf_ev = li._getrf( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h, + depends=[copy_ev] if copy_ev is not None else [], + ) + _manager.add_event_pair(ht_ev, getrf_ev) + + if any(dev_info_h): + diag_nums = ", ".join(str(v) for v in dev_info_h if v > 0) + warn( + f"Diagonal number {diag_nums} are exactly zero. Singular matrix.", + RuntimeWarning, + stacklevel=2, + ) + + # MKL lapack uses 1-origin while SciPy uses 0-origin + ipiv_h -= 1 + + # Return a tuple containing the factorized matrix 'a_h', + # pivot indices 'ipiv_h' + return (a_h, ipiv_h) + + def dpnp_matrix_power(a, n): """ dpnp_matrix_power(a, n) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index f4dc1f74add0..dd5daa99b74d 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -1859,6 +1859,169 @@ def test_lstsq_errors(self): assert_raises(TypeError, dpnp.linalg.lstsq, a_dp, b_dp, [-1]) +class TestLuFactor: + @staticmethod + def _apply_pivots_rows(A_dp, piv_dp): + m = A_dp.shape[0] + + if m == 0 or piv_dp.size == 0: + return A_dp + + rows = list(range(m)) + piv_np = dpnp.asnumpy(piv_dp) + for i, r in enumerate(piv_np): + if i != r: + rows[i], rows[r] = rows[r], rows[i] + + rows = dpnp.asarray(rows) + return A_dp[rows] + + @staticmethod + def _split_lu(lu, m, n): + L = dpnp.tril(lu, k=-1) + dpnp.fill_diagonal(L, 1) + L = L[:, : min(m, n)] + U = dpnp.triu(lu)[: min(m, n), :] + return L, U + + @pytest.mark.parametrize( + "shape", [(1, 1), (2, 2), (3, 3), (1, 5), (5, 1), (2, 5), (5, 2)] + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_lu_factor(self, shape, order, dtype): + a_np = generate_random_numpy_array(shape, dtype, order) + a_dp = dpnp.array(a_np, order=order) + + lu, piv = dpnp.linalg.lu_factor( + a_dp, check_finite=False, overwrite_a=False + ) + + # verify piv + assert piv.shape == (min(shape),) + assert piv.dtype == dpnp.int64 + if shape[0] > 0: + assert int(dpnp.min(piv)) >= 0 + assert int(dpnp.max(piv)) < shape[0] + + m, n = shape + L, U = self._split_lu(lu, m, n) + LU = L @ U + + A_cast = a_dp.astype(LU.dtype, copy=False) + PA = self._apply_pivots_rows(A_cast, piv) + + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_overwrite_inplace(self, dtype): + a_dp = dpnp.array([[4, 3], [6, 3]], dtype=dtype, order="F") + a_dp_orig = a_dp.copy() + lu, piv = dpnp.linalg.lu_factor( + a_dp, overwrite_a=True, check_finite=False + ) + + assert lu is a_dp + assert lu.flags["F_CONTIGUOUS"] is True + + L, U = self._split_lu(lu, 2, 2) + PA = self._apply_pivots_rows(a_dp_orig, piv) + LU = L @ U + + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_overwrite_copy(self, dtype): + a_dp = dpnp.array([[4, 3], [6, 3]], dtype=dtype, order="C") + a_dp_orig = a_dp.copy() + lu, piv = dpnp.linalg.lu_factor( + a_dp, overwrite_a=True, check_finite=False + ) + + assert lu is not a_dp + assert lu.flags["F_CONTIGUOUS"] is True + + L, U = self._split_lu(lu, 2, 2) + PA = self._apply_pivots_rows(a_dp_orig, piv) + LU = L @ U + + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + def test_overwrite_copy_special(self): + # F-contig but dtype != res_type + a1 = dpnp.array([[4, 3], [6, 3]], dtype=dpnp.int32, order="F") + a1_orig = a1.copy() + + # F-contig, match dtype but read-only input + a2 = dpnp.array( + [[4, 3], [6, 3]], dtype=dpnp.default_float_type(), order="F" + ) + a2_orig = a2.copy() + a2.flags["WRITABLE"] = False + + for a_dp, a_orig in zip((a1, a2), (a1_orig, a2_orig)): + lu, piv = dpnp.linalg.lu_factor( + a_dp, overwrite_a=True, check_finite=False + ) + + assert lu is not a_dp + assert lu.flags["F_CONTIGUOUS"] is True + + L, U = self._split_lu(lu, 2, 2) + PA = self._apply_pivots_rows( + a_orig.astype(L.dtype, copy=False), piv + ) + LU = L @ U + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + @pytest.mark.parametrize("shape", [(0, 0), (0, 2), (2, 0)]) + def test_empty_inputs(self, shape): + a_dp = dpnp.empty(shape, dtype=dpnp.default_float_type(), order="F") + lu, piv = dpnp.linalg.lu_factor(a_dp, check_finite=False) + assert lu.shape == shape + assert piv.shape == (min(shape),) + + @pytest.mark.parametrize( + "sl", + [ + (slice(None, None, 2), slice(None, None, 2)), + (slice(None, None, -1), slice(None, None, -1)), + ], + ) + def test_strided(self, sl): + base = ( + numpy.arange(7 * 7, dtype=dpnp.default_float_type()).reshape( + 7, 7, order="F" + ) + + 0.1 + ) + a_np = base[sl] + a_dp = dpnp.array(a_np) + + lu, piv = dpnp.linalg.lu_factor(a_dp, check_finite=False) + L, U = self._split_lu(lu, *a_dp.shape) + PA = self._apply_pivots_rows(a_dp, piv) + LU = L @ U + + assert_allclose(LU, PA, rtol=1e-6, atol=1e-6) + + def test_singular_matrix(self): + a_dp = dpnp.array([[1.0, 2.0], [2.0, 4.0]]) + with pytest.warns(RuntimeWarning, match="Singular matrix"): + dpnp.linalg.lu_factor(a_dp, check_finite=False) + + @pytest.mark.parametrize("bad", [numpy.inf, -numpy.inf, numpy.nan]) + def test_check_finite_raises(self, bad): + a_dp = dpnp.array([[1.0, 2.0], [3.0, bad]], order="F") + assert_raises( + ValueError, dpnp.linalg.lu_factor, a_dp, check_finite=True + ) + + def test_batched_not_supported(self): + a_dp = dpnp.ones((2, 2, 2)) + assert_raises(NotImplementedError, dpnp.linalg.lu_factor, a_dp) + + class TestMatrixPower: @pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize( diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 31dfb74f2cfa..7c08263e672c 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -1570,6 +1570,18 @@ def test_lstsq(self, m, n, nrhs, device): assert_sycl_queue_equal(param_queue, a.sycl_queue) assert_sycl_queue_equal(param_queue, b.sycl_queue) + @pytest.mark.parametrize( + "data", + [[[1.0, 2.0], [3.0, 5.0]], [[]]], + ) + def test_lu_factor(self, data, device): + a = dpnp.array(data, device=device) + result = dpnp.linalg.lu_factor(a) + + for param in result: + param_queue = param.sycl_queue + assert_sycl_queue_equal(param_queue, a.sycl_queue) + @pytest.mark.parametrize("n", [-1, 0, 1, 2, 3]) def test_matrix_power(self, n, device): x = dpnp.array([[1.0, 2.0], [3.0, 5.0]], device=device) diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index aed316eca533..6f886f8ec3c7 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1449,6 +1449,18 @@ def test_lstsq(self, m, n, nrhs, usm_type, usm_type_other): [usm_type, usm_type_other] ) + @pytest.mark.parametrize( + "data", + [[[1.0, 2.0], [3.0, 5.0]], [[]]], + ) + def test_lu_factor(self, data, usm_type): + a = dpnp.array(data, usm_type=usm_type) + result = dpnp.linalg.lu_factor(a) + + assert a.usm_type == usm_type + for param in result: + assert param.usm_type == a.usm_type + @pytest.mark.parametrize("n", [-1, 0, 1, 2, 3]) def test_matrix_power(self, n, usm_type): a = dpnp.array([[1, 2], [3, 5]], usm_type=usm_type)