IntelPython
diff --git a/‎.github/workflows/openssf-scorecard.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/openssf-scorecard.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/reference/linalg.rst‎
Lines changed: 1 addition & 0 deletions b/‎doc/reference/linalg.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎dpnp/__init__.py‎
Lines changed: 3 additions & 7 deletions b/‎dpnp/__init__.py‎
Lines changed: 3 additions & 7 deletions
diff --git a/‎dpnp/backend/extensions/lapack/getrf.cpp‎
Lines changed: 24 additions & 15 deletions b/‎dpnp/backend/extensions/lapack/getrf.cpp‎
Lines changed: 24 additions & 15 deletions
diff --git a/‎dpnp/backend/extensions/lapack/getrf.hpp‎
Lines changed: 1 addition & 0 deletions b/‎dpnp/backend/extensions/lapack/getrf.hpp‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎dpnp/backend/extensions/lapack/getrf_batch.cpp‎
Lines changed: 24 additions & 13 deletions b/‎dpnp/backend/extensions/lapack/getrf_batch.cpp‎
Lines changed: 24 additions & 13 deletions
diff --git a/‎dpnp/backend/extensions/lapack/lapack_py.cpp‎
Lines changed: 4 additions & 4 deletions b/‎dpnp/backend/extensions/lapack/lapack_py.cpp‎
Lines changed: 4 additions & 4 deletions
@@ -69,6 +69,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@df559355d593797519d70b90fc8edd5db049e7a2 # v3.29.9
+        uses: github/codeql-action/upload-sarif@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11
         with:
           sarif_file: results.sarif
@@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 * Replaced `ci` section in `.pre-commit-config.yaml` with a new GitHub workflow with scheduled run to autoupdate the `pre-commit` configuration [#2542](https://github.com/IntelPython/dpnp/pull/2542)
 * FFT module is updated to perform in-place FFT in intermediate steps of ND FFT [#2543](https://github.com/IntelPython/dpnp/pull/2543)
 * Reused dpctl tensor include to enable experimental SYCL namespace for complex types [#2546](https://github.com/IntelPython/dpnp/pull/2546)
+* Changed Windows-specific logic in dpnp initialization [#2553](https://github.com/IntelPython/dpnp/pull/2553)
 * Refactored backend implementation of `dpnp.linalg.solve` to use oneMKL LAPACK `gesv` directly [#2558](https://github.com/IntelPython/dpnp/pull/2558)
 
 ### Deprecated
 
@@ -43,6 +43,7 @@ Decompositions
    dpnp.linalg.cholesky
    dpnp.linalg.outer
    dpnp.linalg.qr
+   dpnp.linalg.lu_factor
    dpnp.linalg.svd
    dpnp.linalg.svdvals
 
 
@@ -40,17 +40,13 @@
 # where to search for DLLs towards both DPNP backend and DPCTL Sycl interface,
 # otherwise DPNP import will be failing. This is because the libraries
 # are not installed under any of default paths where Python is searching.
-from platform import system
-
-if system() == "Windows":  # pragma: no cover
-    if hasattr(os, "add_dll_directory"):
-        os.add_dll_directory(mypath)
-        os.add_dll_directory(dpctlpath)
 
+if sys.platform == "win32":  # pragma: no cover
+    os.add_dll_directory(mypath)
+    os.add_dll_directory(dpctlpath)
     os.environ["PATH"] = os.pathsep.join(
         [os.getenv("PATH", ""), mypath, dpctlpath]
     )
-
     # For virtual environments on Windows, add folder with DPC++ libraries
     # to the DLL search path
     if sys.base_exec_prefix != sys.exec_prefix and os.path.isfile(
 
@@ -44,6 +44,7 @@ namespace py = pybind11;
 namespace type_utils = dpctl::tensor::type_utils;
 
 typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue &,
+                                           const std::int64_t,
                                            const std::int64_t,
                                            char *,
                                            std::int64_t,
@@ -56,6 +57,7 @@ static getrf_impl_fn_ptr_t getrf_dispatch_vector[dpctl_td_ns::num_types];
 
 template <typename T>
 static sycl::event getrf_impl(sycl::queue &exec_q,
+                              const std::int64_t m,
                               const std::int64_t n,
                               char *in_a,
                               std::int64_t lda,
@@ -69,7 +71,7 @@ static sycl::event getrf_impl(sycl::queue &exec_q,
     T *a = reinterpret_cast<T *>(in_a);
 
     const std::int64_t scratchpad_size =
-        mkl_lapack::getrf_scratchpad_size<T>(exec_q, n, n, lda);
+        mkl_lapack::getrf_scratchpad_size<T>(exec_q, m, n, lda);
     T *scratchpad = nullptr;
 
     std::stringstream error_msg;
@@ -82,13 +84,13 @@ static sycl::event getrf_impl(sycl::queue &exec_q,
 
         getrf_event = mkl_lapack::getrf(
             exec_q,
-            n,    // The order of the square matrix A (0 ≤ n).
+            m,    // The number of rows in the input matrix A (0 ≤ m).
                   // It must be a non-negative integer.
-            n,    // The number of columns in the square matrix A (0 ≤ n).
+            n,    // The number of columns in the input matrix A (0 ≤ n).
                   // It must be a non-negative integer.
-            a,    // Pointer to the square matrix A (n x n).
+            a,    // Pointer to the input matrix A (m x n).
             lda,  // The leading dimension of matrix A.
-                  // It must be at least max(1, n).
+                  // It must be at least max(1, m).
             ipiv, // Pointer to the output array of pivot indices.
             scratchpad, // Pointer to scratchpad memory to be used by MKL
                         // routine for storing intermediate results.
@@ -99,7 +101,7 @@ static sycl::event getrf_impl(sycl::queue &exec_q,
 
         if (info < 0) {
             error_msg << "Parameter number " << -info
-                      << " had an illegal value.";
+                      << " had an illegal value";
         }
         else if (info == scratchpad_size && e.detail() != 0) {
             error_msg
@@ -168,13 +170,13 @@ std::pair<sycl::event, sycl::event>
     if (a_array_nd != 2) {
         throw py::value_error(
             "The input array has ndim=" + std::to_string(a_array_nd) +
-            ", but a 2-dimensional array is expected.");
+            ", but a 2-dimensional array is expected");
     }
 
     if (ipiv_array_nd != 1) {
         throw py::value_error("The array of pivot indices has ndim=" +
                               std::to_string(ipiv_array_nd) +
-                              ", but a 1-dimensional array is expected.");
+                              ", but a 1-dimensional array is expected");
     }
 
     // check compatibility of execution queue and allocation queue
@@ -190,10 +192,11 @@ std::pair<sycl::event, sycl::event>
     }
 
     bool is_a_array_c_contig = a_array.is_c_contiguous();
+    bool is_a_array_f_contig = a_array.is_f_contiguous();
     bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous();
-    if (!is_a_array_c_contig) {
+    if (!is_a_array_c_contig && !is_a_array_f_contig) {
         throw py::value_error("The input array "
-                              "must be C-contiguous");
+                              "must be contiguous");
     }
     if (!is_ipiv_array_c_contig) {
         throw py::value_error("The array of pivot indices "
@@ -208,27 +211,33 @@ std::pair<sycl::event, sycl::event>
     if (getrf_fn == nullptr) {
         throw py::value_error(
             "No getrf implementation defined for the provided type "
-            "of the input matrix.");
+            "of the input matrix");
     }
 
     auto ipiv_types = dpctl_td_ns::usm_ndarray_types();
     int ipiv_array_type_id =
         ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum());
 
     if (ipiv_array_type_id != static_cast<int>(dpctl_td_ns::typenum_t::INT64)) {
-        throw py::value_error("The type of 'ipiv_array' must be int64.");
+        throw py::value_error("The type of 'ipiv_array' must be int64");
     }
 
-    const std::int64_t n = a_array.get_shape_raw()[0];
+    const py::ssize_t *a_array_shape = a_array.get_shape_raw();
+    const std::int64_t m = a_array_shape[0];
+    const std::int64_t n = a_array_shape[1];
+    const std::int64_t lda = std::max<size_t>(1UL, m);
+
+    if (ipiv_array.get_size() != std::min(m, n)) {
+        throw py::value_error("The size of 'ipiv_array' must be min(m, n)");
+    }
 
     char *a_array_data = a_array.get_data();
-    const std::int64_t lda = std::max<size_t>(1UL, n);
 
     char *ipiv_array_data = ipiv_array.get_data();
     std::int64_t *d_ipiv = reinterpret_cast<std::int64_t *>(ipiv_array_data);
 
     std::vector<sycl::event> host_task_events;
-    sycl::event getrf_ev = getrf_fn(exec_q, n, a_array_data, lda, d_ipiv,
+    sycl::event getrf_ev = getrf_fn(exec_q, m, n, a_array_data, lda, d_ipiv,
                                     dev_info, host_task_events, depends);
 
     sycl::event args_ev = dpctl::utils::keep_args_alive(
 
@@ -44,6 +44,7 @@ extern std::pair<sycl::event, sycl::event>
                 const dpctl::tensor::usm_ndarray &a_array,
                 const dpctl::tensor::usm_ndarray &ipiv_array,
                 py::list dev_info,
+                std::int64_t m,
                 std::int64_t n,
                 std::int64_t stride_a,
                 std::int64_t stride_ipiv,
 
@@ -46,6 +46,7 @@ namespace type_utils = dpctl::tensor::type_utils;
 typedef sycl::event (*getrf_batch_impl_fn_ptr_t)(
     sycl::queue &,
     std::int64_t,
+    std::int64_t,
     char *,
     std::int64_t,
     std::int64_t,
@@ -61,6 +62,7 @@ static getrf_batch_impl_fn_ptr_t
 
 template <typename T>
 static sycl::event getrf_batch_impl(sycl::queue &exec_q,
+                                    std::int64_t m,
                                     std::int64_t n,
                                     char *in_a,
                                     std::int64_t lda,
@@ -77,7 +79,7 @@ static sycl::event getrf_batch_impl(sycl::queue &exec_q,
     T *a = reinterpret_cast<T *>(in_a);
 
     const std::int64_t scratchpad_size =
-        mkl_lapack::getrf_batch_scratchpad_size<T>(exec_q, n, n, lda, stride_a,
+        mkl_lapack::getrf_batch_scratchpad_size<T>(exec_q, m, n, lda, stride_a,
                                                    stride_ipiv, batch_size);
     T *scratchpad = nullptr;
 
@@ -91,11 +93,11 @@ static sycl::event getrf_batch_impl(sycl::queue &exec_q,
 
         getrf_batch_event = mkl_lapack::getrf_batch(
             exec_q,
-            n, // The order of each square matrix in the batch; (0 ≤ n).
+            m, // The number of rows in each matrix in the batch; (0 ≤ m).
                // It must be a non-negative integer.
             n, // The number of columns in each matrix in the batch; (0 ≤ n).
                // It must be a non-negative integer.
-            a, // Pointer to the batch of square matrices, each of size (n x n).
+            a, // Pointer to the batch of input matrices, each of size (m x n).
             lda,      // The leading dimension of each matrix in the batch.
             stride_a, // Stride between consecutive matrices in the batch.
             ipiv, // Pointer to the array of pivot indices for each matrix in
@@ -179,6 +181,7 @@ std::pair<sycl::event, sycl::event>
                 const dpctl::tensor::usm_ndarray &a_array,
                 const dpctl::tensor::usm_ndarray &ipiv_array,
                 py::list dev_info,
+                std::int64_t m,
                 std::int64_t n,
                 std::int64_t stride_a,
                 std::int64_t stride_ipiv,
@@ -191,21 +194,21 @@ std::pair<sycl::event, sycl::event>
     if (a_array_nd < 3) {
         throw py::value_error(
             "The input array has ndim=" + std::to_string(a_array_nd) +
-            ", but an array with ndim >= 3 is expected.");
+            ", but an array with ndim >= 3 is expected");
     }
 
     if (ipiv_array_nd != 2) {
         throw py::value_error("The array of pivot indices has ndim=" +
                               std::to_string(ipiv_array_nd) +
-                              ", but a 2-dimensional array is expected.");
+                              ", but a 2-dimensional array is expected");
     }
 
     const int dev_info_size = py::len(dev_info);
     if (dev_info_size != batch_size) {
         throw py::value_error("The size of 'dev_info' (" +
                               std::to_string(dev_info_size) +
                               ") does not match the expected batch size (" +
-                              std::to_string(batch_size) + ").");
+                              std::to_string(batch_size) + ")");
     }
 
     // check compatibility of execution queue and allocation queue
@@ -221,10 +224,11 @@ std::pair<sycl::event, sycl::event>
     }
 
     bool is_a_array_c_contig = a_array.is_c_contiguous();
+    bool is_a_array_f_contig = a_array.is_f_contiguous();
     bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous();
-    if (!is_a_array_c_contig) {
+    if (!is_a_array_c_contig && !is_a_array_f_contig) {
         throw py::value_error("The input array "
-                              "must be C-contiguous");
+                              "must be contiguous");
     }
     if (!is_ipiv_array_c_contig) {
         throw py::value_error("The array of pivot indices "
@@ -240,27 +244,34 @@ std::pair<sycl::event, sycl::event>
     if (getrf_batch_fn == nullptr) {
         throw py::value_error(
             "No getrf_batch implementation defined for the provided type "
-            "of the input matrix.");
+            "of the input matrix");
     }
 
     auto ipiv_types = dpctl_td_ns::usm_ndarray_types();
     int ipiv_array_type_id =
         ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum());
 
     if (ipiv_array_type_id != static_cast<int>(dpctl_td_ns::typenum_t::INT64)) {
-        throw py::value_error("The type of 'ipiv_array' must be int64.");
+        throw py::value_error("The type of 'ipiv_array' must be int64");
+    }
+
+    const py::ssize_t *ipiv_array_shape = ipiv_array.get_shape_raw();
+    if (ipiv_array_shape[0] != batch_size ||
+        ipiv_array_shape[1] != std::min(m, n)) {
+        throw py::value_error(
+            "The shape of 'ipiv_array' must be (batch_size, min(m, n))");
     }
 
     char *a_array_data = a_array.get_data();
-    const std::int64_t lda = std::max<size_t>(1UL, n);
+    const std::int64_t lda = std::max<size_t>(1UL, m);
 
     char *ipiv_array_data = ipiv_array.get_data();
     std::int64_t *d_ipiv = reinterpret_cast<std::int64_t *>(ipiv_array_data);
 
     std::vector<sycl::event> host_task_events;
     sycl::event getrf_batch_ev = getrf_batch_fn(
-        exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size,
-        dev_info, host_task_events, depends);
+        exec_q, m, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv,
+        batch_size, dev_info, host_task_events, depends);
 
     sycl::event args_ev = dpctl::utils::keep_args_alive(
         exec_q, {a_array, ipiv_array}, host_task_events);
 
@@ -135,16 +135,16 @@ PYBIND11_MODULE(_lapack_impl, m)
 
     m.def("_getrf", &lapack_ext::getrf,
           "Call `getrf` from OneMKL LAPACK library to return "
-          "the LU factorization of a general n x n matrix",
+          "the LU factorization of a general m x n matrix",
           py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"),
           py::arg("dev_info"), py::arg("depends") = py::list());
 
     m.def("_getrf_batch", &lapack_ext::getrf_batch,
           "Call `getrf_batch` from OneMKL LAPACK library to return "
-          "the LU factorization of a batch of general n x n matrices",
+          "the LU factorization of a batch of general m x n matrices",
           py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"),
-          py::arg("dev_info_array"), py::arg("n"), py::arg("stride_a"),
-          py::arg("stride_ipiv"), py::arg("batch_size"),
+          py::arg("dev_info_array"), py::arg("m"), py::arg("n"),
+          py::arg("stride_a"), py::arg("stride_ipiv"), py::arg("batch_size"),
           py::arg("depends") = py::list());
 
     m.def("_getri_batch", &lapack_ext::getri_batch,