IntelPython
diff --git a/‎.github/workflows/openssf-scorecard.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/openssf-scorecard.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎CHANGELOG.md‎
Lines changed: 20 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎dpnp/dpnp_array.py‎
Lines changed: 1 addition & 1 deletion b/‎dpnp/dpnp_array.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dpnp/linalg/dpnp_utils_linalg.py‎
Lines changed: 78 additions & 37 deletions b/‎dpnp/linalg/dpnp_utils_linalg.py‎
Lines changed: 78 additions & 37 deletions
diff --git a/‎dpnp/random/dpnp_iface_random.py‎
Lines changed: 25 additions & 15 deletions b/‎dpnp/random/dpnp_iface_random.py‎
Lines changed: 25 additions & 15 deletions
@@ -68,6 +68,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5
+        uses: github/codeql-action/upload-sarif@aa578102511db1f4524ed59b8cc2bae4f6e88195 # v3.27.6
         with:
           sarif_file: results.sarif
@@ -34,7 +34,8 @@ repos:
             (?x)^(
                 dpnp/tests/test_arraycreation.py|
                 dpnp/tests/test_sycl_queue.py|
-                dpnp/tests/test_usm_type.py
+                dpnp/tests/test_usm_type.py|
+                dpnp/tests/third_party/cupy/core_tests/test_nep50_examples.py
             )$
     -   id: python-no-log-warn
     -   id: python-use-type-annotations
 
@@ -13,6 +13,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 
+## [0.16.1] - 12/06/2024
+
+This is a bug-fix release.
+
+### Changed
+
+* Changed to use `Miniforge` installer in GitHub actions [#2057](https://github.com/IntelPython/dpnp/pull/2057)
+* Updated `README.md` to reflect current installation requirements and available options [#2166](https://github.com/IntelPython/dpnp/pull/2166)
+* Corrected the list of owners and code maintainers [#2185](https://github.com/IntelPython/dpnp/pull/2185)
+* Bumped the version of `oneMKL` interface used in dpnp build by default to align it with `2025.0` oneAPI release [#2193](https://github.com/IntelPython/dpnp/pull/2193)
+
+### Fixed
+
+* Resolved an issue with Compute Follows Data inconsistency in `dpnp.extract` function [#2172](https://github.com/IntelPython/dpnp/pull/2172)
+* Resolved an import error when using `dpnp` in virtual environment on Linux [#2199](https://github.com/IntelPython/dpnp/pull/2199)
+* Fixed incorrect result produced by `dpnp.fft.fft` function when input array has negative strides [#2202](https://github.com/IntelPython/dpnp/pull/2202)
+* Fixed an issue with `numpy.ndarray` input processing in the `dpnp.from_dlpack` function and updated the documentation [#2209](https://github.com/IntelPython/dpnp/pull/2209)
+* Resolved a compilation error when building with DPC++ 2025.1 compiler [#2211](https://github.com/IntelPython/dpnp/pull/2211)
+
+
 ## [0.16.0] - 10/14/2024
 
 This release reaches an important milestone by making offloading fully asynchronous. Calls to `dpnp` submit tasks for execution to DPC++ runtime and return without waiting for execution of these tasks to finish. The sequential semantics a user comes to expect from the execution of a Python script are preserved, though.
 
@@ -150,7 +150,7 @@ def mT(self):
         if self.ndim < 2:
             raise ValueError("matrix transpose with ndim < 2 is undefined")
 
-        return self._array_obj.mT
+        return dpnp_array._create_from_usm_ndarray(self._array_obj.mT)
 
     def to_device(self, target_device):
         """Transfer array to target device."""
 
@@ -475,6 +475,7 @@ def _batched_qr(a, mode="reduced"):
     )
 
 
+# pylint: disable=too-many-locals
 def _batched_svd(
     a,
     uv_type,
@@ -532,29 +533,30 @@ def _batched_svd(
             batch_shape_orig,
         )
 
-    k = min(m, n)
-    if compute_uv:
-        if full_matrices:
-            u_shape = (m, m) + (batch_size,)
-            vt_shape = (n, n) + (batch_size,)
-            jobu = ord("A")
-            jobvt = ord("A")
-        else:
-            u_shape = (m, k) + (batch_size,)
-            vt_shape = (k, n) + (batch_size,)
-            jobu = ord("S")
-            jobvt = ord("S")
+    # Transpose if m < n:
+    # 1. cuSolver gesvd supports only m >= n
+    # 2. Reducing a matrix with m >= n to bidiagonal form is more efficient
+    if m < n:
+        n, m = a.shape[-2:]
+        trans_flag = True
     else:
-        u_shape = vt_shape = ()
-        jobu = ord("N")
-        jobvt = ord("N")
+        trans_flag = False
+
+    u_shape, vt_shape, s_shape, jobu, jobvt = _get_svd_shapes_and_flags(
+        m, n, compute_uv, full_matrices, batch_size=batch_size
+    )
 
     _manager = dpu.SequentialOrderManager[exec_q]
     dep_evs = _manager.submitted_events
 
     # Reorder the elements by moving the last two axes of `a` to the front
     # to match fortran-like array order which is assumed by gesvd.
-    a = dpnp.moveaxis(a, (-2, -1), (0, 1))
+    if trans_flag:
+        # Transpose axes for cuSolver and to optimize reduction
+        # to bidiagonal form
+        a = dpnp.moveaxis(a, (-1, -2), (0, 1))
+    else:
+        a = dpnp.moveaxis(a, (-2, -1), (0, 1))
 
     # oneMKL LAPACK gesvd destroys `a` and assumes fortran-like array
     # as input.
@@ -583,7 +585,7 @@ def _batched_svd(
         sycl_queue=exec_q,
     )
     s_h = dpnp.empty(
-        (batch_size,) + (k,),
+        s_shape,
         dtype=s_type,
         order="C",
         usm_type=usm_type,
@@ -607,16 +609,23 @@ def _batched_svd(
         # gesvd call writes `u_h` and `vt_h` in Fortran order;
         # reorder the axes to match C order by moving the last axis
         # to the front
-        u = dpnp.moveaxis(u_h, -1, 0)
-        vt = dpnp.moveaxis(vt_h, -1, 0)
+        if trans_flag:
+            # Transpose axes to restore U and V^T for the original matrix
+            u = dpnp.moveaxis(u_h, (0, -1), (-1, 0))
+            vt = dpnp.moveaxis(vt_h, (0, -1), (-1, 0))
+        else:
+            u = dpnp.moveaxis(u_h, -1, 0)
+            vt = dpnp.moveaxis(vt_h, -1, 0)
+
         if a_ndim > 3:
             u = u.reshape(batch_shape_orig + u.shape[-2:])
             vt = vt.reshape(batch_shape_orig + vt.shape[-2:])
         # dpnp.moveaxis can make the array non-contiguous if it is not 2D
         # Convert to contiguous to align with NumPy
         u = dpnp.ascontiguousarray(u)
         vt = dpnp.ascontiguousarray(vt)
-        return u, s, vt
+        # Swap `u` and `vt` for transposed input to restore correct order
+        return (vt, s, u) if trans_flag else (u, s, vt)
     return s
 
 
@@ -759,6 +768,36 @@ def _common_inexact_type(default_dtype, *dtypes):
     return dpnp.result_type(*inexact_dtypes)
 
 
+def _get_svd_shapes_and_flags(m, n, compute_uv, full_matrices, batch_size=None):
+    """
+    Return the shapes and flags for SVD computations.
+
+    Parameters
+    ----------
+    m : int
+        Number of rows of the matrix being decomposed.
+    n : int
+        Number of columns of the matrix being decomposed.
+    compute_uv : bool
+        Whether `u` and `vt` are requested in addition to `s`.
+    full_matrices : bool
+        If ``True``, `u` and `vt` get shapes ``(m, m)`` and ``(n, n)``;
+        otherwise ``(m, k)`` and ``(k, n)`` with ``k = min(m, n)``.
+        Ignored when `compute_uv` is ``False``.
+    batch_size : {None, int}, optional
+        Number of matrices in a batch. When given, it is appended as
+        the trailing dimension of `u_shape` and `vt_shape` (only if
+        `compute_uv` is ``True``) and prepended as the leading dimension
+        of `s_shape`.
+        Default: ``None``.
+
+    Returns
+    -------
+    out : tuple
+        ``(u_shape, vt_shape, s_shape, jobu, jobvt)``, where the shapes
+        are tuples and `jobu`/`jobvt` are the code points of ``"A"``,
+        ``"S"`` or ``"N"``.
+
+    """
+
+    # `k` is the number of singular values, i.e. the length of `s`
+    k = min(m, n)
+    if compute_uv:
+        if full_matrices:
+            # Square `u` and `vt`
+            u_shape = (m, m)
+            vt_shape = (n, n)
+            jobu = ord("A")
+            jobvt = ord("A")
+        else:
+            # Reduced `u` and `vt`, limited to `k` columns/rows
+            u_shape = (m, k)
+            vt_shape = (k, n)
+            jobu = ord("S")
+            jobvt = ord("S")
+    else:
+        # Only singular values are requested: `u` and `vt` stay empty
+        u_shape = vt_shape = ()
+        jobu = ord("N")
+        jobvt = ord("N")
+
+    s_shape = (k,)
+    if batch_size is not None:
+        # Batch dimension goes last for `u`/`vt` but first for `s`
+        if compute_uv:
+            u_shape += (batch_size,)
+            vt_shape += (batch_size,)
+        s_shape = (batch_size,) + s_shape
+
+    return u_shape, vt_shape, s_shape, jobu, jobvt
+
+
 def _hermitian_svd(a, compute_uv):
     """
     _hermitian_svd(a, compute_uv)
@@ -2695,6 +2734,16 @@ def dpnp_svd(
             a, uv_type, s_type, full_matrices, compute_uv, exec_q, usm_type
         )
 
+    # Transpose if m < n:
+    # 1. cuSolver gesvd supports only m >= n
+    # 2. Reducing a matrix with m >= n to bidiagonal form is more efficient
+    if m < n:
+        n, m = a.shape
+        a = a.transpose()
+        trans_flag = True
+    else:
+        trans_flag = False
+
     # oneMKL LAPACK gesvd destroys `a` and assumes fortran-like array as input.
     # Allocate 'F' order memory for dpnp arrays to comply with
     # these requirements.
@@ -2716,22 +2765,9 @@ def dpnp_svd(
     )
     _manager.add_event_pair(ht_ev, copy_ev)
 
-    k = min(m, n)
-    if compute_uv:
-        if full_matrices:
-            u_shape = (m, m)
-            vt_shape = (n, n)
-            jobu = ord("A")
-            jobvt = ord("A")
-        else:
-            u_shape = (m, k)
-            vt_shape = (k, n)
-            jobu = ord("S")
-            jobvt = ord("S")
-    else:
-        u_shape = vt_shape = ()
-        jobu = ord("N")
-        jobvt = ord("N")
+    u_shape, vt_shape, s_shape, jobu, jobvt = _get_svd_shapes_and_flags(
+        m, n, compute_uv, full_matrices
+    )
 
     # oneMKL LAPACK assumes fortran-like array as input.
     # Allocate 'F' order memory for dpnp output arrays to comply with
@@ -2746,7 +2782,7 @@ def dpnp_svd(
         shape=vt_shape,
         order="F",
     )
-    s_h = dpnp.empty_like(a_h, shape=(k,), dtype=s_type)
+    s_h = dpnp.empty_like(a_h, shape=s_shape, dtype=s_type)
 
     ht_ev, gesvd_ev = li._gesvd(
         exec_q,
@@ -2761,6 +2797,11 @@ def dpnp_svd(
     _manager.add_event_pair(ht_ev, gesvd_ev)
 
     if compute_uv:
+        # Transposing the input matrix swaps the roles of U and Vt:
+        # For A^T = V S^T U^T, `u_h` becomes V and `vt_h` becomes U^T.
+        # Transpose and swap them back to restore correct order for A.
+        if trans_flag:
+            return vt_h.T, s_h, u_h.T
         # gesvd call writes `u_h` and `vt_h` in Fortran order;
         # Convert to contiguous to align with NumPy
         u_h = dpnp.ascontiguousarray(u_h)
 
@@ -1022,25 +1022,31 @@ def power(a, size=None):
     return call_origin(numpy.random.power, a, size)
 
 
-def rand(d0, *dn, device=None, usm_type="device", sycl_queue=None):
+def rand(*args, device=None, usm_type="device", sycl_queue=None):
     """
     Random values in a given shape.
 
-    Create an array of the given shape and populate it with random samples
-    from a uniform distribution over [0, 1).
+    Create an array of the given shape and populate it with random samples from
+    a uniform distribution over ``[0, 1)``.
 
     For full documentation refer to :obj:`numpy.random.rand`.
 
     Parameters
     ----------
+    *args : sequence of ints, optional
+        The dimensions of the returned array, must be non-negative.
+        If no argument is given a single Python float is returned.
     device : {None, string, SyclDevice, SyclQueue}, optional
         An array API concept of device where the output array is created.
-        The `device` can be ``None`` (the default), an OneAPI filter selector string,
-        an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device,
-        an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by
+        The `device` can be ``None`` (the default), a oneAPI filter selector
+        string, an instance of :class:`dpctl.SyclDevice` corresponding to
+        a non-partitioned SYCL device, an instance of :class:`dpctl.SyclQueue`,
+        or a `Device` object returned by
         :obj:`dpnp.dpnp_array.dpnp_array.device` property.
+        Default: ``None``.
     usm_type : {"device", "shared", "host"}, optional
         The type of SYCL USM allocation for the output array.
+        Default: ``"device"``.
     sycl_queue : {None, SyclQueue}, optional
         A SYCL queue to use for output array allocation and copying. The
         `sycl_queue` can be passed as ``None`` (the default), which means
@@ -1051,23 +1057,27 @@ def rand(d0, *dn, device=None, usm_type="device", sycl_queue=None):
     Returns
     -------
     out : dpnp.ndarray
-        Random values in a given shape.
-        Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise.
+        Random values in a given shape ``(d0, d1, ..., dn)``.
+        Output array data type is :obj:`dpnp.float64` if a device supports it,
+        or :obj:`dpnp.float32` type otherwise.
 
-    Examples
+    See Also
     --------
-    >>> s = dpnp.random.rand(3, 2)
+    :obj:`dpnp.random.random` : Return random floats in the half-open interval
+                                ``[0.0, 1.0)``.
+    :obj:`dpnp.random.random_sample` : Return random floats in the half-open
+                                       interval ``[0.0, 1.0)``.
+    :obj:`dpnp.random.uniform` : Draw samples from a uniform distribution.
 
-    See Also
+    Examples
     --------
-    :obj:`dpnp.random.random`
-    :obj:`dpnp.random.random_sample`
-    :obj:`dpnp.random.uniform`
+    >>> import dpnp as np
+    >>> s = np.random.rand(3, 2)
 
     """
 
     rs = _get_random_state(device=device, sycl_queue=sycl_queue)
-    return rs.rand(d0, *dn, usm_type=usm_type)
+    return rs.rand(*args, usm_type=usm_type)
 
 
 def randint(