diff --git a/doc/reference/linalg.rst b/doc/reference/linalg.rst
index 759bf54d17c7..63f7413563ed 100644
--- a/doc/reference/linalg.rst
+++ b/doc/reference/linalg.rst
@@ -23,6 +23,8 @@ Matrix and vector products
    dpnp.outer
    dpnp.matmul
    dpnp.linalg.matmul (Array API compatible)
+   dpnp.matvec
+   dpnp.vecmat
    dpnp.tensordot
    dpnp.linalg.tensordot (Array API compatible)
    dpnp.einsum
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index d9756c316c63..bac8544afee9 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -322,14 +322,25 @@ def as_usm_ndarray(a, dtype=None, device=None, usm_type=None, sycl_queue=None):
     )
 
 
-def check_limitations(subok=False, like=None, initial=None, where=True):
+def check_limitations(
+    subok=False,
+    like=None,
+    initial=None,
+    where=True,
+    subok_linalg=True,
+    signature=None,
+):
     """
     Checking limitation kwargs for their supported values.
 
-    Parameter `subok` is only supported with default value ``False``.
+    Parameter `subok` for array creation functions is only supported with
+    default value ``False``.
     Parameter `like` is only supported with default value ``None``.
     Parameter `initial` is only supported with default value ``None``.
     Parameter `where` is only supported with default value ``True``.
+    Parameter `subok` for linear algebra functions, named as `subok_linalg`
+    here, and is only supported with default value ``True``.
+    Parameter `signature` is only supported with default value ``None``.
 
     Raises
     ------
@@ -341,22 +352,32 @@ def check_limitations(subok=False, like=None, initial=None, where=True):
     if like is not None:
         raise NotImplementedError(
             "Keyword argument `like` is supported only with "
-            f"default value ``None``, but got {like}"
+            f"default value ``None``, but got {like}."
         )
     if subok is not False:
         raise NotImplementedError(
             "Keyword argument `subok` is supported only with "
-            f"default value ``False``, but got {subok}"
+            f"default value ``False``, but got {subok}."
         )
     if initial is not None:
         raise NotImplementedError(
             "Keyword argument `initial` is only supported with "
-            f"default value ``None``, but got {initial}"
+            f"default value ``None``, but got {initial}."
         )
     if where is not True:
         raise NotImplementedError(
             "Keyword argument `where` is supported only with "
-            f"default value ``True``, but got {where}"
+            f"default value ``True``, but got {where}."
+        )
+    if not subok_linalg:
+        raise NotImplementedError(
+            "keyword argument `subok` is only supported with "
+            f"default value ``True``, but got {subok_linalg}."
+        )
+    if signature is not None:
+        raise NotImplementedError(
+            "keyword argument `signature` is only supported with "
+            f"default value ``None``, but got {signature}."
         )
 
 
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index 7e1b23a65892..974797c9a66c 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -45,7 +45,7 @@
 from .dpnp_utils.dpnp_utils_linearalgebra import (
     dpnp_dot,
     dpnp_kron,
-    dpnp_matmul,
+    dpnp_multiplication,
     dpnp_tensordot,
     dpnp_vecdot,
 )
@@ -57,10 +57,12 @@
     "inner",
     "kron",
     "matmul",
+    "matvec",
     "outer",
     "tensordot",
     "vdot",
     "vecdot",
+    "vecmat",
 ]
 
 
@@ -753,17 +755,21 @@ def matmul(
         Alternative output array in which to place the result. It must have
         a shape that matches the signature `(n,k),(k,m)->(n,m)` but the type
         (of the calculated values) will be cast if necessary.
+
         Default: ``None``.
     dtype : {None, dtype}, optional
         Type to use in computing the matrix product. By default, the returned
         array will have data type that is determined by considering
         Promotion Type Rule and device capabilities.
+
         Default: ``None``.
     casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
         Controls what kind of data casting may occur.
+
         Default: ``"same_kind"``.
     order : {"C", "F", "A", "K", None}, optional
         Memory layout of the newly output array, if parameter `out` is ``None``.
+
         Default: ``"K"``.
     axes : {None, list of tuples}, optional
         A list of tuples with indices of axes the matrix product should operate
@@ -771,6 +777,7 @@ def matmul(
         elements are 2d matrices and these are taken to be stored in the two
         last axes of each argument. The corresponding axes keyword would be
         ``[(-2, -1), (-2, -1), (-2, -1)]``.
+
         Default: ``None``.
 
     Returns
@@ -781,19 +788,50 @@ def matmul(
 
     Limitations
     -----------
-    Keyword arguments `subok`, `signature`, and `axis` are only supported with
-    their default values.
-    Otherwise ``NotImplementedError`` exception will be raised.
+    Keyword arguments `subok`, and `signature`, are only supported with their
+    default values. Otherwise ``NotImplementedError`` exception will be raised.
 
     See Also
     --------
     :obj:`dpnp.linalg.matmul` : Array API compatible version.
-    :obj:`dpnp.vdot` : Complex-conjugating dot product.
+    :obj:`dpnp.vecdot` : Complex-conjugating dot product for stacks of vectors.
+    :obj:`dpnp.matvec` : Matrix-vector product for stacks of
+                         matrices and vectors.
+    :obj:`dpnp.vecmat` : Vector-matrix product for stacks of
+                         vectors and matrices.
     :obj:`dpnp.tensordot` : Sum products over arbitrary axes.
     :obj:`dpnp.einsum` : Einstein summation convention.
     :obj:`dpnp.dot` : Alternative matrix product with
                       different broadcasting rules.
 
+    Notes
+    -----
+    The behavior depends on the arguments in the following way.
+
+    - If both arguments are 2-D they are multiplied like conventional matrices.
+    - If either argument is N-D, N > 2, it is treated as a stack of matrices
+      residing in the last two indexes and broadcast accordingly.
+    - If the first argument is 1-D, it is promoted to a matrix by prepending
+      a 1 to its dimensions. After matrix multiplication the prepended 1 is
+      removed. (For stacks of vectors, use :obj:`dpnp.vecmat`.)
+    - If the second argument is 1-D, it is promoted to a matrix by appending
+      a 1 to its dimensions. After matrix multiplication the appended 1 is
+      removed. (For stacks of vectors, use :obj:`dpnp.matvec`.)
+
+    :obj:`dpnp.matmul` differs from :obj:`dpnp.dot` in two important ways:
+
+    - Multiplication by scalars is not allowed, use ``*`` instead.
+    - Stacks of matrices are broadcast together as if the matrices
+      were elements, respecting the signature ``(n,k),(k,m)->(n,m)``:
+
+    >>> import dpnp as np
+    >>> a = np.ones([9, 5, 7, 4])
+    >>> c = np.ones([9, 5, 4, 3])
+    >>> np.dot(a, c).shape
+    (9, 5, 7, 9, 5, 3)
+    >>> np.matmul(a, c).shape
+    (9, 5, 7, 3) # n is 7, k is 4, m is 3
+
     Examples
     --------
     For 2-D arrays it is the matrix product:
@@ -833,7 +871,7 @@ def matmul(
     >>> np.matmul(x1, x2)
     array(-13+0j)
 
-    The ``@`` operator can be used as a shorthand for ``matmul`` on
+    The ``@`` operator can be used as a shorthand for :obj:`dpnp.matmul` on
     :class:`dpnp.ndarray`.
 
     >>> x1 @ x2
@@ -841,20 +879,11 @@ def matmul(
 
     """
 
-    if not subok:
-        raise NotImplementedError(
-            "subok keyword argument is only supported by its default value."
-        )
-    if signature is not None:
-        raise NotImplementedError(
-            "signature keyword argument is only supported by its default value."
-        )
-    if axis is not None:
-        raise NotImplementedError(
-            "axis keyword argument is only supported by its default value."
-        )
+    dpnp.check_limitations(subok_linalg=subok, signature=signature)
+    dpnp.check_supported_arrays_type(x1, x2)
 
-    return dpnp_matmul(
+    return dpnp_multiplication(
+        "matmul",
         x1,
         x2,
         out=out,
@@ -862,6 +891,127 @@ def matmul(
         order=order,
         dtype=dtype,
         axes=axes,
+        axis=axis,
+    )
+
+
+def matvec(
+    x1,
+    x2,
+    /,
+    out=None,
+    *,
+    casting="same_kind",
+    order="K",
+    dtype=None,
+    subok=True,
+    signature=None,
+    axes=None,
+    axis=None,
+):
+    r"""
+    Matrix-vector dot product of two arrays.
+
+    Given a matrix (or stack of matrices) :math:`\mathbf{A}` in `x1` and
+    a vector (or stack of vectors) :math:`\mathbf{v}` in `x2`, the
+    matrix-vector product is defined as:
+
+    .. math::
+       \mathbf{A} \cdot \mathbf{b} = \sum_{i=0}^{n-1} A_{ij}v_j
+
+    where the sum is over the last dimensions in `x1` and `x2` (unless `axes`
+    is specified). (For a matrix-vector product with the vector conjugated,
+    use ``dpnp.vecmat(x2, x1.mT)``.)
+
+    For full documentation refer to :obj:`numpy.matvec`.
+
+    Parameters
+    ----------
+    x1 : {dpnp.ndarray, usm_ndarray}
+        First input array.
+    x2 : {dpnp.ndarray, usm_ndarray}
+        Second input array.
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
+        A location into which the result is stored. If provided, it must have
+        the broadcasted shape of `x1` and `x2` with the summation axis removed.
+        If not provided or ``None``, a freshly-allocated array is used.
+
+        Default: ``None``.
+    dtype : {None, dtype}, optional
+        Type to use in computing the matrix product. By default, the returned
+        array will have data type that is determined by considering
+        Promotion Type Rule and device capabilities.
+
+        Default: ``None``.
+    casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
+        Controls what kind of data casting may occur.
+
+        Default: ``"same_kind"``.
+    order : {"C", "F", "A", "K", None}, optional
+        Memory layout of the newly output array, if parameter `out` is ``None``.
+
+        Default: ``"K"``.
+    axes : {None, list of tuples}, optional
+        A list of tuples with indices of axes the matrix-vector product should
+        operate on. For instance, for the signature of ``(i,j),(j)->(i)``, the
+        base elements are 2d matrices and 1d vectors, where the matrices are
+        assumed to be stored in the last two axes of the first argument, and
+        the vectors in the last axis of the second argument. The corresponding
+        axes keyword would be ``[(-2, -1), (-1,), (-1,)]``.
+
+        Default: ``None``.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        Returns the matrix-vector product of the inputs.
+
+    Limitations
+    -----------
+    Keyword arguments `subok`, and `signature` are only supported with their
+    default values. Otherwise ``NotImplementedError`` exception will be raised.
+
+    See Also
+    --------
+    :obj:`dpnp.vecdot` : Vector-vector product..
+    :obj:`dpnp.vecmat` : Vector-matrix product.
+    :obj:`dpnp.matmul` : Matrix-matrix product.
+    :obj:`dpnp.einsum` : Einstein summation convention.
+
+
+    Examples
+    --------
+    Rotate a set of vectors from Y to X along Z.
+
+    >>> import dpnp as np
+    >>> a = np.array([[0., 1., 0.],
+    ...               [-1., 0., 0.],
+    ...               [0., 0., 1.]])
+    >>> v = np.array([[1., 0., 0.],
+    ...               [0., 1., 0.],
+    ...               [0., 0., 1.],
+    ...               [0., 6., 8.]])
+    >>> np.matvec(a, v)
+    array([[ 0., -1.,  0.],
+           [ 1.,  0.,  0.],
+           [ 0.,  0.,  1.],
+           [ 6.,  0.,  8.]])
+
+    """
+
+    dpnp.check_limitations(subok_linalg=subok, signature=signature)
+    dpnp.check_supported_arrays_type(x1, x2)
+
+    return dpnp_multiplication(
+        "matvec",
+        x1,
+        x2,
+        out=out,
+        casting=casting,
+        order=order,
+        dtype=dtype,
+        axes=axes,
+        axis=axis,
     )
 
 
@@ -1165,27 +1315,31 @@ def vecdot(
         Second input array.
     out : {None, dpnp.ndarray, usm_ndarray}, optional
         A location into which the result is stored. If provided, it must have
-        a shape that the broadcasted shape of `x1` and `x2` with the last axis
-        removed. If not provided or ``None``, a freshly-allocated array is
-        used.
+        the broadcasted shape of `x1` and `x2` with the last axis removed.
+        If not provided or ``None``, a freshly-allocated array is used.
+
         Default: ``None``.
     casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
         Controls what kind of data casting may occur.
+
         Default: ``"same_kind"``.
     order : {"C", "F", "A", "K", None}, optional
         Memory layout of the newly output array, if parameter `out` is ``None``.
+
         Default: ``"K"``.
     dtype : {None, dtype}, optional
         Type to use in computing the vector dot product. By default, the
         returned array will have data type that is determined by considering
         Promotion Type Rule and device capabilities.
+
         Default: ``None``.
     axes : {None, list of tuples}, optional
-        A list of tuples with indices of axes the matrix product should operate
+        A list of tuples with indices of axes the dot product should operate
         on. For instance, for the signature of ``(i),(i)->()``, the base
         elements are vectors and these are taken to be stored in the last axes
         of each argument. The corresponding axes keyword would be
-        ``[(-1,), (-1), ()]``.
+        ``[(-1,), (-1,), ()]``.
+
         Default: ``None``.
     axis : {None, int}, optional
         Axis over which to compute the dot product. This is a short-cut for
@@ -1193,6 +1347,7 @@ def vecdot(
         single-core-dimension argument and ``()`` for all others. For instance,
         for a signature ``(i),(i)->()``, it is equivalent to passing in
         ``axes=[(axis,), (axis,), ()]``.
+
         Default: ``None``.
 
     Returns
@@ -1209,7 +1364,11 @@ def vecdot(
     See Also
     --------
     :obj:`dpnp.linalg.vecdot` : Array API compatible version.
-    :obj:`dpnp.vdot` : Complex-conjugating dot product.
+    :obj:`dpnp.vdot` : Complex-conjugating dot product but flattens
+                       arguments first.
+    :obj:`dpnp.matmul` : Matrix-matrix product.
+    :obj:`dpnp.vecmat` : Vector-matrix product.
+    :obj:`dpnp.matvec` : Matrix-vector product.
     :obj:`dpnp.einsum` : Einstein summation convention.
 
     Examples
@@ -1224,14 +1383,7 @@ def vecdot(
 
     """
 
-    if not subok:
-        raise NotImplementedError(
-            "subok keyword argument is only supported by its default value."
-        )
-    if signature is not None:
-        raise NotImplementedError(
-            "signature keyword argument is only supported by its default value."
-        )
+    dpnp.check_limitations(subok_linalg=subok, signature=signature)
 
     return dpnp_vecdot(
         x1,
@@ -1243,3 +1395,123 @@ def vecdot(
         axes=axes,
         axis=axis,
     )
+
+
+def vecmat(
+    x1,
+    x2,
+    /,
+    out=None,
+    *,
+    casting="same_kind",
+    order="K",
+    dtype=None,
+    subok=True,
+    signature=None,
+    axes=None,
+    axis=None,
+):
+    r"""
+    Vector-matrix dot product of two arrays.
+
+    Given a vector (or stack of vector) :math:`\mathbf{v}` in `x1` and a matrix
+    (or stack of matrices) :math:`\mathbf{A}` in `x2`, the vector-matrix product
+    is defined as:
+
+    .. math::
+       \mathbf{b} \cdot \mathbf{A} = \sum_{i=0}^{n-1} \overline{v_i}A_{ij}
+
+    where the sum is over the last dimension of `x1` and the one-but-last
+    dimensions in `x2` (unless `axes` is specified) and where
+    :math:`\overline{v_i}` denotes the complex conjugate if :math:`{v}` is
+    complex and the identity otherwise. (For a non-conjugated
+    vector-matrix product, use ``dpnp.matvec(x2.mT, x1)``.)
+
+    For full documentation refer to :obj:`numpy.vecmat`.
+
+    Parameters
+    ----------
+    x1 : {dpnp.ndarray, usm_ndarray}
+        First input array.
+    x2 : {dpnp.ndarray, usm_ndarray}
+        Second input array.
+    out : {None, dpnp.ndarray, usm_ndarray}, optional
+        A location into which the result is stored. If provided, it must have
+        the broadcasted shape of `x1` and `x2` with the summation axis removed.
+        If not provided or ``None``, a freshly-allocated array is used.
+
+        Default: ``None``.
+    dtype : {None, dtype}, optional
+        Type to use in computing the matrix product. By default, the returned
+        array will have data type that is determined by considering
+        Promotion Type Rule and device capabilities.
+
+        Default: ``None``.
+    casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
+        Controls what kind of data casting may occur.
+
+        Default: ``"same_kind"``.
+    order : {"C", "F", "A", "K", None}, optional
+        Memory layout of the newly output array, if parameter `out` is ``None``.
+
+        Default: ``"K"``.
+    axes : {None, list of tuples}, optional
+        A list of tuples with indices of axes the vector-matrix product should
+        operate on. For instance, for the signature of ``(i),(i,j)->(j)``, the
+        base elements are 1D vectors and 2D matrices, where the vectors are
+        assumed to be stored in the last axis of the first argument, and the
+        matrices in the last two axes of the second argument. The corresponding
+        axes keyword would be ``[(-1,), (-2, -1), (-1,)]``.
+
+        Default: ``None``.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        The vector-matrix product of the inputs.
+
+    Limitations
+    -----------
+    Keyword arguments `subok`, and `signature`, are only supported with their
+    default values. Otherwise ``NotImplementedError`` exception will be raised.
+
+    See Also
+    --------
+    :obj:`dpnp.vecdot` : Vector-vector product..
+    :obj:`dpnp.matvec` : Matrix-vector product.
+    :obj:`dpnp.matmul` : Matrix-matrix product.
+    :obj:`dpnp.einsum` : Einstein summation convention.
+
+
+    Examples
+    --------
+    Project a vector along X and Y.
+
+    >>> import dpnp as np
+    >>> v = np.array([0., 4., 2.])
+    >>> a = np.array([[1., 0., 0.],
+    ...               [0., 1., 0.],
+    ...               [0., 0., 0.]])
+    >>> np.vecmat(v, a)
+    array([0., 4., 0.])
+
+    """
+
+    dpnp.check_limitations(subok_linalg=subok, signature=signature)
+    dpnp.check_supported_arrays_type(x1, x2)
+
+    # cannot directly use dpnp.conj(x1) as it returns `int8` for boolean input
+    if dpnp.issubdtype(x1.dtype, dpnp.complexfloating):
+        x1 = dpnp.conj(x1)
+
+    return dpnp_multiplication(
+        "vecmat",
+        x1,
+        x2,
+        out=out,
+        casting=casting,
+        order=order,
+        dtype=dtype,
+        axes=axes,
+        axis=axis,
+    )
diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
index f90c95370cd4..128dd3b78c3f 100644
--- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
+++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -28,7 +28,11 @@
 import dpctl.tensor._tensor_impl as ti
 import dpctl.utils as dpu
 import numpy
-from dpctl.tensor._numpy_helper import AxisError, normalize_axis_tuple
+from dpctl.tensor._numpy_helper import (
+    AxisError,
+    normalize_axis_index,
+    normalize_axis_tuple,
+)
 from dpctl.utils import ExecutionPlacementError
 
 import dpnp
@@ -40,7 +44,7 @@
     "dpnp_cross",
     "dpnp_dot",
     "dpnp_kron",
-    "dpnp_matmul",
+    "dpnp_multiplication",
     "dpnp_tensordot",
     "dpnp_vecdot",
 ]
@@ -196,7 +200,7 @@ def _define_contig_flag(x):
 
 def _define_dim_flags(x, axis):
     """
-    Define useful flags for the calculations in dpnp_matmul and dpnp_vecdot.
+    Define useful flags for the calculations in dpnp_multiplication and dpnp_vecdot.
     x_is_1D: `x` is 1D array or inherently 1D (all dimensions are equal to one
     except for dimension at `axis`), for instance, if x.shape = (1, 1, 1, 2),
     and axis=-1, then x_is_1D = True.
@@ -228,7 +232,7 @@ def _define_dim_flags(x, axis):
     return x_is_2D, x_is_1D, x_base_is_1D
 
 
-def _get_result_shape(x1, x2, out, _get_result_shape_fn, np_flag):
+def _get_result_shape(x1, x2, out, func, _get_result_shape_fn, np_flag):
     """
     Three task are completed in this function:
         - Get the shape of the result array.
@@ -247,7 +251,7 @@ def _get_result_shape(x1, x2, out, _get_result_shape_fn, np_flag):
             "The second input array does not have enough dimensions (has 0, but requires at least 1)"
         )
 
-    x1, x2, result_shape = _get_result_shape_fn(x1, x2, x1_ndim, x2_ndim)
+    x1, x2, result_shape = _get_result_shape_fn(x1, x2, func)
 
     if out is not None:
         out_shape = out.shape
@@ -257,33 +261,43 @@ def _get_result_shape(x1, x2, out, _get_result_shape_fn, np_flag):
     return x1, x2, result_shape
 
 
-def _get_result_shape_matmul(x1, x2, x1_ndim, x2_ndim):
+def _get_result_shape_multiplication(x1, x2, func):
 
-    x1_shape = x1.shape
-    x2_shape = x2.shape
+    x1_shape, x2_shape = x1.shape, x2.shape
+    x1_ndim, x2_ndim = x1.ndim, x2.ndim
 
-    if x1_ndim == 1 and x2_ndim == 1:
+    if x1_ndim == 1 and func == "matvec":
+        _shape_error(None, None, func, err_msg=3)
+    if x2_ndim == 1 and func == "vecmat":
+        _shape_error(None, None, func, err_msg=4)
+    elif x1_ndim == 1 and x2_ndim == 1:
         if x1_shape[-1] != x2_shape[-1]:
-            _shape_error(x1_shape[-1], x2_shape[-1], "matmul", err_msg=0)
+            _shape_error(x1_shape[-1], x2_shape[-1], func, err_msg=0)
         result_shape = ()
     elif x1_ndim == 1:
         if x1_shape[-1] != x2_shape[-2]:
-            _shape_error(x1_shape[-1], x2_shape[-2], "matmul", err_msg=0)
+            _shape_error(x1_shape[-1], x2_shape[-2], func, err_msg=0)
         result_shape = x2_shape[:-2] + (x2_shape[-1],)
     elif x2_ndim == 1:
         if x1_shape[-1] != x2_shape[-1]:
-            _shape_error(x1_shape[-1], x2_shape[-1], "matmul", err_msg=0)
+            _shape_error(x1_shape[-1], x2_shape[-1], func, err_msg=0)
         result_shape = x1_shape[:-1]
     else:  # at least 2D
-        x1_is_2D, x1_is_1D, _ = _define_dim_flags(x1, axis=-1)
-        x2_is_2D, x2_is_1D, _ = _define_dim_flags(x2, axis=-2)
+        if func == "matvec":
+            x2 = dpnp.reshape(x2, x2.shape + (1,))
+            x2_shape, x2_ndim = x2.shape, x2.ndim
+        elif func == "vecmat":
+            x1 = dpnp.reshape(x1, x1_shape[:-1] + (1, x1_shape[-1]))
+            x1_shape, x1_ndim = x1.shape, x1.ndim
 
         if x1_shape[-1] != x2_shape[-2]:
-            _shape_error(x1_shape[-1], x2_shape[-2], "matmul", err_msg=0)
+            _shape_error(x1_shape[-1], x2_shape[-2], func, err_msg=0)
 
         if x1_ndim == 2 and x2_ndim == 2:
             result_shape = (x1_shape[-2], x2_shape[-1])
         else:
+            x1_is_2D, x1_is_1D, _ = _define_dim_flags(x1, axis=-1)
+            x2_is_2D, x2_is_1D, _ = _define_dim_flags(x2, axis=-2)
             if x1_ndim != x2_ndim:
                 diff = abs(x1_ndim - x2_ndim)
                 if x1_ndim < x2_ndim:
@@ -312,20 +326,27 @@ def _get_result_shape_matmul(x1, x2, x1_ndim, x2_ndim):
                         if not (x2_is_2D or x2_is_1D):
                             x2 = dpnp.repeat(x2, x1_shape[i], axis=i)
                     else:
-                        _shape_error(x1_shape, x2_shape, "matmul", err_msg=1)
+                        _shape_error(x1_shape, x2_shape, func, err_msg=1)
 
-            result_shape = tuple(tmp_shape) + (x1.shape[-2], x2.shape[-1])
+            result_shape = tuple(tmp_shape)
+            if func == "matvec":
+                result_shape += (x1.shape[-2],)
+            elif func == "vecmat":
+                result_shape += (x2.shape[-1],)
+            else:
+                assert func == "matmul"
+                result_shape += (x1.shape[-2], x2.shape[-1])
 
     return x1, x2, result_shape
 
 
-def _get_result_shape_vecdot(x1, x2, x1_ndim, x2_ndim):
+def _get_result_shape_vecdot(x1, x2, func):
 
-    x1_shape = x1.shape
-    x2_shape = x2.shape
+    x1_shape, x2_shape = x1.shape, x2.shape
+    x1_ndim, x2_ndim = x1.ndim, x2.ndim
 
     if x1_shape[-1] != x2_shape[-1]:
-        _shape_error(x1_shape[-1], x2_shape[-1], "vecdot", err_msg=0)
+        _shape_error(x1_shape[-1], x2_shape[-1], func, err_msg=0)
 
     if x1_ndim == 1 and x2_ndim == 1:
         result_shape = ()
@@ -356,13 +377,33 @@ def _get_result_shape_vecdot(x1, x2, x1_ndim, x2_ndim):
                 elif x2_shape[i] == 1:
                     pass
                 else:
-                    _shape_error(x1_shape, x2_shape, "vecdot", err_msg=1)
+                    _shape_error(x1_shape, x2_shape, func, err_msg=1)
 
         result_shape = tuple(tmp_shape)
 
     return x1, x2, result_shape
 
 
+def _get_signature(func):
+    """Return signature of multiplication operation."""
+
+    if func == "matmul":
+        signature = "(n?,k),(k,m?)->(n?,m?)"
+        distinct_core = 3
+    elif func == "matvec":
+        signature = "(m,n),(n)->(m)"
+        distinct_core = 2
+    elif func == "vecdot":
+        signature = "(n?,),(n?,)->()"
+        distinct_core = 1
+    else:
+        assert func == "vecmat"
+        signature = "(n),(n,m)->(m)"
+        distinct_core = 2
+
+    return signature, distinct_core
+
+
 def _gemm_batch_matmul(exec_q, x1, x2, res):
     # arrays here are already at least 3D, make them 3D
     x1_shape = x1.shape
@@ -466,31 +507,36 @@ def _gemm_matmul(exec_q, x1, x2, res):
 def _shape_error(shape1, shape2, func, err_msg):
     """Validate the shapes of input and output arrays."""
 
-    if func == "matmul":
-        signature = "(n?,k),(k,m?)->(n?,m?)"
-    elif func == "vecdot":
-        signature = "(n?,),(n?,)->()"
-    else:
-        # applicable when err_msg == 2
-        assert func is None
+    # func=None is applicable when err_msg == 2
+    if func is not None:
+        signature, _ = _get_signature(func)
 
     if err_msg == 0:
         raise ValueError(
-            "Input arrays have a mismatch in their core dimensions. "
+            f"{func}: Input arrays have a mismatch in their core dimensions. "
             "The core dimensions should follow this signature: "
             f"{signature} (size {shape1} is different from {shape2})"
         )
     elif err_msg == 1:
         raise ValueError(
-            "The shapes of the input arrays are incompatible. "
+            f"{func}: The shapes of the input arrays are incompatible. "
             f"The first input array has shape {shape1} and the second input "
             f"array has shape {shape2}. "
-            f"These cannot be broadcast together for '{func}' function."
         )
-    else:  # err_msg == 2:
-        assert err_msg == 2
+    elif err_msg == 2:
+        raise ValueError(
+            f"{func}: Expected output array of shape {shape1}, but got {shape2}."
+        )
+    elif err_msg == 3:
         raise ValueError(
-            f"Expected output array of shape {shape1}, but got {shape2}."
+            f"{func}: The first input array does not have enough dimensions "
+            f"(has 1, while signature {signature} requires 2)."
+        )
+    else:
+        assert err_msg == 4
+        raise ValueError(
+            f"{func}: The second input array does not have enough dimensions "
+            f"(has 1, while signature {signature} requires 2)."
         )
 
 
@@ -517,64 +563,90 @@ def _standardize_strides_to_nonzero(strides, shape):
 
 
 def _validate_axes(x1, x2, axes, func):
-    """Check axes is valid for matmul function."""
+    """Check axes is valid for linear algebra functions."""
 
-    def _validate_internal(axes, i, ndim):
-        if ndim == 1:
-            iter = 1
+    def _validate_internal(axes, op, ncores, ndim=None):
+        if ncores == 0:
+            if axes != ():
+                raise AxisError(
+                    f"{func}: operand {op} has 0 core dimensions. "
+                    f"Axes item {op} should be an empty tuple."
+                )
+        elif ncores == 1:
             if isinstance(axes, int):
                 axes = (axes,)
             elif not isinstance(axes, tuple):
                 raise TypeError(
-                    f"Axes item {i}: {type(axes)} object cannot be interpreted as an integer."
+                    f"Axes item {op}: {type(axes)} object cannot be interpreted as an integer."
                 )
 
             if len(axes) != 1:
                 raise AxisError(
-                    f"Axes item {i} should be a tuple with a single element, or an integer."
+                    f"Axes item {op} should be a tuple with a single element, or an integer."
                 )
         else:
-            iter = 2
+            assert ncores == 2
             if not isinstance(axes, tuple):
-                raise TypeError(f"Axes item {i} should be a tuple.")
+                raise TypeError(f"Axes item {op} should be a tuple.")
             if len(axes) != 2:
                 raise AxisError(
-                    f"Axes item {i} should be a tuple with 2 elements."
+                    f"Axes item {op} should be a tuple with 2 elements."
                 )
 
-        for j in range(iter):
-            if not isinstance(axes[j], int):
-                raise TypeError(
-                    f"Axes item {i}: {type(axes[j])} object cannot be interpreted as an integer."
-                )
+        if ndim is not None:
+            return normalize_axis_tuple(axes, ndim, "axes")
+
         return axes
 
     if not isinstance(axes, list):
         raise TypeError("Axes should be a list.")
-    elif len(axes) != 3:
-        raise ValueError(
-            "Axes should be a list of three tuples: two inputs and one output."
-        )
 
+    x1_ndim, x2_ndim = x1.ndim, x2.ndim
+    # number of core dimensions for each operand
     if func == "matmul":
-        x1_ndim = x1.ndim
-        x2_ndim = x2.ndim
-    else:  # func == "vecdot"
+        x1_ncore = 2 if x1_ndim != 1 else 1
+        x2_ncore = 2 if x2_ndim != 1 else 1
+    elif func == "matvec":
+        x1_ncore = 2
+        x2_ncore = 1
+    elif func == "vecmat":
+        x1_ncore = 1
+        x2_ncore = 2
+    else:
         assert func == "vecdot"
-        x1_ndim = x2_ndim = 1
+        x1_ncore = x2_ncore = 1
 
-    axes[0] = _validate_internal(axes[0], 0, x1_ndim)
-    axes[1] = _validate_internal(axes[1], 1, x2_ndim)
+    axes[0] = _validate_internal(axes[0], 0, x1_ncore, x1_ndim)
+    axes[1] = _validate_internal(axes[1], 1, x2_ncore, x2_ndim)
 
-    if x1_ndim == 1 and x2_ndim == 1:
-        if axes[2] != ():
-            raise AxisError("Axes item 2 should be an empty tuple.")
-    elif x1_ndim == 1 or x2_ndim == 1:
-        axes[2] = _validate_internal(axes[2], 2, 1)
+    if func == "vecdot":
+        if len(axes) == 3:
+            axes[2] = _validate_internal(axes[2], 2, 0)
+            return axes
+
+        if len(axes) == 2:
+            return [axes[0], axes[1], ()]
+
+        raise ValueError(
+            "Axes should be a list of three tuples: two inputs and one "
+            "output. Entry for output can only be omitted if it does not "
+            "have a core axis."
+        )
     else:
-        axes[2] = _validate_internal(axes[2], 2, 2)
+        if len(axes) != 3:
+            raise ValueError(
+                "Axes should be a list of three tuples: two inputs and one "
+                "output; Entry for output can only be omitted if it does not "
+                "have a core axis."
+            )
+        if x1_ncore == 1 and x2_ncore == 1:
+            axes[2] = _validate_internal(axes[2], 2, 0)
+        elif x1_ncore == 1 or x2_ncore == 1:
+            axes[2] = _validate_internal(axes[2], 2, 1)
+        else:
+            axes[2] = _validate_internal(axes[2], 2, 2)
 
-    return axes
+        return axes
 
 
 def _validate_out_array(out, exec_q):
@@ -741,7 +813,8 @@ def dpnp_kron(a, b, a_ndim, b_ndim):
     return result.reshape(tuple(numpy.multiply(a_shape, b_shape)))
 
 
-def dpnp_matmul(
+def dpnp_multiplication(
+    func,
     x1,
     x2,
     /,
@@ -751,16 +824,16 @@ def dpnp_matmul(
     order="K",
     dtype=None,
     axes=None,
+    axis=None,
 ):
     """
-    Return the matrix product of two arrays.
+    Return the multiplications of two arrays.
 
     The main calculation is performed by calling an extension function
     for BLAS library of OneMKL.
 
     """
 
-    dpnp.check_supported_arrays_type(x1, x2)
     res_usm_type, exec_q = get_usm_allocations([x1, x2])
     _validate_out_array(out, exec_q)
 
@@ -775,22 +848,39 @@ def dpnp_matmul(
         # behaves differently for matmul and vecdot
         order = "C"
 
+    if axis is not None:
+        signature, distinct_core = _get_signature(func)
+        # "matmul," "matvec," and "vecmat" always have multiple distinct cores,
+        # and `axis` is not supported for these functions.
+        # Therefore, raise an error in all cases where `axis` is provided.
+        assert distinct_core != 1
+        raise TypeError(
+            f"{func}: axis can only be used with a single shared core "
+            f"dimension, not with the {distinct_core} distinct ones implied "
+            f"by signature {signature}."
+        )
+
     x1_ndim = x1.ndim
     x2_ndim = x2.ndim
     if axes is not None:
-        axes = _validate_axes(x1, x2, axes, "matmul")
-
-        axes_x1, axes_x2, axes_res = axes
-        axes_x1 = normalize_axis_tuple(axes_x1, x1_ndim, "axis")
-        axes_x2 = normalize_axis_tuple(axes_x2, x2_ndim, "axis")
+        axes_x1, axes_x2, axes_res = _validate_axes(x1, x2, axes, func)
 
         # Move the axes that are going to be used in matrix product,
         # to the end of "x1" and "x2"
-        x1 = dpnp.moveaxis(x1, axes_x1, (-2, -1)) if x1_ndim != 1 else x1
-        x2 = dpnp.moveaxis(x2, axes_x2, (-2, -1)) if x2_ndim != 1 else x2
+        if func == "matmul":
+            x1 = dpnp.moveaxis(x1, axes_x1, (-2, -1)) if x1_ndim != 1 else x1
+            x2 = dpnp.moveaxis(x2, axes_x2, (-2, -1)) if x2_ndim != 1 else x2
+        elif func == "matvec":
+            x1 = dpnp.moveaxis(x1, axes_x1, (-2, -1)) if x1_ndim != 1 else x1
+            x2 = dpnp.moveaxis(x2, axes_x2, (-1,))
+        else:
+            assert func == "vecmat"
+            x1 = dpnp.moveaxis(x1, axes_x1, (-1,))
+            x2 = dpnp.moveaxis(x2, axes_x2, (-2, -1)) if x2_ndim != 1 else x2
 
         out_orig = out
         if out is not None:
+            axes_res = normalize_axis_tuple(axes_res, out.ndim, "axes")
             # out that is passed to the backend should have the correct shape
             if len(axes_res) == 2:
                 out = dpnp.moveaxis(out, axes_res, (-2, -1))
@@ -799,14 +889,18 @@ def dpnp_matmul(
 
     # When inputs are 1-D arrays, the result is a 0-D array. For this case,
     # NumPy allows out keyword to have any shape and the result is broadcast to it
-    NumPy_special_behavior = (
+    NumPy_special_case = (
         out is not None and x1_ndim == 1 and x2_ndim == 1 and out.shape != ()
     )
 
     x1, x2, result_shape = _get_result_shape(
-        x1, x2, out, _get_result_shape_matmul, NumPy_special_behavior
+        x1, x2, out, func, _get_result_shape_multiplication, NumPy_special_case
     )
 
+    if axes is not None:
+        # Now that result array shape is calculated, check axes is within range
+        axes_res = normalize_axis_tuple(axes_res, len(result_shape), "axes")
+
     # Determine the appropriate data types
     compute_dtype, res_dtype = _compute_res_dtype(
         x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q
@@ -864,7 +958,10 @@ def dpnp_matmul(
             x1 = dpnp.reshape(x1, (1, 1, x1.size))
             res_shape = result_shape[:-1] + (1, result_shape[-1])
         else:
-            res_shape = result_shape
+            if func == "vecmat":
+                res_shape = result_shape[:-1] + (1, result_shape[-1])
+            else:
+                res_shape = result_shape
     elif x2_base_is_1D:
         # TODO: implement gemv_batch to use it here without transpose
         call_flag = "gemm_batch"
@@ -872,7 +969,10 @@ def dpnp_matmul(
             x2 = dpnp.reshape(x2, (1, x2.size, 1))
             res_shape = result_shape + (1,)
         else:
-            res_shape = result_shape
+            if func == "matvec":
+                res_shape = result_shape + (1,)
+            else:
+                res_shape = result_shape
     else:
         call_flag = "gemm_batch"
         res_shape = result_shape
@@ -960,7 +1060,7 @@ def dpnp_matmul(
                     result,
                 )
 
-    if NumPy_special_behavior:
+    if NumPy_special_case:
         result = dpnp.tile(result, out.shape)
     elif res_shape != result_shape:
         result = dpnp.reshape(result, result_shape)
@@ -970,7 +1070,7 @@ def dpnp_matmul(
 
     if out is None:
         if axes is not None:
-            # Move the data to the appropriate axes of the result array
+            # Move the data back to the appropriate axes of the result array
             if len(axes_res) == 2:
                 result = dpnp.moveaxis(result, (-2, -1), axes_res)
             elif len(axes_res) == 1:
@@ -1070,7 +1170,7 @@ def dpnp_vecdot(
 
     if order in "aAkK":
         # This logic is also used for order="K" to align with NumPy behavior.
-        # It is different than logic used in dpnp_matmul because NumPy
+        # It is different than logic used in dpnp_multiplication because NumPy
         # behaves differently for matmul and vecdot
         if x1.flags.fnc and x2.flags.fnc:
             order = "F"
@@ -1079,35 +1179,31 @@ def dpnp_vecdot(
 
     x1_ndim = x1.ndim
     x2_ndim = x2.ndim
-    if axes is None and axis is None:
-        # default behavior with axis=-1
-        pass
-    elif axes is not None:
+
+    if axes is not None:
         if axis is not None:
             raise TypeError("cannot specify both `axis` and `axes`.")
 
-        axes = _validate_axes(x1, x2, axes, "vecdot")
-
-        axes_x1, axes_x2, axes_res = axes
-        axes_x1 = normalize_axis_tuple(axes_x1, x1_ndim, "axis")
-        axes_x2 = normalize_axis_tuple(axes_x2, x2_ndim, "axis")
+        axes_x1, axes_x2, axes_res = _validate_axes(x1, x2, axes, "vecdot")
 
         # Move the axes that are going to be used in dot product,
         # to the end of "x1" and "x2"
-        x1 = dpnp.moveaxis(x1, axes_x1, -1) if x1_ndim != 1 else x1
-        x2 = dpnp.moveaxis(x2, axes_x2, -1) if x2_ndim != 1 else x2
-    else:
-        x1 = dpnp.moveaxis(x1, axis, -1) if x1_ndim != 1 else x1
-        x2 = dpnp.moveaxis(x2, axis, -1) if x2_ndim != 1 else x2
+        x1 = dpnp.moveaxis(x1, axes_x1, -1)
+        x2 = dpnp.moveaxis(x2, axes_x2, -1)
+    elif axis is not None:
+        normalize_axis_index(axis, x1_ndim, "axis")
+        normalize_axis_index(axis, x2_ndim, "axis")
+        x1 = dpnp.moveaxis(x1, axis, -1)
+        x2 = dpnp.moveaxis(x2, axis, -1)
 
     # When inputs are 1-D arrays, the result is a 0-D array. For this case,
     # NumPy allows out keyword to have any shape and the result is broadcast to it
-    NumPy_special_behavior = (
+    NumPy_special_case = (
         out is not None and x1_ndim == 1 and x2_ndim == 1 and out.shape != ()
     )
 
     x1, x2, result_shape = _get_result_shape(
-        x1, x2, out, _get_result_shape_vecdot, NumPy_special_behavior
+        x1, x2, out, "vecdot", _get_result_shape_vecdot, NumPy_special_case
     )
 
     # Determine the appropriate data types
@@ -1156,7 +1252,7 @@ def dpnp_vecdot(
             dpt.vecdot(x1_usm, x2_usm, axis=-1)
         )
 
-    if NumPy_special_behavior:
+    if NumPy_special_case:
         result = dpnp.tile(result, out.shape)
     elif result.shape != result_shape:
         result = dpnp.reshape(result, result_shape)
diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 51afa1eaf9bd..059cce931d21 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -2665,886 +2665,6 @@ def test_inplace_floor_divide(dtype):
     assert_allclose(dp_a, np_a)
 
 
-class TestMatmul:
-    def setup_method(self):
-        numpy.random.seed(42)
-
-    @pytest.mark.parametrize(
-        "order1, order2", [("C", "C"), ("C", "F"), ("F", "C"), ("F", "F")]
-    )
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((4,), (4,)),
-            ((1, 4), (4, 1)),
-            ((4,), (4, 2)),
-            ((1, 4), (4, 2)),
-            ((2, 4), (4,)),
-            ((2, 4), (4, 1)),
-            ((1, 4), (4,)),  # output should be 1-d not 0-d
-            ((4,), (4, 1)),
-            ((1, 4), (4, 1)),
-            ((2, 4), (4, 3)),
-            ((1, 2, 3), (1, 3, 5)),
-            ((4, 2, 3), (4, 3, 5)),
-            ((1, 2, 3), (4, 3, 5)),
-            ((2, 3), (4, 3, 5)),
-            ((4, 2, 3), (1, 3, 5)),
-            ((4, 2, 3), (3, 5)),
-            ((1, 1, 4, 3), (1, 1, 3, 5)),
-            ((6, 7, 4, 3), (6, 7, 3, 5)),
-            ((6, 7, 4, 3), (1, 1, 3, 5)),
-            ((6, 7, 4, 3), (1, 3, 5)),
-            ((6, 7, 4, 3), (3, 5)),
-            ((6, 7, 4, 3), (1, 7, 3, 5)),
-            ((6, 7, 4, 3), (7, 3, 5)),
-            ((6, 7, 4, 3), (6, 1, 3, 5)),
-            ((1, 1, 4, 3), (6, 7, 3, 5)),
-            ((1, 4, 3), (6, 7, 3, 5)),
-            ((4, 3), (6, 7, 3, 5)),
-            ((6, 1, 4, 3), (6, 7, 3, 5)),
-            ((1, 7, 4, 3), (6, 7, 3, 5)),
-            ((7, 4, 3), (6, 7, 3, 5)),
-            ((1, 5, 3, 2), (6, 5, 2, 4)),
-            ((5, 3, 2), (6, 5, 2, 4)),
-            ((1, 3, 3), (10, 1, 3, 1)),
-            ((2, 3, 3), (10, 1, 3, 1)),
-            ((10, 2, 3, 3), (10, 1, 3, 1)),
-            ((10, 2, 3, 3), (10, 2, 3, 1)),
-            ((10, 1, 1, 3), (1, 3, 3)),
-            ((10, 1, 1, 3), (2, 3, 3)),
-            ((10, 1, 1, 3), (10, 2, 3, 3)),
-            ((10, 2, 1, 3), (10, 2, 3, 3)),
-            ((3, 3, 1), (3, 1, 2)),
-            ((3, 3, 1), (1, 1, 2)),
-            ((1, 3, 1), (3, 1, 2)),
-            ((4,), (3, 4, 1)),
-            ((3, 1, 4), (4,)),
-            ((3, 1, 4), (3, 4, 1)),
-            ((4, 1, 3, 1), (1, 3, 1, 2)),
-            ((1, 3, 3, 1), (4, 1, 1, 2)),
-        ],
-    )
-    def test_matmul(self, order1, order2, shape1, shape2):
-        # input should be float type otherwise they are copied to c-contigous array
-        # so testing order becomes meaningless
-        dtype = dpnp.default_float_type()
-        a1 = numpy.arange(numpy.prod(shape1), dtype=dtype).reshape(shape1)
-        a2 = numpy.arange(numpy.prod(shape2), dtype=dtype).reshape(shape2)
-        a1 = numpy.array(a1, order=order1)
-        a2 = numpy.array(a2, order=order2)
-
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-        result = dpnp.matmul(b1, b2)
-        expected = numpy.matmul(a1, a2)
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "order1, order2", [("C", "C"), ("C", "F"), ("F", "C"), ("F", "F")]
-    )
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((2, 0), (0, 3)),
-            ((0, 4), (4, 3)),
-            ((2, 4), (4, 0)),
-            ((1, 2, 3), (0, 3, 5)),
-            ((0, 2, 3), (1, 3, 5)),
-            ((2, 3), (0, 3, 5)),
-            ((0, 2, 3), (3, 5)),
-            ((0, 0, 4, 3), (1, 1, 3, 5)),
-            ((6, 0, 4, 3), (1, 3, 5)),
-            ((0, 7, 4, 3), (3, 5)),
-            ((0, 7, 4, 3), (1, 7, 3, 5)),
-            ((0, 7, 4, 3), (7, 3, 5)),
-            ((6, 0, 4, 3), (6, 1, 3, 5)),
-            ((1, 1, 4, 3), (0, 0, 3, 5)),
-            ((1, 4, 3), (6, 0, 3, 5)),
-            ((4, 3), (0, 0, 3, 5)),
-            ((6, 1, 4, 3), (6, 0, 3, 5)),
-            ((1, 7, 4, 3), (0, 7, 3, 5)),
-            ((7, 4, 3), (0, 7, 3, 5)),
-        ],
-    )
-    def test_matmul_empty(self, order1, order2, shape1, shape2):
-        dtype = dpnp.default_float_type()
-        a1 = numpy.arange(numpy.prod(shape1), dtype=dtype).reshape(shape1)
-        a2 = numpy.arange(numpy.prod(shape2), dtype=dtype).reshape(shape2)
-        a1 = numpy.array(a1, order=order1)
-        a2 = numpy.array(a2, order=order2)
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-
-        result = dpnp.matmul(b1, b2)
-        expected = numpy.matmul(a1, a2)
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((2, 4), (4, 3)),
-            ((4, 2, 3), (4, 3, 5)),
-            ((6, 7, 4, 3), (6, 7, 3, 5)),
-        ],
-        ids=[
-            "((2, 4), (4, 3))",
-            "((4, 2, 3), (4, 3, 5))",
-            "((6, 7, 4, 3), (6, 7, 3, 5))",
-        ],
-    )
-    def test_matmul_bool(self, shape1, shape2):
-        x = numpy.arange(2, dtype=numpy.bool_)
-        a1 = numpy.resize(x, numpy.prod(shape1)).reshape(shape1)
-        a2 = numpy.resize(x, numpy.prod(shape2)).reshape(shape2)
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-
-        result = dpnp.matmul(b1, b2)
-        expected = numpy.matmul(a1, a2)
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
-    @pytest.mark.parametrize(
-        "axes",
-        [
-            [(-3, -1), (0, 2), (-2, -3)],
-            [(3, 1), (2, 0), (3, 1)],
-            [(3, 1), (2, 0), (0, 1)],
-        ],
-    )
-    def test_matmul_axes_ND_ND(self, dtype, axes):
-        a = generate_random_numpy_array((2, 5, 3, 4), dtype)
-        b = generate_random_numpy_array((4, 2, 5, 3), dtype)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
-
-        result = dpnp.matmul(ia, ib, axes=axes)
-        expected = numpy.matmul(a, b, axes=axes)
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "axes",
-        [
-            [(1, 0), (0), (0)],
-            [(1, 0), 0, 0],
-            [(1, 0), (0,), (0,)],
-        ],
-    )
-    def test_matmul_axes_ND_1D(self, axes):
-        a = numpy.arange(3 * 4 * 5).reshape(3, 4, 5)
-        b = numpy.arange(3)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
-
-        result = dpnp.matmul(ia, ib, axes=axes)
-        expected = numpy.matmul(a, b, axes=axes)
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "axes",
-        [
-            [(0,), (0, 1), (0)],
-            [(0), (0, 1), 0],
-            [0, (0, 1), (0,)],
-        ],
-    )
-    def test_matmul_axes_1D_ND(self, axes):
-        a = numpy.arange(3 * 4 * 5).reshape(3, 4, 5)
-        b = numpy.arange(3)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
-
-        result = dpnp.matmul(ib, ia, axes=axes)
-        expected = numpy.matmul(b, a, axes=axes)
-        assert_dtype_allclose(result, expected)
-
-    def test_matmul_axes_1D_1D(self):
-        a = numpy.arange(3)
-        ia = dpnp.array(a)
-
-        axes = [0, 0, ()]
-        result = dpnp.matmul(ia, ia, axes=axes)
-        expected = numpy.matmul(a, a, axes=axes)
-        assert_dtype_allclose(result, expected)
-
-        out = dpnp.empty((), dtype=ia.dtype)
-        result = dpnp.matmul(ia, ia, axes=axes, out=out)
-        assert out is result
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
-    @pytest.mark.parametrize(
-        "axes, out_shape",
-        [
-            ([(-3, -1), (0, 2), (-2, -3)], (2, 5, 5, 3)),
-            ([(3, 1), (2, 0), (3, 1)], (2, 4, 3, 4)),
-            ([(3, 1), (2, 0), (1, 2)], (2, 4, 4, 3)),
-        ],
-    )
-    def test_matmul_axes_out(self, dtype, axes, out_shape):
-        a = generate_random_numpy_array((2, 5, 3, 4), dtype)
-        b = generate_random_numpy_array((4, 2, 5, 3), dtype)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
-
-        out_dp = dpnp.empty(out_shape, dtype=dtype)
-        result = dpnp.matmul(ia, ib, axes=axes, out=out_dp)
-        assert result is out_dp
-        expected = numpy.matmul(a, b, axes=axes)
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "axes, b_shape, out_shape",
-        [
-            ([(1, 0), 0, 0], (3,), (4, 5)),
-            ([(1, 0), 0, 1], (3,), (5, 4)),
-            ([(1, 0), (0, 1), (1, 2)], (3, 1), (5, 4, 1)),
-            ([(1, 0), (0, 1), (0, 2)], (3, 1), (4, 5, 1)),
-            ([(1, 0), (0, 1), (1, 0)], (3, 1), (1, 4, 5)),
-        ],
-    )
-    def test_matmul_axes_out_1D(self, axes, b_shape, out_shape):
-        a = numpy.arange(3 * 4 * 5).reshape(3, 4, 5)
-        b = numpy.arange(3).reshape(b_shape)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
-
-        out_dp = dpnp.empty(out_shape)
-        out_np = numpy.empty(out_shape)
-        result = dpnp.matmul(ia, ib, axes=axes, out=out_dp)
-        assert result is out_dp
-        expected = numpy.matmul(a, b, axes=axes, out=out_np)
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize("in_dt", get_all_dtypes(no_bool=True))
-    @pytest.mark.parametrize(
-        "out_dt", get_all_dtypes(no_bool=True, no_none=True)
-    )
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((2, 4), (4, 3)),
-            ((4, 2, 3), (4, 3, 5)),
-            ((6, 7, 4, 3), (6, 7, 3, 5)),
-        ],
-        ids=[
-            "((2, 4), (4, 3))",
-            "((4, 2, 3), (4, 3, 5))",
-            "((6, 7, 4, 3), (6, 7, 3, 5))",
-        ],
-    )
-    def test_matmul_dtype_matrix_inout(self, in_dt, out_dt, shape1, shape2):
-        a1 = generate_random_numpy_array(shape1, in_dt)
-        a2 = generate_random_numpy_array(shape2, in_dt)
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-
-        if dpnp.can_cast(dpnp.result_type(b1, b2), out_dt, casting="same_kind"):
-            result = dpnp.matmul(b1, b2, dtype=out_dt)
-            expected = numpy.matmul(a1, a2, dtype=out_dt)
-            assert_dtype_allclose(result, expected)
-        else:
-            with pytest.raises(TypeError):
-                dpnp.matmul(b1, b2, dtype=out_dt)
-
-    @pytest.mark.parametrize("dtype1", get_all_dtypes(no_bool=True))
-    @pytest.mark.parametrize("dtype2", get_all_dtypes(no_bool=True))
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((2, 4), (4, 3)),
-            ((4, 2, 3), (4, 3, 5)),
-            ((6, 7, 4, 3), (6, 7, 3, 5)),
-        ],
-        ids=[
-            "((2, 4), (4, 3))",
-            "((4, 2, 3), (4, 3, 5))",
-            "((6, 7, 4, 3), (6, 7, 3, 5))",
-        ],
-    )
-    def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape1, shape2):
-        a1 = generate_random_numpy_array(shape1, dtype1)
-        a2 = generate_random_numpy_array(shape2, dtype2)
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-
-        result = dpnp.matmul(b1, b2)
-        expected = numpy.matmul(a1, a2)
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize("order1", ["C", "F", "A"])
-    @pytest.mark.parametrize("order2", ["C", "F", "A"])
-    @pytest.mark.parametrize("order", ["C", "F", "K", "A"])
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((2, 4), (4, 3)),
-            ((4, 2, 3), (4, 3, 5)),
-            ((6, 7, 4, 3), (6, 7, 3, 5)),
-        ],
-        ids=[
-            "((2, 4), (4, 3))",
-            "((4, 2, 3), (4, 3, 5))",
-            "((6, 7, 4, 3), (6, 7, 3, 5))",
-        ],
-    )
-    def test_matmul_order(self, order1, order2, order, shape1, shape2):
-        a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1, order=order1)
-        a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2, order=order2)
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-
-        result = dpnp.matmul(b1, b2, order=order)
-        expected = numpy.matmul(a1, a2, order=order)
-        # For the special case of shape1 = (6, 7, 4, 3), shape2 = (6, 7, 3, 5)
-        # and order1 = "F" and order2 = "F", NumPy result is not c-contiguous
-        # nor f-contiguous, while dpnp (and cupy) results are c-contiguous
-        if not (
-            shape1 == (6, 7, 4, 3)
-            and shape2 == (6, 7, 3, 5)
-            and order1 == "F"
-            and order2 == "F"
-            and order == "K"
-        ):
-            assert result.flags.c_contiguous == expected.flags.c_contiguous
-        assert result.flags.f_contiguous == expected.flags.f_contiguous
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "stride",
-        [(-2, -2, -2, -2), (2, 2, 2, 2), (-2, 2, -2, 2), (2, -2, 2, -2)],
-        ids=["-2", "2", "(-2, 2)", "(2, -2)"],
-    )
-    def test_matmul_strided1(self, stride):
-        for dim in [1, 2, 3, 4]:
-            shape = tuple(20 for _ in range(dim))
-            A = numpy.random.rand(*shape)
-            A_dp = dpnp.asarray(A)
-            slices = tuple(slice(None, None, stride[i]) for i in range(dim))
-            a = A[slices]
-            a_dp = A_dp[slices]
-            # input arrays will be copied into c-contiguous arrays
-            # the 2D base is not c-contiguous nor f-contigous
-            result = dpnp.matmul(a_dp, a_dp)
-            expected = numpy.matmul(a, a)
-            assert_dtype_allclose(result, expected)
-
-            OUT = dpnp.empty(shape, dtype=result.dtype)
-            out = OUT[slices]
-            result = dpnp.matmul(a_dp, a_dp, out=out)
-            assert result is out
-            assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "shape", [(10, 3, 3), (12, 10, 3, 3)], ids=["3D", "4D"]
-    )
-    @pytest.mark.parametrize("stride", [-1, -2, 2], ids=["-1", "-2", "2"])
-    @pytest.mark.parametrize("transpose", [False, True], ids=["False", "True"])
-    def test_matmul_strided2(self, shape, stride, transpose):
-        # one dimension (-3) is strided
-        # if negative stride, copy is needed and the base becomes c-contiguous
-        # otherwise the base remains the same as input in gemm_batch
-        A = numpy.random.rand(*shape)
-        A_dp = dpnp.asarray(A)
-        if transpose:
-            A = numpy.moveaxis(A, (-2, -1), (-1, -2))
-            A_dp = dpnp.moveaxis(A_dp, (-2, -1), (-1, -2))
-        index = [slice(None)] * len(shape)
-        index[-3] = slice(None, None, stride)
-        index = tuple(index)
-        a = A[index]
-        a_dp = A_dp[index]
-        result = dpnp.matmul(a_dp, a_dp)
-        expected = numpy.matmul(a, a)
-        assert_dtype_allclose(result, expected)
-
-        OUT = dpnp.empty(shape, dtype=result.dtype)
-        out = OUT[index]
-        result = dpnp.matmul(a_dp, a_dp, out=out)
-        assert result is out
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "stride",
-        [(-2, -2), (2, 2), (-2, 2), (2, -2)],
-        ids=["(-2, -2)", "(2, 2)", "(-2, 2)", "(2, -2)"],
-    )
-    @pytest.mark.parametrize("transpose", [False, True], ids=["False", "True"])
-    def test_matmul_strided3(self, stride, transpose):
-        # 4D case, the 1st and 2nd dimensions are strided
-        # For negative stride, copy is needed and the base becomes c-contiguous.
-        # For positive stride, no copy but reshape makes the base c-contiguous.
-        stride0, stride1 = stride
-        shape = (12, 10, 3, 3)  # 4D array
-        A = numpy.random.rand(*shape)
-        A_dp = dpnp.asarray(A)
-        if transpose:
-            A = numpy.moveaxis(A, (-2, -1), (-1, -2))
-            A_dp = dpnp.moveaxis(A_dp, (-2, -1), (-1, -2))
-        a = A[::stride0, ::stride1]
-        a_dp = A_dp[::stride0, ::stride1]
-        result = dpnp.matmul(a_dp, a_dp)
-        expected = numpy.matmul(a, a)
-        assert_dtype_allclose(result, expected)
-
-        OUT = dpnp.empty(shape, dtype=result.dtype)
-        out = OUT[::stride0, ::stride1]
-        result = dpnp.matmul(a_dp, a_dp, out=out)
-        assert result is out
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize("shape", [(8, 10)], ids=["2D"])
-    @pytest.mark.parametrize("incx", [-2, 2], ids=["-2", "2"])
-    @pytest.mark.parametrize("incy", [-2, 2], ids=["-2", "2"])
-    @pytest.mark.parametrize("transpose", [False, True], ids=["False", "True"])
-    def test_matmul_strided_mat_vec(self, shape, incx, incy, transpose):
-        # vector is strided
-        if transpose:
-            s1 = shape[-2]
-            s2 = shape[-1]
-        else:
-            s1 = shape[-1]
-            s2 = shape[-2]
-        a = numpy.random.rand(*shape)
-        B = numpy.random.rand(2 * s1)
-        a_dp = dpnp.asarray(a)
-        if transpose:
-            a = numpy.moveaxis(a, (-2, -1), (-1, -2))
-            a_dp = dpnp.moveaxis(a_dp, (-2, -1), (-1, -2))
-        B_dp = dpnp.asarray(B)
-        b = B[::incx]
-        b_dp = B_dp[::incx]
-
-        result = dpnp.matmul(a_dp, b_dp)
-        expected = numpy.matmul(a, b)
-        assert_dtype_allclose(result, expected)
-
-        out_shape = shape[:-2] + (2 * s2,)
-        OUT = dpnp.empty(out_shape, dtype=result.dtype)
-        out = OUT[..., ::incy]
-        result = dpnp.matmul(a_dp, b_dp, out=out)
-        assert result is out
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize("shape", [(8, 10)], ids=["2D"])
-    @pytest.mark.parametrize("incx", [-2, 2], ids=["-2", "2"])
-    @pytest.mark.parametrize("incy", [-2, 2], ids=["-2", "2"])
-    @pytest.mark.parametrize("transpose", [False, True], ids=["False", "True"])
-    def test_matmul_strided_vec_mat(self, shape, incx, incy, transpose):
-        # vector is strided
-        if transpose:
-            s1 = shape[-2]
-            s2 = shape[-1]
-        else:
-            s1 = shape[-1]
-            s2 = shape[-2]
-        a = numpy.random.rand(*shape)
-        B = numpy.random.rand(2 * s2)
-        a_dp = dpnp.asarray(a)
-        if transpose:
-            a = numpy.moveaxis(a, (-2, -1), (-1, -2))
-            a_dp = dpnp.moveaxis(a_dp, (-2, -1), (-1, -2))
-        B_dp = dpnp.asarray(B)
-        b = B[::incx]
-        b_dp = B_dp[::incx]
-
-        result = dpnp.matmul(b_dp, a_dp)
-        expected = numpy.matmul(b, a)
-        assert_dtype_allclose(result, expected)
-
-        out_shape = shape[:-2] + (2 * s1,)
-        OUT = dpnp.empty(out_shape, dtype=result.dtype)
-        out = OUT[..., ::incy]
-        result = dpnp.matmul(b_dp, a_dp, out=out)
-        assert result is out
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "order1, order2, out_order",
-        [
-            ("C", "C", "C"),
-            ("C", "C", "F"),
-            ("C", "F", "C"),
-            ("C", "F", "F"),
-            ("F", "C", "C"),
-            ("F", "C", "F"),
-            ("F", "F", "F"),
-            ("F", "F", "C"),
-        ],
-    )
-    @pytest.mark.parametrize(
-        "dtype", get_all_dtypes(no_none=True, no_bool=True)
-    )
-    def test_matmul_out1(self, order1, order2, out_order, dtype):
-        # test gemm with out keyword
-        a1 = numpy.arange(20, dtype=dtype).reshape(5, 4, order=order1)
-        a2 = numpy.arange(28, dtype=dtype).reshape(4, 7, order=order2)
-
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-
-        dpnp_out = dpnp.empty((5, 7), dtype=dtype, order=out_order)
-        result = dpnp.matmul(b1, b2, out=dpnp_out)
-        assert result is dpnp_out
-
-        out = numpy.empty((5, 7), dtype=dtype, order=out_order)
-        expected = numpy.matmul(a1, a2, out=out)
-        assert result.flags.c_contiguous == expected.flags.c_contiguous
-        assert result.flags.f_contiguous == expected.flags.f_contiguous
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize("trans", [True, False])
-    @pytest.mark.parametrize(
-        "dtype", get_all_dtypes(no_none=True, no_bool=True)
-    )
-    def test_matmul_out2(self, trans, dtype):
-        # test gemm_batch with out keyword
-        # the base of input arrays is c-contiguous
-        # the base of output array is c-contiguous or f-contiguous
-        a1 = numpy.arange(24, dtype=dtype).reshape(2, 3, 4)
-        a2 = numpy.arange(40, dtype=dtype).reshape(2, 4, 5)
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-
-        if trans:
-            dpnp_out = dpnp.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1)
-            out = numpy.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1)
-        else:
-            dpnp_out = dpnp.empty((2, 3, 5), dtype=dtype)
-            out = numpy.empty((2, 3, 5), dtype=dtype)
-
-        result = dpnp.matmul(b1, b2, out=dpnp_out)
-        assert result is dpnp_out
-
-        expected = numpy.matmul(a1, a2, out=out)
-        assert result.flags.c_contiguous == expected.flags.c_contiguous
-        assert result.flags.f_contiguous == expected.flags.f_contiguous
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize("trans", [True, False])
-    @pytest.mark.parametrize(
-        "dtype", get_all_dtypes(no_none=True, no_bool=True)
-    )
-    def test_matmul_out3(self, trans, dtype):
-        # test gemm_batch with out keyword
-        # the base of input arrays is f-contiguous
-        # the base of output array is c-contiguous or f-contiguous
-        a1 = numpy.arange(24, dtype=dtype).reshape(2, 4, 3)
-        a2 = numpy.arange(40, dtype=dtype).reshape(2, 5, 4)
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
-
-        a1 = numpy.asarray(a1).transpose(0, 2, 1)
-        a2 = numpy.asarray(a2).transpose(0, 2, 1)
-        b1 = b1.transpose(0, 2, 1)
-        b2 = b2.transpose(0, 2, 1)
-
-        if trans:
-            dpnp_out = dpnp.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1)
-            out = numpy.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1)
-        else:
-            dpnp_out = dpnp.empty((2, 3, 5), dtype=dtype)
-            out = numpy.empty((2, 3, 5), dtype=dtype)
-
-        result = dpnp.matmul(b1, b2, out=dpnp_out)
-        assert result is dpnp_out
-
-        expected = numpy.matmul(a1, a2, out=out)
-        assert result.flags.c_contiguous == expected.flags.c_contiguous
-        assert result.flags.f_contiguous == expected.flags.f_contiguous
-        assert_dtype_allclose(result, expected)
-
-    @pytest.mark.parametrize(
-        "out_shape",
-        [
-            ((4, 5)),
-            ((6,)),
-            ((4, 7, 2)),
-        ],
-    )
-    def test_matmul_out_0D(self, out_shape):
-        # for matmul of 1-D arrays, output is 0-D and if out keyword is given
-        # NumPy repeats the data to match the shape of output array
-        a = numpy.arange(3)
-        b = dpnp.asarray(a)
-
-        numpy_out = numpy.empty(out_shape)
-        dpnp_out = dpnp.empty(out_shape)
-        result = dpnp.matmul(b, b, out=dpnp_out)
-        expected = numpy.matmul(a, a, out=numpy_out)
-        assert result is dpnp_out
-        assert_dtype_allclose(result, expected)
-
-    @testing.slow
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((5000, 5000, 2, 2), (5000, 5000, 2, 2)),
-            ((2, 2), (5000, 5000, 2, 2)),
-            ((5000, 5000, 2, 2), (2, 2)),
-        ],
-    )
-    def test_matmul_large(self, shape1, shape2):
-        a = generate_random_numpy_array(shape1)
-        b = generate_random_numpy_array(shape2)
-        a_dp = dpnp.asarray(a)
-        b_dp = dpnp.asarray(b)
-
-        result = dpnp.matmul(a_dp, b_dp)
-        expected = numpy.matmul(a, b)
-        assert_dtype_allclose(result, expected, factor=24)
-
-    @testing.with_requires("numpy>=2.0")
-    def test_linalg_matmul(self):
-        a = numpy.ones((3, 4))
-        b = numpy.ones((4, 5))
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
-
-        result = dpnp.linalg.matmul(ia, ib)
-        expected = numpy.linalg.matmul(a, b)
-        assert_array_equal(result, expected)
-
-    @pytest.mark.parametrize(
-        "sh1, sh2",
-        [
-            ((2, 3, 3), (2, 3, 3)),
-            ((3, 3, 3, 3), (3, 3, 3, 3)),
-        ],
-        ids=["gemm", "gemm_batch"],
-    )
-    def test_matmul_with_offsets(self, sh1, sh2):
-        a = generate_random_numpy_array(sh1)
-        b = generate_random_numpy_array(sh2)
-        ia, ib = dpnp.array(a), dpnp.array(b)
-
-        result = ia[1] @ ib[1]
-        expected = a[1] @ b[1]
-        assert_dtype_allclose(result, expected)
-
-
-class TestMatmulInplace:
-    ALL_DTYPES = get_all_dtypes(no_none=True)
-    DTYPES = {}
-    for i in ALL_DTYPES:
-        for j in ALL_DTYPES:
-            if numpy.can_cast(j, i):
-                DTYPES[f"{i}-{j}"] = (i, j)
-
-    @pytest.mark.parametrize("dtype1, dtype2", DTYPES.values())
-    def test_basic(self, dtype1, dtype2):
-        a = numpy.arange(10).reshape(5, 2).astype(dtype1)
-        b = numpy.ones((2, 2), dtype=dtype2)
-        ia, ib = dpnp.array(a), dpnp.array(b)
-        ia_id = id(ia)
-
-        a @= b
-        ia @= ib
-        assert id(ia) == ia_id
-        assert_dtype_allclose(ia, a)
-
-    @pytest.mark.parametrize(
-        "a_sh, b_sh",
-        [
-            pytest.param((10**5, 10), (10, 10), id="2d_large"),
-            pytest.param((10**4, 10, 10), (1, 10, 10), id="3d_large"),
-            pytest.param((3,), (3,), id="1d"),
-            pytest.param((3, 3), (3,), id="2d_1d"),
-            pytest.param((3,), (3, 3), id="1d_2d"),
-            pytest.param((3, 3), (3, 1), id="2d_broadcast"),
-            pytest.param((1, 3), (3, 3), id="2d_broadcast_reverse"),
-            pytest.param((3, 3, 3), (1, 3, 1), id="3d_broadcast1"),
-            pytest.param((3, 3, 3), (1, 3, 3), id="3d_broadcast2"),
-            pytest.param((3, 3, 3), (3, 3, 1), id="3d_broadcast3"),
-            pytest.param((1, 3, 3), (3, 3, 3), id="3d_broadcast_reverse1"),
-            pytest.param((3, 1, 3), (3, 3, 3), id="3d_broadcast_reverse2"),
-            pytest.param((1, 1, 3), (3, 3, 3), id="3d_broadcast_reverse3"),
-        ],
-    )
-    def test_shapes(self, a_sh, b_sh):
-        a_sz, b_sz = numpy.prod(a_sh), numpy.prod(b_sh)
-        a = numpy.arange(a_sz).reshape(a_sh).astype(numpy.float64)
-        b = numpy.arange(b_sz).reshape(b_sh)
-
-        ia, ib = dpnp.array(a), dpnp.array(b)
-        ia_id = id(ia)
-
-        expected = a @ b
-        if expected.shape != a_sh:
-            if len(b_sh) == 1:
-                # check the exception matches NumPy
-                match = "inplace matrix multiplication requires"
-            else:
-                match = None
-
-            with pytest.raises(ValueError, match=match):
-                a @= b
-
-            with pytest.raises(ValueError, match=match):
-                ia @= ib
-        else:
-            ia @= ib
-            assert id(ia) == ia_id
-            assert_dtype_allclose(ia, expected)
-
-
-class TestMatmulInvalidCases:
-    @pytest.mark.parametrize("xp", [numpy, dpnp])
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((3, 2), ()),
-            ((), (3, 2)),
-            ((), ()),
-        ],
-    )
-    def test_zero_dim(self, xp, shape1, shape2):
-        x1 = xp.arange(numpy.prod(shape1), dtype=xp.float32).reshape(shape1)
-        x2 = xp.arange(numpy.prod(shape2), dtype=xp.float32).reshape(shape2)
-
-        with pytest.raises(ValueError):
-            xp.matmul(x1, x2)
-
-    @pytest.mark.parametrize("xp", [numpy, dpnp])
-    @pytest.mark.parametrize(
-        "shape1, shape2",
-        [
-            ((3,), (4,)),
-            ((2, 3), (4, 5)),
-            ((2, 4), (3, 5)),
-            ((2, 3), (4,)),
-            ((3,), (4, 5)),
-            ((2, 2, 3), (2, 4, 5)),
-            ((3, 2, 3), (2, 4, 5)),
-            ((4, 3, 2), (6, 5, 2, 4)),
-            ((6, 5, 3, 2), (3, 2, 4)),
-        ],
-    )
-    def test_invalid_shape(self, xp, shape1, shape2):
-        x1 = xp.arange(numpy.prod(shape1), dtype=xp.float32).reshape(shape1)
-        x2 = xp.arange(numpy.prod(shape2), dtype=xp.float32).reshape(shape2)
-
-        with pytest.raises(ValueError):
-            xp.matmul(x1, x2)
-
-    @pytest.mark.parametrize("xp", [numpy, dpnp])
-    @pytest.mark.parametrize(
-        "shape1, shape2, out_shape",
-        [
-            ((5, 4, 3), (3, 1), (3, 4, 1)),
-            ((5, 4, 3), (3, 1), (5, 6, 1)),
-            ((5, 4, 3), (3, 1), (5, 4, 2)),
-            ((5, 4, 3), (3, 1), (4, 1)),
-            ((5, 4, 3), (3,), (5, 3)),
-            ((5, 4, 3), (3,), (6, 4)),
-            ((4,), (3, 4, 5), (4, 5)),
-            ((4,), (3, 4, 5), (3, 6)),
-        ],
-    )
-    def test_invalid_shape_out(self, xp, shape1, shape2, out_shape):
-        x1 = xp.arange(numpy.prod(shape1), dtype=xp.float32).reshape(shape1)
-        x2 = xp.arange(numpy.prod(shape2), dtype=xp.float32).reshape(shape2)
-        res = xp.empty(out_shape)
-
-        with pytest.raises(ValueError):
-            xp.matmul(x1, x2, out=res)
-
-    @pytest.mark.parametrize("xp", [numpy, dpnp])
-    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)[:-2])
-    def test_invalid_dtype(self, xp, dtype):
-        dpnp_dtype = get_all_dtypes(no_none=True)[-1]
-        a1 = xp.arange(5 * 4, dtype=dpnp_dtype).reshape(5, 4)
-        a2 = xp.arange(7 * 4, dtype=dpnp_dtype).reshape(4, 7)
-        dp_out = xp.empty((5, 7), dtype=dtype)
-
-        with pytest.raises(TypeError):
-            xp.matmul(a1, a2, out=dp_out)
-
-    def test_exe_q(self):
-        x1 = dpnp.ones((5, 4), sycl_queue=dpctl.SyclQueue())
-        x2 = dpnp.ones((4, 7), sycl_queue=dpctl.SyclQueue())
-        with pytest.raises(ValueError):
-            dpnp.matmul(x1, x2)
-
-        x1 = dpnp.ones((5, 4))
-        x2 = dpnp.ones((4, 7))
-        out = dpnp.empty((5, 7), sycl_queue=dpctl.SyclQueue())
-        with pytest.raises(ExecutionPlacementError):
-            dpnp.matmul(x1, x2, out=out)
-
-    @pytest.mark.parametrize("xp", [numpy, dpnp])
-    def test_matmul_casting(self, xp):
-        a1 = xp.arange(2 * 4, dtype=xp.float32).reshape(2, 4)
-        a2 = xp.arange(4 * 3).reshape(4, 3)
-
-        res = xp.empty((2, 3), dtype=xp.int64)
-        with pytest.raises(TypeError):
-            xp.matmul(a1, a2, out=res, casting="safe")
-
-    def test_matmul_not_implemented(self):
-        a1 = dpnp.arange(2 * 4).reshape(2, 4)
-        a2 = dpnp.arange(4 * 3).reshape(4, 3)
-
-        with pytest.raises(NotImplementedError):
-            dpnp.matmul(a1, a2, subok=False)
-
-        with pytest.raises(NotImplementedError):
-            signature = (dpnp.float32, dpnp.float32, dpnp.float32)
-            dpnp.matmul(a1, a2, signature=signature)
-
-        with pytest.raises(NotImplementedError):
-            dpnp.matmul(a1, a2, axis=2)
-
-    @pytest.mark.parametrize("xp", [numpy, dpnp])
-    def test_matmul_axes(self, xp):
-        a1 = xp.arange(120).reshape(2, 5, 3, 4)
-        a2 = xp.arange(120).reshape(4, 2, 5, 3)
-
-        # axes must be a list
-        axes = ((3, 1), (2, 0), (0, 1))
-        with pytest.raises(TypeError):
-            xp.matmul(a1, a2, axes=axes)
-
-        # axes must be be a list of three tuples
-        axes = [(3, 1), (2, 0)]
-        with pytest.raises(ValueError):
-            xp.matmul(a1, a2, axes=axes)
-
-        # axes item should be a tuple
-        axes = [(3, 1), (2, 0), [0, 1]]
-        with pytest.raises(TypeError):
-            xp.matmul(a1, a2, axes=axes)
-
-        # axes item should be a tuple with 2 elements
-        axes = [(3, 1), (2, 0), (0, 1, 2)]
-        with pytest.raises(AxisError):
-            xp.matmul(a1, a2, axes=axes)
-
-        # axes must be an integer
-        axes = [(3, 1), (2, 0), (0.0, 1)]
-        with pytest.raises(TypeError):
-            xp.matmul(a1, a2, axes=axes)
-
-        # axes item 2 should be an empty tuple
-        a = xp.arange(3)
-        axes = [0, 0, 0]
-        with pytest.raises(AxisError):
-            xp.matmul(a, a, axes=axes)
-
-        a = xp.arange(3 * 4 * 5).reshape(3, 4, 5)
-        b = xp.arange(3)
-        # list object cannot be interpreted as an integer
-        axes = [(1, 0), (0), [0]]
-        with pytest.raises(TypeError):
-            xp.matmul(a, b, axes=axes)
-
-        # axes item should be a tuple with a single element, or an integer
-        axes = [(1, 0), (0), (0, 1)]
-        with pytest.raises(AxisError):
-            xp.matmul(a, b, axes=axes)
-
-
 def test_elemenwise_nin_nout():
     assert dpnp.abs.nin == 1
     assert dpnp.add.nin == 2
diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py
index 09dfa7d815f2..927c3af94d0e 100644
--- a/dpnp/tests/test_product.py
+++ b/dpnp/tests/test_product.py
@@ -1,8 +1,9 @@
 import dpctl
 import numpy
 import pytest
+from dpctl.tensor._numpy_helper import AxisError
 from dpctl.utils import ExecutionPlacementError
-from numpy.testing import assert_raises
+from numpy.testing import assert_array_equal, assert_raises
 
 import dpnp
 
@@ -593,6 +594,906 @@ def test_order(self, order):
         assert_dtype_allclose(result, expected)
 
 
+class TestMatmul:
+    def setup_method(self):
+        numpy.random.seed(42)
+
+    @pytest.mark.parametrize(
+        "order1, order2", [("C", "C"), ("C", "F"), ("F", "C"), ("F", "F")]
+    )
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((4,), (4,)),
+            ((1, 4), (4, 1)),
+            ((4,), (4, 2)),
+            ((1, 4), (4, 2)),
+            ((2, 4), (4,)),
+            ((2, 4), (4, 1)),
+            ((1, 4), (4,)),  # output should be 1-d not 0-d
+            ((4,), (4, 1)),
+            ((1, 4), (4, 1)),
+            ((2, 4), (4, 3)),
+            ((1, 2, 3), (1, 3, 5)),
+            ((4, 2, 3), (4, 3, 5)),
+            ((1, 2, 3), (4, 3, 5)),
+            ((2, 3), (4, 3, 5)),
+            ((4, 2, 3), (1, 3, 5)),
+            ((4, 2, 3), (3, 5)),
+            ((1, 1, 4, 3), (1, 1, 3, 5)),
+            ((6, 7, 4, 3), (6, 7, 3, 5)),
+            ((6, 7, 4, 3), (1, 1, 3, 5)),
+            ((6, 7, 4, 3), (1, 3, 5)),
+            ((6, 7, 4, 3), (3, 5)),
+            ((6, 7, 4, 3), (1, 7, 3, 5)),
+            ((6, 7, 4, 3), (7, 3, 5)),
+            ((6, 7, 4, 3), (6, 1, 3, 5)),
+            ((1, 1, 4, 3), (6, 7, 3, 5)),
+            ((1, 4, 3), (6, 7, 3, 5)),
+            ((4, 3), (6, 7, 3, 5)),
+            ((6, 1, 4, 3), (6, 7, 3, 5)),
+            ((1, 7, 4, 3), (6, 7, 3, 5)),
+            ((7, 4, 3), (6, 7, 3, 5)),
+            ((1, 5, 3, 2), (6, 5, 2, 4)),
+            ((5, 3, 2), (6, 5, 2, 4)),
+            ((1, 3, 3), (10, 1, 3, 1)),
+            ((2, 3, 3), (10, 1, 3, 1)),
+            ((10, 2, 3, 3), (10, 1, 3, 1)),
+            ((10, 2, 3, 3), (10, 2, 3, 1)),
+            ((10, 1, 1, 3), (1, 3, 3)),
+            ((10, 1, 1, 3), (2, 3, 3)),
+            ((10, 1, 1, 3), (10, 2, 3, 3)),
+            ((10, 2, 1, 3), (10, 2, 3, 3)),
+            ((3, 3, 1), (3, 1, 2)),
+            ((3, 3, 1), (1, 1, 2)),
+            ((1, 3, 1), (3, 1, 2)),
+            ((4,), (3, 4, 1)),
+            ((3, 1, 4), (4,)),
+            ((3, 1, 4), (3, 4, 1)),
+            ((4, 1, 3, 1), (1, 3, 1, 2)),
+            ((1, 3, 3, 1), (4, 1, 1, 2)),
+            # empty arrays
+            ((2, 0), (0, 3)),
+            ((0, 4), (4, 3)),
+            ((2, 4), (4, 0)),
+            ((1, 2, 3), (0, 3, 5)),
+            ((0, 2, 3), (1, 3, 5)),
+            ((2, 3), (0, 3, 5)),
+            ((0, 2, 3), (3, 5)),
+            ((0, 0, 4, 3), (1, 1, 3, 5)),
+            ((6, 0, 4, 3), (1, 3, 5)),
+            ((0, 7, 4, 3), (3, 5)),
+            ((0, 7, 4, 3), (1, 7, 3, 5)),
+            ((0, 7, 4, 3), (7, 3, 5)),
+            ((6, 0, 4, 3), (6, 1, 3, 5)),
+            ((1, 1, 4, 3), (0, 0, 3, 5)),
+            ((1, 4, 3), (6, 0, 3, 5)),
+            ((4, 3), (0, 0, 3, 5)),
+            ((6, 1, 4, 3), (6, 0, 3, 5)),
+            ((1, 7, 4, 3), (0, 7, 3, 5)),
+            ((7, 4, 3), (0, 7, 3, 5)),
+        ],
+    )
+    def test_basic(self, order1, order2, shape1, shape2):
+        # input should be float type otherwise they are copied to c-contigous array
+        # so testing order becomes meaningless
+        dtype = dpnp.default_float_type()
+        a = numpy.arange(numpy.prod(shape1), dtype=dtype).reshape(shape1)
+        b = numpy.arange(numpy.prod(shape2), dtype=dtype).reshape(shape2)
+        a = numpy.array(a, order=order1)
+        b = numpy.array(b, order=order2)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matmul(ia, ib)
+        expected = numpy.matmul(a, b)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((2, 4), (4, 3)),
+            ((4, 2, 3), (4, 3, 5)),
+            ((6, 7, 4, 3), (6, 7, 3, 5)),
+        ],
+        ids=[
+            "((2, 4), (4, 3))",
+            "((4, 2, 3), (4, 3, 5))",
+            "((6, 7, 4, 3), (6, 7, 3, 5))",
+        ],
+    )
+    def test_bool(self, shape1, shape2):
+        x = numpy.arange(2, dtype=numpy.bool_)
+        a = numpy.resize(x, numpy.prod(shape1)).reshape(shape1)
+        b = numpy.resize(x, numpy.prod(shape2)).reshape(shape2)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matmul(ia, ib)
+        expected = numpy.matmul(a, b)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "axes",
+        [
+            [(-3, -1), (0, 2), (-2, -3)],
+            [(3, 1), (2, 0), (3, 1)],
+            [(3, 1), (2, 0), (0, 1)],
+        ],
+    )
+    def test_axes_ND_ND(self, axes):
+        a = generate_random_numpy_array((2, 5, 3, 4))
+        b = generate_random_numpy_array((4, 2, 5, 3))
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matmul(ia, ib, axes=axes)
+        expected = numpy.matmul(a, b, axes=axes)
+        assert_dtype_allclose(result, expected)
+
+    @testing.with_requires("numpy>=2.2")
+    @pytest.mark.parametrize("func", ["matmul", "matvec"])
+    @pytest.mark.parametrize(
+        "axes",
+        [
+            [(1, 0), (0), (0)],
+            [(1, 0), 0, 0],
+            [(1, 0), (0,), (0,)],
+        ],
+    )
+    def test_axes_ND_1D(self, func, axes):
+        a = numpy.arange(3 * 4 * 5).reshape(3, 4, 5)
+        b = numpy.arange(3)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = getattr(dpnp, func)(ia, ib, axes=axes)
+        expected = getattr(numpy, func)(a, b, axes=axes)
+        assert_dtype_allclose(result, expected)
+
+    @testing.with_requires("numpy>=2.2")
+    @pytest.mark.parametrize("func", ["matmul", "vecmat"])
+    @pytest.mark.parametrize(
+        "axes",
+        [
+            [(0,), (0, 1), (0)],
+            [(0), (0, 1), 0],
+            [0, (0, 1), (0,)],
+        ],
+    )
+    def test_axes_1D_ND(self, func, axes):
+        a = numpy.arange(3)
+        b = numpy.arange(3 * 4 * 5).reshape(3, 4, 5)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = getattr(dpnp, func)(ia, ib, axes=axes)
+        expected = getattr(numpy, func)(a, b, axes=axes)
+        assert_dtype_allclose(result, expected)
+
+    def test_axes_1D_1D(self):
+        a = numpy.arange(3)
+        ia = dpnp.array(a)
+
+        axes = [0, 0, ()]
+        result = dpnp.matmul(ia, ia, axes=axes)
+        expected = numpy.matmul(a, a, axes=axes)
+        assert_dtype_allclose(result, expected)
+
+        iout = dpnp.empty((), dtype=ia.dtype)
+        result = dpnp.matmul(ia, ia, axes=axes, out=iout)
+        assert iout is result
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
+    @pytest.mark.parametrize(
+        "axes, out_shape",
+        [
+            ([(-3, -1), (0, 2), (-2, -3)], (2, 5, 5, 3)),
+            ([(3, 1), (2, 0), (3, 1)], (2, 4, 3, 4)),
+            ([(3, 1), (2, 0), (1, 2)], (2, 4, 4, 3)),
+        ],
+    )
+    def test_axes_out(self, dtype, axes, out_shape):
+        a = generate_random_numpy_array((2, 5, 3, 4), dtype)
+        b = generate_random_numpy_array((4, 2, 5, 3), dtype)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        iout = dpnp.empty(out_shape, dtype=dtype)
+        result = dpnp.matmul(ia, ib, axes=axes, out=iout)
+        assert result is iout
+        expected = numpy.matmul(a, b, axes=axes)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "axes, b_shape, out_shape",
+        [
+            ([(1, 0), 0, 0], (3,), (4, 5)),
+            ([(1, 0), 0, 1], (3,), (5, 4)),
+            ([(1, 0), (0, 1), (1, 2)], (3, 1), (5, 4, 1)),
+            ([(1, 0), (0, 1), (0, 2)], (3, 1), (4, 5, 1)),
+            ([(1, 0), (0, 1), (1, 0)], (3, 1), (1, 4, 5)),
+        ],
+    )
+    def test_axes_out_1D(self, axes, b_shape, out_shape):
+        a = numpy.arange(3 * 4 * 5).reshape(3, 4, 5)
+        b = numpy.arange(3).reshape(b_shape)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        iout = dpnp.empty(out_shape)
+        out = numpy.empty(out_shape)
+        result = dpnp.matmul(ia, ib, axes=axes, out=iout)
+        assert result is iout
+        expected = numpy.matmul(a, b, axes=axes, out=out)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("in_dt", get_all_dtypes(no_bool=True))
+    @pytest.mark.parametrize(
+        "out_dt", get_all_dtypes(no_bool=True, no_none=True)
+    )
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((2, 4), (4, 3)),
+            ((4, 2, 3), (4, 3, 5)),
+            ((6, 7, 4, 3), (6, 7, 3, 5)),
+        ],
+        ids=[
+            "((2, 4), (4, 3))",
+            "((4, 2, 3), (4, 3, 5))",
+            "((6, 7, 4, 3), (6, 7, 3, 5))",
+        ],
+    )
+    def test_dtype_matrix_inout(self, in_dt, out_dt, shape1, shape2):
+        a = generate_random_numpy_array(shape1, in_dt)
+        b = generate_random_numpy_array(shape2, in_dt)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        if dpnp.can_cast(dpnp.result_type(ia, ib), out_dt, casting="same_kind"):
+            result = dpnp.matmul(ia, ib, dtype=out_dt)
+            expected = numpy.matmul(a, b, dtype=out_dt)
+            assert_dtype_allclose(result, expected)
+        else:
+            assert_raises(TypeError, dpnp.matmul, ia, ib, dtype=out_dt)
+            assert_raises(TypeError, numpy.matmul, a, b, dtype=out_dt)
+
+    @pytest.mark.parametrize("dtype1", get_all_dtypes(no_bool=True))
+    @pytest.mark.parametrize("dtype2", get_all_dtypes(no_bool=True))
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((2, 4), (4, 3)),
+            ((4, 2, 3), (4, 3, 5)),
+            ((6, 7, 4, 3), (6, 7, 3, 5)),
+        ],
+        ids=[
+            "((2, 4), (4, 3))",
+            "((4, 2, 3), (4, 3, 5))",
+            "((6, 7, 4, 3), (6, 7, 3, 5))",
+        ],
+    )
+    def test_dtype_matrix_inputs(self, dtype1, dtype2, shape1, shape2):
+        a = generate_random_numpy_array(shape1, dtype1)
+        b = generate_random_numpy_array(shape2, dtype2)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matmul(ia, ib)
+        expected = numpy.matmul(a, b)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("order1", ["C", "F", "A"])
+    @pytest.mark.parametrize("order2", ["C", "F", "A"])
+    @pytest.mark.parametrize("order", ["C", "F", "K", "A"])
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((2, 4), (4, 3)),
+            ((4, 2, 3), (4, 3, 5)),
+            ((6, 7, 4, 3), (6, 7, 3, 5)),
+        ],
+        ids=[
+            "((2, 4), (4, 3))",
+            "((4, 2, 3), (4, 3, 5))",
+            "((6, 7, 4, 3), (6, 7, 3, 5))",
+        ],
+    )
+    def test_order(self, order1, order2, order, shape1, shape2):
+        a = numpy.arange(numpy.prod(shape1)).reshape(shape1, order=order1)
+        b = numpy.arange(numpy.prod(shape2)).reshape(shape2, order=order2)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matmul(ia, ib, order=order)
+        expected = numpy.matmul(a, b, order=order)
+        # For the special case of shape1 = (6, 7, 4, 3), shape2 = (6, 7, 3, 5)
+        # and order1 = "F" and order2 = "F", NumPy result is not c-contiguous
+        # nor f-contiguous, while dpnp (and cupy) results are c-contiguous
+        if not (
+            shape1 == (6, 7, 4, 3)
+            and shape2 == (6, 7, 3, 5)
+            and order1 == "F"
+            and order2 == "F"
+            and order == "K"
+        ):
+            assert result.flags.c_contiguous == expected.flags.c_contiguous
+        assert result.flags.f_contiguous == expected.flags.f_contiguous
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "stride",
+        [(-2, -2, -2, -2), (2, 2, 2, 2), (-2, 2, -2, 2), (2, -2, 2, -2)],
+        ids=["-2", "2", "(-2, 2)", "(2, -2)"],
+    )
+    def test_strided1(self, stride):
+        for dim in [1, 2, 3, 4]:
+            shape = tuple(20 for _ in range(dim))
+            A = numpy.random.rand(*shape)
+            iA = dpnp.asarray(A)
+            slices = tuple(slice(None, None, stride[i]) for i in range(dim))
+            a = A[slices]
+            ia = iA[slices]
+            # input arrays will be copied into c-contiguous arrays
+            # the 2D base is not c-contiguous nor f-contigous
+            result = dpnp.matmul(ia, ia)
+            expected = numpy.matmul(a, a)
+            assert_dtype_allclose(result, expected)
+
+            iOUT = dpnp.empty(shape, dtype=result.dtype)
+            iout = iOUT[slices]
+            result = dpnp.matmul(ia, ia, out=iout)
+            assert result is iout
+            assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "shape", [(10, 3, 3), (12, 10, 3, 3)], ids=["3D", "4D"]
+    )
+    @pytest.mark.parametrize("stride", [-1, -2, 2], ids=["-1", "-2", "2"])
+    @pytest.mark.parametrize("transpose", [False, True], ids=["False", "True"])
+    def test_strided2(self, shape, stride, transpose):
+        # one dimension (axis=-3) is strided
+        # if negative stride, copy is needed and the base becomes c-contiguous
+        # otherwise the base remains the same as input in gemm_batch
+        A = numpy.random.rand(*shape)
+        iA = dpnp.asarray(A)
+        if transpose:
+            A = numpy.moveaxis(A, (-2, -1), (-1, -2))
+            iA = dpnp.moveaxis(iA, (-2, -1), (-1, -2))
+        index = [slice(None)] * len(shape)
+        index[-3] = slice(None, None, stride)
+        index = tuple(index)
+        a = A[index]
+        ia = iA[index]
+        result = dpnp.matmul(ia, ia)
+        expected = numpy.matmul(a, a)
+        assert_dtype_allclose(result, expected)
+
+        iOUT = dpnp.empty(shape, dtype=result.dtype)
+        iout = iOUT[index]
+        result = dpnp.matmul(ia, ia, out=iout)
+        assert result is iout
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "stride",
+        [(-2, -2), (2, 2), (-2, 2), (2, -2)],
+        ids=["(-2, -2)", "(2, 2)", "(-2, 2)", "(2, -2)"],
+    )
+    @pytest.mark.parametrize("transpose", [False, True])
+    def test_strided3(self, stride, transpose):
+        # 4D case, the 1st and 2nd dimensions are strided
+        # For negative stride, copy is needed and the base becomes c-contiguous.
+        # For positive stride, no copy but reshape makes the base c-contiguous.
+        stride0, stride1 = stride
+        shape = (12, 10, 3, 3)  # 4D array
+        A = numpy.random.rand(*shape)
+        iA = dpnp.asarray(A)
+        if transpose:
+            A = numpy.moveaxis(A, (-2, -1), (-1, -2))
+            iA = dpnp.moveaxis(iA, (-2, -1), (-1, -2))
+        a = A[::stride0, ::stride1]
+        ia = iA[::stride0, ::stride1]
+        result = dpnp.matmul(ia, ia)
+        expected = numpy.matmul(a, a)
+        assert_dtype_allclose(result, expected)
+
+        iOUT = dpnp.empty(shape, dtype=result.dtype)
+        iout = iOUT[::stride0, ::stride1]
+        result = dpnp.matmul(ia, ia, out=iout)
+        assert result is iout
+        assert_dtype_allclose(result, expected)
+
+    @testing.with_requires("numpy>=2.2")
+    @pytest.mark.parametrize("func", ["matmul", "matvec"])
+    @pytest.mark.parametrize("incx", [-2, 2])
+    @pytest.mark.parametrize("incy", [-2, 2])
+    @pytest.mark.parametrize("transpose", [False, True])
+    def test_strided_mat_vec(self, func, incx, incy, transpose):
+        # vector is strided
+        shape = (8, 10)  # 2D
+        if transpose:
+            s1 = shape[-2]
+            s2 = shape[-1]
+        else:
+            s1 = shape[-1]
+            s2 = shape[-2]
+        a = numpy.random.rand(*shape)
+        ia = dpnp.asarray(a)
+        if transpose:
+            a = numpy.moveaxis(a, (-2, -1), (-1, -2))
+            ia = dpnp.moveaxis(ia, (-2, -1), (-1, -2))
+        B = numpy.random.rand(2 * s1)
+        b = B[::incx]
+        iB = dpnp.asarray(B)
+        ib = iB[::incx]
+
+        result = getattr(dpnp, func)(ia, ib)
+        expected = getattr(numpy, func)(a, b)
+        assert_dtype_allclose(result, expected)
+
+        out_shape = shape[:-2] + (2 * s2,)
+        iOUT = dpnp.empty(out_shape, dtype=result.dtype)
+        iout = iOUT[..., ::incy]
+        result = getattr(dpnp, func)(ia, ib, out=iout)
+        assert result is iout
+        assert_dtype_allclose(result, expected)
+
+    @testing.with_requires("numpy>=2.2")
+    @pytest.mark.parametrize("func", ["matmul", "vecmat"])
+    @pytest.mark.parametrize("incx", [-2, 2])
+    @pytest.mark.parametrize("incy", [-2, 2])
+    @pytest.mark.parametrize("transpose", [False, True])
+    def test_strided_vec_mat(self, func, incx, incy, transpose):
+        # vector is strided
+        shape = (8, 10)  # 2D
+        if transpose:
+            s1 = shape[-2]
+            s2 = shape[-1]
+        else:
+            s1 = shape[-1]
+            s2 = shape[-2]
+        a = numpy.random.rand(*shape)
+        ia = dpnp.asarray(a)
+        if transpose:
+            a = numpy.moveaxis(a, (-2, -1), (-1, -2))
+            ia = dpnp.moveaxis(ia, (-2, -1), (-1, -2))
+        B = numpy.random.rand(2 * s2)
+        b = B[::incx]
+        iB = dpnp.asarray(B)
+        ib = iB[::incx]
+
+        result = getattr(dpnp, func)(ib, ia)
+        expected = getattr(numpy, func)(b, a)
+        assert_dtype_allclose(result, expected)
+
+        out_shape = shape[:-2] + (2 * s1,)
+        iOUT = dpnp.empty(out_shape, dtype=result.dtype)
+        iout = iOUT[..., ::incy]
+        result = getattr(dpnp, func)(ib, ia, out=iout)
+        assert result is iout
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "order1, order2, out_order",
+        [
+            ("C", "C", "C"),
+            ("C", "C", "F"),
+            ("C", "F", "C"),
+            ("C", "F", "F"),
+            ("F", "C", "C"),
+            ("F", "C", "F"),
+            ("F", "F", "F"),
+            ("F", "F", "C"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "dtype", get_all_dtypes(no_none=True, no_bool=True)
+    )
+    def test_out1(self, order1, order2, out_order, dtype):
+        # test gemm with out keyword
+        a1 = numpy.arange(20, dtype=dtype).reshape(5, 4, order=order1)
+        a2 = numpy.arange(28, dtype=dtype).reshape(4, 7, order=order2)
+        b1, b2 = dpnp.array(a1), dpnp.array(a2)
+
+        iout = dpnp.empty((5, 7), dtype=dtype, order=out_order)
+        result = dpnp.matmul(b1, b2, out=iout)
+        assert result is iout
+
+        out = numpy.empty((5, 7), dtype=dtype, order=out_order)
+        expected = numpy.matmul(a1, a2, out=out)
+        assert result.flags.c_contiguous == expected.flags.c_contiguous
+        assert result.flags.f_contiguous == expected.flags.f_contiguous
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("trans", [True, False])
+    @pytest.mark.parametrize(
+        "dtype", get_all_dtypes(no_none=True, no_bool=True)
+    )
+    def test_out2(self, trans, dtype):
+        # test gemm_batch with out keyword
+        # the base of input arrays is c-contiguous
+        # the base of output array is c-contiguous or f-contiguous
+        a = numpy.arange(24, dtype=dtype).reshape(2, 3, 4)
+        b = numpy.arange(40, dtype=dtype).reshape(2, 4, 5)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        if trans:
+            iout = dpnp.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1)
+            out = numpy.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1)
+        else:
+            iout = dpnp.empty((2, 3, 5), dtype=dtype)
+            out = numpy.empty((2, 3, 5), dtype=dtype)
+
+        result = dpnp.matmul(ia, ib, out=iout)
+        assert result is iout
+
+        expected = numpy.matmul(a, b, out=out)
+        assert result.flags.c_contiguous == expected.flags.c_contiguous
+        assert result.flags.f_contiguous == expected.flags.f_contiguous
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("trans", [True, False])
+    @pytest.mark.parametrize(
+        "dtype", get_all_dtypes(no_none=True, no_bool=True)
+    )
+    def test_out3(self, trans, dtype):
+        # test gemm_batch with out keyword
+        # the base of input arrays is f-contiguous
+        # the base of output array is c-contiguous or f-contiguous
+        a = numpy.arange(24, dtype=dtype).reshape(2, 4, 3)
+        b = numpy.arange(40, dtype=dtype).reshape(2, 5, 4)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        a = numpy.asarray(a).transpose(0, 2, 1)
+        b = numpy.asarray(b).transpose(0, 2, 1)
+        ia = ia.transpose(0, 2, 1)
+        ib = ib.transpose(0, 2, 1)
+
+        if trans:
+            iout = dpnp.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1)
+            out = numpy.empty((2, 5, 3), dtype=dtype).transpose(0, 2, 1)
+        else:
+            iout = dpnp.empty((2, 3, 5), dtype=dtype)
+            out = numpy.empty((2, 3, 5), dtype=dtype)
+
+        result = dpnp.matmul(ia, ib, out=iout)
+        assert result is iout
+
+        expected = numpy.matmul(a, b, out=out)
+        assert result.flags.c_contiguous == expected.flags.c_contiguous
+        assert result.flags.f_contiguous == expected.flags.f_contiguous
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "out_shape",
+        [
+            ((4, 5)),
+            ((6,)),
+            ((4, 7, 2)),
+        ],
+    )
+    def test_out_0D(self, out_shape):
+        # for matmul of 1-D arrays, output is 0-D and if out keyword is given
+        # NumPy repeats the data to match the shape of output array
+        a = numpy.arange(3)
+        ia = dpnp.asarray(a)
+
+        numpy_out = numpy.empty(out_shape)
+        iout = dpnp.empty(out_shape)
+        result = dpnp.matmul(ia, ia, out=iout)
+        expected = numpy.matmul(a, a, out=numpy_out)
+        assert result is iout
+        assert_dtype_allclose(result, expected)
+
+    @testing.slow
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((5000, 5000, 2, 2), (5000, 5000, 2, 2)),
+            ((2, 2), (5000, 5000, 2, 2)),
+            ((5000, 5000, 2, 2), (2, 2)),
+        ],
+    )
+    def test_large(self, shape1, shape2):
+        a = generate_random_numpy_array(shape1)
+        b = generate_random_numpy_array(shape2)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matmul(ia, ib)
+        expected = numpy.matmul(a, b)
+        assert_dtype_allclose(result, expected, factor=24)
+
+    @testing.with_requires("numpy>=2.0")
+    def test_linalg_matmul(self):
+        a = numpy.ones((3, 4))
+        b = numpy.ones((4, 5))
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.linalg.matmul(ia, ib)
+        expected = numpy.linalg.matmul(a, b)
+        assert_array_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "sh1, sh2",
+        [
+            ((2, 3, 3), (2, 3, 3)),
+            ((3, 3, 3, 3), (3, 3, 3, 3)),
+        ],
+        ids=["gemm", "gemm_batch"],
+    )
+    def test_matmul_with_offsets(self, sh1, sh2):
+        a = generate_random_numpy_array(sh1)
+        b = generate_random_numpy_array(sh2)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = ia[1] @ ib[1]
+        expected = a[1] @ b[1]
+        assert_dtype_allclose(result, expected)
+
+
+class TestMatmulInplace:
+    ALL_DTYPES = get_all_dtypes(no_none=True)
+    DTYPES = {}
+    for i in ALL_DTYPES:
+        for j in ALL_DTYPES:
+            if numpy.can_cast(j, i):
+                DTYPES[f"{i}-{j}"] = (i, j)
+
+    @pytest.mark.parametrize("dtype1, dtype2", DTYPES.values())
+    def test_basic(self, dtype1, dtype2):
+        a = numpy.arange(10).reshape(5, 2).astype(dtype1)
+        b = numpy.ones((2, 2), dtype=dtype2)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+        ia_id = id(ia)
+
+        a @= b
+        ia @= ib
+        assert id(ia) == ia_id
+        assert_dtype_allclose(ia, a)
+
+    @pytest.mark.parametrize(
+        "a_sh, b_sh",
+        [
+            pytest.param((10**5, 10), (10, 10), id="2d_large"),
+            pytest.param((10**4, 10, 10), (1, 10, 10), id="3d_large"),
+            pytest.param((3,), (3,), id="1d"),
+            pytest.param((3, 3), (3,), id="2d_1d"),
+            pytest.param((3,), (3, 3), id="1d_2d"),
+            pytest.param((3, 3), (3, 1), id="2d_broadcast"),
+            pytest.param((1, 3), (3, 3), id="2d_broadcast_reverse"),
+            pytest.param((3, 3, 3), (1, 3, 1), id="3d_broadcast1"),
+            pytest.param((3, 3, 3), (1, 3, 3), id="3d_broadcast2"),
+            pytest.param((3, 3, 3), (3, 3, 1), id="3d_broadcast3"),
+            pytest.param((1, 3, 3), (3, 3, 3), id="3d_broadcast_reverse1"),
+            pytest.param((3, 1, 3), (3, 3, 3), id="3d_broadcast_reverse2"),
+            pytest.param((1, 1, 3), (3, 3, 3), id="3d_broadcast_reverse3"),
+        ],
+    )
+    def test_shapes(self, a_sh, b_sh):
+        a_sz, b_sz = numpy.prod(a_sh), numpy.prod(b_sh)
+        a = numpy.arange(a_sz).reshape(a_sh).astype(numpy.float64)
+        b = numpy.arange(b_sz).reshape(b_sh)
+
+        ia, ib = dpnp.array(a), dpnp.array(b)
+        ia_id = id(ia)
+
+        expected = a @ b
+        if expected.shape != a_sh:
+            if len(b_sh) == 1:
+                # check the exception matches NumPy
+                match = "inplace matrix multiplication requires"
+            else:
+                match = None
+
+            with pytest.raises(ValueError, match=match):
+                a @= b
+
+            with pytest.raises(ValueError, match=match):
+                ia @= ib
+        else:
+            ia @= ib
+            assert id(ia) == ia_id
+            assert_dtype_allclose(ia, expected)
+
+
+class TestMatmulInvalidCases:
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((3, 2), ()),
+            ((), (3, 2)),
+            ((), ()),
+        ],
+    )
+    def test_zero_dim(self, xp, shape1, shape2):
+        a = xp.arange(numpy.prod(shape1), dtype=xp.float32).reshape(shape1)
+        b = xp.arange(numpy.prod(shape2), dtype=xp.float32).reshape(shape2)
+
+        assert_raises(ValueError, xp.matmul, a, b)
+
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((3,), (4,)),
+            ((2, 3), (4, 5)),
+            ((2, 4), (3, 5)),
+            ((2, 3), (4,)),
+            ((3,), (4, 5)),
+            ((2, 2, 3), (2, 4, 5)),
+            ((3, 2, 3), (2, 4, 5)),
+            ((4, 3, 2), (6, 5, 2, 4)),
+            ((6, 5, 3, 2), (3, 2, 4)),
+        ],
+    )
+    def test_invalid_shape(self, xp, shape1, shape2):
+        a = xp.arange(numpy.prod(shape1), dtype=xp.float32).reshape(shape1)
+        b = xp.arange(numpy.prod(shape2), dtype=xp.float32).reshape(shape2)
+
+        assert_raises(ValueError, xp.matmul, a, b)
+
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    @pytest.mark.parametrize(
+        "shape1, shape2, out_shape",
+        [
+            ((5, 4, 3), (3, 1), (3, 4, 1)),
+            ((5, 4, 3), (3, 1), (5, 6, 1)),
+            ((5, 4, 3), (3, 1), (5, 4, 2)),
+            ((5, 4, 3), (3, 1), (4, 1)),
+            ((5, 4, 3), (3,), (5, 3)),
+            ((5, 4, 3), (3,), (6, 4)),
+            ((4,), (3, 4, 5), (4, 5)),
+            ((4,), (3, 4, 5), (3, 6)),
+        ],
+    )
+    def test_invalid_shape_out(self, xp, shape1, shape2, out_shape):
+        a = xp.arange(numpy.prod(shape1), dtype=xp.float32).reshape(shape1)
+        b = xp.arange(numpy.prod(shape2), dtype=xp.float32).reshape(shape2)
+        out = xp.empty(out_shape)
+
+        assert_raises(ValueError, xp.matmul, a, b, out=out)
+
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)[:-2])
+    def test_invalid_dtype(self, xp, dtype):
+        in_dtype = get_all_dtypes(no_none=True)[-1]
+        a = xp.arange(5 * 4, dtype=in_dtype).reshape(5, 4)
+        b = xp.arange(7 * 4, dtype=in_dtype).reshape(4, 7)
+        out = xp.empty((5, 7), dtype=dtype)
+
+        assert_raises(TypeError, xp.matmul, a, b, out=out)
+
+    def test_exe_q(self):
+        a = dpnp.ones((5, 4), sycl_queue=dpctl.SyclQueue())
+        b = dpnp.ones((4, 7), sycl_queue=dpctl.SyclQueue())
+        assert_raises(ValueError, dpnp.matmul, a, b)
+
+        a = dpnp.ones((5, 4))
+        b = dpnp.ones((4, 7))
+        out = dpnp.empty((5, 7), sycl_queue=dpctl.SyclQueue())
+        assert_raises(ExecutionPlacementError, dpnp.matmul, a, b, out=out)
+
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    def test_matmul_casting(self, xp):
+        a = xp.arange(2 * 4, dtype=xp.float32).reshape(2, 4)
+        b = xp.arange(4 * 3).reshape(4, 3)
+
+        res = xp.empty((2, 3), dtype=xp.int64)
+        assert_raises(TypeError, xp.matmul, a, b, out=res, casting="safe")
+
+    def test_matmul_not_implemented(self):
+        a = dpnp.arange(2 * 4).reshape(2, 4)
+        b = dpnp.arange(4 * 3).reshape(4, 3)
+
+        assert_raises(NotImplementedError, dpnp.matmul, a, b, subok=False)
+
+        signature = (dpnp.float32, dpnp.float32, dpnp.float32)
+        assert_raises(
+            NotImplementedError, dpnp.matmul, a, b, signature=signature
+        )
+
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    def test_invalid_axes(self, xp):
+        a = xp.arange(120).reshape(2, 5, 3, 4)
+        b = xp.arange(120).reshape(4, 2, 5, 3)
+
+        # axes must be a list
+        axes = ((3, 1), (2, 0), (0, 1))
+        assert_raises(TypeError, xp.matmul, a, b, axes=axes)
+
+        # axes must be be a list of three tuples
+        axes = [(3, 1), (2, 0)]
+        assert_raises(ValueError, xp.matmul, a, b, axes=axes)
+
+        # axes item should be a tuple
+        axes = [(3, 1), (2, 0), [0, 1]]
+        assert_raises(TypeError, xp.matmul, a, b, axes=axes)
+
+        # axes item should be a tuple with 2 elements
+        axes = [(3, 1), (2, 0), (0, 1, 2)]
+        assert_raises(AxisError, xp.matmul, a, b, axes=axes)
+
+        # axes must be an integer
+        axes = [(3, 1), (2, 0), (0.0, 1)]
+        assert_raises(TypeError, xp.matmul, a, b, axes=axes)
+
+        # axes item 2 should be an empty tuple
+        a = xp.arange(3)
+        axes = [0, 0, 0]
+        assert_raises(AxisError, xp.matmul, a, a, axes=axes)
+
+        # axes should be a list of three tuples
+        axes = [0, 0]
+        assert_raises(ValueError, xp.matmul, a, a, axes=axes)
+
+        a = xp.arange(3 * 4 * 5).reshape(3, 4, 5)
+        b = xp.arange(3)
+        # list object cannot be interpreted as an integer
+        axes = [(1, 0), (0), [0]]
+        assert_raises(TypeError, xp.matmul, a, b, axes=axes)
+
+        # axes item should be a tuple with a single element, or an integer
+        axes = [(1, 0), (0), (0, 1)]
+        assert_raises(AxisError, xp.matmul, a, b, axes=axes)
+
+
+@testing.with_requires("numpy>=2.2")
+class TestMatvec:
+    def setup_method(self):
+        numpy.random.seed(42)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((3, 4), (4,)),
+            ((2, 3, 4), (4,)),
+            ((3, 4), (2, 4)),
+            ((5, 1, 3, 4), (2, 4)),
+            ((2, 1, 4), (4,)),
+        ],
+    )
+    def test_basic(self, dtype, shape1, shape2):
+        a = generate_random_numpy_array(shape1, dtype)
+        b = generate_random_numpy_array(shape2, dtype)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matvec(ia, ib)
+        expected = numpy.matvec(a, b)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "axes",
+        [
+            [(-1, -3), (-2,), -3],
+            [(3, 1), 2, (0,)],
+        ],
+    )
+    def test_axes(self, axes):
+        a = generate_random_numpy_array((2, 5, 3, 4))
+        b = generate_random_numpy_array((4, 2, 5, 3))
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.matvec(ia, ib, axes=axes)
+        expected = numpy.matvec(a, b, axes=axes)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    def test_error(self, xp):
+        a = xp.ones((5,))
+        # first input does not have enough dimensions
+        assert_raises(ValueError, xp.matvec, a, a)
+
+        a = xp.ones((3, 4))
+        b = xp.ones((5,))
+        # core dimensions do not match
+        assert_raises(ValueError, xp.matvec, a, b)
+
+        a = xp.ones((2, 3, 4))
+        b = xp.ones((5, 4))
+        # broadcasting is not possible
+        assert_raises(ValueError, xp.matvec, a, b)
+
+        a = xp.ones((3, 2))
+        b = xp.ones((3, 4))
+        # two distinct core dimensions are needed, axis cannot be used
+        assert_raises(TypeError, xp.matvec, a, b, axis=-2)
+
+
 class TestMultiDot:
     def setup_method(self):
         numpy.random.seed(70)
@@ -981,9 +1882,7 @@ class TestVecdot:
     def setup_method(self):
         numpy.random.seed(42)
 
-    @pytest.mark.parametrize(
-        "dtype", get_all_dtypes(no_none=True, no_complex=True)
-    )
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
     @pytest.mark.parametrize(
         "shape1, shape2",
         [
@@ -1007,8 +1906,7 @@ def setup_method(self):
     def test_basic(self, dtype, shape1, shape2):
         a = generate_random_numpy_array(shape1, dtype)
         b = generate_random_numpy_array(shape2, dtype)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         result = dpnp.vecdot(ia, ib)
         expected = numpy.vecdot(a, b)
@@ -1022,19 +1920,17 @@ def test_basic(self, dtype, shape1, shape2):
     def test_axis1(self, axis, shape1, shape2):
         a = generate_random_numpy_array(shape1)
         b = generate_random_numpy_array(shape2)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         result = dpnp.vecdot(ia, ib)
         expected = numpy.vecdot(a, b)
         assert_dtype_allclose(result, expected)
 
     def test_axis2(self):
-        # This is a special case, `a`` cannot be broadcast_to `b`
+        # This is a special case, `a` cannot be broadcast_to `b`
         a = numpy.arange(4).reshape(1, 4)
         b = numpy.arange(60).reshape(3, 4, 5)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         result = dpnp.vecdot(ia, ib, axis=1)
         expected = numpy.vecdot(a, b, axis=1)
@@ -1046,7 +1942,7 @@ def test_axis2(self):
             [(1,), (1,), ()],
             [(0), (0), ()],
             [0, 1, ()],
-            [-2, -1, ()],
+            [-2, -1],
         ],
     )
     def test_axes(self, axes):
@@ -1057,9 +1953,9 @@ def test_axes(self, axes):
         expected = numpy.vecdot(a, a, axes=axes)
         assert_dtype_allclose(result, expected)
 
-        out = dpnp.empty((5, 5), dtype=ia.dtype)
-        result = dpnp.vecdot(ia, ia, axes=axes, out=out)
-        assert out is result
+        iout = dpnp.empty((5, 5), dtype=ia.dtype)
+        result = dpnp.vecdot(ia, ia, axes=axes, out=iout)
+        assert iout is result
         assert_dtype_allclose(result, expected)
 
     @pytest.mark.parametrize(
@@ -1073,22 +1969,20 @@ def test_axes(self, axes):
     def test_axes_out_1D(self, axes, b_shape):
         a = numpy.arange(60).reshape(4, 3, 5)
         b = numpy.arange(3).reshape(b_shape)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
-        out_dp = dpnp.empty((4, 5))
-        out_np = numpy.empty((4, 5))
-        result = dpnp.vecdot(ia, ib, axes=axes, out=out_dp)
-        assert result is out_dp
-        expected = numpy.vecdot(a, b, axes=axes, out=out_np)
+        iout = dpnp.empty((4, 5))
+        out = numpy.empty((4, 5))
+        result = dpnp.vecdot(ia, ib, axes=axes, out=iout)
+        assert result is iout
+        expected = numpy.vecdot(a, b, axes=axes, out=out)
         assert_dtype_allclose(result, expected)
 
     @pytest.mark.parametrize("stride", [2, -1, -2])
     def test_strided(self, stride):
         a = numpy.arange(100).reshape(10, 10)
         b = numpy.arange(100).reshape(10, 10)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         result = dpnp.vecdot(ia[::stride, ::stride], ib[::stride, ::stride])
         expected = numpy.vecdot(a[::stride, ::stride], b[::stride, ::stride])
@@ -1099,8 +1993,7 @@ def test_strided(self, stride):
     def test_input_dtype_matrix(self, dtype1, dtype2):
         a = generate_random_numpy_array(10, dtype1)
         b = generate_random_numpy_array(10, dtype2)
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         result = dpnp.vecdot(ia, ib)
         expected = numpy.vecdot(a, b)
@@ -1117,10 +2010,9 @@ def test_input_dtype_matrix(self, dtype1, dtype2):
     def test_order(self, order1, order2, order, shape):
         a = numpy.arange(numpy.prod(shape)).reshape(shape, order=order1)
         b = numpy.arange(numpy.prod(shape)).reshape(shape, order=order2)
-        a_dp = dpnp.asarray(a)
-        b_dp = dpnp.asarray(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
-        result = dpnp.vecdot(a_dp, b_dp, order=order)
+        result = dpnp.vecdot(ia, ib, order=order)
         expected = numpy.vecdot(a, b, order=order)
         assert result.flags.c_contiguous == expected.flags.c_contiguous
         assert result.flags.f_contiguous == expected.flags.f_contiguous
@@ -1133,9 +2025,9 @@ def test_order(self, order1, order2, order, shape):
     def test_order_trivial(self, order, shape):
         # input is both c-contiguous and f-contiguous
         a = numpy.ones(shape)
-        a_dp = dpnp.asarray(a)
+        ia = dpnp.asarray(a)
 
-        result = dpnp.vecdot(a_dp, a_dp, order=order)
+        result = dpnp.vecdot(ia, ia, order=order)
         expected = numpy.vecdot(a, a, order=order)
         if shape == (2, 4, 0) and order == "A":
             # NumPy does not behave correctly for this case, for order="A",
@@ -1161,18 +2053,16 @@ def test_order_trivial(self, order, shape):
         ],
     )
     def test_out_order(self, order1, order2, out_order):
-        a1 = numpy.arange(20).reshape(5, 4, order=order1)
-        a2 = numpy.arange(20).reshape(5, 4, order=order2)
-
-        b1 = dpnp.asarray(a1)
-        b2 = dpnp.asarray(a2)
+        a = numpy.arange(20).reshape(5, 4, order=order1)
+        b = numpy.arange(20).reshape(5, 4, order=order2)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
-        dpnp_out = dpnp.empty(5, order=out_order)
-        result = dpnp.vecdot(b1, b2, out=dpnp_out)
-        assert result is dpnp_out
+        iout = dpnp.empty(5, order=out_order)
+        result = dpnp.vecdot(ia, ib, out=iout)
+        assert result is iout
 
         out = numpy.empty(5, order=out_order)
-        expected = numpy.vecdot(a1, a2, out=out)
+        expected = numpy.vecdot(a, b, out=out)
         assert result.flags.c_contiguous == expected.flags.c_contiguous
         assert result.flags.f_contiguous == expected.flags.f_contiguous
         assert_dtype_allclose(result, expected)
@@ -1191,18 +2081,18 @@ def test_out_order(self, order1, order2, out_order):
     )
     def test_out_dtype(self, dtype1, dtype2, shape1, shape2):
         a = numpy.ones(shape1, dtype=dtype1)
-        b = dpnp.asarray(a)
+        ia = dpnp.array(a)
 
-        out_np = numpy.empty(shape2, dtype=dtype2)
-        out_dp = dpnp.asarray(out_np)
+        out = numpy.empty(shape2, dtype=dtype2)
+        iout = dpnp.array(out)
 
         if dpnp.can_cast(dtype1, dtype2, casting="same_kind"):
-            result = dpnp.vecdot(b, b, out=out_dp)
-            expected = numpy.vecdot(a, a, out=out_np)
+            result = dpnp.vecdot(ia, ia, out=iout)
+            expected = numpy.vecdot(a, a, out=out)
             assert_dtype_allclose(result, expected)
         else:
-            with pytest.raises(TypeError):
-                dpnp.vecdot(b, b, out=out_dp)
+            assert_raises(TypeError, dpnp.vecdot, ia, ia, out=iout)
+            assert_raises(TypeError, numpy.vecdot, a, a, out=iout)
 
     @pytest.mark.parametrize(
         "out_shape",
@@ -1216,52 +2106,106 @@ def test_out_0D(self, out_shape):
         # for vecdot of 1-D arrays, output is 0-D and if out keyword is given
         # NumPy repeats the data to match the shape of output array
         a = numpy.arange(3)
-        b = dpnp.asarray(a)
+        ia = dpnp.array(a)
 
-        out_np = numpy.empty(out_shape)
-        out_dp = dpnp.empty(out_shape)
-        result = dpnp.vecdot(b, b, out=out_dp)
-        expected = numpy.vecdot(a, a, out=out_np)
-        assert result is out_dp
+        out = numpy.empty(out_shape)
+        iout = dpnp.array(out)
+        result = dpnp.vecdot(ia, ia, out=iout)
+        expected = numpy.vecdot(a, a, out=out)
+        assert result is iout
         assert_dtype_allclose(result, expected)
 
     @pytest.mark.parametrize("axis", [0, 1, 2, -1, -2, -3])
     def test_linalg(self, axis):
         a = generate_random_numpy_array(4)
         b = generate_random_numpy_array((4, 4, 4))
-        ia = dpnp.array(a)
-        ib = dpnp.array(b)
+        ia, ib = dpnp.array(a), dpnp.array(b)
 
         result = dpnp.linalg.vecdot(ia, ib)
         expected = numpy.linalg.vecdot(a, b)
         assert_dtype_allclose(result, expected)
 
-    def test_error(self):
-        a = dpnp.ones((5, 4))
-        b = dpnp.ones((5, 5))
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    def test_error(self, xp):
+        a = xp.ones((5, 4))
+        b = xp.ones((5, 5))
         # core dimension differs
-        assert_raises(ValueError, dpnp.vecdot, a, b)
+        assert_raises(ValueError, xp.vecdot, a, b)
 
-        a = dpnp.ones((5, 4))
-        b = dpnp.ones((3, 4))
-        # input array not compatible
-        assert_raises(ValueError, dpnp.vecdot, a, b)
+        a = xp.ones((5, 4))
+        b = xp.ones((3, 4))
+        # input arrays are not compatible
+        assert_raises(ValueError, xp.vecdot, a, b)
 
-        a = dpnp.ones((3, 4))
-        b = dpnp.ones((3, 4))
-        c = dpnp.empty((5,))
+        a = xp.ones((3, 4))
+        b = xp.ones((3, 4))
+        c = xp.empty((5,))
         # out array shape is incorrect
-        assert_raises(ValueError, dpnp.vecdot, a, b, out=c)
+        assert_raises(ValueError, xp.vecdot, a, b, out=c)
 
         # both axes and axis cannot be specified
-        a = dpnp.ones((5, 5))
-        assert_raises(TypeError, dpnp.vecdot, a, a, axes=[0, 0, ()], axis=-1)
+        a = xp.ones((5, 5))
+        assert_raises(TypeError, xp.vecdot, a, a, axes=[0, 0, ()], axis=-1)
 
-        # subok keyword is not supported
-        assert_raises(NotImplementedError, dpnp.vecdot, a, a, subok=False)
+        # axes should be a list of three tuples
+        a = xp.ones(5)
+        axes = [0, 0, 0, 0]
+        assert_raises(ValueError, xp.vecdot, a, a, axes=axes)
 
-        # signature keyword is not supported
-        signature = (dpnp.float32, dpnp.float32, dpnp.float32)
-        assert_raises(
-            NotImplementedError, dpnp.vecdot, a, a, signature=signature
-        )
+
+@testing.with_requires("numpy>=2.2")
+class TestVecmat:
+    def setup_method(self):
+        numpy.random.seed(42)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True))
+    @pytest.mark.parametrize(
+        "shape1, shape2",
+        [
+            ((3,), (3, 4)),
+            ((3,), (2, 3, 4)),
+            ((2, 3), (3, 4)),
+            ((2, 3), (5, 1, 3, 4)),
+            ((3,), (2, 3, 1)),
+        ],
+    )
+    def test_basic(self, dtype, shape1, shape2):
+        a = generate_random_numpy_array(shape1, dtype)
+        b = generate_random_numpy_array(shape2, dtype)
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.vecmat(ia, ib)
+        expected = numpy.vecmat(a, b)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize(
+        "axes",
+        [
+            [-2, (-1, -3), (-3,)],
+            [(2,), (3, 1), 0],
+        ],
+    )
+    def test_axes(self, axes):
+        a = generate_random_numpy_array((2, 4, 3, 5))
+        b = generate_random_numpy_array((4, 2, 5, 3))
+        ia, ib = dpnp.array(a), dpnp.array(b)
+
+        result = dpnp.vecmat(ia, ib, axes=axes)
+        expected = numpy.vecmat(a, b, axes=axes)
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("xp", [numpy, dpnp])
+    def test_error(self, xp):
+        a = xp.ones((5,))
+        # second input does not have enough dimensions
+        assert_raises(ValueError, xp.vecmat, a, a)
+
+        a = xp.ones((4,))
+        b = xp.ones((3, 5))
+        # core dimensions do not match
+        assert_raises(ValueError, xp.vecmat, a, b)
+
+        a = xp.ones((3, 4))
+        b = xp.ones((2, 4, 5))
+        # broadcasting is not possible
+        assert_raises(ValueError, xp.vecmat, a, b)
diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py
index 08b7c3668126..c1041760823d 100644
--- a/dpnp/tests/test_sycl_queue.py
+++ b/dpnp/tests/test_sycl_queue.py
@@ -1011,7 +1011,7 @@ def test_2in_1out_diff_queue_but_equal_context(func, device):
     ids=[device.filter_string for device in valid_devices],
 )
 @pytest.mark.parametrize(
-    "shape_pair",
+    "shape1, shape2",
     [
         ((2, 4), (4,)),
         ((4,), (4, 3)),
@@ -1035,21 +1035,39 @@ def test_2in_1out_diff_queue_but_equal_context(func, device):
         "((6, 7, 4, 3), (6, 7, 3, 5))",
     ],
 )
-def test_matmul(device, shape_pair):
-    shape1, shape2 = shape_pair
-    a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1)
-    a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2)
+def test_matmul(device, shape1, shape2):
+    a = dpnp.arange(numpy.prod(shape1), device=device).reshape(shape1)
+    b = dpnp.arange(numpy.prod(shape2), device=device).reshape(shape2)
+    result = dpnp.matmul(a, b)
 
-    b1 = dpnp.asarray(a1, device=device)
-    b2 = dpnp.asarray(a2, device=device)
+    result_queue = result.sycl_queue
+    assert_sycl_queue_equal(result_queue, a.sycl_queue)
+    assert_sycl_queue_equal(result_queue, b.sycl_queue)
 
-    result = dpnp.matmul(b1, b2)
-    expected = numpy.matmul(a1, a2)
-    assert_allclose(expected, result)
+
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+@pytest.mark.parametrize(
+    "shape1, shape2",
+    [
+        ((3, 4), (4,)),
+        ((2, 3, 4), (4,)),
+        ((3, 4), (2, 4)),
+        ((5, 1, 3, 4), (2, 4)),
+        ((2, 1, 4), (4,)),
+    ],
+)
+def test_matvec(device, shape1, shape2):
+    a = dpnp.arange(numpy.prod(shape1), device=device).reshape(shape1)
+    b = dpnp.arange(numpy.prod(shape2), device=device).reshape(shape2)
+    result = dpnp.matvec(a, b)
 
     result_queue = result.sycl_queue
-    assert_sycl_queue_equal(result_queue, b1.sycl_queue)
-    assert_sycl_queue_equal(result_queue, b2.sycl_queue)
+    assert_sycl_queue_equal(result_queue, a.sycl_queue)
+    assert_sycl_queue_equal(result_queue, b.sycl_queue)
 
 
 @pytest.mark.parametrize(
@@ -1058,7 +1076,7 @@ def test_matmul(device, shape_pair):
     ids=[device.filter_string for device in valid_devices],
 )
 @pytest.mark.parametrize(
-    "shape_pair",
+    "shape1, shape2",
     [
         ((4,), (4,)),  # call_flag: dot
         ((3, 1), (3, 1)),
@@ -1067,18 +1085,39 @@ def test_matmul(device, shape_pair):
         ((3, 4), (3, 4)),  # call_flag: vecdot
     ],
 )
-def test_vecdot(device, shape_pair):
-    shape1, shape2 = shape_pair
-    a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1)
-    a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2)
+def test_vecdot(device, shape1, shape2):
+    a = dpnp.arange(numpy.prod(shape1), device=device).reshape(shape1)
+    b = dpnp.arange(numpy.prod(shape2), device=device).reshape(shape2)
+    result = dpnp.vecdot(a, b)
 
-    b1 = dpnp.asarray(a1, device=device)
-    b2 = dpnp.asarray(a2, device=device)
+    result_queue = result.sycl_queue
+    assert_sycl_queue_equal(result_queue, a.sycl_queue)
+    assert_sycl_queue_equal(result_queue, b.sycl_queue)
+
+
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+@pytest.mark.parametrize(
+    "shape1, shape2",
+    [
+        ((3,), (3, 4)),
+        ((3,), (2, 3, 4)),
+        ((2, 3), (3, 4)),
+        ((2, 3), (5, 1, 3, 4)),
+        ((3,), (2, 3, 1)),
+    ],
+)
+def test_vecmat(device, shape1, shape2):
+    a = dpnp.arange(numpy.prod(shape1), device=device).reshape(shape1)
+    b = dpnp.arange(numpy.prod(shape2), device=device).reshape(shape2)
+    result = dpnp.vecmat(a, b)
 
-    result = dpnp.vecdot(b1, b2)
     result_queue = result.sycl_queue
-    assert_sycl_queue_equal(result_queue, b1.sycl_queue)
-    assert_sycl_queue_equal(result_queue, b2.sycl_queue)
+    assert_sycl_queue_equal(result_queue, a.sycl_queue)
+    assert_sycl_queue_equal(result_queue, b.sycl_queue)
 
 
 @pytest.mark.parametrize(
diff --git a/dpnp/tests/test_umath.py b/dpnp/tests/test_umath.py
index 65b7908b09f5..5c74df6d5ed9 100644
--- a/dpnp/tests/test_umath.py
+++ b/dpnp/tests/test_umath.py
@@ -75,8 +75,6 @@ def get_id(val):
 # implement missing umaths and to remove the list
 new_umaths_numpy_20 = [
     "bitwise_count",  # SAT-7323
-    "matvec",  # SAT-7615
-    "vecmat",  # SAT-7615
 ]
 
 
@@ -88,7 +86,7 @@ def test_umaths(test_cases):
     if umath in new_umaths_numpy_20:
         pytest.skip("new umaths from numpy 2.0 are not supported yet")
 
-    if umath == "matmul":
+    if umath in ["matmul", "matvec", "vecmat"]:
         sh = (4, 4)
     elif umath in ["power", "pow"]:
         sh = (2, 3)
diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py
index 800cbc519537..f37e55932155 100644
--- a/dpnp/tests/test_usm_type.py
+++ b/dpnp/tests/test_usm_type.py
@@ -461,7 +461,7 @@ def test_coerced_usm_types_bitwise_op(op, usm_type_x, usm_type_y):
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize(
-    "shape_pair",
+    "shape1, shape2",
     [
         ((2, 4), (4,)),
         ((4,), (4, 3)),
@@ -485,13 +485,9 @@ def test_coerced_usm_types_bitwise_op(op, usm_type_x, usm_type_y):
         "((6, 7, 4, 3), (6, 7, 3, 5))",
     ],
 )
-def test_matmul(usm_type_x, usm_type_y, shape_pair):
-    shape1, shape2 = shape_pair
-    x = numpy.arange(numpy.prod(shape1)).reshape(shape1)
-    y = numpy.arange(numpy.prod(shape2)).reshape(shape2)
-
-    x = dp.array(x, usm_type=usm_type_x)
-    y = dp.array(y, usm_type=usm_type_y)
+def test_matmul(usm_type_x, usm_type_y, shape1, shape2):
+    x = dp.arange(numpy.prod(shape1), usm_type=usm_type_x).reshape(shape1)
+    y = dp.arange(numpy.prod(shape2), usm_type=usm_type_y).reshape(shape2)
     z = dp.matmul(x, y)
 
     assert x.usm_type == usm_type_x
@@ -502,7 +498,29 @@ def test_matmul(usm_type_x, usm_type_y, shape_pair):
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize(
-    "shape_pair",
+    "shape1, shape2",
+    [
+        ((3, 4), (4,)),
+        ((2, 3, 4), (4,)),
+        ((3, 4), (2, 4)),
+        ((5, 1, 3, 4), (2, 4)),
+        ((2, 1, 4), (4,)),
+    ],
+)
+def test_matvec(usm_type_x, usm_type_y, shape1, shape2):
+    x = dp.arange(numpy.prod(shape1), usm_type=usm_type_x).reshape(shape1)
+    y = dp.arange(numpy.prod(shape2), usm_type=usm_type_y).reshape(shape2)
+    z = dp.matvec(x, y)
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize(
+    "shape1, shape2",
     [
         ((4,), (4,)),  # call_flag: dot
         ((3, 1), (3, 1)),
@@ -511,13 +529,9 @@ def test_matmul(usm_type_x, usm_type_y, shape_pair):
         ((3, 4), (3, 4)),  # call_flag: vecdot
     ],
 )
-def test_vecdot(usm_type_x, usm_type_y, shape_pair):
-    shape1, shape2 = shape_pair
-    x = numpy.arange(numpy.prod(shape1)).reshape(shape1)
-    y = numpy.arange(numpy.prod(shape2)).reshape(shape2)
-
-    x = dp.array(x, usm_type=usm_type_x)
-    y = dp.array(y, usm_type=usm_type_y)
+def test_vecdot(usm_type_x, usm_type_y, shape1, shape2):
+    x = dp.arange(numpy.prod(shape1), usm_type=usm_type_x).reshape(shape1)
+    y = dp.arange(numpy.prod(shape2), usm_type=usm_type_y).reshape(shape2)
     z = dp.vecdot(x, y)
 
     assert x.usm_type == usm_type_x
@@ -525,6 +539,28 @@ def test_vecdot(usm_type_x, usm_type_y, shape_pair):
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize(
+    "shape1, shape2",
+    [
+        ((3,), (3, 4)),
+        ((3,), (2, 3, 4)),
+        ((2, 3), (3, 4)),
+        ((2, 3), (5, 1, 3, 4)),
+        ((3,), (2, 3, 1)),
+    ],
+)
+def test_vecmat(usm_type_x, usm_type_y, shape1, shape2):
+    x = dp.arange(numpy.prod(shape1), usm_type=usm_type_x).reshape(shape1)
+    y = dp.arange(numpy.prod(shape2), usm_type=usm_type_y).reshape(shape2)
+    z = dp.vecmat(x, y)
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
 def test_meshgrid(usm_type_x, usm_type_y):