Skip to content

Commit 26cbbdb

Browse files
committed
Complete implementation of dpnp.cov
1 parent b7b2901 commit 26cbbdb

File tree

2 files changed

+198
-68
lines changed

2 files changed

+198
-68
lines changed

dpnp/dpnp_iface_statistics.py

Lines changed: 145 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
to_supported_dtypes,
5555
)
5656

57-
from .dpnp_utils import call_origin, get_usm_allocations
57+
from .dpnp_utils import get_usm_allocations
5858
from .dpnp_utils.dpnp_utils_reduction import dpnp_wrap_reduction_call
5959
from .dpnp_utils.dpnp_utils_statistics import dpnp_cov, dpnp_median
6060

@@ -748,61 +748,167 @@ def cov(
748748
749749
For full documentation refer to :obj:`numpy.cov`.
750750
751+
Parameters
752+
----------
753+
m : {dpnp.ndarray, usm_ndarray}
754+
A 1-D or 2-D array containing multiple variables and observations.
755+
Each row of `m` represents a variable, and each column a single
756+
observation of all those variables. Also see `rowvar` below.
757+
y : {None, dpnp.ndarray, usm_ndarray}, optional
758+
An additional set of variables and observations. `y` has the same form
759+
as that of `m`.
760+
761+
Default: ``None``.
762+
rowvar : bool, optional
763+
If `rowvar` is ``True``, then each row represents a variable, with
764+
observations in the columns. Otherwise, the relationship is transposed:
765+
each column represents a variable, while the rows contain observations.
766+
767+
Default: ``True``.
768+
bias : bool, optional
769+
Default normalization is by ``(N - 1)``, where ``N`` is the number of
770+
observations given (unbiased estimate). If `bias` is ``True``, then
771+
normalization is by ``N``. These values can be overridden by using the
772+
keyword `ddof`.
773+
774+
Default: ``False``.
775+
ddof : {None, int}, optional
776+
If not ``None`` the default value implied by `bias` is overridden. Note
777+
that ``ddof=1`` will return the unbiased estimate, even if both
778+
`fweights` and `aweights` are specified, and ``ddof=0`` will return the
779+
simple average. See the notes for the details.
780+
781+
Default: ``None``.
782+
fweights : {None, dpnp.ndarray, usm_ndarray}, optional
783+
1-D array of integer frequency weights; the number of times each
784+
observation vector should be repeated.
785+
786+
Default: ``None``.
787+
aweights : {None, dpnp.ndarray, usm_ndarray}, optional
788+
1-D array of observation vector weights. These relative weights are
789+
typically large for observations considered "important" and smaller for
790+
observations considered less "important". If ``ddof=0`` the array of
791+
weights can be used to assign probabilities to observation vectors.
792+
793+
Default: ``None``.
794+
dtype : data-type, optional
795+
Data-type of the result. By default, the return data-type will have
796+
at least floating point type based on the capabilities of the device on
797+
which the input arrays reside.
798+
799+
Default: ``None``.
800+
751801
Returns
752802
-------
753803
out : dpnp.ndarray
754804
The covariance matrix of the variables.
755805
756-
Limitations
757-
-----------
758-
Input array ``m`` is supported as :obj:`dpnp.ndarray`.
759-
Dimension of input array ``m`` is limited by ``m.ndim <= 2``.
760-
Size and shape of input arrays are supported to be equal.
761-
Parameter `y` is supported only with default value ``None``.
762-
Parameter `bias` is supported only with default value ``False``.
763-
Parameter `ddof` is supported only with default value ``None``.
764-
Parameter `fweights` is supported only with default value ``None``.
765-
Parameter `aweights` is supported only with default value ``None``.
766-
Otherwise the function will be executed sequentially on CPU.
767-
Input array data types are limited by supported DPNP :ref:`Data types`.
768-
769806
See Also
770807
--------
771-
:obj:`dpnp.corrcoef` : Normalized covariance matrix
808+
:obj:`dpnp.corrcoef` : Normalized covariance matrix.
809+
810+
Notes
811+
-----
812+
Assume that the observations are in the columns of the observation array `m`
813+
and let ``f = fweights`` and ``a = aweights`` for brevity. The steps to
814+
compute the weighted covariance are as follows::
815+
816+
>>> import dpnp as np
817+
>>> m = np.arange(10, dtype=np.float32)
818+
>>> f = np.arange(10) * 2
819+
>>> a = np.arange(10) ** 2.0
820+
>>> ddof = 1
821+
>>> w = f * a
822+
>>> v1 = np.sum(w)
823+
>>> v2 = np.sum(w * a)
824+
>>> m -= np.sum(m * w, axis=None, keepdims=True) / v1
825+
>>> cov = np.dot(m * w, m.T) * v1 / (v1**2 - ddof * v2)
826+
827+
Note that when ``a == 1``, the normalization factor
828+
``v1 / (v1**2 - ddof * v2)`` goes over to ``1 / (np.sum(f) - ddof)``
829+
as it should.
772830
773831
Examples
774832
--------
775833
>>> import dpnp as np
776834
>>> x = np.array([[0, 2], [1, 1], [2, 0]]).T
777-
>>> x.shape
778-
(2, 3)
779-
>>> [i for i in x]
780-
[0, 1, 2, 2, 1, 0]
781-
>>> out = np.cov(x)
782-
>>> out.shape
783-
(2, 2)
784-
>>> [i for i in out]
785-
[1.0, -1.0, -1.0, 1.0]
835+
>>> x
836+
array([[0, 1, 2],
837+
[2, 1, 0]])
838+
839+
Note how :math:`x_0` increases while :math:`x_1` decreases. The covariance
840+
matrix shows this clearly:
841+
842+
>>> np.cov(x)
843+
array([[ 1., -1.],
844+
[-1., 1.]])
845+
846+
Note that element :math:`C_{0,1}`, which shows the correlation between
847+
:math:`x_0` and :math:`x_1`, is negative.
848+
849+
Further, note how `x` and `y` are combined:
850+
851+
>>> x = np.array([-2.1, -1, 4.3])
852+
>>> y = np.array([3, 1.1, 0.12])
853+
>>> X = np.stack((x, y), axis=0)
854+
>>> np.cov(X)
855+
array([[11.71 , -4.286 ], # may vary
856+
[-4.286 , 2.14413333]])
857+
>>> np.cov(x, y)
858+
array([[11.71 , -4.286 ], # may vary
859+
[-4.286 , 2.14413333]])
860+
>>> np.cov(x)
861+
array(11.71)
786862
787863
"""
788864

789-
if not dpnp.is_supported_array_type(m):
790-
pass
791-
elif m.ndim > 2:
792-
pass
793-
elif bias:
794-
pass
795-
elif ddof is not None:
796-
pass
797-
elif fweights is not None:
798-
pass
799-
elif aweights is not None:
800-
pass
865+
arrays = [m]
866+
if y is not None:
867+
arrays.append(y)
868+
dpnp.check_supported_arrays_type(*arrays)
869+
870+
if m.ndim > 2:
871+
raise ValueError("m has more than 2 dimensions")
872+
873+
if y is not None:
874+
if y.ndim > 2:
875+
raise ValueError("y has more than 2 dimensions")
876+
877+
if ddof is not None:
878+
if not isinstance(ddof, int):
879+
raise ValueError("ddof must be integer")
801880
else:
802-
return dpnp_cov(m, y=y, rowvar=rowvar, dtype=dtype)
881+
ddof = 0 if bias else 1
882+
883+
def_float = dpnp.default_float_type(m.sycl_queue)
884+
if dtype is None:
885+
dtype = dpnp.result_type(*arrays, def_float)
886+
887+
if fweights is not None:
888+
dpnp.check_supported_arrays_type(fweights)
889+
if not dpnp.issubdtype(fweights.dtype, numpy.integer):
890+
raise TypeError("fweights must be integer")
891+
892+
if fweights.ndim > 1:
893+
raise ValueError("cannot handle multidimensional fweights")
894+
895+
fweights = dpnp.astype(fweights, dtype=def_float)
896+
897+
if aweights is not None:
898+
dpnp.check_supported_arrays_type(fweights)
899+
if aweights.ndim > 1:
900+
raise ValueError("cannot handle multidimensional aweights")
901+
902+
aweights = dpnp.astype(aweights, dtype=def_float)
803903

804-
return call_origin(
805-
numpy.cov, m, y, rowvar, bias, ddof, fweights, aweights, dtype=dtype
904+
return dpnp_cov(
905+
m,
906+
y=y,
907+
rowvar=rowvar,
908+
ddof=ddof,
909+
dtype=dtype,
910+
fweights=fweights,
911+
aweights=aweights,
806912
)
807913

808914

dpnp/dpnp_utils/dpnp_utils_statistics.py

Lines changed: 53 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232

3333
import dpnp
3434
from dpnp.dpnp_array import dpnp_array
35-
from dpnp.dpnp_utils import get_usm_allocations, map_dtype_to_device
3635

3736
__all__ = ["dpnp_cov", "dpnp_median"]
3837

@@ -119,19 +118,17 @@ def _flatten_array_along_axes(a, axes_to_flatten, overwrite_input):
119118
return a_flatten, overwrite_input
120119

121120

122-
def dpnp_cov(m, y=None, rowvar=True, dtype=None):
121+
def dpnp_cov(
122+
m, y=None, rowvar=True, ddof=1, dtype=None, fweights=None, aweights=None
123+
):
123124
"""
124-
dpnp_cov(m, y=None, rowvar=True, dtype=None)
125-
126125
Estimate a covariance matrix based on passed data.
127-
No support for given weights is provided now.
128126
129-
The implementation is done through existing dpnp and dpctl methods
130-
instead of separate function call of dpnp backend.
127+
The implementation is done through existing dpnp functions.
131128
132129
"""
133130

134-
def _get_2dmin_array(x, dtype):
131+
def _get_2dmin_array(x):
135132
"""
136133
Transform an input array to a form required for building a covariance matrix.
137134
@@ -141,7 +138,7 @@ def _get_2dmin_array(x, dtype):
141138
142139
"""
143140
if x.ndim == 0:
144-
x = x.reshape((1, 1))
141+
x = dpnp.reshape(x, (1, 1))
145142
elif x.ndim == 1:
146143
x = x[dpnp.newaxis, :]
147144

@@ -152,33 +149,60 @@ def _get_2dmin_array(x, dtype):
152149
x = dpnp.astype(x, dtype)
153150
return x
154151

155-
# input arrays must follow CFD paradigm
156-
_, queue = get_usm_allocations((m,) if y is None else (m, y))
152+
x = _get_2dmin_array(m)
153+
if x.shape[0] == 0:
154+
return dpnp.empty_like(
155+
x, shape=(0, 0), dtype=dpnp.default_float_type(m.sycl_queue)
156+
)
157157

158-
# calculate a type of result array if not passed explicitly
159-
if dtype is None:
160-
dtypes = [m.dtype, dpnp.default_float_type(sycl_queue=queue)]
161-
if y is not None:
162-
dtypes.append(y.dtype)
163-
dtype = dpnp.result_type(*dtypes)
164-
# TODO: remove when dpctl.result_type() is returned dtype based on fp64
165-
dtype = map_dtype_to_device(dtype, queue.sycl_device)
166-
167-
X = _get_2dmin_array(m, dtype)
168158
if y is not None:
169-
y = _get_2dmin_array(y, dtype)
159+
y = _get_2dmin_array(y)
160+
x = dpnp.concatenate((x, y), axis=0)
161+
162+
# get the product of frequencies and weights
163+
w = None
164+
if fweights is not None:
165+
if fweights.shape[0] != x.shape[1]:
166+
raise ValueError("incompatible numbers of samples and fweights")
170167

171-
X = dpnp.concatenate((X, y), axis=0)
168+
w = fweights
172169

173-
avg = X.mean(axis=1)
170+
if aweights is not None:
171+
if aweights.shape[0] != x.shape[1]:
172+
raise ValueError("incompatible numbers of samples and aweights")
174173

175-
fact = X.shape[1] - 1
176-
X -= avg[:, None]
174+
if w is None:
175+
w = aweights
176+
else:
177+
w *= aweights
178+
179+
avg, w_sum = dpnp.average(x, axis=1, weights=w, returned=True)
180+
w_sum = w_sum[0]
181+
182+
# determine the normalization
183+
if w is None:
184+
fact = x.shape[1] - ddof
185+
elif ddof == 0:
186+
fact = w_sum
187+
elif aweights is None:
188+
fact = w_sum - ddof
189+
else:
190+
fact = w_sum - ddof * dpnp.sum(w * aweights) / w_sum
177191

178-
c = dpnp.dot(X, X.T.conj())
179-
c *= 1 / fact if fact != 0 else dpnp.nan
192+
if fact <= 0:
193+
warnings.warn(
194+
"Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=2
195+
)
196+
fact = 0.0
197+
198+
x -= avg[:, None]
199+
if w is None:
200+
x_t = x.T
201+
else:
202+
x_t = (x * w).T
180203

181-
return dpnp.squeeze(c)
204+
c = dpnp.dot(x, x_t.conj()) / fact
205+
return c.squeeze()
182206

183207

184208
def dpnp_median(

0 commit comments

Comments
 (0)