4242
4343import dpctl .utils as dpu
4444import numpy
45- from dpctl .tensor ._type_utils import _can_cast
4645
4746import dpnp
4847
4948# pylint: disable=no-name-in-module
5049import dpnp .backend .extensions .statistics ._statistics_impl as statistics_ext
50+ from dpnp .dpnp_utils .dpnp_utils_common import (
51+ result_type_for_device ,
52+ to_supported_dtypes ,
53+ )
5154
5255# pylint: disable=no-name-in-module
53- from .dpnp_utils import get_usm_allocations , map_dtype_to_device
56+ from .dpnp_utils import get_usm_allocations
5457
5558__all__ = [
5659 "bincount" ,
5760 "digitize" ,
5861 "histogram" ,
5962 "histogram_bin_edges" ,
63+ "histogram2d" ,
6064 "histogramdd" ,
6165]
6266
6569_range = range
6670
6771
68- def _result_type_for_device (dtypes , device ):
69- rt = dpnp .result_type (* dtypes )
70- return map_dtype_to_device (rt , device )
71-
72-
def _align_dtypes(a_dtype, bins_dtype, ntype, supported_types, device):
    """
    Choose sample and histogram dtypes for the histogram backend.

    The dtypes of the input array and of the bins are first merged into one
    common dtype with ``result_type_for_device``. That common dtype and the
    histogram dtype are then passed, together with `supported_types` and
    `device`, to ``to_supported_dtypes``, whose result is returned unchanged.
    Callers in this module unpack the result as a
    ``(sample_dtype, hist_dtype)`` pair.

    """

    common_dtype = result_type_for_device([a_dtype, bins_dtype], device)

    # The histogram implementation doesn't support uint64 as histogram type,
    # so int64 is used instead. The result stays correct even on overflow.
    hist_dtype = dpnp.int64 if ntype == numpy.uint64 else ntype

    return to_supported_dtypes(
        [common_dtype, hist_dtype], supported_types, device
    )
9581
9682
9783def _ravel_check_a_and_weights (a , weights ):
@@ -138,6 +124,9 @@ def _is_finite(a):
138124 return numpy .isfinite (a )
139125
140126 if range is not None :
127+ if len (range ) != 2 :
128+ raise ValueError ("range argument must consist of 2 elements." )
129+
141130 first_edge , last_edge = range
142131 if first_edge > last_edge :
143132 raise ValueError ("max must be larger than min in range parameter." )
@@ -520,6 +509,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
520509 If `bins` is a sequence, it defines a monotonically increasing array
521510 of bin edges, including the rightmost edge, allowing for non-uniform
522511 bin widths.
512+
523513 Default: ``10``.
524514 range : {None, 2-tuple of float}, optional
525515 The lower and upper range of the bins. If not provided, range is simply
@@ -528,6 +518,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
528518 affects the automatic bin computation as well. While bin width is
529519 computed to be optimal based on the actual data within `range`, the bin
530520 count will fill the entire range including portions containing no data.
521+
531522 Default: ``None``.
532523 density : {None, bool}, optional
533524 If ``False`` or ``None``, the result will contain the number of samples
@@ -536,6 +527,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
536527 the range is ``1``. Note that the sum of the histogram values will not
537528 be equal to ``1`` unless bins of unity width are chosen; it is not
538529 a probability *mass* function.
530+
539531 Default: ``None``.
540532 weights : {None, dpnp.ndarray, usm_ndarray}, optional
541533 An array of weights, of the same shape as `a`. Each value in `a` only
@@ -545,6 +537,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
545537 Please note that the ``dtype`` of `weights` will also become the
546538 ``dtype`` of the returned accumulator (`hist`), so it must be large
547539 enough to hold accumulated values as well.
540+
548541 Default: ``None``.
549542
550543 Returns
@@ -751,6 +744,167 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
751744 return bin_edges
752745
753746
def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
    """
    Compute the bi-dimensional histogram of two data samples.

    Parameters
    ----------
    x : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the `x` coordinates of the points to be
        histogrammed.
    y : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the `y` coordinates of the points to be
        histogrammed.
    bins : {int, dpnp.ndarray, usm_ndarray, [int, int], [array, array], \
        [int, array], [array, int]}, optional
        The bins specification:

        * If int, the number of bins for the two dimensions (nx=ny=bins).
        * If array, the bin edges for the two dimensions
          (x_edges=y_edges=bins).
        * If [int, int], the number of bins in each dimension
          (nx, ny = bins).
        * If [array, array], the bin edges in each dimension
          (x_edges, y_edges = bins).
        * A combination [int, array] or [array, int], where int
          is the number of bins and array is the bin edges.

        Default: ``10``.
    range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional
        The leftmost and rightmost edges of the bins along each dimension.
        If ``None`` the ranges are
        ``[[x.min(), x.max()], [y.min(), y.max()]]``. All values outside
        of this range will be considered outliers and not tallied in the
        histogram.

        Default: ``None``.
    density : {None, bool}, optional
        If ``False`` or ``None``, the default, returns the number of
        samples in each bin.
        If ``True``, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_volume``.

        Default: ``None``.
    weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional
        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
        Weights are normalized to ``1`` if `density` is ``True``.
        If `density` is ``False``, the values of the returned histogram
        are equal to the sum of the weights belonging to the samples
        falling into each bin.
        If ``None`` all samples are assigned a weight of ``1``.

        Default: ``None``.

    Returns
    -------
    H : dpnp.ndarray of shape (nx, ny)
        The bi-dimensional histogram of samples `x` and `y`. Values in `x`
        are histogrammed along the first dimension and values in `y` are
        histogrammed along the second dimension.
    xedges : dpnp.ndarray of shape (nx+1,)
        The bin edges along the first dimension.
    yedges : dpnp.ndarray of shape (ny+1,)
        The bin edges along the second dimension.

    Raises
    ------
    ValueError
        If `x` or `y` is not 1-dimensional, or if `x` and `y` have
        different lengths.

    See Also
    --------
    :obj:`dpnp.histogram` : 1D histogram
    :obj:`dpnp.histogramdd` : Multidimensional histogram

    Notes
    -----
    When `density` is ``True``, then the returned histogram is the sample
    density, defined such that the sum over bins of the product
    ``bin_value * bin_area`` is 1.

    Please note that the histogram does not follow the Cartesian convention
    where `x` values are on the abscissa and `y` values on the ordinate
    axis. Rather, `x` is histogrammed along the first dimension of the
    array (vertical), and `y` along the second dimension of the array
    (horizontal). This ensures compatibility with `histogramdd`.

    Examples
    --------
    >>> import dpnp as np
    >>> x = np.random.randn(20).astype("float32")
    >>> y = np.random.randn(20).astype("float32")
    >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
    >>> hist.shape
    (4, 3)
    >>> hist
    array([[1., 2., 0.],
           [0., 3., 1.],
           [1., 4., 1.],
           [1., 3., 3.]], dtype=float32)
    >>> edges_x.shape
    (5,)
    >>> edges_x
    array([-1.7516936 , -0.96109843, -0.17050326,  0.62009203,  1.4106871 ],
          dtype=float32)
    >>> edges_y.shape
    (4,)
    >>> edges_y
    array([-2.6604428 , -0.94615364,  0.76813555,  2.4824247 ], dtype=float32)

    Please note, that resulting values of histogram and edges may vary.

    """

    dpnp.check_supported_arrays_type(x, y)
    if weights is not None:
        dpnp.check_supported_arrays_type(weights)

    if x.ndim != 1 or y.ndim != 1:
        # The trailing space keeps the two sentences from running together.
        raise ValueError(
            "x and y must be 1-dimensional arrays. "
            f"Got {x.ndim} and {y.ndim} respectively"
        )

    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length. "
            f"Got {len(x)} and {len(y)} respectively"
        )

    usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
    device = exec_q.sycl_device

    sample_dtype = result_type_for_device([x.dtype, y.dtype], device)

    # Unlike histogramdd, histogram2d accepts a single sequence of bin edges
    # and applies it to both dimensions. A two-element `bins` is instead
    # treated as a per-dimension specification, so only sequences longer
    # than two elements are duplicated here. A one-element sequence is not a
    # valid specification; it is passed on unchanged (presumably rejected
    # downstream by the bins normalization).
    if isinstance(bins, Iterable) and len(bins) > 2:
        bins = [bins] * 2

    bins = _histdd_normalize_bins(bins, 2)

    # Only entries given as arrays of edges carry a dtype; integer bin
    # counts do not take part in the dtype resolution.
    bins_dtypes = [sample_dtype]
    bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]

    bins_dtype = result_type_for_device(bins_dtypes, device)
    hist_dtype = _histdd_hist_dtype(exec_q, weights)

    supported_types = statistics_ext.histogramdd_dtypes()

    # Only the sample dtype is needed here; the histogram dtype is discarded.
    sample_dtype, _ = _align_dtypes(
        sample_dtype, bins_dtype, hist_dtype, supported_types, device
    )

    # Pack x and y as the two columns of an (N, 2) sample for histogramdd.
    sample = dpnp.empty_like(
        x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
    )
    sample[:, 0] = x
    sample[:, 1] = y

    hist, edges = histogramdd(
        sample, bins=bins, range=range, density=density, weights=weights
    )
    return hist, edges[0], edges[1]
906+
907+
754908def _histdd_validate_bins (bins ):
755909 for i , b in enumerate (bins ):
756910 if numpy .ndim (b ) == 0 :
@@ -873,9 +1027,7 @@ def _histdd_hist_dtype(queue, weights):
8731027 # hist_dtype is either float or complex, so it is ok
8741028 # to calculate it as result type between default_float and
8751029 # weights.dtype
876- hist_dtype = _result_type_for_device (
877- [hist_dtype , weights .dtype ], device
878- )
1030+ hist_dtype = result_type_for_device ([hist_dtype , weights .dtype ], device )
8791031
8801032 return hist_dtype
8811033
@@ -886,7 +1038,7 @@ def _histdd_sample_dtype(queue, sample, bin_edges_list):
8861038 dtypes_ = [bin_edges .dtype for bin_edges in bin_edges_list ]
8871039 dtypes_ .append (sample .dtype )
8881040
889- return _result_type_for_device (dtypes_ , device )
1041+ return result_type_for_device (dtypes_ , device )
8901042
8911043
8921044def _histdd_supported_dtypes (sample , bin_edges_list , weights ):
@@ -918,7 +1070,7 @@ def _histdd_extract_arrays(sample, weights, bins):
9181070 return all_arrays
9191071
9201072
921- def histogramdd (sample , bins = 10 , range = None , weights = None , density = False ):
1073+ def histogramdd (sample , bins = 10 , range = None , density = None , weights = None ):
9221074 """
9231075 Compute the multidimensional histogram of some data.
9241076
@@ -936,30 +1088,33 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
9361088 * The number of bins for each dimension (nx, ny, ... =bins)
9371089 * The number of bins for all dimensions (nx=ny=...=bins).
9381090
939- Default: ``10``
1091+ Default: ``10``.
9401092 range : {None, sequence}, optional
9411093 A sequence of length D, each an optional (lower, upper) tuple giving
9421094 the outer bin edges to be used if the edges are not given explicitly in
9431095 `bins`.
944- An entry of None in the sequence results in the minimum and maximum
1096+ An entry of ``None`` in the sequence results in the minimum and maximum
9451097 values being used for the corresponding dimension.
946- None is equivalent to passing a tuple of D None values.
947-
948- Default: ``None``
949- weights : {dpnp.ndarray, usm_ndarray}, optional
950- An (N,)-shaped array of values `w_i` weighing each sample
951- `(x_i, y_i, z_i, ...)`.
952- Weights are normalized to 1 if density is True. If density is False,
953- the values of the returned histogram are equal to the sum of the
954- weights belonging to the samples falling into each bin.
1098+ ``None`` is equivalent to passing a tuple of D ``None`` values.
9551099
956- Default: ``None``
957- density : bool, optional
958- If ``False``, the default, returns the number of samples in each bin.
1100+ Default: ``None``.
1101+ density : {None, bool}, optional
1102+ If ``False`` or ``None``, the default, returns the number of
1103+ samples in each bin.
9591104 If ``True``, returns the probability *density* function at the bin,
9601105 ``bin_count / sample_count / bin_volume``.
9611106
962- Default: ``False``
1107+ Default: ``None``.
1108+ weights : {None, dpnp.ndarray, usm_ndarray}, optional
1109+ An (N,)-shaped array of values `w_i` weighing each sample
1110+ `(x_i, y_i, z_i, ...)`.
1111+ Weights are normalized to ``1`` if density is ``True``.
1112+ If density is ``False``, the values of the returned histogram
1113+ are equal to the sum of the weights belonging to the samples
1114+ falling into each bin.
1115+ If ``None`` all samples are assigned a weight of ``1``.
1116+
1117+ Default: ``None``.
9631118
9641119 Returns
9651120 -------
@@ -993,7 +1148,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
9931148 elif sample .ndim > 2 :
9941149 raise ValueError ("sample must have no more than 2 dimensions" )
9951150
996- ndim = sample .shape [1 ] if sample . size > 0 else 1
1151+ ndim = sample .shape [1 ]
9971152
9981153 _arrays = _histdd_extract_arrays (sample , weights , bins )
9991154 usm_type , queue = get_usm_allocations (_arrays )
0 commit comments