IntelPython · AlexanderKalistratov · Jan 24, 2025 · Jan 3, 2025 · Jan 14, 2025 · Jan 20, 2025
@@ -57,6 +57,7 @@
     "digitize",
     "histogram",
     "histogram_bin_edges",
+    "histogram2d",
     "histogramdd",
 ]
 
@@ -138,6 +139,9 @@ def _is_finite(a):
         return numpy.isfinite(a)
 
     if range is not None:
+        if len(range) != 2:
+            raise ValueError("range argument must consist of 2 elements.")
+
         first_edge, last_edge = range
         if first_edge > last_edge:
             raise ValueError("max must be larger than min in range parameter.")
@@ -751,6 +755,155 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
     return bin_edges
 
 
+def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
+    """
+    Compute the bi-dimensional histogram of two data samples.
+
+    Parameters
+    ----------
+    x : {dpnp.ndarray, usm_ndarray} of shape (N,)
+        An array containing the `x` coordinates of the points to be
+        histogrammed.
+    y : {dpnp.ndarray, usm_ndarray} of shape (N,)
+        An array containing the `y` coordinates of the points to be
+        histogrammed.
+    bins : {int, dpnp.ndarray, usm_ndarray, [int, int], [array, array], \
+        [int, array], [array, int]}, optional
+        The bins specification:
+
+        * If int, the number of bins for the two dimensions (nx=ny=bins).
+        * If array, the bin edges for the two dimensions
+          (x_edges=y_edges=bins).
+        * If [int, int], the number of bins in each dimension
+          (nx, ny = bins).
+        * If [array, array], the bin edges in each dimension
+          (x_edges, y_edges = bins).
+        * A combination [int, array] or [array, int], where int
+          is the number of bins and array is the bin edges.
+
+        Default: ``10``
+    range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional
+        The leftmost and rightmost edges of the bins along each dimension
+        (if not specified explicitly in the `bins` parameters):
+        ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range
+        will be considered outliers and not tallied in the histogram.
+
+        Default: ``None``
+    density : {None, bool}, optional
+        If ``False`` or ``None``, the default, returns the number of
+        samples in each bin.
+        If ``True``, returns the probability *density* function at the bin,
+        ``bin_count / sample_count / bin_volume``.
+
+        Default: ``None``
+    weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional
+        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
+        Weights are normalized to ``1`` if `density` is ``True``.
+        If `density` is ``False``, the values of the returned histogram
+        are equal to the sum of the weights belonging to the samples
+        falling into each bin.
+
+        Default: ``None``
+
+    Returns
+    -------
+    H : dpnp.ndarray of shape (nx, ny)
+        The bi-dimensional histogram of samples `x` and `y`. Values in `x`
+        are histogrammed along the first dimension and values in `y` are
+        histogrammed along the second dimension.
+    xedges : dpnp.ndarray of shape (nx+1,)
+        The bin edges along the first dimension.
+    yedges : dpnp.ndarray of shape (ny+1,)
+        The bin edges along the second dimension.
+
+    See Also
+    --------
+    :obj:`dpnp.histogram` : 1D histogram
+    :obj:`dpnp.histogramdd` : Multidimensional histogram
+
+    Notes
+    -----
+    When `density` is ``True``, then the returned histogram is the sample
+    density, defined such that the sum over bins of the product
+    ``bin_value * bin_area`` is 1.
+
+    Please note that the histogram does not follow the Cartesian convention
+    where `x` values are on the abscissa and `y` values on the ordinate
+    axis. Rather, `x` is histogrammed along the first dimension of the
+    array (vertical), and `y` along the second dimension of the array
+    (horizontal). This ensures compatibility with `histogramdd`.
+
+    Examples
+    --------
+    >>> import dpnp as np
+    >>> x = np.random.randn(20)
+    >>> y = np.random.randn(20)
+    >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
+    >>> hist
+    [[1. 0. 0.]
+     [0. 0. 0.]
+     [5. 6. 4.]
+     [1. 2. 1.]]
+    >>> edges_x
+    [-5.6575713 -3.5574734 -1.4573755  0.6427226  2.74282  ]
+    >>> edges_y
+    [-1.1889046  -0.07263839  1.0436279   2.159894  ]
+    """
+
+    dpnp.check_supported_arrays_type(x, y)
+    if weights is not None:
+        dpnp.check_supported_arrays_type(weights)
+
+    if x.ndim != 1 or y.ndim != 1:
+        raise ValueError(
+            "x and y must be 1-dimensional arrays. "
+            f"Got {x.ndim} and {y.ndim} respectively"
+        )
+
+    if len(x) != len(y):
+        raise ValueError(
+            "x and y must have the same length. "
+            f"Got {len(x)} and {len(y)} respectively"
+        )
+
+    usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
+    device = exec_q.sycl_device
+
+    sample_dtype = _result_type_for_device([x.dtype, y.dtype], device)
+
+    # Unlike histogramdd, histogram2d accepts a single 1-D sequence of bin
+    # edges and applies it to both dimensions. Only sequences with more
+    # than two elements are duplicated: a two-element `bins` is passed on
+    # as a per-dimension specification, and a one-element array-like is
+    # not treated as shared edges.
+    if isinstance(bins, Iterable) and len(bins) > 2:
+        bins = [bins] * 2
+
+    bins = _histdd_normalize_bins(bins, 2)
+    bins_dtypes = [sample_dtype]
+    bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]
+
+    bins_dtype = _result_type_for_device(bins_dtypes, device)
+    hist_dtype = _histdd_hist_dtype(exec_q, weights)
+
+    supported_types = statistics_ext.histogramdd_dtypes()
+
+    sample_dtype, _ = _align_dtypes(
+        sample_dtype, bins_dtype, hist_dtype, supported_types, device
+    )
+
+    sample = dpnp.empty_like(
+        x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
+    )
+    sample[:, 0] = x
+    sample[:, 1] = y
+
+    hist, edges = histogramdd(
+        sample, bins=bins, range=range, density=density, weights=weights
+    )
+    return hist, edges[0], edges[1]
+
+
 def _histdd_validate_bins(bins):
     for i, b in enumerate(bins):
         if numpy.ndim(b) == 0:
@@ -918,7 +1071,7 @@ def _histdd_extract_arrays(sample, weights, bins):
     return all_arrays
 
 
-def histogramdd(sample, bins=10, range=None, weights=None, density=False):
+def histogramdd(sample, bins=10, range=None, density=None, weights=None):
     """
     Compute the multidimensional histogram of some data.
 
@@ -945,6 +1098,13 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
         values being used for the corresponding dimension.
         None is equivalent to passing a tuple of D None values.
 
+        Default: ``None``
+    density : {None, bool}, optional
+        If ``False`` or ``None``, the default, returns the number of
+        samples in each bin.
+        If ``True``, returns the probability *density* function at the bin,
+        ``bin_count / sample_count / bin_volume``.
+
         Default: ``None``
     weights : {dpnp.ndarray, usm_ndarray}, optional
         An (N,)-shaped array of values `w_i` weighing each sample
@@ -954,12 +1114,6 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
         weights belonging to the samples falling into each bin.
 
         Default: ``None``
-    density : bool, optional
-        If ``False``, the default, returns the number of samples in each bin.
-        If ``True``, returns the probability *density* function at the bin,
-        ``bin_count / sample_count / bin_volume``.
-
-        Default: ``False``
 
     Returns
     -------
@@ -993,7 +1147,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
     elif sample.ndim > 2:
         raise ValueError("sample must have no more than 2 dimensions")
 
-    ndim = sample.shape[1] if sample.size > 0 else 1
+    ndim = sample.shape[1]
 
     _arrays = _histdd_extract_arrays(sample, weights, bins)
     usm_type, queue = get_usm_allocations(_arrays)