Skip to content

Commit 948c3ce

Browse files
authored
Implement dpnp.histogram_bin_edges (#1823)
* Implement dpnp.histogram_bin_edges
* Properly updated the usm_type-related test
* Apply review comment
* Removed passing of the sycl_queue keyword in the example
* Describe the density=None use case in histogram
1 parent e1dcc45 commit 948c3ce

File tree

4 files changed

+168
-12
lines changed

4 files changed

+168
-12
lines changed

dpnp/dpnp_iface_histograms.py

Lines changed: 100 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
__all__ = [
4949
"histogram",
50+
"histogram_bin_edges",
5051
]
5152

5253
# range is a keyword argument to many functions, so save the builtin so they can
@@ -219,34 +220,36 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
219220
Input data. The histogram is computed over the flattened array.
220221
bins : {int, dpnp.ndarray, usm_ndarray, sequence of scalars}, optional
221222
If `bins` is an int, it defines the number of equal-width bins in the
222-
given range (``10``, by default).
223+
given range.
223224
If `bins` is a sequence, it defines a monotonically increasing array
224225
of bin edges, including the rightmost edge, allowing for non-uniform
225226
bin widths.
226-
If `bins` is a string, it defines the method used to calculate the
227-
optimal bin width, as defined by :obj:`dpnp.histogram_bin_edges`.
228-
range : {2-tuple of float}, optional
227+
Default: ``10``.
228+
range : {None, 2-tuple of float}, optional
229229
The lower and upper range of the bins. If not provided, range is simply
230230
``(a.min(), a.max())``. Values outside the range are ignored. The first
231231
element of the range must be less than or equal to the second. `range`
232232
affects the automatic bin computation as well. While bin width is
233233
computed to be optimal based on the actual data within `range`, the bin
234234
count will fill the entire range including portions containing no data.
235-
weights : {dpnp.ndarray, usm_ndarray}, optional
235+
Default: ``None``.
236+
density : {None, bool}, optional
237+
If ``False`` or ``None``, the result will contain the number of samples
238+
in each bin. If ``True``, the result is the value of the probability
239+
*density* function at the bin, normalized such that the *integral* over
240+
the range is ``1``. Note that the sum of the histogram values will not
241+
be equal to ``1`` unless bins of unity width are chosen; it is not
242+
a probability *mass* function.
243+
Default: ``None``.
244+
weights : {None, dpnp.ndarray, usm_ndarray}, optional
236245
An array of weights, of the same shape as `a`. Each value in `a` only
237246
contributes its associated weight towards the bin count (instead of 1).
238247
If `density` is ``True``, the weights are normalized, so that the
239248
integral of the density over the range remains ``1``.
240249
Please note that the ``dtype`` of `weights` will also become the
241250
``dtype`` of the returned accumulator (`hist`), so it must be large
242251
enough to hold accumulated values as well.
243-
density : {bool}, optional
244-
If ``False``, the result will contain the number of samples in each bin.
245-
If ``True``, the result is the value of the probability *density*
246-
function at the bin, normalized such that the *integral* over the range
247-
is ``1``. Note that the sum of the histogram values will not be equal
248-
to ``1`` unless bins of unity width are chosen; it is not a probability
249-
*mass* function.
252+
Default: ``None``.
250253
251254
Returns
252255
-------
@@ -337,3 +340,88 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
337340
return n / db / n.sum(), bin_edges
338341

339342
return n, bin_edges
343+
344+
345+
def histogram_bin_edges(a, bins=10, range=None, weights=None):
346+
"""
347+
Function to calculate only the edges of the bins used by the
348+
:obj:`dpnp.histogram` function.
349+
350+
For full documentation refer to :obj:`numpy.histogram_bin_edges`.
351+
352+
Parameters
353+
----------
354+
a : {dpnp.ndarray, usm_ndarray}
355+
Input data. The histogram is computed over the flattened array.
356+
bins : {int, dpnp.ndarray, usm_ndarray, sequence of scalars}, optional
357+
If `bins` is an int, it defines the number of equal-width bins in the
358+
given range.
359+
If `bins` is a sequence, it defines the bin edges, including the
360+
rightmost edge, allowing for non-uniform bin widths.
361+
Default: ``10``.
362+
range : {None, 2-tuple of float}, optional
363+
The lower and upper range of the bins. If not provided, range is simply
364+
``(a.min(), a.max())``. Values outside the range are ignored. The first
365+
element of the range must be less than or equal to the second. `range`
366+
affects the automatic bin computation as well. While bin width is
367+
computed to be optimal based on the actual data within `range`, the bin
368+
count will fill the entire range including portions containing no data.
369+
Default: ``None``.
370+
weights : {None, dpnp.ndarray, usm_ndarray}, optional
371+
An array of weights, of the same shape as `a`. Each value in `a` only
372+
contributes its associated weight towards the bin count (instead of 1).
373+
This is currently not used by any of the bin estimators, but may be in
374+
the future.
375+
Default: ``None``.
376+
377+
Returns
378+
-------
379+
bin_edges : {dpnp.ndarray of floating data type, or the pre-computed `bins` passed through unmodified}
380+
The edges to pass into :obj:`dpnp.histogram`.
381+
382+
See Also
383+
--------
384+
:obj:`dpnp.histogram` : Compute the histogram of a data set.
385+
386+
Examples
387+
--------
388+
>>> import dpnp as np
389+
>>> arr = np.array([0, 0, 0, 1, 2, 3, 3, 4, 5])
390+
>>> np.histogram_bin_edges(arr, bins=2)
391+
array([0. , 2.5, 5. ])
392+
393+
For consistency with histogram, an array of pre-computed bins is
394+
passed through unmodified:
395+
396+
>>> np.histogram_bin_edges(arr, [1, 2])
397+
array([1, 2])
398+
399+
This function allows one set of bins to be computed, and reused across
400+
multiple histograms:
401+
402+
>>> shared_bins = np.histogram_bin_edges(arr, bins=5)
403+
>>> shared_bins
404+
array([0., 1., 2., 3., 4., 5.])
405+
406+
>>> gid = np.array([0, 1, 1, 0, 1, 1, 0, 1, 1])
407+
>>> hist_0, _ = np.histogram(arr[gid == 0], bins=shared_bins)
408+
>>> hist_1, _ = np.histogram(arr[gid == 1], bins=shared_bins)
409+
410+
>>> hist_0, hist_1
411+
(array([1, 1, 0, 1, 0]), array([2, 0, 1, 1, 2]))
412+
413+
Which gives more easily comparable results than using separate bins for
414+
each histogram:
415+
416+
>>> hist_0, bins_0 = np.histogram(arr[gid == 0], bins=3)
417+
>>> hist_1, bins_1 = np.histogram(arr[gid == 1], bins=4)
418+
>>> hist_0, hist_1
419+
(array([1, 1, 1]), array([2, 1, 1, 2]))
420+
>>> bins_0, bins_1
421+
(array([0., 1., 2., 3.]), array([0. , 1.25, 2.5 , 3.75, 5. ]))
422+
423+
"""
424+
425+
a, weights, usm_type = _ravel_check_a_and_weights(a, weights)
426+
bin_edges, _ = _get_bin_edges(a, bins, range, usm_type)
427+
return bin_edges

tests/test_histogram.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .helper import (
1616
assert_dtype_allclose,
1717
get_all_dtypes,
18+
get_float_dtypes,
1819
has_support_aspect64,
1920
)
2021

@@ -389,3 +390,30 @@ def test_weights_another_sycl_queue(self):
389390
w = dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue())
390391
with assert_raises(ValueError):
391392
dpnp.histogram(v, weights=w)
393+
394+
395+
class TestHistogramBinEdges:
396+
@pytest.mark.parametrize(
397+
"dtype", get_all_dtypes(no_none=True, no_bool=True)
398+
)
399+
def test_basic(self, dtype):
400+
bins = [1, 2]
401+
v = numpy.array([1, 2, 3, 4], dtype=dtype)
402+
iv = dpnp.array(v)
403+
404+
expected_edges = numpy.histogram_bin_edges(v, bins=bins)
405+
result_edges = dpnp.histogram_bin_edges(iv, bins=bins)
406+
assert_array_equal(result_edges, expected_edges)
407+
408+
@pytest.mark.parametrize("range", [(-0.5, 5), (0, 1)])
409+
@pytest.mark.parametrize("dtype", get_float_dtypes())
410+
def test_range(self, range, dtype):
411+
bins = 30
412+
v = numpy.array(
413+
[0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0], dtype=dtype
414+
)
415+
iv = dpnp.array(v)
416+
417+
expected_edges = numpy.histogram_bin_edges(v, bins=bins, range=range)
418+
result_edges = dpnp.histogram_bin_edges(iv, bins=bins, range=range)
419+
assert_dtype_allclose(result_edges, expected_edges)

tests/test_sycl_queue.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ def test_meshgrid(device_x, device_y):
426426
pytest.param("fabs", [-1.2, 1.2]),
427427
pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
428428
pytest.param("gradient", [1.0, 2.0, 4.0, 7.0, 11.0, 16.0]),
429+
pytest.param("histogram_bin_edges", [0, 0, 0, 1, 2, 3, 3, 4, 5]),
429430
pytest.param(
430431
"imag", [complex(1.0, 2.0), complex(3.0, 4.0), complex(5.0, 6.0)]
431432
),
@@ -620,6 +621,11 @@ def test_reduce_hypot(device):
620621
[-3.0, -2.0, -1.0, 1.0, 2.0, 3.0],
621622
[2.0, 2.0, 2.0, 2.0, 2.0, 2.0],
622623
),
624+
pytest.param(
625+
"histogram_bin_edges",
626+
[0, 0, 0, 1, 2, 3, 3, 4, 5],
627+
[1, 2],
628+
),
623629
pytest.param(
624630
"hypot", [[1.0, 2.0, 3.0, 4.0]], [[-1.0, -2.0, -4.0, -5.0]]
625631
),
@@ -2128,3 +2134,24 @@ def test_histogram(weights, device):
21282134
edges_queue = result_edges.sycl_queue
21292135
assert_sycl_queue_equal(hist_queue, iv.sycl_queue)
21302136
assert_sycl_queue_equal(edges_queue, iv.sycl_queue)
2137+
2138+
2139+
@pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)])
2140+
@pytest.mark.parametrize(
2141+
"device",
2142+
valid_devices,
2143+
ids=[device.filter_string for device in valid_devices],
2144+
)
2145+
def test_histogram_bin_edges(weights, device):
2146+
v = numpy.arange(5)
2147+
w = weights
2148+
2149+
iv = dpnp.array(v, device=device)
2150+
iw = None if weights is None else dpnp.array(w, sycl_queue=iv.sycl_queue)
2151+
2152+
expected_edges = numpy.histogram_bin_edges(v, weights=w)
2153+
result_edges = dpnp.histogram_bin_edges(iv, weights=iw)
2154+
assert_dtype_allclose(result_edges, expected_edges)
2155+
2156+
edges_queue = result_edges.sycl_queue
2157+
assert_sycl_queue_equal(edges_queue, iv.sycl_queue)

tests/test_usm_type.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,7 @@ def test_norm(usm_type, ord, axis):
539539
pytest.param("exp2", [0.0, 1.0, 2.0]),
540540
pytest.param("expm1", [1.0e-10, 1.0, 2.0, 4.0, 7.0]),
541541
pytest.param("floor", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
542+
pytest.param("histogram_bin_edges", [0, 0, 0, 1, 2, 3, 3, 4, 5]),
542543
pytest.param(
543544
"imag", [complex(1.0, 2.0), complex(3.0, 4.0), complex(5.0, 6.0)]
544545
),
@@ -1240,3 +1241,15 @@ def test_histogram(usm_type_v, usm_type_w):
12401241
assert w.usm_type == usm_type_w
12411242
assert hist.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w])
12421243
assert edges.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w])
1244+
1245+
1246+
@pytest.mark.parametrize("usm_type_v", list_of_usm_types, ids=list_of_usm_types)
1247+
@pytest.mark.parametrize("usm_type_w", list_of_usm_types, ids=list_of_usm_types)
1248+
def test_histogram_bin_edges(usm_type_v, usm_type_w):
1249+
v = dp.arange(5, usm_type=usm_type_v)
1250+
w = dp.arange(7, 12, usm_type=usm_type_w)
1251+
1252+
edges = dp.histogram_bin_edges(v, weights=w)
1253+
assert v.usm_type == usm_type_v
1254+
assert w.usm_type == usm_type_w
1255+
assert edges.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w])

0 commit comments

Comments
 (0)