Skip to content

Commit 49a9b20

Browse files
committed
BUG: Raise if histogram cannot create finite bin sizes
When many bins are requested in a small value region, it may not be possible to create enough distinct bin edges due to limited numeric precision. Until now, `histogram` returned identical consecutive bin edges in that case, which means a bin width of 0. These bins could also have counts associated with them. Instead of returning such an illogical bin distribution, this PR raises a `ValueError` if the calculated bins do not all have a finite (non-zero) size. Closes numpy#27142.
1 parent 11eb606 commit 49a9b20

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

numpy/lib/_histograms_impl.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -450,6 +450,10 @@ def _get_bin_edges(a, bins, range, weights):
450450
bin_edges = np.linspace(
451451
first_edge, last_edge, n_equal_bins + 1,
452452
endpoint=True, dtype=bin_type)
453+
if np.any(bin_edges[:-1] >= bin_edges[1:]):
454+
raise ValueError(
455+
f'Too many bins for data range. Cannot create {n_equal_bins} '
456+
f'finite-sized bins.')
453457
return bin_edges, (first_edge, last_edge, n_equal_bins)
454458
else:
455459
return bin_edges, None

numpy/lib/tests/test_histograms.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ def test_object_array_of_0d(self):
270270
histogram, [np.array(0.4) for i in range(10)] + [np.inf])
271271

272272
# these should not crash
273-
np.histogram([np.array(0.5) for i in range(10)] + [.500000000000001])
273+
np.histogram([np.array(0.5) for i in range(10)] + [.500000000000002])
274274
np.histogram([np.array(0.5) for i in range(10)] + [.5])
275275

276276
def test_some_nan_values(self):
@@ -395,6 +395,11 @@ def test_histogram_bin_edges(self):
395395
edges = histogram_bin_edges(arr, bins='auto', range=(0, 1))
396396
assert_array_equal(edges, e)
397397

398+
def test_small_value_range(self):
399+
arr = np.array([1, 1 + 2e-16] * 10)
400+
with pytest.raises(ValueError, match="Too many bins for data range"):
401+
histogram(arr, bins=10)
402+
398403
# @requires_memory(free_bytes=1e10)
399404
# @pytest.mark.slow
400405
@pytest.mark.skip(reason="Bad memory reports lead to OOM in ci testing")

0 commit comments

Comments
 (0)