-
-
Notifications
You must be signed in to change notification settings - Fork 33.6k
Description
Bug report
Bug description:
The builtin min() and max() functions always iterate over their argument. This makes sense for lists and tuples, but for containers where elements are created as needed, such as ranges, this approach is often unnecessary and can take a very, very long time (up to six or seven thousand years, in the case of a range.)
Where iteration can take place entirely in native functions and methods (as with range), the Python interpreter can't be stopped with ^C (SIGINT); it has to be kill(1)ed.
I imagine the "right" way to fix this is to have min()/max() delegate to __min__()/__max__() where the argument's class provides that method.
A simple reproduction case:
import time
r = range(1 << 29)
before = time.perf_counter()
assert min(r) == 0
# should be instantaneous, but takes several seconds
# `max()` is similar
after = time.perf_counter()
duration = after - before
print(f"That took {duration:.3f}s.")
# That took 7.390s.It doesn't have to be like this:
"""Demonstrates that finding the min/max of a `range` should be instantaneous
Prints (for example):
The new version is 33,200× faster.
"""
import time
def min_range(r: range) -> int:
if not r:
return min(())
return min((r[0], r[-1]))
def max_range(r: range) -> int:
if not r:
return max(())
return max((r[0], r[-1]))
new_time_s = 0.0
builtin_time_s = 0.0
for power in range(24+1):
r = range(1 << power)
before = time.perf_counter()
got_builtin = min(r), max(r)
builtin_time_s += time.perf_counter() - before
before = time.perf_counter()
got_new = min_range(r), max_range(r)
new_time_s += time.perf_counter() - before
assert got_new == got_builtin
ratio = int(round(builtin_time_s / new_time_s, -2))
print(f"The new version is {ratio:,}× faster.")
# The new version is 33,200× faster.A motivating example:
"""Motivating example
"""
import random
import sys
from typing import Final
def float_cent() -> float:
"""Return the smallest positive `float`"""
float_info: Final = sys.float_info
float_cent = float_info.min / (1 << (float_info.mant_dig - 1))
assert float_cent / 2 in (0, float_cent)
# no representable value exists between zero, `float_cent`
return float_cent
def random_subnormal() -> float:
"""Return a random, positive, subnormal float
"""
# (Yes, there are ways to write this that avoid `range` entirely;
# that's not really the point.)
cent: Final = float_cent()
range_ = range(1, round(sys.float_info.min / cent))
# length: roughly 2**52
lowest = min(range_) * cent # ← interpreter hangs here for a few years
highest = max(range_) * cent # ← hangs again for another few years
assert lowest == cent
assert highest == sys.float_info.min - cent
return random.choice(range_) * centEstimates of min(<range>) or max(<range>) runtime:
(below ; Python 3.13.3, Apple M1 Max; Python 3.14.0a7 takes about 1/3rd less time)
1 << 10: 23 us
1 << 11: 46 us
1 << 12: 92 us
1 << 13: 180 us
1 << 14: 370 us
1 << 15: 740 us
1 << 16: 1.5 ms
1 << 17: 2.9 ms
1 << 18: 5.9 ms
1 << 19: 11 ms
1 << 20: 23 ms
1 << 21: 47 ms
1 << 22: 94 ms
1 << 23: 190 ms
1 << 24: 380 ms
1 << 25: 750 ms
1 << 26: 1.5 seconds
1 << 27: 3.0 seconds
1 << 28: 6.0 seconds
1 << 29: 12 seconds
1 << 30: 24 seconds
1 << 31: 48 seconds
1 << 32: 1.6 mins
1 << 33: 3.2 mins
1 << 34: 6.4 mins
1 << 35: 12 mins
1 << 36: 25 mins
1 << 37: 51 mins
1 << 38: 1.7 hours
1 << 39: 3.4 hours
1 << 40: 6.9 hours
1 << 41: 13 hours
1 << 42: 1.1 days
1 << 43: 2.3 days
1 << 44: 4.6 days
1 << 45: 1.3 weeks
1 << 46: 2.6 weeks
1 << 47: 5.2 weeks
1 << 48: 10 weeks
1 << 49: 20 weeks
1 << 50: 41 weeks
1 << 51: 1.6 years
1 << 52: 3.2 years
1 << 53: 6.4 years
1 << 54: 12 years
1 << 55: 25 years
1 << 56: 51 years
1 << 57: 100 years
1 << 58: 210 years
1 << 59: 410 years
1 << 60: 820 years
1 << 61: 1600 years
1 << 62: 3300 years
1 << 63: 6600 years
The above was generated using:
"""Print estimated time required to compute `min(<range>)` or `max(<range>)`
for power-of-two `<range>` lengths
Sample output [[ removed; appears above. ]]
"""
import functools
import time
@functools.cache
def min_max_range_time_per_element_s() -> float:
"""Measure time required to run `min()`/`max()` on a `range`
Returns:
average time required per `range` element (seconds); accurate
for longer ranges (roughly 10⁵ elements or more)
"""
# rough, but good enough:
scale = 1 << 24 # takes about 300ms on my system
range_ = range(scale)
before = time.perf_counter()
min(range_)
after = time.perf_counter()
duration = after - before
duration_per_cycle = duration / scale
return duration_per_cycle
def est_time_min_max_range_s(elements: int) -> float:
"""Estimate time to run `min()` or `max()` on a `range()` with given length
Args:
elements: length of the `range`
Returns:
estimated runtime of `min(<range>)` or `max(<range>)` (seconds)
"""
per_element_s = min_max_range_time_per_element_s()
total_s = elements * per_element_s
return total_s
def fmt_time_s(total_s: float) -> str:
unit_divs = {
'years': 31_556_736, 'weeks': 604_800, 'days': 86_400,
'hours': 3600, 'mins': 60, 'seconds': 1, 'ms': 1/1000,
'us': 1/1_000_000,
}
unit_name = value = None
for unit_name, div in unit_divs.items():
if (value := total_s / div) >= 1:
break
assert (unit_name is not None) and (value is not None) # pyright
if value >= 1000:
fmt_value = f"{int(round(value, -2))}"
elif value >= 100:
fmt_value = f"{int(round(value, -1))}"
elif value >= 10:
fmt_value = f"{int(value)}"
elif value >= 1:
fmt_value = f"{value:.1f}"
else:
fmt_value = f"{value:.2f}"
assert (value < 0.1) or (0.8 <= float(fmt_value) / value <= 1.2)
if '.' not in fmt_value:
shift = 4 - len(fmt_value)
pad = 3
else:
index = fmt_value.index('.')
shift = 4 - index
pad = 3 - (len(fmt_value) - index)
shifts = shift * ' '
pads = pad * ' '
return f"{shifts}{fmt_value}{pads}{unit_name}"
def main() -> None:
for power in range(10, 63+1):
time_s = est_time_min_max_range_s(1 << power)
time_str = fmt_time_s(time_s)
print(f"1 << {power:2d}: {time_str}")
if __name__ == '__main__':
main()CPython versions tested on:
3.13, 3.12, 3.11, 3.10, 3.9, 3.14
Operating systems tested on:
macOS