From b2798cbf17947afba1c6dc786310662609fa9e09 Mon Sep 17 00:00:00 2001 From: MengAiDev Date: Mon, 21 Jul 2025 15:30:30 +0800 Subject: [PATCH 1/2] feat(collections): Add HeapDict class HeapDict is a hybrid data structure combining dictionary and heap functionalities, providing efficient key-value access and priority-based operations. Key features include: - Basic dictionary operations: set/get/delete key-value pairs, key existence check, size retrieval, key iteration - Heap operations: remove-and-return minimum key-value pair, peek minimum pair without removal, update priority of existing keys --- Lib/collections/__init__.py | 145 ++++++++++++++++++ ...-07-21-15-29-04.gh-issue-136898.YAz31U.rst | 1 + 2 files changed, 146 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-21-15-29-04.gh-issue-136898.YAz31U.rst diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index b8653f40a942f0..1588ea34e274cf 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1606,3 +1606,148 @@ def upper(self): def zfill(self, width): return self.__class__(self.data.zfill(width)) + + +################################################################################ +### HeapDict +################################################################################ + +import collections.abc +import heapq +from typing import Any, Dict, Iterator, List, Optional, Tuple, TypeVar, Union + +K = TypeVar('K') +V = TypeVar('V') + +class HeapDict(collections.abc.MutableMapping): + """Dictionary that maintains heap property based on values. + + HeapDict combines the functionality of a dictionary with a heap, + providing efficient access to key-value pairs while maintaining + a heap property for priority-based operations. 
+ + Basic operations: + - d[key] = value: Set a key-value pair + - value = d[key]: Get value by key + - del d[key]: Remove a key-value pair + - key in d: Test if key exists + - len(d): Get number of items + - iter(d): Iterate through keys + + Heap operations: + - d.popmin(): Remove and return the (key, value) pair with minimum value + - d.peekmin(): Return the (key, value) pair with minimum value without removing + - d.update_priority(key, new_value): Update the value/priority of an existing key + """ + + def __init__(self, *args, **kwargs): + """Initialize a new HeapDict with optional initial values.""" + self._dict: Dict[K, V] = {} # Maps keys to values + self._heap: List[Tuple[V, K, int]] = [] # List of (value, key, counter) + self._counter = 0 # Used to break ties for values that compare equal + self._removed_keys = set() # Track removed keys for lazy deletion + + # Add initial items + if args or kwargs: + self.update(*args, **kwargs) + + def __setitem__(self, key: K, value: V) -> None: + """Set a key-value pair, maintaining heap property.""" + if key in self._dict: + self.update_priority(key, value) + else: + self._dict[key] = value + count = self._counter + self._counter += 1 + heapq.heappush(self._heap, (value, key, count)) + + def __getitem__(self, key: K) -> V: + """Get value by key.""" + return self._dict[key] + + def __delitem__(self, key: K) -> None: + """Remove a key-value pair.""" + if key not in self._dict: + raise KeyError(key) + + # Mark the key as removed + self._removed_keys.add(key) + + # Remove from dictionary + del self._dict[key] + + # Note: We don't remove from the heap here for efficiency. + # Instead, we do lazy deletion during heap operations. 
+ + def __iter__(self) -> Iterator[K]: + """Iterate through keys.""" + return iter(self._dict) + + def __len__(self) -> int: + """Return the number of items.""" + return len(self._dict) + + def __repr__(self) -> str: + """Return string representation.""" + return f"{self.__class__.__name__}({dict(self.items())})" + + def _clean_heap(self) -> None: + """Clean the heap by removing marked items.""" + if len(self._removed_keys) > len(self._heap) // 2: + # If too many removed items, rebuild the heap + new_heap = [(v, k, c) for v, k, c in self._heap if k in self._dict] + heapq.heapify(new_heap) + self._heap = new_heap + self._removed_keys.clear() + + def popmin(self) -> Tuple[K, V]: + """Remove and return the (key, value) pair with minimum value.""" + if not self._dict: + raise KeyError("popmin from an empty HeapDict") + + # Skip items that were already removed + while self._heap: + value, key, _ = heapq.heappop(self._heap) + if key not in self._removed_keys and key in self._dict: + del self._dict[key] + return key, value + + # This should never happen if the data structure is consistent + raise RuntimeError("Heap is inconsistent with dictionary") + + def peekmin(self) -> Tuple[K, V]: + """Return the (key, value) pair with minimum value without removing it.""" + if not self._dict: + raise KeyError("peekmin from an empty HeapDict") + + # Skip items that were already removed + while self._heap: + value, key, _ = self._heap[0] + if key not in self._removed_keys and key in self._dict: + return key, value + + # If the top item is removed, pop it and continue + heapq.heappop(self._heap) + + # This should never happen if the data structure is consistent + raise RuntimeError("Heap is inconsistent with dictionary") + + def update_priority(self, key: K, new_value: V) -> None: + """Update the value/priority of an existing key.""" + if key not in self._dict: + raise KeyError(key) + + # Update the dictionary + self._dict[key] = new_value + + # Mark the old entry as removed + 
self._removed_keys.add(key) + + # Add a new entry to the heap + count = self._counter + self._counter += 1 + heapq.heappush(self._heap, (new_value, key, count)) + + # Clean the heap if there are too many removed items + if len(self._removed_keys) > len(self._heap) // 2: + self._clean_heap() \ No newline at end of file diff --git a/Misc/NEWS.d/next/Library/2025-07-21-15-29-04.gh-issue-136898.YAz31U.rst b/Misc/NEWS.d/next/Library/2025-07-21-15-29-04.gh-issue-136898.YAz31U.rst new file mode 100644 index 00000000000000..fbea34295520ec --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-21-15-29-04.gh-issue-136898.YAz31U.rst @@ -0,0 +1 @@ +Add ``HeapDict`` to the :mod:`collections` module: a mutable mapping that also supports the value-ordered heap operations ``popmin()``, ``peekmin()`` and ``update_priority()``. From 8dae29d9486b478d6aabc36deb401f0a049088b6 Mon Sep 17 00:00:00 2001 From: MengAiDev Date: Mon, 21 Jul 2025 15:50:12 +0800 Subject: [PATCH 2/2] fix heapdict --- Lib/collections/__init__.py | 77 ++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 1588ea34e274cf..c710c47a0051d3 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1614,10 +1614,11 @@ def zfill(self, width): import collections.abc import heapq -from typing import Any, Dict, Iterator, List, Optional, Tuple, TypeVar, Union +import sys -K = TypeVar('K') -V = TypeVar('V') +# Use string forward references for type annotations to avoid circular imports +if sys.version_info >= (3, 7): + from __future__ import annotations class HeapDict(collections.abc.MutableMapping): """Dictionary that maintains heap property based on values. @@ -1625,25 +1626,12 @@ class HeapDict(collections.abc.MutableMapping): HeapDict combines the functionality of a dictionary with a heap, providing efficient access to key-value pairs while maintaining a heap property for priority-based operations. 
- - Basic operations: - - d[key] = value: Set a key-value pair - - value = d[key]: Get value by key - - del d[key]: Remove a key-value pair - - key in d: Test if key exists - - len(d): Get number of items - - iter(d): Iterate through keys - - Heap operations: - - d.popmin(): Remove and return the (key, value) pair with minimum value - - d.peekmin(): Return the (key, value) pair with minimum value without removing - - d.update_priority(key, new_value): Update the value/priority of an existing key """ def __init__(self, *args, **kwargs): """Initialize a new HeapDict with optional initial values.""" - self._dict: Dict[K, V] = {} # Maps keys to values - self._heap: List[Tuple[V, K, int]] = [] # List of (value, key, counter) + self._dict = {} # Maps keys to values + self._heap = [] # List of (value, key, counter) self._counter = 0 # Used to break ties for values that compare equal self._removed_keys = set() # Track removed keys for lazy deletion @@ -1651,7 +1639,7 @@ def __init__(self, *args, **kwargs): if args or kwargs: self.update(*args, **kwargs) - def __setitem__(self, key: K, value: V) -> None: + def __setitem__(self, key, value): """Set a key-value pair, maintaining heap property.""" if key in self._dict: self.update_priority(key, value) @@ -1661,37 +1649,32 @@ def __setitem__(self, key: K, value: V) -> None: self._counter += 1 heapq.heappush(self._heap, (value, key, count)) - def __getitem__(self, key: K) -> V: + def __getitem__(self, key): """Get value by key.""" return self._dict[key] - def __delitem__(self, key: K) -> None: + def __delitem__(self, key): """Remove a key-value pair.""" if key not in self._dict: raise KeyError(key) # Mark the key as removed self._removed_keys.add(key) - - # Remove from dictionary del self._dict[key] - - # Note: We don't remove from the heap here for efficiency. - # Instead, we do lazy deletion during heap operations. 
- def __iter__(self) -> Iterator[K]: + def __iter__(self): """Iterate through keys.""" return iter(self._dict) - def __len__(self) -> int: + def __len__(self): """Return the number of items.""" return len(self._dict) - def __repr__(self) -> str: + def __repr__(self): """Return string representation.""" return f"{self.__class__.__name__}({dict(self.items())})" - def _clean_heap(self) -> None: + def _clean_heap(self): """Clean the heap by removing marked items.""" if len(self._removed_keys) > len(self._heap) // 2: # If too many removed items, rebuild the heap @@ -1700,7 +1683,7 @@ def _clean_heap(self) -> None: self._heap = new_heap self._removed_keys.clear() - def popmin(self) -> Tuple[K, V]: + def popmin(self): """Remove and return the (key, value) pair with minimum value.""" if not self._dict: raise KeyError("popmin from an empty HeapDict") @@ -1712,10 +1695,9 @@ def popmin(self) -> Tuple[K, V]: del self._dict[key] return key, value - # This should never happen if the data structure is consistent raise RuntimeError("Heap is inconsistent with dictionary") - def peekmin(self) -> Tuple[K, V]: + def peekmin(self): """Return the (key, value) pair with minimum value without removing it.""" if not self._dict: raise KeyError("peekmin from an empty HeapDict") @@ -1725,29 +1707,38 @@ def peekmin(self) -> Tuple[K, V]: value, key, _ = self._heap[0] if key not in self._removed_keys and key in self._dict: return key, value - - # If the top item is removed, pop it and continue heapq.heappop(self._heap) - # This should never happen if the data structure is consistent raise RuntimeError("Heap is inconsistent with dictionary") - def update_priority(self, key: K, new_value: V) -> None: + def update_priority(self, key, new_value): """Update the value/priority of an existing key.""" if key not in self._dict: raise KeyError(key) # Update the dictionary self._dict[key] = new_value - - # Mark the old entry as removed self._removed_keys.add(key) - - # Add a new entry to the heap count = 
self._counter self._counter += 1 heapq.heappush(self._heap, (new_value, key, count)) - # Clean the heap if there are too many removed items if len(self._removed_keys) > len(self._heap) // 2: - self._clean_heap() \ No newline at end of file + self._clean_heap() + +# Add type hints for older Python versions or static type checkers +if sys.version_info < (3, 7) or typing.TYPE_CHECKING: + from typing import Any, Dict, Iterator, List, Optional, Tuple, TypeVar, Union + K = TypeVar('K') + V = TypeVar('V') + HeapDict.__annotations__ = { + '_dict': Dict[K, V], + '_heap': List[Tuple[V, K, int]], + '__setitem__': None, + '__getitem__': None, + '__delitem__': None, + '__iter__': Iterator[K], + 'popmin': Tuple[K, V], + 'peekmin': Tuple[K, V], + 'update_priority': None + } \ No newline at end of file