Skip to content

Commit f2d3fad

Browse files
committed
refactor: better performance of AttributeValueList when appending items
1 parent ecaad4f commit f2d3fad

File tree

3 files changed

+51
-54
lines changed

3 files changed

+51
-54
lines changed

src/igraph_ctypes/_internal/attributes/map.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def set(self, key: str, value: T | Iterable[T]) -> None:
9696
def _extend_common_length(self, n: int) -> None:
9797
self._common_length_of_values += n
9898
for value_list in self._items.values():
99-
value_list._extend_length(n)
99+
value_list._extend_length_by(n)
100100

101101
def __getitem__(self, key: str) -> AttributeValueList[T]:
102102
return self._items[key]

src/igraph_ctypes/_internal/attributes/value_list.py

Lines changed: 46 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import numpy as np
22

3-
from itertools import islice
43
from math import ceil
54
from numpy.typing import NDArray
65
from types import EllipsisType
@@ -82,12 +81,18 @@ class AttributeValueList(Sequence[T | None]):
8281
created.
8382
"""
8483

85-
_items: NDArray
86-
"""Internal storage area for the items in the list."""
84+
_buffer: NDArray
85+
"""NumPy array acting as a backing store for the ``_items`` array. The size
86+
of the ``_buffer`` doubles every time we need more space to store the items.
87+
This allows us to have better performance when appending items to the
88+
``_items`` array because most of the time we can just change ``_items`` to
89+
be a view into a longer part of the ``_buffer`` without having to
90+
re-allocate the buffer.
91+
"""
8792

88-
_num_items: int
89-
"""The number of items actually used in the backing storage (which may
90-
be larger to prevent reallocations when extending).
93+
_items: NDArray
94+
"""NumPy array view of the internal storage area of the items in the list.
95+
This array is a view into ``_buffer``.
9196
"""
9297

9398
_type: AttributeType
@@ -114,9 +119,7 @@ def __init__(
114119
if not isinstance(items, np.ndarray):
115120
raise RuntimeError("input is not a NumPy array")
116121

117-
self._items = items # type: ignore
118-
self._type = type
119-
self._fixed_length = bool(fixed_length)
122+
array = items
120123
else:
121124
# Normal, public constructor path
122125
if type is None:
@@ -127,18 +130,24 @@ def __init__(
127130
)
128131

129132
dtype = igraph_to_numpy_attribute_type(type)
130-
self._items = np.fromiter(items if items is not None else (), dtype=dtype)
131-
self._type = type
132-
self._fixed_length = bool(fixed_length)
133+
array = np.fromiter(items if items is not None else (), dtype=dtype)
133134

134-
self._num_items = len(self._items)
135+
# Now we have a NumPy array, but what we actually want is a chunk of
136+
# memory that we manage ourselves, and a NumPy view on top of it
137+
self._buffer = array
138+
self._items = self._buffer[:]
139+
self._type = type
140+
self._fixed_length = bool(fixed_length)
135141

136142
def compact(self) -> None:
137143
"""Compacts the list in-place, reclaiming any memory that was used
138144
earlier for storage when the list was longer.
139145
"""
140-
if len(self._items > self._num_items):
141-
self._items = self._items[: self._num_items]
146+
num_items = len(self._items)
147+
if len(self._buffer) > num_items:
148+
# refcheck=False is potentially dangerous but I think we are okay
149+
# with it here
150+
self._buffer.resize((num_items,), refcheck=False)
142151

143152
def copy(self: C) -> C:
144153
"""Returns a shallow copy of the list."""
@@ -174,17 +183,17 @@ def __eq__(self, other: Any) -> bool:
174183
def __delitem__(self, index: IndexLike) -> None: # noqa: C901
175184
if index is ...:
176185
if not self.fixed_length:
177-
self._num_items = 0
186+
self._items = self._buffer[:0]
178187
return
179188

180189
elif isinstance(index, (int, np.integer)):
181190
if not self.fixed_length:
182-
self._num_items -= 1
183191
self._items[index:-1] = self._items[(index + 1) :]
192+
self._items = self._buffer[: len(self._items) - 1]
184193
return
185194

186195
elif isinstance(index, slice):
187-
slice_len = _slice_length(index, self._num_items)
196+
slice_len = _slice_length(index, len(self))
188197
if slice_len <= 0:
189198
# Nothing to delete, this is allowed
190199
return
@@ -199,8 +208,9 @@ def __delitem__(self, index: IndexLike) -> None: # noqa: C901
199208
):
200209
del self[...]
201210
else:
202-
self._items = np.delete(self._items, index)
203-
self._num_items = len(self._items)
211+
tmp = np.delete(self._items, index)
212+
self._buffer = tmp
213+
self._items = self._buffer[:]
204214
return
205215

206216
elif hasattr(index, "__getitem__"):
@@ -210,8 +220,8 @@ def __delitem__(self, index: IndexLike) -> None: # noqa: C901
210220

211221
tmp = np.delete(self._items, index) # type: ignore
212222
if not self.fixed_length:
213-
self._items = tmp
214-
self._num_items = len(self._items)
223+
self._buffer = tmp
224+
self._items = self._buffer[:]
215225
return
216226
else:
217227
# Try to delete anyway, check if the length remains the same
@@ -242,13 +252,7 @@ def __getitem__(self, index: IndexLike):
242252
items = self._items
243253

244254
if isinstance(index, (int, np.integer)):
245-
if index < 0:
246-
new_index = index + self._num_items
247-
if new_index < 0 or new_index >= self._num_items:
248-
raise IndexError("list index out of range")
249-
return items[new_index]
250-
else:
251-
return items[index]
255+
return items[index]
252256

253257
elif isinstance(index, slice):
254258
return self.__class__(items[index].copy(), type=self._type, _wrap=True)
@@ -267,14 +271,10 @@ def __getitem__(self, index: IndexLike):
267271
self._raise_invalid_index_error()
268272

269273
def __iter__(self) -> Iterable[T]:
270-
return (
271-
islice(self._items, self._num_items)
272-
if self._num_items < len(self._items)
273-
else iter(self._items)
274-
)
274+
return iter(self._items)
275275

276276
def __len__(self) -> int:
277-
return self._num_items
277+
return len(self._items)
278278

279279
def __repr__(self) -> str:
280280
fl = ", fixed_length=True" if self._fixed_length else ""
@@ -339,19 +339,18 @@ def __setitem__( # noqa: C901
339339
else:
340340
self._raise_invalid_index_error()
341341

342-
def _extend_length(self, n: int) -> None:
342+
def _extend_length_by(self, n: int) -> None:
343343
"""Extends the list with a given number of new items at the end, even
344344
if the list is marked as fixed-length.
345345
346346
Do not use this method unless you know what you are doing.
347347
"""
348-
target_length = self._num_items + n
349-
if len(self._items) < target_length:
350-
# We do not have enough space pre-allocated
351-
new_length = self._num_items
352-
while new_length < target_length:
353-
new_length <<= 1
354-
348+
current_length = len(self)
349+
target_length = current_length + n
350+
if len(self._buffer) < target_length:
351+
# We do not have enough space pre-allocated, find the nearest
352+
# power of two that will suffice
353+
new_length = 2 ** int(np.ceil(np.log2(target_length)))
355354
if self._type is AttributeType.BOOLEAN:
356355
default_value = False
357356
elif self._type is AttributeType.NUMERIC:
@@ -361,12 +360,10 @@ def _extend_length(self, n: int) -> None:
361360
else:
362361
default_value = None
363362

364-
self._items = np.pad(
365-
self._items,
366-
((0, new_length - len(self._items)),),
367-
constant_values=(default_value,), # type: ignore
368-
)
369-
self._num_items = target_length
363+
self._buffer.resize((new_length,), refcheck=False)
364+
self._buffer[current_length:new_length] = default_value
365+
366+
self._items = self._buffer[:target_length]
370367

371368
def _raise_invalid_index_error(self) -> NoReturn:
372369
# Wording of error message similar to NumPy

tests/test_attribute_value_list.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def test_getitem_invalid_index(items: AVL):
177177
@mark.skip("not implemented yet")
178178
def test_getitem_negative_indexing(items: AVL):
179179
items = items[:]
180-
items._extend_length(1)
180+
items._extend_length_by(1)
181181

182182
# Now the backing storage has 10 items, but the list has only 6 active items.
183183
# Indexing with negative indices should not allow us to get into the
@@ -376,9 +376,9 @@ def test_delitem_invalid_index(items: AVL):
376376
del items[RuntimeError] # type: ignore
377377

378378

379-
def test__extend_length(items: AVL):
379+
def test__extend_length_with(items: AVL):
380380
assert len(items) == 5 and items.fixed_length
381-
items._extend_length(3)
381+
items._extend_length_by(3)
382382
assert len(items) == 8 and items.fixed_length
383-
items._extend_length(0)
383+
items._extend_length_by(0)
384384
assert len(items) == 8 and items.fixed_length

0 commit comments

Comments
 (0)