Skip to content

Commit f07889f

Browse files
committed
fix bug when growing a medium string and when freeing a string associated with a newly allocated arena
1 parent 30675e6 commit f07889f

File tree

2 files changed

+43
-26
lines changed

2 files changed

+43
-26
lines changed

stringdtype/stringdtype/src/static_string.c

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
164164
int
165165
npy_string_arena_free(npy_string_arena *arena, _npy_static_string_u *str)
166166
{
167+
if (arena->size == 0 && arena->cursor == 0 && arena->buffer == NULL) {
168+
// empty arena, nothing to do
169+
return 0;
170+
}
171+
167172
char *ptr = vstring_buffer(arena, str);
168173
if (ptr == NULL) {
169174
return -1;
@@ -195,7 +200,7 @@ npy_string_new_allocator(npy_string_malloc_func m, npy_string_free_func f,
195200
allocator->malloc = m;
196201
allocator->free = f;
197202
allocator->realloc = r;
198-
// arenas don't get created until the dtype is used for array creation
203+
// arena buffer gets allocated in npy_string_arena_malloc
199204
allocator->arena = NEW_ARENA;
200205
return allocator;
201206
}
@@ -295,17 +300,24 @@ heap_or_arena_allocate(npy_string_allocator *allocator,
295300
_npy_static_string_u *to_init_u, size_t size,
296301
int *on_heap)
297302
{
298-
// check if it's a previously heap-allocated string or a short string
299-
// that has no heap allocation
300303
unsigned char *flags = &to_init_u->direct_buffer.flags_and_size;
304+
if (*flags & NPY_STRING_SHORT) {
305+
// Have to heap allocate since there isn't a preexisting
306+
// allocation. This leaves the NPY_STRING_SHORT flag set to indicate
307+
// that there is no room in the arena buffer for strings in this
308+
// entry
309+
*flags |= NPY_STRING_ON_HEAP;
310+
*on_heap = 1;
311+
return allocator->malloc(sizeof(char) * size);
312+
}
313+
npy_string_arena *arena = &allocator->arena;
314+
if (arena == NULL) {
315+
return NULL;
316+
}
301317
if (*flags & NPY_STRING_ARENA_FREED) {
302318
// Check if there's room for the new string in the existing
303319
// allocation. The size is stored one size_t "behind" the beginning of
304320
// the allocation.
305-
npy_string_arena *arena = &allocator->arena;
306-
if (arena == NULL) {
307-
return NULL;
308-
}
309321
char *buf = vstring_buffer(arena, to_init_u);
310322
if (buf == NULL) {
311323
return NULL;
@@ -326,29 +338,13 @@ heap_or_arena_allocate(npy_string_allocator *allocator,
326338
return buf;
327339
}
328340
else {
329-
// No room, resort to a heap allocation. This leaves the
330-
// NPY_STRING_ARENA_FREED flag set to possibly re-use the arena
331-
// allocation in the future if there is room for it
332-
*flags |= NPY_STRING_ON_HEAP;
341+
// No room, resort to a heap allocation.
342+
*flags = NPY_STRING_ON_HEAP;
333343
*on_heap = 1;
334344
return allocator->malloc(sizeof(char) * size);
335345
}
336346
}
337-
else if (*flags & NPY_STRING_SHORT) {
338-
// Have to heap allocate since there isn't a preexisting
339-
// allocation. This leaves the NPY_STRING_SHORT flag set to indicate
340-
// that there is no room in the arena buffer for strings in this
341-
// entry, avoiding possible reallocation of the entire arena buffer
342-
// when writing to a single string
343-
*flags |= NPY_STRING_ON_HEAP;
344-
*on_heap = 1;
345-
return allocator->malloc(sizeof(char) * size);
346-
}
347347
// string isn't previously allocated, so add to existing arena allocation
348-
npy_string_arena *arena = &allocator->arena;
349-
if (arena == NULL) {
350-
return NULL;
351-
}
352348
char *ret = npy_string_arena_malloc(arena, allocator->realloc,
353349
sizeof(char) * size);
354350
if (size < NPY_MEDIUM_STRING_MAX_SIZE) {
@@ -382,7 +378,9 @@ heap_or_arena_deallocate(npy_string_allocator *allocator,
382378
if (npy_string_arena_free(arena, str_u) < 0) {
383379
return -1;
384380
}
385-
str_u->direct_buffer.flags_and_size |= NPY_STRING_ARENA_FREED;
381+
if (arena->buffer != NULL) {
382+
str_u->direct_buffer.flags_and_size |= NPY_STRING_ARENA_FREED;
383+
}
386384
}
387385
return 0;
388386
}

stringdtype/tests/test_stringdtype.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -752,3 +752,22 @@ def test_string_too_large_error():
752752
arr = np.array(["a", "b", "c"], dtype=StringDType())
753753
with pytest.raises(MemoryError):
754754
arr * (2**63 - 2)
755+
756+
757+
def test_growing_strings(dtype):
758+
# growing a string leads to a heap allocation, this tests to make sure
759+
# we do that bookeeping correctly for all possible starting cases
760+
data = [
761+
"hello", # a short string
762+
"abcdefghijklmnopqestuvwxyz", # a medium heap-allocated string
763+
"hello" * 200, # a long heap-allocated string
764+
]
765+
766+
arr = np.array(data, dtype=dtype)
767+
uarr = np.array(data, dtype=str)
768+
769+
for _ in range(5):
770+
arr = arr + arr
771+
uarr = uarr + uarr
772+
773+
np.testing.assert_array_equal(arr, uarr)

0 commit comments

Comments
 (0)