Skip to content

Commit f839f1d

Browse files
committed
store the sizes of small strings in a char
1 parent a6d7381 commit f839f1d

File tree

1 file changed

+37
-8
lines changed

1 file changed

+37
-8
lines changed

stringdtype/stringdtype/src/static_string.c

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ typedef union _npy_static_string_u {
4949
#define NPY_SHORT_STRING_SIZE_MASK 0x0F // 0000 1111
5050
#define NPY_SHORT_STRING_MAX_SIZE \
5151
(sizeof(npy_static_string) - 1) // 15 or 7 depending on arch
52+
// the high bit of the one-byte size header flags a medium string; the low 7 bits hold the size
53+
#define NPY_MEDIUM_STRING_MAX_SIZE 0x7F // 0111 1111 or 127
54+
#define NPY_MEDIUM_STRING_FLAG 0x80 // 1000 0000
5255

5356
// Since this has no flags set, technically this is a heap-allocated string
5457
// with size zero. Practically, that doesn't matter because we always do size
@@ -110,9 +113,13 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
110113
size_t size)
111114
{
112115
// reserve room for the size header stored in front of the string data:
// one char for medium strings, one size_t otherwise
113-
size_t string_storage_size = size + sizeof(size_t);
114-
// expand size to nearest multiple of 8 bytes to ensure 64 bit alignment
115-
string_storage_size += (8 - string_storage_size % 8);
116+
size_t string_storage_size;
117+
if (size <= NPY_MEDIUM_STRING_MAX_SIZE) {
118+
string_storage_size = size + sizeof(char);
119+
}
120+
else {
121+
string_storage_size = size + sizeof(size_t);
122+
}
116123
if ((arena->size - arena->cursor) <= string_storage_size) {
117124
// realloc the buffer so there is enough room
118125
// first guess is to double the size of the buffer
@@ -130,7 +137,7 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
130137
// doubling the current size isn't enough
131138
newsize = 2 * (arena->cursor + size);
132139
}
133-
// realloc passed a NULL pointer acts like malloc
140+
// passing a NULL buffer to realloc is the same as malloc
134141
char *newbuf = r(arena->buffer, newsize);
135142
if (newbuf == NULL) {
136143
return NULL;
@@ -139,9 +146,17 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
139146
arena->buffer = newbuf;
140147
arena->size = newsize;
141148
}
142-
size_t *size_loc = (size_t *)&arena->buffer[arena->cursor];
143-
*size_loc = size;
144-
char *ret = &arena->buffer[arena->cursor + sizeof(size_t)];
149+
char *ret;
150+
if (size <= NPY_MEDIUM_STRING_MAX_SIZE) {
151+
char *size_loc = (char *)&arena->buffer[arena->cursor];
152+
*size_loc = size | NPY_MEDIUM_STRING_FLAG;
153+
ret = &arena->buffer[arena->cursor + sizeof(char)];
154+
}
155+
else {
156+
size_t *size_ptr = (size_t *)&arena->buffer[arena->cursor];
157+
memcpy(size_ptr, &size, sizeof(size_t));
158+
ret = &arena->buffer[arena->cursor + sizeof(size_t)];
159+
}
145160
arena->cursor += string_storage_size;
146161
return ret;
147162
}
@@ -207,6 +222,12 @@ is_short_string(const npy_packed_static_string *s)
207222
return has_short_flag && !has_on_heap_flag;
208223
}
209224

225+
// Return nonzero when the NPY_MEDIUM_STRING_FLAG bit is set in buf[0],
// the first byte that buf points at, and zero otherwise.
// NOTE(review): the caller visible in this diff passes buf here but then
// reads the size byte from buf - 1, so this appears to test the first data
// byte rather than the size header -- confirm which pointer is intended.
int
226+
is_medium_string(const char *buf)
227+
{
228+
return ((buf[0] & NPY_MEDIUM_STRING_FLAG) != 0);
229+
}
230+
210231
int
211232
npy_string_isnull(const npy_packed_static_string *s)
212233
{
@@ -286,7 +307,15 @@ heap_or_arena_allocate(npy_string_allocator *allocator,
286307
if (buf == NULL) {
287308
return NULL;
288309
}
289-
size_t alloc_size = *((size_t *)(buf - 1));
310+
size_t alloc_size;
311+
if (is_medium_string(buf)) {
312+
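// the size header is a single char, so there is no alignment concern here
// NOTE(review): plain char may be signed; a flagged byte would then sign-extend in the (size_t) cast -- consider reading it as unsigned char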
313+
alloc_size = (size_t) * (buf - 1) & ~NPY_MEDIUM_STRING_FLAG;
314+
}
315+
else {
316+
// not necessarily memory-aligned, so need to use memcpy
317+
memcpy(&alloc_size, ((size_t *)buf - 1), sizeof(size_t));
318+
}
290319
if (size <= alloc_size) {
291320
// we have room!
292321
*flags = NPY_STRING_ARENA_FREED;

0 commit comments

Comments
 (0)