Skip to content

Commit efc0e7f

Browse files
committed
store the flag in the packed string
1 parent 7d0c58f commit efc0e7f

File tree

1 file changed

+24
-16
lines changed

1 file changed

+24
-16
lines changed

stringdtype/stringdtype/src/static_string.c

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,15 @@ typedef union _npy_static_string_u {
4444
#define NPY_STRING_SHORT 0x40 // 0100 0000
4545
#define NPY_STRING_ARENA_FREED 0x20 // 0010 0000
4646
#define NPY_STRING_ON_HEAP 0x10 // 0001 0000
47+
#define NPY_STRING_MEDIUM 0x08 // 0000 1000
48+
#define NPY_STRING_FLAG_MASK 0xF8 // 1111 1000
4749

4850
// short string sizes fit in a 4-bit integer
4951
#define NPY_SHORT_STRING_SIZE_MASK 0x0F // 0000 1111
5052
#define NPY_SHORT_STRING_MAX_SIZE \
5153
(sizeof(npy_static_string) - 1) // 15 or 7 depending on arch
5254
// one bit is used to signal a medium string
53-
#define NPY_MEDIUM_STRING_MAX_SIZE 0x7F // 0111 1111 or 127
54-
#define NPY_MEDIUM_STRING_FLAG 0x80 // 1000 0000
55+
#define NPY_MEDIUM_STRING_MAX_SIZE 0xFF  // 1111 1111 or 255
5556

5657
// Since this has no flags set, technically this is a heap-allocated string
5758
// with size zero. Practically, that doesn't matter because we always do size
@@ -89,8 +90,7 @@ struct npy_string_allocator {
8990
void
9091
set_vstring_size(_npy_static_string_u *str, size_t size)
9192
{
92-
unsigned char *flags = &str->direct_buffer.flags_and_size;
93-
unsigned char current_flags = *flags & ~NPY_SHORT_STRING_SIZE_MASK;
93+
unsigned char current_flags = str->direct_buffer.flags_and_size;
9494
str->vstring.size = size;
9595
str->direct_buffer.flags_and_size = current_flags;
9696
}
@@ -115,7 +115,7 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
115115
// one extra size_t to store the size of the allocation
116116
size_t string_storage_size;
117117
if (size <= NPY_MEDIUM_STRING_MAX_SIZE) {
118-
string_storage_size = size + sizeof(char);
118+
string_storage_size = size + sizeof(unsigned char);
119119
}
120120
else {
121121
string_storage_size = size + sizeof(size_t);
@@ -148,12 +148,13 @@ npy_string_arena_malloc(npy_string_arena *arena, npy_string_realloc_func r,
148148
}
149149
char *ret;
150150
if (size <= NPY_MEDIUM_STRING_MAX_SIZE) {
151-
char *size_loc = (char *)&arena->buffer[arena->cursor];
152-
*size_loc = size | NPY_MEDIUM_STRING_FLAG;
151+
unsigned char *size_loc =
152+
(unsigned char *)&arena->buffer[arena->cursor];
153+
*size_loc = size;
153154
ret = &arena->buffer[arena->cursor + sizeof(char)];
154155
}
155156
else {
156-
size_t *size_ptr = (size_t *)&arena->buffer[arena->cursor];
157+
char *size_ptr = (char *)&arena->buffer[arena->cursor];
157158
memcpy(size_ptr, &size, sizeof(size_t));
158159
ret = &arena->buffer[arena->cursor + sizeof(size_t)];
159160
}
@@ -223,9 +224,12 @@ is_short_string(const npy_packed_static_string *s)
223224
}
224225

225226
int
226-
is_medium_string(const char *buf)
227+
is_medium_string(const _npy_static_string_u *s)
227228
{
228-
return ((buf[0] & NPY_MEDIUM_STRING_FLAG) != 0);
229+
unsigned char high_byte = s->direct_buffer.flags_and_size;
230+
int has_short_flag = (high_byte & NPY_STRING_SHORT);
231+
int has_medium_flag = (high_byte & NPY_STRING_MEDIUM);
232+
return (!has_short_flag && has_medium_flag);
229233
}
230234

231235
int
@@ -308,18 +312,18 @@ heap_or_arena_allocate(npy_string_allocator *allocator,
308312
return NULL;
309313
}
310314
size_t alloc_size;
311-
if (is_medium_string(buf)) {
315+
if (is_medium_string(to_init_u)) {
312316
// stored in a single byte (an unsigned char), so direct access is OK
313-
alloc_size = (size_t) * (buf - 1) & ~NPY_MEDIUM_STRING_FLAG;
317+
alloc_size = (size_t) * (buf - 1);
314318
}
315319
else {
316320
// not necessarily memory-aligned, so need to use memcpy
317-
char *size_loc = (char *)((uintptr_t)buf - sizeof(size_t));
321+
size_t *size_loc = (size_t *)((uintptr_t)buf - sizeof(size_t));
318322
memcpy(&alloc_size, size_loc, sizeof(size_t));
319323
}
320324
if (size <= alloc_size) {
321325
// we have room!
322-
*flags = NPY_STRING_ARENA_FREED;
326+
*flags &= ~NPY_STRING_ARENA_FREED;
323327
return buf;
324328
}
325329
else {
@@ -346,8 +350,12 @@ heap_or_arena_allocate(npy_string_allocator *allocator,
346350
if (arena == NULL) {
347351
return NULL;
348352
}
349-
return npy_string_arena_malloc(arena, allocator->realloc,
350-
sizeof(char) * size);
353+
char *ret = npy_string_arena_malloc(arena, allocator->realloc,
354+
sizeof(char) * size);
355+
if (size < NPY_MEDIUM_STRING_MAX_SIZE) {
356+
*flags |= NPY_STRING_MEDIUM;
357+
}
358+
return ret;
351359
}
352360

353361
int

0 commit comments

Comments
 (0)