Skip to content

Commit 97f3da5

Browse files
committed
implement small string optimization
1 parent 56ff099 commit 97f3da5

File tree

7 files changed

+281
-216
lines changed

7 files changed

+281
-216
lines changed

stringdtype/stringdtype/src/casts.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,8 +1057,7 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[],
10571057
npy_static_string *out_ss = (npy_static_string *)out;
10581058
npy_string_free(out_ss);
10591059
if (*in == NPY_DATETIME_NAT) {
1060-
/* convert to NA */
1061-
out_ss = NULL;
1060+
*out_ss = NPY_NULL_STRING;
10621061
}
10631062
else {
10641063
if (NpyDatetime_ConvertDatetime64ToDatetimeStruct(dt_meta, *in,

stringdtype/stringdtype/src/dtype.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,18 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
2020

2121
Py_XINCREF(na_object);
2222
((StringDTypeObject *)new)->na_object = na_object;
23-
npy_static_string na_name = NPY_NULL_STRING;
23+
npy_static_string na_name = NPY_EMPTY_STRING;
24+
npy_static_string default_string = NPY_EMPTY_STRING;
2425
int hasnull = na_object != NULL;
2526
int has_nan_na = 0;
2627
int has_string_na = 0;
27-
npy_static_string default_string = NPY_EMPTY_STRING;
2828
if (hasnull) {
2929
// first check for a string
3030
if (PyUnicode_Check(na_object)) {
3131
has_string_na = 1;
3232
Py_ssize_t size = 0;
3333
const char *buf = PyUnicode_AsUTF8AndSize(na_object, &size);
34-
default_string = NPY_NULL_STRING;
34+
default_string = NPY_EMPTY_STRING;
3535
int res = npy_string_newsize(buf, (size_t)size, &default_string);
3636
if (res == -1) {
3737
PyErr_NoMemory();
@@ -206,8 +206,7 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
206206
// setting NA *must* check pointer equality since NA types might not
207207
// allow equality
208208
if (na_object != NULL && obj == na_object) {
209-
// do nothing, npy_string_free already NULLed the struct ssdata points
210-
// to so it already contains a NA value
209+
*sdata = NPY_NULL_STRING;
211210
}
212211
else {
213212
PyObject *val_obj = get_value(obj, descr->coerce);
@@ -259,7 +258,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
259258
}
260259
}
261260
else {
262-
char *data = npy_string_buf(sdata);
261+
const char *data = npy_string_buf(sdata);
263262
size_t size = npy_string_size(sdata);
264263
val_obj = PyUnicode_FromStringAndSize(data, size);
265264
if (val_obj == NULL) {
@@ -420,9 +419,7 @@ stringdtype_fill_zero_loop(void *NPY_UNUSED(traverse_context),
420419
NpyAuxData *NPY_UNUSED(auxdata))
421420
{
422421
while (size--) {
423-
if (npy_string_newsize("", 0, (npy_static_string *)(data)) < 0) {
424-
return -1;
425-
}
422+
*(npy_static_string *)(data) = NPY_EMPTY_STRING;
426423
data += stride;
427424
}
428425
return 0;

stringdtype/stringdtype/src/main.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@ _memory_usage(PyObject *NPY_UNUSED(self), PyObject *obj)
5858
npy_intp count = *innersizeptr;
5959

6060
while (count--) {
61-
memory_usage += npy_string_size(((npy_static_string *)in));
61+
size_t size = npy_string_size(((npy_static_string *)in));
62+
if (size > NPY_SHORT_STRING_MAX_SIZE) {
63+
memory_usage += size;
64+
}
6265
in += stride;
6366
}
6467

stringdtype/stringdtype/src/static_string.c

Lines changed: 108 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,43 @@
22

33
#include "static_string.h"
44

5-
// defined this way so NPY_EMPTY_STRING has an in-memory representation that is
6-
// distinct from a zero-filled struct, allowing us to use a NPY_NULL_STRING
7-
// to represent a sentinel value
8-
const npy_static_string NPY_EMPTY_STRING = {0, "\0"};
9-
const npy_static_string NPY_NULL_STRING = {0, NULL};
5+
// Since this has no flags set, technically this is a heap-allocated string
6+
// with size zero. Practically, that doesn't matter because we always do size
7+
// checks before accessing heap data, but that may be confusing. The nice part
8+
// of this choice is a calloc'd array buffer (e.g. from np.empty) is filled
9+
// with empty elements for free
10+
const npy_static_string NPY_EMPTY_STRING = {
11+
.base = {.direct_buffer = {.flags_and_size = 0, .buf = {0}}}};
12+
// zero-filled, but with the NULL flag set to distinguish from empty string
13+
const npy_static_string NPY_NULL_STRING = {
14+
.base = {.direct_buffer = {.flags_and_size = NPY_STRING_MISSING,
15+
.buf = {0}}}};
16+
17+
int
18+
is_short_string(const npy_static_string *s)
19+
{
20+
unsigned char high_byte = s->base.direct_buffer.flags_and_size;
21+
return (high_byte & NPY_STRING_SHORT) == NPY_STRING_SHORT;
22+
}
23+
24+
int
25+
npy_string_isnull(const npy_static_string *s)
26+
{
27+
unsigned char high_byte = s->base.direct_buffer.flags_and_size;
28+
return (high_byte & NPY_STRING_MISSING) == NPY_STRING_MISSING;
29+
}
30+
31+
int
32+
is_not_a_vstring(const npy_static_string *s)
33+
{
34+
return is_short_string(s) || npy_string_isnull(s);
35+
}
1036

1137
int
1238
npy_string_newsize(const char *init, size_t size, npy_static_string *to_init)
1339
{
14-
if ((to_init == NULL) || (to_init->buf != NULL) ||
15-
(npy_string_size(to_init) != 0)) {
40+
if (to_init == NULL || npy_string_size(to_init) != 0 ||
41+
size > MAX_STRING_SIZE) {
1642
return -2;
1743
}
1844

@@ -21,114 +47,128 @@ npy_string_newsize(const char *init, size_t size, npy_static_string *to_init)
2147
return 0;
2248
}
2349

24-
char *ret_buf = (char *)PyMem_RawMalloc(sizeof(char) * size);
25-
26-
if (ret_buf == NULL) {
27-
return -1;
28-
}
29-
30-
to_init->size = size;
50+
if (size > NPY_SHORT_STRING_MAX_SIZE) {
51+
char *ret_buf = (char *)PyMem_RawMalloc(sizeof(char) * size);
3152

32-
memcpy(ret_buf, init, size);
53+
if (ret_buf == NULL) {
54+
return -1;
55+
}
3356

34-
to_init->buf = ret_buf;
57+
to_init->base.vstring.size = size;
3558

36-
return 0;
37-
}
59+
memcpy(ret_buf, init, size);
3860

39-
void
40-
npy_string_free(npy_static_string *str)
41-
{
42-
if (str->buf != NULL && str->buf != NPY_EMPTY_STRING.buf) {
43-
PyMem_RawFree(str->buf);
44-
str->buf = NULL;
45-
}
46-
str->size = 0;
47-
}
48-
49-
int
50-
npy_string_dup(const npy_static_string *in, npy_static_string *out)
51-
{
52-
if (npy_string_isnull(in)) {
53-
out->size = 0;
54-
out->buf = NULL;
55-
return 0;
61+
to_init->base.vstring.buf = ret_buf;
5662
}
5763
else {
58-
return npy_string_newsize(in->buf, in->size, out);
64+
// size can be no longer than 7 or 15, depending on CPU architecture
65+
// in either case, the size data is in at most the least significant 4
66+
// bits of the byte so it's safe to | with one of 0x10, 0x20, 0x40, or
67+
// 0x80.
68+
to_init->base.direct_buffer.flags_and_size = NPY_STRING_SHORT | size;
69+
memcpy(&(to_init->base.direct_buffer.buf), init, size);
5970
}
71+
72+
return 0;
6073
}
6174

6275
int
6376
npy_string_newemptysize(size_t size, npy_static_string *out)
6477
{
65-
if (out->size != 0 || out->buf != NULL) {
78+
if (out == NULL || npy_string_size(out) != 0 || size > MAX_STRING_SIZE) {
6679
return -2;
6780
}
6881

69-
out->size = size;
70-
7182
if (size == 0) {
7283
*out = NPY_EMPTY_STRING;
7384
return 0;
7485
}
7586

76-
char *buf = (char *)PyMem_RawMalloc(sizeof(char) * size);
87+
if (size > NPY_SHORT_STRING_MAX_SIZE) {
88+
char *buf = (char *)PyMem_RawMalloc(sizeof(char) * size);
7789

78-
if (buf == NULL) {
79-
return -1;
80-
}
90+
if (buf == NULL) {
91+
return -1;
92+
}
8193

82-
out->buf = buf;
94+
out->base.vstring.buf = buf;
95+
out->base.vstring.size = size;
96+
}
97+
else {
98+
out->base.direct_buffer.flags_and_size = NPY_STRING_SHORT | size;
99+
}
83100

84101
return 0;
85102
}
86103

104+
void
105+
npy_string_free(npy_static_string *str)
106+
{
107+
if (is_not_a_vstring(str)) {
108+
// zero out
109+
memcpy(str, &NPY_EMPTY_STRING, sizeof(npy_static_string));
110+
}
111+
else {
112+
if (str->base.vstring.size != 0) {
113+
PyMem_RawFree(str->base.vstring.buf);
114+
}
115+
str->base.vstring.buf = NULL;
116+
str->base.vstring.size = 0;
117+
}
118+
}
119+
120+
int
121+
npy_string_dup(const npy_static_string *in, npy_static_string *out)
122+
{
123+
if (npy_string_isnull(in)) {
124+
*out = NPY_NULL_STRING;
125+
return 0;
126+
}
127+
128+
return npy_string_newsize(npy_string_buf(in), npy_string_size(in), out);
129+
}
130+
87131
int
88132
npy_string_cmp(const npy_static_string *s1, const npy_static_string *s2)
89133
{
90-
size_t minsize = s1->size < s2->size ? s1->size : s2->size;
134+
size_t s1_size = npy_string_size(s1);
135+
size_t s2_size = npy_string_size(s2);
136+
137+
char *s1_buf = npy_string_buf(s1);
138+
char *s2_buf = npy_string_buf(s2);
91139

92-
int cmp = strncmp(s1->buf, s2->buf, minsize);
140+
size_t minsize = s1_size < s2_size ? s1_size : s2_size;
141+
142+
int cmp = strncmp(s1_buf, s2_buf, minsize);
93143

94144
if (cmp == 0) {
95-
if (s1->size > minsize) {
145+
if (s1_size > minsize) {
96146
return 1;
97147
}
98-
if (s2->size > minsize) {
148+
if (s2_size > minsize) {
99149
return -1;
100150
}
101151
}
102152

103153
return cmp;
104154
}
105155

106-
int
107-
npy_string_isnull(const npy_static_string *in)
108-
{
109-
if (in->size == 0 && in->buf == NULL) {
110-
return 1;
111-
}
112-
return 0;
113-
}
114-
115156
size_t
116157
npy_string_size(const npy_static_string *s)
117158
{
118-
return s->size;
159+
if (is_short_string(s)) {
160+
unsigned char high_byte = s->base.direct_buffer.flags_and_size;
161+
return high_byte & NPY_SHORT_STRING_SIZE_MASK;
162+
}
163+
return s->base.vstring.size;
119164
}
120165

121166
char *
122167
npy_string_buf(const npy_static_string *s)
123168
{
124-
return s->buf;
125-
}
126-
127-
int
128-
npy_string_size_and_buf(const npy_static_string *s, size_t *size, char **buf)
129-
{
130-
*size = s->size;
131-
*buf = s->buf;
132-
133-
return 0;
169+
if (is_short_string(s)) {
170+
// the cast drops const, is there a better way?
171+
return (char *)&s->base.direct_buffer.buf[0];
172+
}
173+
return s->base.vstring.buf;
134174
}

stringdtype/stringdtype/src/static_string.h

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,59 @@
44
#include "stdlib.h"
55
#include "string.h"
66

7-
typedef struct npy_static_string {
7+
#if NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN
8+
9+
// the high byte in vstring.size is reserved for flags
10+
// SSSS SSSF
11+
12+
typedef struct _npy_static_string_t {
13+
char *buf;
14+
size_t size;
15+
} _npy_static_string_t;
16+
17+
typedef struct _short_string_buffer {
18+
char buf[sizeof(_npy_static_string_t) - 1];
19+
unsigned char flags_and_size;
20+
} _short_string_buffer;
21+
22+
#elif NPY_BYTE_ORDER == NPY_BIG_ENDIAN
23+
24+
// the high byte in vstring.size is reserved for flags
25+
// FSSS SSSS
26+
27+
// Big-endian layout of a heap-backed ("vstring") string. `size` comes first
// so that its most significant byte is the first byte of the struct.
typedef struct _npy_static_string_t {
    size_t size;
    char *buf;
} _npy_static_string_t;

// Big-endian layout of the inline short-string form. `flags_and_size` sits
// at offset 0, the same byte as the high byte of `size` above; the rest of
// the struct is the inline character buffer. The buffer is sized from
// `_npy_static_string_t` so both forms occupy the same number of bytes.
typedef struct _short_string_buffer {
    unsigned char flags_and_size;
    char buf[sizeof(_npy_static_string_t) - 1];
} _short_string_buffer;
36+
37+
#endif
38+
39+
typedef union _npy_static_string_u {
40+
_npy_static_string_t vstring;
41+
_short_string_buffer direct_buffer;
42+
} _npy_static_string_u;
43+
44+
typedef struct npy_static_string {
45+
_npy_static_string_u base;
1046
} npy_static_string;
1147

48+
// room for two more flags with values 0x20 and 0x10
49+
#define NPY_STRING_MISSING 0x80 // 1000 0000
50+
#define NPY_STRING_SHORT 0x40 // 0100 0000
51+
52+
// short string sizes fit in a 4-bit integer
53+
#define NPY_SHORT_STRING_SIZE_MASK 0x0F // 0000 1111
54+
#define NPY_SHORT_STRING_MAX_SIZE \
55+
(sizeof(npy_static_string) - 1) // 15 or 7 depending on arch
56+
57+
// one byte in size is reserved for flags and small string optimization
58+
// Largest size that fits in the low (sizeof(size_t) - 1) bytes of a size_t,
// i.e. with the top byte left free. The shift is counted in bits (8 per
// byte) and done in size_t; the expansion is fully parenthesized so it can
// be used inside larger expressions.
#define MAX_STRING_SIZE (((size_t)1 << (8 * (sizeof(size_t) - 1))) - 1)
59+
1260
// represents the empty string and can be passed safely to npy_static_string
1361
// API functions
1462
extern const npy_static_string NPY_EMPTY_STRING;
@@ -65,9 +113,4 @@ npy_string_size(const npy_static_string *s);
65113
char *
66114
npy_string_buf(const npy_static_string *s);
67115

68-
// Fills in *size* and *buf* pointers with the values in *s*.
69-
// Currently always returns 0.
70-
int
71-
npy_string_size_and_buf(const npy_static_string *s, size_t *size, char **buf);
72-
73116
#endif /*_NPY_STATIC_STRING_H */

0 commit comments

Comments
 (0)