2
2
3
3
#include "static_string.h"
4
4
5
- // defined this way so NPY_EMPTY_STRING has an in-memory representation that is
6
- // distinct from a zero-filled struct, allowing us to use a NPY_NULL_STRING
7
- // to represent a sentinel value
8
- const npy_static_string NPY_EMPTY_STRING = {0 , "\0" };
9
- const npy_static_string NPY_NULL_STRING = {0 , NULL };
5
+ // Since this has no flags set, technically this is a heap-allocated string
6
+ // with size zero. Practically, that doesn't matter because we always do size
7
+ // checks before accessing heap data, but that may be confusing. The nice part
8
+ // of this choice is a calloc'd array buffer (e.g. from np.empty) is filled
9
+ // with empty elements for free
10
+ const npy_static_string NPY_EMPTY_STRING = {
11
+ .base = {.direct_buffer = {.flags_and_size = 0 , .buf = {0 }}}};
12
+ // zero-filled, but with the NULL flag set to distinguish from empty string
13
+ const npy_static_string NPY_NULL_STRING = {
14
+ .base = {.direct_buffer = {.flags_and_size = NPY_STRING_MISSING ,
15
+ .buf = {0 }}}};
16
+
17
+ int
18
+ is_short_string (const npy_static_string * s )
19
+ {
20
+ unsigned char high_byte = s -> base .direct_buffer .flags_and_size ;
21
+ return (high_byte & NPY_STRING_SHORT ) == NPY_STRING_SHORT ;
22
+ }
23
+
24
+ int
25
+ npy_string_isnull (const npy_static_string * s )
26
+ {
27
+ unsigned char high_byte = s -> base .direct_buffer .flags_and_size ;
28
+ return (high_byte & NPY_STRING_MISSING ) == NPY_STRING_MISSING ;
29
+ }
30
+
31
+ int
32
+ is_not_a_vstring (const npy_static_string * s )
33
+ {
34
+ return is_short_string (s ) || npy_string_isnull (s );
35
+ }
10
36
11
37
int
12
38
npy_string_newsize (const char * init , size_t size , npy_static_string * to_init )
13
39
{
14
- if (( to_init == NULL ) || (to_init -> buf != NULL ) ||
15
- ( npy_string_size ( to_init ) != 0 ) ) {
40
+ if (to_init == NULL || npy_string_size (to_init ) != 0 ||
41
+ size > MAX_STRING_SIZE ) {
16
42
return -2 ;
17
43
}
18
44
@@ -21,114 +47,128 @@ npy_string_newsize(const char *init, size_t size, npy_static_string *to_init)
21
47
return 0 ;
22
48
}
23
49
24
- char * ret_buf = (char * )PyMem_RawMalloc (sizeof (char ) * size );
25
-
26
- if (ret_buf == NULL ) {
27
- return -1 ;
28
- }
29
-
30
- to_init -> size = size ;
50
+ if (size > NPY_SHORT_STRING_MAX_SIZE ) {
51
+ char * ret_buf = (char * )PyMem_RawMalloc (sizeof (char ) * size );
31
52
32
- memcpy (ret_buf , init , size );
53
+ if (ret_buf == NULL ) {
54
+ return -1 ;
55
+ }
33
56
34
- to_init -> buf = ret_buf ;
57
+ to_init -> base . vstring . size = size ;
35
58
36
- return 0 ;
37
- }
59
+ memcpy (ret_buf , init , size );
38
60
39
- void
40
- npy_string_free (npy_static_string * str )
41
- {
42
- if (str -> buf != NULL && str -> buf != NPY_EMPTY_STRING .buf ) {
43
- PyMem_RawFree (str -> buf );
44
- str -> buf = NULL ;
45
- }
46
- str -> size = 0 ;
47
- }
48
-
49
- int
50
- npy_string_dup (const npy_static_string * in , npy_static_string * out )
51
- {
52
- if (npy_string_isnull (in )) {
53
- out -> size = 0 ;
54
- out -> buf = NULL ;
55
- return 0 ;
61
+ to_init -> base .vstring .buf = ret_buf ;
56
62
}
57
63
else {
58
- return npy_string_newsize (in -> buf , in -> size , out );
64
+ // size can be no longer than 7 or 15, depending on CPU architecture
65
+ // in either case, the size data is in at most the least significant 4
66
+ // bits of the byte so it's safe to | with one of 0x10, 0x20, 0x40, or
67
+ // 0x80.
68
+ to_init -> base .direct_buffer .flags_and_size = NPY_STRING_SHORT | size ;
69
+ memcpy (& (to_init -> base .direct_buffer .buf ), init , size );
59
70
}
71
+
72
+ return 0 ;
60
73
}
61
74
62
75
int
63
76
npy_string_newemptysize (size_t size , npy_static_string * out )
64
77
{
65
- if (out -> size != 0 || out -> buf != NULL ) {
78
+ if (out == NULL || npy_string_size ( out ) != 0 || size > MAX_STRING_SIZE ) {
66
79
return -2 ;
67
80
}
68
81
69
- out -> size = size ;
70
-
71
82
if (size == 0 ) {
72
83
* out = NPY_EMPTY_STRING ;
73
84
return 0 ;
74
85
}
75
86
76
- char * buf = (char * )PyMem_RawMalloc (sizeof (char ) * size );
87
+ if (size > NPY_SHORT_STRING_MAX_SIZE ) {
88
+ char * buf = (char * )PyMem_RawMalloc (sizeof (char ) * size );
77
89
78
- if (buf == NULL ) {
79
- return -1 ;
80
- }
90
+ if (buf == NULL ) {
91
+ return -1 ;
92
+ }
81
93
82
- out -> buf = buf ;
94
+ out -> base .vstring .buf = buf ;
95
+ out -> base .vstring .size = size ;
96
+ }
97
+ else {
98
+ out -> base .direct_buffer .flags_and_size = NPY_STRING_SHORT | size ;
99
+ }
83
100
84
101
return 0 ;
85
102
}
86
103
104
+ void
105
+ npy_string_free (npy_static_string * str )
106
+ {
107
+ if (is_not_a_vstring (str )) {
108
+ // zero out
109
+ memcpy (str , & NPY_EMPTY_STRING , sizeof (npy_static_string ));
110
+ }
111
+ else {
112
+ if (str -> base .vstring .size != 0 ) {
113
+ PyMem_RawFree (str -> base .vstring .buf );
114
+ }
115
+ str -> base .vstring .buf = NULL ;
116
+ str -> base .vstring .size = 0 ;
117
+ }
118
+ }
119
+
120
+ int
121
+ npy_string_dup (const npy_static_string * in , npy_static_string * out )
122
+ {
123
+ if (npy_string_isnull (in )) {
124
+ * out = NPY_NULL_STRING ;
125
+ return 0 ;
126
+ }
127
+
128
+ return npy_string_newsize (npy_string_buf (in ), npy_string_size (in ), out );
129
+ }
130
+
87
131
int
88
132
npy_string_cmp (const npy_static_string * s1 , const npy_static_string * s2 )
89
133
{
90
- size_t minsize = s1 -> size < s2 -> size ? s1 -> size : s2 -> size ;
134
+ size_t s1_size = npy_string_size (s1 );
135
+ size_t s2_size = npy_string_size (s2 );
136
+
137
+ char * s1_buf = npy_string_buf (s1 );
138
+ char * s2_buf = npy_string_buf (s2 );
91
139
92
- int cmp = strncmp (s1 -> buf , s2 -> buf , minsize );
140
+ size_t minsize = s1_size < s2_size ? s1_size : s2_size ;
141
+
142
+ int cmp = strncmp (s1_buf , s2_buf , minsize );
93
143
94
144
if (cmp == 0 ) {
95
- if (s1 -> size > minsize ) {
145
+ if (s1_size > minsize ) {
96
146
return 1 ;
97
147
}
98
- if (s2 -> size > minsize ) {
148
+ if (s2_size > minsize ) {
99
149
return -1 ;
100
150
}
101
151
}
102
152
103
153
return cmp ;
104
154
}
105
155
106
- int
107
- npy_string_isnull (const npy_static_string * in )
108
- {
109
- if (in -> size == 0 && in -> buf == NULL ) {
110
- return 1 ;
111
- }
112
- return 0 ;
113
- }
114
-
115
156
size_t
116
157
npy_string_size (const npy_static_string * s )
117
158
{
118
- return s -> size ;
159
+ if (is_short_string (s )) {
160
+ unsigned char high_byte = s -> base .direct_buffer .flags_and_size ;
161
+ return high_byte & NPY_SHORT_STRING_SIZE_MASK ;
162
+ }
163
+ return s -> base .vstring .size ;
119
164
}
120
165
121
166
char *
122
167
npy_string_buf (const npy_static_string * s )
123
168
{
124
- return s -> buf ;
125
- }
126
-
127
- int
128
- npy_string_size_and_buf (const npy_static_string * s , size_t * size , char * * buf )
129
- {
130
- * size = s -> size ;
131
- * buf = s -> buf ;
132
-
133
- return 0 ;
169
+ if (is_short_string (s )) {
170
+ // the cast drops const, is there a better way?
171
+ return (char * )& s -> base .direct_buffer .buf [0 ];
172
+ }
173
+ return s -> base .vstring .buf ;
134
174
}
0 commit comments