@@ -20,19 +20,30 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
20
20
21
21
Py_XINCREF (na_object );
22
22
((StringDTypeObject * )new )-> na_object = na_object ;
23
+ npy_packed_static_string packed_na_name = * NPY_EMPTY_STRING ;
24
+ npy_packed_static_string packed_default_string = * NPY_EMPTY_STRING ;
23
25
int hasnull = na_object != NULL ;
24
26
int has_nan_na = 0 ;
25
27
int has_string_na = 0 ;
26
- ss default_string = EMPTY_STRING ;
27
28
if (hasnull ) {
28
29
// first check for a string
29
30
if (PyUnicode_Check (na_object )) {
30
31
has_string_na = 1 ;
31
32
Py_ssize_t size = 0 ;
32
33
const char * buf = PyUnicode_AsUTF8AndSize (na_object , & size );
33
- default_string .len = size ;
34
- // discards const, how to avoid?
35
- default_string .buf = (char * )buf ;
34
+ int res = npy_string_newsize (buf , (size_t )size ,
35
+ & packed_default_string );
36
+ if (res == -1 ) {
37
+ PyErr_NoMemory ();
38
+ Py_DECREF (new );
39
+ return NULL ;
40
+ }
41
+ else if (res == -2 ) {
42
+ // this should never happen
43
+ assert (0 );
44
+ Py_DECREF (new );
45
+ return NULL ;
46
+ }
36
47
}
37
48
else {
38
49
// treat as nan-like if != comparison returns a object whose truth
@@ -53,15 +64,50 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
53
64
}
54
65
Py_DECREF (eq );
55
66
}
67
+ PyObject * na_pystr = PyObject_Str (na_object );
68
+ if (na_pystr == NULL ) {
69
+ Py_DECREF (new );
70
+ return NULL ;
71
+ }
72
+
73
+ Py_ssize_t size = 0 ;
74
+ const char * utf8_ptr = PyUnicode_AsUTF8AndSize (na_pystr , & size );
75
+ // discard const to initialize buffer
76
+ int res = npy_string_newsize (utf8_ptr , (size_t )size , & packed_na_name );
77
+ if (res == -1 ) {
78
+ PyErr_NoMemory ();
79
+ Py_DECREF (new );
80
+ return NULL ;
81
+ }
82
+ else if (res == -2 ) {
83
+ // this should never happen
84
+ assert (0 );
85
+ Py_DECREF (new );
86
+ return NULL ;
87
+ }
88
+ Py_DECREF (na_pystr );
56
89
}
57
- ((StringDTypeObject * )new )-> has_nan_na = has_nan_na ;
58
- ((StringDTypeObject * )new )-> has_string_na = has_string_na ;
59
- ((StringDTypeObject * )new )-> default_string = default_string ;
60
- ((StringDTypeObject * )new )-> coerce = coerce ;
90
+
91
+ StringDTypeObject * snew = (StringDTypeObject * )new ;
92
+
93
+ snew -> has_nan_na = has_nan_na ;
94
+ snew -> has_string_na = has_string_na ;
95
+ snew -> packed_default_string = packed_default_string ;
96
+ snew -> packed_na_name = packed_na_name ;
97
+ snew -> coerce = coerce ;
98
+
99
+ npy_static_string default_string = {0 , NULL };
100
+ npy_load_string (& snew -> packed_default_string , & default_string );
101
+
102
+ npy_static_string na_name = {0 , NULL };
103
+ npy_load_string (& snew -> packed_na_name , & na_name );
104
+
105
+ snew -> na_name = na_name ;
106
+ snew -> default_string = default_string ;
61
107
62
108
PyArray_Descr * base = (PyArray_Descr * )new ;
63
- base -> elsize = sizeof (ss );
64
- base -> alignment = _Alignof(ss );
109
+ base -> elsize = sizeof (npy_static_string );
110
+ base -> alignment = _Alignof(npy_static_string );
65
111
base -> flags |= NPY_NEEDS_INIT ;
66
112
base -> flags |= NPY_LIST_PICKLE ;
67
113
base -> flags |= NPY_ITEM_REFCOUNT ;
@@ -161,20 +207,19 @@ string_discover_descriptor_from_pyobject(PyTypeObject *NPY_UNUSED(cls),
161
207
int
162
208
stringdtype_setitem (StringDTypeObject * descr , PyObject * obj , char * * dataptr )
163
209
{
164
- ss * sdata = (ss * )dataptr ;
210
+ npy_packed_static_string * sdata = (npy_packed_static_string * )dataptr ;
165
211
166
212
// free if dataptr holds preexisting string data,
167
- // ssfree does a NULL check
168
- ssfree (sdata );
213
+ // npy_string_free does a NULL check and checks for small strings
214
+ npy_string_free (sdata );
169
215
170
216
// borrow reference
171
217
PyObject * na_object = descr -> na_object ;
172
218
173
219
// setting NA *must* check pointer equality since NA types might not
174
220
// allow equality
175
221
if (na_object != NULL && obj == na_object ) {
176
- // do nothing, ssfree already NULLed the struct ssdata points to
177
- // so it already contains a NA value
222
+ * sdata = * NPY_NULL_STRING ;
178
223
}
179
224
else {
180
225
PyObject * val_obj = get_value (obj , descr -> coerce );
@@ -190,8 +235,7 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
190
235
return -1 ;
191
236
}
192
237
193
- // copies contents of val into item_val->buf
194
- int res = ssnewlen (val , length , sdata );
238
+ int res = npy_string_newsize (val , length , sdata );
195
239
196
240
if (res == -1 ) {
197
241
PyErr_NoMemory ();
@@ -213,10 +257,11 @@ static PyObject *
213
257
stringdtype_getitem (StringDTypeObject * descr , char * * dataptr )
214
258
{
215
259
PyObject * val_obj = NULL ;
216
- ss * sdata = (ss * )dataptr ;
260
+ npy_packed_static_string * psdata = (npy_packed_static_string * )dataptr ;
261
+ npy_static_string sdata = {0 , NULL };
217
262
int hasnull = descr -> na_object != NULL ;
218
263
219
- if (ss_isnull ( sdata )) {
264
+ if (npy_load_string ( psdata , & sdata )) {
220
265
if (hasnull ) {
221
266
PyObject * na_object = descr -> na_object ;
222
267
Py_INCREF (na_object );
@@ -227,9 +272,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
227
272
}
228
273
}
229
274
else {
230
- char * data = sdata -> buf ;
231
- size_t len = sdata -> len ;
232
- val_obj = PyUnicode_FromStringAndSize (data , len );
275
+ val_obj = PyUnicode_FromStringAndSize (sdata .buf , sdata .size );
233
276
if (val_obj == NULL ) {
234
277
return NULL ;
235
278
}
@@ -254,7 +297,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
254
297
npy_bool
255
298
nonzero (void * data , void * NPY_UNUSED (arr ))
256
299
{
257
- return (( ss * )data )-> len != 0 ;
300
+ return npy_string_size (( npy_packed_static_string * )data ) != 0 ;
258
301
}
259
302
260
303
// Implementation of PyArray_CompareFunc.
@@ -278,11 +321,13 @@ _compare(void *a, void *b, StringDTypeObject *descr)
278
321
return 0 ;
279
322
}
280
323
}
281
- const ss * default_string = & descr -> default_string ;
282
- const ss * ss_a = (ss * )a ;
283
- const ss * ss_b = (ss * )b ;
284
- int a_is_null = ss_isnull (ss_a );
285
- int b_is_null = ss_isnull (ss_b );
324
+ npy_static_string * default_string = & descr -> default_string ;
325
+ const npy_packed_static_string * ps_a = (npy_packed_static_string * )a ;
326
+ npy_static_string s_a = {0 , NULL };
327
+ int a_is_null = npy_load_string (ps_a , & s_a );
328
+ const npy_packed_static_string * ps_b = (npy_packed_static_string * )b ;
329
+ npy_static_string s_b = {0 , NULL };
330
+ int b_is_null = npy_load_string (ps_b , & s_b );
286
331
if (NPY_UNLIKELY (a_is_null || b_is_null )) {
287
332
if (hasnull && !has_string_na ) {
288
333
if (has_nan_na ) {
@@ -303,22 +348,22 @@ _compare(void *a, void *b, StringDTypeObject *descr)
303
348
}
304
349
else {
305
350
if (a_is_null ) {
306
- ss_a = default_string ;
351
+ s_a = * default_string ;
307
352
}
308
353
if (b_is_null ) {
309
- ss_b = default_string ;
354
+ s_b = * default_string ;
310
355
}
311
356
}
312
357
}
313
- return sscmp ( ss_a , ss_b );
358
+ return npy_string_cmp ( & s_a , & s_b );
314
359
}
315
360
316
361
// PyArray_ArgFunc
317
362
// The max element is the one with the highest unicode code point.
318
363
int
319
364
argmax (void * data , npy_intp n , npy_intp * max_ind , void * arr )
320
365
{
321
- ss * dptr = (ss * )data ;
366
+ npy_packed_static_string * dptr = (npy_packed_static_string * )data ;
322
367
* max_ind = 0 ;
323
368
for (int i = 1 ; i < n ; i ++ ) {
324
369
if (compare (& dptr [i ], & dptr [* max_ind ], arr ) > 0 ) {
@@ -333,7 +378,7 @@ argmax(void *data, npy_intp n, npy_intp *max_ind, void *arr)
333
378
int
334
379
argmin (void * data , npy_intp n , npy_intp * min_ind , void * arr )
335
380
{
336
- ss * dptr = (ss * )data ;
381
+ npy_packed_static_string * dptr = (npy_packed_static_string * )data ;
337
382
* min_ind = 0 ;
338
383
for (int i = 1 ; i < n ; i ++ ) {
339
384
if (compare (& dptr [i ], & dptr [* min_ind ], arr ) < 0 ) {
@@ -358,8 +403,8 @@ stringdtype_clear_loop(void *NPY_UNUSED(traverse_context),
358
403
{
359
404
while (size -- ) {
360
405
if (data != NULL ) {
361
- ssfree (( ss * )data );
362
- memset (data , 0 , sizeof (ss ));
406
+ npy_string_free (( npy_packed_static_string * )data );
407
+ memset (data , 0 , sizeof (npy_packed_static_string ));
363
408
}
364
409
data += stride ;
365
410
}
@@ -388,9 +433,7 @@ stringdtype_fill_zero_loop(void *NPY_UNUSED(traverse_context),
388
433
NpyAuxData * NPY_UNUSED (auxdata ))
389
434
{
390
435
while (size -- ) {
391
- if (ssnewlen ("" , 0 , (ss * )(data )) < 0 ) {
392
- return -1 ;
393
- }
436
+ * (npy_packed_static_string * )(data ) = * NPY_EMPTY_STRING ;
394
437
data += stride ;
395
438
}
396
439
return 0 ;
@@ -538,6 +581,9 @@ stringdtype_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds)
538
581
static void
539
582
stringdtype_dealloc (StringDTypeObject * self )
540
583
{
584
+ Py_XDECREF (self -> na_object );
585
+ npy_string_free (& self -> packed_default_string );
586
+ npy_string_free (& self -> packed_na_name );
541
587
PyArrayDescr_Type .tp_dealloc ((PyObject * )self );
542
588
}
543
589
0 commit comments