Skip to content

Commit 74e97fd

Browse files
committed
access string internals via functions
1 parent 7ea409b commit 74e97fd

File tree

6 files changed

+96
-36
lines changed

6 files changed

+96
-36
lines changed

stringdtype/stringdtype/src/casts.c

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[],
213213
gil_error(PyExc_MemoryError, "npy_string_newemptysize failed");
214214
return -1;
215215
}
216-
char *out_buf = out_ss->buf;
216+
char *out_buf = npy_string_buf(out_ss);
217217
for (size_t i = 0; i < num_codepoints; i++) {
218218
// get code point
219219
Py_UCS4 code = in[i];
@@ -338,28 +338,28 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[],
338338
s = (npy_static_string *)in;
339339
unsigned char *this_string = NULL;
340340
size_t n_bytes;
341+
const npy_static_string *name = NULL;
341342
if (npy_string_isnull(s)) {
342343
if (has_null && !has_string_na) {
343344
// lossy but not much else we can do
344-
this_string = (unsigned char *)descr->na_name.buf;
345-
n_bytes = descr->na_name.size;
345+
name = &descr->na_name;
346346
}
347347
else {
348-
this_string = (unsigned char *)(default_string.buf);
349-
n_bytes = default_string.size;
348+
name = &default_string;
350349
}
351350
}
352351
else {
353-
this_string = (unsigned char *)(s->buf);
354-
n_bytes = s->size;
352+
name = s;
355353
}
354+
355+
this_string = (unsigned char *)npy_string_buf(name);
356+
n_bytes = npy_string_size(name);
356357
size_t tot_n_bytes = 0;
357358

358359
for (int i = 0; i < max_out_size; i++) {
359360
Py_UCS4 code;
360361

361-
// get code point for character this_string is currently pointing
362-
// too
362+
// code point for character this_string is currently pointing at
363363
size_t num_bytes =
364364
utf8_char_to_ucs4_code(this_string, n_bytes, &code);
365365

@@ -440,10 +440,10 @@ string_to_bool(PyArrayMethod_Context *context, char *const data[],
440440
*out = (npy_bool)1;
441441
}
442442
else {
443-
*out = (npy_bool)(default_string.size == 0);
443+
*out = (npy_bool)(npy_string_size(&default_string) == 0);
444444
}
445445
}
446-
else if (s->size == 0) {
446+
else if (npy_string_size(s) == 0) {
447447
*out = (npy_bool)0;
448448
}
449449
else {
@@ -527,7 +527,8 @@ string_to_pylong(char *in, int hasnull)
527527
}
528528
s = &EMPTY_STRING;
529529
}
530-
PyObject *val_obj = PyUnicode_FromStringAndSize(s->buf, s->size);
530+
PyObject *val_obj =
531+
PyUnicode_FromStringAndSize(npy_string_buf(s), npy_string_size(s));
531532
if (val_obj == NULL) {
532533
return NULL;
533534
}
@@ -779,7 +780,8 @@ string_to_pyfloat(char *in, int hasnull)
779780
}
780781
s = &EMPTY_STRING;
781782
}
782-
PyObject *val_obj = PyUnicode_FromStringAndSize(s->buf, s->size);
783+
PyObject *val_obj =
784+
PyUnicode_FromStringAndSize(npy_string_buf(s), npy_string_size(s));
783785
if (val_obj == NULL) {
784786
return NULL;
785787
}
@@ -1004,8 +1006,9 @@ string_to_datetime(PyArrayMethod_Context *context, char *const data[],
10041006
s = &default_string;
10051007
}
10061008
if (NpyDatetime_ParseISO8601Datetime(
1007-
(const char *)s->buf, s->size, in_unit, NPY_UNSAFE_CASTING,
1008-
&dts, &in_meta.base, &out_special) < 0) {
1009+
(const char *)npy_string_buf(s), npy_string_size(s),
1010+
in_unit, NPY_UNSAFE_CASTING, &dts, &in_meta.base,
1011+
&out_special) < 0) {
10091012
return -1;
10101013
}
10111014
if (NpyDatetime_ConvertDatetimeStructToDatetime64(dt_meta, &dts, out) <

stringdtype/stringdtype/src/dtype.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
223223
return -1;
224224
}
225225

226-
// copies contents of val into item_val->buf
227226
int res = npy_string_newsize(val, length, sdata);
228227

229228
if (res == -1) {
@@ -260,8 +259,8 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
260259
}
261260
}
262261
else {
263-
char *data = sdata->buf;
264-
size_t size = sdata->size;
262+
char *data = npy_string_buf(sdata);
263+
size_t size = npy_string_size(sdata);
265264
val_obj = PyUnicode_FromStringAndSize(data, size);
266265
if (val_obj == NULL) {
267266
return NULL;
@@ -287,7 +286,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
287286
npy_bool
288287
nonzero(void *data, void *NPY_UNUSED(arr))
289288
{
290-
return ((npy_static_string *)data)->size != 0;
289+
return npy_string_size((npy_static_string *)data) != 0;
291290
}
292291

293292
// Implementation of PyArray_CompareFunc.

stringdtype/stringdtype/src/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ _memory_usage(PyObject *NPY_UNUSED(self), PyObject *obj)
5858
npy_intp count = *innersizeptr;
5959

6060
while (count--) {
61-
memory_usage += ((npy_static_string *)in)->size;
61+
memory_usage += npy_string_size(((npy_static_string *)in));
6262
in += stride;
6363
}
6464

stringdtype/stringdtype/src/static_string.c

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ const npy_static_string NULL_STRING = {0, NULL};
1111
int
1212
npy_string_newsize(const char *init, size_t size, npy_static_string *to_init)
1313
{
14-
if ((to_init == NULL) || (to_init->buf != NULL) || (to_init->size != 0)) {
14+
if ((to_init == NULL) || (to_init->buf != NULL) ||
15+
(npy_string_size(to_init) != 0)) {
1516
return -2;
1617
}
1718

@@ -110,3 +111,24 @@ npy_string_isnull(const npy_static_string *in)
110111
}
111112
return 0;
112113
}
114+
115+
size_t
116+
npy_string_size(const npy_static_string *s)
117+
{
118+
return s->size;
119+
}
120+
121+
char *
122+
npy_string_buf(const npy_static_string *s)
123+
{
124+
return s->buf;
125+
}
126+
127+
int
128+
npy_string_size_and_buf(const npy_static_string *s, size_t *size, char **buf)
129+
{
130+
*size = s->size;
131+
*buf = s->buf;
132+
133+
return 0;
134+
}

stringdtype/stringdtype/src/static_string.h

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,18 @@ extern const npy_static_string EMPTY_STRING;
1717
// working with it.
1818
extern const npy_static_string NULL_STRING;
1919

20-
// Allocates a new buffer for *to_init*, filling with the copied contents of
21-
// the first *size*entries in *init*, which must be valid and initialized
22-
// beforehand, and sets *to_init->size* to *size*. Returns -1 if malloc fails
23-
// and -2 if *to_init* is not NULL. Returns 0 on success.
20+
// Allocates a new buffer for *to_init*, which must be set to NULL before
21+
// calling this function, filling the newly allocated buffer with the copied
22+
// contents of the first *size* entries in *init*, which must be valid and
23+
// initialized beforehand. Calling npy_string_free on *to_init* before calling
24+
// this function on an existing string is sufficient to initialize it. Returns
25+
// -1 if malloc fails and -2 if the internal buffer in *to_init* is not NULL
26+
// to indicate a programming error. Returns 0 on success.
2427
int
2528
npy_string_newsize(const char *init, size_t size, npy_static_string *to_init);
2629

27-
// Sets len to 0 and if str->buf is not already NULL, frees it and sets it to
28-
// NULL. Cannot fail.
30+
// Sets size to 0 and, if the internal buffer is not already NULL, frees it if
31+
// it is allocated on the heap and sets it to NULL. Cannot fail.
2932
void
3033
npy_string_free(npy_static_string *str);
3134

@@ -54,4 +57,17 @@ npy_string_isnull(const npy_static_string *in);
5457
int
5558
npy_string_cmp(const npy_static_string *s1, const npy_static_string *s2);
5659

60+
// Returns the *size* of *s*
61+
size_t
62+
npy_string_size(const npy_static_string *s);
63+
64+
// Returns the string *buf* of *s*. This is not a null-terminated buffer.
65+
char *
66+
npy_string_buf(const npy_static_string *s);
67+
68+
// Fills in *size* and *buf* pointers with the values in *s*.
69+
// Currently always returns 0.
70+
int
71+
npy_string_size_and_buf(const npy_static_string *s, size_t *size, char **buf);
72+
5773
#endif /*_NPY_STATIC_STRING_H */

stringdtype/stringdtype/src/umath.c

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ multiply_resolve_descriptors(
6767
} \
6868
} \
6969
npy_##shortname factor = *(npy_##shortname *)iin; \
70-
size_t newsize = (size_t)((is->size) * factor); \
70+
size_t cursize = npy_string_size(is); \
71+
/* FIXME: check for overflow? */ \
72+
size_t newsize = cursize * factor; \
7173
\
7274
if (npy_string_newemptysize(newsize, os) < 0) { \
7375
gil_error(PyExc_MemoryError, \
@@ -76,7 +78,8 @@ multiply_resolve_descriptors(
7678
} \
7779
\
7880
for (size_t i = 0; i < (size_t)factor; i++) { \
79-
memcpy(os->buf + i * is->size, is->buf, is->size); \
81+
memcpy(npy_string_buf(os) + i * cursize, npy_string_buf(is), \
82+
cursize); \
8083
} \
8184
\
8285
sin += s_stride; \
@@ -215,7 +218,6 @@ add_strided_loop(PyArrayMethod_Context *context, char *const data[],
215218
npy_static_string *os = NULL;
216219

217220
while (N--) {
218-
int newsize = 0;
219221
s1 = (npy_static_string *)in1;
220222
s2 = (npy_static_string *)in2;
221223
int s1_isnull = npy_string_isnull(s1);
@@ -240,13 +242,20 @@ add_strided_loop(PyArrayMethod_Context *context, char *const data[],
240242
"Cannot add null that is not a nan-like value");
241243
}
242244
}
243-
newsize = s1->size + s2->size;
244-
if (npy_string_newemptysize(newsize, os) < 0) {
245+
246+
size_t s1_size = npy_string_size(s1);
247+
size_t s2_size = npy_string_size(s2);
248+
249+
if (npy_string_newemptysize(s1_size + s2_size, os) < 0) {
245250
return -1;
246251
}
247252

248-
memcpy(os->buf, s1->buf, s1->size);
249-
memcpy(os->buf + s1->size, s2->buf, s2->size);
253+
char *os_buf = npy_string_buf(os);
254+
char *s1_buf = npy_string_buf(s1);
255+
char *s2_buf = npy_string_buf(s2);
256+
257+
memcpy(os_buf, s1_buf, s1_size);
258+
memcpy(os_buf + s1_size, s2_buf, s2_size);
250259

251260
next_step:
252261
in1 += in1_stride;
@@ -385,7 +394,11 @@ string_equal_strided_loop(PyArrayMethod_Context *context, char *const data[],
385394
}
386395
}
387396
}
388-
if (s1->size == s2->size && strncmp(s1->buf, s2->buf, s1->size) == 0) {
397+
char *s1_buf = npy_string_buf(s1);
398+
char *s2_buf = npy_string_buf(s2);
399+
size_t s1_size = npy_string_size(s1);
400+
size_t s2_size = npy_string_size(s2);
401+
if (s1_size == s2_size && strncmp(s1_buf, s2_buf, s1_size) == 0) {
389402
*out = (npy_bool)1;
390403
}
391404
else {
@@ -450,7 +463,14 @@ string_not_equal_strided_loop(PyArrayMethod_Context *context,
450463
}
451464
}
452465
}
453-
if (s1->size == s2->size && strncmp(s1->buf, s2->buf, s1->size) == 0) {
466+
467+
size_t s1_size = npy_string_size(s1);
468+
size_t s2_size = npy_string_size(s2);
469+
470+
char *s1_buf = npy_string_buf(s1);
471+
char *s2_buf = npy_string_buf(s2);
472+
473+
if (s1_size == s2_size && strncmp(s1_buf, s2_buf, s1_size) == 0) {
454474
*out = (npy_bool)0;
455475
}
456476
else {

0 commit comments

Comments
 (0)