Skip to content

Commit 3240d93

Browse files
authored
Merge pull request #88 from ngoldbaum/api-cleanup
Clean up error handling and int overflow
2 parents 7c5bcbb + c170413 commit 3240d93

File tree

6 files changed

+71
-59
lines changed

6 files changed

+71
-59
lines changed

stringdtype/stringdtype/src/casts.c

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ unicode_to_string(PyArrayMethod_Context *context, char *const data[],
207207
npy_packed_static_string *out_pss = (npy_packed_static_string *)out;
208208
npy_string_free(out_pss);
209209
if (npy_string_newemptysize(out_num_bytes, out_pss) < 0) {
210-
gil_error(PyExc_MemoryError, "npy_string_newemptysize failed");
210+
gil_error(PyExc_MemoryError,
211+
"Failed to allocate string in unicode to string cast");
211212
return -1;
212213
}
213214
npy_static_string out_ss = {0, NULL};
@@ -336,7 +337,7 @@ string_to_unicode(PyArrayMethod_Context *context, char *const data[],
336337
while (N--) {
337338
const npy_packed_static_string *ps = (npy_packed_static_string *)in;
338339
npy_static_string s = {0, NULL};
339-
npy_static_string name = {0, NULL};
340+
npy_static_string name;
340341
unsigned char *this_string = NULL;
341342
size_t n_bytes;
342343
int is_null = npy_load_string(ps, &s);
@@ -481,23 +482,28 @@ bool_to_string(PyArrayMethod_Context *NPY_UNUSED(context), char *const data[],
481482
while (N--) {
482483
npy_packed_static_string *out_pss = (npy_packed_static_string *)out;
483484
npy_string_free(out_pss);
485+
char *ret_val = NULL;
486+
size_t size = 0;
484487
if ((npy_bool)(*in) == 1) {
485-
if (npy_string_newsize("True", 4, out_pss) < 0) {
486-
gil_error(PyExc_MemoryError, "npy_string_newsize failed");
487-
return -1;
488-
}
488+
ret_val = "True";
489+
size = 4;
489490
}
490491
else if ((npy_bool)(*in) == 0) {
491-
if (npy_string_newsize("False", 5, out_pss) < 0) {
492-
gil_error(PyExc_MemoryError, "npy_string_newsize failed");
493-
return -1;
494-
}
492+
ret_val = "False";
493+
size = 5;
495494
}
496495
else {
497496
gil_error(PyExc_RuntimeError,
498497
"invalid value encountered in bool to string cast");
499498
return -1;
500499
}
500+
if (npy_string_newsize(ret_val, size, out_pss) < 0) {
501+
// execution should never get here because this will be a small
502+
// string on all platforms
503+
gil_error(PyExc_MemoryError,
504+
"Failed to allocate string in bool to string cast");
505+
return -1;
506+
}
501507
in += in_stride;
502508
out += out_stride;
503509
}
@@ -593,7 +599,9 @@ pyobj_to_string(PyObject *obj, char *out)
593599
npy_packed_static_string *out_ss = (npy_packed_static_string *)out;
594600
npy_string_free(out_ss);
595601
if (npy_string_newsize(cstr_val, length, out_ss) < 0) {
596-
PyErr_SetString(PyExc_MemoryError, "npy_string_newsize failed");
602+
PyErr_SetString(PyExc_MemoryError,
603+
"Failed to allocate numpy string when converting from "
604+
"python string.");
597605
Py_DECREF(pystr_val);
598606
return -1;
599607
}
@@ -1087,7 +1095,8 @@ datetime_to_string(PyArrayMethod_Context *context, char *const data[],
10871095
if (npy_string_newsize(datetime_buf, strlen(datetime_buf),
10881096
out_pss) < 0) {
10891097
PyErr_SetString(PyExc_MemoryError,
1090-
"npy_string_newsize failed");
1098+
"Failed to allocate string when converting "
1099+
"from a datetime.");
10911100
return -1;
10921101
}
10931102
}

stringdtype/stringdtype/src/dtype.c

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,12 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
3131
has_string_na = 1;
3232
Py_ssize_t size = 0;
3333
const char *buf = PyUnicode_AsUTF8AndSize(na_object, &size);
34-
int res = npy_string_newsize(buf, (size_t)size,
35-
&packed_default_string);
36-
if (res == -1) {
34+
if (npy_string_newsize(buf, (size_t)size, &packed_default_string) <
35+
0) {
3736
PyErr_NoMemory();
3837
Py_DECREF(new);
3938
return NULL;
4039
}
41-
else if (res == -2) {
42-
// this should never happen
43-
assert(0);
44-
Py_DECREF(new);
45-
return NULL;
46-
}
4740
}
4841
else {
4942
// treat as nan-like if != comparison returns a object whose truth
@@ -72,17 +65,10 @@ new_stringdtype_instance(PyObject *na_object, int coerce)
7265

7366
Py_ssize_t size = 0;
7467
const char *utf8_ptr = PyUnicode_AsUTF8AndSize(na_pystr, &size);
75-
// discard const to initialize buffer
76-
int res = npy_string_newsize(utf8_ptr, (size_t)size, &packed_na_name);
77-
if (res == -1) {
68+
if (npy_string_newsize(utf8_ptr, (size_t)size, &packed_na_name)) {
7869
PyErr_NoMemory();
7970
Py_DECREF(new);
80-
return NULL;
81-
}
82-
else if (res == -2) {
83-
// this should never happen
84-
assert(0);
85-
Py_DECREF(new);
71+
Py_DECREF(na_pystr);
8672
return NULL;
8773
}
8874
Py_DECREF(na_pystr);
@@ -235,19 +221,11 @@ stringdtype_setitem(StringDTypeObject *descr, PyObject *obj, char **dataptr)
235221
return -1;
236222
}
237223

238-
int res = npy_string_newsize(val, length, sdata);
239-
240-
if (res == -1) {
224+
if (npy_string_newsize(val, length, sdata) < 0) {
241225
PyErr_NoMemory();
242226
Py_DECREF(val_obj);
243227
return -1;
244228
}
245-
else if (res == -2) {
246-
// this should never happen
247-
assert(0);
248-
Py_DECREF(val_obj);
249-
return -1;
250-
}
251229
}
252230

253231
return 0;

stringdtype/stringdtype/src/static_string.c

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,6 @@ typedef union _npy_static_string_u {
4848
#define NPY_SHORT_STRING_MAX_SIZE \
4949
(sizeof(npy_static_string) - 1) // 15 or 7 depending on arch
5050

51-
// one byte in size is reserved for flags and small string optimization
52-
#define NPY_MAX_STRING_SIZE (1 << (sizeof(size_t) - 1)) - 1
53-
5451
// Since this has no flags set, technically this is a heap-allocated string
5552
// with size zero. Practically, that doesn't matter because we always do size
5653
// checks before accessing heap data, but that may be confusing. The nice part
@@ -118,15 +115,15 @@ int
118115
npy_string_newsize(const char *init, size_t size,
119116
npy_packed_static_string *to_init)
120117
{
121-
if (to_init == NULL || size > NPY_MAX_STRING_SIZE) {
122-
return -2;
123-
}
124-
125118
if (size == 0) {
126119
*to_init = *NPY_EMPTY_STRING;
127120
return 0;
128121
}
129122

123+
if (size > NPY_MAX_STRING_SIZE) {
124+
return -1;
125+
}
126+
130127
_npy_static_string_u *to_init_u = ((_npy_static_string_u *)to_init);
131128

132129
if (size > NPY_SHORT_STRING_MAX_SIZE) {
@@ -157,15 +154,15 @@ npy_string_newsize(const char *init, size_t size,
157154
int
158155
npy_string_newemptysize(size_t size, npy_packed_static_string *out)
159156
{
160-
if (out == NULL || size > NPY_MAX_STRING_SIZE) {
161-
return -2;
162-
}
163-
164157
if (size == 0) {
165158
*out = *NPY_EMPTY_STRING;
166159
return 0;
167160
}
168161

162+
if (size > NPY_MAX_STRING_SIZE) {
163+
return -1;
164+
}
165+
169166
_npy_static_string_u *out_u = (_npy_static_string_u *)out;
170167

171168
if (size > NPY_SHORT_STRING_MAX_SIZE) {

stringdtype/stringdtype/src/static_string.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,20 @@ extern const npy_packed_static_string *NPY_EMPTY_STRING;
2121
// working with it.
2222
extern const npy_packed_static_string *NPY_NULL_STRING;
2323

24+
// one byte in size is reserved for flags and small string optimization
25+
#define NPY_MAX_STRING_SIZE (1 << (sizeof(size_t) - 1)) - 1
26+
2427
// Allocates a new buffer for *to_init*, which must be set to NULL before
2528
// calling this function, filling the newly allocated buffer with the copied
2629
// contents of the first *size* entries in *init*, which must be valid and
2730
// initialized beforehand. Calling npy_string_free on *to_init* before calling
28-
// this function on an existing string is sufficient to initialize it. Returns
29-
// -1 if malloc fails and -2 if the internal buffer in *to_init* is not NULL
30-
// to indicate a programming error. Returns 0 on success.
31+
// this function on an existing string or copying the contents of
32+
// NPY_EMPTY_STRING into *to_init* is sufficient to initialize it. Does not
33+
// check if *to_init* is NULL or if the internal buffer is non-NULL, undefined
34+
// behavior or memory leaks are possible if this function is passed a pointer
35+
// to a an unintialized struct, a NULL pointer, or an existing heap-allocated
36+
// string. Returns -1 if allocating the string would exceed the maximum
37+
// allowed string size or exhaust available memory. Returns 0 on success.
3138
int
3239
npy_string_newsize(const char *init, size_t size,
3340
npy_packed_static_string *to_init);
@@ -47,8 +54,15 @@ npy_string_dup(const npy_packed_static_string *in,
4754

4855
// Allocates a new string buffer for *out* with enough capacity to store
4956
// *size* bytes of text. Does not do any initialization, the caller must
50-
// initialize the string buffer. Returns -1 if malloc fails and -2 if *out* is
51-
// not NULL. Returns 0 on success.
57+
// initialize the string buffer after this function returns. Calling
58+
// npy_string_free on *to_init* before calling this function on an existing
59+
// string or copying the contents of NPY_EMPTY_STRING into *to_init* is
60+
// sufficient to initialize it.Does not check if *to_init* is NULL or if the
61+
// internal buffer is non-NULL, undefined behavior or memory leaks are
62+
// possible if this function is passed a pointer to a an unintialized struct,
63+
// a NULL pointer, or an existing heap-allocated string. Returns 0 on
64+
// success. Returns -1 if allocating the string would exceed the maximum
65+
// allowed string size or exhaust available memory. Returns 0 on success.
5266
int
5367
npy_string_newemptysize(size_t size, npy_packed_static_string *out);
5468

stringdtype/stringdtype/src/umath.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,21 @@ multiply_resolve_descriptors(
6868
} \
6969
npy_##shortname factor = *(npy_##shortname *)iin; \
7070
size_t cursize = is.size; \
71-
/* FIXME: check for overflow? */ \
7271
size_t newsize = cursize * factor; \
73-
\
74-
if (npy_string_newemptysize(newsize, ops) < 0) { \
72+
/* newsize can only be less than cursize if there is overflow */ \
73+
if (((newsize < cursize) || \
74+
npy_string_newemptysize(newsize, ops) < 0)) { \
7575
gil_error(PyExc_MemoryError, \
76-
"npy_string_newemptysize failed"); \
76+
"Failed to allocate string in string mutiply"); \
7777
return -1; \
7878
} \
7979
\
8080
npy_static_string os = {0, NULL}; \
8181
npy_load_string(ops, &os); \
8282
for (size_t i = 0; i < (size_t)factor; i++) { \
8383
/* excplicitly discard const; initializing new buffer */ \
84+
/* multiply can't overflow because cursize * factor */ \
85+
/* has already been checked and doesn't overflow */ \
8486
memcpy((char *)os.buf + i * cursize, is.buf, cursize); \
8587
} \
8688
\
@@ -245,6 +247,12 @@ add_strided_loop(PyArrayMethod_Context *context, char *const data[],
245247
}
246248
}
247249

250+
if ((s1.size + s2.size) < s1.size) {
251+
// overflow
252+
gil_error(PyExc_MemoryError,
253+
"Failed to allocate string in string add");
254+
}
255+
248256
if (npy_string_newemptysize(s1.size + s2.size, ops) < 0) {
249257
return -1;
250258
}

stringdtype/tests/test_stringdtype.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,3 +700,9 @@ def test_null_roundtripping(dtype):
700700
arr = np.array(data, dtype=dtype)
701701
assert data[0] == arr[0]
702702
assert data[1] == arr[1]
703+
704+
705+
def test_string_too_large_error():
706+
arr = np.array(["a", "b", "c"], dtype=StringDType())
707+
with pytest.raises(MemoryError):
708+
arr * (2**63 - 2)

0 commit comments

Comments
 (0)