8 changes: 4 additions & 4 deletions pandas/_libs/src/datetime/date_conversions.c
@@ -49,7 +49,7 @@ char *int64ToIso(int64_t value, NPY_DATETIMEUNIT valueUnit,
pandas_datetime_to_datetimestruct(value, valueUnit, &dts);

*len = (size_t)get_datetime_iso_8601_strlen(0, base);
char *result = PyObject_Malloc(*len);
char *result = PyMem_Malloc(*len);
Member

Since the focus of the PR is to fix the JSON modules, let's leave the others untouched for now. These may be viable fixes, but they can always be done in a follow-up.

Member

Ah, never mind - I guess you are doing this because the JSON module makes use of this function.

Contributor Author

Exactly, I made this change because of the assignment of cStr in NpyDateTimeToIsoCallback.

I also changed int64ToIsoDuration and PyDateTimeToIso to allocate with PyMem_Malloc, but I think the CI errors are caused by these changes. Is it correct to use PyMem_Malloc in them, or should I change back to PyObject_Malloc?
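
For context, a minimal sketch of the pairing rule I'm relying on (the helper names below are made up for illustration and are not part of the PR): whatever allocator family produces a buffer must also release it, and since Object_endTypeContext now calls PyMem_Free on cStr, everything assigned to cStr should come from PyMem_Malloc.

```c
#include <Python.h>
#include <string.h>

/* Hypothetical helpers, only to show a matched allocate/free pair. */
static char *make_label(const char *text) {
    char *buf = PyMem_Malloc(strlen(text) + 1); /* allocated with the PyMem_ family... */
    if (buf == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    strcpy(buf, text);
    return buf;
}

static void release_label(char *buf) {
    PyMem_Free(buf); /* ...so it must be released by the same family, not PyObject_Free */
}
```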

Member

Strange - I don't think PyObject_Malloc is correct. Upstream, it looks like they have made a similar change (see python/cpython#114569).

However, if it's a blocker for now, sure, let's stick with the PyObject_ functions; we can figure out the rest later.
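
If it helps with tracking down the CI failures, a family mismatch can be surfaced deliberately with CPython's debug allocator hooks (run under PYTHONMALLOC=debug or a debug build). The standalone program below is only a hypothetical sketch, not part of the PR; without the debug hooks the mismatch may appear to run fine.

```c
#include <Python.h>

int main(void) {
    Py_Initialize();

    char *buf = PyMem_Malloc(16); /* allocated via the PyMem_ family */
    PyObject_Free(buf);           /* wrong family: undefined behaviour; the debug
                                     hooks report it as an API violation here */

    Py_FinalizeEx();
    return 0;
}
```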


if (result == NULL) {
PyErr_NoMemory();
@@ -60,7 +60,7 @@ char *int64ToIso(int64_t value, NPY_DATETIMEUNIT valueUnit,
if (ret_code != 0) {
PyErr_SetString(PyExc_ValueError,
"Could not convert datetime value to string");
PyObject_Free(result);
PyMem_Free(result);
}

// Note that get_datetime_iso_8601_strlen just gives a generic size
@@ -78,7 +78,7 @@ char *int64ToIsoDuration(int64_t value, size_t *len) {

// Max theoretical length of ISO Duration with 64 bit day
// as the largest unit is 70 characters + 1 for a null terminator
char *result = PyObject_Malloc(71);
char *result = PyMem_Malloc(71);
if (result == NULL) {
PyErr_NoMemory();
return NULL;
Expand All @@ -88,7 +88,7 @@ char *int64ToIsoDuration(int64_t value, size_t *len) {
if (ret_code == -1) {
PyErr_SetString(PyExc_ValueError,
"Could not convert timedelta value to string");
PyObject_Free(result);
PyMem_Free(result);
return NULL;
}

6 changes: 3 additions & 3 deletions pandas/_libs/src/datetime/pd_datetime.c
@@ -170,14 +170,14 @@ static char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
}

*len = (size_t)get_datetime_iso_8601_strlen(0, base);
char *result = PyObject_Malloc(*len);
char *result = PyMem_Malloc(*len);
// Check to see if PyDateTime has a timezone.
// Don't convert to UTC if it doesn't.
int is_tz_aware = 0;
if (PyObject_HasAttrString(obj, "tzinfo")) {
PyObject *offset = extract_utc_offset(obj);
if (offset == NULL) {
PyObject_Free(result);
PyMem_Free(result);
return NULL;
}
is_tz_aware = offset != Py_None;
@@ -188,7 +188,7 @@
if (ret != 0) {
PyErr_SetString(PyExc_ValueError,
"Could not convert datetime value to string");
PyObject_Free(result);
PyMem_Free(result);
return NULL;
}

50 changes: 39 additions & 11 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -51,6 +51,8 @@ Numeric decoder derived from TCL library
#include <numpy/ndarraytypes.h>
#include <numpy/npy_math.h>

static const int CSTR_SIZE = 20;

npy_int64 get_nat(void) { return NPY_MIN_INT64; }

typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
@@ -106,7 +108,7 @@ typedef struct __TypeContext {
double doubleValue;
JSINT64 longValue;

const char *cStr;
char *cStr;
NpyArrContext *npyarr;
PdBlockContext *pdblock;
int transpose;
@@ -347,7 +349,8 @@ static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
}

NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
return PyDateTimeToIso(obj, base, len);
GET_TC(tc)->cStr = PyDateTimeToIso(obj, base, len);
return GET_TC(tc)->cStr;
}

static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
@@ -1007,16 +1010,24 @@ static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
//=============================================================================
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
GET_TC(tc)->index = 0;
GET_TC(tc)->cStr = PyMem_Malloc(CSTR_SIZE);
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (!GET_TC(tc)->cStr) {
return 0;
}

if (index == 0) {
GET_TC(tc)->cStr = "name";
strcpy(GET_TC(tc)->cStr, "name");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
} else if (index == 1) {
GET_TC(tc)->cStr = "data";
strcpy(GET_TC(tc)->cStr, "data");
GET_TC(tc)->itemValue = get_values(obj);
if (!GET_TC(tc)->itemValue) {
return 0;
@@ -1049,19 +1060,27 @@ static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
GET_TC(tc)->index = 0;
enc->outputFormat = VALUES; // for contained series
GET_TC(tc)->cStr = PyMem_Malloc(CSTR_SIZE);
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (!GET_TC(tc)->cStr) {
return 0;
}

if (index == 0) {
GET_TC(tc)->cStr = "name";
strcpy(GET_TC(tc)->cStr, "name");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
} else if (index == 1) {
GET_TC(tc)->cStr = "index";
strcpy(GET_TC(tc)->cStr, "index");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
} else if (index == 2) {
GET_TC(tc)->cStr = "data";
strcpy(GET_TC(tc)->cStr, "data");
GET_TC(tc)->itemValue = get_values(obj);
if (!GET_TC(tc)->itemValue) {
return 0;
@@ -1096,19 +1115,27 @@ static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
GET_TC(tc)->index = 0;
enc->outputFormat = VALUES; // for contained series & index
GET_TC(tc)->cStr = PyMem_Malloc(CSTR_SIZE);
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (!GET_TC(tc)->cStr) {
return 0;
}

if (index == 0) {
GET_TC(tc)->cStr = "columns";
strcpy(GET_TC(tc)->cStr, "columns");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
} else if (index == 1) {
GET_TC(tc)->cStr = "index";
strcpy(GET_TC(tc)->cStr, "index");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
} else if (index == 2) {
GET_TC(tc)->cStr = "data";
strcpy(GET_TC(tc)->cStr, "data");
Py_INCREF(obj);
GET_TC(tc)->itemValue = obj;
} else {
@@ -1880,6 +1907,7 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
GET_TC(tc)->rowLabels = NULL;
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
GET_TC(tc)->columnLabels = NULL;
PyMem_Free(GET_TC(tc)->cStr);
GET_TC(tc)->cStr = NULL;
PyObject_Free(tc->prv);
tc->prv = NULL;
@@ -1903,7 +1931,7 @@ static const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc,
size_t *_outLen) {
PyObject *repr = PyObject_Str(obj);
const char *str = PyUnicode_AsUTF8AndSize(repr, (Py_ssize_t *)_outLen);
char *bytes = PyObject_Malloc(*_outLen + 1);
char *bytes = PyMem_Malloc(*_outLen + 1);
memcpy(bytes, str, *_outLen + 1);
GET_TC(tc)->cStr = bytes;
