Skip to content
44 changes: 35 additions & 9 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,8 @@ static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
}

NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
return PyDateTimeToIso(obj, base, len);
GET_TC(tc)->cStr = PyDateTimeToIso(obj, base, len);
return GET_TC(tc)->cStr;
}

static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
Expand Down Expand Up @@ -1007,16 +1008,24 @@ static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
//=============================================================================
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
GET_TC(tc)->index = 0;
GET_TC(tc)->cStr = PyObject_Malloc(20);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably better to create a static const variable for the size within the module, rather than repeating the hard-coded 20 multiple times.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created the variable CSTR_SIZE

if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (!GET_TC(tc)->cStr) {
return 0;
}

if (index == 0) {
GET_TC(tc)->cStr = "name";
strcpy((char *)GET_TC(tc)->cStr, "name");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if you want to use strcpy here you will need to drop the const qualifier on the struct member; casting away const-ness like this is going to trigger undefined behavior

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the const qualifier.

GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
} else if (index == 1) {
GET_TC(tc)->cStr = "data";
strcpy((char *)GET_TC(tc)->cStr, "data");
GET_TC(tc)->itemValue = get_values(obj);
if (!GET_TC(tc)->itemValue) {
return 0;
Expand Down Expand Up @@ -1049,19 +1058,27 @@ static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
GET_TC(tc)->index = 0;
enc->outputFormat = VALUES; // for contained series
GET_TC(tc)->cStr = PyObject_Malloc(20);
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (!GET_TC(tc)->cStr) {
return 0;
}

if (index == 0) {
GET_TC(tc)->cStr = "name";
strcpy((char *)GET_TC(tc)->cStr, "name");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
} else if (index == 1) {
GET_TC(tc)->cStr = "index";
strcpy((char *)GET_TC(tc)->cStr, "index");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
} else if (index == 2) {
GET_TC(tc)->cStr = "data";
strcpy((char *)GET_TC(tc)->cStr, "data");
GET_TC(tc)->itemValue = get_values(obj);
if (!GET_TC(tc)->itemValue) {
return 0;
Expand Down Expand Up @@ -1096,19 +1113,27 @@ static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
GET_TC(tc)->index = 0;
enc->outputFormat = VALUES; // for contained series & index
GET_TC(tc)->cStr = PyObject_Malloc(20);
if (!GET_TC(tc)->cStr) {
PyErr_NoMemory();
}
}

static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
const Py_ssize_t index = GET_TC(tc)->index;
Py_XDECREF(GET_TC(tc)->itemValue);
if (!GET_TC(tc)->cStr) {
return 0;
}

if (index == 0) {
GET_TC(tc)->cStr = "columns";
strcpy((char *)GET_TC(tc)->cStr, "columns");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
} else if (index == 1) {
GET_TC(tc)->cStr = "index";
strcpy((char *)GET_TC(tc)->cStr, "index");
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
} else if (index == 2) {
GET_TC(tc)->cStr = "data";
strcpy((char *)GET_TC(tc)->cStr, "data");
Py_INCREF(obj);
GET_TC(tc)->itemValue = obj;
} else {
Expand Down Expand Up @@ -1880,6 +1905,7 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
GET_TC(tc)->rowLabels = NULL;
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
GET_TC(tc)->columnLabels = NULL;
PyObject_Free((void *)GET_TC(tc)->cStr);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not too sure about this approach - generally the CPython API only wants you to call this function when you've allocated with the corresponding function(s)

https://docs.python.org/3/c-api/memory.html#c.PyObject_Free

Frees the memory block pointed to by p, which must have been returned by a previous call to PyObject_Malloc(), PyObject_Realloc() or PyObject_Calloc(). Otherwise, or if PyObject_Free(p) has been called before, undefined behavior occurs.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, sorry, I misread your PR. I see you allocate 20 bytes to cStr, so this is clean in that regard. However, I think you want to use the PyMem_ functions, since we are dealing with raw C constructs and not Python objects, so maybe replace this with PyMem_Free, and any PyObject_Malloc calls with PyMem_Malloc.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless I'm missing something, PyObject_Free() is being called on memory that's been allocated with PyObject_Malloc()...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why cast to a void pointer here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was because of the const qualifier in cStr, but since I removed this qualifier, I am no longer casting.

GET_TC(tc)->cStr = NULL;
PyObject_Free(tc->prv);
tc->prv = NULL;
Expand Down
Loading