Skip to content

Commit f324bd8

Browse files
committed
fix: use dynamically allocated memory to store strings
Revert "fix: flag to free `cStr` in `Object_getBigNumStringValue`"
Revert "fix: memory leak in JSON datetime serialization"
fix: fix memory leak in `PyDateTimeToIsoCallback`
fix: ensure successful malloc.
1 parent f7b2f5b commit f324bd8

File tree

1 file changed

+33
-17
lines changed

1 file changed

+33
-17
lines changed

pandas/_libs/src/vendored/ujson/python/objToJSON.c

Lines changed: 33 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,6 @@ typedef struct __TypeContext {
107107
JSINT64 longValue;
108108

109109
const char *cStr;
110-
int freeCStr;
111110
NpyArrContext *npyarr;
112111
PdBlockContext *pdblock;
113112
int transpose;
@@ -163,7 +162,6 @@ static TypeContext *createTypeContext(void) {
163162
pc->longValue = 0;
164163
pc->doubleValue = 0.0;
165164
pc->cStr = NULL;
166-
pc->freeCStr = 0;
167165
pc->npyarr = NULL;
168166
pc->pdblock = NULL;
169167
pc->rowLabels = NULL;
@@ -329,15 +327,13 @@ static const char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
329327
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
330328
NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit;
331329
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len);
332-
GET_TC(tc)->freeCStr = 1;
333330
return GET_TC(tc)->cStr;
334331
}
335332

336333
/* JSON callback. returns a char* and mutates the pointer to *len */
337334
static const char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
338335
JSONTypeContext *tc, size_t *len) {
339336
GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len);
340-
GET_TC(tc)->freeCStr = 1;
341337
return GET_TC(tc)->cStr;
342338
}
343339

@@ -352,7 +348,6 @@ static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
352348

353349
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
354350
GET_TC(tc)->cStr = PyDateTimeToIso(obj, base, len);
355-
GET_TC(tc)->freeCStr = 1;
356351
return GET_TC(tc)->cStr;
357352
}
358353

@@ -1013,16 +1008,24 @@ static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
10131008
//=============================================================================
10141009
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10151010
GET_TC(tc)->index = 0;
1011+
GET_TC(tc)->cStr = PyObject_Malloc(20);
1012+
if (!GET_TC(tc)->cStr) {
1013+
PyErr_NoMemory();
1014+
}
10161015
}
10171016

10181017
static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
10191018
const Py_ssize_t index = GET_TC(tc)->index;
10201019
Py_XDECREF(GET_TC(tc)->itemValue);
1020+
if (!GET_TC(tc)->cStr) {
1021+
return 0;
1022+
}
1023+
10211024
if (index == 0) {
1022-
GET_TC(tc)->cStr = "name";
1025+
strcpy((char *)GET_TC(tc)->cStr, "name");
10231026
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10241027
} else if (index == 1) {
1025-
GET_TC(tc)->cStr = "data";
1028+
strcpy((char *)GET_TC(tc)->cStr, "data");
10261029
GET_TC(tc)->itemValue = get_values(obj);
10271030
if (!GET_TC(tc)->itemValue) {
10281031
return 0;
@@ -1055,19 +1058,27 @@ static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10551058
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
10561059
GET_TC(tc)->index = 0;
10571060
enc->outputFormat = VALUES; // for contained series
1061+
GET_TC(tc)->cStr = PyObject_Malloc(20);
1062+
if (!GET_TC(tc)->cStr) {
1063+
PyErr_NoMemory();
1064+
}
10581065
}
10591066

10601067
static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
10611068
const Py_ssize_t index = GET_TC(tc)->index;
10621069
Py_XDECREF(GET_TC(tc)->itemValue);
1070+
if (!GET_TC(tc)->cStr) {
1071+
return 0;
1072+
}
1073+
10631074
if (index == 0) {
1064-
GET_TC(tc)->cStr = "name";
1075+
strcpy((char *)GET_TC(tc)->cStr, "name");
10651076
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10661077
} else if (index == 1) {
1067-
GET_TC(tc)->cStr = "index";
1078+
strcpy((char *)GET_TC(tc)->cStr, "index");
10681079
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
10691080
} else if (index == 2) {
1070-
GET_TC(tc)->cStr = "data";
1081+
strcpy((char *)GET_TC(tc)->cStr, "data");
10711082
GET_TC(tc)->itemValue = get_values(obj);
10721083
if (!GET_TC(tc)->itemValue) {
10731084
return 0;
@@ -1102,19 +1113,27 @@ static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
11021113
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
11031114
GET_TC(tc)->index = 0;
11041115
enc->outputFormat = VALUES; // for contained series & index
1116+
GET_TC(tc)->cStr = PyObject_Malloc(20);
1117+
if (!GET_TC(tc)->cStr) {
1118+
PyErr_NoMemory();
1119+
}
11051120
}
11061121

11071122
static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
11081123
const Py_ssize_t index = GET_TC(tc)->index;
11091124
Py_XDECREF(GET_TC(tc)->itemValue);
1125+
if (!GET_TC(tc)->cStr) {
1126+
return 0;
1127+
}
1128+
11101129
if (index == 0) {
1111-
GET_TC(tc)->cStr = "columns";
1130+
strcpy((char *)GET_TC(tc)->cStr, "columns");
11121131
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
11131132
} else if (index == 1) {
1114-
GET_TC(tc)->cStr = "index";
1133+
strcpy((char *)GET_TC(tc)->cStr, "index");
11151134
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
11161135
} else if (index == 2) {
1117-
GET_TC(tc)->cStr = "data";
1136+
strcpy((char *)GET_TC(tc)->cStr, "data");
11181137
Py_INCREF(obj);
11191138
GET_TC(tc)->itemValue = obj;
11201139
} else {
@@ -1886,9 +1905,7 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
18861905
GET_TC(tc)->rowLabels = NULL;
18871906
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
18881907
GET_TC(tc)->columnLabels = NULL;
1889-
if (GET_TC(tc)->freeCStr) {
1890-
PyObject_Free((void *)GET_TC(tc)->cStr);
1891-
}
1908+
PyObject_Free((void *)GET_TC(tc)->cStr);
18921909
GET_TC(tc)->cStr = NULL;
18931910
PyObject_Free(tc->prv);
18941911
tc->prv = NULL;
@@ -1915,7 +1932,6 @@ static const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc,
19151932
char *bytes = PyObject_Malloc(*_outLen + 1);
19161933
memcpy(bytes, str, *_outLen + 1);
19171934
GET_TC(tc)->cStr = bytes;
1918-
GET_TC(tc)->freeCStr = 1;
19191935

19201936
Py_DECREF(repr);
19211937

0 commit comments

Comments
 (0)