Skip to content

Commit 8129846

Browse files
committed
PYTHON-3222 Fix memory leak in cbson decode_all (#927)
Allow codec_options to be passed to decode_all as a keyword argument. Make decode_all show up in the docs. (cherry picked from commit dca72b7)
1 parent 4f0878d commit 8129846

File tree

4 files changed

+63
-73
lines changed

4 files changed

+63
-73
lines changed

bson/__init__.py

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -965,38 +965,8 @@ def decode(data, codec_options=DEFAULT_CODEC_OPTIONS):
965965
return _bson_to_dict(data, codec_options)
966966

967967

968-
def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
969-
"""Decode BSON data to multiple documents.
970-
971-
`data` must be a bytes-like object implementing the buffer protocol that
972-
provides concatenated, valid, BSON-encoded documents.
973-
974-
:Parameters:
975-
- `data`: BSON data
976-
- `codec_options` (optional): An instance of
977-
:class:`~bson.codec_options.CodecOptions`.
978-
979-
.. versionchanged:: 3.9
980-
Supports bytes-like objects that implement the buffer protocol.
981-
982-
.. versionchanged:: 3.0
983-
Removed `compile_re` option: PyMongo now always represents BSON regular
984-
expressions as :class:`~bson.regex.Regex` objects. Use
985-
:meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
986-
BSON regular expression to a Python regular expression object.
987-
988-
Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
989-
`codec_options`.
990-
991-
.. versionchanged:: 2.7
992-
Added `compile_re` option. If set to False, PyMongo represented BSON
993-
regular expressions as :class:`~bson.regex.Regex` objects instead of
994-
attempting to compile BSON regular expressions as Python native
995-
regular expressions, thus preventing errors for some incompatible
996-
patterns, see `PYTHON-500`_.
997-
998-
.. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
999-
"""
968+
def _decode_all(data, codec_options):
969+
"""Decode BSON data to multiple documents."""
1000970
data, view = get_data_and_view(data)
1001971
if not isinstance(codec_options, CodecOptions):
1002972
raise _CODEC_OPTIONS_TYPE_ERROR
@@ -1031,7 +1001,44 @@ def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
10311001

10321002

10331003
if _USE_C:
1034-
decode_all = _cbson.decode_all
1004+
_decode_all = _cbson._decode_all
1005+
1006+
1007+
def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
1008+
"""Decode BSON data to multiple documents.
1009+
1010+
`data` must be a bytes-like object implementing the buffer protocol that
1011+
provides concatenated, valid, BSON-encoded documents.
1012+
1013+
:Parameters:
1014+
- `data`: BSON data
1015+
- `codec_options` (optional): An instance of
1016+
:class:`~bson.codec_options.CodecOptions`.
1017+
1018+
.. versionchanged:: 3.9
1019+
Supports bytes-like objects that implement the buffer protocol.
1020+
1021+
.. versionchanged:: 3.0
1022+
Removed `compile_re` option: PyMongo now always represents BSON regular
1023+
expressions as :class:`~bson.regex.Regex` objects. Use
1024+
:meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
1025+
BSON regular expression to a Python regular expression object.
1026+
1027+
Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
1028+
`codec_options`.
1029+
1030+
.. versionchanged:: 2.7
1031+
Added `compile_re` option. If set to False, PyMongo represented BSON
1032+
regular expressions as :class:`~bson.regex.Regex` objects instead of
1033+
attempting to compile BSON regular expressions as Python native
1034+
regular expressions, thus preventing errors for some incompatible
1035+
patterns, see `PYTHON-500`_.
1036+
1037+
.. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
1038+
"""
1039+
if not isinstance(codec_options, CodecOptions):
1040+
raise _CODEC_OPTIONS_TYPE_ERROR
1041+
return _decode_all(data, codec_options)
10351042

10361043

10371044
def _decode_selective(rawdoc, fields, codec_options):

bson/_cbsonmodule.c

Lines changed: 8 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ struct module_state {
5353
PyObject* BSONInt64;
5454
PyObject* Decimal128;
5555
PyObject* Mapping;
56-
PyObject* CodecOptions;
5756
};
5857

5958
/* The Py_TYPE macro was introduced in CPython 2.6 */
@@ -371,11 +370,10 @@ static int _load_python_objects(PyObject* module) {
371370
_load_object(&state->Decimal128, "bson.decimal128", "Decimal128") ||
372371
_load_object(&state->UUID, "uuid", "UUID") ||
373372
#if PY_MAJOR_VERSION >= 3
374-
_load_object(&state->Mapping, "collections.abc", "Mapping") ||
373+
_load_object(&state->Mapping, "collections.abc", "Mapping")) {
375374
#else
376-
_load_object(&state->Mapping, "collections", "Mapping") ||
375+
_load_object(&state->Mapping, "collections", "Mapping")) {
377376
#endif
378-
_load_object(&state->CodecOptions, "bson.codec_options", "CodecOptions")) {
379377
return 1;
380378
}
381379
/* Reload our REType hack too. */
@@ -537,26 +535,6 @@ int convert_codec_options(PyObject* options_obj, void* p) {
537535
return 1;
538536
}
539537

540-
/* Fill out a codec_options_t* with default options.
541-
*
542-
* Return 1 on success.
543-
* Return 0 on failure.
544-
*/
545-
int default_codec_options(struct module_state* state, codec_options_t* options) {
546-
PyObject* options_obj = NULL;
547-
PyObject* codec_options_func = _get_object(
548-
state->CodecOptions, "bson.codec_options", "CodecOptions");
549-
if (codec_options_func == NULL) {
550-
return 0;
551-
}
552-
options_obj = PyObject_CallFunctionObjArgs(codec_options_func, NULL);
553-
Py_DECREF(codec_options_func);
554-
if (options_obj == NULL) {
555-
return 0;
556-
}
557-
return convert_codec_options(options_obj, options);
558-
}
559-
560538
void destroy_codec_options(codec_options_t* options) {
561539
Py_CLEAR(options->document_class);
562540
Py_CLEAR(options->tzinfo);
@@ -2675,15 +2653,10 @@ static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) {
26752653
PyObject* value;
26762654
PyObject* result_tuple;
26772655

2678-
if (!PyArg_ParseTuple(args, "OII|O&", &bson, &position, &max,
2656+
if (!PyArg_ParseTuple(args, "OIIO&", &bson, &position, &max,
26792657
convert_codec_options, &options)) {
26802658
return NULL;
26812659
}
2682-
if (PyTuple_GET_SIZE(args) < 4) {
2683-
if (!default_codec_options(GETSTATE(self), &options)) {
2684-
return NULL;
2685-
}
2686-
}
26872660

26882661
#if PY_MAJOR_VERSION >= 3
26892662
if (!PyBytes_Check(bson)) {
@@ -2867,17 +2840,13 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
28672840
PyObject* dict;
28682841
PyObject* result = NULL;
28692842
codec_options_t options;
2870-
PyObject* options_obj;
2871-
Py_buffer view;
2843+
PyObject* options_obj = NULL;
2844+
Py_buffer view = {0};
28722845

2873-
if (!PyArg_ParseTuple(args, "O|O", &bson, &options_obj)) {
2846+
if (!PyArg_ParseTuple(args, "OO", &bson, &options_obj)) {
28742847
return NULL;
28752848
}
2876-
if (PyTuple_GET_SIZE(args) < 2) {
2877-
if (!default_codec_options(GETSTATE(self), &options)) {
2878-
return NULL;
2879-
}
2880-
} else if (!convert_codec_options(options_obj, &options)) {
2849+
if (!convert_codec_options(options_obj, &options)) {
28812850
return NULL;
28822851
}
28832852

@@ -2971,7 +2940,7 @@ static PyMethodDef _CBSONMethods[] = {
29712940
"convert a dictionary to a string containing its BSON representation."},
29722941
{"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS,
29732942
"convert a BSON string to a SON object."},
2974-
{"decode_all", _cbson_decode_all, METH_VARARGS,
2943+
{"_decode_all", _cbson_decode_all, METH_VARARGS,
29752944
"convert binary data to a sequence of documents."},
29762945
{"_element_to_dict", _cbson_element_to_dict, METH_VARARGS,
29772946
"Decode a single key, value pair."},

doc/changelog.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ Bug fixes
1414

1515
- Fixed a bug where the client could be unable to discover the new primary
1616
after a simultaneous replica set election and reconfig (`PYTHON-2970`_).
17+
- Fixed a memory leak when calling :func:`~bson.decode_all` without a
18+
``codec_options`` argument (`PYTHON-3222`_).
19+
- Fixed a bug where :func:`~bson.decode_all` did not accept ``codec_options``
20+
as a keyword argument (`PYTHON-3222`_).
1721

1822
Deprecations
1923
............
@@ -26,6 +30,7 @@ See the `PyMongo 3.13.0 release notes in JIRA`_ for the list of resolved issues
2630
in this release.
2731

2832
.. _PYTHON-2970: https://jira.mongodb.org/browse/PYTHON-2970
33+
.. _PYTHON-3222: https://jira.mongodb.org/browse/PYTHON-3222
2934
.. _PyMongo 3.13.0 release notes in JIRA: https://jira.mongodb.org/secure/ReleaseNote.jspa?projectId=10004&version=31570
3035

3136
Changes in Version 3.12.3

test/test_bson.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
encode,
4242
is_valid,
4343
)
44-
from bson.binary import Binary, UUIDLegacy
44+
from bson.binary import Binary, UUIDLegacy, UuidRepresentation
4545
from bson.code import Code
4646
from bson.codec_options import CodecOptions
4747
from bson.dbref import DBRef
@@ -1036,6 +1036,15 @@ def test_decode_all_defaults(self):
10361036
self.assertEqual(decoded["uuid"], doc["uuid"])
10371037
self.assertIsNone(decoded["dt"].tzinfo)
10381038

1039+
def test_decode_all_kwarg(self):
1040+
doc = {"a": uuid.uuid4()}
1041+
opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
1042+
encoded = encode(doc, codec_options=opts)
1043+
# Positional codec_options
1044+
self.assertEqual([doc], decode_all(encoded, opts))
1045+
# Keyword codec_options
1046+
self.assertEqual([doc], decode_all(encoded, codec_options=opts))
1047+
10391048
def test_unicode_decode_error_handler(self):
10401049
enc = encode({"keystr": "foobar"})
10411050

0 commit comments

Comments
 (0)