Skip to content

Commit 4049b14

Browse files
committed
PYTHON-1783: disallow custom-encoding built-in types
1 parent f09d6fa commit 4049b14

File tree

5 files changed

+197
-51
lines changed

5 files changed

+197
-51
lines changed

bson/__init__.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -756,17 +756,9 @@ def _encode_maxkey(name, dummy0, dummy1, dummy2):
756756

757757

758758
def _name_value_to_bson(name, value, check_keys, opts,
759+
in_custom_call=False,
759760
in_fallback_call=False):
760761
"""Encode a single name, value pair."""
761-
# Custom encoder (if any) takes precedence over default encoders.
762-
# Using 'if' instead of 'try...except' for performance since this will
763-
# usually not be true.
764-
# No support for auto-encoding subtypes of registered custom types.
765-
if opts.type_registry._encoder_map:
766-
custom_encoder = opts.type_registry._encoder_map.get(type(value))
767-
if custom_encoder is not None:
768-
value = custom_encoder(value)
769-
770762
# First see if the type is already cached. KeyError will only ever
771763
# happen once per subtype.
772764
try:
@@ -784,8 +776,19 @@ def _name_value_to_bson(name, value, check_keys, opts,
784776
_ENCODERS[type(value)] = func
785777
return func(name, value, check_keys, opts)
786778

787-
# If all else fails test each base type. This will only happen once for
788-
# a subtype of a supported base type.
779+
# Third, check if a type encoder is registered for this type.
780+
# Note that subtypes of registered custom types are not auto-encoded.
781+
if not in_custom_call and opts.type_registry._encoder_map:
782+
custom_encoder = opts.type_registry._encoder_map.get(type(value))
783+
if custom_encoder is not None:
784+
return _name_value_to_bson(
785+
name, custom_encoder(value), check_keys, opts,
786+
in_custom_call=True)
787+
788+
# Fourth, test each base type. This will only happen once for
789+
# a subtype of a supported base type. Unlike in the C-extensions, this
790+
# is done after trying the custom type encoder because checking for each
791+
# subtype is expensive.
789792
for base in _BUILT_IN_TYPES:
790793
if isinstance(value, base):
791794
func = _ENCODERS[base]
@@ -798,7 +801,8 @@ def _name_value_to_bson(name, value, check_keys, opts,
798801
fallback_encoder = opts.type_registry._fallback_encoder
799802
if not in_fallback_call and fallback_encoder is not None:
800803
return _name_value_to_bson(
801-
name, fallback_encoder(value), check_keys, opts, True)
804+
name, fallback_encoder(value), check_keys, opts,
805+
in_fallback_call=True)
802806

803807
raise InvalidDocument(
804808
"cannot convert value of type %s to bson" % type(value))

bson/_cbsonmodule.c

Lines changed: 37 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
120120
int type_byte, PyObject* value,
121121
unsigned char check_keys,
122122
const codec_options_t* options,
123+
unsigned char in_custom_call,
123124
unsigned char in_fallback_call);
124125

125126
/* Date stuff */
@@ -563,38 +564,16 @@ void destroy_codec_options(codec_options_t* options) {
563564
static int write_element_to_buffer(PyObject* self, buffer_t buffer,
564565
int type_byte, PyObject* value,
565566
unsigned char check_keys,
566-
const codec_options_t* options) {
567+
const codec_options_t* options,
568+
unsigned char in_custom_call,
569+
unsigned char in_fallback_call) {
567570
int result = 0;
568-
PyObject* value_type = NULL;
569-
PyObject* converter = NULL;
570-
PyObject* new_value = NULL;
571-
572571
if(Py_EnterRecursiveCall(" while encoding an object to BSON ")) {
573572
return 0;
574573
}
575-
576-
if (!options->type_registry.is_encoder_empty) {
577-
value_type = PyObject_Type(value);
578-
if (value_type == NULL) {
579-
goto fail;
580-
}
581-
converter = PyDict_GetItem(options->type_registry.encoder_map, value_type);
582-
if (converter != NULL) {
583-
/* Transform types that have a registered converter.
584-
* A new reference is created upon transformation. */
585-
new_value = PyObject_CallFunctionObjArgs(converter, value, NULL);
586-
if (new_value == NULL) {
587-
goto fail;
588-
}
589-
value = new_value;
590-
}
591-
}
592574
result = _write_element_to_buffer(self, buffer, type_byte,
593-
value, check_keys, options, 0);
594-
595-
fail:
596-
Py_XDECREF(value_type);
597-
Py_XDECREF(new_value);
575+
value, check_keys, options,
576+
in_custom_call, in_fallback_call);
598577
Py_LeaveRecursiveCall();
599578
return result;
600579
}
@@ -781,6 +760,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
781760
int type_byte, PyObject* value,
782761
unsigned char check_keys,
783762
const codec_options_t* options,
763+
unsigned char in_custom_call,
784764
unsigned char in_fallback_call) {
785765
struct module_state *state = GETSTATE(self);
786766
PyObject* mapping_type;
@@ -1178,7 +1158,8 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
11781158
if (!(item_value = PySequence_GetItem(value, i)))
11791159
return 0;
11801160
if (!write_element_to_buffer(self, buffer, list_type_byte,
1181-
item_value, check_keys, options)) {
1161+
item_value, check_keys, options,
1162+
0, 0)) {
11821163
Py_DECREF(item_value);
11831164
return 0;
11841165
}
@@ -1377,6 +1358,31 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
13771358
Py_XDECREF(mapping_type);
13781359
Py_XDECREF(uuid_type);
13791360

1361+
/* Try a custom encoder if one is provided and we have not already
1362+
* attempted to use a type encoder. */
1363+
if (!in_custom_call && !options->type_registry.is_encoder_empty) {
1364+
PyObject* value_type = NULL;
1365+
PyObject* converter = NULL;
1366+
value_type = PyObject_Type(value);
1367+
if (value_type == NULL) {
1368+
return 0;
1369+
}
1370+
converter = PyDict_GetItem(options->type_registry.encoder_map, value_type);
1371+
Py_XDECREF(value_type);
1372+
if (converter != NULL) {
1373+
/* Transform types that have a registered converter.
1374+
* A new reference is created upon transformation. */
1375+
new_value = PyObject_CallFunctionObjArgs(converter, value, NULL);
1376+
if (new_value == NULL) {
1377+
return 0;
1378+
}
1379+
retval = write_element_to_buffer(self, buffer, type_byte, new_value,
1380+
check_keys, options, 1, 0);
1381+
Py_XDECREF(new_value);
1382+
return retval;
1383+
}
1384+
}
1385+
13801386
/* Try the fallback encoder if one is provided and we have not already
13811387
* attempted to use the fallback encoder. */
13821388
if (!in_fallback_call && options->type_registry.has_fallback_encoder) {
@@ -1386,12 +1392,11 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
13861392
// propagate any exception raised by the callback
13871393
return 0;
13881394
}
1389-
retval = _write_element_to_buffer(self, buffer, type_byte, new_value,
1390-
check_keys, options, 1);
1395+
retval = write_element_to_buffer(self, buffer, type_byte, new_value,
1396+
check_keys, options, 0, 1);
13911397
Py_XDECREF(new_value);
13921398
return retval;
13931399
}
1394-
Py_XDECREF(new_value);
13951400

13961401
/* We can't determine value's type. Fail. */
13971402
_set_cannot_encode(value);
@@ -1466,7 +1471,7 @@ int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_lengt
14661471
return 0;
14671472
}
14681473
if (!write_element_to_buffer(self, buffer, type_byte,
1469-
value, check_keys, options)) {
1474+
value, check_keys, options, 0, 0)) {
14701475
return 0;
14711476
}
14721477
return 1;

bson/codec_options.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from collections import namedtuple
2121

2222
from bson.py3compat import ABC, abc, abstractproperty, string_type
23+
2324
from bson.binary import (ALL_UUID_REPRESENTATIONS,
2425
PYTHON_LEGACY,
2526
UUID_REPRESENTATION_NAMES)
@@ -118,6 +119,7 @@ def __init__(self, type_codecs=None, fallback_encoder=None):
118119
for codec in self.__type_codecs:
119120
is_valid_codec = False
120121
if isinstance(codec, TypeEncoder):
122+
self._validate_type_encoder(codec)
121123
is_valid_codec = True
122124
self._encoder_map[codec.python_type] = codec.transform_python
123125
if isinstance(codec, TypeDecoder):
@@ -129,6 +131,15 @@ def __init__(self, type_codecs=None, fallback_encoder=None):
129131
TypeEncoder.__name__, TypeDecoder.__name__,
130132
TypeCodec.__name__, codec))
131133

134+
def _validate_type_encoder(self, codec):
    """Refuse a type encoder whose ``python_type`` is a BSON built-in.

    ``codec.python_type`` is compared against every entry of
    ``bson._BUILT_IN_TYPES``; the first entry it is a subclass of
    (or identical to) causes a ``TypeError`` naming the codec and that
    built-in type.  Returns ``None`` when there is no clash.
    """
    # NOTE(review): imported locally, presumably to avoid a circular
    # import between bson and bson.codec_options -- confirm.
    from bson import _BUILT_IN_TYPES

    clashing_type = next(
        (builtin for builtin in _BUILT_IN_TYPES
         if issubclass(codec.python_type, builtin)),
        None)
    if clashing_type is None:
        return

    raise TypeError(
        "TypeEncoders cannot change how built-in types are "
        "encoded (encoder %s transforms type %s)" %
        (codec, clashing_type))
142+
132143
def __repr__(self):
133144
return ('%s(type_codecs=%r, fallback_encoder=%r)' % (
134145
self.__class__.__name__, self.__type_codecs,

test/test_bson_custom_types.py

Lines changed: 123 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
decode_all,
2727
decode_file_iter,
2828
decode_iter,
29+
RE_TYPE,
30+
_BUILT_IN_TYPES,
2931
_dict_to_bson,
3032
_bson_to_dict)
3133
from bson.codec_options import (CodecOptions, TypeCodec, TypeDecoder,
@@ -189,21 +191,24 @@ def run_test(base, attrs, fail):
189191
else:
190192
codec()
191193

192-
run_test(TypeEncoder, {'python_type': int,}, fail=True)
194+
class MyType(object):
195+
pass
196+
197+
run_test(TypeEncoder, {'python_type': MyType,}, fail=True)
193198
run_test(TypeEncoder, {'transform_python': lambda s, x: x}, fail=True)
194199
run_test(TypeEncoder, {'transform_python': lambda s, x: x,
195-
'python_type': int}, fail=False)
200+
'python_type': MyType}, fail=False)
196201

197202
run_test(TypeDecoder, {'bson_type': Decimal128, }, fail=True)
198203
run_test(TypeDecoder, {'transform_bson': lambda s, x: x}, fail=True)
199204
run_test(TypeDecoder, {'transform_bson': lambda s, x: x,
200205
'bson_type': Decimal128}, fail=False)
201206

202207
run_test(TypeCodec, {'bson_type': Decimal128,
203-
'python_type': int}, fail=True)
208+
'python_type': MyType}, fail=True)
204209
run_test(TypeCodec, {'transform_bson': lambda s, x: x,
205210
'transform_python': lambda s, x: x}, fail=True)
206-
run_test(TypeCodec, {'python_type': int,
211+
run_test(TypeCodec, {'python_type': MyType,
207212
'transform_python': lambda s, x: x,
208213
'transform_bson': lambda s, x: x,
209214
'bson_type': Decimal128}, fail=False)
@@ -215,6 +220,91 @@ def test_type_checks(self):
215220
self.assertFalse(issubclass(TypeEncoder, TypeDecoder))
216221

217222

223+
class TestCustomTypeEncoderAndFallbackEncoderTandem(unittest.TestCase):
    """Check how type encoders and the fallback encoder interact.

    Two throw-away wrapper types (TypeA, TypeB) are combined with type
    encoders and fallback encoders that convert between them or to a
    plain, BSON-encodable value.
    """

    @classmethod
    def setUpClass(cls):
        class TypeA(object):
            def __init__(self, x):
                self.value = x

        cls.TypeA = TypeA

        class TypeB(object):
            def __init__(self, x):
                self.value = x

        cls.TypeB = TypeB

        # Fallback encoder: accepts only TypeA and yields a TypeB.
        def fallback_encoder_A2B(value):
            assert isinstance(value, TypeA)
            return TypeB(value.value)

        cls.fallback_encoder_A2B = staticmethod(fallback_encoder_A2B)

        # Fallback encoder: accepts only TypeA and yields the wrapped,
        # encodable value.
        def fallback_encoder_A2BSON(value):
            assert isinstance(value, TypeA)
            return value.value

        cls.fallback_encoder_A2BSON = staticmethod(fallback_encoder_A2BSON)

        # Type encoder: TypeB -> wrapped, encodable value.
        class B2BSON(TypeEncoder):
            python_type = TypeB

            def transform_python(self, value):
                return value.value

        cls.B2BSON = B2BSON

        # Type encoder: TypeA -> TypeB.  Not a proper type encoder,
        # because its output is not BSON-encodable.
        class A2B(TypeEncoder):
            python_type = TypeA

            def transform_python(self, value):
                return TypeB(value.value)

        cls.A2B = A2B

        # Type encoder: TypeB -> TypeA.  Not a proper type encoder,
        # because its output is not BSON-encodable.
        class B2A(TypeEncoder):
            python_type = TypeB

            def transform_python(self, value):
                return TypeA(value.value)

        cls.B2A = B2A

    def test_encode_fallback_then_custom(self):
        registry = TypeRegistry(
            [self.B2BSON()], fallback_encoder=self.fallback_encoder_A2B)
        options = CodecOptions(type_registry=registry)
        document = {'x': self.TypeA(123)}
        expected = BSON.encode({'x': 123})

        encoded = BSON.encode(document, codec_options=options)
        self.assertEqual(encoded, expected)

    def test_encode_custom_then_fallback(self):
        registry = TypeRegistry(
            [self.B2A()], fallback_encoder=self.fallback_encoder_A2BSON)
        options = CodecOptions(type_registry=registry)
        document = {'x': self.TypeB(123)}
        expected = BSON.encode({'x': 123})

        encoded = BSON.encode(document, codec_options=options)
        self.assertEqual(encoded, expected)

    def test_chaining_encoders_fails(self):
        registry = TypeRegistry([self.A2B(), self.B2BSON()])
        options = CodecOptions(type_registry=registry)

        # A type encoder's output is not passed through another type
        # encoder, so TypeA -> TypeB cannot be finished by B2BSON.
        with self.assertRaises(InvalidDocument):
            BSON.encode({'x': self.TypeA(123)}, codec_options=options)

    def test_infinite_loop_exceeds_max_recursion_depth(self):
        registry = TypeRegistry(
            [self.B2A()], fallback_encoder=self.fallback_encoder_A2B)
        options = CodecOptions(type_registry=registry)

        # Raises max recursion depth exceeded error
        with self.assertRaises(RuntimeError):
            BSON.encode({'x': self.TypeA(100)}, codec_options=options)
306+
307+
218308
class TestTypeRegistry(unittest.TestCase):
219309
@classmethod
220310
def setUpClass(cls):
@@ -347,6 +437,35 @@ def test_type_registry_eq(self):
347437
self.assertNotEqual(
348438
TypeRegistry(codec_instances), TypeRegistry(codec_instances_2))
349439

440+
def test_builtin_types_override_fails(self):
    """TypeRegistry must reject encoders for built-in types.

    For both TypeEncoder and TypeCodec, registering a codec whose
    ``python_type`` is an entry of ``_BUILT_IN_TYPES``, or a subclass
    of one, must raise ``TypeError`` with the documented message.
    """
    # Raw strings: ``\(`` is a regex escape, not a valid Python string
    # escape, and triggers an invalid-escape warning otherwise.
    msg = (r"TypeEncoders cannot change how built-in types "
           r"are encoded \(encoder .* transforms type .*\)")

    def assert_rejected(base, attrs, pytype):
        # Work on a copy so the caller's dict is never mutated.
        codec_attrs = dict(attrs)
        codec_attrs.update({'python_type': pytype,
                            'transform_python': lambda s, x: x})
        codec = type('testcodec', (base, ), codec_attrs)
        codec_instance = codec()
        with self.assertRaisesRegex(TypeError, msg):
            TypeRegistry([codec_instance, ])

    def run_test(base, attrs):
        for pytype in _BUILT_IN_TYPES:
            assert_rejected(base, attrs, pytype)

            # Test only some subtypes as not all can be subclassed.
            if pytype in [bool, type(None), RE_TYPE, ]:
                continue

            class MyType(pytype):
                pass

            assert_rejected(base, attrs, MyType)

    # The transform callables are installed as methods, so they take
    # ``self`` as well as the value.
    run_test(TypeEncoder, {})
    run_test(TypeCodec, {'bson_type': Decimal128,
                         'transform_bson': lambda s, x: x})
468+
350469

351470
if __name__ == "__main__":
352471
unittest.main()

test/test_client.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -284,14 +284,21 @@ def test_metadata(self):
284284
self.assertEqual(options.pool_options.metadata, metadata)
285285

286286
def test_kwargs_codec_options(self):
287-
class FloatAsIntEncoder(TypeEncoder):
288-
python_type = float
287+
class MyFloatType(object):
288+
def __init__(self, x):
289+
self.__x = x
290+
@property
291+
def x(self):
292+
return self.__x
293+
294+
class MyFloatAsIntEncoder(TypeEncoder):
    """Type encoder that turns a MyFloatType into a plain int."""
    python_type = MyFloatType

    def transform_python(self, value):
        # MyFloatType defines no __int__, so int(value) would raise
        # TypeError; convert the number exposed by its ``x`` property.
        return int(value.x)
291298

292299
# Ensure codec options are passed in correctly
293300
document_class = SON
294-
type_registry = TypeRegistry([FloatAsIntEncoder()])
301+
type_registry = TypeRegistry([MyFloatAsIntEncoder()])
295302
tz_aware = True
296303
uuid_representation_label = 'javaLegacy'
297304
unicode_decode_error_handler = 'ignore'

0 commit comments

Comments
 (0)