Skip to content

Commit 6c4e1c9

Browse files
thetorpedodog authored and ShaneHarvey committed
PYTHON-2061 bson: check for negative entry size in decode_file_iter (#429)
Raise InvalidBSON instead of ValueError when decode_file_iter reads an invalid BSON object size.
1 parent b8ce14d commit 6c4e1c9

File tree

3 files changed

+39
-36
lines changed

3 files changed

+39
-36
lines changed

bson/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1166,7 +1166,7 @@ def decode_file_iter(file_obj, codec_options=DEFAULT_CODEC_OPTIONS):
11661166
elif len(size_data) != 4:
11671167
raise InvalidBSON("cut off in middle of objsize")
11681168
obj_size = _UNPACK_INT_FROM(size_data, 0)[0] - 4
1169-
elements = size_data + file_obj.read(obj_size)
1169+
elements = size_data + file_obj.read(max(0, obj_size))
11701170
yield _bson_to_dict(elements, codec_options)
11711171

11721172

doc/contributors.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,3 +86,4 @@ The following is a list of people who have contributed to
8686
- Shrey Batra(shreybatra)
8787
- Felipe Rodrigues(fbidu)
8888
- Terence Honles (terencehonles)
89+
- Paul Fisher (thetorpedodog)

test/test_bson.py

Lines changed: 37 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,10 @@
1818

1919
import collections
2020
import datetime
21+
import os
2122
import re
2223
import sys
24+
import tempfile
2325
import uuid
2426

2527
sys.path[0:0] = [""]
@@ -335,41 +337,41 @@ def test_invalid_decodes(self):
335337
self.assertRaises(InvalidBSON, list,
336338
decode_file_iter(StringIO(b"\x1B")))
337339

338-
# An object size that's too small to even include the object size,
339-
# but is correctly encoded, along with a correct EOO (and no data).
340-
data = b"\x01\x00\x00\x00\x00"
341-
self.assertRaises(InvalidBSON, decode_all, data)
342-
self.assertRaises(InvalidBSON, list, decode_iter(data))
343-
self.assertRaises(InvalidBSON, list, decode_file_iter(StringIO(data)))
344-
345-
# One object, but with object size listed smaller than it is in the
346-
# data.
347-
data = (b"\x1A\x00\x00\x00\x0E\x74\x65\x73\x74"
348-
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
349-
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
350-
b"\x05\x00\x00\x00\x00")
351-
self.assertRaises(InvalidBSON, decode_all, data)
352-
self.assertRaises(InvalidBSON, list, decode_iter(data))
353-
self.assertRaises(InvalidBSON, list, decode_file_iter(StringIO(data)))
354-
355-
# One object, missing the EOO at the end.
356-
data = (b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
357-
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
358-
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
359-
b"\x05\x00\x00\x00")
360-
self.assertRaises(InvalidBSON, decode_all, data)
361-
self.assertRaises(InvalidBSON, list, decode_iter(data))
362-
self.assertRaises(InvalidBSON, list, decode_file_iter(StringIO(data)))
363-
364-
# One object, sized correctly, with a spot for an EOO, but the EOO
365-
# isn't 0x00.
366-
data = (b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
367-
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
368-
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
369-
b"\x05\x00\x00\x00\xFF")
370-
self.assertRaises(InvalidBSON, decode_all, data)
371-
self.assertRaises(InvalidBSON, list, decode_iter(data))
372-
self.assertRaises(InvalidBSON, list, decode_file_iter(StringIO(data)))
340+
bad_bsons = [
341+
# An object size that's too small to even include the object size,
342+
# but is correctly encoded, along with a correct EOO (and no data).
343+
b"\x01\x00\x00\x00\x00",
344+
# One object, but with object size listed smaller than it is in the
345+
# data.
346+
(b"\x1A\x00\x00\x00\x0E\x74\x65\x73\x74"
347+
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
348+
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
349+
b"\x05\x00\x00\x00\x00"),
350+
# One object, missing the EOO at the end.
351+
(b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
352+
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
353+
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
354+
b"\x05\x00\x00\x00"),
355+
# One object, sized correctly, with a spot for an EOO, but the EOO
356+
# isn't 0x00.
357+
(b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
358+
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
359+
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
360+
b"\x05\x00\x00\x00\xFF"),
361+
]
362+
for i, data in enumerate(bad_bsons):
363+
msg = "bad_bson[{}]".format(i)
364+
with self.assertRaises(InvalidBSON, msg=msg):
365+
decode_all(data)
366+
with self.assertRaises(InvalidBSON, msg=msg):
367+
list(decode_iter(data))
368+
with self.assertRaises(InvalidBSON, msg=msg):
369+
list(decode_file_iter(StringIO(data)))
370+
with tempfile.TemporaryFile() as scratch:
371+
scratch.write(data)
372+
scratch.seek(0, os.SEEK_SET)
373+
with self.assertRaises(InvalidBSON, msg=msg):
374+
list(decode_file_iter(scratch))
373375

374376
def test_data_timestamp(self):
375377
self.assertEqual({"test": Timestamp(4, 20)},

0 commit comments

Comments
 (0)