Skip to content

Commit 0e8bdf6

Browse files
committed
BSONVector - updated treatment of ignored bits to match spec.
1 parent 336163a commit 0e8bdf6

File tree

3 files changed

+32
-5
lines changed

3 files changed

+32
-5
lines changed

bson/binary.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from __future__ import annotations
1515

1616
import struct
17+
import warnings
1718
from enum import Enum
1819
from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union, overload
1920
from uuid import UUID
@@ -471,6 +472,10 @@ def from_vector(
471472

472473
metadata = struct.pack("<sB", dtype.value, padding)
473474
data = struct.pack(f"<{len(vector)}{format_str}", *vector) # type: ignore
475+
if padding and len(vector) and not (data[-1] & ((1 << padding) - 1)) == 0:
476+
raise ValueError(
477+
"Vector has a padding P, but bits in the final byte lower than P are non-zero. They must be zero."
478+
)
474479
return cls(metadata + data, subtype=VECTOR_SUBTYPE)
475480

476481
def as_vector(self) -> BinaryVector:
@@ -522,6 +527,12 @@ def as_vector(self) -> BinaryVector:
522527
dtype_format = "B"
523528
format_string = f"<{n_values}{dtype_format}"
524529
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))
530+
if padding and n_values and unpacked_uint8s[-1] & (1 << padding) - 1 != 0:
531+
warnings.warn(
532+
"Vector has a padding P, but bits in the final byte lower than P are non-zero. In the next major version, they must be zero.",
533+
DeprecationWarning,
534+
stacklevel=2,
535+
)
525536
return BinaryVector(unpacked_uint8s, dtype, padding)
526537

527538
else:

test/bson_binary_vector/packed_bit.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@
2929
"padding": 3,
3030
"canonical_bson": "1600000005766563746F7200040000000910037F0800"
3131
},
32+
{
33+
"description": "PACKED_BIT with inconsistent padding",
34+
"valid": false,
35+
"vector": [127, 7],
36+
"dtype_hex": "0x10",
37+
"dtype_alias": "PACKED_BIT",
38+
"padding": 3,
39+
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
40+
},
3241
{
3342
"description": "Empty Vector PACKED_BIT",
3443
"valid": true,

test/test_bson.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,7 @@ def test_vector(self):
739739
"""Tests of subtype 9"""
740740
# We start with valid cases, across the 3 dtypes implemented.
741741
# Work with a simple vector that can be interpreted as int8, float32, or ubyte
742-
list_vector = [127, 7]
742+
list_vector = [127, 8]
743743
# As INT8, vector has length 2
744744
binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8)
745745
vector = binary_vector.as_vector()
@@ -764,18 +764,18 @@ def test_vector(self):
764764
uncompressed = ""
765765
for val in list_vector:
766766
uncompressed += format(val, "08b")
767-
assert uncompressed[:-padding] == "0111111100000"
767+
assert uncompressed[:-padding] == "0111111100001"
768768

769769
# It is worthwhile explicitly showing the values encoded to BSON
770770
padded_doc = {"padded_vec": padded_vec}
771771
assert (
772772
encode(padded_doc)
773-
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00"
773+
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00"
774774
)
775775
# and dumped to json
776776
assert (
777777
json_util.dumps(padded_doc)
778-
== '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
778+
== '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}'
779779
)
780780

781781
# FLOAT32 is also implemented
@@ -791,8 +791,15 @@ def test_vector(self):
791791
else:
792792
self.fail("Failed to raise an exception.")
793793

794-
# Test form of Binary.from_vector(BinaryVector)
794+
# Test that all ignored (padding) bits in the final byte must be zero
795+
try:
796+
Binary.from_vector([255], BinaryVectorDtype.PACKED_BIT, padding=7)
797+
except Exception as exc:
798+
self.assertIsInstance(exc, ValueError)
799+
else:
800+
self.fail("Failed to raise an exception.")
795801

802+
# Test form of Binary.from_vector(BinaryVector)
796803
assert padded_vec == Binary.from_vector(
797804
BinaryVector(list_vector, BinaryVectorDtype.PACKED_BIT, padding)
798805
)

0 commit comments

Comments
 (0)