diff --git a/bson/binary.py b/bson/binary.py index 96b61b6dab..f03173a8ef 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -16,7 +16,7 @@ import struct from dataclasses import dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Sequence, Tuple, Type, Union +from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union from uuid import UUID """Tools for representing BSON binary data. @@ -400,24 +400,35 @@ def as_uuid(self, uuid_representation: int = UuidRepresentation.STANDARD) -> UUI @classmethod def from_vector( cls: Type[Binary], - vector: list[int, float], - dtype: BinaryVectorDtype, - padding: int = 0, + vector: Union[BinaryVector, list[int, float]], + dtype: Optional[BinaryVectorDtype] = None, + padding: Optional[int] = None, ) -> Binary: - """**(BETA)** Create a BSON :class:`~bson.binary.Binary` of Vector subtype from a list of Numbers. + """**(BETA)** Create a BSON :class:`~bson.binary.Binary` of Vector subtype. To interpret the representation of the numbers, a data type must be included. See :class:`~bson.binary.BinaryVectorDtype` for available types and descriptions. The dtype and padding are prepended to the binary data's value. - :param vector: List of values + :param vector: Either a List of values, or a :class:`~bson.binary.BinaryVector` dataclass. :param dtype: Data type of the values :param padding: For fractional bytes, number of bits to ignore at end of vector. :return: Binary packed data identified by dtype and padding. .. versionadded:: 4.10 """ + if isinstance(vector, BinaryVector): + if dtype or padding: + raise ValueError( + "The first argument, vector, has type BinaryVector. " + "dtype or padding cannot be separately defined, but were." + ) + dtype = vector.dtype + padding = vector.padding + vector = vector.data # type: ignore + + padding = 0 if padding is None else padding if dtype == BinaryVectorDtype.INT8: # pack ints in [-128, 127] as signed int8 format_str = "b" if padding: @@ -432,7 +443,7 @@ def from_vector( raise NotImplementedError("%s not yet supported" % dtype) metadata = struct.pack(" BinaryVector: diff --git a/test/test_bson.py b/test/test_bson.py index 96aa897d19..5dc1377bcd 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -51,7 +51,13 @@ is_valid, json_util, ) -from bson.binary import USER_DEFINED_SUBTYPE, Binary, BinaryVectorDtype, UuidRepresentation +from bson.binary import ( + USER_DEFINED_SUBTYPE, + Binary, + BinaryVector, + BinaryVectorDtype, + UuidRepresentation, +) from bson.code import Code from bson.codec_options import CodecOptions, DatetimeConversion from bson.datetime_ms import _DATETIME_ERROR_SUGGESTION @@ -785,6 +791,18 @@ def test_vector(self): else: self.fail("Failed to raise an exception.") + # Test form of Binary.from_vector(BinaryVector) + + assert padded_vec == Binary.from_vector( + BinaryVector(list_vector, BinaryVectorDtype.PACKED_BIT, padding) + ) + assert binary_vector == Binary.from_vector( + BinaryVector(list_vector, BinaryVectorDtype.INT8) + ) + assert float_binary == Binary.from_vector( + BinaryVector(list_vector, BinaryVectorDtype.FLOAT32) + ) + def test_unicode_regex(self): """Tests we do not get a segfault for C extension on unicode RegExs. This had been happening.