Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions bson/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import struct
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Any, Sequence, Tuple, Type, Union
from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union
from uuid import UUID

"""Tools for representing BSON binary data.
Expand Down Expand Up @@ -400,24 +400,35 @@ def as_uuid(self, uuid_representation: int = UuidRepresentation.STANDARD) -> UUI
@classmethod
def from_vector(
cls: Type[Binary],
vector: list[int, float],
dtype: BinaryVectorDtype,
padding: int = 0,
vector: Union[BinaryVector, list[int, float]],
dtype: Optional[BinaryVectorDtype] = None,
padding: Optional[int] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using `typing.overload` could be a cleaner solution here: separate overloads for the list form and the BinaryVector form would properly convey to the type checker that dtype is required when vector is a list.

) -> Binary:
"""**(BETA)** Create a BSON :class:`~bson.binary.Binary` of Vector subtype from a list of Numbers.
"""**(BETA)** Create a BSON :class:`~bson.binary.Binary` of Vector subtype.

To interpret the representation of the numbers, a data type must be included.
See :class:`~bson.binary.BinaryVectorDtype` for available types and descriptions.

The dtype and padding are prepended to the binary data's value.

:param vector: List of values
:param vector: Either a List of values, or a :class:`~bson.binary.BinaryVector` dataclass.
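:param dtype: Data type of the values. Leave unset when `vector` is a :class:`~bson.binary.BinaryVector`, which supplies its own dtype.
:param padding: For fractional bytes, number of bits to ignore at end of vector. Defaults to 0 for a list; leave unset when `vector` is a :class:`~bson.binary.BinaryVector`, which supplies its own padding.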
:return: Binary packed data identified by dtype and padding.

.. versionadded:: 4.10
"""
if isinstance(vector, BinaryVector):
if dtype or padding:
raise ValueError(
"The first argument, vector, has type BinaryVector. "
"dtype or padding cannot be separately defined, but were."
)
dtype = vector.dtype
padding = vector.padding
vector = vector.data # type: ignore

padding = 0 if padding is None else padding
if dtype == BinaryVectorDtype.INT8: # pack ints in [-128, 127] as signed int8
format_str = "b"
if padding:
Expand All @@ -432,7 +443,7 @@ def from_vector(
raise NotImplementedError("%s not yet supported" % dtype)

metadata = struct.pack("<sB", dtype.value, padding)
data = struct.pack(f"<{len(vector)}{format_str}", *vector)
data = struct.pack(f"<{len(vector)}{format_str}", *vector) # type: ignore
return cls(metadata + data, subtype=VECTOR_SUBTYPE)

def as_vector(self) -> BinaryVector:
Expand Down
20 changes: 19 additions & 1 deletion test/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,13 @@
is_valid,
json_util,
)
from bson.binary import USER_DEFINED_SUBTYPE, Binary, BinaryVectorDtype, UuidRepresentation
from bson.binary import (
USER_DEFINED_SUBTYPE,
Binary,
BinaryVector,
BinaryVectorDtype,
UuidRepresentation,
)
from bson.code import Code
from bson.codec_options import CodecOptions, DatetimeConversion
from bson.datetime_ms import _DATETIME_ERROR_SUGGESTION
Expand Down Expand Up @@ -785,6 +791,18 @@ def test_vector(self):
else:
self.fail("Failed to raise an exception.")

# Test form of Binary.from_vector(BinaryVector)

assert padded_vec == Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.PACKED_BIT, padding)
)
assert binary_vector == Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.INT8)
)
assert float_binary == Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.FLOAT32)
)

def test_unicode_regex(self):
"""Tests we do not get a segfault for C extension on unicode RegExs.
This had been happening.
Expand Down
Loading