Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions bson/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import struct
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union
from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union, overload
from uuid import UUID

"""Tools for representing BSON binary data.
Expand Down Expand Up @@ -195,7 +195,7 @@ class UuidRepresentation:


VECTOR_SUBTYPE = 9
"""**(BETA)** BSON binary subtype for densely packed vector data.
"""BSON binary subtype for densely packed vector data.

.. versionadded:: 4.10
"""
Expand All @@ -207,7 +207,7 @@ class UuidRepresentation:


class BinaryVectorDtype(Enum):
"""**(BETA)** Datatypes of vector subtype.
"""Datatypes of vector subtype.

:param FLOAT32: (0x27) Pack list of :class:`float` as float32
:param INT8: (0x03) Pack list of :class:`int` in [-128, 127] as signed int8
Expand All @@ -229,7 +229,7 @@ class BinaryVectorDtype(Enum):

@dataclass
class BinaryVector:
"""**(BETA)** Vector of numbers along with metadata for binary interoperability.
"""Vector of numbers along with metadata for binary interoperability.
.. versionadded:: 4.10
"""

Expand All @@ -256,7 +256,7 @@ class Binary(bytes):
the difference between what should be considered binary data and
what should be considered a string when we encode to BSON.

**(BETA)** Subtype 9 provides a space-efficient representation of 1-dimensional vector data.
Subtype 9 provides a space-efficient representation of 1-dimensional vector data.
Its data is prepended with two bytes of metadata.
The first (dtype) describes its data type, such as float32 or int8.
The second (padding) prescribes the number of bits to ignore in the final byte.
Expand All @@ -278,7 +278,7 @@ class Binary(bytes):
Support any bytes-like type that implements the buffer protocol.

.. versionchanged:: 4.10
**(BETA)** Addition of vector subtype.
Addition of vector subtype.
"""

_type_marker = 5
Expand Down Expand Up @@ -397,14 +397,26 @@ def as_uuid(self, uuid_representation: int = UuidRepresentation.STANDARD) -> UUI
f"cannot decode subtype {self.subtype} to {UUID_REPRESENTATION_NAMES[uuid_representation]}"
)

@classmethod
@overload
def from_vector(
    cls: Type[Binary],
    vector: BinaryVector,
) -> Binary:
    # Typing-only overload: called with a BinaryVector as the sole argument
    # (no separate dtype or padding parameters in this form).
    ...

@classmethod
@overload
def from_vector(
    cls: Type[Binary],
    vector: Union[list[int], list[float]],
    dtype: BinaryVectorDtype,
    padding: int = 0,
) -> Binary:
    # Typing-only overload: a plain list of numbers must be accompanied by an
    # explicit dtype; padding defaults to 0 in this form.
    # The vector annotation is a union of homogeneous lists because `list`
    # accepts exactly one type argument (`list[int, float]` is rejected by
    # type checkers).
    ...

@classmethod
def from_vector(
cls: Type[Binary],
vector: Union[BinaryVector, list[int, float]],
dtype: Optional[BinaryVectorDtype] = None,
padding: Optional[int] = None,
) -> Binary:
"""**(BETA)** Create a BSON :class:`~bson.binary.Binary` of Vector subtype.
"""Create a BSON :class:`~bson.binary.Binary` of Vector subtype.

To interpret the representation of the numbers, a data type must be included.
See :class:`~bson.binary.BinaryVectorDtype` for available types and descriptions.
Expand Down Expand Up @@ -447,7 +459,7 @@ def from_vector(
return cls(metadata + data, subtype=VECTOR_SUBTYPE)

def as_vector(self) -> BinaryVector:
"""**(BETA)** From the Binary, create a list of numbers, along with dtype and padding.
"""From the Binary, create a list of numbers, along with dtype and padding.

:return: BinaryVector

Expand Down
6 changes: 6 additions & 0 deletions test/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,12 @@ def test_vector(self):
assert float_binary == Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.FLOAT32)
)
# Confirm kwargs cannot be passed when BinaryVector is provided
with self.assertRaises(ValueError):
Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.PACKED_BIT, padding),
dtype=BinaryVectorDtype.PACKED_BIT,
) # type: ignore[call-overload]

def test_unicode_regex(self):
"""Tests we do not get a segfault for C extension on unicode RegExs.
Expand Down
Loading