Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llama-index-core/llama_index/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ class BaseNode(BaseComponent):
id_: str = Field(
default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the node."
)
embedding: Optional[List[float]] = Field(
embedding: Optional[List[Union[float, int]]] = Field(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% sure how I feel about changing this, but I guess it makes sense. This will likely cause some mypy errors? Let's see the linting output.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a low-level change that I wasn't expecting to make. Any other change to the core schema (for example, adding an embedding_type field and switching based on that) would be a more considerable breaking change.

Do you have any thoughts on how we should proceed? I suspect we'll see more need to add byte or binary support in the future.

default=None, description="Embedding of the node."
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@
)
MATCH_ALL_QUERY = {"match_all": {}} # type: Dict

# Accepted values for the ``data_type`` option of the vector field.
VALID_DATA_TYPES = ["float", "byte", "binary"]
# Engines accepted when ``data_type`` is "byte".
BYTE_VECTOR_ENGINES = ["lucene", "faiss"]
# The single engine accepted when ``data_type`` is "binary".
BINARY_VECTOR_ENGINE = "faiss"

# Messages used for the ValueError raised when validation fails.
INVALID_DATA_TYPE = "Data type must be one of {}".format(VALID_DATA_TYPES)
INVALID_BYTE_VECTOR_ENGINE = (
    "Byte vectors only support 'lucene' or 'faiss' " "as the engine type."
)
INVALID_BINARY_ENGINE = "Binary vectors must use 'faiss' as the engine type"
INVALID_BINARY_SPACE_TYPE = "Binary vectors must use 'hamming' as the space type"


class OpensearchVectorClient:
"""
Expand All @@ -48,18 +58,12 @@ class OpensearchVectorClient:
embedding_field (str): Name of the field in the index to store
embedding array in.
text_field (str): Name of the field to grab text from
data_type (str): Type of vector data. One of ["float", "byte", "binary"]
method (Optional[dict]): Opensearch "method" JSON obj for configuring
the KNN index.
This includes engine, metric, and other config params. Defaults to:
{"name": "hnsw", "space_type": "l2", "engine": "nmslib",
"parameters": {"ef_construction": 256, "m": 48}}
settings: Optional[dict]: Settings for the Opensearch index creation. Defaults to:
{"index": {"knn": True, "knn.algo_param.ef_search": 100}}
space_type (Optional[str]): space type for distance metric calculation. Defaults to: l2
os_client (Optional[OSClient]): Custom synchronous client (see OpenSearch from opensearch-py)
os_async_client (Optional[OSClient]): Custom asynchronous client (see AsyncOpenSearch from opensearch-py)
**kwargs: Optional arguments passed to the OpenSearch client from opensearch-py.

This includes engine, metric, and other config params.
space_type (Optional[str]): space type for distance metric calculation.
**kwargs: Optional arguments passed to the OpenSearch client.
"""

def __init__(
Expand All @@ -69,6 +73,7 @@ def __init__(
dim: int,
embedding_field: str = "embedding",
text_field: str = "content",
data_type: str = "float",
method: Optional[dict] = None,
settings: Optional[dict] = None,
engine: Optional[str] = "nmslib",
Expand All @@ -80,10 +85,26 @@ def __init__(
**kwargs: Any,
):
"""Init params."""
if method is not None:
engine = method.get("engine", engine)
space_type = method.get("space_type", space_type)

if data_type not in VALID_DATA_TYPES:
raise ValueError(INVALID_DATA_TYPE)

if data_type == "byte" and engine not in BYTE_VECTOR_ENGINES:
raise ValueError(INVALID_BYTE_VECTOR_ENGINE)

if data_type == "binary":
if engine != BINARY_VECTOR_ENGINE:
raise ValueError(INVALID_BINARY_ENGINE)
if space_type != "hamming":
raise ValueError(INVALID_BINARY_SPACE_TYPE)
# Default method configuration
if method is None:
method = {
"name": "hnsw",
"space_type": "l2",
"space_type": space_type,
"engine": engine,
"parameters": {"ef_construction": 256, "m": 48},
}
Expand All @@ -99,6 +120,7 @@ def __init__(
self._index = index
self._text_field = text_field
self._max_chunk_bytes = max_chunk_bytes
self._data_type = data_type

self._search_pipeline = search_pipeline
http_auth = kwargs.get("http_auth")
Expand All @@ -112,6 +134,7 @@ def __init__(
embedding_field: {
"type": "knn_vector",
"dimension": dim,
"data_type": data_type,
"method": method,
},
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-vector-stores-opensearch"
readme = "README.md"
version = "0.5.2"
version = "0.6.0"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
Expand Down
Loading
Loading