Skip to content

Commit 5c0ca8d

Browse files
authored
Fix sparse vector compatibility (#19882)
1 parent 014e8a8 commit 5c0ca8d

File tree

4 files changed

+148
-6
lines changed

4 files changed

+148
-6
lines changed

llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,13 +1578,14 @@ def get_default_sparse_query_encoder(
15781578

15791579
def _detect_vector_format(self, collection_name: str) -> None:
15801580
"""
1581-
Detect the vector format of an existing collection.
1582-
This allows backward compatibility with collections that were created before
1583-
the refactoring to use named vectors consistently.
1581+
Detect and handle old vector formats from existing collections.
1582+
- named vs non-named vectors
1583+
- new sparse vector field name vs old sparse vector field name
15841584
"""
15851585
try:
15861586
collection_info = self._client.get_collection(collection_name)
15871587
vectors_config = collection_info.config.params.vectors
1588+
sparse_vectors = collection_info.config.params.sparse_vectors or {}
15881589

15891590
# Check if we have an unnamed vector format (where name is empty string)
15901591
if isinstance(vectors_config, dict):
@@ -1597,18 +1598,28 @@ def _detect_vector_format(self, collection_name: str) -> None:
15971598
self._legacy_vector_format = True
15981599
self.dense_vector_name = LEGACY_UNNAMED_VECTOR
15991600

1601+
# Detect sparse vector name if any sparse vectors configured
1602+
if isinstance(sparse_vectors, dict) and len(sparse_vectors) > 0:
1603+
if self.sparse_vector_name in sparse_vectors:
1604+
pass
1605+
elif DEFAULT_SPARSE_VECTOR_NAME_OLD in sparse_vectors:
1606+
self.sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
1607+
16001608
except Exception as e:
16011609
logger.warning(
16021610
f"Could not detect vector format for collection {collection_name}: {e}"
16031611
)
16041612

16051613
async def _adetect_vector_format(self, collection_name: str) -> None:
16061614
"""
1607-
Asynchronous method to detect the vector format of an existing collection.
1615+
Asynchronous method to detect and handle old vector formats from existing collections.
1616+
- named vs non-named vectors
1617+
- new sparse vector field name vs old sparse vector field name
16081618
"""
16091619
try:
16101620
collection_info = await self._aclient.get_collection(collection_name)
16111621
vectors_config = collection_info.config.params.vectors
1622+
sparse_vectors = collection_info.config.params.sparse_vectors or {}
16121623

16131624
# Check if we have an unnamed vector format (where name is empty string)
16141625
if isinstance(vectors_config, dict):
@@ -1621,6 +1632,13 @@ async def _adetect_vector_format(self, collection_name: str) -> None:
16211632
self._legacy_vector_format = True
16221633
self.dense_vector_name = LEGACY_UNNAMED_VECTOR
16231634

1635+
# Detect sparse vector name if any sparse vectors configured
1636+
if isinstance(sparse_vectors, dict) and len(sparse_vectors) > 0:
1637+
if self.sparse_vector_name in sparse_vectors:
1638+
pass
1639+
elif DEFAULT_SPARSE_VECTOR_NAME_OLD in sparse_vectors:
1640+
self.sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
1641+
16241642
except Exception as e:
16251643
logger.warning(
16261644
f"Could not detect vector format for collection {collection_name}: {e}"

llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ dev = [
2828

2929
[project]
3030
name = "llama-index-vector-stores-qdrant"
31-
version = "0.8.4"
31+
version = "0.8.5"
3232
description = "llama-index vector_stores qdrant integration"
3333
authors = [{name = "Your Name", email = "[email protected]"}]
3434
requires-python = ">=3.9,<3.14"

llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
FilterCondition,
2222
FilterOperator,
2323
)
24+
from qdrant_client import AsyncQdrantClient
25+
from qdrant_client.http import models as qmodels
2426

2527
requires_qdrant_cluster = pytest.mark.skipif(
2628
not os.getenv("QDRANT_CLUSTER_URL"),
@@ -694,3 +696,125 @@ def test_create_payload_indexes_returns_early_when_no_payload_indexes(
694696
vector_store: QdrantVectorStore,
695697
):
696698
vector_store._create_payload_indexes()
699+
700+
701+
def test_sparse_vector_name_detection_switches_to_legacy() -> None:
    """A collection exposing only the legacy sparse field makes the store adopt that name."""

    # Minimal stand-ins for the collection-info object returned by the client;
    # only the attribute path config.params.{vectors, sparse_vectors} is populated.
    class StubParams:
        vectors = {"text-dense": object()}
        sparse_vectors = {"text-sparse": object()}

    class StubConfig:
        params = StubParams()

    class StubCollectionInfo:
        config = StubConfig()

    client = MagicMock(spec=QdrantClient)
    client.get_collection.return_value = StubCollectionInfo()
    client.collection_exists.return_value = True

    store = QdrantVectorStore(collection_name="test_collection", client=client)

    assert store.sparse_vector_name == "text-sparse"
724+
725+
726+
def test_sparse_vector_name_detection_keeps_new() -> None:
    """A collection exposing only the new sparse field leaves the store on the new name."""

    # Minimal stand-ins for the collection-info object returned by the client;
    # only the attribute path config.params.{vectors, sparse_vectors} is populated.
    class StubParams:
        vectors = {"text-dense": object()}
        sparse_vectors = {"text-sparse-new": object()}

    class StubConfig:
        params = StubParams()

    class StubCollectionInfo:
        config = StubConfig()

    client = MagicMock(spec=QdrantClient)
    client.get_collection.return_value = StubCollectionInfo()
    client.collection_exists.return_value = True

    store = QdrantVectorStore(collection_name="test_collection", client=client)

    assert store.sparse_vector_name == "text-sparse-new"
749+
750+
751+
def test_sparse_vector_name_respects_user_specified() -> None:
    """A caller-supplied sparse vector name that exists in the collection is left untouched."""

    # Minimal stand-ins for the collection-info object returned by the client.
    # The collection carries both the caller's custom sparse field and another
    # sparse field, so detection has a choice to make.
    class StubParams:
        vectors = {"text-dense": object()}
        sparse_vectors = {
            "custom-sparse": object(),
            "text-sparse-new": object(),
        }

    class StubConfig:
        params = StubParams()

    class StubCollectionInfo:
        config = StubConfig()

    client = MagicMock(spec=QdrantClient)
    client.get_collection.return_value = StubCollectionInfo()
    client.collection_exists.return_value = True

    store_kwargs = {
        "collection_name": "test_collection",
        "client": client,
        "sparse_vector_name": "custom-sparse",
    }
    store = QdrantVectorStore(**store_kwargs)

    assert store.sparse_vector_name == "custom-sparse"
781+
782+
783+
@pytest.mark.asyncio
async def test_async_query_initializes_with_async_client_only() -> None:
    """
    Query through a store that was built with only an async client.

    The collection is created and populated directly through the async client
    before the store is constructed, so the store is attached to a collection
    that already exists. ``aquery`` is then expected to return the single
    stored point.

    The in-memory client is closed in a ``finally`` clause so it is released
    even when an assertion fails.
    """
    collection_name = "async_init_test"
    aclient = AsyncQdrantClient(":memory:")

    try:
        # Create collection with a single named dense vector.
        await aclient.create_collection(
            collection_name=collection_name,
            vectors_config={
                "text-dense": qmodels.VectorParams(
                    size=2, distance=qmodels.Distance.COSINE
                )
            },
        )

        # Insert a single point whose vector matches the query exactly.
        await aclient.upsert(
            collection_name=collection_name,
            points=[
                qmodels.PointStruct(
                    id="11111111-1111-1111-1111-111111111111",
                    vector={"text-dense": [1.0, 0.0]},
                    payload={"text": "hello"},
                )
            ],
        )

        # Initialize store with async client only (no sync client is supplied).
        store = QdrantVectorStore(collection_name=collection_name, aclient=aclient)

        query = VectorStoreQuery(query_embedding=[1.0, 0.0], similarity_top_k=1)
        result = await store.aquery(query)

        assert result is not None
        assert len(result.nodes) == 1
        assert getattr(result.nodes[0], "text", None) == "hello"
    finally:
        # Release the client regardless of test outcome.
        await aclient.close()

llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)