Fix sparse vector compatibility (#19882)

adrianlyjak · web-flow · commit 5c0ca8d97e29 · 2025-09-16T19:10:55.000Z
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py
@@ -1578,13 +1578,14 @@ def get_default_sparse_query_encoder(
 
     def _detect_vector_format(self, collection_name: str) -> None:
         """
-        Detect the vector format of an existing collection.
-        This allows backward compatibility with collections that were created before
-        the refactoring to use named vectors consistently.
+        Detect and handle old vector formats from existing collections.
+        - named vs non-named vectors
+        - new sparse vector field name vs old sparse vector field name
         """
         try:
             collection_info = self._client.get_collection(collection_name)
             vectors_config = collection_info.config.params.vectors
+            sparse_vectors = collection_info.config.params.sparse_vectors or {}
 
             # Check if we have an unnamed vector format (where name is empty string)
             if isinstance(vectors_config, dict):
@@ -1597,18 +1598,28 @@ def _detect_vector_format(self, collection_name: str) -> None:
                 self._legacy_vector_format = True
                 self.dense_vector_name = LEGACY_UNNAMED_VECTOR
 
+            # Detect sparse vector name if any sparse vectors configured
+            if isinstance(sparse_vectors, dict) and len(sparse_vectors) > 0:
+                if self.sparse_vector_name in sparse_vectors:
+                    pass
+                elif DEFAULT_SPARSE_VECTOR_NAME_OLD in sparse_vectors:
+                    self.sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
+
         except Exception as e:
             logger.warning(
                 f"Could not detect vector format for collection {collection_name}: {e}"
             )
 
     async def _adetect_vector_format(self, collection_name: str) -> None:
         """
-        Asynchronous method to detect the vector format of an existing collection.
+        Asynchronous method to detect and handle old vector formats from existing collections.
+        - named vs non-named vectors
+        - new sparse vector field name vs old sparse vector field name
         """
         try:
             collection_info = await self._aclient.get_collection(collection_name)
             vectors_config = collection_info.config.params.vectors
+            sparse_vectors = collection_info.config.params.sparse_vectors or {}
 
             # Check if we have an unnamed vector format (where name is empty string)
             if isinstance(vectors_config, dict):
@@ -1621,6 +1632,13 @@ async def _adetect_vector_format(self, collection_name: str) -> None:
                 self._legacy_vector_format = True
                 self.dense_vector_name = LEGACY_UNNAMED_VECTOR
 
+            # Detect sparse vector name if any sparse vectors configured
+            if isinstance(sparse_vectors, dict) and len(sparse_vectors) > 0:
+                if self.sparse_vector_name in sparse_vectors:
+                    pass
+                elif DEFAULT_SPARSE_VECTOR_NAME_OLD in sparse_vectors:
+                    self.sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
+
         except Exception as e:
             logger.warning(
                 f"Could not detect vector format for collection {collection_name}: {e}"
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/pyproject.toml
@@ -28,7 +28,7 @@ dev = [
 
 [project]
 name = "llama-index-vector-stores-qdrant"
-version = "0.8.4"
+version = "0.8.5"
 description = "llama-index vector_stores qdrant integration"
 authors = [{name = "Your Name", email = "you@example.com"}]
 requires-python = ">=3.9,<3.14"
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/tests/test_vector_stores_qdrant.py
@@ -21,6 +21,8 @@
     FilterCondition,
     FilterOperator,
 )
+from qdrant_client import AsyncQdrantClient
+from qdrant_client.http import models as qmodels
 
 requires_qdrant_cluster = pytest.mark.skipif(
     not os.getenv("QDRANT_CLUSTER_URL"),
@@ -694,3 +696,125 @@ def test_create_payload_indexes_returns_early_when_no_payload_indexes(
     vector_store: QdrantVectorStore,
 ):
     vector_store._create_payload_indexes()
+
+
+def test_sparse_vector_name_detection_switches_to_legacy() -> None:
+    """A collection exposing only the legacy sparse name (`text-sparse` here) makes the store adopt it."""
+    mock_client = MagicMock(spec=QdrantClient)
+
+    class DummyParams:  # stands in for `collection_info.config.params`
+        def __init__(self):
+            self.vectors = {"text-dense": object()}
+            self.sparse_vectors = {"text-sparse": object()}  # only the old-style name is present
+
+    class DummyConfig:  # stands in for `collection_info.config`
+        def __init__(self):
+            self.params = DummyParams()
+
+    class DummyCollection:  # minimal object returned by the mocked `get_collection`
+        def __init__(self):
+            self.config = DummyConfig()
+
+    mock_client.collection_exists.return_value = True  # pretend the collection already exists
+    mock_client.get_collection.return_value = DummyCollection()
+
+    vs = QdrantVectorStore(collection_name="test_collection", client=mock_client)  # presumably runs format detection -- confirm in __init__
+
+    assert vs.sparse_vector_name == "text-sparse"  # no sparse_vector_name was passed, so this came from detection
+
+
+def test_sparse_vector_name_detection_keeps_new() -> None:
+    """A collection exposing only `text-sparse-new` leaves the store's sparse vector name at `text-sparse-new`."""
+    mock_client = MagicMock(spec=QdrantClient)
+
+    class DummyParams:  # stands in for `collection_info.config.params`
+        def __init__(self):
+            self.vectors = {"text-dense": object()}
+            self.sparse_vectors = {"text-sparse-new": object()}  # only the new-style name is present
+
+    class DummyConfig:  # stands in for `collection_info.config`
+        def __init__(self):
+            self.params = DummyParams()
+
+    class DummyCollection:  # minimal object returned by the mocked `get_collection`
+        def __init__(self):
+            self.config = DummyConfig()
+
+    mock_client.collection_exists.return_value = True  # pretend the collection already exists
+    mock_client.get_collection.return_value = DummyCollection()
+
+    vs = QdrantVectorStore(collection_name="test_collection", client=mock_client)  # no sparse_vector_name passed
+
+    assert vs.sparse_vector_name == "text-sparse-new"  # presumably the constructor default -- confirm against the constant
+
+
+def test_sparse_vector_name_respects_user_specified() -> None:
+    """A user-supplied sparse vector name that exists in the collection is kept, even if other names exist too."""
+    mock_client = MagicMock(spec=QdrantClient)
+
+    class DummyParams:  # stands in for `collection_info.config.params`
+        def __init__(self):
+            self.vectors = {"text-dense": object()}
+            self.sparse_vectors = {  # both the custom name and `text-sparse-new` are configured
+                "custom-sparse": object(),
+                "text-sparse-new": object(),
+            }
+
+    class DummyConfig:  # stands in for `collection_info.config`
+        def __init__(self):
+            self.params = DummyParams()
+
+    class DummyCollection:  # minimal object returned by the mocked `get_collection`
+        def __init__(self):
+            self.config = DummyConfig()
+
+    mock_client.collection_exists.return_value = True  # pretend the collection already exists
+    mock_client.get_collection.return_value = DummyCollection()
+
+    vs = QdrantVectorStore(
+        collection_name="test_collection",
+        client=mock_client,
+        sparse_vector_name="custom-sparse",  # explicit choice that detection must not override
+    )
+
+    assert vs.sparse_vector_name == "custom-sparse"
+
+
+@pytest.mark.asyncio
+async def test_async_query_initializes_with_async_client_only() -> None:
+    """
+    With only an async client and an already-existing collection, ``aquery``
+    should lazily detect the vector format and return the single stored point.
+    """
+    collection_name = "async_init_test"
+    aclient = AsyncQdrantClient(":memory:")  # in-memory client, no server needed
+
+    # Create the collection with one named dense vector ("text-dense", size 2, cosine)
+    await aclient.create_collection(
+        collection_name=collection_name,
+        vectors_config={
+            "text-dense": qmodels.VectorParams(size=2, distance=qmodels.Distance.COSINE)
+        },
+    )
+
+    # Insert a single point whose payload text is "hello"
+    await aclient.upsert(
+        collection_name=collection_name,
+        points=[
+            qmodels.PointStruct(
+                id="11111111-1111-1111-1111-111111111111",
+                vector={"text-dense": [1.0, 0.0]},
+                payload={"text": "hello"},
+            )
+        ],
+    )
+
+    # Initialize the store with the async client only (no sync `client` is passed)
+    store = QdrantVectorStore(collection_name=collection_name, aclient=aclient)
+
+    query = VectorStoreQuery(query_embedding=[1.0, 0.0], similarity_top_k=1)  # same vector as the stored point
+    result = await store.aquery(query)
+
+    assert result is not None
+    assert len(result.nodes) == 1  # one point stored, top_k=1
+    assert getattr(result.nodes[0], "text", None) == "hello"  # getattr tolerates node types without `.text`
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/uv.lock b/llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/uv.lock