@@ -219,6 +219,10 @@ def __init__(
219219 dense_vector_name = dense_vector_name ,
220220 sparse_vector_name = sparse_vector_name ,
221221 )
222+ # Track if the user provided their own sparse functions. This is to prevent
223+ # them from being overwritten by the lazy-init correction for async clients.
224+ self ._user_provided_sparse_doc_fn = sparse_doc_fn is not None
225+ self ._user_provided_sparse_query_fn = sparse_query_fn is not None
222226
223227 if (
224228 client is None
@@ -1545,12 +1549,13 @@ def get_default_sparse_doc_encoder(
15451549 ) -> SparseEncoderCallable :
15461550 """
15471551 Get the default sparse document encoder.
1548- Use old format for backward compatibility if detected.
1552+ For async-only clients, assumes new format initially.
1553+ Will be auto-corrected on first async operation if collection uses old format.
15491554 """
1550- if self .use_old_sparse_encoder ( collection_name ) :
1551- # Update the sparse vector name to use the old format
1552- self .sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
1553- return default_sparse_encoder ("naver/efficient-splade-VI-BT-large-doc" )
1555+ if self ._client is not None :
1556+ if self . use_old_sparse_encoder ( collection_name ):
1557+ self .sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
1558+ return default_sparse_encoder ("naver/efficient-splade-VI-BT-large-doc" )
15541559
15551560 if fastembed_sparse_model is not None :
15561561 return fastembed_sparse_encoder (model_name = fastembed_sparse_model )
@@ -1564,12 +1569,16 @@ def get_default_sparse_query_encoder(
15641569 ) -> SparseEncoderCallable :
15651570 """
15661571 Get the default sparse query encoder.
1567- Use old format for backward compatibility if detected.
1572+ For async-only clients, assumes new format initially.
1573+ Will be auto-corrected on first async operation if collection uses old format.
15681574 """
1569- if self .use_old_sparse_encoder (collection_name ):
1570- # Update the sparse vector name to use the old format
1571- self .sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
1572- return default_sparse_encoder ("naver/efficient-splade-VI-BT-large-query" )
1575+ if self ._client is not None :
1576+ if self .use_old_sparse_encoder (collection_name ):
1577+ # Update the sparse vector name to use the old format
1578+ self .sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
1579+ return default_sparse_encoder (
1580+ "naver/efficient-splade-VI-BT-large-query"
1581+ )
15731582
15741583 if fastembed_sparse_model is not None :
15751584 return fastembed_sparse_encoder (model_name = fastembed_sparse_model )
@@ -1583,6 +1592,8 @@ def _detect_vector_format(self, collection_name: str) -> None:
15831592 - new sparse vector field name vs old sparse vector field name
15841593 """
15851594 try :
1595+ old_sparse_name = self .sparse_vector_name # Store state before detection
1596+
15861597 collection_info = self ._client .get_collection (collection_name )
15871598 vectors_config = collection_info .config .params .vectors
15881599 sparse_vectors = collection_info .config .params .sparse_vectors or {}
@@ -1605,6 +1616,10 @@ def _detect_vector_format(self, collection_name: str) -> None:
16051616 elif DEFAULT_SPARSE_VECTOR_NAME_OLD in sparse_vectors :
16061617 self .sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
16071618
1619+ # If the name changed, our initial assumption was wrong. Correct it.
1620+ if self .enable_hybrid and old_sparse_name != self .sparse_vector_name :
1621+ self ._reinitialize_sparse_encoders ()
1622+
16081623 except Exception as e :
16091624 logger .warning (
16101625 f"Could not detect vector format for collection { collection_name } : { e } "
@@ -1613,10 +1628,10 @@ def _detect_vector_format(self, collection_name: str) -> None:
16131628 async def _adetect_vector_format (self , collection_name : str ) -> None :
16141629 """
16151630 Asynchronous method to detect and handle old vector formats from existing collections.
1616- - named vs non-named vectors
1617- - new sparse vector field name vs old sparse vector field name
16181631 """
16191632 try :
1633+ old_sparse_name = self .sparse_vector_name # Store state before detection
1634+
16201635 collection_info = await self ._aclient .get_collection (collection_name )
16211636 vectors_config = collection_info .config .params .vectors
16221637 sparse_vectors = collection_info .config .params .sparse_vectors or {}
@@ -1632,18 +1647,49 @@ async def _adetect_vector_format(self, collection_name: str) -> None:
16321647 self ._legacy_vector_format = True
16331648 self .dense_vector_name = LEGACY_UNNAMED_VECTOR
16341649
1635- # Detect sparse vector name if any sparse vectors configured
1650+ # Detect sparse vector name and correct if necessary
16361651 if isinstance (sparse_vectors , dict ) and len (sparse_vectors ) > 0 :
16371652 if self .sparse_vector_name in sparse_vectors :
16381653 pass
16391654 elif DEFAULT_SPARSE_VECTOR_NAME_OLD in sparse_vectors :
16401655 self .sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD
16411656
1657+ # If the name changed, our initial assumption was wrong. Correct it.
1658+ if self .enable_hybrid and old_sparse_name != self .sparse_vector_name :
1659+ self ._reinitialize_sparse_encoders ()
1660+
16421661 except Exception as e :
16431662 logger .warning (
16441663 f"Could not detect vector format for collection { collection_name } : { e } "
16451664 )
16461665
def _reinitialize_sparse_encoders(self) -> None:
    """
    Rebuild the default sparse encoders to match the current sparse vector name.

    Does nothing when hybrid search is disabled. An encoder slot is only
    replaced when the corresponding ``_user_provided_sparse_*_fn`` flag is
    falsy, so encoders supplied by the caller are left untouched.

    For each replaceable slot:
    - if ``self.sparse_vector_name`` equals ``DEFAULT_SPARSE_VECTOR_NAME_OLD``,
      the SPLADE doc/query model is loaded through ``default_sparse_encoder``;
    - otherwise ``fastembed_sparse_encoder`` is called with
      ``self.fastembed_sparse_model``.
    """
    if not self.enable_hybrid:
        return

    # NOTE(review): get_default_sparse_doc_encoder / get_default_sparse_query_encoder
    # only call fastembed_sparse_encoder when the model name is not None; here it
    # is passed through unconditionally -- confirm that a None model is acceptable.

    # Document-side encoder: keep whatever the user passed in.
    if not self._user_provided_sparse_doc_fn:
        uses_old_name = self.sparse_vector_name == DEFAULT_SPARSE_VECTOR_NAME_OLD
        self._sparse_doc_fn = (
            default_sparse_encoder("naver/efficient-splade-VI-BT-large-doc")
            if uses_old_name
            else fastembed_sparse_encoder(model_name=self.fastembed_sparse_model)
        )

    # Query-side encoder: same rule, with the query variant of the old model.
    if not self._user_provided_sparse_query_fn:
        uses_old_name = self.sparse_vector_name == DEFAULT_SPARSE_VECTOR_NAME_OLD
        self._sparse_query_fn = (
            default_sparse_encoder("naver/efficient-splade-VI-BT-large-query")
            if uses_old_name
            else fastembed_sparse_encoder(model_name=self.fastembed_sparse_model)
        )

16471693 def _validate_custom_sharding (
16481694 self ,
16491695 ):
0 commit comments