FEAT: [model] support jina-reranker-v3 (xorbitsai#4156)

llyycchhee · web-flow · commit 7f039ae81339 · 2025-10-22T12:45:56.000+08:00
diff --git a/doc/source/models/model_abilities/embed.rst b/doc/source/models/model_abilities/embed.rst
@@ -123,4 +123,10 @@ Does Embeddings API provides integration method for LangChain?
 -----------------------------------------------------------------------------------
 
 Yes, you can refer to the related sections in LangChain's respective official Xinference documentation.
-Here is the link: `Text Embedding Models: Xinference <https://python.langchain.com/docs/integrations/text_embedding/xinference>`_ 
+Here is the link: `Text Embedding Models: Xinference <https://python.langchain.com/docs/integrations/text_embedding/xinference>`_ 
+
+
+Does the Embeddings API support hybrid models?
+-----------------------------------------------------------------------------------
+
+Yes, you can use ``flag`` as the engine to deploy the model and call the Embeddings API with the extra parameter ``return_sparse=True``, which will return sparse vectors.
diff --git a/xinference/model/rerank/sentence_transformers/core.py b/xinference/model/rerank/sentence_transformers/core.py
@@ -81,6 +81,7 @@ def load(self):
         if (
             self.model_family.type == "normal"
             and "qwen3" not in self.model_family.model_name.lower()
+            and "jina-reranker-v3" not in self.model_family.model_name.lower()
         ):
             try:
                 import sentence_transformers
@@ -109,7 +110,10 @@ def load(self):
             )
             if self._use_fp16:
                 self._model.model.half()
-        elif "qwen3" in self.model_family.model_name.lower():
+        elif (
+            "qwen3" in self.model_family.model_name.lower()
+            or "jina-reranker-v3" in self.model_family.model_name.lower()
+        ):
             # qwen3-reranker
             # now we use transformers
             # TODO: support engines for rerank models
@@ -225,6 +229,7 @@ def rerank(
         if (
             self.model_family.type == "normal"
             and "qwen3" not in self.model_family.model_name.lower()
+            and "jina-reranker-v3" not in self.model_family.model_name.lower()
         ):
             logger.debug("Passing processed sentences: %s", sentence_combinations)
             similarity_scores = self._model.predict(
@@ -235,7 +240,10 @@ def rerank(
             ).cpu()
             if similarity_scores.dtype == torch.bfloat16:
                 similarity_scores = similarity_scores.float()
-        elif "qwen3" in self.model_family.model_name.lower():
+        elif (
+            "qwen3" in self.model_family.model_name.lower()
+            or "jina-reranker-v3" in self.model_family.model_name.lower()
+        ):
 
             def format_instruction(instruction, query, doc):
                 if instruction is None: