Skip to content

Commit c4cc7f5

Browse files
committed
Update docstrings to reflect reranking endpoint change
1 parent f3a489e commit c4cc7f5

File tree

3 files changed

+19
-15
lines changed

3 files changed

+19
-15
lines changed

nemo_retriever/src/nemo_retriever/rerank/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
rerank_hits
1414
Convenience function to rerank a list of LanceDB hit dicts for a single
1515
query string, using either a local ``NemotronRerankV2`` model or a remote
16-
vLLM / NIM ``/rerank`` endpoint.
16+
vLLM / NIM ranking endpoint.
1717
"""
1818

1919
from .rerank import NemotronRerankActor, NemotronRerankCPUActor, NemotronRerankGPUActor, rerank_hits

nemo_retriever/src/nemo_retriever/rerank/rerank.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,16 @@
1212
Remote endpoint
1313
---------------
1414
When ``invoke_url`` is set the actor/function calls a vLLM (>=0.14) or NIM
15-
server that exposes the OpenAI-compatible ``/rerank`` REST API::
15+
server that exposes the ranking REST API used by NeMo Retriever and NIM. The helper accepts
16+
either a fully qualified ``.../reranking`` URL or a base URL, to which it appends
17+
``/v1/ranking`` automatically::
1618
17-
POST /rerank
19+
POST /v1/ranking
1820
{
1921
"model": "nvidia/llama-nemotron-rerank-1b-v2",
20-
"query": "...",
21-
"documents": ["...", "..."],
22-
"top_n": N
22+
"query": {"text": "..."},
23+
"passages": [{"text": "..."}, {"text": "..."}],
24+
"truncate": "END"
2325
}
2426
2527
Local model
@@ -80,13 +82,14 @@ def _rerank_via_endpoint(
8082
api_key: str = "",
8183
) -> List[float]:
8284
"""
83-
Call a vLLM / NIM ``/rerank`` REST endpoint and return per-document scores.
85+
Call a vLLM / NIM ranking endpoint and return per-document scores.
8486
85-
The server must expose the OpenAI-compatible rerank API introduced in
86-
vLLM >= 0.14.0::
87+
The server must expose the ranking API used by NeMo Retriever and NIM. Pass
88+
either a full ``.../reranking`` URL or a base URL; base URLs are
89+
normalized to ``.../v1/ranking``::
8790
88-
POST {endpoint}/rerank
89-
{"model": ..., "query": ..., "documents": [...], "top_n": N}
91+
POST {endpoint}/v1/ranking
92+
{"model": ..., "query": {"text": ...}, "passages": [{"text": ...}]}
9093
9194
Parameters
9295
----------
@@ -96,7 +99,7 @@ def _rerank_via_endpoint(
9699
List of document strings to score against the query.
97100
endpoint:
98101
Base URL of the reranking endpoint (e.g. ``http://localhost:8015
99-
``). The function will append ``/v1/ranking`` if the URL does not
102+
``). The function will append ``/v1/ranking`` if the URL does not
100103
already end with ``/reranking``.
101104
model_name:
102105
Model identifier sent to the remote endpoint (default
@@ -174,8 +177,9 @@ def rerank_hits(
174177
A ``NemotronRerankV2`` instance (local GPU inference). Ignored when
175178
*invoke_url* is set.
176179
invoke_url:
177-
Base URL of a vLLM / NIM ``/rerank`` endpoint. Takes priority over
178-
*model*.
180+
Base URL of a vLLM / NIM ranking endpoint. Takes priority over
181+
*model*. Base URLs are normalized to ``/v1/ranking`` unless they
182+
already end with ``/reranking``.
179183
model_name:
180184
Model identifier sent to the remote endpoint (default
181185
``"nvidia/llama-nemotron-rerank-1b-v2"``).

nemo_retriever/src/nemo_retriever/retriever.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class Retriever:
5858
reranker_model_name: Optional[str] = "nvidia/llama-nemotron-rerank-1b-v2"
5959
"""HuggingFace model ID for local reranking (e.g. 'nvidia/llama-nemotron-rerank-1b-v2')."""
6060
reranker_endpoint: Optional[str] = None
61-
"""Base URL of a vLLM / NIM /rerank endpoint. Takes priority over local model."""
61+
"""Base URL of a vLLM / NIM ranking endpoint. Appends ``/v1/ranking`` unless already using ``/reranking``."""
6262
reranker_api_key: str = ""
6363
"""Bearer token for the remote rerank endpoint."""
6464
reranker_max_length: int = 512

0 commit comments

Comments
 (0)