Skip to content

Commit a9bcc7a

Browse files
[Doc] Use intersphinx and update entrypoints docs (#5125)
1 parent d79d9ea commit a9bcc7a

File tree

4 files changed

+32
-13
lines changed

4 files changed

+32
-13
lines changed

docs/source/conf.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def setup(app):
8080
generate_examples()
8181

8282

83-
# Mock out external dependencies here.
83+
# Mock out external dependencies here, otherwise the autodoc pages may be blank.
8484
autodoc_mock_imports = [
8585
"cpuinfo",
8686
"torch",
@@ -115,4 +115,15 @@ def add_line(self, line: str, source: str, *lineno: int) -> None:
115115

116116
autodoc.ClassDocumenter = MockedClassDocumenter
117117

118+
intersphinx_mapping = {
119+
'python': ('https://docs.python.org/3', None),
120+
'typing_extensions':
121+
('https://typing-extensions.readthedocs.io/en/latest', None),
122+
'numpy': ('https://numpy.org/doc/stable', None),
123+
'torch': ('https://pytorch.org/docs/stable', None),
124+
'psutil': ('https://psutil.readthedocs.io/en/stable', None),
125+
}
126+
127+
autodoc_preserve_defaults = True
128+
118129
navigation_with_keys = False

vllm/engine/async_llm_engine.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,6 @@ class AsyncLLMEngine:
307307
generate method when there are requests in the waiting queue. The generate
308308
method yields the outputs from the :class:`LLMEngine` to the caller.
309309
310-
NOTE: For the comprehensive list of arguments, see :class:`LLMEngine`.
311-
312310
Args:
313311
worker_use_ray: Whether to use Ray for model workers. Required for
314312
distributed execution. Should be the same as

vllm/engine/llm_engine.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ class LLMEngine:
7070
The :class:`~vllm.LLM` class wraps this class for offline batched inference
7171
and the :class:`AsyncLLMEngine` class wraps this class for online serving.
7272
73-
NOTE: The config arguments are derived from the :class:`~vllm.EngineArgs`
74-
class. For the comprehensive list of arguments, see :ref:`engine_args`.
73+
The config arguments are derived from :class:`~vllm.EngineArgs`. (See
74+
:ref:`engine_args`)
7575
7676
Args:
7777
model_config: The configuration related to the LLM model.

vllm/entrypoints/llm.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,6 @@ class LLM:
3030
this class generates texts from the model, using an intelligent batching
3131
mechanism and efficient memory management.
3232
33-
NOTE: This class is intended to be used for offline inference. For online
34-
serving, use the :class:`~vllm.AsyncLLMEngine` class instead.
35-
36-
NOTE: For the comprehensive list of arguments, see
37-
:class:`~vllm.EngineArgs`.
38-
3933
Args:
4034
model: The name or path of a HuggingFace Transformers model.
4135
tokenizer: The name or path of a HuggingFace Transformers tokenizer.
@@ -84,6 +78,12 @@ class LLM:
8478
When a sequence has context length larger than this, we fall back
8579
to eager mode.
8680
disable_custom_all_reduce: See ParallelConfig
81+
**kwargs: Arguments for :class:`~vllm.EngineArgs`. (See
82+
:ref:`engine_args`)
83+
84+
Note:
85+
This class is intended to be used for offline inference. For online
86+
serving, use the :class:`~vllm.AsyncLLMEngine` class instead.
8787
"""
8888

8989
DEPRECATE_LEGACY: ClassVar[bool] = False
@@ -253,7 +253,7 @@ def generate(
253253
) -> List[RequestOutput]:
254254
"""Generates the completions for the input prompts.
255255
256-
NOTE: This class automatically batches the given prompts, considering
256+
This method automatically batches the given prompts, considering
257257
the memory constraint. For the best performance, put all of your prompts
258258
into a single list and pass it to this method.
259259
@@ -270,6 +270,11 @@ def generate(
270270
Returns:
271271
A list of `RequestOutput` objects containing the
272272
generated completions in the same order as the input prompts.
273+
274+
Note:
275+
Using ``prompts`` and ``prompt_token_ids`` as keyword parameters is
276+
considered legacy and may be deprecated in the future. You should
277+
instead pass them via the ``inputs`` parameter.
273278
"""
274279
if prompt_token_ids is not None or multi_modal_data is not None:
275280
inputs = self._convert_v1_inputs(
@@ -393,7 +398,7 @@ def encode(
393398
) -> List[EmbeddingRequestOutput]:
394399
"""Generates the completions for the input prompts.
395400
396-
NOTE: This class automatically batches the given prompts, considering
401+
This method automatically batches the given prompts, considering
397402
the memory constraint. For the best performance, put all of your prompts
398403
into a single list and pass it to this method.
399404
@@ -409,6 +414,11 @@ def encode(
409414
Returns:
410415
A list of `EmbeddingRequestOutput` objects containing the
411416
generated embeddings in the same order as the input prompts.
417+
418+
Note:
419+
Using ``prompts`` and ``prompt_token_ids`` as keyword parameters is
420+
considered legacy and may be deprecated in the future. You should
421+
instead pass them via the ``inputs`` parameter.
412422
"""
413423
if prompt_token_ids is not None or multi_modal_data is not None:
414424
inputs = self._convert_v1_inputs(

0 commit comments

Comments
 (0)