
Commit 9701352

[Doc]: fix typos in Python comments (#24001)
Signed-off-by: Didier Durand <[email protected]>
1 parent 749be00 commit 9701352

10 files changed: +14 −14 lines changed


vllm/compilation/monitor.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -43,7 +43,7 @@ def end_monitoring_torch_compile(vllm_config: VllmConfig):


 def validate_cudagraph_capturing_enabled():
-    # used to monitor whether an cudagraph capturing is legal at runtime.
+    # used to monitor whether a cudagraph capturing is legal at runtime.
     # should be called before any cudagraph capturing.
     # if an illegal cudagraph capturing happens, raise an error.
     global cudagraph_capturing_enabled
```
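The surrounding comment describes a simple runtime guard. A minimal sketch of that pattern, assuming a module-level flag (not vLLM's actual implementation):

```python
# Module-level flag toggled elsewhere when CUDA graph capturing becomes legal/illegal.
cudagraph_capturing_enabled: bool = True

def validate_cudagraph_capturing_enabled() -> None:
    # Called before any CUDA graph capture; raise if capturing is currently illegal.
    if not cudagraph_capturing_enabled:
        raise RuntimeError("CUDA graph capturing is not allowed at this point")
```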

vllm/core/evictor.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -76,7 +76,7 @@ class LRUEvictor(Evictor):
     that's recorded in the Block. If there are multiple blocks with
     the same last_accessed time, then the one with the largest num_hashed_tokens
     will be evicted. If two blocks each have the lowest last_accessed time and
-    highest num_hashed_tokens value, then one will be chose arbitrarily
+    highest num_hashed_tokens value, then one will be chosen arbitrarily
     """

     # CLEANUP_THRESHOLD determines the maximum allowable size of the priority
```
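The docstring spells out the eviction order: oldest last_accessed first, ties broken by the largest num_hashed_tokens, any remaining tie resolved arbitrarily. A minimal sketch of that rule, with `Block` as a hypothetical stand-in for the real block type:

```python
from dataclasses import dataclass

@dataclass
class Block:
    block_id: int
    last_accessed: float
    num_hashed_tokens: int

def select_victim(blocks: list[Block]) -> Block:
    # Oldest last_accessed wins; ties go to the largest num_hashed_tokens;
    # any remaining tie is resolved arbitrarily (here, by iteration order).
    return min(blocks, key=lambda b: (b.last_accessed, -b.num_hashed_tokens))
```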

vllm/engine/llm_engine.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1239,7 +1239,7 @@ def step(self) -> List[RequestOutput]:

             # Stop the execute model loop in parallel workers until there are
             # more requests to process. This avoids waiting indefinitely in
-            # torch.distributed ops which may otherwise timeout, and unblocks
+            # torch.distributed ops which may otherwise time out, and unblocks
             # the RPC thread in the workers so that they can process any other
             # queued control plane messages, such as add/remove lora adapters.
             logger.debug("Stopping remote worker execution loop.")
```

vllm/entrypoints/llm.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -329,7 +329,7 @@ def generate(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             sampling_params: The sampling parameters for text generation. If
                 None, we use the default sampling parameters.
                 When it is a single value, it is applied to every prompt.
@@ -853,7 +853,7 @@ def encode(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             pooling_params: The pooling parameters for pooling. If None, we
                 use the default pooling parameters.
             use_tqdm: If `True`, shows a tqdm progress bar.
@@ -946,7 +946,7 @@ def embed(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             pooling_params: The pooling parameters for pooling. If None, we
                 use the default pooling parameters.
             use_tqdm: If `True`, shows a tqdm progress bar.
@@ -994,7 +994,7 @@ def classify(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             use_tqdm: If `True`, shows a tqdm progress bar.
                 If a callable (e.g., `functools.partial(tqdm, leave=False)`),
                 it is used to create the progress bar.
@@ -1038,7 +1038,7 @@ def reward(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             use_tqdm: If `True`, shows a tqdm progress bar.
                 If a callable (e.g., `functools.partial(tqdm, leave=False)`),
                 it is used to create the progress bar.
```
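These docstrings all describe the same batch-prompt form. A short usage example of `LLM.generate` with a sequence of prompts and a single `SamplingParams` applied to every prompt (the model name is only an example):

```python
from vllm import LLM, SamplingParams

llm = LLM(model="facebook/opt-125m")
params = SamplingParams(temperature=0.8, max_tokens=64)

# A sequence of prompts is processed as one batch; the single SamplingParams
# value is applied to every prompt.
outputs = llm.generate(["Hello, my name is", "The capital of France is"], params)
for output in outputs:
    print(output.outputs[0].text)
```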

vllm/executor/mp_distributed_executor.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -101,7 +101,7 @@ def _init_executor(self) -> None:
         result_handler.start()
         self.worker_monitor.start()

-        # Set up signal handlers to shutdown the executor cleanly
+        # Set up signal handlers to shut down the executor cleanly
         # sometimes gc does not work well

         self.driver_worker = WorkerWrapperBase(self.vllm_config, 0)
```
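The comment refers to registering signal handlers so the executor is shut down cleanly rather than relying on garbage collection. A minimal sketch of that pattern, assuming a hypothetical `shutdown_fn` callback:

```python
import signal

def install_shutdown_handlers(shutdown_fn) -> None:
    # Shut the executor down cleanly on SIGTERM/SIGINT, since garbage
    # collection alone does not always release worker resources in time.
    def _handler(signum, frame):
        shutdown_fn()

    signal.signal(signal.SIGTERM, _handler)
    signal.signal(signal.SIGINT, _handler)
```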

vllm/lora/layers.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -605,7 +605,7 @@ def can_replace_layer(


 class MergedColumnParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
     """ColumnParallelLinear layer that is composed of 2 sublayers (slices)
-    packed together (eg. gate_proj + up_proj -> gate_up_proj).
+    packed together (e.g. gate_proj + up_proj -> gate_up_proj).

     This means we have 2 LoRAs, each applied to one half of the layer.
```
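The docstring says the merged layer carries two LoRAs, one per packed slice. An illustrative sketch (shapes and names are assumptions, not the layer's real code) of applying each low-rank update to its half and re-packing the result:

```python
import torch

def merged_lora_delta(x: torch.Tensor,
                      lora_a: list[torch.Tensor],  # two (in_dim, rank) matrices
                      lora_b: list[torch.Tensor]   # two (rank, out_dim // 2) matrices
                      ) -> torch.Tensor:
    # One low-rank update per slice (gate_proj and up_proj), concatenated to
    # match the packed gate_up_proj output layout.
    halves = [(x @ a) @ b for a, b in zip(lora_a, lora_b)]
    return torch.cat(halves, dim=-1)
```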

vllm/platforms/interface.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -537,7 +537,7 @@ def __getattr__(self, key: str):

     def get_global_graph_pool(self) -> Any:
         """
-        Return the global graph pool for the this platform.
+        Return the global graph pool for this platform.
         """
         cls = self.__class__
         if cls._global_graph_pool is None:
```
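The visible context shows a lazily initialized class-level cache. A minimal sketch of that pattern (the pool factory is a placeholder, not the platform's real call):

```python
from typing import Any

class ExamplePlatform:
    _global_graph_pool: Any = None

    def get_global_graph_pool(self) -> Any:
        # Create the pool once per class and reuse it on later calls.
        cls = self.__class__
        if cls._global_graph_pool is None:
            cls._global_graph_pool = object()  # placeholder for the real pool handle
        return cls._global_graph_pool
```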

vllm/reasoning/hunyuan_a13b_reasoning_parser.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -30,7 +30,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
    Key Features:
    - For non-stream output , Recognizes and extracts reasoning ("think")
      and answer ("answer") sections from text using regular expressions.
-    - For stream process, it require a token id sequences to change the
+    - For stream process, it requires a token id sequences to change the
      reasoning state and other state so it maintains internal state to
      manage parsing across multiple token.

```
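For the non-stream path, the docstring says the think and answer sections are pulled out with regular expressions. A minimal sketch of that idea; the `<think>`/`<answer>` tag names are assumptions for illustration only:

```python
import re

THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)
ANSWER_RE = re.compile(r"<answer>(.*?)</answer>", re.DOTALL)

def extract_sections(text: str) -> tuple[str, str]:
    # Returns (reasoning, answer); falls back to the raw text if no answer tag is found.
    think = THINK_RE.search(text)
    answer = ANSWER_RE.search(text)
    return (think.group(1).strip() if think else "",
            answer.group(1).strip() if answer else text.strip())
```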

vllm/v1/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -2734,7 +2734,7 @@ def get_attn_backends_for_layers(
             layer_names)
         attn_backends = {}
         attn_backend_layers = defaultdict(list)
-        # Dedupe based on full class name; this is a bit safer than using
+        # Dedupe based on full class name; this is a bit safer than
         # using the class itself as the key because when we create dynamic
         # attention backend subclasses (e.g. ChunkedLocalAttention) unless
         # they are cached correctly, there will be different objects per
```
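The comment explains why the dedupe keys on the full class name rather than the class object: dynamically created backend subclasses may not be cached, so identical backends can show up as distinct objects. A minimal sketch of that keying, under assumed input types:

```python
from collections import defaultdict

def group_layers_by_backend(layer_to_backend: dict[str, type]) -> dict[str, list[str]]:
    backend_layers: defaultdict[str, list[str]] = defaultdict(list)
    for layer_name, backend_cls in layer_to_backend.items():
        # Key on the fully qualified class name so dynamically created subclasses
        # that were not cached still collapse into a single entry.
        key = f"{backend_cls.__module__}.{backend_cls.__qualname__}"
        backend_layers[key].append(layer_name)
    return dict(backend_layers)
```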

vllm/v1/worker/gpu_worker.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -224,7 +224,7 @@ def determine_available_memory(self) -> int:
        memory can be used for KV cache without OOMs.

        The engine will first conduct a profiling of the existing memory usage.
-        Then, it calculate the free memory that can be used for KV cache in
+        Then, it calculates the free memory that can be used for KV cache in
        bytes.

        Tip:
```
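The docstring describes profiling current usage and then computing the bytes left over for the KV cache. A minimal sketch of that calculation, assuming a `gpu_memory_utilization` cap as the only input (not the worker's actual profiling logic):

```python
import torch

def available_kv_cache_memory(gpu_memory_utilization: float = 0.9) -> int:
    # Profile current GPU usage, then report how many bytes remain for the
    # KV cache under the utilization cap.
    free_bytes, total_bytes = torch.cuda.mem_get_info()
    usable = int(total_bytes * gpu_memory_utilization)
    used = total_bytes - free_bytes
    return max(0, usable - used)
```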
