
Commit 9701352

[Doc]: fix typos in Python comments (#24001)
Signed-off-by: Didier Durand <[email protected]>
1 parent 749be00 commit 9701352

10 files changed: +14 −14 lines changed


vllm/compilation/monitor.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -43,7 +43,7 @@ def end_monitoring_torch_compile(vllm_config: VllmConfig):


 def validate_cudagraph_capturing_enabled():
-    # used to monitor whether an cudagraph capturing is legal at runtime.
+    # used to monitor whether a cudagraph capturing is legal at runtime.
     # should be called before any cudagraph capturing.
     # if an illegal cudagraph capturing happens, raise an error.
     global cudagraph_capturing_enabled
```
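The surrounding comment describes a simple runtime guard. A minimal sketch of that pattern, assuming a module-level flag (not vLLM's actual implementation):

```python
# Module-level flag toggled elsewhere when CUDA graph capturing becomes legal/illegal.
cudagraph_capturing_enabled: bool = True

def validate_cudagraph_capturing_enabled() -> None:
    # Called before any CUDA graph capture; raise if capturing is currently illegal.
    if not cudagraph_capturing_enabled:
        raise RuntimeError("CUDA graph capturing is not allowed at this point")
```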

vllm/core/evictor.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -76,7 +76,7 @@ class LRUEvictor(Evictor):
     that's recorded in the Block. If there are multiple blocks with
     the same last_accessed time, then the one with the largest num_hashed_tokens
     will be evicted. If two blocks each have the lowest last_accessed time and
-    highest num_hashed_tokens value, then one will be chose arbitrarily
+    highest num_hashed_tokens value, then one will be chosen arbitrarily
     """

     # CLEANUP_THRESHOLD determines the maximum allowable size of the priority
```
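The docstring spells out the eviction order: oldest last_accessed first, ties broken by the largest num_hashed_tokens, any remaining tie resolved arbitrarily. A minimal sketch of that rule, with `Block` as a hypothetical stand-in for the real block type:

```python
from dataclasses import dataclass

@dataclass
class Block:
    block_id: int
    last_accessed: float
    num_hashed_tokens: int

def select_victim(blocks: list[Block]) -> Block:
    # Oldest last_accessed wins; ties go to the largest num_hashed_tokens;
    # any remaining tie is resolved arbitrarily (here, by iteration order).
    return min(blocks, key=lambda b: (b.last_accessed, -b.num_hashed_tokens))
```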

vllm/engine/llm_engine.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1239,7 +1239,7 @@ def step(self) -> List[RequestOutput]:

             # Stop the execute model loop in parallel workers until there are
             # more requests to process. This avoids waiting indefinitely in
-            # torch.distributed ops which may otherwise timeout, and unblocks
+            # torch.distributed ops which may otherwise time out, and unblocks
             # the RPC thread in the workers so that they can process any other
             # queued control plane messages, such as add/remove lora adapters.
             logger.debug("Stopping remote worker execution loop.")
```

vllm/entrypoints/llm.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -329,7 +329,7 @@ def generate(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             sampling_params: The sampling parameters for text generation. If
                 None, we use the default sampling parameters.
                 When it is a single value, it is applied to every prompt.
@@ -853,7 +853,7 @@ def encode(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             pooling_params: The pooling parameters for pooling. If None, we
                 use the default pooling parameters.
             use_tqdm: If `True`, shows a tqdm progress bar.
@@ -946,7 +946,7 @@ def embed(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             pooling_params: The pooling parameters for pooling. If None, we
                 use the default pooling parameters.
             use_tqdm: If `True`, shows a tqdm progress bar.
@@ -994,7 +994,7 @@ def classify(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             use_tqdm: If `True`, shows a tqdm progress bar.
                 If a callable (e.g., `functools.partial(tqdm, leave=False)`),
                 it is used to create the progress bar.
@@ -1038,7 +1038,7 @@ def reward(
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             use_tqdm: If `True`, shows a tqdm progress bar.
                 If a callable (e.g., `functools.partial(tqdm, leave=False)`),
                 it is used to create the progress bar.
```
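These docstrings all describe the same batch-prompt form. A short usage example of `LLM.generate` with a sequence of prompts and a single `SamplingParams` applied to every prompt (the model name is only an example):

```python
from vllm import LLM, SamplingParams

llm = LLM(model="facebook/opt-125m")
params = SamplingParams(temperature=0.8, max_tokens=64)

# A sequence of prompts is processed as one batch; the single SamplingParams
# value is applied to every prompt.
outputs = llm.generate(["Hello, my name is", "The capital of France is"], params)
for output in outputs:
    print(output.outputs[0].text)
```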

vllm/executor/mp_distributed_executor.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -101,7 +101,7 @@ def _init_executor(self) -> None:
         result_handler.start()
         self.worker_monitor.start()

-        # Set up signal handlers to shutdown the executor cleanly
+        # Set up signal handlers to shut down the executor cleanly
         # sometimes gc does not work well

         self.driver_worker = WorkerWrapperBase(self.vllm_config, 0)
```
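The comment refers to registering signal handlers so the executor is shut down cleanly rather than relying on garbage collection. A minimal sketch of that pattern, assuming a hypothetical `shutdown_fn` callback:

```python
import signal

def install_shutdown_handlers(shutdown_fn) -> None:
    # Shut the executor down cleanly on SIGTERM/SIGINT, since garbage
    # collection alone does not always release worker resources in time.
    def _handler(signum, frame):
        shutdown_fn()

    signal.signal(signal.SIGTERM, _handler)
    signal.signal(signal.SIGINT, _handler)
```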

vllm/lora/layers.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -605,7 +605,7 @@ def can_replace_layer(


 class MergedColumnParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
     """ColumnParallelLinear layer that is composed of 2 sublayers (slices)
-    packed together (eg. gate_proj + up_proj -> gate_up_proj).
+    packed together (e.g. gate_proj + up_proj -> gate_up_proj).

     This means we have 2 LoRAs, each applied to one half of the layer.
```
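The docstring says the merged layer carries two LoRAs, one per packed slice. An illustrative sketch (shapes and names are assumptions, not the layer's real code) of applying each low-rank update to its half and re-packing the result:

```python
import torch

def merged_lora_delta(x: torch.Tensor,
                      lora_a: list[torch.Tensor],  # two (in_dim, rank) matrices
                      lora_b: list[torch.Tensor]   # two (rank, out_dim // 2) matrices
                      ) -> torch.Tensor:
    # One low-rank update per slice (gate_proj and up_proj), concatenated to
    # match the packed gate_up_proj output layout.
    halves = [(x @ a) @ b for a, b in zip(lora_a, lora_b)]
    return torch.cat(halves, dim=-1)
```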

vllm/platforms/interface.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -537,7 +537,7 @@ def __getattr__(self, key: str):

     def get_global_graph_pool(self) -> Any:
         """
-        Return the global graph pool for the this platform.
+        Return the global graph pool for this platform.
         """
         cls = self.__class__
         if cls._global_graph_pool is None:
```
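The visible context shows a lazily initialized class-level cache. A minimal sketch of that pattern (the pool factory is a placeholder, not the platform's real call):

```python
from typing import Any

class ExamplePlatform:
    _global_graph_pool: Any = None

    def get_global_graph_pool(self) -> Any:
        # Create the pool once per class and reuse it on later calls.
        cls = self.__class__
        if cls._global_graph_pool is None:
            cls._global_graph_pool = object()  # placeholder for the real pool handle
        return cls._global_graph_pool
```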

vllm/reasoning/hunyuan_a13b_reasoning_parser.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -30,7 +30,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
    Key Features:
    - For non-stream output , Recognizes and extracts reasoning ("think")
      and answer ("answer") sections from text using regular expressions.
-    - For stream process, it require a token id sequences to change the
+    - For stream process, it requires a token id sequences to change the
      reasoning state and other state so it maintains internal state to
      manage parsing across multiple token.

```
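For the non-stream path, the docstring says the think and answer sections are pulled out with regular expressions. A minimal sketch of that idea; the `<think>`/`<answer>` tag names are assumptions for illustration only:

```python
import re

THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)
ANSWER_RE = re.compile(r"<answer>(.*?)</answer>", re.DOTALL)

def extract_sections(text: str) -> tuple[str, str]:
    # Returns (reasoning, answer); falls back to the raw text if no answer tag is found.
    think = THINK_RE.search(text)
    answer = ANSWER_RE.search(text)
    return (think.group(1).strip() if think else "",
            answer.group(1).strip() if answer else text.strip())
```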

vllm/v1/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -2734,7 +2734,7 @@ def get_attn_backends_for_layers(
             layer_names)
         attn_backends = {}
         attn_backend_layers = defaultdict(list)
-        # Dedupe based on full class name; this is a bit safer than using
+        # Dedupe based on full class name; this is a bit safer than
         # using the class itself as the key because when we create dynamic
         # attention backend subclasses (e.g. ChunkedLocalAttention) unless
         # they are cached correctly, there will be different objects per
```
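The comment explains why the dedupe keys on the full class name rather than the class object: dynamically created backend subclasses may not be cached, so identical backends can show up as distinct objects. A minimal sketch of that keying, under assumed input types:

```python
from collections import defaultdict

def group_layers_by_backend(layer_to_backend: dict[str, type]) -> dict[str, list[str]]:
    backend_layers: defaultdict[str, list[str]] = defaultdict(list)
    for layer_name, backend_cls in layer_to_backend.items():
        # Key on the fully qualified class name so dynamically created subclasses
        # that were not cached still collapse into a single entry.
        key = f"{backend_cls.__module__}.{backend_cls.__qualname__}"
        backend_layers[key].append(layer_name)
    return dict(backend_layers)
```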

vllm/v1/worker/gpu_worker.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -224,7 +224,7 @@ def determine_available_memory(self) -> int:
        memory can be used for KV cache without OOMs.

        The engine will first conduct a profiling of the existing memory usage.
-        Then, it calculate the free memory that can be used for KV cache in
+        Then, it calculates the free memory that can be used for KV cache in
        bytes.

        Tip:
```
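The docstring describes profiling current usage and then computing the bytes left over for the KV cache. A minimal sketch of that calculation, assuming a `gpu_memory_utilization` cap as the only input (not the worker's actual profiling logic):

```python
import torch

def available_kv_cache_memory(gpu_memory_utilization: float = 0.9) -> int:
    # Profile current GPU usage, then report how many bytes remain for the
    # KV cache under the utilization cap.
    free_bytes, total_bytes = torch.cuda.mem_get_info()
    usable = int(total_bytes * gpu_memory_utilization)
    used = total_bytes - free_bytes
    return max(0, usable - used)
```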
