Commit bcb06d7

[Doc]: fix typos in various files (vllm-project#24726)
Signed-off-by: Didier Durand <[email protected]>
1 parent 0377802 commit bcb06d7

11 files changed, +11 -11 lines changed


benchmarks/kernels/benchmark_w8a8_block_fp8.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ def w8a8_block_matmul(
         Bs: The per-block quantization scale for `B`.
         block_size: The block size for per-block quantization.
             It should be 2-dim, e.g., [128, 128].
-        output_dytpe: The dtype of the returned tensor.
+        output_dtype: The dtype of the returned tensor.
 
     Returns:
         torch.Tensor: The result of matmul.
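
For context on what this docstring describes, here is a minimal reference sketch of a per-block-quantized matmul. The names and shapes are illustrative assumptions, not the benchmark's actual kernel or signature:

import torch

def w8a8_block_matmul_ref(A, As, B, Bs, block_size, output_dtype=torch.float16):
    # Illustrative reference only: dequantize per block, then matmul in fp32.
    # Assumed shapes: A [M, K], As [M, K // block_k] (per-group scales for A),
    # B [N, K], Bs [N // block_n, K // block_k] (per-block scales for B).
    block_n, block_k = block_size  # e.g., [128, 128], as the docstring says
    A_dq = A.to(torch.float32) * As.repeat_interleave(block_k, dim=1)
    B_scales = Bs.repeat_interleave(block_n, dim=0).repeat_interleave(block_k, dim=1)
    B_dq = B.to(torch.float32) * B_scales
    return (A_dq @ B_dq.t()).to(output_dtype)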

csrc/cpu/cpu_types_vxe.hpp

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ namespace vec_op {
 #define vec_sub(a, b) ((a) - (b))
 #define vec_mul(a, b) ((a) * (b))
 #define vec_div(a, b) ((a) / (b))
-#define vec_sr(a, b) ((a) >> (b))  // Vector Shift Right Algebaic
+#define vec_sr(a, b) ((a) >> (b))  // Vector Shift Right Algebraic
 #define vec_sl(a, b) ((a) << (b))  // Vector Shift Left
 
 // FIXME: FP16 is not fully supported in Torch-CPU
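
"Algebraic" is the classic term for an arithmetic (sign-preserving) right shift, as opposed to a logical shift. A quick Python illustration of the distinction (Python's >> on ints is arithmetic, like vec_sr):

x = -8
print(x >> 1)           # -4: arithmetic ("algebraic") shift, sign preserved
print((x & 0xFF) >> 1)  # 124: logical shift of the 8-bit two's-complement pattern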

csrc/cpu/sgl-kernels/moe.cpp

Lines changed: 1 addition & 1 deletion
@@ -215,7 +215,7 @@ int moe_align_block_size(
       offsets[mb + 1] = sorted_id_size(sorted_ids + mb * BLOCK_M);
     }
   });
-  // TODO: do we need to vecterize this ?
+  // TODO: do we need to vectorize this ?
   for (int mb = 0; mb < num_token_blocks; ++mb) {
     offsets[mb + 1] += offsets[mb];
   }
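
The loop flagged by the TODO is a plain prefix sum, which is awkward to vectorize because each iteration depends on the previous one. A Python sketch of the same recurrence, with hypothetical counts:

import numpy as np

counts = np.array([4, 2, 3, 1])            # hypothetical per-block id counts
offsets = np.zeros(len(counts) + 1, dtype=np.int64)
for mb in range(len(counts)):              # same recurrence as the C++ loop
    offsets[mb + 1] = counts[mb] + offsets[mb]
assert (offsets[1:] == np.cumsum(counts)).all()  # offsets == [0, 4, 6, 9, 10]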

docs/design/multiprocessing.md

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ page for information on known issues and how to solve them.
 ## Introduction
 
 !!! important
-    The source code references are to the state of the code at the time of writing in December, 2024.
+    The source code references are to the state of the code at the time of writing in December 2024.
 
 The use of Python multiprocessing in vLLM is complicated by:
 

vllm/attention/backends/flash_attn.py

Lines changed: 1 addition & 1 deletion
@@ -901,7 +901,7 @@ def _get_query_key_seq_metadata(
                 attn_metadata.encoder_seq_start_loc,
                 attn_metadata.max_encoder_seq_len)
     elif attn_type == AttentionType.ENCODER:
-        # For encoder attention both the query and the key are same i.e the
+        # For encoder attention both the query and the key are same i.e. the
         # encoder sequence.
         return (attn_metadata.encoder_seq_start_loc,
                 attn_metadata.max_encoder_seq_len,
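
For reference, seq_start_loc-style metadata holds the cumulative start offset of each packed sequence; in encoder self-attention the same tensor describes both the query and the key side. A small sketch with made-up lengths:

import torch

encoder_seq_lens = torch.tensor([5, 3, 7])         # hypothetical batch
encoder_seq_start_loc = torch.zeros(4, dtype=torch.int32)
encoder_seq_start_loc[1:] = torch.cumsum(encoder_seq_lens, dim=0)
# tensor([0, 5, 8, 15]): query and key share this metadata in the ENCODER case
max_encoder_seq_len = int(encoder_seq_lens.max())  # 7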

vllm/benchmarks/datasets.py

Lines changed: 1 addition & 1 deletion
@@ -551,7 +551,7 @@ def generate_token_sequence(
     [6880, 6881] -> ['Ġcalls', 'here'] ->
     [1650, 939, 486] -> ['Ġcall', 'sh', 'ere']
     To avoid uncontrolled change of the prompt length,
-    the encoded sequence is truncated before being decode again.
+    the encoded sequence is truncated before being decoded again.
     """
     # Build the inner sequence by sampling sequentially from the vocab
     inner_seq = ((offset + index + np.arange(input_len))
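
A sketch of the round-trip guard this docstring describes, using a Hugging Face tokenizer purely for illustration (the model name, prompt, and input_len here are assumptions):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
input_len = 4
ids = tokenizer.encode("an example prompt to round-trip", add_special_tokens=False)
ids = ids[:input_len]         # truncate the *encoded* sequence first...
text = tokenizer.decode(ids)  # ...then decode, so re-encoding the decoded text
                              # cannot silently drift past the target length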

vllm/entrypoints/openai/protocol.py

Lines changed: 1 addition & 1 deletion
@@ -242,7 +242,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
     elif processors:
         raise ValueError(
             "The `logits_processors` argument is not supported by this "
-            "server. See --logits-processor-pattern engine argugment "
+            "server. See --logits-processor-pattern engine argument "
             "for more information.")
     return None
 

vllm/model_executor/layers/mamba/mamba_mixer2.py

Lines changed: 1 addition & 1 deletion
@@ -324,7 +324,7 @@ def __init__(self,
         # - the weight already has a "weight_loader" attribute
         #   which set_weight_attrs will raise if we do not
         #   delete before trying to override it
-        # - ditto for the otther two weights below
+        # - ditto for the other two weights below
         delattr(self.conv1d.bias, "weight_loader")
         set_weight_attrs(
             self.conv1d.bias,
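
The delete-then-override pattern the comment describes, sketched against a minimal stand-in for set_weight_attrs that only mirrors the refuse-to-overwrite behavior the comment implies (not the real helper's full logic):

import torch

def set_weight_attrs(weight: torch.Tensor, attrs: dict) -> None:
    # Stand-in: never overwrite an attribute that is already present.
    for key, value in attrs.items():
        assert not hasattr(weight, key), f"attribute {key} already set"
        setattr(weight, key, value)

bias = torch.nn.Parameter(torch.zeros(4))
set_weight_attrs(bias, {"weight_loader": lambda *a: None})
delattr(bias, "weight_loader")  # delete first, as the diff above does,
set_weight_attrs(bias, {"weight_loader": lambda *a: None})  # then override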

vllm/model_executor/models/minicpmv.py

Lines changed: 1 addition & 1 deletion
@@ -1117,7 +1117,7 @@ def _process_vision_input(
 
     def _process_multimodal_inputs(self, modalities: dict):
         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()
 
         # NOTE: It is important to iterate over the keys in this dictionary
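
A small sketch of the data structure the comment describes: one tensor per multimodal item, accumulated into a tuple (the shapes are hypothetical):

import torch

multimodal_embeddings: tuple[torch.Tensor, ...] = ()
for item_embedding in (torch.zeros(16, 1024), torch.zeros(9, 1024)):
    multimodal_embeddings += (item_embedding,)  # one entry per image/video item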

vllm/v1/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
@@ -2659,7 +2659,7 @@ def _dummy_run(
             num_tokens += num_pad
 
         # If cudagraph_mode.decode_mode() == FULL and
-        # cudagraph_mode.seperate_routine(). This means that we are using
+        # cudagraph_mode.separate_routine(). This means that we are using
         # different graphs and/or modes for mixed prefill-decode batches vs.
         # uniform decode batches. A uniform decode batch means that all
         # requests have identical query length, except a potential virtual
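
A hedged sketch of the "uniform decode batch" check the comment defines; the names are illustrative, not the runner's actual API:

def is_uniform_decode_batch(query_lens: list[int], uniform_query_len: int = 1) -> bool:
    # Uniform decode: every request in the batch has the identical query length.
    return len(query_lens) > 0 and all(q == uniform_query_len for q in query_lens)

assert is_uniform_decode_batch([1, 1, 1])       # pure decode batch
assert not is_uniform_decode_batch([17, 1, 1])  # mixed prefill-decode batch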
