Skip to content

Commit 41f17bf

Browse files
authored
[Docs] Fix warnings in mkdocs build (continued) (vllm-project#24740)
Signed-off-by: Zerohertz <[email protected]>
1 parent bcb06d7 commit 41f17bf

File tree

10 files changed

+121
-176
lines changed

10 files changed

+121
-176
lines changed

vllm/model_executor/layers/quantization/torchao.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,9 @@ def torchao_quantize_param_data(param: torch.Tensor,
144144
"""Quantize a Tensor with torchao quantization specified by torchao_config
145145
146146
Args:
147-
`param`: weight parameter of the linear module
148-
`torchao_config`: type of quantization and their arguments we want to
149-
use to quantize the Tensor
147+
param: weight parameter of the linear module
148+
torchao_config: type of quantization and its arguments that we want to
149+
use to quantize the Tensor
150150
"""
151151
from torchao.core.config import AOBaseConfig
152152
from torchao.quantization import quantize_
@@ -172,8 +172,8 @@ class TorchAOLinearMethod(LinearMethodBase):
172172
"""Linear method for torchao.
173173
174174
Args:
175-
torchao_config: The torchao quantization config, a string
176-
that encodes the type of quantization and all relevant arguments.
175+
quant_config: The torchao quantization config, a TorchAOConfig that encodes
176+
the type of quantization and all relevant arguments.
177177
"""
178178

179179
def __init__(self, quant_config: TorchAOConfig):

vllm/model_executor/layers/quantization/utils/int8_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ def w8a8_block_int8_matmul(
423423
Bs: The per-block quantization scale for `B`.
424424
block_size: The block size for per-block quantization. It should be
425425
2-dim, e.g., [128, 128].
426-
output_dytpe: The dtype of the returned tensor.
426+
output_dtype: The dtype of the returned tensor.
427427
428428
Returns:
429429
torch.Tensor: The result of matmul.

vllm/model_executor/layers/rotary_embedding/mrope.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,8 @@ def triton_mrope(
135135
"""Qwen2VL mrope kernel.
136136
137137
Args:
138-
query: [num_tokens, num_heads * head_size]
139-
key: [num_tokens, num_kv_heads * head_size]
138+
q: [num_tokens, num_heads * head_size]
139+
k: [num_tokens, num_kv_heads * head_size]
140140
cos: [3, num_tokens, head_size // 2]
141141
(T/H/W positions with multimodal inputs)
142142
sin: [3, num_tokens, head_size // 2]

vllm/model_executor/model_loader/tensorizer.py

Lines changed: 45 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -171,51 +171,52 @@ class TensorizerConfig(MutableMapping):
171171
_is_sharded: bool = field(init=False, default=False)
172172
_fields: ClassVar[tuple[str, ...]]
173173
_keys: ClassVar[frozenset[str]]
174-
"""
175-
Args for the TensorizerConfig class. These are used to configure the
176-
behavior of model serialization and deserialization using Tensorizer.
174+
"""Configuration class for Tensorizer settings.
177175
178-
Args:
179-
tensorizer_uri: Path to serialized model tensors. Can be a local file
180-
path or a S3 URI. This is a required field unless lora_dir is
181-
provided and the config is meant to be used for the
182-
`tensorize_lora_adapter` function. Unless a `tensorizer_dir` or
183-
`lora_dir` is passed to this object's initializer, this is a required
184-
argument.
185-
tensorizer_dir: Path to a directory containing serialized model tensors,
186-
and all other potential model artifacts to load the model, such as
187-
configs and tokenizer files. Can be passed instead of `tensorizer_uri`
188-
where the `model.tensors` file will be assumed to be in this
189-
directory.
190-
vllm_tensorized: If True, indicates that the serialized model is a
191-
vLLM model. This is used to determine the behavior of the
192-
TensorDeserializer when loading tensors from a serialized model.
193-
It is far faster to deserialize a vLLM model as it utilizes
194-
tensorizer's optimized GPU loading. Note that this is now
195-
deprecated, as serialized vLLM models are now automatically
196-
inferred as vLLM models.
197-
verify_hash: If True, the hashes of each tensor will be verified against
198-
the hashes stored in the metadata. A `HashMismatchError` will be
199-
raised if any of the hashes do not match.
200-
num_readers: Controls how many threads are allowed to read concurrently
201-
from the source file. Default is `None`, which will dynamically set
202-
the number of readers based on the number of available
203-
resources and model size. This greatly increases performance.
204-
encryption_keyfile: File path to a binary file containing a
205-
binary key to use for decryption. `None` (the default) means
206-
no decryption. See the example script in
207-
examples/others/tensorize_vllm_model.py.
208-
s3_access_key_id: The access key for the S3 bucket. Can also be set via
209-
the S3_ACCESS_KEY_ID environment variable.
210-
s3_secret_access_key: The secret access key for the S3 bucket. Can also
211-
be set via the S3_SECRET_ACCESS_KEY environment variable.
212-
s3_endpoint: The endpoint for the S3 bucket. Can also be set via the
213-
S3_ENDPOINT_URL environment variable.
214-
lora_dir: Path to a directory containing LoRA adapter artifacts for
215-
serialization or deserialization. When serializing LoRA adapters
216-
this is the only necessary parameter to pass to this object's
217-
initializer.
218-
"""
176+
These settings configure the behavior of model serialization and
177+
deserialization using Tensorizer.
178+
179+
Attributes:
180+
tensorizer_uri: Path to serialized model tensors. Can be a local file
181+
path or an S3 URI. This is a required field unless lora_dir is
182+
provided and the config is meant to be used for the
183+
`tensorize_lora_adapter` function. Unless a `tensorizer_dir` or
184+
`lora_dir` is passed to this object's initializer, this is
185+
a required argument.
186+
tensorizer_dir: Path to a directory containing serialized model tensors,
187+
and all other potential model artifacts to load the model, such as
188+
configs and tokenizer files. Can be passed instead of
189+
`tensorizer_uri` where the `model.tensors` file will be assumed
190+
to be in this directory.
191+
vllm_tensorized: If True, indicates that the serialized model is a
192+
vLLM model. This is used to determine the behavior of the
193+
TensorDeserializer when loading tensors from a serialized model.
194+
It is far faster to deserialize a vLLM model as it utilizes
195+
tensorizer's optimized GPU loading. Note that this is now
196+
deprecated, as serialized vLLM models are now automatically
197+
inferred as vLLM models.
198+
verify_hash: If True, the hashes of each tensor will be verified
199+
against the hashes stored in the metadata. A `HashMismatchError`
200+
will be raised if any of the hashes do not match.
201+
num_readers: Controls how many threads are allowed to read concurrently
202+
from the source file. Default is `None`, which will dynamically set
203+
the number of readers based on the number of available
204+
resources and model size. This greatly increases performance.
205+
encryption_keyfile: File path to a binary file containing a
206+
binary key to use for decryption. `None` (the default) means
207+
no decryption. See the example script in
208+
examples/others/tensorize_vllm_model.py.
209+
s3_access_key_id: The access key for the S3 bucket. Can also be set via
210+
the S3_ACCESS_KEY_ID environment variable.
211+
s3_secret_access_key: The secret access key for the S3 bucket. Can also
212+
be set via the S3_SECRET_ACCESS_KEY environment variable.
213+
s3_endpoint: The endpoint for the S3 bucket. Can also be set via the
214+
S3_ENDPOINT_URL environment variable.
215+
lora_dir: Path to a directory containing LoRA adapter artifacts for
216+
serialization or deserialization. When serializing LoRA adapters
217+
this is the only necessary parameter to pass to this object's
218+
initializer.
219+
"""
219220

220221
def __post_init__(self):
221222
# check if the configuration is for a sharded vLLM model

vllm/model_executor/models/aria.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -143,16 +143,8 @@ class AriaProjector(nn.Module):
143143
projects ViT's outputs into MoE's inputs.
144144
145145
Args:
146-
patch_to_query_dict (dict): Maps patch numbers to their corresponding
147-
query numbers,
148-
e.g., {1225: 128, 4900: 256}. This allows for different query sizes
149-
based on image resolution.
150-
embed_dim (int): Embedding dimension.
151-
num_heads (int): Number of attention heads.
152-
kv_dim (int): Dimension of key and value.
153-
ff_dim (int): Hidden dimension of the feed-forward network.
154-
output_dim (int): Output dimension.
155-
norm_layer (nn.Module): Normalization layer. Default is nn.LayerNorm.
146+
config: [AriaConfig](https://huggingface.co/docs/transformers/main/model_doc/aria#transformers.AriaConfig)
147+
containing projector configuration parameters.
156148
157149
Outputs:
158150
A tensor with the shape of (batch_size, query_number, output_dim)
@@ -282,8 +274,8 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
282274
Forward pass of the MoE Layer.
283275
284276
Args:
285-
hidden_states (torch.Tensor): Input tensor of shape (batch_size,
286-
sequence_length, hidden_size).
277+
hidden_states: Input tensor of shape
278+
(batch_size, sequence_length, hidden_size).
287279
288280
Returns:
289281
torch.Tensor: Output tensor after passing through the MoE layer.

vllm/model_executor/models/bart.py

Lines changed: 40 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,7 @@ def __init__(
401401
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
402402
r"""
403403
Args:
404-
hidden_states
405-
torch.Tensor of *encoder* input embeddings.
404+
hidden_states: torch.Tensor of *encoder* input embeddings.
406405
Returns:
407406
Encoder layer output torch.Tensor
408407
"""
@@ -490,10 +489,8 @@ def forward(
490489
) -> torch.Tensor:
491490
r"""
492491
Args:
493-
decoder_hidden_states
494-
torch.Tensor of *decoder* input embeddings.
495-
encoder_hidden_states
496-
torch.Tensor of *encoder* input embeddings.
492+
decoder_hidden_states: torch.Tensor of *decoder* input embeddings.
493+
encoder_hidden_states: torch.Tensor of *encoder* input embeddings.
497494
Returns:
498495
Decoder layer output torch.Tensor
499496
"""
@@ -584,12 +581,10 @@ def forward(
584581
) -> torch.Tensor:
585582
r"""
586583
Args:
587-
input_ids
588-
Indices of *encoder* input sequence tokens in the vocabulary.
589-
Padding will be ignored by default should you
590-
provide it.
591-
positions
592-
Positions of *encoder* input sequence tokens.
584+
input_ids: Indices of *encoder* input sequence tokens in the
585+
vocabulary.
586+
Padding will be ignored by default should you provide it.
587+
positions: Positions of *encoder* input sequence tokens.
593588
Returns:
594589
Decoder output torch.Tensor
595590
"""
@@ -663,14 +658,11 @@ def forward(
663658
) -> torch.Tensor:
664659
r"""
665660
Args:
666-
decoder_input_ids
667-
Indices of *decoder* input sequence tokens in the vocabulary.
668-
Padding will be ignored by default should you
669-
provide it.
670-
decoder_positions
671-
Positions of *decoder* input sequence tokens.
672-
encoder_hidden_states:
673-
Tensor of encoder output embeddings
661+
decoder_input_ids: Indices of *decoder* input sequence tokens
662+
in the vocabulary.
663+
Padding will be ignored by default should you provide it.
664+
decoder_positions: Positions of *decoder* input sequence tokens.
665+
encoder_hidden_states: Tensor of encoder output embeddings.
674666
Returns:
675667
Decoder output torch.Tensor
676668
"""
@@ -732,16 +724,13 @@ def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
732724
encoder_positions: torch.Tensor) -> torch.Tensor:
733725
r"""
734726
Args:
735-
input_ids
736-
Indices of *decoder* input sequence tokens in the vocabulary.
737-
Padding will be ignored by default should you
738-
provide it.
739-
positions
740-
Positions of *decoder* input sequence tokens.
741-
encoder_input_ids
742-
Indices of *encoder* input sequence tokens in the vocabulary.
743-
encoder_positions:
744-
Positions of *encoder* input sequence tokens.
727+
input_ids: Indices of *decoder* input sequence tokens
728+
in the vocabulary.
729+
Padding will be ignored by default should you provide it.
730+
positions: Positions of *decoder* input sequence tokens.
731+
encoder_input_ids: Indices of *encoder* input sequence tokens
732+
in the vocabulary.
733+
encoder_positions: Positions of *encoder* input sequence tokens.
745734
Returns:
746735
Model output torch.Tensor
747736
"""
@@ -848,14 +837,10 @@ def forward(
848837
) -> torch.Tensor:
849838
r"""
850839
Args:
851-
input_ids
852-
torch.Tensor of *decoder* input token ids.
853-
positions
854-
torch.Tensor of *decoder* position indices.
855-
encoder_input_ids
856-
torch.Tensor of *encoder* input token ids.
857-
encoder_positions
858-
torch.Tensor of *encoder* position indices
840+
input_ids: torch.Tensor of *decoder* input token ids.
841+
positions: torch.Tensor of *decoder* position indices.
842+
encoder_input_ids: torch.Tensor of *encoder* input token ids.
843+
encoder_positions: torch.Tensor of *encoder* position indices.
859844
Returns:
860845
Output torch.Tensor
861846
"""
@@ -912,8 +897,7 @@ class MBartEncoderLayer(BartEncoderLayer):
912897
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
913898
r"""
914899
Args:
915-
hidden_states
916-
torch.Tensor of *encoder* input embeddings.
900+
hidden_states: torch.Tensor of *encoder* input embeddings.
917901
Returns:
918902
Encoder layer output torch.Tensor
919903
"""
@@ -1035,12 +1019,10 @@ def forward(
10351019
) -> torch.Tensor:
10361020
r"""
10371021
Args:
1038-
input_ids
1039-
Indices of *encoder* input sequence tokens in the vocabulary.
1040-
Padding will be ignored by default should you
1041-
provide it.
1042-
positions
1043-
Positions of *encoder* input sequence tokens.
1022+
input_ids: Indices of *encoder* input sequence tokens in the
1023+
vocabulary.
1024+
Padding will be ignored by default should you provide it.
1025+
positions: Positions of *encoder* input sequence tokens.
10441026
Returns:
10451027
Decoder output torch.Tensor
10461028
"""
@@ -1116,14 +1098,11 @@ def forward(
11161098
) -> torch.Tensor:
11171099
r"""
11181100
Args:
1119-
decoder_input_ids
1120-
Indices of *decoder* input sequence tokens in the vocabulary.
1121-
Padding will be ignored by default should you
1122-
provide it.
1123-
decoder_positions
1124-
Positions of *decoder* input sequence tokens.
1125-
encoder_hidden_states:
1126-
Tensor of encoder output embeddings
1101+
decoder_input_ids: Indices of *decoder* input sequence tokens
1102+
in the vocabulary.
1103+
Padding will be ignored by default should you provide it.
1104+
decoder_positions: Positions of *decoder* input sequence tokens.
1105+
encoder_hidden_states: Tensor of encoder output embeddings.
11271106
Returns:
11281107
Decoder output torch.Tensor
11291108
"""
@@ -1185,16 +1164,13 @@ def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
11851164
encoder_positions: torch.Tensor) -> torch.Tensor:
11861165
r"""
11871166
Args:
1188-
input_ids
1189-
Indices of *decoder* input sequence tokens in the vocabulary.
1190-
Padding will be ignored by default should you
1191-
provide it.
1192-
positions
1193-
Positions of *decoder* input sequence tokens.
1194-
encoder_input_ids
1195-
Indices of *encoder* input sequence tokens in the vocabulary.
1196-
encoder_positions:
1197-
Positions of *encoder* input sequence tokens.
1167+
input_ids: Indices of *decoder* input sequence tokens
1168+
in the vocabulary.
1169+
Padding will be ignored by default should you provide it.
1170+
positions: Positions of *decoder* input sequence tokens.
1171+
encoder_input_ids: Indices of *encoder* input sequence tokens
1172+
in the vocabulary.
1173+
encoder_positions: Positions of *encoder* input sequence tokens.
11981174
Returns:
11991175
Model output torch.Tensor
12001176
"""

vllm/model_executor/models/blip2.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,6 @@ def forward(
678678
Args:
679679
input_ids: Flattened (concatenated) input_ids corresponding to a
680680
batch.
681-
pixel_values: The pixels in each input image.
682681
683682
Info:
684683
[Blip2ImageInputs][]

vllm/model_executor/models/donut.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,8 @@ def forward(
7979
) -> torch.Tensor:
8080
r"""
8181
Args:
82-
input_ids
83-
torch.Tensor of *decoder* input token ids.
84-
positions
85-
torch.Tensor of *decoder* position indices.
82+
input_ids: torch.Tensor of *decoder* input token ids.
83+
positions: torch.Tensor of *decoder* position indices.
8684
Returns:
8785
Output torch.Tensor
8886
"""
@@ -351,14 +349,10 @@ def forward(
351349
) -> torch.Tensor:
352350
r"""
353351
Args:
354-
input_ids
355-
torch.Tensor of *decoder* input token ids.
356-
positions
357-
torch.Tensor of *decoder* position indices.
358-
encoder_input_ids
359-
torch.Tensor of *encoder* input token ids.
360-
encoder_positions
361-
torch.Tensor of *encoder* position indices
352+
input_ids: torch.Tensor of *decoder* input token ids.
353+
positions: torch.Tensor of *decoder* position indices.
354+
encoder_input_ids: torch.Tensor of *encoder* input token ids.
355+
encoder_positions: torch.Tensor of *encoder* position indices.
362356
Returns:
363357
Output torch.Tensor
364358
"""

0 commit comments

Comments
 (0)