@@ -4,11 +4,11 @@
 # https://huggingface.co/mosaicml/mpt-7b/blob/main/configuration_mpt.py
 """A HuggingFace-style model configuration."""
 import warnings
-from typing import Any, Dict, Optional, Union
+from typing import Any, Optional, Union
 
 from transformers import PretrainedConfig
 
-attn_config_defaults: Dict = {
+attn_config_defaults: dict = {
     'attn_type': 'multihead_attention',
     'attn_pdrop': 0.0,
     'attn_impl': 'triton',
@@ -20,8 +20,8 @@
     'alibi': False,
     'alibi_bias_max': 8
 }
-ffn_config_defaults: Dict = {'ffn_type': 'mptmlp'}
-init_config_defaults: Dict = {
+ffn_config_defaults: dict = {'ffn_type': 'mptmlp'}
+init_config_defaults: dict = {
     'name': 'kaiming_normal_',
     'fan_mode': 'fan_in',
     'init_nonlinearity': 'relu',
@@ -52,15 +52,15 @@ def __init__(self,
                  resid_pdrop: float = 0.0,
                  emb_pdrop: float = 0.0,
                  learned_pos_emb: bool = True,
-                 attn_config: Dict = attn_config_defaults,
-                 ffn_config: Dict = ffn_config_defaults,
+                 attn_config: dict = attn_config_defaults,
+                 ffn_config: dict = ffn_config_defaults,
                  init_device: str = 'cpu',
                  logit_scale: Optional[Union[float, str]] = None,
                  no_bias: bool = False,
                  embedding_fraction: float = 1.0,
                  norm_type: str = 'low_precision_layernorm',
                  use_cache: bool = False,
-                 init_config: Dict = init_config_defaults,
+                 init_config: dict = init_config_defaults,
                  fc_type: str = 'torch',
                  verbose: Optional[int] = None,
                  **kwargs: Any):
@@ -102,8 +102,8 @@ def __init__(self,
         self._validate_config()
 
     def _set_config_defaults(
-            self, config: Dict[str, Any],
-            config_defaults: Dict[str, Any]) -> Dict[str, Any]:
+            self, config: dict[str, Any],
+            config_defaults: dict[str, Any]) -> dict[str, Any]:
         for (k, v) in config_defaults.items():
             if k not in config:
                 config[k] = v
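The change above replaces the `typing.Dict` alias with the builtin `dict` in every annotation. Most of the new annotations use bare `dict`, which works on any Python 3; only the subscripted forms such as `dict[str, Any]` require Python 3.9+ (PEP 585), or `from __future__ import annotations` on older interpreters. Below is a minimal standalone sketch of the defaults-merging pattern that `_set_config_defaults` implements, written with the new-style annotations. The function name `set_config_defaults`, the truncated `attn_config_defaults` (only the entries visible in the diff), and the usage at the bottom are illustrative, not the repository's code.

from typing import Any

# Minimal sketch of the defaults-merging pattern shown in the diff.
# Assumes Python >= 3.9, where subscripted builtin generics such as
# dict[str, Any] are valid runtime annotations (PEP 585).

# Truncated to the entries visible in the diff; the real file has more.
attn_config_defaults: dict[str, Any] = {
    'attn_type': 'multihead_attention',
    'attn_pdrop': 0.0,
    'attn_impl': 'triton',
    'alibi': False,
    'alibi_bias_max': 8,
}


def set_config_defaults(config: dict[str, Any],
                        config_defaults: dict[str, Any]) -> dict[str, Any]:
    # Copy any default key the caller did not override into the config.
    for k, v in config_defaults.items():
        if k not in config:
            config[k] = v
    return config


# A user-supplied override keeps its value; missing keys are filled in.
user_attn_config = set_config_defaults({'attn_impl': 'torch'},
                                       attn_config_defaults)
assert user_attn_config['attn_impl'] == 'torch'
assert user_attn_config['alibi_bias_max'] == 8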