Commit d5943f0

Fix typing error: replace some list[int] with List[int]
Signed-off-by: fhl <[email protected]>
1 parent 6302a7d commit d5943f0

Showing 15 changed files with 54 additions and 51 deletions.
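
The diff swaps the PEP 585 builtin generic list[int] for typing.List[int] in the block_shape annotations. The commit message does not spell out the motivation, but the usual reason is compatibility with Python releases before 3.9, where subscripting the builtin list raises TypeError at the point the annotation is evaluated. A minimal, self-contained sketch of the pattern (the resolve_block_shape helper and the [128, 128] default are illustrative, not taken from vLLM):

# Minimal sketch, not part of the commit: typing.List[int] works on
# Python 3.7/3.8, while the builtin list[int] only became subscriptable
# in Python 3.9 (PEP 585) and raises TypeError on earlier versions.
from typing import List, Optional

def resolve_block_shape(block_shape: Optional[List[int]] = None) -> List[int]:
    # Hypothetical helper: fall back to a default block shape when the
    # caller passes None, mirroring the optional block_shape parameters
    # touched throughout this diff.
    return block_shape if block_shape is not None else [128, 128]

print(resolve_block_shape())          # [128, 128]
print(resolve_block_shape([64, 64]))  # [64, 64]

On Python 3.8 this sketch runs as written, whereas writing list[int] in the signature would fail as soon as the function definition is executed.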

vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-from typing import Optional
+from typing import Optional, List

 import torch

@@ -21,7 +21,7 @@ def __init__(self,
                  use_int8_w8a16: bool = False,
                  use_int4_w4a16: bool = False,
                  per_channel_quant: bool = False,
-                 block_shape: Optional[list[int]] = None,
+                 block_shape: Optional[List[int]] = None,
                  allow_deep_gemm: bool = False):
         super().__init__()
         assert not use_int8_w8a8, "NYI"

vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-from typing import Optional
+from typing import Optional, List

 import deep_ep
 import torch
@@ -22,7 +22,7 @@ def __init__(self,
                  dp_size: int,
                  rank_expert_offset: int,
                  quant_dtype: Optional[torch.dtype] = None,
-                 block_shape: Optional[list[int]] = None):
+                 block_shape: Optional[List[int]] = None):
         super().__init__()
         self.buffer = buffer
         self.world_size = world_size

vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-from typing import Optional, Union
+from typing import Optional, Union, List

 import deep_ep
 import torch
@@ -43,7 +43,7 @@ def __init__(self,
                  dp_size: int,
                  max_tokens_per_rank: int,
                  quant_dtype: Optional[torch.dtype] = None,
-                 block_shape: Optional[list[int]] = None,
+                 block_shape: Optional[List[int]] = None,
                  use_fp8_dispatch: bool = False):
         super().__init__()

vllm/model_executor/layers/fused_moe/fused_batched_moe.py

Lines changed: 4 additions & 4 deletions
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Fused batched MoE kernel."""
-from typing import Optional
+from typing import Optional, List

 import torch
 import triton
@@ -325,7 +325,7 @@ def invoke_moe_batched_triton_kernel(
         use_int8_w8a16: bool,
         use_int4_w4a16: bool,
         config: dict[str, int],
-        block_shape: Optional[list[int]] = None):
+        block_shape: Optional[List[int]] = None):

     assert not use_int4_w4a16
     max_num_tokens = A.size(1)
@@ -496,7 +496,7 @@ def __init__(
         use_int8_w8a8: bool = False,
         use_int8_w8a16: bool = False,
         use_int4_w4a16: bool = False,
-        block_shape: Optional[list[int]] = None,
+        block_shape: Optional[List[int]] = None,
         block_m: Optional[int] = None,
     ):
         super().__init__()
@@ -596,7 +596,7 @@ def __init__(
         use_int8_w8a16: bool = False,
         use_int4_w4a16: bool = False,
         per_channel_quant: bool = False,
-        block_shape: Optional[list[int]] = None,
+        block_shape: Optional[List[int]] = None,
         world_size: int = 1,
         dp_size: int = 1,
     ):

vllm/model_executor/layers/fused_moe/fused_moe.py

Lines changed: 14 additions & 14 deletions
@@ -4,7 +4,7 @@
 import functools
 import json
 import os
-from typing import Any, Callable, Optional
+from typing import Any, Callable, Optional, List

 import torch

@@ -481,7 +481,7 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
                             use_int8_w8a16: bool,
                             use_int4_w4a16: bool,
                             per_channel_quant: bool,
-                            block_shape: Optional[list[int]] = None) -> None:
+                            block_shape: Optional[List[int]] = None) -> None:
     assert topk_weights is not None or not mul_routed_weight
     assert topk_weights is None or topk_weights.stride(1) == 1
     assert sorted_token_ids.stride(0) == 1
@@ -638,7 +638,7 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
 def get_config_file_name(E: int,
                          N: int,
                          dtype: Optional[str],
-                         block_shape: Optional[list[int]] = None) -> str:
+                         block_shape: Optional[List[int]] = None) -> str:
     device_name = current_platform.get_device_name().replace(" ", "_")
     dtype_selector = "" if not dtype else f",dtype={dtype}"
     block_shape_selector = ("" if not block_shape or not all(block_shape) else
@@ -758,7 +758,7 @@ def get_default_config(
     topk: int,
     dtype: Optional[str],
     is_marlin: bool,
-    block_shape: Optional[list[int]] = None,
+    block_shape: Optional[List[int]] = None,
 ) -> dict[str, int]:
     if dtype == "fp8_w8a8" and block_shape is not None:
         # Block-wise quant: BLOCK_SIZE_N must be divisible by block_shape[0]
@@ -817,7 +817,7 @@ def try_get_optimal_moe_config(
     dtype: Optional[str],
     M: int,
     is_marlin: bool = False,
-    block_shape: Optional[list[int]] = None,
+    block_shape: Optional[List[int]] = None,
 ):
     from vllm.model_executor.layers.fused_moe import get_config
     override_config = get_config()
@@ -1014,7 +1014,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor,
                           w2_zp: Optional[torch.Tensor] = None,
                           a1_scale: Optional[torch.Tensor] = None,
                           a2_scale: Optional[torch.Tensor] = None,
-                          block_shape: Optional[list[int]] = None) -> None:
+                          block_shape: Optional[List[int]] = None) -> None:
     fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids, True,
                        activation, apply_router_weight_on_input, use_fp8_w8a8,
                        use_int8_w8a8, use_int8_w8a16, use_int4_w4a16,
@@ -1044,7 +1044,7 @@ def inplace_fused_experts_fake(
         w2_zp: Optional[torch.Tensor] = None,
         a1_scale: Optional[torch.Tensor] = None,
         a2_scale: Optional[torch.Tensor] = None,
-        block_shape: Optional[list[int]] = None) -> None:
+        block_shape: Optional[List[int]] = None) -> None:
     pass


@@ -1078,7 +1078,7 @@ def outplace_fused_experts(
         w2_zp: Optional[torch.Tensor] = None,
         a1_scale: Optional[torch.Tensor] = None,
         a2_scale: Optional[torch.Tensor] = None,
-        block_shape: Optional[list[int]] = None) -> torch.Tensor:
+        block_shape: Optional[List[int]] = None) -> torch.Tensor:
     return fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids,
                               False, activation, apply_router_weight_on_input,
                               use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16,
@@ -1108,7 +1108,7 @@ def outplace_fused_experts_fake(
         w2_zp: Optional[torch.Tensor] = None,
         a1_scale: Optional[torch.Tensor] = None,
         a2_scale: Optional[torch.Tensor] = None,
-        block_shape: Optional[list[int]] = None) -> torch.Tensor:
+        block_shape: Optional[List[int]] = None) -> torch.Tensor:
     return torch.empty_like(hidden_states)


@@ -1158,7 +1158,7 @@ def fused_experts(hidden_states: torch.Tensor,
                   w2_zp: Optional[torch.Tensor] = None,
                   a1_scale: Optional[torch.Tensor] = None,
                   a2_scale: Optional[torch.Tensor] = None,
-                  block_shape: Optional[list[int]] = None,
+                  block_shape: Optional[List[int]] = None,
                   allow_deep_gemm: bool = False) -> torch.Tensor:
     # For now, disable DeepGemm for small N (<= 512) until better
     # permute/unpermute ops are available.
@@ -1229,7 +1229,7 @@ def fused_experts_impl(
     w2_zp: Optional[torch.Tensor] = None,
     a1_scale: Optional[torch.Tensor] = None,
     a2_scale: Optional[torch.Tensor] = None,
-    block_shape: Optional[list[int]] = None,
+    block_shape: Optional[List[int]] = None,
 ) -> torch.Tensor:
     # Check constraints.
     if use_int4_w4a16:
@@ -1430,7 +1430,7 @@ def fused_moe(
     w2_zp: Optional[torch.Tensor] = None,
     a1_scale: Optional[torch.Tensor] = None,
     a2_scale: Optional[torch.Tensor] = None,
-    block_shape: Optional[list[int]] = None,
+    block_shape: Optional[List[int]] = None,
 ) -> torch.Tensor:
     """
     This function computes a Mixture of Experts (MoE) layer using two sets of
@@ -1526,7 +1526,7 @@ def __init__(
         use_int8_w8a16: bool,
         use_int4_w4a16: bool,
         per_channel_quant: bool,
-        block_shape: Optional[list[int]] = None,
+        block_shape: Optional[List[int]] = None,
         block_m: Optional[int] = None,
     ):
         super().__init__()
@@ -1700,7 +1700,7 @@ def modular_triton_fused_moe(
     use_int8_w8a16: bool,
     use_int4_w4a16: bool,
     per_channel_quant: bool,
-    block_shape: Optional[list[int]] = None,
+    block_shape: Optional[List[int]] = None,
 ) -> mk.FusedMoEModularKernel:
     qtype = get_config_qtype(
         use_fp8_w8a8=use_fp8_w8a8,

vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Optional
+from typing import Optional, List

 import pplx_kernels as pplx
 import torch
@@ -21,7 +21,7 @@ def __init__(self,
                  rank: int,
                  dp_size: int,
                  quant_dtype: Optional[torch.dtype] = None,
-                 block_shape: Optional[list[int]] = None,
+                 block_shape: Optional[List[int]] = None,
                  per_act_token: bool = False):
         super().__init__()
         assert max_num_tokens > 0

vllm/model_executor/layers/fused_moe/prepare_finalize.py

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Optional
+from typing import Optional, List

 import torch

@@ -17,7 +17,7 @@ def __init__(
         self,
         quant_dtype: Optional[torch.dtype] = None,
         per_channel_quant: bool = False,
-        block_shape: Optional[list[int]] = None,
+        block_shape: Optional[List[int]] = None,
     ):
         super().__init__()
         self.per_channel_quant = per_channel_quant

vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from enum import IntEnum
 from functools import cache
-from typing import Optional
+from typing import Optional, List

 import torch

@@ -315,7 +315,7 @@ def rocm_aiter_fused_experts(
         w2_scale: Optional[torch.Tensor] = None,
         a1_scale: Optional[torch.Tensor] = None,
         a2_scale: Optional[torch.Tensor] = None,
-        block_shape: Optional[list[int]] = None) -> torch.Tensor:
+        block_shape: Optional[List[int]] = None) -> torch.Tensor:

     activation_method = (ActivationMethod.SILU
                          if activation == "silu" else ActivationMethod.GELU)

vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Optional
+from typing import Optional, List

 import torch

@@ -18,7 +18,7 @@ def __init__(self,
                  use_int8_w8a16: bool = False,
                  use_int4_w4a16: bool = False,
                  per_channel_quant: bool = False,
-                 block_shape: Optional[list[int]] = None,
+                 block_shape: Optional[List[int]] = None,
                  block_m: Optional[int] = None,
                  allow_deep_gemm: bool = False):
         super().__init__()

vllm/model_executor/layers/fused_moe/utils.py

Lines changed: 4 additions & 4 deletions
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from math import prod
-from typing import Optional
+from typing import Optional, List

 import torch

@@ -27,7 +27,7 @@ def _fp8_quantize(
     A: torch.Tensor,
     A_scale: Optional[torch.Tensor],
     per_act_token: bool,
-    block_shape: Optional[list[int]] = None,
+    block_shape: Optional[List[int]] = None,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     """
     Perform fp8 quantization on the inputs. If a block_shape
@@ -49,7 +49,7 @@ def _int8_quantize(
     A: torch.Tensor,
     A_scale: Optional[torch.Tensor],
     per_act_token: bool,
-    block_shape: Optional[list[int]] = None,
+    block_shape: Optional[List[int]] = None,
 ) -> tuple[torch.Tensor, torch.Tensor]:
     """
     Perform int8 quantization on the inputs. If a block_shape
@@ -77,7 +77,7 @@ def moe_kernel_quantize_input(
     A_scale: Optional[torch.Tensor],
     qtype: Optional[torch.dtype],
     per_channel_quant: bool,
-    block_shape: Optional[list[int]] = None,
+    block_shape: Optional[List[int]] = None,
 ) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
     if qtype == torch.float8_e4m3fn:
         return _fp8_quantize(A, A_scale, per_channel_quant, block_shape)
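
The utils.py hunk above ends inside the qtype dispatch of moe_kernel_quantize_input. The sketch below mirrors only that dispatch shape; quantize_input_sketch and _stub_quantize are hypothetical stand-ins, not vLLM's _fp8_quantize/_int8_quantize kernels (the real ones honor per_act_token and block_shape to compute per-token or block-wise scales), and the float8 branch assumes a torch build that exposes torch.float8_e4m3fn.

from typing import List, Optional, Tuple

import torch

def _stub_quantize(A: torch.Tensor,
                   A_scale: Optional[torch.Tensor],
                   per_act_token: bool,
                   block_shape: Optional[List[int]],
                   dtype: torch.dtype) -> Tuple[torch.Tensor, torch.Tensor]:
    # Illustrative per-tensor quantization only; per_act_token and
    # block_shape are accepted but ignored in this stand-in.
    max_repr = 127.0 if dtype == torch.int8 else 448.0  # e4m3fn max is 448
    scale = A.abs().amax().clamp(min=1e-6) / max_repr
    q = A / scale
    if dtype == torch.int8:
        q = q.round()
    return q.to(dtype), scale.reshape(1)

def quantize_input_sketch(
    A: torch.Tensor,
    A_scale: Optional[torch.Tensor],
    qtype: Optional[torch.dtype],
    per_channel_quant: bool,
    block_shape: Optional[List[int]] = None,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
    # Same dispatch shape as the hunk: quantize for fp8/int8 targets,
    # otherwise pass the activations through untouched.
    if qtype == torch.float8_e4m3fn:
        return _stub_quantize(A, A_scale, per_channel_quant, block_shape,
                              torch.float8_e4m3fn)
    if qtype == torch.int8:
        return _stub_quantize(A, A_scale, per_channel_quant, block_shape,
                              torch.int8)
    return A, A_scale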

0 commit comments
