Skip to content

Commit e46c334

Browse files
hmellor authored and skyloevil committed
Move KVTransferConfig from config/__init__.py to config/kv_transfer.py (vllm-project#24434)
Signed-off-by: Harry Mellor <[email protected]>
1 parent 0b123d1 commit e46c334

File tree

8 files changed

+120
-108
lines changed

8 files changed

+120
-108
lines changed

vllm/config/__init__.py

Lines changed: 1 addition & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import inspect
1010
import json
1111
import textwrap
12-
import uuid
1312
import warnings
1413
from collections.abc import Mapping
1514
from contextlib import contextmanager
@@ -34,6 +33,7 @@
3433
from vllm.config.compilation import (CompilationConfig, CompilationLevel,
3534
CUDAGraphMode, PassConfig)
3635
from vllm.config.kv_events import KVEventsConfig
36+
from vllm.config.kv_transfer import KVTransferConfig
3737
from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig,
3838
ParallelConfig)
3939
from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy
@@ -3210,107 +3210,6 @@ def _parse_collect_detailed_traces(self):
32103210
self.collect_detailed_traces[0].split(","))
32113211

32123212

3213-
KVProducer = Literal["kv_producer", "kv_both"]
3214-
KVConsumer = Literal["kv_consumer", "kv_both"]
3215-
KVRole = Literal[KVProducer, KVConsumer]
3216-
3217-
3218-
@config
3219-
@dataclass
3220-
class KVTransferConfig:
3221-
"""Configuration for distributed KV cache transfer."""
3222-
3223-
kv_connector: Optional[str] = None
3224-
"""The KV connector for vLLM to transmit KV caches between vLLM instances.
3225-
"""
3226-
3227-
engine_id: Optional[str] = None
3228-
"""The engine id for KV transfers."""
3229-
3230-
kv_buffer_device: Optional[str] = "cuda"
3231-
"""The device used by kv connector to buffer the KV cache.
3232-
Currently only support 'cuda'."""
3233-
3234-
kv_buffer_size: float = 1e9
3235-
"""The buffer size for TorchDistributedConnector. Measured in number of
3236-
bytes. Recommended value: 1e9 (about 1GB)."""
3237-
3238-
kv_role: Optional[KVRole] = None
3239-
"""Whether this vLLM instance produces, consumes KV cache, or both. Choices
3240-
are 'kv_producer', 'kv_consumer', and 'kv_both'."""
3241-
3242-
kv_rank: Optional[int] = None
3243-
"""The rank of this vLLM instance in the KV cache transfer. Typical value:
3244-
0 for prefill instance, 1 for decode instance.
3245-
Currently only 1P1D is supported."""
3246-
3247-
kv_parallel_size: int = 1
3248-
"""The number of parallel instances for KV cache transfer. For
3249-
P2pNcclConnector, this should be 2."""
3250-
3251-
kv_ip: str = "127.0.0.1"
3252-
"""The KV connector ip, used to build distributed connection."""
3253-
3254-
kv_port: int = 14579
3255-
"""The KV connector port, used to build distributed connection."""
3256-
3257-
kv_connector_extra_config: dict[str, Any] = field(default_factory=dict)
3258-
"""any extra config that the connector may need."""
3259-
3260-
kv_connector_module_path: Optional[str] = None
3261-
"""The Python module path to dynamically load the KV connector from.
3262-
Only supported in V1."""
3263-
3264-
def compute_hash(self) -> str:
3265-
"""
3266-
WARNING: Whenever a new field is added to this config,
3267-
ensure that it is included in the factors list if
3268-
it affects the computation graph.
3269-
3270-
Provide a hash that uniquely identifies all the configs
3271-
that affect the structure of the computation
3272-
graph from input ids/embeddings to the final hidden states,
3273-
excluding anything before input ids/embeddings and after
3274-
the final hidden states.
3275-
"""
3276-
# no factors to consider.
3277-
# this config will not affect the computation graph.
3278-
factors: list[Any] = []
3279-
hash_str = hashlib.md5(str(factors).encode(),
3280-
usedforsecurity=False).hexdigest()
3281-
return hash_str
3282-
3283-
def __post_init__(self) -> None:
3284-
if self.engine_id is None:
3285-
self.engine_id = str(uuid.uuid4())
3286-
3287-
if self.kv_role is not None and self.kv_role not in get_args(KVRole):
3288-
raise ValueError(f"Unsupported kv_role: {self.kv_role}. "
3289-
f"Supported roles are {get_args(KVRole)}")
3290-
3291-
if self.kv_connector is not None and self.kv_role is None:
3292-
raise ValueError("Please specify kv_disagg_role when kv_connector "
3293-
f"is set, supported roles are {get_args(KVRole)}")
3294-
3295-
@property
3296-
def is_kv_transfer_instance(self) -> bool:
3297-
return self.kv_connector is not None and \
3298-
self.kv_role in get_args(KVRole)
3299-
3300-
@property
3301-
def is_kv_producer(self) -> bool:
3302-
return self.kv_connector is not None and \
3303-
self.kv_role in get_args(KVProducer)
3304-
3305-
@property
3306-
def is_kv_consumer(self) -> bool:
3307-
return self.kv_connector is not None and \
3308-
self.kv_role in get_args(KVConsumer)
3309-
3310-
def get_from_extra_config(self, key, default) -> Any:
3311-
return self.kv_connector_extra_config.get(key, default)
3312-
3313-
33143213
@config
33153214
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
33163215
class VllmConfig:

vllm/config/kv_transfer.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+
import hashlib
5+
import uuid
6+
from dataclasses import field
7+
from typing import Any, Literal, Optional, get_args
8+
9+
from pydantic.dataclasses import dataclass
10+
11+
from vllm.config.utils import config
12+
13+
KVProducer = Literal["kv_producer", "kv_both"]
14+
KVConsumer = Literal["kv_consumer", "kv_both"]
15+
KVRole = Literal[KVProducer, KVConsumer]
16+
17+
18+
@config
19+
@dataclass
20+
class KVTransferConfig:
21+
"""Configuration for distributed KV cache transfer."""
22+
23+
kv_connector: Optional[str] = None
24+
"""The KV connector for vLLM to transmit KV caches between vLLM instances.
25+
"""
26+
27+
engine_id: Optional[str] = None
28+
"""The engine id for KV transfers."""
29+
30+
kv_buffer_device: Optional[str] = "cuda"
31+
"""The device used by kv connector to buffer the KV cache.
32+
Currently only support 'cuda'."""
33+
34+
kv_buffer_size: float = 1e9
35+
"""The buffer size for TorchDistributedConnector. Measured in number of
36+
bytes. Recommended value: 1e9 (about 1GB)."""
37+
38+
kv_role: Optional[KVRole] = None
39+
"""Whether this vLLM instance produces, consumes KV cache, or both. Choices
40+
are 'kv_producer', 'kv_consumer', and 'kv_both'."""
41+
42+
kv_rank: Optional[int] = None
43+
"""The rank of this vLLM instance in the KV cache transfer. Typical value:
44+
0 for prefill instance, 1 for decode instance.
45+
Currently only 1P1D is supported."""
46+
47+
kv_parallel_size: int = 1
48+
"""The number of parallel instances for KV cache transfer. For
49+
P2pNcclConnector, this should be 2."""
50+
51+
kv_ip: str = "127.0.0.1"
52+
"""The KV connector ip, used to build distributed connection."""
53+
54+
kv_port: int = 14579
55+
"""The KV connector port, used to build distributed connection."""
56+
57+
kv_connector_extra_config: dict[str, Any] = field(default_factory=dict)
58+
"""any extra config that the connector may need."""
59+
60+
kv_connector_module_path: Optional[str] = None
61+
"""The Python module path to dynamically load the KV connector from.
62+
Only supported in V1."""
63+
64+
def compute_hash(self) -> str:
65+
"""
66+
WARNING: Whenever a new field is added to this config,
67+
ensure that it is included in the factors list if
68+
it affects the computation graph.
69+
70+
Provide a hash that uniquely identifies all the configs
71+
that affect the structure of the computation
72+
graph from input ids/embeddings to the final hidden states,
73+
excluding anything before input ids/embeddings and after
74+
the final hidden states.
75+
"""
76+
# no factors to consider.
77+
# this config will not affect the computation graph.
78+
factors: list[Any] = []
79+
hash_str = hashlib.md5(str(factors).encode(),
80+
usedforsecurity=False).hexdigest()
81+
return hash_str
82+
83+
def __post_init__(self) -> None:
84+
if self.engine_id is None:
85+
self.engine_id = str(uuid.uuid4())
86+
87+
if self.kv_role is not None and self.kv_role not in get_args(KVRole):
88+
raise ValueError(f"Unsupported kv_role: {self.kv_role}. "
89+
f"Supported roles are {get_args(KVRole)}")
90+
91+
if self.kv_connector is not None and self.kv_role is None:
92+
raise ValueError("Please specify kv_disagg_role when kv_connector "
93+
f"is set, supported roles are {get_args(KVRole)}")
94+
95+
@property
96+
def is_kv_transfer_instance(self) -> bool:
97+
return self.kv_connector is not None and \
98+
self.kv_role in get_args(KVRole)
99+
100+
@property
101+
def is_kv_producer(self) -> bool:
102+
return self.kv_connector is not None and \
103+
self.kv_role in get_args(KVProducer)
104+
105+
@property
106+
def is_kv_consumer(self) -> bool:
107+
return self.kv_connector is not None and \
108+
self.kv_role in get_args(KVConsumer)
109+
110+
def get_from_extra_config(self, key, default) -> Any:
111+
return self.kv_connector_extra_config.get(key, default)

vllm/distributed/kv_transfer/kv_connector/factory.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
# yapf: enable
1515

1616
if TYPE_CHECKING:
17-
from vllm.config import KVTransferConfig, VllmConfig
17+
from vllm.config import VllmConfig
18+
from vllm.config.kv_transfer import KVTransferConfig
1819

1920
logger = init_logger(__name__)
2021

vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
import torch
99

10-
from vllm.config import KVTransferConfig, VllmConfig
10+
from vllm.config import VllmConfig
11+
from vllm.config.kv_transfer import KVTransferConfig
1112
from vllm.distributed.kv_events import KVCacheEvent
1213
from vllm.distributed.kv_transfer.kv_connector.factory import (
1314
KVConnectorFactory)

vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import torch
1616
import zmq
1717

18-
from vllm.config import KVTransferConfig
18+
from vllm.config.kv_transfer import KVTransferConfig
1919
from vllm.distributed.device_communicators.pynccl_wrapper import (
2020
NCCLLibrary, buffer_type, cudaStream_t, ncclComm_t, ncclDataTypeEnum)
2121
from vllm.distributed.kv_transfer.kv_connector.v1.p2p.tensor_memory_pool import ( # noqa: E501

vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from safetensors.torch import load as safetensors_load
1414
from safetensors.torch import save as safetensors_save
1515

16-
from vllm.config import KVTransferConfig
16+
from vllm.config.kv_transfer import KVTransferConfig
1717
from vllm.distributed.kv_transfer.kv_pipe.base import KVPipeBase
1818
from vllm.logger import init_logger
1919
from vllm.utils import join_host_port, make_zmq_path, split_host_port

vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
import torch
2222

23-
from vllm.config import KVTransferConfig
23+
from vllm.config.kv_transfer import KVTransferConfig
2424
from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
2525
from vllm.distributed.kv_transfer.kv_pipe.base import KVPipeBase
2626
from vllm.distributed.utils import StatelessProcessGroup

vllm/entrypoints/llm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def __init__(
204204

205205
if "kv_transfer_config" in kwargs and isinstance(
206206
kwargs["kv_transfer_config"], dict):
207-
from vllm.config import KVTransferConfig
207+
from vllm.config.kv_transfer import KVTransferConfig
208208
raw_config_dict = kwargs["kv_transfer_config"]
209209
try:
210210
kwargs["kv_transfer_config"] = KVTransferConfig(

0 commit comments

Comments (0)