|
9 | 9 | import inspect |
10 | 10 | import json |
11 | 11 | import textwrap |
12 | | -import uuid |
13 | 12 | import warnings |
14 | 13 | from collections.abc import Mapping |
15 | 14 | from contextlib import contextmanager |
|
34 | 33 | from vllm.config.compilation import (CompilationConfig, CompilationLevel, |
35 | 34 | CUDAGraphMode, PassConfig) |
36 | 35 | from vllm.config.kv_events import KVEventsConfig |
| 36 | +from vllm.config.kv_transfer import KVTransferConfig |
37 | 37 | from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig, |
38 | 38 | ParallelConfig) |
39 | 39 | from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy |
@@ -3210,107 +3210,6 @@ def _parse_collect_detailed_traces(self): |
3210 | 3210 | self.collect_detailed_traces[0].split(",")) |
3211 | 3211 |
|
3212 | 3212 |
|
# `kv_role` values for which `KVTransferConfig.is_kv_producer` can be true.
KVProducer = Literal["kv_producer", "kv_both"]
# `kv_role` values for which `KVTransferConfig.is_kv_consumer` can be true.
KVConsumer = Literal["kv_consumer", "kv_both"]
# Every accepted `kv_role` value (nested Literals are flattened by `typing`).
KVRole = Literal[KVProducer, KVConsumer]


@config
@dataclass
class KVTransferConfig:
    """Configuration for distributed KV cache transfer."""

    kv_connector: Optional[str] = None
    """The KV connector for vLLM to transmit KV caches between vLLM instances.
    """

    engine_id: Optional[str] = None
    """The engine id for KV transfers."""

    kv_buffer_device: Optional[str] = "cuda"
    """The device used by kv connector to buffer the KV cache.
    Currently only support 'cuda'."""

    kv_buffer_size: float = 1e9
    """The buffer size for TorchDistributedConnector. Measured in number of
    bytes. Recommended value: 1e9 (about 1GB)."""

    kv_role: Optional[KVRole] = None
    """Whether this vLLM instance produces, consumes KV cache, or both. Choices
    are 'kv_producer', 'kv_consumer', and 'kv_both'."""

    kv_rank: Optional[int] = None
    """The rank of this vLLM instance in the KV cache transfer. Typical value:
    0 for prefill instance, 1 for decode instance.
    Currently only 1P1D is supported."""

    kv_parallel_size: int = 1
    """The number of parallel instances for KV cache transfer. For
    P2pNcclConnector, this should be 2."""

    kv_ip: str = "127.0.0.1"
    """The KV connector ip, used to build distributed connection."""

    kv_port: int = 14579
    """The KV connector port, used to build distributed connection."""

    kv_connector_extra_config: dict[str, Any] = field(default_factory=dict)
    """any extra config that the connector may need."""

    kv_connector_module_path: Optional[str] = None
    """The Python module path to dynamically load the KV connector from.
    Only supported in V1."""

    def compute_hash(self) -> str:
        """
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        """
        # no factors to consider.
        # this config will not affect the computation graph.
        factors: list[Any] = []
        hash_str = hashlib.md5(str(factors).encode(),
                               usedforsecurity=False).hexdigest()
        return hash_str

    def __post_init__(self) -> None:
        """Default the engine id and validate the role settings.

        Raises:
            ValueError: If `kv_role` is set to a value outside `KVRole`, or
                if `kv_connector` is set while `kv_role` is None.
        """
        if self.engine_id is None:
            # No id supplied: generate a random one for this instance.
            self.engine_id = str(uuid.uuid4())

        supported_roles = get_args(KVRole)

        if self.kv_role is not None and self.kv_role not in supported_roles:
            raise ValueError(f"Unsupported kv_role: {self.kv_role}. "
                             f"Supported roles are {supported_roles}")

        if self.kv_connector is not None and self.kv_role is None:
            # The message names `kv_role`, the actual field on this config,
            # so the user knows which option to set.
            raise ValueError("Please specify kv_role when kv_connector "
                             f"is set, supported roles are {supported_roles}")

    @property
    def is_kv_transfer_instance(self) -> bool:
        """True when a connector is set and `kv_role` is any valid role."""
        return (self.kv_connector is not None
                and self.kv_role in get_args(KVRole))

    @property
    def is_kv_producer(self) -> bool:
        """True when a connector is set and `kv_role` is a producer role."""
        return (self.kv_connector is not None
                and self.kv_role in get_args(KVProducer))

    @property
    def is_kv_consumer(self) -> bool:
        """True when a connector is set and `kv_role` is a consumer role."""
        return (self.kv_connector is not None
                and self.kv_role in get_args(KVConsumer))

    def get_from_extra_config(self, key: str, default: Any) -> Any:
        """Return `kv_connector_extra_config[key]`, or `default` if absent."""
        return self.kv_connector_extra_config.get(key, default)
3313 | | - |
3314 | 3213 | @config |
3315 | 3214 | @dataclass(config=ConfigDict(arbitrary_types_allowed=True)) |
3316 | 3215 | class VllmConfig: |
|
0 commit comments