Skip to content

Commit fdf29ab

Browse files
authored
[TRTLLM-7846][feat] Http disagg-cluster management implementation (#7869)
Signed-off-by: Lizhi Zhou <[email protected]>
1 parent 6884d06 commit fdf29ab

File tree

6 files changed

+1187
-2
lines changed

6 files changed

+1187
-2
lines changed

tensorrt_llm/llmapi/disagg_utils.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
22
from dataclasses import dataclass, field
3-
from enum import Enum
3+
from enum import IntEnum
44
from typing import Any, List, Literal, Optional, Tuple
55

66
import yaml
@@ -16,7 +16,7 @@
1616
]
1717

1818

19-
class ServerRole(Enum):
19+
class ServerRole(IntEnum):
2020
CONTEXT = 0
2121
GENERATION = 1
2222
MM_ENCODER = 2
@@ -43,6 +43,21 @@ class ConditionalDisaggConfig():
4343
max_local_prefill_length: int = 0
4444

4545

46+
@dataclass
class MinimalInstances:
    """Minimal server counts, one field per server kind.

    Both counts default to 1.
    """

    # Minimal number of context servers.
    context_servers: int = 1
    # Minimal number of generation servers.
    generation_servers: int = 1
50+
51+
52+
@dataclass
class DisaggClusterConfig:
    """Cluster-storage and heartbeat settings for a disaggregated cluster.

    Timing values are validated on construction so that an inconsistent
    configuration fails immediately instead of producing workers that are
    reported inactive between two regular heartbeats.

    Raises:
        ValueError: If ``heartbeat_interval_sec`` or ``inactive_timeout_sec``
            is not positive, or if ``inactive_timeout_sec`` is shorter than
            ``heartbeat_interval_sec``.
    """

    # URI of the cluster storage.
    cluster_uri: str
    # Name of the cluster, used like a namespace.
    cluster_name: str = ""
    # Optional minimal server counts; None when not configured.
    minimal_instances: Optional[MinimalInstances] = None
    # A worker sends a heartbeat to the cluster storage every
    # heartbeat_interval_sec seconds.
    heartbeat_interval_sec: int = 5
    # A worker is considered inactive once it has not sent a heartbeat for
    # inactive_timeout_sec seconds.
    inactive_timeout_sec: int = 10

    def __post_init__(self) -> None:
        """Reject timing values that cannot describe a working heartbeat."""
        if self.heartbeat_interval_sec <= 0:
            raise ValueError(
                "heartbeat_interval_sec must be positive, got "
                f"{self.heartbeat_interval_sec}")
        if self.inactive_timeout_sec <= 0:
            raise ValueError(
                "inactive_timeout_sec must be positive, got "
                f"{self.inactive_timeout_sec}")
        # A timeout shorter than the heartbeat period would expire before the
        # next scheduled heartbeat could arrive.
        if self.inactive_timeout_sec < self.heartbeat_interval_sec:
            raise ValueError(
                f"inactive_timeout_sec ({self.inactive_timeout_sec}) must not "
                "be shorter than heartbeat_interval_sec "
                f"({self.heartbeat_interval_sec})")
59+
60+
4661
@dataclass
4762
class DisaggServerConfig():
4863
server_configs: List[CtxGenServerConfig]

0 commit comments

Comments
 (0)