Skip to content

Commit d535489

Browse files
authored
feat: Dynamically remove servers in PD (NVIDIA#5270)
Signed-off-by: Shunkang <[email protected]>
1 parent: 5cffb7e · commit: d535489

File tree

4 files changed: 124 additions (+124) and 140 deletions (-140).

tensorrt_llm/llmapi/disagg_utils.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ class MetadataServerConfig():
     hostname: str = "localhost"
     port: int = 2379
     health_check_timeout: float = 5.0
+    refresh_interval: float = 10.0
 
 
 def parse_disagg_config_file(yaml_config_file: str):

tensorrt_llm/serve/openai_disagg_server.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -35,20 +35,20 @@
 class OpenAIDisaggServer:
 
     def __init__(self,
-                 ctx_servers: List[str] = None,
-                 gen_servers: List[str] = None,
+                 ctx_servers: List[str],
+                 gen_servers: List[str],
                  req_timeout_secs: int = 180,
                  server_start_timeout_secs: int = 180,
                  ctx_router_config: Optional[RouterConfig] = None,
                  gen_router_config: Optional[RouterConfig] = None,
                  conditional_disagg_config: Optional[ConditionalDisaggConfig] = None,
-                 metadata_server_cfg: MetadataServerConfig = None):
+                 metadata_server_cfg: Optional[MetadataServerConfig] = None):
 
         self.ctx_servers = ctx_servers
         self.gen_servers = gen_servers
         self.metadata_server = create_metadata_server(metadata_server_cfg)
-        self.ctx_router = create_router(ctx_router_config, ctx_servers, self.metadata_server)
-        self.gen_router = create_router(gen_router_config, gen_servers, self.metadata_server)
+        self.ctx_router = create_router(ctx_router_config, ctx_servers, metadata_server_cfg, self.metadata_server)
+        self.gen_router = create_router(gen_router_config, gen_servers, metadata_server_cfg, self.metadata_server)
         self.conditional_disagg_config = conditional_disagg_config
 
 
@@ -76,8 +76,8 @@ async def lifespan(app: FastAPI):
 
             if self.metadata_server:
                 logger.info("Starting server monitoring via metadata service")
-                await self.ctx_router.start_server_monitoring()
-                await self.gen_router.start_server_monitoring()
+                await self.ctx_router.start_server_monitoring(metadata_server_cfg.refresh_interval)
+                await self.gen_router.start_server_monitoring(metadata_server_cfg.refresh_interval)
 
             yield
 
0 commit comments

Comments
 (0)