Skip to content

Commit 5694032

Browse files
authored
[Build][Router] Update vllm to v0.13.0 (#770)
* build: Update vllm to v0.13.0
  Signed-off-by: Shern Shiou Tan <shernshiou@gmail.com>
* feat: Add health_check_interval and lmcache_worker_timeout
  Signed-off-by: Shern Shiou Tan <shernshiou@gmail.com>
---------
Signed-off-by: Shern Shiou Tan <shernshiou@gmail.com>
1 parent 63c056e commit 5694032

File tree

5 files changed: +31 -9 lines changed

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ classifiers = [
1414
dependencies = [
1515
"aiofiles==24.1.0",
1616
"black>=25.1.0",
17-
"fastapi==0.115.8",
17+
"fastapi==0.128.0",
1818
"aiohttp[speedups]==3.13.0",
1919
"kubernetes==32.0.0",
2020
"numpy==1.26.4",
@@ -45,13 +45,13 @@ semantic_cache = [
4545
"huggingface-hub==0.34.0",
4646
]
4747
lmcache = [
48-
"lmcache==0.3.9post2",
49-
"vllm==0.11.0",
48+
"lmcache==0.3.11",
49+
"vllm==0.13.0",
5050
]
5151
test = [
5252
"pytest>=8.3.4",
5353
"pytest-asyncio>=0.25.3",
54-
"vllm==0.11.0"
54+
"vllm==0.13.0"
5555
]
5656

5757
[build-system]

src/vllm_router/app.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,8 @@ def initialize_all(app: FastAPI, args):
253253
prefill_model_labels=args.prefill_model_labels,
254254
decode_model_labels=args.decode_model_labels,
255255
kv_aware_threshold=args.kv_aware_threshold,
256+
lmcache_health_check_interval=args.lmcache_health_check_interval,
257+
lmcache_worker_timeout=args.lmcache_worker_timeout,
256258
)
257259

258260
# Initialize feature gates

src/vllm_router/parsers/parser.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,20 @@ def parse_args():
399399
help="The threshold for kv-aware routing.",
400400
)
401401

402+
parser.add_argument(
403+
"--lmcache-health-check-interval",
404+
type=float,
405+
default=5.0,
406+
help="Health check interval for LMCache worker (seconds)",
407+
)
408+
409+
parser.add_argument(
410+
"--lmcache-worker-timeout",
411+
type=float,
412+
default=30.0,
413+
help="Timeout for LMCache worker (seconds)",
414+
)
415+
402416
args = parser.parse_args()
403417
args = load_initial_config_from_config_file_if_required(parser, args)
404418

src/vllm_router/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
aiofiles==24.1.0
22
aiohttp[speedups]==3.13.0
3-
fastapi==0.115.8
3+
fastapi==0.128.0
44
kubernetes==32.0.0
55
numpy==1.26.4
66

src/vllm_router/routers/routing_logic.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,8 @@ def __init__(
244244
lmcache_controller_port: int,
245245
session_key: str,
246246
kv_aware_threshold: int = 2000,
247+
health_check_interval: float = 5.0,
248+
lmcache_worker_timeout: float = 30.0,
247249
):
248250
self.lmcache_controller_port = lmcache_controller_port
249251
logger.info(
@@ -253,7 +255,9 @@ def __init__(
253255
{
254256
"pull": f"0.0.0.0:{self.lmcache_controller_port}",
255257
"reply": None,
256-
}
258+
},
259+
health_check_interval,
260+
lmcache_worker_timeout,
257261
)
258262
self.req_id = 0
259263
self.instance_id_to_ip = {}
@@ -528,9 +532,11 @@ def initialize_routing_logic(
528532
elif routing_logic == RoutingLogic.KVAWARE:
529533
logger.info("Initializing kvaware routing logic")
530534
router = KvawareRouter(
531-
kwargs.get("lmcache_controller_port"),
532-
kwargs.get("session_key"),
533-
kwargs.get("kv_aware_threshold"),
535+
lmcache_controller_port=kwargs.get("lmcache_controller_port"),
536+
session_key=kwargs.get("session_key"),
537+
kv_aware_threshold=kwargs.get("kv_aware_threshold"),
538+
health_check_interval=kwargs.get("lmcache_health_check_interval"),
539+
lmcache_worker_timeout=kwargs.get("lmcache_worker_timeout"),
534540
)
535541
router.start_kv_manager()
536542
return router

0 commit comments

Comments
 (0)