|
20 | 20 | RPCLoadAdapterRequest,
|
21 | 21 | RPCProcessRequest,
|
22 | 22 | RPCResetPrefixCacheRequest,
|
23 |
| - RPCStartupRequest, RPCStartupResponse, |
24 |
| - RPCUProfileRequest) |
| 23 | + RPCSleepRequest, RPCStartupRequest, |
| 24 | + RPCStartupResponse, |
| 25 | + RPCUProfileRequest, RPCWakeUpRequest) |
25 | 26 | # yapf: enable
|
26 | 27 | from vllm.logger import init_logger
|
27 | 28 | from vllm.outputs import RequestOutput
|
@@ -242,6 +243,10 @@ def handle_new_input(self):
|
242 | 243 | self._handle_load_adapter_request(request)
|
243 | 244 | elif isinstance(request, RPCResetPrefixCacheRequest):
|
244 | 245 | self.reset_prefix_cache()
|
| 246 | + elif isinstance(request, RPCSleepRequest): |
| 247 | + self.sleep(request.value) |
| 248 | + elif isinstance(request, RPCWakeUpRequest): |
| 249 | + self.wake_up() |
245 | 250 | else:
|
246 | 251 | raise ValueError("Unknown RPCRequest Type: "
|
247 | 252 | f"{type(request)}")
|
@@ -369,6 +374,12 @@ def stop_profile(self) -> None:
|
369 | 374 | def reset_prefix_cache(self) -> bool:
|
370 | 375 | return self.engine.reset_prefix_cache()
|
371 | 376 |
|
| 377 | + def sleep(self, level: int = 1) -> None: |
| 378 | + self.engine.sleep(level) |
| 379 | + |
| 380 | + def wake_up(self) -> None: |
| 381 | + self.engine.wake_up() |
| 382 | + |
372 | 383 |
|
373 | 384 | def signal_handler(*_) -> None:
|
374 | 385 | raise KeyboardInterrupt("MQLLMEngine terminated")
|
|
0 commit comments