Skip to content

Commit 8c70303

Browse files
committed
Latest state with the bulk of tests for the scheduler and bug fixes
1 parent 92b41e4 commit 8c70303

23 files changed

+5796
-557
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ keywords = [
4444
]
4545
dependencies = [
4646
"click>=8.0.0,<8.2.0",
47+
"culsans~=0.9.0",
4748
"datasets",
4849
"ftfy>=6.0.0",
4950
"httpx[http2]<1.0.0",
5051
"loguru",
52+
"msgpack",
5153
"numpy",
5254
"pillow",
5355
"protobuf",

src/guidellm/backend/interface.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from abc import ABC, abstractmethod
2+
from collections.abc import AsyncIterator
3+
from typing import (
4+
Any,
5+
Generic,
6+
Literal,
7+
Optional,
8+
TypeVar,
9+
)
10+
11+
from pydantic import Field
12+
13+
from guidellm.objects import StandardBaseModel
14+
from guidellm.scheduler import (
15+
RequestT,
16+
RequestTimingsT,
17+
ResponseT,
18+
ScheduledRequestInfo,
19+
)
20+
21+
22+
class BackendInterface(ABC, Generic[RequestT, RequestTimingsT, ResponseT]):
    """
    Abstract interface for request processing backends.

    Generic over the request type, the request timings type, and the response
    type. Note: before process_startup is invoked, the implementation must
    ensure all properties are pickleable.
    """

    @property
    @abstractmethod
    def processes_limit(self) -> Optional[int]:
        """
        :return: Maximum worker processes supported, or None if unlimited.
        """

    @property
    @abstractmethod
    def requests_limit(self) -> Optional[int]:
        """
        :return: Maximum concurrent requests supported, or None if unlimited.
        """

    @abstractmethod
    def info(self) -> dict[str, Any]:
        """
        :return: Backend metadata, including the model and any initialization
            and configuration information.
        """
        ...

    @abstractmethod
    async def process_startup(self) -> None:
        """
        Perform backend initialization and startup procedures.

        :raises: Implementation-specific exceptions for startup failures.
        """

    @abstractmethod
    async def validate(self) -> None:
        """
        Validate backend configuration and operational status.

        :raises: Implementation-specific exceptions for validation failures.
        """

    @abstractmethod
    async def process_shutdown(self) -> None:
        """
        Perform backend cleanup and shutdown procedures.

        :raises: Implementation-specific exceptions for shutdown failures.
        """

    @abstractmethod
    async def resolve(
        self,
        request: RequestT,
        request_info: ScheduledRequestInfo[RequestTimingsT],
        history: Optional[list[tuple[RequestT, ResponseT]]] = None,
    ) -> AsyncIterator[tuple[ResponseT, ScheduledRequestInfo[RequestTimingsT]]]:
        """
        Process a request and yield incremental response updates.

        :param request: The request object to process.
        :param request_info: Scheduling metadata and timing information.
        :param history: Optional conversation history for multi-turn requests.
        :yield: Tuples of (response, updated_request_info) for each response chunk.
        :raises: Implementation-specific exceptions for processing failures.
        """
        # Without a ``yield`` in the body, ``async def`` declares a coroutine
        # that *returns* an AsyncIterator rather than an async generator, which
        # contradicts the ``:yield:`` contract documented above. The unreachable
        # ``yield`` makes this declaration an async generator function.
        raise NotImplementedError
        yield  # pragma: no cover
87+
88+
BackendT = TypeVar("BackendT", bound="BackendInterface")

src/guidellm/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ class Settings(BaseSettings):
133133
max_concurrency: int = 512
134134
max_worker_processes: int = 10
135135
max_add_requests_per_loop: int = 20
136+
scheduler_start_delay_non_distributed: float = 0.1
136137

137138
# Data settings
138139
dataset: DatasetSettings = DatasetSettings()

src/guidellm/scheduler/__init__.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
ScheduledRequestInfo,
2626
SchedulerState,
2727
SchedulerUpdateAction,
28+
SchedulerUpdateActionProgress,
2829
)
2930
from .scheduler import Scheduler
3031
from .strategy import (
@@ -41,9 +42,8 @@
4142
StrategyType,
4243
SynchronousStrategy,
4344
ThroughputStrategy,
44-
strategy_display_str,
4545
)
46-
from .worker import WorkerProcess, worker_sync_iterable_to_async
46+
from .worker import WorkerProcess
4747
from .worker_group import WorkerProcessGroup
4848

4949
__all__ = [
@@ -81,13 +81,12 @@
8181
"Scheduler",
8282
"SchedulerState",
8383
"SchedulerUpdateAction",
84+
"SchedulerUpdateActionProgress",
8485
"SchedulingStrategy",
8586
"StrategyT",
8687
"StrategyType",
8788
"SynchronousStrategy",
8889
"ThroughputStrategy",
8990
"WorkerProcess",
9091
"WorkerProcessGroup",
91-
"strategy_display_str",
92-
"worker_sync_iterable_to_async",
9392
]

0 commit comments

Comments
 (0)