Skip to content

Commit f3bf683

Browse files
committed
Rework of underlying messaging again to get better performance
1 parent 967c427 commit f3bf683

27 files changed

+1754
-3096
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ dependencies = [
6060
"pyyaml>=6.0.0",
6161
"rich",
6262
"transformers",
63+
"uvloop>=0.18",
6364
]
6465

6566
[project.optional-dependencies]

src/guidellm/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
hf_logging.set_verbosity_error()
2121
logging.getLogger("transformers").setLevel(logging.ERROR)
2222

23-
from .config import (
23+
from .logger import configure_logger, logger
24+
from .settings import (
2425
DatasetSettings,
2526
Environment,
2627
LoggingSettings,
@@ -30,7 +31,6 @@
3031
reload_settings,
3132
settings,
3233
)
33-
from .logger import configure_logger, logger
3434

3535
__all__ = [
3636
"DatasetSettings",

src/guidellm/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
from guidellm.benchmark.scenario import (
1717
GenerativeTextScenario,
1818
)
19-
from guidellm.config import print_config
2019
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
2120
from guidellm.scheduler import StrategyType
21+
from guidellm.settings import print_config
2222
from guidellm.utils import DefaultGroupHandler, get_literal_vals
2323
from guidellm.utils import cli as cli_tools
2424

src/guidellm/benchmark/aggregator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,14 @@
4646
GenerativeMetrics,
4747
GenerativeRequestStats,
4848
)
49-
from guidellm.config import settings
5049
from guidellm.scheduler import (
5150
MeasuredRequestTimingsT,
5251
RequestT,
5352
ResponseT,
5453
ScheduledRequestInfo,
5554
SchedulerState,
5655
)
56+
from guidellm.settings import settings
5757
from guidellm.utils import (
5858
InfoMixin,
5959
PydanticClassRegistryMixin,

src/guidellm/benchmark/output.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@
2525
SweepProfile,
2626
ThroughputProfile,
2727
)
28-
from guidellm.config import settings
2928
from guidellm.presentation import UIDataBuilder
3029
from guidellm.presentation.injector import create_report
30+
from guidellm.settings import settings
3131
from guidellm.utils import (
3232
Colors,
3333
DistributionSummary,

src/guidellm/logger.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141

4242
from loguru import logger
4343

44-
from guidellm.config import LoggingSettings, settings
44+
from guidellm.settings import LoggingSettings, settings
4545

4646
__all__ = ["configure_logger", "logger"]
4747

src/guidellm/presentation/injector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from loguru import logger
66

7-
from guidellm.config import settings
7+
from guidellm.settings import settings
88
from guidellm.utils.text import load_text
99

1010

src/guidellm/request/loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
from transformers import PreTrainedTokenizerBase # type: ignore[import]
1313

1414
from guidellm.backend import GenerationRequest
15-
from guidellm.config import settings
1615
from guidellm.dataset import ColumnInputTypes, load_dataset
16+
from guidellm.settings import settings
1717
from guidellm.utils import StandardBaseModel
1818

1919
__all__ = [

src/guidellm/scheduler/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
RequestT,
2323
ResponseT,
2424
ScheduledRequestInfo,
25+
SchedulerMessagingPydanticRegistry,
2526
SchedulerState,
2627
SchedulerUpdateAction,
2728
SchedulerUpdateActionProgress,
@@ -75,6 +76,7 @@
7576
"ScheduledRequestInfo",
7677
"ScheduledRequestTimings",
7778
"Scheduler",
79+
"SchedulerMessagingPydanticRegistry",
7880
"SchedulerState",
7981
"SchedulerUpdateAction",
8082
"SchedulerUpdateActionProgress",

src/guidellm/scheduler/constraints.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616

1717
from pydantic import Field, field_validator
1818

19-
from guidellm.config import settings
2019
from guidellm.scheduler.objects import (
2120
ScheduledRequestInfo,
2221
SchedulerState,
2322
SchedulerUpdateAction,
2423
SchedulerUpdateActionProgress,
2524
)
25+
from guidellm.settings import settings
2626
from guidellm.utils import InfoMixin, RegistryMixin, StandardBaseModel
2727

2828
__all__ = [
@@ -35,6 +35,7 @@
3535
"MaxGlobalErrorRateConstraint",
3636
"MaxNumberConstraint",
3737
"PydanticConstraintInitializer",
38+
"RequestsExhaustedConstraint",
3839
"SerializableConstraintInitializer",
3940
"UnserializableConstraintInitializer",
4041
]
@@ -988,3 +989,47 @@ def _validate_max_error_rate(
988989
)
989990

990991
return value[0] if isinstance(value, list) and len(value) == 1 else value
992+
993+
994+
class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
    """
    Constraint that signals a stop once a fixed number of requests is used up.

    When called, the constraint compares ``num_requests`` against the
    ``created_requests`` and ``processed_requests`` counters of the supplied
    scheduler state. Request queuing is set to ``"stop"`` once the created
    count reaches ``num_requests``; request processing is set to
    ``"stop_local"`` once the processed count reaches ``num_requests``.
    """

    type_: Literal["requests_exhausted"] = "requests_exhausted"  # type: ignore[assignment]
    # Number of requests available; the scheduler counters are compared to it.
    num_requests: int

    @property
    def info(self) -> dict[str, Any]:
        """
        Extract serializable information from this constraint.

        :return: Dictionary produced by ``model_dump()`` for this instance
        """
        return self.model_dump()

    def __call__(
        self,
        state: SchedulerState,
        request_info: ScheduledRequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate the constraint against the current scheduler state.

        :param state: Scheduler state providing ``created_requests`` and
            ``processed_requests``
        :param request_info: Unused; accepted to match the constraint call
            signature
        :return: Update action carrying the queuing/processing decisions, a
            metadata dictionary with the compared values, and progress
            (remaining fraction and remaining request count)
        """
        create_exceeded = state.created_requests >= self.num_requests
        processed_exceeded = state.processed_requests >= self.num_requests

        if self.num_requests > 0:
            # Clamp to [0.0, 1.0] so over-processing never reports negative
            # remaining work.
            remaining_fraction = min(
                max(0.0, 1.0 - state.processed_requests / float(self.num_requests)),
                1.0,
            )
        else:
            # A non-positive budget is already exhausted (both flags above are
            # True for zero); avoid dividing by zero and report nothing left.
            remaining_fraction = 0.0
        remaining_requests = max(0, self.num_requests - state.processed_requests)

        return SchedulerUpdateAction(
            request_queuing="stop" if create_exceeded else "continue",
            request_processing="stop_local" if processed_exceeded else "continue",
            metadata={
                "num_requests": self.num_requests,
                "create_exceeded": create_exceeded,
                "processed_exceeded": processed_exceeded,
                "created_requests": state.created_requests,
                "processed_requests": state.processed_requests,
                "remaining_fraction": remaining_fraction,
                "remaining_requests": remaining_requests,
            },
            progress=SchedulerUpdateActionProgress(
                remaining_fraction=remaining_fraction,
                remaining_requests=remaining_requests,
            ),
        )

0 commit comments

Comments
 (0)