vllm-project
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions b/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/guidellm/__init__.py‎
Lines changed: 2 additions & 2 deletions b/‎src/guidellm/__init__.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/guidellm/__main__.py‎
Lines changed: 1 addition & 1 deletion b/‎src/guidellm/__main__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/guidellm/benchmark/aggregator.py‎
Lines changed: 1 addition & 1 deletion b/‎src/guidellm/benchmark/aggregator.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/guidellm/benchmark/output.py‎
Lines changed: 1 addition & 1 deletion b/‎src/guidellm/benchmark/output.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/guidellm/logger.py‎
Lines changed: 1 addition & 1 deletion b/‎src/guidellm/logger.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/guidellm/presentation/injector.py‎
Lines changed: 1 addition & 1 deletion b/‎src/guidellm/presentation/injector.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/guidellm/request/loader.py‎
Lines changed: 1 addition & 1 deletion b/‎src/guidellm/request/loader.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/guidellm/scheduler/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎src/guidellm/scheduler/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/guidellm/scheduler/constraints.py‎
Lines changed: 46 additions & 1 deletion b/‎src/guidellm/scheduler/constraints.py‎
Lines changed: 46 additions & 1 deletion
@@ -60,6 +60,7 @@ dependencies = [
     "pyyaml>=6.0.0",
     "rich",
     "transformers",
+    "uvloop>=0.18",
 ]
 
 [project.optional-dependencies]
 
@@ -20,7 +20,8 @@
     hf_logging.set_verbosity_error()
     logging.getLogger("transformers").setLevel(logging.ERROR)
 
-from .config import (
+from .logger import configure_logger, logger
+from .settings import (
     DatasetSettings,
     Environment,
     LoggingSettings,
@@ -30,7 +31,6 @@
     reload_settings,
     settings,
 )
-from .logger import configure_logger, logger
 
 __all__ = [
     "DatasetSettings",
 
@@ -16,9 +16,9 @@
 from guidellm.benchmark.scenario import (
     GenerativeTextScenario,
 )
-from guidellm.config import print_config
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
+from guidellm.settings import print_config
 from guidellm.utils import DefaultGroupHandler, get_literal_vals
 from guidellm.utils import cli as cli_tools
 
 
@@ -46,14 +46,14 @@
     GenerativeMetrics,
     GenerativeRequestStats,
 )
-from guidellm.config import settings
 from guidellm.scheduler import (
     MeasuredRequestTimingsT,
     RequestT,
     ResponseT,
     ScheduledRequestInfo,
     SchedulerState,
 )
+from guidellm.settings import settings
 from guidellm.utils import (
     InfoMixin,
     PydanticClassRegistryMixin,
 
@@ -25,9 +25,9 @@
     SweepProfile,
     ThroughputProfile,
 )
-from guidellm.config import settings
 from guidellm.presentation import UIDataBuilder
 from guidellm.presentation.injector import create_report
+from guidellm.settings import settings
 from guidellm.utils import (
     Colors,
     DistributionSummary,
 
@@ -41,7 +41,7 @@
 
 from loguru import logger
 
-from guidellm.config import LoggingSettings, settings
+from guidellm.settings import LoggingSettings, settings
 
 __all__ = ["configure_logger", "logger"]
 
 
@@ -4,7 +4,7 @@
 
 from loguru import logger
 
-from guidellm.config import settings
+from guidellm.settings import settings
 from guidellm.utils.text import load_text
 
 
 
@@ -12,8 +12,8 @@
 from transformers import PreTrainedTokenizerBase  # type: ignore[import]
 
 from guidellm.backend import GenerationRequest
-from guidellm.config import settings
 from guidellm.dataset import ColumnInputTypes, load_dataset
+from guidellm.settings import settings
 from guidellm.utils import StandardBaseModel
 
 __all__ = [
 
@@ -22,6 +22,7 @@
     RequestT,
     ResponseT,
     ScheduledRequestInfo,
+    SchedulerMessagingPydanticRegistry,
     SchedulerState,
     SchedulerUpdateAction,
     SchedulerUpdateActionProgress,
@@ -75,6 +76,7 @@
     "ScheduledRequestInfo",
     "ScheduledRequestTimings",
     "Scheduler",
+    "SchedulerMessagingPydanticRegistry",
     "SchedulerState",
     "SchedulerUpdateAction",
     "SchedulerUpdateActionProgress",
 
@@ -16,13 +16,13 @@
 
 from pydantic import Field, field_validator
 
-from guidellm.config import settings
 from guidellm.scheduler.objects import (
     ScheduledRequestInfo,
     SchedulerState,
     SchedulerUpdateAction,
     SchedulerUpdateActionProgress,
 )
+from guidellm.settings import settings
 from guidellm.utils import InfoMixin, RegistryMixin, StandardBaseModel
 
 __all__ = [
@@ -35,6 +35,7 @@
     "MaxGlobalErrorRateConstraint",
     "MaxNumberConstraint",
     "PydanticConstraintInitializer",
+    "RequestsExhaustedConstraint",
     "SerializableConstraintInitializer",
     "UnserializableConstraintInitializer",
 ]
@@ -988,3 +989,47 @@ def _validate_max_error_rate(
                 )
 
         return value[0] if isinstance(value, list) and len(value) == 1 else value
+
+
+class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
+    """
+    Constraint that signals a stop once a fixed request count has been reached.
+
+    Queuing is stopped when ``state.created_requests`` reaches ``num_requests``
+    and processing is stopped locally when ``state.processed_requests`` reaches
+    ``num_requests``. Progress is reported from the processed count.
+    """
+
+    type_: Literal["requests_exhausted"] = "requests_exhausted"  # type: ignore[assignment]
+    # Request count that both the created and processed counters are compared to
+    num_requests: int
+
+    @property
+    def info(self) -> dict[str, Any]:
+        """
+        Extract serializable information from this constraint.
+
+        :return: Dictionary of this model's fields as returned by ``model_dump``
+        """
+        return self.model_dump()
+
+    def __call__(
+        self,
+        state: SchedulerState,
+        request_info: ScheduledRequestInfo,  # noqa: ARG002
+    ) -> SchedulerUpdateAction:
+        """
+        Evaluate the constraint against the current scheduler state.
+
+        :param state: Scheduler state providing ``created_requests`` and
+            ``processed_requests``
+        :param request_info: Unused; accepted to match the constraint call signature
+        :return: Update action whose ``request_queuing`` is ``"stop"`` once enough
+            requests were created, whose ``request_processing`` is
+            ``"stop_local"`` once enough were processed, and which carries the
+            remaining fraction and remaining request count as progress
+        """
+        create_exceeded = state.created_requests >= self.num_requests
+        processed_exceeded = state.processed_requests >= self.num_requests
+        remaining_requests = max(0, self.num_requests - state.processed_requests)
+
+        if self.num_requests > 0:
+            # Clamp to [0.0, 1.0] so overshooting the target never goes negative
+            remaining_fraction = min(
+                max(0.0, 1.0 - state.processed_requests / float(self.num_requests)),
+                1.0,
+            )
+        else:
+            # A non-positive target cannot be divided by (zero would raise
+            # ZeroDivisionError); report no remaining work, consistent with
+            # remaining_requests being 0 in that case
+            remaining_fraction = 0.0
+
+        return SchedulerUpdateAction(
+            request_queuing="stop" if create_exceeded else "continue",
+            request_processing="stop_local" if processed_exceeded else "continue",
+            metadata={
+                "num_requests": self.num_requests,
+                "create_exceeded": create_exceeded,
+                "processed_exceeded": processed_exceeded,
+                "created_requests": state.created_requests,
+                "processed_requests": state.processed_requests,
+                "remaining_fraction": remaining_fraction,
+                "remaining_requests": remaining_requests,
+            },
+            progress=SchedulerUpdateActionProgress(
+                remaining_fraction=remaining_fraction,
+                remaining_requests=remaining_requests,
+            ),
+        )
Original file line number	Diff line number	Diff line change
`@@ -60,6 +60,7 @@ dependencies = [`
`60`	`60`	`"pyyaml>=6.0.0",`
`61`	`61`	`"rich",`
`62`	`62`	`"transformers",`
	`63`	`+ "uvloop>=0.18",`
`63`	`64`	`]`
`64`	`65`
`65`	`66`	`[project.optional-dependencies]`