Skip to content

Commit 69c86bf

Browse files
Implement incremental rate type
1 parent 8f1e001 commit 69c86bf

File tree

5 files changed

+260
-3
lines changed

5 files changed

+260
-3
lines changed

src/guidellm/__main__.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,28 @@ def benchmark():
149149
help=(
150150
"Benchmark rate(s) to test. Meaning depends on profile: "
151151
"sweep=number of benchmarks, concurrent=concurrent requests, "
152-
"async/constant/poisson=requests per second."
152+
"async/constant/poisson=requests per second. "
153+
"Not used for incremental profile."
153154
),
154155
)
156+
@click.option(
157+
"--start-rate",
158+
type=float,
159+
default=BenchmarkGenerativeTextArgs.get_default("start_rate"),
160+
help="Initial rate for incremental profile in requests per second.",
161+
)
162+
@click.option(
163+
"--increment-factor",
164+
type=float,
165+
default=BenchmarkGenerativeTextArgs.get_default("increment_factor"),
166+
help="Factor by which to increase rate over time for incremental profile.",
167+
)
168+
@click.option(
169+
"--rate-limit",
170+
type=int,
171+
default=BenchmarkGenerativeTextArgs.get_default("rate_limit"),
172+
help="Maximum rate cap for incremental profile.",
173+
)
155174
# Backend configuration
156175
@click.option(
157176
"--backend",

src/guidellm/benchmark/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from .profiles import (
2222
AsyncProfile,
2323
ConcurrentProfile,
24+
IncrementalProfile,
2425
Profile,
2526
ProfileType,
2627
SweepProfile,
@@ -81,6 +82,7 @@
8182
"GenerativeRequestsAccumulator",
8283
"GenerativeTextMetricsSummary",
8384
"GenerativeVideoMetricsSummary",
85+
"IncrementalProfile",
8486
"Profile",
8587
"ProfileType",
8688
"RunningMetricStats",

src/guidellm/benchmark/profiles.py

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from guidellm import settings
3030
from guidellm.scheduler import (
3131
AsyncConstantStrategy,
32+
AsyncIncrementalStrategy,
3233
AsyncPoissonStrategy,
3334
ConcurrentStrategy,
3435
Constraint,
@@ -46,6 +47,7 @@
4647
__all__ = [
4748
"AsyncProfile",
4849
"ConcurrentProfile",
50+
"IncrementalProfile",
4951
"Profile",
5052
"ProfileType",
5153
"SweepProfile",
@@ -54,7 +56,7 @@
5456
]
5557

5658
ProfileType = Annotated[
57-
Literal["synchronous", "concurrent", "throughput", "async", "sweep"],
59+
Literal["synchronous", "concurrent", "throughput", "async", "sweep", "incremental"],
5860
"Profile type identifiers for polymorphic deserialization",
5961
]
6062

@@ -712,3 +714,120 @@ def next_strategy(
712714
)
713715
else:
714716
raise ValueError(f"Invalid strategy type: {self.strategy_type}")
717+
718+
719+
@Profile.register("incremental")
720+
class IncrementalProfile(ThroughputProfile):
721+
"""
722+
Incremental rate execution profile with incremental load over time.
723+
724+
Schedules requests starting at a base rate and incrementally increasing
725+
the rate by a factor over time until reaching an optional rate limit.
726+
"""
727+
728+
type_: Literal["incremental"] = "incremental" # type: ignore[assignment]
729+
start_rate: PositiveFloat = Field(
730+
description="Initial rate at which to schedule requests in requests per second",
731+
)
732+
increment_factor: PositiveFloat = Field(
733+
description="Factor by which to increase the rate over time",
734+
)
735+
rate_limit: PositiveInt | None = Field(
736+
default=None,
737+
description="Maximum rate cap after which load remains constant",
738+
)
739+
initial_burst: bool = Field(
740+
default=True,
741+
description=(
742+
"Whether to send initial burst of math.floor(start_rate) requests "
743+
"to reach target rate"
744+
),
745+
)
746+
747+
@classmethod
748+
def resolve_args(
749+
cls,
750+
rate_type: str,
751+
rate: list[float] | None,
752+
random_seed: int,
753+
start_rate: float | None = None,
754+
increment_factor: float | None = None,
755+
rate_limit: int | None = None,
756+
**kwargs: Any,
757+
) -> dict[str, Any]:
758+
"""
759+
Resolve arguments for incremental profile construction.
760+
761+
:param rate_type: Profile type identifier
762+
:param rate: Rate parameter (must be None for incremental)
763+
:param random_seed: Random seed (ignored)
764+
:param start_rate: Initial rate in requests per second
765+
:param increment_factor: Rate increase factor over time
766+
:param rate_limit: Optional maximum rate cap
767+
:param kwargs: Additional arguments passed through unchanged
768+
:return: Resolved arguments dictionary
769+
:raises ValueError: If rate is not None or required params missing
770+
"""
771+
_ = random_seed # unused
772+
if rate_type != "incremental":
773+
raise ValueError("Rate type must be 'incremental' for incremental profile")
774+
775+
if rate is not None:
776+
raise ValueError(
777+
"rate does not apply to incremental profile, it must be set to None "
778+
"or not set at all. Use start_rate and increment_factor instead."
779+
)
780+
781+
if start_rate is None:
782+
raise ValueError("start_rate is required for incremental profile")
783+
784+
if increment_factor is None:
785+
raise ValueError("increment_factor is required for incremental profile")
786+
787+
if start_rate <= 0:
788+
raise ValueError("start_rate must be a positive number")
789+
790+
if increment_factor <= 0:
791+
raise ValueError("increment_factor must be a positive number")
792+
793+
if rate_limit is not None and rate_limit <= 0:
794+
raise ValueError("rate_limit must be a positive integer")
795+
796+
kwargs["start_rate"] = start_rate
797+
kwargs["increment_factor"] = increment_factor
798+
if rate_limit is not None:
799+
kwargs["rate_limit"] = rate_limit
800+
801+
return kwargs
802+
803+
@property
804+
def strategy_types(self) -> list[StrategyType]:
805+
"""
806+
:return: Single incremental strategy type
807+
"""
808+
return [self.type_]
809+
810+
def next_strategy(
811+
self,
812+
prev_strategy: SchedulingStrategy | None,
813+
prev_benchmark: Benchmark | None,
814+
) -> AsyncIncrementalStrategy | None:
815+
"""
816+
Generate incremental strategy or None if already completed.
817+
818+
:param prev_strategy: Previously completed strategy (unused)
819+
:param prev_benchmark: Benchmark results from previous execution (unused)
820+
:return: AsyncIncrementalStrategy for first execution, None afterward
821+
"""
822+
_ = (prev_strategy, prev_benchmark) # unused
823+
if len(self.completed_strategies) >= 1:
824+
return None
825+
826+
return AsyncIncrementalStrategy(
827+
start_rate=self.start_rate,
828+
increment_factor=self.increment_factor,
829+
rate_limit=self.rate_limit,
830+
initial_burst=self.initial_burst,
831+
max_concurrency=self.max_concurrency,
832+
startup_duration=self.startup_duration,
833+
)

src/guidellm/scheduler/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
)
3939
from .strategies import (
4040
AsyncConstantStrategy,
41+
AsyncIncrementalStrategy,
4142
AsyncPoissonStrategy,
4243
ConcurrentStrategy,
4344
SchedulingStrategy,
@@ -51,6 +52,7 @@
5152

5253
__all__ = [
5354
"AsyncConstantStrategy",
55+
"AsyncIncrementalStrategy",
5456
"AsyncPoissonStrategy",
5557
"BackendInterface",
5658
"BackendT",

src/guidellm/scheduler/strategies.py

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from __future__ import annotations
1717

1818
import asyncio
19+
import math
1920
import random
2021
from abc import abstractmethod
2122
from multiprocessing import Lock, Value
@@ -28,6 +29,7 @@
2829

2930
__all__ = [
3031
"AsyncConstantStrategy",
32+
"AsyncIncrementalStrategy",
3133
"AsyncPoissonStrategy",
3234
"ConcurrentStrategy",
3335
"SchedulingStrategy",
@@ -39,7 +41,9 @@
3941

4042

4143
StrategyType = Annotated[
42-
Literal["synchronous", "concurrent", "throughput", "constant", "poisson"],
44+
Literal[
45+
"synchronous", "concurrent", "throughput", "constant", "poisson", "incremental"
46+
],
4347
"Valid strategy type identifiers for scheduling request patterns",
4448
]
4549

@@ -627,3 +631,114 @@ def request_completed(self, request_info: RequestInfo):
627631
:param request_info: Completed request metadata (unused)
628632
"""
629633
_ = request_info # request_info unused for async poisson strategy
634+
635+
636+
@SchedulingStrategy.register("incremental")
637+
class AsyncIncrementalStrategy(ThroughputStrategy):
638+
"""
639+
Incremental rate scheduling with gradual load increase over time.
640+
641+
Schedules requests starting at a base rate and incrementally increasing
642+
the rate by a factor over time until reaching an optional rate limit.
643+
Supports initial burst mode to quickly reach the target starting rate.
644+
Useful for finding system saturation points or progressive load testing.
645+
"""
646+
647+
type_: Literal["incremental"] = "incremental" # type: ignore[assignment]
648+
start_rate: float = Field(
649+
description="Initial rate at which to schedule requests in requests/second",
650+
gt=0,
651+
)
652+
increment_factor: float = Field(
653+
description="Factor by which to increase the rate over time",
654+
gt=0,
655+
)
656+
rate_limit: int | None = Field(
657+
default=None,
658+
description="Maximum rate cap after which load remains constant",
659+
gt=0,
660+
)
661+
initial_burst: bool = Field(
662+
default=True,
663+
description=(
664+
"Whether to send initial burst of math.floor(start_rate) requests "
665+
"to reach target rate"
666+
),
667+
)
668+
669+
_process_offset: float | None = PrivateAttr(None)
670+
_burst_sent: bool = PrivateAttr(False)
671+
672+
def __str__(self) -> str:
673+
"""
674+
:return: String identifier with start rate and increment factor
675+
"""
676+
return f"incremental@{self.start_rate:.2f}+{self.increment_factor:.2f}"
677+
678+
def init_processes_timings(
679+
self,
680+
worker_count: int,
681+
max_concurrency: int,
682+
startup_duration: float,
683+
):
684+
"""
685+
Initialize incremental-specific timing state.
686+
687+
:param worker_count: Number of worker processes to coordinate
688+
:param max_concurrency: Maximum number of concurrent requests allowed
689+
:param startup_duration: Duration in seconds for request startup ramping
690+
"""
691+
super().init_processes_timings(worker_count, max_concurrency, startup_duration)
692+
with self._processes_lock:
693+
self._process_offset = None
694+
695+
async def next_request_time(self, offset: int) -> float:
696+
"""
697+
Calculate next request time with incremental rate increase.
698+
699+
Implements gradual rate increase: rate = start_rate + (increment_factor * elapsed_time)
700+
Optionally sends initial burst and caps at rate_limit.
701+
702+
:param offset: Unused for incremental strategy
703+
:return: Next request time based on incremental rate calculation
704+
"""
705+
_ = offset # offset unused for incremental strategy
706+
start_time = await self.get_processes_start_time()
707+
708+
# Handle initial burst if enabled
709+
if self.initial_burst and not self._burst_sent:
710+
self._burst_sent = True
711+
burst_count = math.floor(self.start_rate)
712+
for _ in range(burst_count):
713+
pass
714+
if self._process_offset is None:
715+
self._process_offset = start_time
716+
717+
if self._process_offset is None:
718+
self._process_offset = start_time
719+
720+
current_time = time.time()
721+
if current_time <= start_time:
722+
return start_time
723+
724+
# Calculate current rate based on elapsed time
725+
elapsed_time = current_time - start_time
726+
next_rate = self.start_rate + (self.increment_factor * elapsed_time)
727+
728+
# Cap at rate limit if specified
729+
if self.rate_limit and next_rate >= self.rate_limit:
730+
increment = 1.0 / self.rate_limit
731+
else:
732+
increment = 1.0 / next_rate
733+
734+
self._process_offset += increment
735+
736+
return self._process_offset
737+
738+
def request_completed(self, request_info: RequestInfo):
739+
"""
740+
Handle request completion (no-op for incremental strategy).
741+
742+
:param request_info: Completed request metadata (unused)
743+
"""
744+
_ = request_info # request_info unused for async incremental strategy

0 commit comments

Comments
 (0)