Skip to content

Commit 0917f66

Browse files
author
Dmytro Parfeniuk
committed
Concurrent load generation option is implemented
1 parent ecf2984 commit 0917f66

File tree

4 files changed

+106
-55
lines changed

4 files changed

+106
-55
lines changed

src/guidellm/executor/profile_generator.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@
1717
]
1818

1919
ProfileGenerationMode = Literal[
20-
"sweep", "synchronous", "throughput", "constant", "poisson"
20+
"sweep",
21+
"synchronous",
22+
"throughput",
23+
"constant",
24+
"poisson",
25+
"concurrent",
2126
]
2227

2328

@@ -61,7 +66,7 @@ def __init__(
6166
logger.error(err)
6267
raise err
6368

64-
self._mode = mode
69+
self._mode: ProfileGenerationMode = mode
6570

6671
if self._mode in ("sweep", "throughput", "synchronous"):
6772
if rate is not None:
@@ -135,7 +140,7 @@ def generated_count(self) -> int:
135140
return self._generated_count
136141

137142
@property
138-
def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]:
143+
def profile_generation_modes(self) -> List[ProfileGenerationMode]:
139144
"""
140145
Return the list of profile modes to be run in the report.
141146
@@ -147,7 +152,8 @@ def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]:
147152
settings.num_sweep_profiles
148153
)
149154

150-
if self._mode in ["throughput", "synchronous"]:
155+
# WIP: consider moving the "concurrent" handling above this branch
156+
if self._mode in ["throughput", "synchronous", "concurrent"]:
151157
return [self._mode]
152158

153159
if self._rates is None:
@@ -156,6 +162,13 @@ def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]:
156162
if self._mode in ["constant", "poisson"]:
157163
return [self._mode] * len(self._rates)
158164

165+
# WIP
166+
# if self._mode in ["concurrent"]:
167+
# if self._rates is None:
168+
# raise ValueError("rate ")
169+
170+
# return [self._mode] * int(self._rates[0])
171+
159172
raise ValueError(f"Invalid mode: {self._mode}")
160173

161174
def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]:
@@ -173,13 +186,13 @@ def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profil
173186
current_report,
174187
)
175188

176-
if self.mode in ["constant", "poisson"]:
189+
if self.mode in ("constant", "poisson", "concurrent"):
177190
if not self.rates:
178191
err = ValueError(f"Rates are required for {self.mode} mode")
179192
logger.error(err)
180193
raise err
181194

182-
profile = self.create_fixed_rate_profile(
195+
profile: Optional[Profile] = self.create_fixed_rate_profile(
183196
self.generated_count,
184197
self.mode,
185198
self.rates,
@@ -229,9 +242,11 @@ def create_fixed_rate_profile(
229242
:return: The generated profile or None if index is out of range.
230243
:rtype: Optional[Profile]
231244
"""
245+
232246
modes_map: Dict[str, LoadGenerationMode] = {
233247
"constant": "constant",
234248
"poisson": "poisson",
249+
"concurrent": "consistent",
235250
}
236251

237252
if mode not in modes_map:

src/guidellm/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ def generate_benchmark_report(
264264
backend=backend_inst,
265265
request_generator=request_generator,
266266
mode=rate_type,
267-
rate=rate if rate_type in ("constant", "poisson") else None,
267+
rate=rate if rate_type in ("constant", "poisson", "concurrent") else None,
268268
max_number=(
269269
len(request_generator) if max_requests == "dataset" else max_requests
270270
),

src/guidellm/scheduler/base.py

Lines changed: 78 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,12 @@ def __init__(
114114
logger.error(err)
115115
raise err
116116

117-
self._generator = generator
118-
self._worker = worker
119-
self._mode = mode
120-
self._rate = rate
121-
self._max_number = max_number
122-
self._max_duration = max_duration
117+
self._generator: RequestGenerator = generator
118+
self._worker: Backend = worker
119+
self._mode: LoadGenerationMode = mode
120+
self._rate: Optional[float] = rate
121+
self._max_number: Optional[int] = max_number
122+
self._max_duration: Optional[float] = max_duration
123123

124124
self._load_generator = LoadGenerator(mode, rate)
125125

@@ -227,9 +227,7 @@ async def run(self) -> AsyncGenerator[SchedulerResult, None]:
227227
count_total = (
228228
self.max_number
229229
if self.max_number
230-
else round(self.max_duration)
231-
if self.max_duration
232-
else 0
230+
else round(self.max_duration) if self.max_duration else 0
233231
)
234232

235233
# yield initial result for progress tracking
@@ -246,9 +244,7 @@ async def run(self) -> AsyncGenerator[SchedulerResult, None]:
246244
count_completed = (
247245
min(run_count, self.max_number)
248246
if self.max_number
249-
else round(time.time() - start_time)
250-
if self.max_duration
251-
else 0
247+
else round(time.time() - start_time) if self.max_duration else 0
252248
)
253249

254250
yield SchedulerResult(
@@ -267,16 +263,16 @@ async def run(self) -> AsyncGenerator[SchedulerResult, None]:
267263
count_completed=(
268264
benchmark.request_count + benchmark.error_count
269265
if self.max_number
270-
else round(time.time() - start_time)
271-
if self.max_duration
272-
else 0
266+
else round(time.time() - start_time) if self.max_duration else 0
273267
),
274268
benchmark=benchmark,
275269
)
276270

277271
async def _run_sync(
278272
self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float
279273
) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]:
274+
"""Runs only for "synchronous" mode."""
275+
280276
for index, (request, submit_at) in enumerate(
281277
zip(self.generator, self.load_generator.times())
282278
):
@@ -298,42 +294,80 @@ async def _run_sync(
298294
async def _run_async(
299295
self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float
300296
) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]:
297+
"""
298+
Notes:
299+
If the Load Generation Mode is set to 'consistent', timestamps should
300+
not be generated in order to make as many requests as possible to
301+
simulate concurrent client interactions.
302+
"""
303+
301304
tasks = []
302305
completed = 0
303306

304-
for index, (request, submit_at) in enumerate(
305-
zip(self.generator, self.load_generator.times())
306-
):
307-
while (index + 1 - completed) >= settings.max_concurrency:
308-
await asyncio.sleep(0.1)
307+
def _completed(_task: asyncio.Task) -> None:
308+
nonlocal completed
309+
completed += 1
310+
_res = _task.result()
309311

310-
if index >= max_number or time.time() >= end_time or submit_at >= end_time:
311-
break
312+
if _res:
313+
benchmark.request_completed(_res)
314+
logger.debug("Request completed: {}", _res)
312315

313-
logger.debug(
314-
"Running asynchronous request={} at submit_at={}",
315-
request,
316-
submit_at,
317-
)
318-
319-
def _completed(_task: asyncio.Task) -> None:
320-
nonlocal completed
321-
completed += 1
322-
_res = _task.result()
323-
324-
if _res:
325-
benchmark.request_completed(_res)
326-
logger.debug("Request completed: {}", _res)
316+
if self.mode == "consistent":
317+
if self.rate is None:
318+
raise ValueError(
319+
"The rate must be specified in order to provide concurrent execution"
320+
)
321+
for index, request in enumerate(self.generator):
322+
while (index + 1 - completed) >= settings.max_concurrency:
323+
await asyncio.sleep(0.1)
324+
325+
if index >= max_number or time.time() >= end_time:
326+
break
327+
328+
logger.debug(f"Running concurrently request={request}")
329+
330+
benchmark.request_started()
331+
332+
# Create multiple concurrent tasks
333+
tasks: list[asyncio.Task] = []
334+
for _ in range(int(self.rate)):
335+
task: asyncio.Task = asyncio.create_task(
336+
self._submit_task_coroutine( # submit the call with 'Backend'
337+
request=request, submit_at=0.0, end_time=end_time
338+
)
339+
)
340+
task.add_done_callback(_completed)
341+
tasks.append(task)
342+
else:
343+
for index, (request, submit_at) in enumerate(
344+
zip(self.generator, self.load_generator.times())
345+
):
346+
while (index + 1 - completed) >= settings.max_concurrency:
347+
await asyncio.sleep(0.1)
348+
349+
if (
350+
index >= max_number
351+
or time.time() >= end_time
352+
or submit_at >= end_time
353+
):
354+
break
355+
356+
logger.debug(
357+
"Running asynchronous request={} at submit_at={}",
358+
request,
359+
submit_at,
360+
)
327361

328-
benchmark.request_started()
329-
task = asyncio.create_task(
330-
self._submit_task_coroutine(request, submit_at, end_time)
331-
)
332-
task.add_done_callback(_completed)
333-
tasks.append(task)
362+
benchmark.request_started()
363+
task = asyncio.create_task(
364+
self._submit_task_coroutine(request, submit_at, end_time)
365+
)
366+
task.add_done_callback(_completed)
367+
tasks.append(task)
334368

335-
# release control to the event loop for other tasks
336-
await asyncio.sleep(0.001)
369+
# release control to the event loop for other tasks
370+
await asyncio.sleep(0.001)
337371

338372
for compl_task in asyncio.as_completed(tasks):
339373
task_res = await compl_task

src/guidellm/scheduler/load_generator.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66

77
__all__ = ["LoadGenerationMode", "LoadGenerator"]
88

9-
LoadGenerationMode = Literal["synchronous", "constant", "poisson", "throughput"]
9+
LoadGenerationMode = Literal[
10+
"synchronous", "constant", "poisson", "throughput", "consistent"
11+
]
1012

1113

1214
class LoadGenerator:
@@ -18,7 +20,7 @@ class LoadGenerator:
1820
timestamps based on the rate provided during initialization.
1921
2022
:param mode: The mode of load generation. Valid options are "constant",
21-
"poisson", "throughput", and "synchronous".
23+
"poisson", "throughput", and "synchronous", "consistent"
2224
:type mode: LoadGenerationMode
2325
:param rate: The rate at which to generate timestamps. This value is
2426
interpreted differently depending on the mode.
@@ -52,8 +54,8 @@ def __init__(self, mode: LoadGenerationMode, rate: Optional[float] = None):
5254
logger.error(error)
5355
raise error
5456

55-
self._mode = mode
56-
self._rate = rate
57+
self._mode: LoadGenerationMode = mode
58+
self._rate: Optional[float] = rate
5759
logger.debug(
5860
"Initialized LoadGenerator with mode: {mode}, rate: {rate}",
5961
mode=mode,

0 commit comments

Comments
 (0)