Skip to content

Commit f8161ed

Browse files
committed
Ensure style and types pass, remove tests that are no longer relevant, finish up the objects module with docs and tests
1 parent 4183512 commit f8161ed

Some content is hidden

Large commits have some of their content hidden by default. Use the search box below to find content that may be hidden.

66 files changed

+1398
-6187
lines changed

src/guidellm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(
1414
devnull
1515
), contextlib.redirect_stdout(devnull):
16-
from transformers.utils import logging as hf_logging
16+
from transformers.utils import logging as hf_logging # type: ignore[import]
1717

1818
# Set the log level for the transformers library to ERROR
1919
# to ignore None of PyTorch, TensorFlow found

src/guidellm/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
)
1515

1616

17-
def parse_json(ctx, param, value):
17+
def parse_json(ctx, param, value): # noqa: ARG001
1818
if value is None:
1919
return None
2020
try:
@@ -23,7 +23,7 @@ def parse_json(ctx, param, value):
2323
raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
2424

2525

26-
def parse_number_str(ctx, param, value):
26+
def parse_number_str(ctx, param, value): # noqa: ARG001
2727
if value is None:
2828
return None
2929

src/guidellm/backend/openai.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def __init__(
9292
if max_output_tokens is not None
9393
else settings.openai.max_output_tokens
9494
)
95-
self._async_client: Optional[httpx.Client] = None
95+
self._async_client: Optional[httpx.AsyncClient] = None
9696

9797
@property
9898
def target(self) -> str:
@@ -311,11 +311,12 @@ def _get_async_client(self) -> httpx.AsyncClient:
311311
:return: The async HTTP client.
312312
"""
313313
if self._async_client is None:
314-
self._async_client = httpx.AsyncClient(
315-
http2=self.http2, timeout=self.timeout
316-
)
314+
client = httpx.AsyncClient(http2=self.http2, timeout=self.timeout)
315+
self._async_client = client
316+
else:
317+
client = self._async_client
317318

318-
return self._async_client
319+
return client
319320

320321
def _headers(self) -> Dict[str, str]:
321322
headers = {

src/guidellm/benchmark/aggregator.py

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -141,17 +141,17 @@ class BenchmarkAggregator(ABC, StandardBaseModel, Generic[BENCH, REQ, RES]):
141141
"if any. These are requests that were not included in the final results."
142142
)
143143
)
144-
worker_description: Optional[
145-
Union[GenerativeRequestsWorkerDescription, WorkerDescription]
144+
worker_description: Union[
145+
GenerativeRequestsWorkerDescription, WorkerDescription
146146
] = Field(
147147
description=(
148148
"The description and specifics for the worker used to resolve requests "
149149
"for this benchmark."
150150
),
151151
discriminator="type_",
152152
)
153-
request_loader_description: Optional[
154-
Union[GenerativeRequestLoaderDescription, RequestLoaderDescription]
153+
request_loader_description: Union[
154+
GenerativeRequestLoaderDescription, RequestLoaderDescription
155155
] = Field(
156156
description=(
157157
"The description and specifics for the request loader used to create "
@@ -391,26 +391,26 @@ def add_result(
391391
"completed, canceled, or errored."
392392
)
393393

394-
self.queued_time += (
394+
self.queued_time += ( # type: ignore[misc]
395395
result.request_info.dequeued_time - result.request_info.queued_time
396396
)
397-
self.scheduled_time_delay += (
397+
self.scheduled_time_delay += ( # type: ignore[misc]
398398
result.request_info.scheduled_time - result.request_info.dequeued_time
399399
)
400400
sleep_time = max(
401401
0.0,
402402
result.request_info.targeted_start_time
403403
- result.request_info.scheduled_time,
404404
)
405-
self.scheduled_time_sleep += sleep_time
406-
time_to_worker_start = (
405+
self.scheduled_time_sleep += sleep_time # type: ignore[misc]
406+
time_to_worker_start = ( # type: ignore[misc]
407407
result.request_info.worker_start - result.request_info.scheduled_time
408408
)
409-
self.worker_start_delay += time_to_worker_start - sleep_time
410-
self.worker_time += (
409+
self.worker_start_delay += time_to_worker_start - sleep_time # type: ignore[misc]
410+
self.worker_time += ( # type: ignore[misc]
411411
result.request_info.worker_end - result.request_info.worker_start
412412
)
413-
self.worker_start_time_targeted_delay += (
413+
self.worker_start_time_targeted_delay += ( # type: ignore[misc]
414414
result.request_info.worker_start - result.request_info.targeted_start_time
415415
)
416416

@@ -433,9 +433,11 @@ def add_result(
433433

434434
if (
435435
self.cooldown_number
436+
and self.max_number
436437
and total_completed > self.max_number - self.cooldown_number
437438
) or (
438439
self.cooldown_duration
440+
and self.max_duration
439441
and result.request_info.worker_start
440442
>= global_start_time + self.max_duration - self.cooldown_duration
441443
):
@@ -459,14 +461,14 @@ def compile(self) -> BENCH:
459461
...
460462

461463

462-
AGG = TypeVar("AGG", bound=BenchmarkAggregator[BENCH, REQ, RES])
464+
AGG = TypeVar("AGG", bound=BenchmarkAggregator)
463465

464466

465467
class GenerativeBenchmarkAggregator(
466468
BenchmarkAggregator[GenerativeBenchmark, GenerationRequest, ResponseSummary]
467469
):
468470
type_: Literal["generative_benchmark_aggregator"] = (
469-
"generative_benchmark_aggregator"
471+
"generative_benchmark_aggregator" # type: ignore[assignment]
470472
)
471473
processor: Optional[Union[str, Path, Any]] = Field(
472474
description=(
@@ -531,20 +533,26 @@ def add_result(
531533
if not super().add_result(result):
532534
return False
533535

534-
self.request_start_time_delay += (
536+
if result.request is None:
537+
raise ValueError("Request is None, cannot add result.")
538+
539+
if result.response is None:
540+
raise ValueError("Response is None, cannot add result.")
541+
542+
self.request_start_time_delay += ( # type: ignore[misc]
535543
result.response.start_time - result.request_info.worker_start
536544
)
537-
self.request_start_time_targeted_delay += (
545+
self.request_start_time_targeted_delay += ( # type: ignore[misc]
538546
result.response.start_time - result.request_info.targeted_start_time
539547
)
540-
self.request_time_delay += (
548+
self.request_time_delay += ( # type: ignore[misc]
541549
(result.response.start_time - result.request_info.worker_start)
542550
+ result.request_info.worker_end
543551
- result.response.end_time
544552
)
545-
self.request_time += result.response.end_time - result.response.start_time
553+
self.request_time += result.response.end_time - result.response.start_time # type: ignore[misc]
546554

547-
self.time_to_first_token += (
555+
self.time_to_first_token += ( # type: ignore[misc]
548556
(result.response.first_iter_time - result.response.start_time) * 1000.0
549557
if result.response.first_iter_time
550558
else 0.0
@@ -590,9 +598,9 @@ def compile(self) -> GenerativeBenchmark:
590598
run_stats=BenchmarkRunStats(
591599
start_time=self.scheduler_created_requests.start_time,
592600
end_time=time.time(),
593-
total_successful=self.successful_requests.total,
594-
total_incomplete=self.incomplete_requests.total,
595-
total_errored=self.errored_requests.total,
601+
total_successful=int(self.successful_requests.total),
602+
total_incomplete=int(self.incomplete_requests.total),
603+
total_errored=int(self.errored_requests.total),
596604
queued_time_avg=self.queued_time.mean,
597605
scheduled_time_delay_avg=self.scheduled_time_delay.mean,
598606
scheduled_time_sleep_avg=self.scheduled_time_sleep.mean,
@@ -621,6 +629,12 @@ def _compile_results(
621629
error: List[GenerativeTextErrorStats] = []
622630

623631
for result in self.results:
632+
if result.request is None:
633+
raise ValueError("Request is None, cannot compile results.")
634+
635+
if result.response is None:
636+
raise ValueError("Response is None, cannot compile results.")
637+
624638
prompt_tokens = self._compile_tokens_count(
625639
value=str(result.request.content),
626640
requests_tokens=result.response.request_prompt_tokens,
@@ -639,7 +653,7 @@ def _compile_results(
639653
if result.request_info.canceled:
640654
incomplete.append(
641655
GenerativeTextErrorStats(
642-
error=result.response.error,
656+
error=result.response.error or "",
643657
request_id=result.request.request_id,
644658
request_type=result.request.request_type,
645659
scheduler_info=result.request_info,
@@ -656,7 +670,7 @@ def _compile_results(
656670
elif result.request_info.errored:
657671
error.append(
658672
GenerativeTextErrorStats(
659-
error=result.response.error,
673+
error=result.response.error or "",
660674
request_id=result.request.request_id,
661675
request_type=result.request.request_type,
662676
scheduler_info=result.request_info,
@@ -682,8 +696,8 @@ def _compile_results(
682696
output_tokens=output_tokens,
683697
start_time=result.response.start_time,
684698
end_time=result.response.end_time,
685-
first_token_time=result.response.first_iter_time,
686-
last_token_time=result.response.last_iter_time,
699+
first_token_time=result.response.first_iter_time or -1,
700+
last_token_time=result.response.last_iter_time or -1,
687701
)
688702
)
689703

0 commit comments

Comments (0)