Skip to content

Commit 34f851c

Browse files
committed
latest working state with data loaders and benchmarking API
1 parent faacb63 commit 34f851c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+3047
-2163
lines changed

plot.png

42.3 KB
Loading

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ dependencies = [
3232
"loguru",
3333
"numpy",
3434
"pillow",
35+
"protobuf",
3536
"pydantic>=2.0.0",
3637
"pydantic-settings>=2.0.0",
3738
"pyyaml>=6.0.0",

src/guidellm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@
1414

1515
from .config import settings
1616
from .logger import configure_logger, logger
17-
from .main import generate_benchmark_report
17+
# from .main import generate_benchmark_report
1818

1919
__all__ = ["configure_logger", "logger", "settings", "generate_benchmark_report"]

src/guidellm/backend/backend.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import asyncio
21
from abc import ABC, abstractmethod
32
from pathlib import Path
43
from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Type, Union
@@ -110,7 +109,7 @@ def info(self) -> Dict[str, Any]:
110109
"""
111110
...
112111

113-
def validate(self):
112+
async def validate(self):
114113
"""
115114
Handle final setup and validate the backend is ready for use.
116115
If not successful, raises the appropriate exception.
@@ -121,13 +120,10 @@ def validate(self):
121120
if not models:
122121
raise ValueError("No models available for the backend")
123122

124-
async def _test_request():
125-
async for _ in self.text_completions(
126-
prompt="Test connection", output_token_count=1
127-
): # type: ignore[attr-defined]
128-
pass
129-
130-
asyncio.run(_test_request())
123+
async for _ in self.text_completions(
124+
prompt="Test connection", output_token_count=1
125+
): # type: ignore[attr-defined]
126+
pass
131127

132128
@abstractmethod
133129
def check_setup(self):

src/guidellm/backend/openai.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ async def text_completions( # type: ignore[override]
202202

203203
try:
204204
async for resp in self._iterative_completions_request(
205-
type_="text",
205+
type_="text_completions",
206206
request_id=request_id,
207207
request_prompt_tokens=prompt_token_count,
208208
request_output_tokens=output_token_count,
@@ -277,7 +277,7 @@ async def chat_completions( # type: ignore[override]
277277

278278
try:
279279
async for resp in self._iterative_completions_request(
280-
type_="chat",
280+
type_="chat_completions",
281281
request_id=request_id,
282282
request_prompt_tokens=prompt_token_count,
283283
request_output_tokens=output_token_count,
@@ -403,16 +403,16 @@ def _create_chat_messages(
403403

404404
async def _iterative_completions_request(
405405
self,
406-
type_: Literal["text", "chat"],
406+
type_: Literal["text_completions", "chat_completions"],
407407
request_id: Optional[str],
408408
request_prompt_tokens: Optional[int],
409409
request_output_tokens: Optional[int],
410410
headers: Dict,
411411
payload: Dict,
412412
) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
413-
if type_ == "text":
413+
if type_ == "text_completions":
414414
target = f"{self.target}{TEXT_COMPLETIONS_PATH}"
415-
elif type_ == "chat":
415+
elif type_ == "chat_completions":
416416
target = f"{self.target}{CHAT_COMPLETIONS_PATH}"
417417
else:
418418
raise ValueError(f"Unsupported type: {type_}")
@@ -525,15 +525,15 @@ async def _iterative_completions_request(
525525

526526
@staticmethod
527527
def _extract_completions_delta_content(
528-
type_: Literal["text", "chat"], data: Dict
528+
type_: Literal["text_completions", "chat_completions"], data: Dict
529529
) -> Optional[str]:
530530
if "choices" not in data or not data["choices"]:
531531
return None
532532

533-
if type_ == "text":
533+
if type_ == "text_completions":
534534
return data["choices"][0]["text"]
535535

536-
if type_ == "chat":
536+
if type_ == "chat_completions":
537537
return data["choices"][0]["delta"]["content"]
538538

539539
raise ValueError(f"Unsupported type: {type_}")

0 commit comments

Comments (0)