Changes from 10 commits

Commits (24)
c3abc8d
WIP
Aug 22, 2024
a6d9a05
✅ Tests are fixed
Aug 22, 2024
d116c0c
📌 deepsparse is added to dependencies
Aug 22, 2024
c000dbf
✨ deepsparse backend integration is added
Aug 22, 2024
52e1d3b
deepsparse package limitations are applied
Aug 26, 2024
7218795
⚰️ removed `pytest.mark.asyncio()` due to pytest-asyncio module
Aug 27, 2024
a5357ca
📝 fixed class example
Aug 27, 2024
68381a5
🧵 rollback `pytest.mark.asyncio` fixtures
Aug 28, 2024
5acb3a8
✨ Deepsparse Backend integration first implementation
Aug 28, 2024
45e07d0
code quality is provided
Aug 28, 2024
1753469
Merge branch 'main' into parfeniukink/features/deepsparse-backend
Aug 28, 2024
1f1e038
fit Deepsparse Backend to work with new Backend abstraction
Aug 28, 2024
ce1c3ba
🔧 `GUIDELLM__LLM_MODEL` shared across all the backends
Aug 29, 2024
8e88bae
Test emulated data source constant -> settings value
Aug 29, 2024
75e708b
💄 mdformat is happy
Aug 29, 2024
3c03961
Merge branch 'main' into parfeniukink/features/deepsparse-backend
Aug 29, 2024
913253f
✅ Tests are fixed according to a new Backend base implementation
Aug 29, 2024
e376ed9
🔨 tox tests include `deepsparse` dependency
Aug 30, 2024
3a2c6c1
🏷️ Type annotations are added
Aug 30, 2024
74a6dfd
🐛 Assert with config values instead of constants
Aug 30, 2024
1a53951
📌 .[deepsparse] dependency is skipped if Python>3.11
Aug 30, 2024
39ffcb3
🚚 DeepsparseBackend is moved to another module
Aug 30, 2024
29e38e4
✅ Deepsparse tests are ignored if Python>=3.12
Aug 30, 2024
4b3b4b5
💚 Linters are happy
Aug 30, 2024
43 changes: 30 additions & 13 deletions DEVELOPING.md
@@ -15,19 +15,33 @@ Before you begin, ensure you have the following installed:
- `pip` (Python package installer)
- `git` (version control system)

### Clone the repository:

```bash
git clone https://github.com/neuralmagic/guidellm.git
cd guidellm
```

### Install dependencies:

All the dependencies are specified in the `pyproject.toml` file. You can install only the required dependencies, or include optional dependency groups as well.

Install required dependencies along with optional `dev` dependencies.

```sh
pip install -e .[dev]
# or pip install -e '.[dev]'
```

#### Working with **non-default** backends

Backends other than the default, such as `deepsparse`, come with additional software requirements and version constraints.
To install the dependencies for a specific backend, run:

```sh
pip install -e .[deepsparse]
# or pip install -e '.[deepsparse]'
```
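After installing the extra, a quick sanity check (a hypothetical one-liner, not part of the PR) confirms that the backend's import-time guards pass on your interpreter:

```sh
python -c "from guidellm.backend import DeepsparseBackend; print('deepsparse backend OK')"
```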

## Project Structure

@@ -46,8 +60,9 @@ guidellm/
└── README.md
```

- `pyproject.toml`: Project metadata
- **src/guidellm/**: Main source code for the project.
- **tests/**: Test cases categorized into unit, integration, and end-to-end tests.

## Development Environment Setup

@@ -239,12 +254,14 @@ The project configuration entrypoint is represented by the lazy-loaded `settings` singleton.
The project is fully configurable with environment variables.

```py
class Nested(BaseModel):
    nested: str = "default value"


class LoggingSettings(BaseModel):
    # ...

    disabled: bool = False
    nested: Nested = Nested()

class Settings(BaseSettings):
    ...
```
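With the `GUIDELLM__` prefix and `__` as the nesting delimiter (the pattern used by `GUIDELLM__LOGGING__DISABLED` elsewhere in this PR), a nested field like the one above would be overridden roughly like this (a sketch assuming pydantic-settings-style nested env parsing):

```sh
export GUIDELLM__LOGGING__NESTED__NESTED="overridden value"
```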
20 changes: 15 additions & 5 deletions pyproject.toml
@@ -64,6 +64,9 @@ dev = [
    "types-requests~=2.32.0",
    "types-toml",
]
deepsparse = [
    "deepsparse",
]
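A later commit in this PR skips the `.[deepsparse]` extra on Python > 3.11; one way to express such a constraint (a sketch, not necessarily the PR's final form) is a PEP 508 environment marker:

```toml
deepsparse = [
    "deepsparse; python_version < '3.12'",
]
```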


[project.entry-points.console_scripts]
@@ -96,6 +99,10 @@ exclude = ["venv", ".tox"]
# Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery
follow_imports = 'silent'

[[tool.mypy.overrides]]
module = ["deepsparse.*", "transformers.*"]
ignore_missing_imports=true


[tool.ruff]
line-length = 88
@@ -109,11 +116,14 @@ indent-style = "space"
[tool.ruff.lint]
ignore = [
    "PLR0913",
    "PLR2004", # allow numbers without constants definitions
    "RET505", # allow `else` block after `if (condition): return value` line
    "TCH001",
    "COM812",
    "ISC001",
    "TCH002",
    "PLW1514", # allow Path.open without encoding
    "S311", # allow standard pseudo-random generators
]
select = [
@@ -169,19 +179,19 @@ select = [
"FIX", # flake8-fixme: detects FIXMEs and other temporary comments that should be resolved
]

[tool.ruff.lint.extend-per-file-ignores]
"tests/**/*.py" = [

[tool.ruff.lint.per-file-ignores]
"tests/*" = [
    "S101", # asserts allowed in tests
    "S105", # allow hardcoded passwords in tests
    "S106", # allow hardcoded passwords in tests
    "ARG", # Unused function args allowed in tests
    "PLR2004", # Magic value used in comparison
    "TCH002", # No import only type checking in tests
    "SLF001", # enable private member access in tests
    "S311", # allow standard pseudo-random generators in tests
    "PT011", # allow generic exceptions in tests
    "N806", # allow uppercase variable names in tests
    "PGH003", # allow general ignores in tests
]

[tool.ruff.lint.isort]
2 changes: 2 additions & 0 deletions src/guidellm/backend/__init__.py
@@ -1,9 +1,11 @@
from .base import Backend, BackendEngine, GenerativeResponse
from .deepsparse.backend import DeepsparseBackend
from .openai import OpenAIBackend

__all__ = [
    "Backend",
    "BackendEngine",
    "GenerativeResponse",
    "OpenAIBackend",
    "DeepsparseBackend",
]
2 changes: 1 addition & 1 deletion src/guidellm/backend/base.py
@@ -10,7 +10,7 @@
__all__ = ["Backend", "BackendEngine", "GenerativeResponse"]


BackendEngine = Literal["test", "openai_server"]
BackendEngine = Literal["test", "openai_server", "deepsparse"]


class GenerativeResponse(BaseModel):
24 changes: 24 additions & 0 deletions src/guidellm/backend/deepsparse/__init__.py
@@ -0,0 +1,24 @@
"""
This package encapsulates the "Deepsparse Backend" implementation.
ref: https://github.com/neuralmagic/deepsparse

The `deepsparse` package supports Python 3.6 through Python 3.11,
while `guidellm` requires at least Python 3.8.

The safe range of versions for the Deepsparse Backend implementation
is therefore Python 3.8 through Python 3.11.
"""

from guidellm.utils import check_python_version, module_is_available

# Ensure that python is in valid range
check_python_version(min_version="3.8", max_version="3.11")

# Ensure that deepsparse is installed
module_is_available(
    module="deepsparse",
    helper=(
        "`deepsparse` package is not available. "
        "Please try `pip install -e '.[deepsparse]'`"
    ),
)
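The guard helpers above live in `guidellm.utils`; their implementation is not part of this diff. A minimal sketch of what they could look like (an assumption, not the PR's actual code):

```py
import importlib.util
import sys


def check_python_version(min_version: str, max_version: str) -> None:
    """Raise if the running interpreter is outside [min_version, max_version]."""
    current = sys.version_info[:2]
    minimum = tuple(int(part) for part in min_version.split("."))
    maximum = tuple(int(part) for part in max_version.split("."))
    if not (minimum <= current <= maximum):
        raise RuntimeError(
            f"Python {min_version}..{max_version} is required, "
            f"but {sys.version.split()[0]} is running"
        )


def module_is_available(module: str, helper: str) -> None:
    """Raise with a helpful message if the module cannot be imported."""
    if importlib.util.find_spec(module) is None:
        raise RuntimeError(helper)
```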
106 changes: 106 additions & 0 deletions src/guidellm/backend/deepsparse/backend.py
@@ -0,0 +1,106 @@
from typing import Any, AsyncGenerator, Dict, List, Optional

from deepsparse import Pipeline, TextGeneration
from loguru import logger

from guidellm.backend import Backend, GenerativeResponse
from guidellm.config import settings
from guidellm.core import TextGenerationRequest


@Backend.register(backend_type="deepsparse")
class DeepsparseBackend(Backend):
    """
    A Deepsparse backend implementation for generative AI results.
    """

    def __init__(self, model: Optional[str] = None, **request_args):
        self._request_args: Dict[str, Any] = request_args
        self.model: str = self._get_model(model)
        self.pipeline: Pipeline = TextGeneration(model=self.model)

    def _get_model(self, model_from_cli: Optional[str] = None) -> str:
        """Provides the model by the following priority:
        1. from the function argument (comes from the CLI)
        2. from the environment variable
        3. `self.default_model` from `self.available_models`
        """

        if model_from_cli is not None:
            return model_from_cli
        elif settings.deepsparse.model is not None:
            logger.info(
                "Using Deepsparse model from environment variable: "
                f"{settings.deepsparse.model}"
            )
            return settings.deepsparse.model
        else:
            logger.info(f"Using default Deepsparse model: {self.default_model}")
            return self.default_model

    async def make_request(
        self, request: TextGenerationRequest
    ) -> AsyncGenerator[GenerativeResponse, None]:
        """
        Make a request to the Deepsparse Python API client.

        :param request: The result request to submit.
        :type request: TextGenerationRequest
        :return: An async iterator over the generative responses.
        :rtype: AsyncGenerator[GenerativeResponse, None]
        """

        logger.debug(
            f"Making request to Deepsparse backend with prompt: {request.prompt}"
        )

        token_count = 0
        request_args = {
            **self._request_args,
            "streaming": True,
            "max_new_tokens": request.output_token_count,
        }

        if not (output := self.pipeline(prompt=request.prompt, **request_args)):
            yield GenerativeResponse(
                type_="final",
                prompt=request.prompt,
                prompt_token_count=request.prompt_token_count,
                output_token_count=token_count,
            )
            return

        for generation in output.generations:
            if not (token := generation.text):
                yield GenerativeResponse(
                    type_="final",
                    prompt=request.prompt,
                    prompt_token_count=request.prompt_token_count,
                    output_token_count=token_count,
                )
                break
            else:
                token_count += 1
                yield GenerativeResponse(
                    type_="token_iter",
                    add_token=token,
                    prompt=request.prompt,
                    prompt_token_count=request.prompt_token_count,
                    output_token_count=token_count,
                )

    def available_models(self) -> List[str]:
        """
        Get the available models for the backend.

        :return: A list of available models.
        :rtype: List[str]
        """

        # WARNING: The default model from the documentation is defined here
        return ["hf:mgoin/TinyStories-33M-quant-deepsparse"]

    def _token_count(self, text: str) -> int:
        token_count = len(text.split())
        logger.debug(f"Token count for text '{text}': {token_count}")
        return token_count
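For reviewers trying the backend locally, a minimal driver might look like this (a sketch: it assumes `TextGenerationRequest` accepts `prompt` and `output_token_count` keyword arguments, matching how the fields are read above):

```py
import asyncio

from guidellm.backend import DeepsparseBackend
from guidellm.core import TextGenerationRequest


async def main() -> None:
    # model=None falls back to GUIDELLM__DEEPSPARSE__MODEL, then the default model
    backend = DeepsparseBackend(model=None)
    request = TextGenerationRequest(prompt="Once upon a time", output_token_count=16)

    # Stream token responses; the final response has type_ == "final"
    async for response in backend.make_request(request):
        if response.type_ == "token_iter" and response.add_token:
            print(response.add_token, end="", flush=True)


asyncio.run(main())
```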
12 changes: 10 additions & 2 deletions src/guidellm/config.py
@@ -107,6 +107,14 @@ class OpenAISettings(BaseModel):
    max_gen_tokens: int = 4096


class DeepsparseSettings(BaseModel):
    """
    Deepsparse settings for the Python API library
    """

    model: Optional[str] = None

class ReportGenerationSettings(BaseModel):
    """
    Report generation settings for the application
@@ -127,6 +135,7 @@ class Settings(BaseSettings):
    ```sh
    export GUIDELLM__LOGGING__DISABLED=true
    export GUIDELLM__OPENAI__API_KEY=******
    export GUIDELLM__DEEPSPARSE__MODEL=******
    ```
    """

@@ -151,8 +160,7 @@ class Settings(BaseSettings):

    # Request settings
    openai: OpenAISettings = OpenAISettings()
    deepsparse: DeepsparseSettings = DeepsparseSettings()
    report_generation: ReportGenerationSettings = ReportGenerationSettings()

    @model_validator(mode="after")
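To see the new setting end to end, the override flow would look roughly like this (a sketch assuming pydantic-settings-style nested env parsing with the `GUIDELLM__` prefix shown above):

```py
import os

from guidellm.config import Settings

# Must be set before the Settings object is created
os.environ["GUIDELLM__DEEPSPARSE__MODEL"] = "hf:mgoin/TinyStories-33M-quant-deepsparse"

settings = Settings()
print(settings.deepsparse.model)  # -> hf:mgoin/TinyStories-33M-quant-deepsparse
```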
14 changes: 8 additions & 6 deletions src/guidellm/executor/profile_generator.py
@@ -162,12 +162,14 @@ def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profil
        elif self.mode == "sweep":
            profile = self.create_sweep_profile(
                self.generated_count,
                sync_benchmark=(
                    current_report.benchmarks[0] if current_report.benchmarks else None
                ),
                throughput_benchmark=(
                    current_report.benchmarks[1]
                    if len(current_report.benchmarks) > 1
                    else None
                ),
            )
        else:
            err = ValueError(f"Invalid mode: {self.mode}")
2 changes: 1 addition & 1 deletion src/guidellm/main.py
@@ -26,7 +26,7 @@
@click.option("--port", type=str, default=None, help="Port for benchmarking")
@click.option(
"--backend",
type=click.Choice(["test", "openai_server"]),
type=click.Choice(["test", "openai_server", "deepsparse"]),
default="openai_server",
help="Backend type for benchmarking",
)
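With the new choice registered, selecting the backend from the command line would look like this (a hypothetical invocation: the console-script name and the remaining required flags are elided from this diff):

```sh
guidellm --backend deepsparse
```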
12 changes: 6 additions & 6 deletions src/guidellm/scheduler/base.py
@@ -229,16 +229,16 @@ def _get_count_total():
            return (
                self.max_number
                if self.max_number
                else round(self.max_duration) if self.max_duration else 0
            )

        def _get_count_completed():
            return min(
                (
                    benchmark.request_count + benchmark.error_count
                    if self.max_number
                    else round(time.time() - start_time)
                ),
                _get_count_total(),
            )
5 changes: 5 additions & 0 deletions src/guidellm/utils/__init__.py
@@ -1,3 +1,4 @@
from .dependencies import check_python_version, module_is_available
from .injector import create_report, inject_data
from .text import (
    clean_text,
@@ -8,6 +9,7 @@
    load_text,
    load_text_lines,
    parse_text_objects,
    random_strings,
    split_lines_by_punctuation,
    split_text,
)
@@ -29,10 +31,13 @@
    "load_text",
    "load_text_lines",
    "load_transformers_dataset",
    "random_strings",
    "parse_text_objects",
    "resolve_transformers_dataset",
    "resolve_transformers_dataset_column",
    "resolve_transformers_dataset_split",
    "split_lines_by_punctuation",
    "split_text",
    "check_python_version",
    "module_is_available",
]