Skip to content

Commit 361f7c6

Browse files
parfeniukink (Dmytro Parfeniuk)
and co-authors authored
✨ Executor & Scheduler packages preparation (#11)
* 👔 Executor & Scheduler Improvements * 👔 Executor is refactored to track canceled tasks on the level of the scheduler instead of tracking that on the Task level itself * ✅ Report generation with an executor is tested * 🔥 Some redundant code is removed * 🧵 `Scheduler.run` uses `Scheduler._event_loop` to manage tasks * 💚 CI process is prepared for all Python versions >= 3.8 * 🔨 `guidellm` build script is added * 👷 CI includes the package publishing workflow ➕ `pytest-cov` is installed. 75% threshold is set. https://pypi.org/project/pytest-cov/ * 🔧 The `pytest.ini_options` `pyproject.toml` file's section is updated with coverage adopts * 👔 TextGenerationError.error -> TextGenerationError.message The error message is used instead of an error instance in the dataclass --------- Co-authored-by: Dmytro Parfeniuk <[email protected]>
1 parent 2f06dc3 commit 361f7c6

31 files changed

+945
-333
lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 22 deletions
This file was deleted.

.github/workflows/code-quality.yml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
name: Code Quality Check
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- main
7+
8+
jobs:
9+
code-quality-check:
10+
name: Code quality check
11+
runs-on: ubuntu-latest
12+
strategy:
13+
matrix:
14+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
15+
16+
steps:
17+
- uses: actions/checkout@v4
18+
- uses: actions/setup-python@v5
19+
with:
20+
python-version: ${{ matrix.python-version }}
21+
22+
- name: Install dependencies
23+
run: |
24+
python -m pip install --upgrade pip
25+
pip install -e '.[dev]'
26+
27+
- name: Run tests
28+
run: python -m pytest tests/unit
29+
30+
- name: Run linter
31+
run: python -m ruff check src tests
32+
33+
- name: Check formatting
34+
run: |
35+
python -m black --check src tests
36+
python -m isort --check src tests
37+
38+
- name: Check types
39+
run: python -m mypy --check-untyped-defs src tests

.github/workflows/publish.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: Publish Python distribution to PyPI
2+
3+
on:
4+
push:
5+
tags:
6+
- v*
7+
8+
jobs:
9+
build:
10+
name: Build distribution
11+
runs-on: ubuntu-latest
12+
13+
steps:
14+
- uses: actions/checkout@v4
15+
16+
- name: Set up Python
17+
uses: actions/setup-python@v5
18+
with:
19+
python-version: "3.8"
20+
21+
- name: Install pypa/build and pypa/twine
22+
run: >-
23+
python3 -m pip install build twine --user
24+
25+
- name: Build a binary wheel
26+
run: python3 -m build
27+
28+
publish-to-pypi:
29+
name: Publish Python distribution to PyPI
30+
needs:
31+
- build
32+
if: startsWith(github.ref, 'refs/tags/v')
33+
runs-on: ubuntu-latest
34+
steps:
35+
- name: 🚀📦 Publish to PyPI
36+
env:
37+
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
38+
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
39+
40+
run: python -m twine upload dist/*

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@ install.dev:
88
python -m pip install -e .[dev]
99

1010

11+
1112
.PHONY: build
1213
build:
13-
python setup.py sdist bdist_wheel
14+
python -m build
1415

1516
.PHONY: style
1617
style:

pyproject.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[build-system]
2-
requires = ["setuptools", "wheel"]
2+
requires = ["setuptools >= 61.0", "wheel", "build"]
33
build-backend = "setuptools.build_meta"
44

55

@@ -46,8 +46,9 @@ dev = [
4646
"black~=24.4.2",
4747
"isort~=5.13.2",
4848
"mypy~=1.10.1",
49-
"pytest~=8.2.2",
49+
"pytest-cov~=5.0.0",
5050
"pytest-mock~=3.14.0",
51+
"pytest~=8.2.2",
5152
"ruff~=0.5.2",
5253
"tox~=4.16.0",
5354
"types-requests~=2.32.0",
@@ -77,7 +78,7 @@ profile = "black"
7778

7879

7980
[tool.mypy]
80-
files = "src/guidellm"
81+
files = ["src/guidellm", "tests"]
8182
python_version = '3.8'
8283
warn_redundant_casts = true
8384
warn_unused_ignores = true
@@ -101,8 +102,7 @@ lint.select = ["E", "F", "W"]
101102

102103

103104
[tool.pytest.ini_options]
104-
addopts = '-s -vvv --cache-clear'
105-
asyncio_mode = 'auto'
105+
addopts = '-s -vvv --cache-clear --cov-report=term-missing --cov --cov-fail-under=75'
106106
markers = [
107107
"smoke: quick tests to check basic functionality",
108108
"sanity: detailed tests to ensure major functions work correctly",

src/guidellm/backend/openai.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import functools
22
import os
3-
from typing import Any, Dict, Iterator, List, Optional
3+
from typing import Any, Dict, Generator, List, Optional
44

55
from loguru import logger
66
from openai import OpenAI, Stream
@@ -72,7 +72,7 @@ def __init__(
7272

7373
def make_request(
7474
self, request: TextGenerationRequest
75-
) -> Iterator[GenerativeResponse]:
75+
) -> Generator[GenerativeResponse, None, None]:
7676
"""
7777
Make a request to the OpenAI backend.
7878

src/guidellm/core/result.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,19 @@ class TextGenerationResult(Serializable):
4444
output_token_count: int = Field(
4545
default=0, description="The number of tokens in the output."
4646
)
47-
last_time: float = Field(default=None, description="The last time recorded.")
47+
last_time: Optional[float] = Field(
48+
default=None, description="The last time recorded."
49+
)
4850
first_token_set: bool = Field(
4951
default=False, description="Whether the first token time is set."
5052
)
51-
start_time: float = Field(
53+
start_time: Optional[float] = Field(
5254
default=None, description="The start time of the text generation."
5355
)
54-
end_time: float = Field(
56+
end_time: Optional[float] = Field(
5557
default=None, description="The end time of the text generation."
5658
)
57-
first_token_time: float = Field(
59+
first_token_time: Optional[float] = Field(
5860
default=None, description="The time taken to decode the first token."
5961
)
6062
decode_times: Distribution = Field(
@@ -86,6 +88,9 @@ def output_token(self, token: str):
8688
"""
8789
current_counter = time()
8890

91+
if not self.last_time:
92+
raise ValueError("Last time is not specified to get the output token.")
93+
8994
if not self.first_token_set:
9095
self.first_token_time = current_counter - self.last_time
9196
self.first_token_set = True
@@ -157,13 +162,12 @@ class TextGenerationError(Serializable):
157162
request: TextGenerationRequest = Field(
158163
description="The text generation request that resulted in an error."
159164
)
160-
error: str = Field(
165+
message: str = Field(
161166
description="The error message that occurred during text generation."
162167
)
163168

164-
def __init__(self, request: TextGenerationRequest, error: Exception):
165-
super().__init__(request=request, error=str(error))
166-
logger.error("Text generation error occurred: {}", error)
169+
def model_post_init(self, _: Any):
170+
logger.error(f"Text generation error occurred: {self.message}")
167171

168172

169173
class RequestConcurrencyMeasurement(Serializable):
@@ -185,7 +189,7 @@ class TextGenerationBenchmark(Serializable):
185189
"""
186190

187191
mode: str = Field(description="The generation mode, either 'async' or 'sync'.")
188-
rate: float = Field(
192+
rate: Optional[float] = Field(
189193
default=None, description="The requested rate of requests per second."
190194
)
191195
results: List[TextGenerationResult] = Field(
@@ -238,6 +242,9 @@ def completed_request_rate(self) -> float:
238242
if not self.results:
239243
return 0.0
240244
else:
245+
if not self.results[0].start_time or not self.results[-1].end_time:
246+
raise ValueError("Start time and End time are not defined")
247+
241248
return self.request_count / (
242249
self.results[-1].end_time - self.results[0].start_time
243250
)
@@ -264,7 +271,6 @@ def overloaded(self) -> bool:
264271
# overall this means that a relatively flat or decreasing throughput curve
265272
# over time in addition to a growing processing queue is a sign of overload
266273

267-
# TODO
268274
return False
269275

270276
def request_started(self):
@@ -311,7 +317,7 @@ def request_completed(
311317
)
312318
)
313319
logger.warning(
314-
"Text generation request resulted in error: {}", result.error
320+
f"Text generation request resulted in error: {result.message}"
315321
)
316322
else:
317323
self.results.append(result)

src/guidellm/executor/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
from .executor import Executor
22
from .profile_generator import (
33
Profile,
4-
ProfileGenerationModes,
4+
ProfileGenerationMode,
55
ProfileGenerator,
66
SingleProfileGenerator,
77
SweepProfileGenerator,
88
)
99

1010
__all__ = [
1111
"Executor",
12-
"ProfileGenerationModes",
12+
"ProfileGenerationMode",
1313
"Profile",
1414
"ProfileGenerator",
1515
"SingleProfileGenerator",

src/guidellm/executor/executor.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,51 @@
1-
from typing import Any, Dict, Optional, Union
1+
from typing import Any, Dict, Optional
22

33
from guidellm.backend import Backend
4-
from guidellm.core import TextGenerationBenchmarkReport
5-
from guidellm.executor.profile_generator import ProfileGenerationModes, ProfileGenerator
4+
from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport
65
from guidellm.request import RequestGenerator
7-
from guidellm.scheduler.scheduler import Scheduler
6+
from guidellm.scheduler import Scheduler
7+
8+
from .profile_generator import ProfileGenerationMode, ProfileGenerator
89

910
__all__ = ["Executor"]
1011

1112

1213
class Executor:
14+
"""
15+
The main purpose of the `class Executor` is to dispatch running tasks according
16+
to the Profile Generation mode
17+
"""
18+
1319
def __init__(
1420
self,
15-
request_generator: RequestGenerator,
1621
backend: Backend,
17-
profile_mode: Union[str, ProfileGenerationModes] = "single",
22+
request_generator: RequestGenerator,
23+
profile_mode: ProfileGenerationMode = ProfileGenerationMode.SINGLE,
1824
profile_args: Optional[Dict[str, Any]] = None,
1925
max_requests: Optional[int] = None,
2026
max_duration: Optional[float] = None,
2127
):
2228
self.request_generator = request_generator
2329
self.backend = backend
24-
self.profile = ProfileGenerator.create_generator(
30+
self.profile_generator: ProfileGenerator = ProfileGenerator.create(
2531
profile_mode, **(profile_args or {})
2632
)
27-
self.max_requests = max_requests
28-
self.max_duration = max_duration
33+
self.max_requests: Optional[int] = max_requests
34+
self.max_duration: Optional[float] = max_duration
35+
self._scheduler: Optional[Scheduler] = None
36+
37+
@property
38+
def scheduler(self) -> Scheduler:
39+
if self._scheduler is None:
40+
raise ValueError("The scheduler is not set. Did you run the execution?")
41+
else:
42+
return self._scheduler
2943

3044
def run(self) -> TextGenerationBenchmarkReport:
3145
report = TextGenerationBenchmarkReport()
3246

3347
while True:
34-
profile = self.profile.next_profile(report)
35-
36-
if profile is None:
48+
if not (profile := self.profile_generator.next(report)):
3749
break
3850

3951
scheduler = Scheduler(
@@ -45,7 +57,7 @@ def run(self) -> TextGenerationBenchmarkReport:
4557
max_duration=self.max_duration,
4658
)
4759

48-
benchmark = scheduler.run()
60+
benchmark: TextGenerationBenchmark = scheduler.run()
4961
report.add_benchmark(benchmark)
5062

5163
return report

0 commit comments

Comments (0)