diff --git a/pyproject.toml b/pyproject.toml
index db2e65a7..6ab2c6e9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,6 +104,10 @@ exclude = ["venv", ".tox"]
 # Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery
 follow_imports = 'silent'
 
+[[tool.mypy.overrides]]
+module = ["datasets.*"]
+ignore_missing_imports=true
+
 [tool.ruff]
 line-length = 88
 
@@ -122,6 +126,8 @@ ignore = [
     "ISC001",
     "TCH002",
     "PLW1514", # allow Path.open without encoding
+    "RET505", # allow `else` blocks
+    "RET506" # allow `else` blocks
 ]
 
 select = [
diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py
index 8a49d5ef..8c83f914 100644
--- a/src/guidellm/backend/openai.py
+++ b/src/guidellm/backend/openai.py
@@ -111,6 +111,7 @@ async def make_request(
             stream=True,
             **request_args,
         )
 
+        token_count = 0
         async for chunk in stream:
             choice = chunk.choices[0]
diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py
index 703ea05d..757646cf 100644
--- a/src/guidellm/executor/profile_generator.py
+++ b/src/guidellm/executor/profile_generator.py
@@ -1,7 +1,8 @@
-from typing import Any, Dict, Literal, Optional, Sequence, Union, get_args
+from typing import Any, Dict, List, Literal, Optional, Sequence, Union, get_args
 
 import numpy as np
 from loguru import logger
+from numpy.typing import NDArray
 from pydantic import Field
 
 from guidellm.config import settings
@@ -190,12 +191,14 @@ def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profil
         elif self.mode == "sweep":
             profile = self.create_sweep_profile(
                 self.generated_count,
-                sync_benchmark=current_report.benchmarks[0]
-                if current_report.benchmarks
-                else None,
-                throughput_benchmark=current_report.benchmarks[1]
-                if len(current_report.benchmarks) > 1
-                else None,
+                sync_benchmark=(
+                    current_report.benchmarks[0] if current_report.benchmarks else None
+                ),
+                throughput_benchmark=(
+                    current_report.benchmarks[1]
+                    if len(current_report.benchmarks) > 1
+                    else None
+                ),
             )
         else:
             err = ValueError(f"Invalid mode: {self.mode}")
@@ -333,11 +336,15 @@ def create_sweep_profile(
         min_rate = sync_benchmark.completed_request_rate
         max_rate = throughput_benchmark.completed_request_rate
 
-        intermediate_rates = list(
+        intermediate_rates: List[NDArray] = list(
             np.linspace(min_rate, max_rate, settings.num_sweep_profiles + 1)
         )[1:]
 
         return Profile(
             load_gen_mode="constant",
-            load_gen_rate=intermediate_rates[index - 2],
+            load_gen_rate=(
+                float(load_gen_rate)
+                if (load_gen_rate := intermediate_rates[index - 2])
+                else 1.0  # fall back if the computed rate is 0
+            ),
         )
diff --git a/src/guidellm/main.py b/src/guidellm/main.py
index c0bea832..4016ecec 100644
--- a/src/guidellm/main.py
+++ b/src/guidellm/main.py
@@ -1,5 +1,5 @@
 import asyncio
-from typing import Literal, Optional, get_args
+from typing import Literal, Optional, Union, get_args
 
 import click
 from loguru import logger
@@ -13,7 +13,7 @@
     TransformersDatasetRequestGenerator,
 )
 from guidellm.request.base import RequestGenerator
-from guidellm.utils import BenchmarkReportProgress
+from guidellm.utils import BenchmarkReportProgress, cli_params
 
 __all__ = ["generate_benchmark_report"]
 
@@ -120,7 +120,7 @@
 )
 @click.option(
     "--max-requests",
-    type=int,
+    type=cli_params.MAX_REQUESTS,
     default=None,
     help=(
         "The maximum number of requests for each benchmark run. "
@@ -161,7 +161,7 @@ def generate_benchmark_report_cli(
     rate_type: ProfileGenerationMode,
     rate: Optional[float],
    max_seconds: Optional[int],
-    max_requests: Optional[int],
+    max_requests: Union[Literal["dataset"], int, None],
     output_path: str,
     enable_continuous_refresh: bool,
 ):
@@ -194,7 +194,7 @@ def generate_benchmark_report(
     rate_type: ProfileGenerationMode,
     rate: Optional[float],
     max_seconds: Optional[int],
-    max_requests: Optional[int],
+    max_requests: Union[Literal["dataset"], int, None],
     output_path: str,
     cont_refresh_table: bool,
 ) -> GuidanceReport:
@@ -256,13 +256,18 @@ def generate_benchmark_report(
     else:
         raise ValueError(f"Unknown data type: {data_type}")
 
+    if data_type == "emulated" and max_requests == "dataset":
+        raise ValueError("Cannot use 'dataset' for emulated data")
+
     # Create executor
     executor = Executor(
         backend=backend_inst,
         request_generator=request_generator,
         mode=rate_type,
         rate=rate if rate_type in ("constant", "poisson") else None,
-        max_number=max_requests,
+        max_number=(
+            len(request_generator) if max_requests == "dataset" else max_requests
+        ),
         max_duration=max_seconds,
     )
 
diff --git a/src/guidellm/request/base.py b/src/guidellm/request/base.py
index 242bf894..9fd303e6 100644
--- a/src/guidellm/request/base.py
+++ b/src/guidellm/request/base.py
@@ -105,6 +105,21 @@ def __iter__(self) -> Iterator[TextGenerationRequest]:
             while not self._stop_event.is_set():
                 yield self.create_item()
 
+    @abstractmethod
+    def __len__(self) -> int:
+        """
+        Abstract method to get the length of the collection to be generated.
+        """
+
+    @abstractmethod
+    def create_item(self) -> TextGenerationRequest:
+        """
+        Abstract method to create a new result request item.
+
+        :return: A new result request.
+        :rtype: TextGenerationRequest
+        """
+
     @property
     def type_(self) -> str:
         """
@@ -155,15 +170,6 @@ def async_queue_size(self) -> int:
         """
         return self._async_queue_size
 
-    @abstractmethod
-    def create_item(self) -> TextGenerationRequest:
-        """
-        Abstract method to create a new result request item.
-
-        :return: A new result request.
-        :rtype: TextGenerationRequest
-        """
-
     def stop(self):
         """
         Stop the background task that populates the queue.
diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py
index d3c4bd92..7d481cb7 100644
--- a/src/guidellm/request/emulated.py
+++ b/src/guidellm/request/emulated.py
@@ -339,6 +339,12 @@ def __init__(
             async_queue_size=async_queue_size,
         )
 
+    def __len__(self) -> int:
+        raise NotImplementedError(
+            "Can't get the length of the emulated dataset. "
+            "Check the `--data-type` CLI parameter."
+        )
+
     def create_item(self) -> TextGenerationRequest:
         """
         Create a new text generation request item from the data.
diff --git a/src/guidellm/request/file.py b/src/guidellm/request/file.py
index e0f3471a..b187f7b4 100644
--- a/src/guidellm/request/file.py
+++ b/src/guidellm/request/file.py
@@ -54,6 +54,13 @@ def __init__(
             async_queue_size=async_queue_size,
         )
 
+    def __len__(self) -> int:
+        """
+        Return the number of text lines.
+        """
+
+        return len(self._data)
+
     def create_item(self) -> TextGenerationRequest:
         """
         Create a new result request item from the data.
diff --git a/src/guidellm/request/transformers.py b/src/guidellm/request/transformers.py
index eaab8629..3fd24040 100644
--- a/src/guidellm/request/transformers.py
+++ b/src/guidellm/request/transformers.py
@@ -1,12 +1,7 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from datasets import (  # type: ignore  # noqa: PGH003
-    Dataset,
-    DatasetDict,
-    IterableDataset,
-    IterableDatasetDict,
-)
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from loguru import logger
 from transformers import PreTrainedTokenizer  # type: ignore  # noqa: PGH003
 
@@ -57,7 +52,9 @@ def __init__(
         self._column = column
         self._kwargs = kwargs
 
-        self._hf_dataset = load_transformers_dataset(dataset, split=split, **kwargs)
+        self._hf_dataset: Union[Dataset, IterableDataset] = load_transformers_dataset(
+            dataset, split=split, **kwargs
+        )
         self._hf_column = resolve_transformers_dataset_column(
             self._hf_dataset, column=column
         )
@@ -73,6 +70,12 @@ def __init__(
             async_queue_size=async_queue_size,
         )
 
+    def __len__(self) -> int:
+        if not isinstance(self._hf_dataset, Dataset):
+            raise ValueError("Can't get dataset size for IterableDataset object")
+        else:
+            return len(self._hf_dataset)
+
     def create_item(self) -> TextGenerationRequest:
         """
         Create a new result request item from the dataset.
diff --git a/src/guidellm/utils/cli_params.py b/src/guidellm/utils/cli_params.py
new file mode 100644
index 00000000..4e8800d2
--- /dev/null
+++ b/src/guidellm/utils/cli_params.py
@@ -0,0 +1,34 @@
+"""
+This module includes custom CLI parameters for the `click` package.
+"""
+
+from typing import Any, Optional
+
+from click import Context, Parameter, ParamType
+
+__all__ = ["MAX_REQUESTS"]
+
+
+class MaxRequestsType(ParamType):
+    """
+    Catch the `dataset` string parameter to determine the behavior of the Scheduler.
+    """
+
+    name = "max_requests"
+
+    def convert(
+        self, value: Any, param: Optional[Parameter], ctx: Optional[Context]
+    ) -> Any:
+        if isinstance(value, int):
+            return value
+
+        try:
+            return int(value)
+        except ValueError:
+            if value == "dataset":
+                return value
+            else:
+                self.fail(f"{value} is not a valid integer or 'dataset'", param, ctx)
+
+
+MAX_REQUESTS = MaxRequestsType()
diff --git a/src/guidellm/utils/progress.py b/src/guidellm/utils/progress.py
index 5c7a8457..a1e1e798 100644
--- a/src/guidellm/utils/progress.py
+++ b/src/guidellm/utils/progress.py
@@ -139,6 +139,7 @@ def update_benchmark(
         :type req_per_sec: float
         :raises ValueError: If trying to update a completed benchmark.
         """
+
         if self.benchmark_tasks_completed[index]:
             err = ValueError(f"Benchmark {index} already completed")
             logger.error("Error updating benchmark: {}", err)
@@ -162,9 +163,11 @@ def update_benchmark(
             total=completed_total,
             completed=completed_count if not completed else completed_total,
             req_per_sec=(f"{req_per_sec:.2f}" if req_per_sec else "#.##"),
-            start_time_str=datetime.fromtimestamp(start_time).strftime("%H:%M:%S")
-            if start_time
-            else "--:--:--",
+            start_time_str=(
+                datetime.fromtimestamp(start_time).strftime("%H:%M:%S")
+                if start_time
+                else "--:--:--"
+            ),
         )
         logger.debug(
             "Updated benchmark task at index {}: {}% complete",
diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py
index 13a0dffb..f8c5038c 100644
--- a/src/guidellm/utils/text.py
+++ b/src/guidellm/utils/text.py
@@ -428,7 +428,7 @@ def load_text_lines(
         format_ = "txt"
 
     # load the data if it's a path or URL
-    if isinstance(data, Path) or (isinstance(data, str) and data.startswith("http")):
+    if isinstance(data, (Path, str)):
         data = load_text(data, encoding=encoding)
         data = clean_text(data)
 
diff --git a/tests/dummy/services/requests.py b/tests/dummy/services/requests.py
index c5023189..e7e29402 100644
--- a/tests/dummy/services/requests.py
+++ b/tests/dummy/services/requests.py
@@ -26,3 +26,6 @@ def __init__(
 
     def create_item(self) -> TextGenerationRequest:
         return TextGenerationRequest(prompt="Test prompt")
+
+    def __len__(self) -> int:
+        raise NotImplementedError
diff --git a/tests/unit/cli/__init__.py b/tests/unit/cli/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/cli/test_custom_type_params.py b/tests/unit/cli/test_custom_type_params.py
new file mode 100644
index 00000000..1e66311d
--- /dev/null
+++ b/tests/unit/cli/test_custom_type_params.py
@@ -0,0 +1,38 @@
+import pytest
+from click import BadParameter
+
+from guidellm.utils import cli_params
+
+
+@pytest.fixture()
+def max_requests_param_type():
+    return cli_params.MaxRequestsType()
+
+
+def test_valid_integer_input(max_requests_param_type):
+    assert max_requests_param_type.convert(10, None, None) == 10
+    assert max_requests_param_type.convert("42", None, None) == 42
+
+
+def test_valid_dataset_input(max_requests_param_type):
+    assert max_requests_param_type.convert("dataset", None, None) == "dataset"
+
+
+def test_invalid_string_input(max_requests_param_type):
+    with pytest.raises(BadParameter):
+        max_requests_param_type.convert("invalid", None, None)
+
+
+def test_invalid_float_input(max_requests_param_type):
+    with pytest.raises(BadParameter):
+        max_requests_param_type.convert("10.5", None, None)
+
+
+def test_invalid_non_numeric_string_input(max_requests_param_type):
+    with pytest.raises(BadParameter):
+        max_requests_param_type.convert("abc", None, None)
+
+
+def test_invalid_mixed_string_input(max_requests_param_type):
+    with pytest.raises(BadParameter):
+        max_requests_param_type.convert("123abc", None, None)
diff --git a/tests/unit/request/test_file.py b/tests/unit/request/test_file.py
index 429de523..69e538a1 100644
--- a/tests/unit/request/test_file.py
+++ b/tests/unit/request/test_file.py
@@ -98,3 +98,64 @@ def test_file_request_generator_file_types_lifecycle(
 
             if index == 2:
                 break
+
+
+@pytest.mark.smoke()
+@pytest.mark.parametrize(
+    ("file_extension", "file_content"),
+    [
+        ("txt", "Test content 1.\nTest content 2.\nTest content 3.\n"),
+        (
+            "csv",
+            "text,label,extra\n"
+            "Test content 1.,1,extra 1\n"
+            "Test content 2.,2,extra 2\n"
+            "Test content 3.,3,extra 3\n",
+        ),
+        (
+            "jsonl",
+            '{"text": "Test content 1."}\n'
+            '{"text": "Test content 2."}\n'
+            '{"text": "Test content 3."}\n',
+        ),
+        (
+            "csv",
+            "prompt,text,extra\n"
+            "Test content 1., text 1, extra 1\n"
+            "Test content 2., text 2, extra 2\n"
+            "Test content 3., text 3, extra 3\n",
+        ),
+        (
+            "json",
+            '[{"text": "Test content 1."}, '
+            '{"text": "Test content 2."}, '
+            '{"text": "Test content 3."}]\n',
+        ),
+        (
+            "json",
+            '{"object_1": {"text": "Test content 1."}, '
+            '"object_2": {"text": "Test content 2."}, '
+            '"object_3": {"text": "Test content 3."}}\n',
+        ),
+        (
+            "yaml",
+            "items:\n"
+            "  - text: Test content 1.\n"
+            "  - text: Test content 2.\n"
+            "  - text: Test content 3.\n",
+        ),
+        (
+            "yaml",
+            "object_1:\n  text: Test content 1.\n"
+            "object_2:\n  text: Test content 2.\n"
+            "object_3:\n  text: Test content 3.\n",
+        ),
+    ],
+)
+def test_file_request_generator_len(mock_auto_tokenizer, file_extension, file_content):
+    with tempfile.TemporaryDirectory() as temp_dir:
+        file_path = Path(temp_dir) / f"example.{file_extension}"
+        file_path.write_text(file_content)
+        generator = FileRequestGenerator(file_path, tokenizer="mock-tokenizer")
+
+        assert len(generator) == 3
diff --git a/tests/unit/request/test_transformers.py b/tests/unit/request/test_transformers.py
index 415fed65..eaa465e6 100644
--- a/tests/unit/request/test_transformers.py
+++ b/tests/unit/request/test_transformers.py
@@ -95,3 +95,38 @@ def test_transformers_dataset_request_generator_lifecycle(
 
             if index == 2:
                 break
+
+
+@pytest.mark.smoke()
+@pytest.mark.parametrize(
+    ("dataset_arg", "dataset"),
+    [
+        (
+            "mock/directory/file.csv",
+            create_sample_dataset_dict(splits=["train"]),
+        ),
+        (
+            "mock/directory/file.json",
+            create_sample_dataset(column="prompt"),
+        ),
+        (
+            "mock/directory/file.py",
+            create_sample_dataset_dict(splits=["test"], column="output"),
+        ),
+        (create_sample_dataset_dict(splits=["val", "train"], column="custom"), None),
+        (create_sample_dataset(), None)
+    ],
+)
+def test_transformers_dataset_request_generator_len(
+    mock_auto_tokenizer, dataset_arg, dataset
+):
+    with patch(
+        "guidellm.utils.transformers.load_dataset",
+        return_value=dataset,
+    ):
+        generator = TransformersDatasetRequestGenerator(
+            dataset=dataset_arg, tokenizer="mock-tokenizer", mode="sync"
+        )
+
+        # Check if __len__ returns the correct length
+        assert len(generator) == 3
diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py
index 6923092e..82de3edf 100644
--- a/tests/unit/test_main.py
+++ b/tests/unit/test_main.py
@@ -1,3 +1,5 @@
+import tempfile
+from pathlib import Path
 from typing import List, Optional
 from unittest.mock import create_autospec, patch
 
@@ -15,6 +17,42 @@
     TransformersDatasetRequestGenerator,
 )
 from guidellm.scheduler import SchedulerResult
+from guidellm.utils.progress import BenchmarkReportProgress
+
+
+@pytest.fixture()
+def mock_benchmark_report():
+    with patch("guidellm.main.GuidanceReport") as mock_benchmark_report:
+
+        def _mock_const(*args, **kwargs):
+            instance = create_autospec(BenchmarkReportProgress, instance=True)
+            instance.args = args
+            instance.kwargs = kwargs
+            instance.benchmarks = []
+            instance.save_file = lambda output_path: None
+            instance.print = lambda *args, **kwargs: None
+
+            return instance
+
+        mock_benchmark_report.side_effect = _mock_const
+        yield mock_benchmark_report
+
+
+@pytest.fixture()
+def mock_benchmark_report_progress():
+    with patch(
+        "guidellm.main.BenchmarkReportProgress"
+    ) as mock_benchmark_report_progress:
+
+        def _mock_const(*args, **kwargs):
+            instance = create_autospec(BenchmarkReportProgress, instance=True)
+            instance.args = args
+            instance.kwargs = kwargs
+
+            return instance
+
+        mock_benchmark_report_progress.side_effect = _mock_const
+        yield mock_benchmark_report_progress
 
 
 @pytest.fixture()
@@ -252,3 +290,125 @@ def test_generate_benchmark_report_cli_smoke(
     assert "Benchmarks" in result.output
     assert "Generating report..." in result.output
     assert "Benchmark Report 1" in result.output
+
+
+@pytest.mark.smoke()
+def test_generate_benchmark_report_emulated_with_dataset_requests(
+    mock_backend, mock_request_generator_emulated, mock_executor
+):
+    with pytest.raises(ValueError, match="Cannot use 'dataset' for emulated data"):
+        generate_benchmark_report(
+            target="http://localhost:8000/v1",
+            backend="openai_server",
+            model=None,
+            data_type="emulated",
+            data=None,
+            tokenizer=None,
+            rate_type="sweep",
+            rate=None,
+            max_seconds=10,
+            max_requests="dataset",
+            output_path="benchmark_report.json",
+            cont_refresh_table=False,
+        )
+
+
+@pytest.mark.smoke()
+def test_generate_benchmark_report_cli_emulated_with_dataset_requests(
+    mock_backend, mock_request_generator_emulated, mock_executor
+):
+    runner = CliRunner()
+    with pytest.raises(ValueError, match="Cannot use 'dataset' for emulated data"):
+        runner.invoke(
+            generate_benchmark_report_cli,
+            [
+                "--target",
+                "http://localhost:8000/v1",
+                "--backend",
+                "openai_server",
+                "--data-type",
+                "emulated",
+                "--data",
+                "prompt_tokens=512",
+                "--rate-type",
+                "sweep",
+                "--max-seconds",
+                "10",
+                "--max-requests",
+                "dataset",
+                "--output-path",
+                "benchmark_report.json",
+            ],
+            catch_exceptions=False,
+        )
+
+
+@pytest.mark.sanity()
+@pytest.mark.parametrize(("rate_type", "rate"), [("constant", 1.0), ("sweep", 1.0)])
+@pytest.mark.parametrize(
+    ("file_extension", "file_content", "expected_results"),
+    [
+        ("txt", "Test prompt 1", 1),
+        ("txt", "Test prompt 1\nTest prompt 2\nTest prompt 3\n", 3),
+    ],
+)
+def test_generate_benchmark_report_openai_limited_by_file_dataset(
+    mocker,
+    mock_auto_tokenizer,
+    mock_benchmark_report,
+    mock_benchmark_report_progress,
+    rate_type,
+    rate,
+    file_extension,
+    file_content,
+    expected_results,
+):
+    """
+    Mock only a few functions to get the proper report result
+    from ``Backend.make_request``.
+
+    Notes:
+        All the results are collected in ``benchmark.errors``,
+        since most of the responses are mocked and can't be processed.
+        But the ordering of the results is still the same for both collections.
+
+        ``mock_benchmark_report`` and ``mock_benchmark_report_progress``
+        are used to avoid working with IO-bound tasks.
+    """
+
+    mocker.patch("guidellm.backend.openai.AsyncOpenAI")
+    mocker.patch("openai.AsyncOpenAI")
+    mocker.patch("guidellm.backend.openai.OpenAIBackend.test_connection")
+    mocker.patch("guidellm.backend.openai.OpenAIBackend.available_models")
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        file_path = Path(temp_dir) / f"example.{file_extension}"
+        file_path.write_text(file_content)
+
+        # Run the benchmark report generation
+        report = generate_benchmark_report(
+            target="http://localhost:8000/v1",
+            backend="openai_server",
+            model=None,
+            data=str(file_path),
+            data_type="file",
+            tokenizer=None,
+            rate_type=rate_type,
+            rate=rate,
+            max_seconds=None,
+            max_requests="dataset",
+            output_path="benchmark_report.json",
+            cont_refresh_table=False,
+        )
+
+        assert report is not None
+        assert len(report.benchmarks) == 1
+        assert len(report.benchmarks[0].benchmarks[0].errors) == expected_results
+
+        file_lines: List[str] = [line for line in file_content.split("\n") if line]
+        output_prompts = [
+            text_generation.request.prompt
+            for text_generation in report.benchmarks[0].benchmarks[0].errors
+        ]
+
+        assert output_prompts == file_lines
diff --git a/tests/unit/test_type.py b/tests/unit/test_type.py
new file mode 100644
index 00000000..e69de29b