diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py
index b666d199..b4407aa0 100644
--- a/src/guidellm/backend/openai.py
+++ b/src/guidellm/backend/openai.py
@@ -202,6 +202,13 @@ async def text_completions(  # type: ignore[override]
         and a ResponseSummary for the final response.
         """
         logger.debug("{} invocation with args: {}", self.__class__.__name__, locals())
+
+        if isinstance(prompt, list):
+            raise ValueError(
+                "List prompts (batching) is currently not supported for "
+                f"text_completions OpenAI pathways. Received: {prompt}"
+            )
+
         headers = self._headers()
         payload = self._completions_payload(
             orig_kwargs=kwargs,
diff --git a/src/guidellm/config.py b/src/guidellm/config.py
index cfd0dcd5..cd7cb7ae 100644
--- a/src/guidellm/config.py
+++ b/src/guidellm/config.py
@@ -134,6 +134,9 @@ class Settings(BaseSettings):
         Literal["request", "response", "local"]
     ] = "response"
     preferred_backend: Literal["openai"] = "openai"
+    preferred_route: Literal["text_completions", "chat_completions"] = (
+        "text_completions"
+    )
     openai: OpenAISettings = OpenAISettings()

     # Output settings
diff --git a/src/guidellm/dataset/file.py b/src/guidellm/dataset/file.py
index f8847e44..5d6df1d9 100644
--- a/src/guidellm/dataset/file.py
+++ b/src/guidellm/dataset/file.py
@@ -71,19 +71,21 @@ def load_dataset(
         dataset = Dataset.from_dict({"text": items}, **(data_args or {}))
     elif path.suffix.lower() == ".csv":
-        dataset = load_dataset("csv", data_files=path, **(data_args or {}))
+        dataset = load_dataset("csv", data_files=str(path), **(data_args or {}))
     elif path.suffix.lower() in {".json", ".jsonl"}:
-        dataset = load_dataset("json", data_files=path, **(data_args or {}))
+        dataset = load_dataset("json", data_files=str(path), **(data_args or {}))
     elif path.suffix.lower() == ".parquet":
-        dataset = load_dataset("parquet", data_files=path, **(data_args or {}))
+        dataset = load_dataset("parquet", data_files=str(path), **(data_args or {}))
     elif path.suffix.lower() == ".arrow":
-        dataset = load_dataset("arrow", data_files=path, **(data_args or {}))
+        dataset = load_dataset("arrow", data_files=str(path), **(data_args or {}))
     elif path.suffix.lower() == ".hdf5":
-        dataset = Dataset.from_pandas(pd.read_hdf(path), **(data_args or {}))
+        dataset = Dataset.from_pandas(pd.read_hdf(str(path)), **(data_args or {}))
     elif path.suffix.lower() == ".db":
-        dataset = Dataset.from_sql(con=path, **(data_args or {}))
+        dataset = Dataset.from_sql(con=str(path), **(data_args or {}))
     elif path.suffix.lower() == ".tar":
-        dataset = load_dataset("webdataset", data_files=path, **(data_args or {}))
+        dataset = load_dataset(
+            "webdataset", data_files=str(path), **(data_args or {})
+        )
     else:
         raise ValueError(f"Unsupported file type: {path.suffix} given for {path}. ")
diff --git a/src/guidellm/request/loader.py b/src/guidellm/request/loader.py
index 2204f26c..7c122c5d 100644
--- a/src/guidellm/request/loader.py
+++ b/src/guidellm/request/loader.py
@@ -11,6 +11,7 @@
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from transformers import PreTrainedTokenizerBase  # type: ignore[import]

+from guidellm.config import settings
 from guidellm.dataset import ColumnInputTypes, load_dataset
 from guidellm.objects import StandardBaseModel
 from guidellm.request.request import GenerationRequest
@@ -61,6 +62,8 @@ class GenerativeRequestLoader(RequestLoader):
         "content",
         "conversation",
         "conversations",
+        "turn",
+        "turns",
         "text",
     ]
@@ -270,7 +273,7 @@ def _create_request(self, item: dict[str, Any]) -> GenerationRequest:
         )

         return GenerationRequest(
-            request_type="text_completions",
+            request_type=settings.preferred_route,
             content=item[self.column_mappings["prompt_column"]],
             stats=(
                 {"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {}