Skip to content

Commit 2309d50

Browse files
committed
Attempt to parse synthetic config with model validate
Signed-off-by: Samuel Monson <[email protected]>
1 parent 65becf0 commit 2309d50

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

src/guidellm/data/deserializers/synthetic.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import yaml
1010
from datasets import Features, IterableDataset, Value
1111
from faker import Faker
12-
from pydantic import ConfigDict, Field, model_validator
12+
from pydantic import ConfigDict, Field, ValidationError, model_validator
1313
from transformers import PreTrainedTokenizerBase
1414

1515
from guidellm.data.deserializers.deserializer import (
@@ -242,6 +242,10 @@ def __call__(
242242
if (config := self._load_config_str(data)) is not None:
243243
return self(config, processor_factory, random_seed, **data_kwargs)
244244

245+
# Try to parse dict-like data directly
246+
if (config := self._load_config_dict(data)) is not None:
247+
return self(config, processor_factory, random_seed, **data_kwargs)
248+
245249
if not isinstance(data, SyntheticTextDatasetConfig):
246250
raise DataNotSupportedError(
247251
"Unsupported data for SyntheticTextDatasetDeserializer, "
@@ -266,6 +270,15 @@ def __call__(
266270
),
267271
)
268272

273+
def _load_config_dict(self, data: Any) -> SyntheticTextDatasetConfig | None:
    """
    Try to build a synthetic dataset config from dict- or list-shaped data.

    :param data: Candidate input; only ``dict`` and ``list`` instances are
        handed to the model validator, anything else is rejected up front.
    :return: The validated ``SyntheticTextDatasetConfig``, or ``None`` when
        ``data`` is not a dict/list or fails validation.
    """
    if isinstance(data, (dict, list)):
        try:
            parsed = SyntheticTextDatasetConfig.model_validate(data)
        except ValidationError:
            # Validation failure means "not a config"; signal it with None
            # rather than propagating the error to the caller.
            parsed = None
        return parsed

    return None
281+
269282
def _load_config_file(self, data: Any) -> SyntheticTextDatasetConfig | None:
270283
if (not isinstance(data, str) and not isinstance(data, Path)) or (
271284
not Path(data).is_file()

0 commit comments

Comments
 (0)