99import yaml
1010from datasets import Features , IterableDataset , Value
1111from faker import Faker
12- from pydantic import ConfigDict , Field , model_validator
12+ from pydantic import ConfigDict , Field , ValidationError , model_validator
1313from transformers import PreTrainedTokenizerBase
1414
1515from guidellm .data .deserializers .deserializer import (
@@ -242,6 +242,10 @@ def __call__(
242242 if (config := self ._load_config_str (data )) is not None :
243243 return self (config , processor_factory , random_seed , ** data_kwargs )
244244
245+ # Try to parse dict-like data directly
246+ if (config := self ._load_config_dict (data )) is not None :
247+ return self (config , processor_factory , random_seed , ** data_kwargs )
248+
245249 if not isinstance (data , SyntheticTextDatasetConfig ):
246250 raise DataNotSupportedError (
247251 "Unsupported data for SyntheticTextDatasetDeserializer, "
@@ -266,6 +270,15 @@ def __call__(
266270 ),
267271 )
268272
def _load_config_dict(self, data: Any) -> SyntheticTextDatasetConfig | None:
    """
    Attempt to build a synthetic text dataset config from in-memory data.

    :param data: Candidate configuration payload; only ``dict`` or ``list``
        instances are considered, anything else is rejected immediately.
    :return: The config produced by ``SyntheticTextDatasetConfig.model_validate``
        when validation succeeds, otherwise ``None`` (wrong input type or a
        ``ValidationError`` raised during validation).
    """
    parsed: SyntheticTextDatasetConfig | None = None

    if isinstance(data, dict | list):
        try:
            parsed = SyntheticTextDatasetConfig.model_validate(data)
        except ValidationError:
            # Validation failure is not an error here: the payload simply
            # is not a usable config, so report "no match" to the caller.
            parsed = None

    return parsed
281+
269282 def _load_config_file (self , data : Any ) -> SyntheticTextDatasetConfig | None :
270283 if (not isinstance (data , str ) and not isinstance (data , Path )) or (
271284 not Path (data ).is_file ()
0 commit comments