File tree Expand file tree Collapse file tree 2 files changed +10
-2
lines changed
tests/artifacts/predefined_data_configs Expand file tree Collapse file tree 2 files changed +10
-2
lines changed Original file line number Diff line number Diff line change 1- dataprocessor :
2- type : default
31datasets :
42 - name : pretokenized_dataset
53 data_paths :
Original file line number Diff line number Diff line change @@ -188,11 +188,21 @@ def load_and_validate_data_config(data_config_file: str) -> DataConfig:
188188 assert isinstance (
189189 raw_data ["datasets" ], list
190190 ), "datasets should be provided as a list"
191+
191192 datasets = []
193+ dataprocessor = None
194+
192195 for d in raw_data ["datasets" ]:
193196 datasets .append (_validate_dataset_config (d ))
194197 if "dataprocessor" in raw_data :
195198 dataprocessor = _validate_dataprocessor_config (raw_data ["dataprocessor" ])
196199
200+ if dataprocessor is None :
201+ logging .info (
202+ "`dataprocessor` filed is absent from data config. Using default dataprocessor"
203+ )
204+ dataprocessor = DataPreProcessorConfig ()
205+ logging .info ("Default datapreprocessor is %s" , str (dataprocessor ))
206+
197207 data_config = DataConfig (dataprocessor = dataprocessor , datasets = datasets )
198208 return data_config
You can’t perform that action at this time.
0 commit comments