@@ -202,9 +202,9 @@ def build_dataload_pipeline(
         self, rank: int = 0, world_size: int = 1
     ) -> DataPipelineBuilder:
         if world_size > 1:
-            assert (
-                self.loading_config.seed is not None
-            ), "for distributed training with `world_size` > 1, `seed` should be set !"
+            assert self.loading_config.seed is not None, (
+                "for distributed training with `world_size` > 1, `seed` should be set !"
+            )
         if self.is_validation:
             self.set_validation_params(world_size)
 
@@ -321,12 +321,12 @@ def create_on_the_fly_columns(
         self, pipeline: DataPipelineBuilder
     ) -> DataPipelineBuilder:
         if self.dataset_config.source_sequences is not None:
-            assert (
-                self.dataset_config.source_column is not None
-            ), f"Expected a source_column - found {self.dataset_config.source_column}"
-            assert (
-                self.dataset_config.source_text_column is not None
-            ), f"Expected a source_text_column - found {self.dataset_config.source_text_column}"
+            assert self.dataset_config.source_column is not None, (
+                f"Expected a source_column - found {self.dataset_config.source_column}"
+            )
+            assert self.dataset_config.source_text_column is not None, (
+                f"Expected a source_text_column - found {self.dataset_config.source_text_column}"
+            )
 
             pipeline = pipeline.map(
                 partial(
@@ -338,12 +338,12 @@ def create_on_the_fly_columns(
                 num_parallel_calls=self._num_parallel_call(self.nb_parallel_fragments),
             )
         if self.dataset_config.target_sequences is not None:
-            assert (
-                self.dataset_config.target_column is not None
-            ), f"Expected a target_column, found {self.dataset_config.target_column}"
-            assert (
-                self.dataset_config.target_text_column is not None
-            ), f"Expected a target_text_columns, found {self.dataset_config.target_text_column}"
+            assert self.dataset_config.target_column is not None, (
+                f"Expected a target_column, found {self.dataset_config.target_column}"
+            )
+            assert self.dataset_config.target_text_column is not None, (
+                f"Expected a target_text_columns, found {self.dataset_config.target_text_column}"
+            )
 
             pipeline = pipeline.map(
                 partial(
@@ -426,9 +426,9 @@ def config_post_init(self) -> None:
             )
 
         if self.loading_config.even_sharding:
-            assert (
-                self.loading_config.seed is not None
-            ), "`even_sharding` sharding requires to seed to be set"
+            assert self.loading_config.seed is not None, (
+                "`even_sharding` sharding requires to seed to be set"
+            )
 
         if self.loading_config.max_tokens == 0:
             self.loading_config.max_tokens = None
@@ -876,9 +876,9 @@ def add_min_max_sentence_len_in_doc_filter(
             self.loading_config.max_sentence_len_in_doc
             or self.loading_config.min_sentence_len_in_doc
         ):
-            assert (
-                self.dataset_config.source_text_column is not None
-            ), f"Expexted a source_text_columns, found {self.dataset_config.source_text_column}"
+            assert self.dataset_config.source_text_column is not None, (
+                f"Expexted a source_text_columns, found {self.dataset_config.source_text_column}"
+            )
 
             pipeline = pipeline.map(
                 partial(
@@ -962,9 +962,9 @@ def add_quality_score_filters(
         if source_quality_range is None:
             return pipeline
 
-        assert (
-            self.dataset_config.source_quality_column is not None
-        ), f"Expected a source_quality_columns, found {self.dataset_config.source_quality_column}"
+        assert self.dataset_config.source_quality_column is not None, (
+            f"Expected a source_quality_columns, found {self.dataset_config.source_quality_column}"
+        )
 
         pipeline = pipeline.map(
             partial(