Skip to content

Commit f53d6a7

Browse files
Tuan Tran
authored and committed
Merge branch 'main' into tuan/fix_17
2 parents 7e4c89c + 67b7ccc commit f53d6a7

File tree

38 files changed

+268
-266
lines changed

38 files changed

+268
-266
lines changed

lcm/datasets/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ def pipeline(self) -> DataPipeline:
5353
self._pipeline = self.builder_func(
5454
self.datasets, self.data_config, gang_rank, world_size
5555
)
56-
assert (
57-
self._pipeline
58-
), f"Cannot build data pipeline from config {self.data_config}"
56+
assert self._pipeline, (
57+
f"Cannot build data pipeline from config {self.data_config}"
58+
)
5959
return self._pipeline
6060

6161
def destroy(self) -> None:

lcm/datasets/batch.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -249,9 +249,9 @@ def __post_init__(self):
249249

250250
length = len(self.source)
251251

252-
assert (
253-
(self.target is None) or (len(self.target) == length)
254-
), f"all elements in LCMInput should be of the same length, got {len(self.target)} and {length}"
252+
assert (self.target is None) or (len(self.target) == length), (
253+
f"all elements in LCMInput should be of the same length, got {len(self.target)} and {length}"
254+
)
255255

256256
def __len__(self) -> int:
257257
return len(self.source)
@@ -296,9 +296,9 @@ def prepare_input(
296296
)
297297

298298
elif style == LCMStyle.SUPERVISED:
299-
assert (
300-
self.target is not None
301-
), "Missing target embeddings for a supervised batch"
299+
assert self.target is not None, (
300+
"Missing target embeddings for a supervised batch"
301+
)
302302
return get_embeddings_sequence(
303303
src_seqs=self.source,
304304
tgt_seqs=self.target,

lcm/datasets/dataloader.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,9 @@ def _tokenize_batch(self, batch: Dict[str, Any]) -> LCMInput:
195195
else:
196196
embs = None
197197
outputs[key] = embs
198-
assert (
199-
outputs["source"] is not None
200-
), "LCMDataLoader requires `source` sequences to be present in batches"
198+
assert outputs["source"] is not None, (
199+
"LCMDataLoader requires `source` sequences to be present in batches"
200+
)
201201
return LCMInput(**outputs)
202202

203203
def iterate_batches(self) -> Iterator[LCMInput]:

lcm/datasets/dataloading.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,9 @@ def build_dataload_pipeline(
202202
self, rank: int = 0, world_size: int = 1
203203
) -> DataPipelineBuilder:
204204
if world_size > 1:
205-
assert (
206-
self.loading_config.seed is not None
207-
), "for distributed training with `world_size` > 1, `seed` should be set !"
205+
assert self.loading_config.seed is not None, (
206+
"for distributed training with `world_size` > 1, `seed` should be set !"
207+
)
208208
if self.is_validation:
209209
self.set_validation_params(world_size)
210210

@@ -321,12 +321,12 @@ def create_on_the_fly_columns(
321321
self, pipeline: DataPipelineBuilder
322322
) -> DataPipelineBuilder:
323323
if self.dataset_config.source_sequences is not None:
324-
assert (
325-
self.dataset_config.source_column is not None
326-
), f"Expected a source_column - found {self.dataset_config.source_column}"
327-
assert (
328-
self.dataset_config.source_text_column is not None
329-
), f"Expected a source_text_column - found {self.dataset_config.source_text_column}"
324+
assert self.dataset_config.source_column is not None, (
325+
f"Expected a source_column - found {self.dataset_config.source_column}"
326+
)
327+
assert self.dataset_config.source_text_column is not None, (
328+
f"Expected a source_text_column - found {self.dataset_config.source_text_column}"
329+
)
330330

331331
pipeline = pipeline.map(
332332
partial(
@@ -338,12 +338,12 @@ def create_on_the_fly_columns(
338338
num_parallel_calls=self._num_parallel_call(self.nb_parallel_fragments),
339339
)
340340
if self.dataset_config.target_sequences is not None:
341-
assert (
342-
self.dataset_config.target_column is not None
343-
), f"Expected a target_column, found {self.dataset_config.target_column}"
344-
assert (
345-
self.dataset_config.target_text_column is not None
346-
), f"Expected a target_text_columns, found {self.dataset_config.target_text_column}"
341+
assert self.dataset_config.target_column is not None, (
342+
f"Expected a target_column, found {self.dataset_config.target_column}"
343+
)
344+
assert self.dataset_config.target_text_column is not None, (
345+
f"Expected a target_text_columns, found {self.dataset_config.target_text_column}"
346+
)
347347

348348
pipeline = pipeline.map(
349349
partial(
@@ -426,9 +426,9 @@ def config_post_init(self) -> None:
426426
)
427427

428428
if self.loading_config.even_sharding:
429-
assert (
430-
self.loading_config.seed is not None
431-
), "`even_sharding` sharding requires to seed to be set"
429+
assert self.loading_config.seed is not None, (
430+
"`even_sharding` sharding requires to seed to be set"
431+
)
432432

433433
if self.loading_config.max_tokens == 0:
434434
self.loading_config.max_tokens = None
@@ -876,9 +876,9 @@ def add_min_max_sentence_len_in_doc_filter(
876876
self.loading_config.max_sentence_len_in_doc
877877
or self.loading_config.min_sentence_len_in_doc
878878
):
879-
assert (
880-
self.dataset_config.source_text_column is not None
881-
), f"Expexted a source_text_columns, found {self.dataset_config.source_text_column}"
879+
assert self.dataset_config.source_text_column is not None, (
880+
f"Expexted a source_text_columns, found {self.dataset_config.source_text_column}"
881+
)
882882

883883
pipeline = pipeline.map(
884884
partial(
@@ -962,9 +962,9 @@ def add_quality_score_filters(
962962
if source_quality_range is None:
963963
return pipeline
964964

965-
assert (
966-
self.dataset_config.source_quality_column is not None
967-
), f"Expected a source_quality_columns, found {self.dataset_config.source_quality_column}"
965+
assert self.dataset_config.source_quality_column is not None, (
966+
f"Expected a source_quality_columns, found {self.dataset_config.source_quality_column}"
967+
)
968968

969969
pipeline = pipeline.map(
970970
partial(

lcm/datasets/parquet_utils.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -486,9 +486,9 @@ def build_batching_loop_over_one_table(
486486
num_parallel_calls: int = 1,
487487
) -> DataPipeline:
488488
if max_tokens is not None:
489-
assert (
490-
length_column is not None
491-
), "Need to provide a column to compute the number of tokens"
489+
assert length_column is not None, (
490+
"Need to provide a column to compute the number of tokens"
491+
)
492492

493493
random_state = np.random.RandomState(seed)
494494
if length_column is not None and len(length_column) > 0:
@@ -1109,9 +1109,9 @@ def get_row_group_level_metadata(
11091109
columns_to_exclude = set(["row_group_id", "num_rows", "total_byte_size"]) & set(
11101110
columns
11111111
)
1112-
assert (
1113-
len(columns_to_exclude) == 0
1114-
), f"names conflict, rename/remove : {columns_to_exclude}"
1112+
assert len(columns_to_exclude) == 0, (
1113+
f"names conflict, rename/remove : {columns_to_exclude}"
1114+
)
11151115

11161116
def get_one_row_group_stats(row_group):
11171117
metadata = row_group.metadata

lcm/evaluation/arun.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,12 @@ def run(self, iteration_value: Optional[Any] = None, iteration_index: int = 0):
114114
)
115115

116116
if iteration_value is not None:
117-
assert (
118-
isinstance(iteration_value, int) and self.config.nshards
119-
), f"Invalid shard value ({self.config.nshards}) or iteration value ({iteration_value})"
120-
assert (
121-
self.config.data_loading
122-
), f"Data loading is not specified: \n {self.config}"
117+
assert isinstance(iteration_value, int) and self.config.nshards, (
118+
f"Invalid shard value ({self.config.nshards}) or iteration value ({iteration_value})"
119+
)
120+
assert self.config.data_loading, (
121+
f"Data loading is not specified: \n {self.config}"
122+
)
123123
self.config.data_loading.rank = iteration_value
124124
self.config.data_loading.world_size = int(self.config.nshards)
125125

@@ -194,9 +194,9 @@ async def schedule_task(
194194
result = (metrics, result_file)
195195

196196
result_metrics, result_file = result
197-
assert isinstance(
198-
result_metrics, dict
199-
), f"Expected Tuple[Dict[str, AverageMetrics], str], get {type(result_metrics)}"
197+
assert isinstance(result_metrics, dict), (
198+
f"Expected Tuple[Dict[str, AverageMetrics], str], get {type(result_metrics)}"
199+
)
200200

201201
metrics = {}
202202
cf = getattr(module.config, "confidence_level", None)

lcm/evaluation/cli/configs.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@ class CliConfig:
7878

7979
def __post_init__(self) -> None:
8080
self.metric_log_dir = self.metric_log_dir or self.dump_dir
81-
assert (
82-
self.temperature >= 0.0
83-
), f"Expect non-zero temperature, get {self.temperature}"
81+
assert self.temperature >= 0.0, (
82+
f"Expect non-zero temperature, get {self.temperature}"
83+
)
8484
if self.temperature == 0:
8585
self.top_p = 0
8686
self.top_k = 0

lcm/evaluation/metrics/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ def get_scorer(
5757
if "outputs" in defaults:
5858
output_columns = defaults["outputs"].default
5959
else:
60-
assert (
61-
config.model_name
62-
), f"Cannot resolve output name for the scorer type {scorer_cls}"
60+
assert config.model_name, (
61+
f"Cannot resolve output name for the scorer type {scorer_cls}"
62+
)
6363
output_columns = scorer_cls.default_outputs(config.model_name)
6464

6565
if isinstance(output_columns, str):

lcm/evaluation/metrics/multilingual_similarity.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,9 @@ def translate(
6868
) -> List[str]:
6969
src_lang, tgt_lang = self.src_lang, self.tgt_lang
7070
sent_translations = []
71-
assert isinstance(
72-
self.model, EncoderDecoderModel
73-
), f"Unsupported type: {type(self.model)}"
71+
assert isinstance(self.model, EncoderDecoderModel), (
72+
f"Unsupported type: {type(self.model)}"
73+
)
7474
generator = BeamSearchSeq2SeqGenerator(
7575
self.model, echo_prompt=True, max_seq_len=self.max_seq_len
7676
)

lcm/evaluation/metrics/sentence_fluency.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,9 @@ def score_texts(
200200
bos_token = bos_token or getattr(self.tokenizer, "bos_token", "\n")
201201
if eos_token != "":
202202
eos_token = eos_token or getattr(self.tokenizer, "eos_token", "\n")
203-
assert (
204-
eos_token is not None and bos_token is not None
205-
), "Expecting eos and bos tokens, for perplexity without any surrounding tokens, use eos_token='' and bos_token=''"
203+
assert eos_token is not None and bos_token is not None, (
204+
"Expecting eos and bos tokens, for perplexity without any surrounding tokens, use eos_token='' and bos_token=''"
205+
)
206206
logger.info(
207207
f"Computing perplexity with bos_token={repr(bos_token)} and eos_token={repr(eos_token)}"
208208
)
@@ -340,9 +340,9 @@ def backtranslate(
340340
translations = []
341341
back_translations = []
342342
losses = []
343-
assert isinstance(
344-
self.model, EncoderDecoderModel
345-
), f"Unsupported type: {type(self.model)}"
343+
assert isinstance(self.model, EncoderDecoderModel), (
344+
f"Unsupported type: {type(self.model)}"
345+
)
346346
generator = BeamSearchSeq2SeqGenerator(
347347
self.model, echo_prompt=True, max_seq_len=self.max_seq_len
348348
)

0 commit comments

Comments (0)