
Commit ec9fd02

colehawkins, carmocca, pre-commit-ci[bot], and awaelchli authored and committed
Add check for bf16 in deepspeed inference (#16973)
Co-authored-by: Carlos Mocholí <[email protected]>
Co-authored-by: Cole Hawkins <colehawk>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: awaelchli <[email protected]>
(cherry picked from commit c271d4c)
1 parent 0d33813 commit ec9fd02

File tree

3 files changed: +31 −0 lines changed

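For readers skimming the diff: the change mirrors the existing fp16 handling for bf16 when the DeepSpeed inference config is assembled inside DeepSpeedStrategy. A minimal sketch of that logic as a standalone helper (the helper name is hypothetical; the real code is the two-line method change shown below):

def _collect_precision_sections(config: dict) -> dict:
    """Copy the fp16/bf16 sections of a user DeepSpeed config into the inference config."""
    inference_config = {"train_micro_batch_size_per_gpu": 1}
    for key in ("fp16", "bf16"):  # before this commit, only "fp16" was forwarded
        if key in config:
            inference_config[key] = config[key]
    return inference_config

# The bf16 section now survives into the inference config:
assert _collect_precision_sections({"bf16": {"enabled": True}})["bf16"] == {"enabled": True}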

src/pytorch_lightning/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -26,6 +26,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed `num_nodes` not being set for `DDPFullyShardedNativeStrategy` ([#17160](https://github.com/Lightning-AI/lightning/pull/17160))
 
+- Fixed parsing the precision config for inference in `DeepSpeedStrategy` ([#16973](https://github.com/Lightning-AI/lightning/pull/16973))
+
+
 - Fixed the availability check for `rich` that prevented Lightning to be imported in Google Colab ([#17156](https://github.com/Lightning-AI/lightning/pull/17156))
 
 

src/pytorch_lightning/strategies/deepspeed.py

Lines changed: 2 additions & 0 deletions
@@ -553,6 +553,8 @@ def _initialize_deepspeed_inference(self, model: Module) -> None:
         inference_config = {"train_micro_batch_size_per_gpu": 1}
         if "fp16" in self.config:
             inference_config.update({"fp16": self.config["fp16"]})
+        if "bf16" in self.config:
+            inference_config.update({"bf16": self.config["bf16"]})
         if self.zero_stage_3:
             inference_config.update(
                 {
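The user-facing effect, sketched under the assumption of a single CUDA GPU with deepspeed installed (BoringModel from the demos module stands in for any LightningModule; the new test below drives the same path through a callback):

from pytorch_lightning import Trainer
from pytorch_lightning.demos.boring_classes import BoringModel
from pytorch_lightning.strategies import DeepSpeedStrategy

# A bf16-only DeepSpeed config is now forwarded to the inference config used by validate/test/predict.
model = BoringModel()
strategy = DeepSpeedStrategy(config={"bf16": {"enabled": True}})
trainer = Trainer(accelerator="cuda", devices=1, strategy=strategy)
trainer.validate(model)
assert "bf16" in trainer.strategy.config  # mirrors the new test's assertion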

tests/tests_pytorch/strategies/test_deepspeed_strategy.py

Lines changed: 26 additions & 0 deletions
@@ -371,6 +371,32 @@ def on_train_start(self, trainer, pl_module) -> None:
         trainer.fit(model)
 
 
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
+@pytest.mark.parametrize("precision", ["fp16", "bf16"])
+def test_deepspeed_inference_precision_during_inference(precision, tmpdir):
+    """Ensure if we modify the precision for deepspeed and execute inference-only, the deepspeed config contains
+    these changes."""
+
+    class TestCB(Callback):
+        def on_validation_start(self, trainer, pl_module) -> None:
+            assert trainer.strategy.config[precision]
+            raise SystemExit()
+
+    model = BoringModel()
+    strategy = DeepSpeedStrategy(config={precision: {"enabled": True}})
+
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        strategy=strategy,
+        accelerator="cuda",
+        devices=1,
+        callbacks=[TestCB()],
+        barebones=True,
+    )
+    with pytest.raises(SystemExit):
+        trainer.validate(model)
+
+
 @RunIf(deepspeed=True)
 def test_deepspeed_custom_activation_checkpointing_params(tmpdir):
     """Ensure if we modify the activation checkpointing parameters, the deepspeed config contains these changes."""

0 commit comments