
Commit 24dc9af

SkafteNicki authored and Borda committed
Add support for more dtypes in ModelSummary and warning on non-supported (#21034)
(cherry picked from commit 1d8cf20)
1 parent 2685947 · commit 24dc9af

File tree

3 files changed, +38 -5 lines


src/lightning/pytorch/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -35,6 +35,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fix double iteration bug when resumed from a checkpoint. ([#20775](https://github.com/Lightning-AI/pytorch-lightning/pull/20775))
 
 
+- Fix support for more dtypes in `ModelSummary` ([#21034](https://github.com/Lightning-AI/pytorch-lightning/pull/21034))
+
+
 - Fixed metrics in `RichProgressBar` being updated according to user provided `refresh_rate` ([#21032](https://github.com/Lightning-AI/pytorch-lightning/pull/21032))

src/lightning/pytorch/utilities/model_summary/model_summary.py

Lines changed: 17 additions & 1 deletion
@@ -25,6 +25,7 @@
 from torch.utils.hooks import RemovableHandle
 
 import lightning.pytorch as pl
+from lightning.fabric.utilities import rank_zero_warn
 from lightning.fabric.utilities.distributed import _is_dtensor
 from lightning.pytorch.utilities.model_helpers import _ModuleMode
 from lightning.pytorch.utilities.rank_zero import WarningCache
@@ -216,7 +217,22 @@ def __init__(self, model: "pl.LightningModule", max_depth: int = 1) -> None:
         self._layer_summary = self.summarize()
         # 1 byte -> 8 bits
         # TODO: how do we compute precision_megabytes in case of mixed precision?
-        precision_to_bits = {"64": 64, "32": 32, "16": 16, "bf16": 16}
+        precision_to_bits = {
+            "64": 64,
+            "32": 32,
+            "16": 16,
+            "bf16": 16,
+            "16-true": 16,
+            "bf16-true": 16,
+            "32-true": 32,
+            "64-true": 64,
+        }
+        if self._model._trainer and self._model.trainer.precision not in precision_to_bits:
+            rank_zero_warn(
+                f"Precision {self._model.trainer.precision} is not supported by the model summary. "
+                " Estimated model size in MB will not be accurate. Using 32 bits instead.",
+                category=UserWarning,
+            )
         precision = precision_to_bits.get(self._model.trainer.precision, 32) if self._model._trainer else 32
         self._precision_megabytes = (precision / 8.0) * 1e-6
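
The arithmetic behind the estimate is just parameter count times bytes per parameter. Below is a minimal standalone sketch of that computation; the `PRECISION_TO_BITS` table mirrors the mapping added in the diff above, while the helper name `estimate_model_size_mb` is illustrative and not part of the Lightning API:

```python
from torch import nn

# Mirrors the mapping added in model_summary.py; unknown precision
# strings fall back to 32 bits, as in the patched __init__.
PRECISION_TO_BITS = {
    "64": 64, "32": 32, "16": 16, "bf16": 16,
    "16-true": 16, "bf16-true": 16, "32-true": 32, "64-true": 64,
}


def estimate_model_size_mb(model: nn.Module, precision: str) -> float:
    """Estimate model size in MB as num_parameters * bytes_per_parameter."""
    bits = PRECISION_TO_BITS.get(precision, 32)  # 32-bit fallback
    num_params = sum(p.numel() for p in model.parameters())
    return num_params * (bits / 8.0) * 1e-6


model = nn.Linear(1000, 1000)  # 1_001_000 parameters
print(estimate_model_size_mb(model, "16-true"))   # ~2.002 MB at 2 bytes/param
print(estimate_model_size_mb(model, "16-mixed"))  # unsupported -> 32-bit fallback, ~4.004 MB
```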

tests/tests_pytorch/utilities/test_model_summary.py

Lines changed: 18 additions & 4 deletions
@@ -323,19 +323,33 @@ def test_empty_model_size(max_depth):
         pytest.param("mps", marks=RunIf(mps=True)),
     ],
 )
-def test_model_size_precision(tmp_path, accelerator):
-    """Test model size for half and full precision."""
-    model = PreCalculatedModel()
+@pytest.mark.parametrize("precision", ["16-true", "32-true", "64-true"])
+def test_model_size_precision(tmp_path, accelerator, precision):
+    """Test model size for different precision types."""
+    model = PreCalculatedModel(precision=int(precision.split("-")[0]))
 
     # fit model
     trainer = Trainer(
-        default_root_dir=tmp_path, accelerator=accelerator, devices=1, max_steps=1, max_epochs=1, precision=32
+        default_root_dir=tmp_path, accelerator=accelerator, devices=1, max_steps=1, max_epochs=1, precision=precision
     )
     trainer.fit(model)
     summary = summarize(model)
     assert model.pre_calculated_model_size == summary.model_size
 
 
+def test_model_size_warning_on_unsupported_precision(tmp_path):
+    """Test that a warning is raised when the precision is not supported."""
+    model = PreCalculatedModel(precision=32)  # fallback to 32 bits
+
+    # supported precision by lightning but not by the model summary
+    trainer = Trainer(max_epochs=1, precision="16-mixed", default_root_dir=tmp_path)
+    trainer.fit(model)
+
+    with pytest.warns(UserWarning, match="Precision .* is not supported by the model summary.*"):
+        summary = summarize(model)
+    assert model.pre_calculated_model_size == summary.model_size
+
+
 def test_lazy_model_summary():
     """Test that the model summary can work with lazy layers."""
     lazy_model = LazyModel()
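
Outside the test suite, the same warning can be reproduced with Lightning's demo `BoringModel`. A sketch, assuming an environment (accelerator and PyTorch version) where the `16-mixed` Trainer precision actually runs; the expected message is the one added in the diff above:

```python
import warnings

from lightning.pytorch import Trainer
from lightning.pytorch.demos.boring_classes import BoringModel
from lightning.pytorch.utilities.model_summary import summarize

model = BoringModel()
# "16-mixed" is a valid Trainer precision, but it has no entry in the
# precision_to_bits table, so ModelSummary warns and falls back to 32 bits.
trainer = Trainer(max_epochs=1, limit_train_batches=1, precision="16-mixed")
trainer.fit(model)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    summarize(model)

# Expect a UserWarning whose message starts with
# "Precision 16-mixed is not supported by the model summary."
print([str(w.message) for w in caught if issubclass(w.category, UserWarning)])
```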
