
Commit e18824c (1 parent: f7d5ae4)

fix: ptl 2.6.0 explicitly pass weights_only=False (#710)
## Description

This change in PTL 2.6.0 (Lightning-AI/pytorch-lightning#21072) means we have to explicitly pass `weights_only=False` when calling `BaseGraphModule.load_from_checkpoint` (nice spot, Ana!).

***As a contributor to the Anemoi framework, please ensure that your changes include unit tests, updates to any affected dependencies and documentation, and have been tested in a parallel setting (i.e., with multiple GPUs). As a reviewer, you are also responsible for verifying these aspects and requesting changes if they are not adequately addressed. For guidelines, please refer to https://anemoi.readthedocs.io/en/latest/***

By opening this pull request, I affirm that all authors agree to the [Contributor License Agreement](https://github.com/ecmwf/codex/blob/main/Legal/contributor_license_agreement.md).
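For context, here is a minimal standalone sketch of the behaviour this commit works around (illustrative only, not Anemoi code; the `Metadata` class and file name are made up). Under the `weights_only=True` default that PTL 2.6.0 now follows, `torch.load` refuses to unpickle arbitrary Python objects, which these training checkpoints contain:

```python
# Minimal sketch of the weights_only behaviour change; illustrative only.
import torch


class Metadata:
    """Stands in for the non-tensor objects stored in real training checkpoints."""

    uuid = "0000"


ckpt = {"state_dict": {"w": torch.zeros(2)}, "metadata": Metadata()}
torch.save(ckpt, "demo.ckpt")

try:
    # The restricted unpickler used by weights_only=True rejects
    # arbitrary classes such as Metadata.
    torch.load("demo.ckpt", weights_only=True)
except Exception as err:  # typically an UnpicklingError
    print(f"weights-only load failed: {err}")

# Trusted, self-produced checkpoints can opt back out:
full = torch.load("demo.ckpt", weights_only=False)
print(full["metadata"].uuid)  # "0000"
```

Passing `weights_only=False` through `load_from_checkpoint`, as the diffs below do, restores full unpickling for these trusted checkpoints.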

File tree: 3 files changed, +9 -3 lines

training/src/anemoi/training/train/train.py

Lines changed: 7 additions & 1 deletion
```diff
@@ -208,7 +208,12 @@ def model(self) -> pl.LightningModule:
         # pop data_indices so that the data indices on the checkpoint do not get overwritten
         # by the data indices from the new config
         kwargs.pop("data_indices")
-        model = model_task.load_from_checkpoint(self.last_checkpoint, **kwargs, strict=False)
+        model = model_task.load_from_checkpoint(
+            self.last_checkpoint,
+            **kwargs,
+            strict=False,
+            weights_only=False,
+        )
 
         model.data_indices = self.data_indices
         # check data indices in original checkpoint and current data indices are the same
@@ -436,6 +441,7 @@ def _check_dry_run(self) -> None:
         LOGGER.info("Dry run: %s", self.dry_run)
 
     def prepare_compilation(self) -> None:
+
         if hasattr(self.config.model, "compile"):
             self.model = mark_for_compilation(self.model, self.config.model_dump(by_alias=True).model.compile)
         if hasattr(self.config.training, "recompile_limit"):
```
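Note: `weights_only` here is presumably forwarded by Lightning to the underlying `torch.load` call. Since `self.last_checkpoint` is a trusted checkpoint produced by this same training pipeline, opting out of the weights-only restriction is safe.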

training/src/anemoi/training/utils/checkpoint.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -44,7 +44,7 @@ def load_and_prepare_model(lightning_checkpoint_path: str) -> tuple[torch.nn.Mod
     pytorch model, metadata
 
     """
-    module = BaseGraphModule.load_from_checkpoint(lightning_checkpoint_path)
+    module = BaseGraphModule.load_from_checkpoint(lightning_checkpoint_path, weights_only=False)
     model = module.model
 
     metadata = dict(**model.metadata)
```
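For illustration, a hypothetical call site for the updated helper (the checkpoint path is a placeholder), consistent with the `pytorch model, metadata` return shape shown in the docstring:

```python
# Hypothetical usage; the checkpoint path below is a placeholder.
from anemoi.training.utils.checkpoint import load_and_prepare_model

model, metadata = load_and_prepare_model("checkpoints/last.ckpt")
print(type(model).__name__, sorted(metadata))
```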

training/tests/unit/diagnostics/test_checkpoint.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -134,7 +134,7 @@ def test_same_uuid(tmp_path: str, callback: AnemoiCheckpoint, model: DummyModule
     if Path(tmp_path + "/" + pl_ckpt_name).exists():
         uuid = load_metadata(ckpt_path)["uuid"]
 
-        pl_model = DummyModule.load_from_checkpoint(tmp_path + "/" + pl_ckpt_name)
+        pl_model = DummyModule.load_from_checkpoint(tmp_path + "/" + pl_ckpt_name, weights_only=False)
 
         assert uuid == pl_model.hparams["metadata"]["uuid"]
 
```
