Commit 167ad09

filipviz authored and pytorchmergebot committed
[optim] override SWALR.state_dict and load_state_dict (pytorch#163122)
Fixes pytorch#163105

Note that the new `SWALR.load_state_dict` is **not backwards compatible**:

```python
@override
def load_state_dict(self, state_dict: dict[str, Any]) -> None:
    """Load the scheduler's state.

    Args:
        state_dict (dict): scheduler state. Should be an object returned
            from a call to :meth:`state_dict`.
    """
    self.__dict__.update(state_dict)
    self._set_anneal_func(self._anneal_strategy)
```

If we'd like to maintain compatibility with old state_dicts (loaded with `weights_only=False`), we could use something along these lines:

```python
@override
def load_state_dict(self, state_dict: dict[str, Any]) -> None:
    """Load the scheduler's state.

    Args:
        state_dict (dict): scheduler state. Should be an object returned
            from a call to :meth:`state_dict`.
    """
    anneal_func = state_dict.pop("anneal_func", None)
    strategy = state_dict.get("_anneal_strategy")
    self.__dict__.update(state_dict)
    if anneal_func is not None:
        state_dict["anneal_func"] = anneal_func
        if strategy is None:
            if anneal_func == self._linear_anneal:
                strategy = "linear"
            elif anneal_func == self._cosine_anneal:
                strategy = "cos"
    if strategy is None:
        strategy = getattr(self, "_anneal_strategy", "cos")
    self._set_anneal_func(strategy)
```

But given that loading an `SWALR` state_dict before this PR would have raised an error, this seems okay. A GitHub/Google search for `SWALR.load_state_dict` had no results. Happy to change if not, or add a warning just in case.

Pull Request resolved: pytorch#163122
Approved by: https://github.com/janeyx99
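The core idea behind the PR — keep the non-picklable bound method out of the serialized state and rebuild it from a plain strategy string on load — can be sketched without torch as follows. `MiniSWALR` and its methods are illustrative stand-ins mirroring the PR's structure, not the real `SWALR`:

```python
# Illustrative, torch-free sketch of the PR's pattern: the bound method
# `anneal_func` cannot be safely serialized (e.g. with weights_only=True),
# so state_dict() excludes it and load_state_dict() reconstructs it from
# the stored `_anneal_strategy` string. MiniSWALR is a hypothetical mock.
import math
from typing import Any


class MiniSWALR:
    def __init__(self, anneal_strategy: str = "cos") -> None:
        self._set_anneal_func(anneal_strategy)

    def _cosine_anneal(self, t: float) -> float:
        return (1 - math.cos(math.pi * t)) / 2

    def _linear_anneal(self, t: float) -> float:
        return t

    def _set_anneal_func(self, anneal_strategy: str) -> None:
        # Record the strategy name alongside the callable so the
        # callable can be rebuilt later from the name alone.
        self._anneal_strategy = anneal_strategy
        if anneal_strategy == "cos":
            self.anneal_func = self._cosine_anneal
        else:
            self.anneal_func = self._linear_anneal

    def state_dict(self) -> dict[str, Any]:
        # Drop the bound method; keep only plain, picklable entries.
        return {k: v for k, v in self.__dict__.items() if k != "anneal_func"}

    def load_state_dict(self, state_dict: dict[str, Any]) -> None:
        self.__dict__.update(state_dict)
        self._set_anneal_func(self._anneal_strategy)


saved = MiniSWALR("linear").state_dict()  # contains no callables
restored = MiniSWALR()                    # defaults to "cos"
restored.load_state_dict(saved)
print(restored.anneal_func(0.5))          # linear anneal -> 0.5
```

The round trip restores `anneal_func` even though it never appears in the saved dict, which is exactly what makes the saved state compatible with `torch.load(..., weights_only=True)` in the real scheduler.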
1 parent bcbb45b commit 167ad09

File tree

2 files changed: +34 -4 lines changed


test/optim/test_lrscheduler.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -2442,6 +2442,7 @@ def test_cosine_then_cyclic(self):
         partial(CyclicLR, base_lr=0.01, max_lr=0.1),
         partial(OneCycleLR, max_lr=0.01, total_steps=10, anneal_strategy="linear"),
         partial(CosineAnnealingWarmRestarts, T_0=20),
+        partial(SWALR, swa_lr=0.01),
     ],
 )
 @parametrize("weights_only", [True, False])
```

torch/optim/swa_utils.py

Lines changed: 33 additions & 4 deletions

```diff
@@ -7,6 +7,7 @@
 from collections.abc import Iterable
 from copy import deepcopy
 from typing import Any, Callable, cast, Literal, Optional, Union
+from typing_extensions import override

 import torch
 from torch import Tensor
@@ -431,10 +432,7 @@ def __init__(
                 "anneal_strategy must by one of 'cos' or 'linear', "
                 f"instead got {anneal_strategy}"
             )
-        elif anneal_strategy == "cos":
-            self.anneal_func = self._cosine_anneal
-        elif anneal_strategy == "linear":
-            self.anneal_func = self._linear_anneal
+        self._set_anneal_func(anneal_strategy)
         if not isinstance(anneal_epochs, int) or anneal_epochs < 0:
             raise ValueError(
                 f"anneal_epochs must be equal or greater than 0, got {anneal_epochs}"
@@ -482,3 +480,34 @@ def get_lr(self):
             group["swa_lr"] * alpha + lr * (1 - alpha)
             for group, lr in zip(self.optimizer.param_groups, prev_lrs)
         ]
+
+    def _set_anneal_func(self, anneal_strategy: Literal["cos", "linear"]):
+        self._anneal_strategy = anneal_strategy
+        if anneal_strategy == "cos":
+            self.anneal_func = self._cosine_anneal
+        else:
+            self.anneal_func = self._linear_anneal
+
+    @override
+    def state_dict(self) -> dict[str, Any]:
+        """Return the state of the scheduler as a :class:`dict`.
+
+        It contains an entry for every variable in self.__dict__ which
+        is not the optimizer or anneal_func.
+        """
+        return {
+            key: value
+            for key, value in self.__dict__.items()
+            if key not in ("optimizer", "anneal_func")
+        }
+
+    @override
+    def load_state_dict(self, state_dict: dict[str, Any]) -> None:
+        """Load the scheduler's state.
+
+        Args:
+            state_dict (dict): scheduler state. Should be an object returned
+                from a call to :meth:`state_dict`.
+        """
+        self.__dict__.update(state_dict)
+        self._set_anneal_func(self._anneal_strategy)
```
