
Commit 4b539e4

ppwwyyxx authored and facebook-github-bot committed
Use ParamScheduler from fvcore
Summary: Pull Request resolved: #2585. Can replace our existing LR schedulers.
Reviewed By: theschnitz
Differential Revision: D26220618
fbshipit-source-id: e3fd7a4427bcd3506554292764bb362a39618a9f
1 parent 81b9cad commit 4b539e4

File tree: 7 files changed, +204 −19 lines


detectron2/engine/hooks.py

Lines changed: 4 additions & 1 deletion
@@ -206,7 +206,10 @@ class LRScheduler(HookBase):
     def __init__(self, optimizer=None, scheduler=None):
         """
         Args:
-            No args needed. Will obtain optimizer and scheduler from trainer.
+            optimizer (torch.optim.Optimizer):
+            scheduler (torch.optim.LRScheduler):
+
+            If any argument is not given, will try to obtain it from the trainer.
         """
         self._optimizer = optimizer
         self._scheduler = scheduler
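
With this change the hook can take both objects explicitly instead of always pulling them from the trainer. A minimal sketch (not part of the commit; the throwaway parameter, optimizer, and stock PyTorch scheduler below are illustrative only):

import torch
from detectron2.engine.hooks import LRScheduler

# Throwaway parameter/optimizer plus a stock PyTorch scheduler,
# just to exercise the new constructor signature.
p = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([p], lr=0.01)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[60000, 80000])

# Both objects can now be passed explicitly...
hook = LRScheduler(optimizer=optimizer, scheduler=scheduler)

# ...or omitted, in which case the hook still obtains them from the trainer,
# as the updated docstring says.
hook = LRScheduler()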

detectron2/solver/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 from .build import build_lr_scheduler, build_optimizer, get_default_optimizer_params
-from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR
+from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR, LRMultiplier

 __all__ = [k for k in globals().keys() if not k.startswith("_")]

detectron2/solver/build.py

Lines changed: 31 additions & 16 deletions
@@ -3,10 +3,17 @@
 from enum import Enum
 from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Type, Union
 import torch
+from fvcore.common.param_scheduler import (
+    CompositeParamScheduler,
+    ConstantParamScheduler,
+    CosineParamScheduler,
+    LinearParamScheduler,
+    MultiStepParamScheduler,
+)

 from detectron2.config import CfgNode

-from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR
+from .lr_scheduler import LRMultiplier

 _GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]]
 _GradientClipper = Callable[[_GradientClipperInput], None]

@@ -41,7 +48,7 @@ def _generate_optimizer_class_with_gradient_clipping(
     optimizer: Type[torch.optim.Optimizer],
     *,
     per_param_clipper: Optional[_GradientClipper] = None,
-    global_clipper: Optional[_GradientClipper] = None
+    global_clipper: Optional[_GradientClipper] = None,
 ) -> Type[torch.optim.Optimizer]:
     """
     Dynamically creates a new type that inherits the type of a given instance

@@ -202,22 +209,30 @@ def build_lr_scheduler(
     Build a LR scheduler from config.
     """
     name = cfg.SOLVER.LR_SCHEDULER_NAME
+
     if name == "WarmupMultiStepLR":
-        return WarmupMultiStepLR(
-            optimizer,
-            cfg.SOLVER.STEPS,
-            cfg.SOLVER.GAMMA,
-            warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
-            warmup_iters=cfg.SOLVER.WARMUP_ITERS,
-            warmup_method=cfg.SOLVER.WARMUP_METHOD,
+        sched = MultiStepParamScheduler(
+            values=[cfg.SOLVER.GAMMA ** k for k in range(len(cfg.SOLVER.STEPS) + 1)],
+            milestones=cfg.SOLVER.STEPS,
+            num_updates=cfg.SOLVER.MAX_ITER,
         )
     elif name == "WarmupCosineLR":
-        return WarmupCosineLR(
-            optimizer,
-            cfg.SOLVER.MAX_ITER,
-            warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
-            warmup_iters=cfg.SOLVER.WARMUP_ITERS,
-            warmup_method=cfg.SOLVER.WARMUP_METHOD,
-        )
+        sched = CosineParamScheduler(1, 0)
     else:
         raise ValueError("Unknown LR scheduler: {}".format(name))
+
+    # Add warmup
+    warmup_method = cfg.SOLVER.WARMUP_METHOD
+    if warmup_method == "constant":
+        warmup = ConstantParamScheduler(cfg.SOLVER.WARMUP_FACTOR)
+    elif warmup_method == "linear":
+        warmup = LinearParamScheduler(cfg.SOLVER.WARMUP_FACTOR, 1.0)
+    else:
+        raise ValueError("Unknown warmup method: {}".format(warmup_method))
+    warmup_ratio = cfg.SOLVER.WARMUP_ITERS / cfg.SOLVER.MAX_ITER
+    sched = CompositeParamScheduler(
+        [warmup, sched],
+        interval_scaling=["rescaled", "fixed"],
+        lengths=[warmup_ratio, 1 - warmup_ratio],
+    )
+    return LRMultiplier(optimizer, multiplier=sched, max_iter=cfg.SOLVER.MAX_ITER)
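
For intuition (a minimal sketch, not part of the commit), the multiplier that build_lr_scheduler now composes can be evaluated by hand. The values below (max_iter=90000, STEPS=(60000, 80000), GAMMA=0.1, WARMUP_ITERS=1000, WARMUP_FACTOR=0.001, linear warmup) are assumed illustrative defaults, not read from this diff:

from fvcore.common.param_scheduler import (
    CompositeParamScheduler,
    LinearParamScheduler,
    MultiStepParamScheduler,
)

# Assumed config values for illustration only.
max_iter = 90000
warmup_iters = 1000
warmup_ratio = warmup_iters / max_iter

sched = CompositeParamScheduler(
    [
        LinearParamScheduler(0.001, 1.0),  # linear warmup, as in the "linear" branch above
        MultiStepParamScheduler(
            values=[1.0, 0.1, 0.01],  # GAMMA ** k for k = 0, 1, 2 with two STEPS
            milestones=[60000, 80000],
            num_updates=max_iter,
        ),
    ],
    interval_scaling=["rescaled", "fixed"],
    lengths=[warmup_ratio, 1 - warmup_ratio],
)

# A ParamScheduler maps training progress in [0, 1) to an LR multiplier.
for it in [0, 500, 2000, 30000, 70000, 85000]:
    print(it, sched(it / max_iter))
# Roughly: 0.001 at iter 0, ~0.5 mid-warmup, 1.0 once warmup ends,
# then 0.1 past 60k and 0.01 past 80k.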

detectron2/solver/lr_scheduler.py

Lines changed: 86 additions & 0 deletions
@@ -1,8 +1,88 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
+import logging
 import math
 from bisect import bisect_right
 from typing import List
 import torch
+from fvcore.common.param_scheduler import ParamScheduler
+
+logger = logging.getLogger(__name__)
+
+
+class LRMultiplier(torch.optim.lr_scheduler._LRScheduler):
+    """
+    A LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the
+    learning rate of each param in the optimizer.
+    Every step, the learning rate of each parameter becomes its initial value
+    multiplied by the output of the given :class:`ParamScheduler`.
+
+    The absolute learning rate value of each parameter can be different.
+    This scheduler can be used as long as the relative scale among them does
+    not change during training.
+
+    Examples:
+
+    ::
+        LRMultiplier(
+            opt,
+            CompositeParamScheduler([
+                LinearParamScheduler(0.001, 1),  # warmup
+                MultiStepParamScheduler(
+                    [1, 0.1, 0.01],
+                    milestones=[60000, 80000],
+                    num_updates=90000,
+                )],
+                interval_scaling=["rescaled", "fixed"],
+                lengths=[100 / 90000, 89900 / 90000],
+            ),
+            max_iter=90000
+        )
+    """
+
+    # NOTES: in the most general case, every LR can use its own scheduler.
+    # Supporting this requires interaction with the optimizer when its parameter
+    # group is initialized. For example, classyvision implements its own optimizer
+    # that allows different schedulers for every parameter group.
+    # To avoid this complexity, we use this class to support the most common cases
+    # where the relative scale among all LRs stays unchanged during training. In this
+    # case we only need a total of one scheduler that defines the relative LR multiplier.
+
+    def __init__(
+        self,
+        optimizer: torch.optim.Optimizer,
+        multiplier: ParamScheduler,
+        max_iter: int,
+        last_iter: int = -1,
+    ):
+        """
+        Args:
+            optimizer, last_iter: See ``torch.optim.lr_scheduler._LRScheduler``.
+                ``last_iter`` is the same as ``last_epoch``.
+            multiplier: a fvcore ParamScheduler that defines the multiplier on
+                every LR of the optimizer.
+            max_iter: the total number of training iterations.
+        """
+        if not isinstance(multiplier, ParamScheduler):
+            raise ValueError(
+                "_LRMultiplier(multiplier=) must be an instance of fvcore "
+                f"ParamScheduler. Got {multiplier} instead."
+            )
+        self._multiplier = multiplier
+        self._max_iter = max_iter
+        super().__init__(optimizer, last_epoch=last_iter)
+
+    def state_dict(self):
+        # fvcore schedulers are stateless. Only keep pytorch scheduler states
+        return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}
+
+    def get_lr(self) -> List[float]:
+        multiplier = self._multiplier(self.last_epoch / self._max_iter)
+        return [base_lr * multiplier for base_lr in self.base_lrs]
+
+
+"""
+Content below is no longer needed!
+"""

 # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes
 # only on epoch boundaries. We typically use iteration based schedules instead.

@@ -24,6 +104,9 @@ def __init__(
         warmup_method: str = "linear",
         last_epoch: int = -1,
     ):
+        logger.warning(
+            "WarmupMultiStepLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
+        )
         if not list(milestones) == sorted(milestones):
             raise ValueError(
                 "Milestones should be a list of" " increasing integers. Got {}", milestones

@@ -59,6 +142,9 @@ def __init__(
         warmup_method: str = "linear",
         last_epoch: int = -1,
     ):
+        logger.warning(
+            "WarmupCosineLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
+        )
        self.max_iters = max_iters
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
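
A quick usage sketch of the new class (not part of the commit; the throwaway parameter, base LR, and milestones are arbitrary). LRMultiplier steps like any PyTorch scheduler, and the multiplier it applies is read from the fvcore scheduler at last_epoch / max_iter:

import torch
from fvcore.common.param_scheduler import MultiStepParamScheduler

from detectron2.solver import LRMultiplier

# Throwaway parameter and optimizer; 0.02 is the base LR the multiplier scales.
p = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([p], lr=0.02)

# Multiplier drops 10x at iterations 60 and 80 of a 100-iteration run.
multiplier = MultiStepParamScheduler(
    values=[1.0, 0.1, 0.01], milestones=[60, 80], num_updates=100
)
sched = LRMultiplier(opt, multiplier=multiplier, max_iter=100)

for _ in range(100):
    p.sum().backward()
    opt.step()
    sched.step()  # next LR = 0.02 * multiplier(last_epoch / max_iter)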

docs/conf.py

Lines changed: 2 additions & 0 deletions
@@ -277,6 +277,8 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
         "StandardAugInput",
         "build_batch_data_loader",
         "draw_panoptic_seg_predictions",
+        "WarmupCosineLR",
+        "WarmupMultiStepLR",
     }
     try:
         if name in HIDDEN or (

setup.py

Lines changed: 1 addition & 1 deletion
@@ -211,7 +211,7 @@ def get_model_zoo_configs() -> List[str]:
         "matplotlib",
         "tqdm>4.29.0",
         "tensorboard",
-        "fvcore>=0.1.2,<0.1.3",  # required like this to make it pip installable
+        "fvcore>=0.1.3,<0.1.4",  # required like this to make it pip installable
         "iopath>=0.1.2",
         "pycocotools>=2.0.2",  # corresponds to https://github.com/ppwwyyxx/cocoapi
         "future",  # used by caffe2

tests/test_scheduler.py

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+import math
+import numpy as np
+from unittest import TestCase
+import torch
+from fvcore.common.param_scheduler import (
+    CompositeParamScheduler,
+    CosineParamScheduler,
+    LinearParamScheduler,
+    MultiStepParamScheduler,
+)
+from torch import nn
+
+from detectron2.solver import LRMultiplier
+
+
+class TestScheduler(TestCase):
+    def test_warmup_multistep(self):
+        p = nn.Parameter(torch.zeros(0))
+        opt = torch.optim.SGD([p], lr=5)
+
+        multiplier = CompositeParamScheduler(
+            [
+                LinearParamScheduler(0.001, 1),  # warmup
+                MultiStepParamScheduler(
+                    [1, 0.1, 0.01, 0.001],
+                    milestones=[10, 15, 20],
+                    num_updates=30,
+                ),
+            ],
+            interval_scaling=["rescaled", "fixed"],
+            lengths=[5 / 30, 25 / 30],
+        )
+        sched = LRMultiplier(opt, multiplier, 30)
+        # This is an equivalent of:
+        # sched = WarmupMultiStepLR(
+        #     opt, milestones=[10, 15, 20], gamma=0.1, warmup_factor=0.001, warmup_iters=5)
+
+        p.sum().backward()
+        opt.step()
+
+        lrs = [0.005]
+        for _ in range(30):
+            sched.step()
+            lrs.append(opt.param_groups[0]["lr"])
+        self.assertTrue(np.allclose(lrs[:5], [0.005, 1.004, 2.003, 3.002, 4.001]))
+        self.assertTrue(np.allclose(lrs[5:10], 5.0))
+        self.assertTrue(np.allclose(lrs[10:15], 0.5))
+        self.assertTrue(np.allclose(lrs[15:20], 0.05))
+        self.assertTrue(np.allclose(lrs[20:], 0.005))
+
+    def test_warmup_cosine(self):
+        p = nn.Parameter(torch.zeros(0))
+        opt = torch.optim.SGD([p], lr=5)
+        multiplier = CompositeParamScheduler(
+            [
+                LinearParamScheduler(0.001, 1),  # warmup
+                CosineParamScheduler(1, 0),
+            ],
+            interval_scaling=["rescaled", "fixed"],
+            lengths=[5 / 30, 25 / 30],
+        )
+        sched = LRMultiplier(opt, multiplier, 30)
+
+        p.sum().backward()
+        opt.step()
+        self.assertEqual(opt.param_groups[0]["lr"], 0.005)
+        lrs = [0.005]
+
+        for _ in range(30):
+            sched.step()
+            lrs.append(opt.param_groups[0]["lr"])
+        for idx, lr in enumerate(lrs):
+            expected_cosine = 2.5 * (1.0 + math.cos(math.pi * idx / 30))
+            if idx >= 5:
+                self.assertAlmostEqual(lr, expected_cosine)
+            else:
+                self.assertNotAlmostEqual(lr, expected_cosine)
