# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import math
from bisect import bisect_right
from typing import List
import torch
from fvcore.common.param_scheduler import ParamScheduler

logger = logging.getLogger(__name__)


class LRMultiplier(torch.optim.lr_scheduler._LRScheduler):
    """
    An LRScheduler which uses an fvcore :class:`ParamScheduler` to multiply the
    learning rate of each param in the optimizer.
    Every step, the learning rate of each parameter becomes its initial value
    multiplied by the output of the given :class:`ParamScheduler`.

    The absolute learning rate value of each parameter can be different.
    This scheduler can be used as long as the relative scale among them does
    not change during training.

    Examples:

    ::
        LRMultiplier(
            opt,
            CompositeParamScheduler(
                [
                    LinearParamScheduler(0.001, 1),  # warmup
                    MultiStepParamScheduler(
                        [1, 0.1, 0.01],
                        milestones=[60000, 80000],
                        num_updates=90000,
                    ),
                ],
                interval_scaling=["rescaled", "fixed"],
                lengths=[100 / 90000, 89900 / 90000],
            ),
            max_iter=90000,
        )
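
    The multiplier is applied on every call to ``step()``, which is expected to
    happen once per training iteration. A minimal training-loop sketch
    (illustrative only; ``opt``, ``scheduler``, ``data_loader`` and
    ``compute_loss`` are placeholder names, not part of this module)::

        for data in data_loader:
            loss = compute_loss(data)
            opt.zero_grad()
            loss.backward()
            opt.step()
            scheduler.step()  # multiply every base LR by the schedule's current output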
    """

    # NOTES: in the most general case, every LR can use its own scheduler.
    # Supporting this requires interaction with the optimizer when its parameter
    # group is initialized. For example, classyvision implements its own optimizer
    # that allows different schedulers for every parameter group.
    # To avoid this complexity, we use this class to support the most common cases
    # where the relative scale among all LRs stays unchanged during training. In this
    # case we only need a total of one scheduler that defines the relative LR multiplier.
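    # For example (illustrative): if the optimizer has two param groups with base
    # LRs 0.1 and 0.01, a multiplier of 0.1 at some iteration yields LRs 0.01 and
    # 0.001; both groups are scaled by the same factor, so their 10:1 ratio is kept.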

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        multiplier: ParamScheduler,
        max_iter: int,
        last_iter: int = -1,
    ):
        """
        Args:
            optimizer, last_iter: See ``torch.optim.lr_scheduler._LRScheduler``.
                ``last_iter`` is the same as ``last_epoch``.
            multiplier: an fvcore ParamScheduler that defines the multiplier on
                every LR of the optimizer
            max_iter: the total number of training iterations
        """
        if not isinstance(multiplier, ParamScheduler):
            raise ValueError(
                "LRMultiplier(multiplier=) must be an instance of fvcore "
                f"ParamScheduler. Got {multiplier} instead."
            )
        self._multiplier = multiplier
        self._max_iter = max_iter
        super().__init__(optimizer, last_epoch=last_iter)

    def state_dict(self):
        # fvcore schedulers are stateless. Only keep pytorch scheduler states
        return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}

    def get_lr(self) -> List[float]:
        # ParamScheduler expects the training progress as a fraction in [0, 1).
        multiplier = self._multiplier(self.last_epoch / self._max_iter)
        return [base_lr * multiplier for base_lr in self.base_lrs]


"""
Content below is no longer needed!
"""

# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes
# only on epoch boundaries. We typically use iteration based schedules instead.
@@ -24,6 +104,9 @@ def __init__(
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupMultiStepLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
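        # An approximately equivalent replacement (illustrative, not part of this
        # module) is an LRMultiplier built from a MultiStepParamScheduler plus a
        # linear warmup, as shown in the LRMultiplier docstring above.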
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
@@ -59,6 +142,9 @@ def __init__(
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupCosineLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
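        # Similarly, an illustrative replacement is an LRMultiplier built from
        # fvcore's CosineParamScheduler plus a linear warmup.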
        self.max_iters = max_iters
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters