diff --git a/deepmd/dpmodel/utils/learning_rate.py b/deepmd/dpmodel/utils/learning_rate.py
index 10f7ec8d04..768f038575 100644
--- a/deepmd/dpmodel/utils/learning_rate.py
+++ b/deepmd/dpmodel/utils/learning_rate.py
@@ -55,3 +55,47 @@ def value(self, step: int) -> np.float64:
         if step_lr < self.min_lr:
             step_lr = self.min_lr
         return step_lr
+
+
+class LearningRateCosine:
+    def __init__(
+        self,
+        start_lr: float,
+        stop_steps: int,
+        stop_lr: float | None = None,
+        stop_lr_factor: float | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Construct a cosine-annealed learning rate.
+
+        Parameters
+        ----------
+        start_lr
+            The learning rate at the start of the training.
+        stop_steps
+            The total number of training steps for the learning rate scheduler.
+        stop_lr
+            The desired learning rate at the end of the training.
+            If provided, this value will be used directly.
+        stop_lr_factor
+            The factor to scale the learning rate at the end of the training.
+            If stop_lr is not provided, stop_lr will be calculated as
+            start_lr * stop_lr_factor.
+        """
+        self.start_lr = start_lr
+        if stop_lr is not None:
+            self.stop_lr = stop_lr
+        elif stop_lr_factor is not None:
+            self.stop_lr = start_lr * stop_lr_factor
+        else:
+            raise ValueError(
+                "Either stop_lr or stop_lr_factor must be provided for LearningRateCosine"
+            )
+        self.stop_steps = max(1, stop_steps)
+
+    def value(self, step: int) -> np.float64:
+        """Get the learning rate at the given step."""
+        clamped_step = min(step, self.stop_steps)
+        cosine = 0.5 * (1.0 + np.cos(np.pi * clamped_step / self.stop_steps))
+        return np.float64(self.stop_lr + (self.start_lr - self.stop_lr) * cosine)
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 24440e19de..e9c9d75999 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -63,6 +63,7 @@
     SAMPLER_RECORD,
 )
 from deepmd.pt.utils.learning_rate import (
+    LearningRateCosine,
     LearningRateExp,
 )
 from deepmd.pt.utils.stat import (
@@ -273,13 +274,17 @@ def get_sample() -> Any:
                 _stat_file_path.root.close()
             return get_sample
 
-        def get_lr(lr_params: dict[str, Any]) -> LearningRateExp:
-            assert lr_params.get("type", "exp") == "exp", (
-                "Only learning rate `exp` is supported!"
-            )
-            lr_params["stop_steps"] = self.num_steps - self.warmup_steps
-            lr_exp = LearningRateExp(**lr_params)
-            return lr_exp
+        def get_lr(
+            lr_params: dict[str, Any],
+        ) -> LearningRateExp | LearningRateCosine:
+            lr_type = lr_params.get("type", "exp")
+            lr_config = lr_params.copy()
+            lr_config["stop_steps"] = self.num_steps - self.warmup_steps
+            if lr_type == "exp":
+                return LearningRateExp(**lr_config)
+            if lr_type == "cosine":
+                return LearningRateCosine(**lr_config)
+            raise ValueError(f"Unsupported learning rate type '{lr_type}'")
 
         # Optimizer
         if self.multi_task and training_params.get("optim_dict", None) is not None:
diff --git a/deepmd/pt/utils/learning_rate.py b/deepmd/pt/utils/learning_rate.py
index 3502434bc0..31ae1c3152 100644
--- a/deepmd/pt/utils/learning_rate.py
+++ b/deepmd/pt/utils/learning_rate.py
@@ -1,8 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from deepmd.dpmodel.utils.learning_rate import (
+    LearningRateCosine,
     LearningRateExp,
 )
 
 __all__ = [
+    "LearningRateCosine",
     "LearningRateExp",
 ]
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 7fcc117ab5..8b4773d1c9 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -2509,12 +2509,54 @@ def learning_rate_exp() -> list[Argument]:
     return args
 
 
+def learning_rate_cosine() -> list[Argument]:
+    doc_start_lr = "The learning rate at the start of the training."
+    doc_stop_lr = "The desired learning rate at the end of the training."
+    doc_stop_lr_factor = (
+        "The factor to scale the learning rate at the end of the training. "
+        "The actual stop_lr is calculated as `start_lr * stop_lr_factor`. "
+        "This option is used only when `stop_lr` is not provided."
+    )
+
+    args = [
+        Argument(
+            "start_lr",
+            float,
+            optional=True,
+            default=1e-3,
+            doc=doc_only_pt_supported + doc_start_lr,
+        ),
+        Argument(
+            "stop_lr",
+            float,
+            optional=True,
+            default=None,
+            doc=doc_only_pt_supported + doc_stop_lr,
+        ),
+        Argument(
+            "stop_lr_factor",
+            float,
+            optional=True,
+            doc=doc_only_pt_supported + doc_stop_lr_factor,
+        ),
+    ]
+    return args
+
+
 def learning_rate_variant_type_args() -> Variant:
     doc_lr = "The type of the learning rate."
 
     return Variant(
         "type",
-        [Argument("exp", dict, learning_rate_exp())],
+        [
+            Argument("exp", dict, learning_rate_exp()),
+            Argument(
+                "cosine",
+                dict,
+                learning_rate_cosine(),
+                doc=doc_only_pt_supported,
+            ),
+        ],
         optional=True,
         default_tag="exp",
         doc=doc_lr,
diff --git a/source/tests/pt/test_lr.py b/source/tests/pt/test_lr.py
index 2d6bf156e1..75f663f041 100644
--- a/source/tests/pt/test_lr.py
+++ b/source/tests/pt/test_lr.py
@@ -7,6 +7,7 @@
 tf.disable_eager_execution()
 
 from deepmd.pt.utils.learning_rate import (
+    LearningRateCosine,
     LearningRateExp,
 )
 from deepmd.tf.utils import (
@@ -102,5 +103,21 @@ def decay_rate_pt(self) -> None:
         )
 
 
+class TestLearningRateCosine(unittest.TestCase):
+    def test_basic_curve(self) -> None:
+        start_lr = 1.0
+        stop_lr = 0.1
+        stop_steps = 10
+        lr = LearningRateCosine(start_lr, stop_steps, stop_lr=stop_lr)
+
+        self.assertTrue(np.allclose(lr.value(0), start_lr))
+        self.assertTrue(np.allclose(lr.value(stop_steps), stop_lr))
+        self.assertTrue(np.allclose(lr.value(stop_steps + 5), stop_lr))
+
+        mid_step = stop_steps // 2
+        expected_mid = stop_lr + (start_lr - stop_lr) * 0.5
+        self.assertTrue(np.allclose(lr.value(mid_step), expected_mid))
+
+
 if __name__ == "__main__":
     unittest.main()
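
Usage note (illustrative, not part of the patch): a minimal sketch of how the new scheduler behaves, using only the constructor arguments and value() method introduced above; the concrete numbers are assumptions chosen for the example.

# Cosine annealing from start_lr to stop_lr over stop_steps steps:
# lr(step) = stop_lr + (start_lr - stop_lr) * 0.5 * (1 + cos(pi * step / stop_steps))
from deepmd.pt.utils.learning_rate import LearningRateCosine

lr = LearningRateCosine(start_lr=1e-3, stop_steps=10000, stop_lr=1e-5)
print(lr.value(0))      # start of training: 1e-3
print(lr.value(5000))   # midpoint: 0.5 * (1e-3 + 1e-5) = 5.05e-4
print(lr.value(10000))  # at and beyond stop_steps the value is clamped to 1e-5

In a training input, this schedule would be selected through the new variant registered in argcheck.py, e.g. "learning_rate": {"type": "cosine", "start_lr": 1e-3, "stop_lr": 1e-5}; stop_steps is not a user option but is filled in by get_lr() in training.py as num_steps minus warmup_steps.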