44 changes: 44 additions & 0 deletions deepmd/dpmodel/utils/learning_rate.py
@@ -55,3 +55,47 @@ def value(self, step: int) -> np.float64:
        if step_lr < self.min_lr:
            step_lr = self.min_lr
        return step_lr


class LearningRateCosine:
    def __init__(
        self,
        start_lr: float,
        stop_steps: int,
        stop_lr: float | None = None,
        stop_lr_factor: float | None = None,
        **kwargs: Any,
    ) -> None:
        """Construct a cosine-annealed learning rate schedule.

        Parameters
        ----------
        start_lr
            The learning rate at the start of the training.
        stop_steps
            The total number of training steps covered by the scheduler.
        stop_lr
            The desired learning rate at the end of the training.
            If provided, this value is used directly.
        stop_lr_factor
            The factor by which to scale the learning rate at the end of the
            training. If stop_lr is not provided, stop_lr is computed as
            start_lr * stop_lr_factor.
        """
        self.start_lr = start_lr
        if stop_lr is not None:
            self.stop_lr = stop_lr
        elif stop_lr_factor is not None:
            self.stop_lr = start_lr * stop_lr_factor
        else:
            raise ValueError(
                "Either stop_lr or stop_lr_factor must be provided for LearningRateCosine"
            )
        self.stop_steps = max(1, stop_steps)

    def value(self, step: int) -> np.float64:
        """Get the learning rate at the given step."""
        clamped_step = min(step, self.stop_steps)
        cosine = 0.5 * (1.0 + np.cos(np.pi * clamped_step / self.stop_steps))
        return np.float64(self.stop_lr + (self.start_lr - self.stop_lr) * cosine)
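For intuition, the new class interpolates from start_lr down to stop_lr along half a cosine wave and then holds at stop_lr once stop_steps is passed. A minimal standalone sketch of the same formula, assuming only NumPy (the function name and the numbers below are illustrative, not part of the PR):

import numpy as np

def cosine_lr(step: int, start_lr: float, stop_lr: float, stop_steps: int) -> float:
    # Clamp so any step past the schedule end keeps returning stop_lr.
    t = min(step, stop_steps) / stop_steps
    # Half cosine: the factor is 1.0 at t = 0 and 0.0 at t = 1.
    factor = 0.5 * (1.0 + np.cos(np.pi * t))
    return stop_lr + (start_lr - stop_lr) * factor

# Endpoints and midpoint of the curve for start_lr=1e-3, stop_lr=1e-8, stop_steps=1000.
print(cosine_lr(0, 1e-3, 1e-8, 1000))     # 1e-3 (start_lr)
print(cosine_lr(500, 1e-3, 1e-8, 1000))   # roughly (1e-3 + 1e-8) / 2
print(cosine_lr(1000, 1e-3, 1e-8, 1000))  # 1e-8 (stop_lr)
print(cosine_lr(1500, 1e-3, 1e-8, 1000))  # still 1e-8 after stop_steps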
19 changes: 12 additions & 7 deletions deepmd/pt/train/training.py
@@ -63,6 +63,7 @@
    SAMPLER_RECORD,
)
from deepmd.pt.utils.learning_rate import (
    LearningRateCosine,
    LearningRateExp,
)
from deepmd.pt.utils.stat import (
@@ -273,13 +274,17 @@ def get_sample() -> Any:
_stat_file_path.root.close()
return get_sample

        def get_lr(lr_params: dict[str, Any]) -> LearningRateExp:
            assert lr_params.get("type", "exp") == "exp", (
                "Only learning rate `exp` is supported!"
            )
            lr_params["stop_steps"] = self.num_steps - self.warmup_steps
            lr_exp = LearningRateExp(**lr_params)
            return lr_exp
        def get_lr(
            lr_params: dict[str, Any],
        ) -> LearningRateExp | LearningRateCosine:
            lr_type = lr_params.get("type", "exp")
            lr_config = lr_params.copy()
            lr_config["stop_steps"] = self.num_steps - self.warmup_steps
            if lr_type == "exp":
                return LearningRateExp(**lr_config)
            if lr_type == "cosine":
                return LearningRateCosine(**lr_config)
            raise ValueError(f"Unsupported learning rate type '{lr_type}'")

        # Optimizer
        if self.multi_task and training_params.get("optim_dict", None) is not None:
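Both the exp and cosine branches receive stop_steps = self.num_steps - self.warmup_steps, so the scheduler presumably spans only the post-warmup portion of training. A tiny illustrative check of that bookkeeping (the numbers are made up, not from the PR):

# With 1,000,000 total steps and 10,000 warmup steps, the scheduler is stretched
# over the remaining 990,000 steps, so LearningRateCosine reaches stop_lr at the
# final training step.
num_steps = 1_000_000
warmup_steps = 10_000
stop_steps = num_steps - warmup_steps
assert stop_steps == 990_000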
2 changes: 2 additions & 0 deletions deepmd/pt/utils/learning_rate.py
@@ -1,8 +1,10 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
from deepmd.dpmodel.utils.learning_rate import (
    LearningRateCosine,
    LearningRateExp,
)

__all__ = [
    "LearningRateCosine",
    "LearningRateExp",
]
44 changes: 43 additions & 1 deletion deepmd/utils/argcheck.py
@@ -2509,12 +2509,54 @@ def learning_rate_exp() -> list[Argument]:
    return args


def learning_rate_cosine() -> list[Argument]:
    doc_start_lr = "The learning rate at the start of the training."
    doc_stop_lr = "The desired learning rate at the end of the training."
    doc_stop_lr_factor = (
        "The factor to scale the learning rate at the end of the training. "
        "The actual stop_lr is calculated as `start_lr * stop_lr_factor`. "
        "If `stop_lr` is not provided, this option will be used."
    )

    args = [
        Argument(
            "start_lr",
            float,
            optional=True,
            default=1e-3,
            doc=doc_only_pt_supported + doc_start_lr,
        ),
        Argument(
            "stop_lr",
            float,
            optional=True,
            default=None,
            doc=doc_only_pt_supported + doc_stop_lr,
        ),
        Argument(
            "stop_lr_factor",
            float,
            optional=True,
            doc=doc_only_pt_supported + doc_stop_lr_factor,
        ),
    ]
    return args


def learning_rate_variant_type_args() -> Variant:
    doc_lr = "The type of the learning rate."

    return Variant(
        "type",
        [Argument("exp", dict, learning_rate_exp())],
        [
            Argument("exp", dict, learning_rate_exp()),
            Argument(
                "cosine",
                dict,
                learning_rate_cosine(),
                doc=doc_only_pt_supported,
            ),
        ],
        optional=True,
        default_tag="exp",
        doc=doc_lr,
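With these arguments registered, a user selects the new schedule through the learning_rate section of the training input. A hypothetical snippet, shown as a Python dict for brevity (only the keys defined by learning_rate_cosine() above come from the PR; everything else is illustrative):

# Hypothetical "learning_rate" section of a training input.
learning_rate = {
    "type": "cosine",
    "start_lr": 1.0e-3,
    "stop_lr": 1.0e-8,
    # Alternatively, omit "stop_lr" and give a factor instead:
    # "stop_lr_factor": 1.0e-5,
}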
17 changes: 17 additions & 0 deletions source/tests/pt/test_lr.py
@@ -7,6 +7,7 @@
tf.disable_eager_execution()

from deepmd.pt.utils.learning_rate import (
    LearningRateCosine,
    LearningRateExp,
)
from deepmd.tf.utils import (
@@ -102,5 +103,21 @@ def decay_rate_pt(self) -> None:
)


class TestLearningRateCosine(unittest.TestCase):
    def test_basic_curve(self) -> None:
        start_lr = 1.0
        stop_lr = 0.1
        stop_steps = 10
        lr = LearningRateCosine(
            start_lr=start_lr, stop_steps=stop_steps, stop_lr=stop_lr
        )

        self.assertTrue(np.allclose(lr.value(0), start_lr))
        self.assertTrue(np.allclose(lr.value(stop_steps), stop_lr))
        self.assertTrue(np.allclose(lr.value(stop_steps + 5), stop_lr))

        mid_step = stop_steps // 2
        expected_mid = stop_lr + (start_lr - stop_lr) * 0.5
        self.assertTrue(np.allclose(lr.value(mid_step), expected_mid))


if __name__ == "__main__":
    unittest.main()