Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 26 additions & 26 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@ repos:
- id: clang-format
exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$|.+\.json$)
# markdown, yaml, CSS, javascript
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
hooks:
- id: prettier
types_or: [markdown, yaml, css]
# workflow files cannot be modified by pre-commit.ci
exclude: ^(source/3rdparty|\.github/workflows|\.clang-format)
# - repo: https://github.com/pre-commit/mirrors-prettier
# rev: v4.0.0-alpha.8
# hooks:
# - id: prettier
# types_or: [markdown, yaml, css]
# # workflow files cannot be modified by pre-commit.ci
# exclude: ^(source/3rdparty|\.github/workflows|\.clang-format)
# Shell
- repo: https://github.com/scop/pre-commit-shfmt
rev: v3.11.0-1
Expand All @@ -83,25 +83,25 @@ repos:
hooks:
- id: cmake-format
#- id: cmake-lint
- repo: https://github.com/njzjz/mirrors-bibtex-tidy
rev: v1.13.0
hooks:
- id: bibtex-tidy
args:
- --curly
- --numeric
- --align=13
- --blank-lines
# disable sort: the order of keys and fields has explicit meanings
#- --sort=key
- --duplicates=key,doi,citation,abstract
- --merge=combine
#- --sort-fields
#- --strip-comments
- --trailing-commas
- --encode-urls
- --remove-empty-fields
- --wrap=80
# - repo: https://github.com/njzjz/mirrors-bibtex-tidy
# rev: v1.13.0
# hooks:
# - id: bibtex-tidy
# args:
# - --curly
# - --numeric
# - --align=13
# - --blank-lines
# # disable sort: the order of keys and fields has explicit meanings
# #- --sort=key
# - --duplicates=key,doi,citation,abstract
# - --merge=combine
# #- --sort-fields
# #- --strip-comments
# - --trailing-commas
# - --encode-urls
# - --remove-empty-fields
# - --wrap=80
# license header
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.5.5
Expand Down
4 changes: 4 additions & 0 deletions deepmd/pd/entrypoints/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
from deepmd.pd.train.wrapper import (
ModelWrapper,
)
from deepmd.pd.utils import (
env,
)
from deepmd.pd.utils.dataloader import (
DpLoaderSet,
)
Expand Down Expand Up @@ -233,6 +236,7 @@ def train(
output: str = "out.json",
) -> None:
log.info("Configuration path: %s", input_file)
env.CUSTOM_OP_USE_JIT = False
if LOCAL_RANK == 0:
SummaryPrinter()()
with open(input_file) as fin:
Expand Down
11 changes: 9 additions & 2 deletions deepmd/pd/loss/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from deepmd.utils.version import (
check_version_compatibility,
)
import paddle.distributed as dist


def custom_huber_loss(predictions, targets, delta=1.0):
Expand Down Expand Up @@ -205,7 +206,11 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
find_energy = label.get("find_energy", 0.0)
pref_e = pref_e * find_energy
if not self.use_l1_all:
l2_ener_loss = paddle.mean(paddle.square(energy_pred - energy_label))

tmp = energy_pred - energy_label
logit = dist.reshard(tmp, tmp.process_mesh, [dist.Replicate()])

l2_ener_loss = paddle.mean(paddle.square(logit))
if not self.inference:
more_loss["l2_ener_loss"] = self.display_if_exist(
l2_ener_loss.detach(), find_energy
Expand Down Expand Up @@ -258,7 +263,8 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
force_pred = model_pred["force"]
force_label = label["force"]
diff_f = (force_label - force_pred).reshape([-1])

diff_f = dist.reshard(diff_f, diff_f.process_mesh, [dist.Replicate()])

if self.relative_f is not None:
force_label_3 = force_label.reshape([-1, 3])
norm_f = force_label_3.norm(axis=1, keepdim=True) + self.relative_f
Expand Down Expand Up @@ -354,6 +360,7 @@ def forward(self, input_dict, model, label, natoms, learning_rate, mae=False):
find_virial = label.get("find_virial", 0.0)
pref_v = pref_v * find_virial
diff_v = label["virial"] - model_pred["virial"].reshape([-1, 9])
diff_v = dist.reshard(diff_v, diff_v.process_mesh, [dist.Replicate()])
l2_virial_loss = paddle.mean(paddle.square(diff_v))
Comment on lines +363 to 364
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Same conditional reshard for virial difference.

Align with the pattern above to avoid attribute errors off-mesh.

-            diff_v = dist.reshard(diff_v, diff_v.process_mesh, [dist.Replicate()])
+            if hasattr(diff_v, "process_mesh"):
+                diff_v = dist.reshard(diff_v, diff_v.process_mesh, [dist.Replicate()])
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
diff_v = dist.reshard(diff_v, diff_v.process_mesh, [dist.Replicate()])
l2_virial_loss = paddle.mean(paddle.square(diff_v))
if hasattr(diff_v, "process_mesh"):
diff_v = dist.reshard(diff_v, diff_v.process_mesh, [dist.Replicate()])
l2_virial_loss = paddle.mean(paddle.square(diff_v))
🤖 Prompt for AI Agents
In deepmd/pd/loss/ener.py around lines 362 to 363, the virial diff tensor is
resharded unconditionally, causing potential attribute errors when off the process
mesh; follow the pattern used above by checking if diff_v has a process_mesh
attribute (or comparing diff_v.process_mesh against the target) and only call
dist.reshard when needed, then compute l2_virial_loss from the (possibly
resharded) diff_v.

if not self.inference:
more_loss["l2_virial_loss"] = self.display_if_exist(
Expand Down
14 changes: 10 additions & 4 deletions deepmd/pd/model/descriptor/dpa3.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class DescrptDPA3(BaseDescriptor, paddle.nn.Layer):
Whether to use bias in the type embedding layer.
use_loc_mapping : bool, Optional
Whether to use local atom index mapping in training or non-parallel inference.
Not supported yet in Paddle.
When True, local indexing and mapping are applied to neighbor lists and embeddings during descriptor computation.
type_map : list[str], Optional
A list of strings. Give the name to each type of atoms.

Expand All @@ -117,7 +117,7 @@ def __init__(
seed: Optional[Union[int, list[int]]] = None,
use_econf_tebd: bool = False,
use_tebd_bias: bool = False,
use_loc_mapping: bool = False,
use_loc_mapping: bool = True,
type_map: Optional[list[str]] = None,
) -> None:
super().__init__()
Expand Down Expand Up @@ -160,6 +160,8 @@ def init_subclass_params(sub_data, sub_class):
fix_stat_std=self.repflow_args.fix_stat_std,
optim_update=self.repflow_args.optim_update,
smooth_edge_update=self.repflow_args.smooth_edge_update,
edge_init_use_dist=self.repflow_args.edge_init_use_dist,
use_exp_switch=self.repflow_args.use_exp_switch,
use_dynamic_sel=self.repflow_args.use_dynamic_sel,
sel_reduce_factor=self.repflow_args.sel_reduce_factor,
use_loc_mapping=use_loc_mapping,
Expand All @@ -170,8 +172,8 @@ def init_subclass_params(sub_data, sub_class):
)

self.use_econf_tebd = use_econf_tebd
self.use_tebd_bias = use_tebd_bias
self.use_loc_mapping = use_loc_mapping
self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.tebd_dim = self.repflow_args.n_dim
self.type_embedding = TypeEmbedNet(
Expand Down Expand Up @@ -487,12 +489,16 @@ def forward(
The smooth switch function. shape: nf x nloc x nnei

"""
parallel_mode = comm_dict is not None
# cast the input to internal precision
extended_coord = extended_coord.to(dtype=self.prec)
nframes, nloc, nnei = nlist.shape
nall = extended_coord.reshape([nframes, -1]).shape[1] // 3

node_ebd_ext = self.type_embedding(extended_atype)
if not parallel_mode and self.use_loc_mapping:
node_ebd_ext = self.type_embedding(extended_atype[:, :nloc])
else:
node_ebd_ext = self.type_embedding(extended_atype)
node_ebd_inp = node_ebd_ext[:, :nloc, :]
# repflows
node_ebd, edge_ebd, h2, rot_mat, sw = self.repflows(
Expand Down
14 changes: 12 additions & 2 deletions deepmd/pd/model/descriptor/env_mat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import paddle

from deepmd.pd.utils.preprocess import (
compute_exp_sw,
compute_smooth_weight,
)

Expand All @@ -14,6 +15,7 @@ def _make_env_mat(
ruct_smth: float,
radial_only: bool = False,
protection: float = 0.0,
use_exp_switch: bool = False,
):
"""Make smooth environment matrix."""
bsz, natoms, nnei = nlist.shape
Expand All @@ -24,15 +26,20 @@ def _make_env_mat(
nlist = paddle.where(mask, nlist, nall - 1)
coord_l = coord[:, :natoms].reshape([bsz, -1, 1, 3])
index = nlist.reshape([bsz, -1]).unsqueeze(-1).expand([-1, -1, 3])
coord_r = paddle.take_along_axis(coord, axis=1, indices=index)
coord_pad = paddle.concat([coord, coord[:, -1:, :] + rcut], axis=1)
coord_r = paddle.take_along_axis(coord_pad, axis=1, indices=index)
coord_r = coord_r.reshape([bsz, natoms, nnei, 3])
Comment on lines 26 to 31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Padded sentinel is never indexed; nlist still redirects invalid entries to nall-1.

You append one extra coordinate (coord_pad has length nall+1), but invalid nlist entries are replaced with nall - 1, which points to the last real atom, not the sentinel at index nall. This makes the padding ineffective.

-    nlist = paddle.where(mask, nlist, nall - 1)
+    # Redirect masked neighbors to the padded sentinel at index nall
+    nlist = paddle.where(mask, nlist, paddle.full_like(nlist, nall))
     coord_l = coord[:, :natoms].reshape([bsz, -1, 1, 3])
     index = nlist.reshape([bsz, -1]).unsqueeze(-1).expand([-1, -1, 3])
-    coord_pad = paddle.concat([coord, coord[:, -1:, :] + rcut], axis=1)
+    coord_pad = paddle.concat([coord, coord[:, -1:, :] + rcut], axis=1)
     coord_r = paddle.take_along_axis(coord_pad, axis=1, indices=index)

Note: The specific sentinel value is irrelevant because weight and diff are masked; the key is to avoid out-of-bounds and to keep gradients defined.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
nlist = paddle.where(mask, nlist, nall - 1)
coord_l = coord[:, :natoms].reshape([bsz, -1, 1, 3])
index = nlist.reshape([bsz, -1]).unsqueeze(-1).expand([-1, -1, 3])
coord_r = paddle.take_along_axis(coord, axis=1, indices=index)
coord_pad = paddle.concat([coord, coord[:, -1:, :] + rcut], axis=1)
coord_r = paddle.take_along_axis(coord_pad, axis=1, indices=index)
coord_r = coord_r.reshape([bsz, natoms, nnei, 3])
# Redirect masked neighbors to the padded sentinel at index nall
nlist = paddle.where(mask, nlist, paddle.full_like(nlist, nall))
coord_l = coord[:, :natoms].reshape([bsz, -1, 1, 3])
index = nlist.reshape([bsz, -1]).unsqueeze(-1).expand([-1, -1, 3])
coord_pad = paddle.concat([coord, coord[:, -1:, :] + rcut], axis=1)
coord_r = paddle.take_along_axis(coord_pad, axis=1, indices=index)
coord_r = coord_r.reshape([bsz, natoms, nnei, 3])

diff = coord_r - coord_l
length = paddle.linalg.norm(diff, axis=-1, keepdim=True)
# for index 0 nloc atom
length = length + (~mask.unsqueeze(-1)).astype(length.dtype)
t0 = 1 / (length + protection)
t1 = diff / (length + protection) ** 2
weight = compute_smooth_weight(length, ruct_smth, rcut)
weight = (
compute_smooth_weight(length, ruct_smth, rcut)
if not use_exp_switch
else compute_exp_sw(length, ruct_smth, rcut)
)
weight = weight * mask.unsqueeze(-1).astype(weight.dtype)
if radial_only:
env_mat = t0 * weight
Expand All @@ -51,6 +58,7 @@ def prod_env_mat(
rcut_smth: float,
radial_only: bool = False,
protection: float = 0.0,
use_exp_switch: bool = False,
):
"""Generate smooth environment matrix from atom coordinates and other context.

Expand All @@ -63,6 +71,7 @@ def prod_env_mat(
- rcut_smth: Smooth hyper-parameter for pair force & energy.
- radial_only: Whether to return a full description or a radial-only descriptor.
- protection: Protection parameter to prevent division by zero errors during calculations.
- use_exp_switch: Whether to use the exponential switch function.

Returns
-------
Expand All @@ -75,6 +84,7 @@ def prod_env_mat(
rcut_smth,
radial_only,
protection=protection,
use_exp_switch=use_exp_switch,
) # shape [n_atom, dim, 4 or 1]
t_avg = mean[atype] # [n_atom, dim, 4 or 1]
t_std = stddev[atype] # [n_atom, dim, 4 or 1]
Expand Down
Loading