
Commit 6cf7544: Merge devel into master (#2772)

2 parents: 53a1078 + 835d6e5

102 files changed: +1277 / -916 lines


.github/workflows/build_wheel.yml

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ jobs:
         name: Setup QEMU
         if: matrix.platform_id == 'manylinux_aarch64'
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.14
+        uses: pypa/cibuildwheel@v2.15
         env:
           CIBW_BUILD_VERBOSITY: 1
           CIBW_ARCHS: all

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
@@ -9,7 +9,7 @@ repos:
       - id: end-of-file-fixer
         exclude: "^.+\\.pbtxt$"
       - id: check-yaml
-      #- id: check-json
+      - id: check-json
       - id: check-added-large-files
         args: ['--maxkb=1024', '--enforce-all']
         # TODO: remove the following after resolved
@@ -33,7 +33,7 @@ repos:
        files: \.py$
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
-    rev: v0.0.280
+    rev: v0.0.286
    hooks:
      - id: ruff
        args: ["--fix"]
@@ -45,7 +45,7 @@ repos:
        args: ["--write"]
  # Python inside docs
  - repo: https://github.com/asottile/blacken-docs
-    rev: 1.15.0
+    rev: 1.16.0
    hooks:
      - id: blacken-docs
  # C++
@@ -72,7 +72,7 @@ repos:
      #- id: cmake-lint
  # license header
  - repo: https://github.com/Lucas-C/pre-commit-hooks
-    rev: v1.5.1
+    rev: v1.5.4
    hooks:
      # C++, js
      - id: insert-license

README.md

Lines changed: 1 addition & 0 deletions
@@ -102,6 +102,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp
     - [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md)
     - [Descriptor `"se_e3"`](doc/model/train-se-e3.md)
     - [Descriptor `"se_atten"`](doc/model/train-se-atten.md)
+    - [Descriptor `"se_atten_v2"`](doc/model/train-se-atten.md#descriptor-se_atten_v2)
     - [Descriptor `"hybrid"`](doc/model/train-hybrid.md)
     - [Descriptor `sel`](doc/model/sel.md)
     - [Fit energy](doc/model/train-energy.md)

deepmd/descriptor/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -24,6 +24,9 @@
 from .se_atten import (
     DescrptSeAtten,
 )
+from .se_atten_v2 import (
+    DescrptSeAttenV2,
+)
 from .se_r import (
     DescrptSeR,
 )
@@ -41,6 +44,7 @@
     "DescrptSeAEfLower",
     "DescrptSeAMask",
     "DescrptSeAtten",
+    "DescrptSeAttenV2",
     "DescrptSeR",
     "DescrptSeT",
 ]
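
For context, a minimal sketch (not part of the diff) of what this export change enables. It only assumes a working deepmd-kit installation; the subclass relationship comes from the new `se_atten_v2.py` file shown further below.

```python
# Illustrative only: after this change the v2 descriptor is importable next to
# the existing one, and it subclasses DescrptSeAtten.
from deepmd.descriptor import DescrptSeAtten, DescrptSeAttenV2

assert issubclass(DescrptSeAttenV2, DescrptSeAtten)
```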

deepmd/descriptor/se_atten.py

Lines changed: 49 additions & 3 deletions
@@ -108,6 +108,13 @@ class DescrptSeAtten(DescrptSeA):
         Whether to mask the diagonal in the attention weights.
     multi_task
         If the model has multi fitting nets to train.
+    stripped_type_embedding
+        Whether to strip the type embedding into a separated embedding network.
+        Default value will be True in `se_atten_v2` descriptor.
+    smooth_type_embdding
+        When using stripped type embedding, whether to dot smooth factor on the network output of type embedding
+        to keep the network smooth, instead of setting `set_davg_zero` to be True.
+        Default value will be True in `se_atten_v2` descriptor.
     """
 
     def __init__(
@@ -133,9 +140,10 @@ def __init__(
         attn_mask: bool = False,
         multi_task: bool = False,
         stripped_type_embedding: bool = False,
+        smooth_type_embdding: bool = False,
         **kwargs,
     ) -> None:
-        if not set_davg_zero:
+        if not set_davg_zero and not (stripped_type_embedding and smooth_type_embdding):
             warnings.warn(
                 "Set 'set_davg_zero' False in descriptor 'se_atten' "
                 "may cause unexpected incontinuity during model inference!"
@@ -166,6 +174,7 @@ def __init__(
             "2"
         ), "se_atten only support tensorflow version 2.0 or higher."
         self.stripped_type_embedding = stripped_type_embedding
+        self.smooth = smooth_type_embdding
         self.ntypes = ntypes
         self.att_n = attn
         self.attn_layer = attn_layer
@@ -607,6 +616,7 @@ def build(
             sel_a=self.sel_all_a,
             sel_r=self.sel_all_r,
         )
+
         self.nei_type_vec = tf.reshape(self.nei_type_vec, [-1])
         self.nmask = tf.cast(
             tf.reshape(self.nmask, [-1, 1, self.sel_all_a[0]]),
@@ -625,6 +635,41 @@ def build(
             tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1]
         )  ## lammps will have error without this
         self._identity_tensors(suffix=suffix)
+        if self.smooth:
+            self.sliced_avg = tf.reshape(
+                tf.slice(
+                    tf.reshape(self.t_avg, [self.ntypes, -1, 4]), [0, 0, 0], [-1, 1, 1]
+                ),
+                [self.ntypes, 1],
+            )
+            self.sliced_std = tf.reshape(
+                tf.slice(
+                    tf.reshape(self.t_std, [self.ntypes, -1, 4]), [0, 0, 0], [-1, 1, 1]
+                ),
+                [self.ntypes, 1],
+            )
+            self.avg_looked_up = tf.reshape(
+                tf.nn.embedding_lookup(self.sliced_avg, self.atype_nloc),
+                [-1, natoms[0], 1],
+            )
+            self.std_looked_up = tf.reshape(
+                tf.nn.embedding_lookup(self.sliced_std, self.atype_nloc),
+                [-1, natoms[0], 1],
+            )
+            self.recovered_r = (
+                tf.reshape(
+                    tf.slice(tf.reshape(self.descrpt, [-1, 4]), [0, 0], [-1, 1]),
+                    [-1, natoms[0], self.sel_all_a[0]],
+                )
+                * self.std_looked_up
+                + self.avg_looked_up
+            )
+            uu = 1 - self.rcut_r_smth * self.recovered_r
+            self.recovered_switch = -uu * uu * uu + 1
+            self.recovered_switch = tf.clip_by_value(self.recovered_switch, 0.0, 1.0)
+            self.recovered_switch = tf.cast(
+                self.recovered_switch, self.filter_precision
+            )
 
         self.dout, self.qmat = self._pass_filter(
             self.descrpt_reshape,
@@ -1146,9 +1191,10 @@ def _filter_lower(
                 two_embd = tf.nn.embedding_lookup(
                     embedding_of_two_side_type_embedding, index_of_two_side
                 )
-
+                if self.smooth:
+                    two_embd = two_embd * tf.reshape(self.recovered_switch, [-1, 1])
                 if not self.compress:
-                    xyz_scatter = xyz_scatter * two_embd + two_embd
+                    xyz_scatter = xyz_scatter * two_embd + xyz_scatter
                 else:
                     return op_module.tabulate_fusion_se_atten(
                         tf.cast(self.table.data[net], self.filter_precision),
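
The added `if self.smooth:` block builds a cubic switching factor from the de-normalized first column of the environment matrix and then multiplies it onto the two-side type-embedding term in `_filter_lower`. Below is a standalone NumPy sketch of that switch; the names `recovered_r` and `rcut_smth` mirror the TensorFlow variables above, but the helper itself is illustrative and not part of the commit.

```python
import numpy as np


def recovered_switch(recovered_r: np.ndarray, rcut_smth: float) -> np.ndarray:
    """Cubic switch clipped to [0, 1], mirroring the TF expressions added above."""
    uu = 1.0 - rcut_smth * recovered_r
    sw = -uu * uu * uu + 1.0
    return np.clip(sw, 0.0, 1.0)


# The switch is 0 where the recovered radial term vanishes and saturates at 1
# for large values, so the type-embedding factor decays smoothly to zero
# instead of switching off abruptly.
print(recovered_switch(np.array([0.0, 0.5, 2.0]), rcut_smth=0.5))
```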

deepmd/descriptor/se_atten_v2.py

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import (
+    List,
+    Optional,
+)
+
+from .descriptor import (
+    Descriptor,
+)
+from .se_atten import (
+    DescrptSeAtten,
+)
+
+log = logging.getLogger(__name__)
+
+
+@Descriptor.register("se_atten_v2")
+class DescrptSeAttenV2(DescrptSeAtten):
+    r"""Smooth version 2.0 descriptor with attention.
+
+    Parameters
+    ----------
+    rcut
+        The cut-off radius :math:`r_c`
+    rcut_smth
+        From where the environment matrix should be smoothed :math:`r_s`
+    sel : list[str]
+        sel[i] specifies the maxmum number of type i atoms in the cut-off radius
+    neuron : list[int]
+        Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
+    axis_neuron
+        Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix)
+    resnet_dt
+        Time-step `dt` in the resnet construction:
+        y = x + dt * \phi (Wx + b)
+    trainable
+        If the weights of embedding net are trainable.
+    seed
+        Random seed for initializing the network parameters.
+    type_one_side
+        Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
+    exclude_types : List[List[int]]
+        The excluded pairs of types which have no interaction with each other.
+        For example, `[[0, 1]]` means no interaction between type 0 and type 1.
+    set_davg_zero
+        Set the shift of embedding net input to zero.
+    activation_function
+        The activation function in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+        The precision of the embedding net parameters. Supported options are |PRECISION|
+    uniform_seed
+        Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
+    attn
+        The length of hidden vector during scale-dot attention computation.
+    attn_layer
+        The number of layers in attention mechanism.
+    attn_dotr
+        Whether to dot the relative coordinates on the attention weights as a gated scheme.
+    attn_mask
+        Whether to mask the diagonal in the attention weights.
+    multi_task
+        If the model has multi fitting nets to train.
+    """
+
+    def __init__(
+        self,
+        rcut: float,
+        rcut_smth: float,
+        sel: int,
+        ntypes: int,
+        neuron: List[int] = [24, 48, 96],
+        axis_neuron: int = 8,
+        resnet_dt: bool = False,
+        trainable: bool = True,
+        seed: Optional[int] = None,
+        type_one_side: bool = True,
+        set_davg_zero: bool = False,
+        exclude_types: List[List[int]] = [],
+        activation_function: str = "tanh",
+        precision: str = "default",
+        uniform_seed: bool = False,
+        attn: int = 128,
+        attn_layer: int = 2,
+        attn_dotr: bool = True,
+        attn_mask: bool = False,
+        multi_task: bool = False,
+        **kwargs,
+    ) -> None:
+        DescrptSeAtten.__init__(
+            self,
+            rcut,
+            rcut_smth,
+            sel,
+            ntypes,
+            neuron=neuron,
+            axis_neuron=axis_neuron,
+            resnet_dt=resnet_dt,
+            trainable=trainable,
+            seed=seed,
+            type_one_side=type_one_side,
+            set_davg_zero=set_davg_zero,
+            exclude_types=exclude_types,
+            activation_function=activation_function,
+            precision=precision,
+            uniform_seed=uniform_seed,
+            attn=attn,
+            attn_layer=attn_layer,
+            attn_dotr=attn_dotr,
+            attn_mask=attn_mask,
+            multi_task=multi_task,
+            stripped_type_embedding=True,
+            smooth_type_embdding=True,
+            **kwargs,
+        )
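
A hedged usage sketch: constructing the new descriptor directly with a few of the keyword arguments listed above. All numeric values are made up for illustration; in normal use the descriptor is selected via the registered name `se_atten_v2` in the training input rather than instantiated by hand.

```python
# Illustrative values only; requires a deepmd-kit build with TensorFlow 2.x.
from deepmd.descriptor import DescrptSeAttenV2

descrpt = DescrptSeAttenV2(
    rcut=6.0,
    rcut_smth=0.5,
    sel=120,        # se_atten-style descriptors take a single (summed) sel
    ntypes=2,
    neuron=[25, 50, 100],
    axis_neuron=16,
    attn=128,
    attn_layer=2,
)
# stripped_type_embedding=True and smooth_type_embdding=True are fixed by the
# constructor above, which is what distinguishes se_atten_v2 from plain se_atten.
```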

deepmd/entrypoints/test.py

Lines changed: 34 additions & 9 deletions
@@ -71,7 +71,7 @@ def test(
     set_prefix : str
         string prefix of set
     numb_test : int
-        munber of tests to do
+        munber of tests to do. 0 means all data.
     rand_seed : Optional[int]
         seed for random generator
     shuffle_test : bool
@@ -88,6 +88,9 @@
     RuntimeError
         if no valid system was found
     """
+    if numb_test == 0:
+        # only float has inf, but should work for min
+        numb_test = float("inf")
     if datafile is not None:
         datalist = open(datafile)
         all_sys = datalist.read().splitlines()
@@ -934,18 +937,40 @@ def test_dipole(
 
     if detail_file is not None:
         detail_path = Path(detail_file)
+        if not atomic:
+            pe = np.concatenate(
+                (
+                    np.reshape(test_data["dipole"][:numb_test], [-1, 3]),
+                    np.reshape(dipole, [-1, 3]),
+                ),
+                axis=1,
+            )
+            header_text = "data_x data_y data_z pred_x pred_y pred_z"
+        else:
+            pe = np.concatenate(
+                (
+                    np.reshape(
+                        test_data["atomic_dipole"][:numb_test], [-1, 3 * sel_natoms]
+                    ),
+                    np.reshape(dipole, [-1, 3 * sel_natoms]),
+                ),
+                axis=1,
+            )
+            header_text = [
+                f"{letter}{number}"
+                for number in range(1, sel_natoms + 1)
+                for letter in ["data_x", "data_y", "data_z"]
+            ] + [
+                f"{letter}{number}"
+                for number in range(1, sel_natoms + 1)
+                for letter in ["pred_x", "pred_y", "pred_z"]
+            ]
+            header_text = " ".join(header_text)
 
-        pe = np.concatenate(
-            (
-                np.reshape(test_data["dipole"][:numb_test], [-1, 3]),
-                np.reshape(dipole, [-1, 3]),
-            ),
-            axis=1,
-        )
         np.savetxt(
             detail_path.with_suffix(".out"),
             pe,
-            header="data_x data_y data_z pred_x pred_y pred_z",
+            header=header_text,
         )
     return {"rmse": (rmse_f, dipole.size)}
 
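
Two notes on this hunk. First, the `numb_test == 0` shortcut maps 0 to `float("inf")`, which (as the in-line comment says) only needs to behave correctly under the downstream `min`-style comparisons to mean "use all data". Second, here is a small standalone sketch of how the per-atom header is assembled when atomic dipole output is requested; the helper name is made up, but the list comprehensions mirror the added code.

```python
def atomic_dipole_header(sel_natoms: int) -> str:
    """Column names: data_{x,y,z}<i> for each selected atom, then pred_{x,y,z}<i>."""
    cols = [
        f"{letter}{number}"
        for number in range(1, sel_natoms + 1)
        for letter in ["data_x", "data_y", "data_z"]
    ] + [
        f"{letter}{number}"
        for number in range(1, sel_natoms + 1)
        for letter in ["pred_x", "pred_y", "pred_z"]
    ]
    return " ".join(cols)


print(atomic_dipole_header(2))
# data_x1 data_y1 data_z1 data_x2 data_y2 data_z2 pred_x1 pred_y1 pred_z1 pred_x2 pred_y2 pred_z2
```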

deepmd/entrypoints/train.py

Lines changed: 14 additions & 3 deletions
@@ -445,7 +445,7 @@ def get_min_nbor_dist(jdata, rcut):
 
 
 def parse_auto_sel(sel):
-    if type(sel) is not str:
+    if not isinstance(sel, str):
         return False
     words = sel.split(":")
     if words[0] == "auto":
@@ -476,7 +476,15 @@ def update_one_sel(jdata, descriptor):
     if descriptor["type"] == "loc_frame":
         return descriptor
     rcut = descriptor["rcut"]
-    tmp_sel = get_sel(jdata, rcut, one_type=descriptor["type"] in ("se_atten",))
+    tmp_sel = get_sel(
+        jdata,
+        rcut,
+        one_type=descriptor["type"]
+        in (
+            "se_atten",
+            "se_atten_v2",
+        ),
+    )
     sel = descriptor["sel"]
     if isinstance(sel, int):
         # convert to list and finnally convert back to int
@@ -495,7 +503,10 @@
             "not less than %d, but you set it to %d. The accuracy"
             " of your model may get worse." % (ii, tt, dd)
         )
-    if descriptor["type"] in ("se_atten",):
+    if descriptor["type"] in (
+        "se_atten",
+        "se_atten_v2",
+    ):
         descriptor["sel"] = sel = sum(sel)
     return descriptor
 
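
A minimal sketch of the membership test this change generalizes (the helper name is hypothetical): both `se_atten` and `se_atten_v2` use a single mixed-type selection, so the per-type `sel` list is collapsed into one integer.

```python
def needs_summed_sel(descriptor_type: str) -> bool:
    # Mirrors the tuple used in update_one_sel after this change.
    return descriptor_type in ("se_atten", "se_atten_v2")


sel = [46, 92]            # per-type neighbor counts
if needs_summed_sel("se_atten_v2"):
    sel = sum(sel)        # collapsed to 138 for attention descriptors
print(sel)
```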
