deepmodeling
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎deepmd/descriptor/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎deepmd/descriptor/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎deepmd/descriptor/se_atten.py‎
Lines changed: 49 additions & 3 deletions b/‎deepmd/descriptor/se_atten.py‎
Lines changed: 49 additions & 3 deletions
diff --git a/‎deepmd/descriptor/se_atten_v2.py‎
Lines changed: 115 additions & 0 deletions b/‎deepmd/descriptor/se_atten_v2.py‎
Lines changed: 115 additions & 0 deletions
diff --git a/‎deepmd/entrypoints/train.py‎
Lines changed: 13 additions & 2 deletions b/‎deepmd/entrypoints/train.py‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎deepmd/utils/argcheck.py‎
Lines changed: 41 additions & 14 deletions b/‎deepmd/utils/argcheck.py‎
Lines changed: 41 additions & 14 deletions
diff --git a/‎deepmd/utils/finetune.py‎
Lines changed: 3 additions & 2 deletions b/‎deepmd/utils/finetune.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎doc/credits.rst‎
Lines changed: 1 addition & 1 deletion b/‎doc/credits.rst‎
Lines changed: 1 addition & 1 deletion
@@ -102,6 +102,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp
     - [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md)
     - [Descriptor `"se_e3"`](doc/model/train-se-e3.md)
     - [Descriptor `"se_atten"`](doc/model/train-se-atten.md)
+    - [Descriptor `"se_atten_v2"`](doc/model/train-se-atten.md#descriptor-se_atten_v2)
     - [Descriptor `"hybrid"`](doc/model/train-hybrid.md)
     - [Descriptor `sel`](doc/model/sel.md)
     - [Fit energy](doc/model/train-energy.md)
 
@@ -24,6 +24,9 @@
 from .se_atten import (
     DescrptSeAtten,
 )
+from .se_atten_v2 import (
+    DescrptSeAttenV2,
+)
 from .se_r import (
     DescrptSeR,
 )
@@ -41,6 +44,7 @@
     "DescrptSeAEfLower",
     "DescrptSeAMask",
     "DescrptSeAtten",
+    "DescrptSeAttenV2",
     "DescrptSeR",
     "DescrptSeT",
 ]
@@ -108,6 +108,13 @@ class DescrptSeAtten(DescrptSeA):
             Whether to mask the diagonal in the attention weights.
     multi_task
             If the model has multi fitting nets to train.
+    stripped_type_embedding
+            Whether to strip the type embedding into a separated embedding network.
+            Default value will be True in `se_atten_v2` descriptor.
+    smooth_type_embdding
+            When using stripped type embedding, whether to multiply the network output of the type embedding
+            by the smooth factor to keep the network smooth, instead of setting `set_davg_zero` to be True.
+            Default value will be True in `se_atten_v2` descriptor.
     """
 
     def __init__(
@@ -133,9 +140,10 @@ def __init__(
         attn_mask: bool = False,
         multi_task: bool = False,
         stripped_type_embedding: bool = False,
+        smooth_type_embdding: bool = False,
         **kwargs,
     ) -> None:
-        if not set_davg_zero:
+        if not set_davg_zero and not (stripped_type_embedding and smooth_type_embdding):
             warnings.warn(
                 "Set 'set_davg_zero' False in descriptor 'se_atten' "
                 "may cause unexpected incontinuity during model inference!"
@@ -166,6 +174,7 @@ def __init__(
                 "2"
             ), "se_atten only support tensorflow version 2.0 or higher."
         self.stripped_type_embedding = stripped_type_embedding
+        self.smooth = smooth_type_embdding
         self.ntypes = ntypes
         self.att_n = attn
         self.attn_layer = attn_layer
@@ -607,6 +616,7 @@ def build(
             sel_a=self.sel_all_a,
             sel_r=self.sel_all_r,
         )
+
         self.nei_type_vec = tf.reshape(self.nei_type_vec, [-1])
         self.nmask = tf.cast(
             tf.reshape(self.nmask, [-1, 1, self.sel_all_a[0]]),
@@ -625,6 +635,41 @@ def build(
             tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1]
         )  ## lammps will have error without this
         self._identity_tensors(suffix=suffix)
+        if self.smooth:
+            self.sliced_avg = tf.reshape(
+                tf.slice(
+                    tf.reshape(self.t_avg, [self.ntypes, -1, 4]), [0, 0, 0], [-1, 1, 1]
+                ),
+                [self.ntypes, 1],
+            )
+            self.sliced_std = tf.reshape(
+                tf.slice(
+                    tf.reshape(self.t_std, [self.ntypes, -1, 4]), [0, 0, 0], [-1, 1, 1]
+                ),
+                [self.ntypes, 1],
+            )
+            self.avg_looked_up = tf.reshape(
+                tf.nn.embedding_lookup(self.sliced_avg, self.atype_nloc),
+                [-1, natoms[0], 1],
+            )
+            self.std_looked_up = tf.reshape(
+                tf.nn.embedding_lookup(self.sliced_std, self.atype_nloc),
+                [-1, natoms[0], 1],
+            )
+            self.recovered_r = (
+                tf.reshape(
+                    tf.slice(tf.reshape(self.descrpt, [-1, 4]), [0, 0], [-1, 1]),
+                    [-1, natoms[0], self.sel_all_a[0]],
+                )
+                * self.std_looked_up
+                + self.avg_looked_up
+            )
+            uu = 1 - self.rcut_r_smth * self.recovered_r
+            self.recovered_switch = -uu * uu * uu + 1
+            self.recovered_switch = tf.clip_by_value(self.recovered_switch, 0.0, 1.0)
+            self.recovered_switch = tf.cast(
+                self.recovered_switch, self.filter_precision
+            )
 
         self.dout, self.qmat = self._pass_filter(
             self.descrpt_reshape,
@@ -1146,9 +1191,10 @@ def _filter_lower(
                         two_embd = tf.nn.embedding_lookup(
                             embedding_of_two_side_type_embedding, index_of_two_side
                         )
-
+                    if self.smooth:
+                        two_embd = two_embd * tf.reshape(self.recovered_switch, [-1, 1])
                     if not self.compress:
-                        xyz_scatter = xyz_scatter * two_embd + two_embd
+                        xyz_scatter = xyz_scatter * two_embd + xyz_scatter
                     else:
                         return op_module.tabulate_fusion_se_atten(
                             tf.cast(self.table.data[net], self.filter_precision),
 
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
+from typing import (
+    List,
+    Optional,
+)
+
+from .descriptor import (
+    Descriptor,
+)
+from .se_atten import (
+    DescrptSeAtten,
+)
+
+log = logging.getLogger(__name__)
+
+
+@Descriptor.register("se_atten_v2")
+class DescrptSeAttenV2(DescrptSeAtten):
+    r"""Smooth version 2.0 descriptor with attention.
+
+    Parameters
+    ----------
+    rcut
+            The cut-off radius :math:`r_c`
+    rcut_smth
+            From where the environment matrix should be smoothed :math:`r_s`
+    sel : list[str]
+            sel[i] specifies the maximum number of type i atoms in the cut-off radius
+    neuron : list[int]
+            Number of neurons in each hidden layer of the embedding net :math:`\mathcal{N}`
+    axis_neuron
+            Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix)
+    resnet_dt
+            Time-step `dt` in the resnet construction:
+            y = x + dt * \phi (Wx + b)
+    trainable
+            If the weights of embedding net are trainable.
+    seed
+            Random seed for initializing the network parameters.
+    type_one_side
+            Try to build N_types embedding nets. Otherwise, build N_types^2 embedding nets
+    exclude_types : List[List[int]]
+            The excluded pairs of types which have no interaction with each other.
+            For example, `[[0, 1]]` means no interaction between type 0 and type 1.
+    set_davg_zero
+            Set the shift of embedding net input to zero.
+    activation_function
+            The activation function in the embedding net. Supported options are |ACTIVATION_FN|
+    precision
+            The precision of the embedding net parameters. Supported options are |PRECISION|
+    uniform_seed
+            Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
+    attn
+            The length of hidden vector during scale-dot attention computation.
+    attn_layer
+            The number of layers in attention mechanism.
+    attn_dotr
+            Whether to dot the relative coordinates on the attention weights as a gated scheme.
+    attn_mask
+            Whether to mask the diagonal in the attention weights.
+    multi_task
+            If the model has multi fitting nets to train.
+    """
+
+    def __init__(
+        self,
+        rcut: float,
+        rcut_smth: float,
+        sel: int,
+        ntypes: int,
+        neuron: List[int] = [24, 48, 96],
+        axis_neuron: int = 8,
+        resnet_dt: bool = False,
+        trainable: bool = True,
+        seed: Optional[int] = None,
+        type_one_side: bool = True,
+        set_davg_zero: bool = False,
+        exclude_types: List[List[int]] = [],
+        activation_function: str = "tanh",
+        precision: str = "default",
+        uniform_seed: bool = False,
+        attn: int = 128,
+        attn_layer: int = 2,
+        attn_dotr: bool = True,
+        attn_mask: bool = False,
+        multi_task: bool = False,
+        **kwargs,
+    ) -> None:
+        DescrptSeAtten.__init__(
+            self,
+            rcut,
+            rcut_smth,
+            sel,
+            ntypes,
+            neuron=neuron,
+            axis_neuron=axis_neuron,
+            resnet_dt=resnet_dt,
+            trainable=trainable,
+            seed=seed,
+            type_one_side=type_one_side,
+            set_davg_zero=set_davg_zero,
+            exclude_types=exclude_types,
+            activation_function=activation_function,
+            precision=precision,
+            uniform_seed=uniform_seed,
+            attn=attn,
+            attn_layer=attn_layer,
+            attn_dotr=attn_dotr,
+            attn_mask=attn_mask,
+            multi_task=multi_task,
+            stripped_type_embedding=True,
+            smooth_type_embdding=True,
+            **kwargs,
+        )
@@ -476,7 +476,15 @@ def update_one_sel(jdata, descriptor):
     if descriptor["type"] == "loc_frame":
         return descriptor
     rcut = descriptor["rcut"]
-    tmp_sel = get_sel(jdata, rcut, one_type=descriptor["type"] in ("se_atten",))
+    tmp_sel = get_sel(
+        jdata,
+        rcut,
+        one_type=descriptor["type"]
+        in (
+            "se_atten",
+            "se_atten_v2",
+        ),
+    )
     sel = descriptor["sel"]
     if isinstance(sel, int):
         # convert to list and finally convert back to int
@@ -495,7 +503,10 @@ def update_one_sel(jdata, descriptor):
                     "not less than %d, but you set it to %d. The accuracy"
                     " of your model may get worse." % (ii, tt, dd)
                 )
-    if descriptor["type"] in ("se_atten",):
+    if descriptor["type"] in (
+        "se_atten",
+        "se_atten_v2",
+    ):
         descriptor["sel"] = sel = sum(sel)
     return descriptor
 
 
@@ -333,9 +333,7 @@ def descrpt_hybrid_args():
     ]
 
 
-@descrpt_args_plugin.register("se_atten")
-def descrpt_se_atten_args():
-    doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible."
+def descrpt_se_atten_common_args():
     doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\
     - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\
     - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\
@@ -350,21 +348,13 @@ def descrpt_se_atten_args():
     doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
     doc_trainable = "If the parameters in the embedding net is trainable"
     doc_seed = "Random seed for parameter initialization"
-    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
     doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1."
     doc_attn = "The length of hidden vectors in attention layers"
     doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` is only enabled when attn_layer==0 and stripped_type_embedding is True"
     doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates"
     doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix"
 
     return [
-        Argument(
-            "stripped_type_embedding",
-            bool,
-            optional=True,
-            default=False,
-            doc=doc_stripped_type_embedding,
-        ),
         Argument("sel", [int, list, str], optional=True, default="auto", doc=doc_sel),
         Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
         Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
@@ -394,16 +384,51 @@ def descrpt_se_atten_args():
         Argument(
             "exclude_types", list, optional=True, default=[], doc=doc_exclude_types
         ),
-        Argument(
-            "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero
-        ),
         Argument("attn", int, optional=True, default=128, doc=doc_attn),
         Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
         Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr),
         Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask),
     ]
 
 
+@descrpt_args_plugin.register("se_atten")
+def descrpt_se_atten_args():
+    doc_stripped_type_embedding = "Whether to strip the type embedding into a separated embedding network. Setting it to `False` will fall back to the previous version of `se_atten` which is non-compressible."
+    doc_smooth_type_embdding = "When using stripped type embedding, whether to dot smooth factor on the network output of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True."
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
+
+    return descrpt_se_atten_common_args() + [
+        Argument(
+            "stripped_type_embedding",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_stripped_type_embedding,
+        ),
+        Argument(
+            "smooth_type_embdding",
+            bool,
+            optional=True,
+            default=False,
+            doc=doc_smooth_type_embdding,
+        ),
+        Argument(
+            "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero
+        ),
+    ]
+
+
+@descrpt_args_plugin.register("se_atten_v2")
+def descrpt_se_atten_v2_args():
+    doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used"
+
+    return descrpt_se_atten_common_args() + [
+        Argument(
+            "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
+        ),
+    ]
+
+
 @descrpt_args_plugin.register("se_a_mask")
 def descrpt_se_a_mask_args():
     doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\
@@ -459,13 +484,15 @@ def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant:
     link_se_a_tpe = make_link("se_a_tpe", "model/descriptor[se_a_tpe]")
     link_hybrid = make_link("hybrid", "model/descriptor[hybrid]")
     link_se_atten = make_link("se_atten", "model/descriptor[se_atten]")
+    link_se_atten_v2 = make_link("se_atten_v2", "model/descriptor[se_atten_v2]")
     doc_descrpt_type = "The type of the descritpor. See explanation below. \n\n\
 - `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\
 - `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\
 - `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\
 - `se_e3`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\
 - `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\
 - `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\
+- `se_atten_v2`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor.\n\n\
 - `se_a_mask`: Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms. \n\n\
 - `hybrid`: Concatenate of a list of descriptors as a new descriptor."
 
 
@@ -42,10 +42,11 @@ def replace_model_params_with_pretrained_model(
 
     # Check the model type
     assert pretrained_jdata["model"]["descriptor"]["type"] in [
-        "se_atten"
+        "se_atten",
+        "se_atten_v2",
     ] and pretrained_jdata["model"]["fitting_net"]["type"] in [
         "ener"
-    ], "The finetune process only supports models pretrained with 'se_atten' descriptor and 'ener' fitting_net!"
+    ], "The finetune process only supports models pretrained with 'se_atten' or 'se_atten_v2' descriptor and 'ener' fitting_net!"
 
     # Check the type map
     pretrained_type_map = pretrained_jdata["model"]["type_map"]
 
@@ -42,7 +42,7 @@ Cite DeePMD-kit and methods
 
    Wang_NuclFusion_2022_v62_p126013
 
-- If attention-based descriptor (`se_atten`) is used,
+- If attention-based descriptor (`se_atten`, `se_atten_v2`) is used,
 
 .. bibliography::
    :filter: False
Original file line number	Diff line number	Diff line change
`@@ -24,6 +24,9 @@`
`24`	`24`	`from .se_atten import (`
`25`	`25`	`DescrptSeAtten,`
`26`	`26`	`)`
	`27`	`+from .se_atten_v2 import (`
	`28`	`+ DescrptSeAttenV2,`
	`29`	`+)`
`27`	`30`	`from .se_r import (`
`28`	`31`	`DescrptSeR,`
`29`	`32`	`)`
`@@ -41,6 +44,7 @@`
`41`	`44`	`"DescrptSeAEfLower",`
`42`	`45`	`"DescrptSeAMask",`
`43`	`46`	`"DescrptSeAtten",`
	`47`	`+ "DescrptSeAttenV2",`
`44`	`48`	`"DescrptSeR",`
`45`	`49`	`"DescrptSeT",`
`46`	`50`	`]`