
Commit 93cd873

Copilot and njzjz committed
feat(tf): implement decoupled out_bias and out_std in TensorFlow backend with Model-level application
Co-authored-by: njzjz <[email protected]>
1 parent 06a69f5 commit 93cd873

5 files changed (+198, −160 lines)

deepmd/tf/model/dos.py

Lines changed: 28 additions & 0 deletions

@@ -149,6 +149,9 @@ def build(
         t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string)
         t_od = tf.constant(self.numb_dos, name="output_dim", dtype=tf.int32)
 
+        # Initialize out_bias and out_std for DOS models
+        self.init_out_stat(suffix=suffix)
+
         coord = tf.reshape(coord_, [-1, natoms[1] * 3])
         atype = tf.reshape(atype_, [-1, natoms[1]])
         input_dict["nframes"] = tf.shape(coord)[0]
@@ -181,6 +184,31 @@ def build(
         atom_dos = self.fitting.build(
             dout, natoms, input_dict, reuse=reuse, suffix=suffix
         )
+
+        # Apply out_bias and out_std directly to DOS output
+        # atom_dos shape: [nframes * nloc * numb_dos] for DOS models
+        # t_out_bias shape: [1, ntypes, numb_dos], t_out_std shape: [1, ntypes, numb_dos]
+        if hasattr(self, "t_out_bias") and hasattr(self, "t_out_std"):
+            nframes = tf.shape(coord)[0]
+            nloc = natoms[0]
+            # Reshape atom_dos to [nframes, nloc, numb_dos] for bias/std application
+            atom_dos_reshaped = tf.reshape(atom_dos, [nframes, nloc, self.numb_dos])
+
+            # Get bias and std for each atom type: [nframes, nloc, numb_dos]
+            atype_flat = tf.reshape(atype, [nframes, nloc])
+            bias_per_atom = tf.gather(
+                self.t_out_bias[0], atype_flat
+            )  # [nframes, nloc, numb_dos]
+            std_per_atom = tf.gather(
+                self.t_out_std[0], atype_flat
+            )  # [nframes, nloc, numb_dos]
+
+            # Apply bias and std: dos = dos * std + bias
+            atom_dos_reshaped = atom_dos_reshaped * std_per_atom + bias_per_atom
+
+            # Reshape back to original shape
+            atom_dos = tf.reshape(atom_dos_reshaped, tf.shape(atom_dos))
+
         self.atom_dos = atom_dos
 
         dos_raw = atom_dos
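
All three model types apply the same per-type affine transform to the raw fitting output. As a minimal illustration of what the tf.gather-based block above computes (not part of the commit; shapes and values are made up), the numpy equivalent is:

import numpy as np

# Toy dimensions: 2 frames, 3 local atoms, 2 atom types, numb_dos = 4.
nframes, nloc, ntypes, numb_dos = 2, 3, 2, 4
rng = np.random.default_rng(0)

atom_dos_raw = rng.normal(size=(nframes, nloc, numb_dos))   # fitting output
atype = np.array([[0, 1, 1], [1, 0, 0]])                    # [nframes, nloc]
out_bias = rng.normal(size=(1, ntypes, numb_dos))           # plays the role of t_out_bias
out_std = np.abs(rng.normal(size=(1, ntypes, numb_dos)))    # plays the role of t_out_std

# numpy fancy indexing is the eager counterpart of tf.gather(t_out_bias[0], atype):
bias_per_atom = out_bias[0][atype]   # [nframes, nloc, numb_dos]
std_per_atom = out_std[0][atype]     # [nframes, nloc, numb_dos]

atom_dos = atom_dos_raw * std_per_atom + bias_per_atom
assert atom_dos.shape == (nframes, nloc, numb_dos)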

deepmd/tf/model/ener.py

Lines changed: 29 additions & 0 deletions

@@ -193,6 +193,9 @@ def build(
         t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string)
         t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string)
 
+        # Initialize out_bias and out_std for energy models
+        self.init_out_stat(suffix=suffix)
+
         if self.srtab is not None:
             tab_info, tab_data = self.srtab.get()
             self.tab_info = tf.get_variable(
@@ -253,6 +256,32 @@ def build(
         atom_ener = self.fitting.build(
             dout, natoms, input_dict, reuse=reuse, suffix=suffix
         )
+
+        # Apply out_bias and out_std directly to atom energy
+        # atom_ener shape: [nframes * nloc] (for energy models, dim_out=1)
+        # t_out_bias shape: [1, ntypes, 1], t_out_std shape: [1, ntypes, 1]
+        # atype shape: [nframes, nloc]
+        if hasattr(self, "t_out_bias") and hasattr(self, "t_out_std"):
+            # Reshape atom_ener to [nframes, nloc, 1] to match bias/std application
+            nframes = tf.shape(coord)[0]
+            nloc = natoms[0]
+            atom_ener_reshaped = tf.reshape(atom_ener, [nframes, nloc, 1])
+
+            # Get bias and std for each atom type: [nframes, nloc, 1]
+            atype_flat = tf.reshape(atype, [nframes, nloc])
+            bias_per_atom = tf.gather(
+                self.t_out_bias[0], atype_flat
+            )  # [nframes, nloc, 1]
+            std_per_atom = tf.gather(
+                self.t_out_std[0], atype_flat
+            )  # [nframes, nloc, 1]
+
+            # Apply bias and std: energy = energy * std + bias
+            atom_ener_reshaped = atom_ener_reshaped * std_per_atom + bias_per_atom
+
+            # Reshape back to original shape
+            atom_ener = tf.reshape(atom_ener_reshaped, tf.shape(atom_ener))
+
         self.atom_ener = atom_ener
 
         if self.srtab is not None:
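
For the energy model the per-type transform has a simple effect on the frame energy: each atom's raw energy is scaled by its type's out_std, and the total is shifted by N_t × bias_t for each type t. A small numpy check of that decomposition (toy values, not from the commit):

import numpy as np

# One frame with 4 atoms of 2 types; dim_out = 1 for energy.
atype = np.array([0, 1, 1, 0])
e_raw = np.array([1.0, 2.0, 3.0, 4.0])         # raw per-atom energies from the fitting net
bias = np.array([0.5, -1.0])                   # out_bias per type, shape [ntypes]
std = np.array([2.0, 1.0])                     # out_std per type, shape [ntypes]

e_atom = e_raw * std[atype] + bias[atype]      # same formula as the TF code above
e_total = e_atom.sum()

# Equivalent decomposition: scaled raw energy plus N_t * bias_t per type.
counts = np.bincount(atype, minlength=2)
assert np.isclose(e_total, (e_raw * std[atype]).sum() + (counts * bias).sum())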

deepmd/tf/model/model.py

Lines changed: 15 additions & 91 deletions

@@ -851,9 +851,20 @@ def init_out_stat(self, suffix: str = "") -> None:
         else:
             dim_out = 1
 
-        # Initialize out_bias and out_std as numpy arrays first
-        out_bias_data = np.zeros([1, ntypes, dim_out], dtype=GLOBAL_NP_FLOAT_PRECISION)
-        out_std_data = np.ones([1, ntypes, dim_out], dtype=GLOBAL_NP_FLOAT_PRECISION)
+        # Initialize out_bias and out_std as numpy arrays, preserving existing values if set
+        if hasattr(self, "out_bias") and self.out_bias is not None:
+            out_bias_data = self.out_bias.copy()
+        else:
+            out_bias_data = np.zeros(
+                [1, ntypes, dim_out], dtype=GLOBAL_NP_FLOAT_PRECISION
+            )
+
+        if hasattr(self, "out_std") and self.out_std is not None:
+            out_std_data = self.out_std.copy()
+        else:
+            out_std_data = np.ones(
+                [1, ntypes, dim_out], dtype=GLOBAL_NP_FLOAT_PRECISION
+            )
 
         # Create TensorFlow variables
         with tf.variable_scope("model_attr" + suffix, reuse=tf.AUTO_REUSE):
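
The point of the new branches is that init_out_stat() no longer clobbers statistics already attached to the model, e.g. by deserialize() later in this file, which sets model.out_bias / model.out_std before build() runs. A standalone sketch of just that branch logic, using a hypothetical helper name (init_out_stat_data) and the default numpy dtype in place of GLOBAL_NP_FLOAT_PRECISION:

import numpy as np

def init_out_stat_data(existing_bias, existing_std, ntypes, dim_out):
    # Keep statistics that were already set; otherwise fall back to zeros/ones.
    if existing_bias is not None:
        out_bias_data = existing_bias.copy()
    else:
        out_bias_data = np.zeros([1, ntypes, dim_out])
    if existing_std is not None:
        out_std_data = existing_std.copy()
    else:
        out_std_data = np.ones([1, ntypes, dim_out])
    return out_bias_data, out_std_data

# Fresh model: zero bias, unit std.
b, s = init_out_stat_data(None, None, ntypes=2, dim_out=1)
assert np.all(b == 0.0) and np.all(s == 1.0)

# Deserialized model: the bias restored from the checkpoint survives build().
loaded_bias = np.array([[[1.5], [-0.3]]])
b, s = init_out_stat_data(loaded_bias, None, ntypes=2, dim_out=1)
assert np.array_equal(b, loaded_bias)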
@@ -960,43 +971,7 @@ def deserialize(cls, data: dict, suffix: str = "") -> "Descriptor":
         data = data.copy()
         check_version_compatibility(data.pop("@version", 2), 2, 1)
         descriptor = Descriptor.deserialize(data.pop("descriptor"), suffix=suffix)
-        if data["fitting"].get("@variables", {}).get("bias_atom_e") is not None:
-            # careful: copy each level and don't modify the input array,
-            # otherwise it will affect the original data
-            # deepcopy is not used for performance reasons
-            data["fitting"] = data["fitting"].copy()
-            data["fitting"]["@variables"] = data["fitting"]["@variables"].copy()
-            if (
-                int(np.any(data["fitting"]["@variables"]["bias_atom_e"]))
-                + int(np.any(data["@variables"]["out_bias"]))
-                > 1
-            ):
-                raise ValueError(
-                    "fitting/@variables/bias_atom_e and @variables/out_bias should not be both non-zero"
-                )
-            # Improved handling for different shapes (dipole/polar vs energy)
-            bias_atom_e_shape = data["fitting"]["@variables"]["bias_atom_e"].shape
-            out_bias_data = data["@variables"]["out_bias"]
-
-            # For dipole/polar models, out_bias has shape [1, ntypes, 3]
-            # but bias_atom_e has shape [ntypes] where embedding_width might != 3
-            if len(bias_atom_e_shape) == 1 and len(out_bias_data.shape) == 3:
-                # Convert out_bias to bias_atom_e shape safely
-                # We sum over the output dimensions for energy-like models
-                if out_bias_data.shape[2] == 1:
-                    # Energy case: out_bias [1, ntypes, 1] -> bias_atom_e [ntypes]
-                    bias_increment = out_bias_data[0, :, 0]
-                else:
-                    # Dipole/Polar case: take norm or sum for compatibility
-                    # This is still a workaround, but safer than reshape
-                    bias_increment = np.linalg.norm(out_bias_data[0], axis=-1)
-            else:
-                # Fallback to original reshape if shapes are compatible
-                bias_increment = out_bias_data.reshape(bias_atom_e_shape)
-
-            data["fitting"]["@variables"]["bias_atom_e"] = (
-                data["fitting"]["@variables"]["bias_atom_e"] + bias_increment
-            )
+        # bias_atom_e and out_bias are now completely independent - no conversion needed
         fitting = Fitting.deserialize(data.pop("fitting"), suffix=suffix)
         # pass descriptor type embedding to model
         if descriptor.explicit_ntypes:
@@ -1029,57 +1004,6 @@ def deserialize(cls, data: dict, suffix: str = "") -> "Descriptor":
         model.out_std = out_std
         return model
 
-    def apply_out_stat(
-        self,
-        ret: dict[str, np.ndarray],
-        atype: np.ndarray,
-    ) -> dict[str, np.ndarray]:
-        """Apply the bias and std to the atomic output.
-
-        Parameters
-        ----------
-        ret : dict[str, np.ndarray]
-            The returned dict by the forward_atomic method
-        atype : np.ndarray
-            The atom types. nf x nloc
-
-        Returns
-        -------
-        dict[str, np.ndarray]
-            The output with bias and std applied
-        """
-        if self.out_bias is None:
-            return ret
-
-        # Get the output keys that need bias/std applied
-        fitting_output_def = (
-            self.fitting.fitting_output_def()
-            if hasattr(self.fitting, "fitting_output_def")
-            else {}
-        )
-
-        # Apply bias for each output
-        for kk in ret.keys():
-            if kk in ["mask"]:  # Skip mask
-                continue
-
-            # Get the corresponding bias and std
-            # For now, we assume single output (idx=0), which works for most cases
-            bias_idx = 0
-            ntypes = self.get_ntypes()
-
-            if self.out_bias.shape[0] > bias_idx:
-                # Extract bias for this output: shape [ntypes, output_dim]
-                out_bias_kk = self.out_bias[bias_idx]  # [ntypes, output_dim]
-
-                # Apply bias: ret[kk] shape is [nframes, nloc, output_dim]
-                # atype shape is [nframes, nloc]
-                # We need to index out_bias_kk with atype to get [nframes, nloc, output_dim]
-                bias_for_atoms = out_bias_kk[atype]  # [nframes, nloc, output_dim]
-                ret[kk] = ret[kk] + bias_for_atoms
-
-        return ret
-
     def serialize(self, suffix: str = "") -> dict:
         """Serialize the model.
 
deepmd/tf/model/tensor.py

Lines changed: 46 additions & 0 deletions

@@ -126,6 +126,9 @@ def build(
         t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string)
         t_od = tf.constant(self.get_out_size(), name="output_dim", dtype=tf.int32)
 
+        # Initialize out_bias and out_std for tensor models (dipole/polar)
+        self.init_out_stat(suffix=suffix)
+
         natomsel = sum(natoms[2 + type_i] for type_i in self.get_sel_type())
         nout = self.get_out_size()
 
@@ -164,6 +167,31 @@ def build(
         output = self.fitting.build(
             dout, rot_mat, natoms, input_dict, reuse=reuse, suffix=suffix
         )
+
+        # Apply out_bias and out_std directly to tensor output
+        # output shape: [nframes * natomsel * nout] for tensor models
+        # t_out_bias shape: [1, ntypes, nout], t_out_std shape: [1, ntypes, nout]
+        if hasattr(self, "t_out_bias") and hasattr(self, "t_out_std"):
+            nframes = tf.shape(coord)[0]
+            # Reshape output to [nframes, natomsel, nout] for bias/std application
+            output_reshaped = tf.reshape(output, [nframes, natomsel, nout])
+
+            # Get atom types for selected atoms only (matching natomsel)
+            atype_selected = self._get_selected_atype(atype, natoms)
+
+            # Get bias and std for each selected atom type: [nframes, natomsel, nout]
+            bias_per_atom = tf.gather(
+                self.t_out_bias[0], atype_selected
+            )  # [nframes, natomsel, nout]
+            std_per_atom = tf.gather(
+                self.t_out_std[0], atype_selected
+            )  # [nframes, natomsel, nout]
+
+            # Apply bias and std: output = output * std + bias
+            output_reshaped = output_reshaped * std_per_atom + bias_per_atom
+
+            # Reshape back to original shape
+            output = tf.reshape(output_reshaped, tf.shape(output))
         framesize = nout if "global" in self.model_type else natomsel * nout
         output = tf.reshape(
             output, [-1, framesize], name="o_" + self.model_type + suffix
@@ -206,6 +234,24 @@ def build(
 
         return model_dict
 
+    def _get_selected_atype(self, atype, natoms):
+        """Get atom types for selected atoms only (matching tensor model selection)."""
+        # For tensor models, the fitting output corresponds to selected atom types
+        # atype shape: [nframes, nloc]
+        # We need to extract atom types that match the natomsel count
+
+        # Simplified approach: take the first natomsel atoms from each frame
+        # This works because natoms and descriptor arrangement should be consistent
+        nframes = tf.shape(atype)[0]
+        selected_types = self.get_sel_type()
+        natomsel = sum(natoms[2 + type_i] for type_i in selected_types)
+
+        # Take the first natomsel atoms from each frame
+        # This assumes the atom ordering is consistent with how fitting produces output
+        atype_selected = atype[:, :natomsel]  # [nframes, natomsel]
+
+        return atype_selected
+
     def init_variables(
         self,
         graph: tf.Graph,
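
The helper's "first natomsel atoms" shortcut lines up with the fitting output only when the selected types are the leading types in the type-sorted atom ordering, which is the assumption its comments state. A numpy sketch with toy numbers (the natoms layout [nloc, nall, n_type0, n_type1, ...] follows the TF backend convention visible in the diff; all values are illustrative):

import numpy as np

# Toy setup: 3 atom types, sel_type = [0, 1], atoms sorted by type.
natoms = np.array([5, 5, 2, 2, 1])                # [nloc, nall, n_type0, n_type1, n_type2]
sel_type = [0, 1]
natomsel = sum(natoms[2 + t] for t in sel_type)   # 2 + 2 = 4 selected atoms

# With type-sorted atoms and leading selected types, the first natomsel atoms
# are exactly the selected ones -- the ordering _get_selected_atype relies on.
atype = np.array([[0, 0, 1, 1, 2]])               # [nframes, nloc]
atype_selected = atype[:, :natomsel]              # [[0, 0, 1, 1]]

# Per-type bias lookup for the selected atoms, matching the fitting output length.
out_bias = np.array([[0.1], [0.2], [0.3]])        # [ntypes, nout] with nout = 1
bias_per_atom = out_bias[atype_selected]          # shape [1, natomsel, 1]
assert bias_per_atom.shape == (1, natomsel, 1)
assert np.allclose(bias_per_atom[0, :, 0], [0.1, 0.1, 0.2, 0.2])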
