move AutoBatchSize from dp test to DeepPot.eval (#1173)

njzjz · web-flow · commit 5843c02211ef · 2021-09-28T19:12:41.000+08:00
* Move AutoBatchSize from dp test to DeepPot.eval (bugfix). * Revert "Bug fix of memory overflow when calculating model deviation (#1153)". This reverts commit d8acbb8. * Reshape coords before getting their shape. * Use the same AutoBatchSize instance for all models in model deviation.
diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py
@@ -9,7 +9,6 @@
 from deepmd.utils import random as dp_random
 from deepmd.utils.data import DeepmdData
 from deepmd.utils.weight_avg import weighted_average
-from deepmd.utils.batch_size import AutoBatchSize
 
 if TYPE_CHECKING:
     from deepmd.infer import DeepDipole, DeepPolar, DeepPot, DeepWFC
@@ -70,7 +69,6 @@ def test(
 
     # init model
     dp = DeepPotential(model)
-    auto_batch_size = AutoBatchSize()
 
     for cc, system in enumerate(all_sys):
         log.info("# ---------------output of dp test--------------- ")
@@ -84,7 +82,6 @@ def test(
             err = test_ener(
                 dp,
                 data,
-                auto_batch_size,
                 system,
                 numb_test,
                 detail_file,
@@ -162,7 +159,6 @@ def save_txt_file(
 def test_ener(
     dp: "DeepPot",
     data: DeepmdData,
-    auto_batch_size: AutoBatchSize,
     system: str,
     numb_test: int,
     detail_file: Optional[str],
@@ -230,10 +226,7 @@ def test_ener(
     else:
         aparam = None
 
-    ret = auto_batch_size.execute_all(
-        dp.eval,
-        numb_test,
-        natoms,
+    ret = dp.eval(
         coord,
         box,
         atype,
diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py
@@ -1,17 +1,31 @@
 import os
-from typing import List, Optional, TYPE_CHECKING
+from typing import List, Optional, TYPE_CHECKING, Union
 
 import numpy as np
 from deepmd.common import make_default_mesh
 from deepmd.env import default_tf_session_config, tf, MODEL_VERSION
 from deepmd.utils.sess import run_sess
+from deepmd.utils.batch_size import AutoBatchSize
 
 if TYPE_CHECKING:
     from pathlib import Path
 
 
 class DeepEval:
-    """Common methods for DeepPot, DeepWFC, DeepPolar, ..."""
+    """Common methods for DeepPot, DeepWFC, DeepPolar, ...
+    
+    Parameters
+    ----------
+    model_file : Path
+        The name of the frozen model file.
+    load_prefix: str
+        The prefix in the load computational graph
+    default_tf_graph : bool
+        If True, use the default tf graph; otherwise build a new tf graph for evaluation
+    auto_batch_size : bool or int or AutoBatchSize, default: False
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
+    """
 
     _model_type: Optional[str] = None
     _model_version: Optional[str] = None
@@ -21,7 +35,8 @@ def __init__(
         self,
         model_file: "Path",
         load_prefix: str = "load",
-        default_tf_graph: bool = False
+        default_tf_graph: bool = False,
+        auto_batch_size: Union[bool, int, AutoBatchSize] = False,
     ):
         self.graph = self._load_graph(
             model_file, prefix=load_prefix, default_tf_graph=default_tf_graph
@@ -34,6 +49,19 @@ def __init__(
                 f"model in graph (version {self.model_version}) is incompatible"
                 f"with the model (version {MODEL_VERSION}) supported by the current code."
             )
+        
+        # the default is False because subclasses may not support automatic batch size
+        if isinstance(auto_batch_size, bool):
+            if auto_batch_size:
+                self.auto_batch_size = AutoBatchSize()
+            else:
+                self.auto_batch_size = None
+        elif isinstance(auto_batch_size, int):
+            self.auto_batch_size = AutoBatchSize(auto_batch_size)
+        elif isinstance(auto_batch_size, AutoBatchSize):
+            self.auto_batch_size = auto_batch_size
+        else:
+            raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
 
     @property
     def model_type(self) -> str:
diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py
@@ -1,12 +1,13 @@
 import logging
-from typing import TYPE_CHECKING, List, Optional, Tuple
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 
 import numpy as np
 from deepmd.common import make_default_mesh
 from deepmd.env import default_tf_session_config, tf
 from deepmd.infer.data_modifier import DipoleChargeModifier
 from deepmd.infer.deep_eval import DeepEval
 from deepmd.utils.sess import run_sess
+from deepmd.utils.batch_size import AutoBatchSize
 
 if TYPE_CHECKING:
     from pathlib import Path
@@ -25,6 +26,9 @@ class DeepPot(DeepEval):
         The prefix in the load computational graph
     default_tf_graph : bool
         If uses the default tf graph, otherwise build a new tf graph for evaluation
+    auto_batch_size : bool or int or AutoBatchSize, default: True
+        If True, automatic batch size will be used. If int, it will be used
+        as the initial batch size.
 
     Examples
     --------
@@ -49,7 +53,8 @@ def __init__(
         self,
         model_file: "Path",
         load_prefix: str = "load",
-        default_tf_graph: bool = False
+        default_tf_graph: bool = False,
+        auto_batch_size: Union[bool, int, AutoBatchSize] = True,
     ) -> None:
 
         # add these tensors on top of what is defined by DeepTensor Class
@@ -83,7 +88,8 @@ def __init__(
             self,
             model_file,
             load_prefix=load_prefix,
-            default_tf_graph=default_tf_graph
+            default_tf_graph=default_tf_graph,
+            auto_batch_size=auto_batch_size,
         )
 
         # load optional tensors
@@ -224,12 +230,23 @@ def eval(
         atom_virial
             The atomic virial. Only returned when atomic == True
         """
+        # reshape coords before getting shape
+        natoms = len(atom_types)
+        coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        numb_test = coords.shape[0]
         if atomic:
             if self.modifier_type is not None:
                 raise RuntimeError('modifier does not support atomic modification')
+            if self.auto_batch_size is not None:
+                return self.auto_batch_size.execute_all(self._eval_inner, numb_test, natoms,
+                           coords, cells, atom_types, fparam = fparam, aparam = aparam, atomic = atomic, efield = efield)
             return self._eval_inner(coords, cells, atom_types, fparam = fparam, aparam = aparam, atomic = atomic, efield = efield)
         else :
-            e, f, v = self._eval_inner(coords, cells, atom_types, fparam = fparam, aparam = aparam, atomic = atomic, efield = efield)
+            if self.auto_batch_size is not None:
+                e, f, v = self.auto_batch_size.execute_all(self._eval_inner, numb_test, natoms,
+                              coords, cells, atom_types, fparam = fparam, aparam = aparam, atomic = atomic, efield = efield)
+            else:
+                e, f, v = self._eval_inner(coords, cells, atom_types, fparam = fparam, aparam = aparam, atomic = atomic, efield = efield)
             if self.modifier_type is not None:
                 me, mf, mv = self.dm.eval(coords, cells, atom_types)
                 e += me.reshape(e.shape)
diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py
@@ -1,6 +1,7 @@
 import numpy as np
 from .deep_pot import DeepPot
 from ..utils.data import DeepmdData
+from ..utils.batch_size import AutoBatchSize
         
 
 def calc_model_devi_f(fs: np.ndarray):
@@ -174,8 +175,9 @@ def make_model_devi(
         in a trajectory by a MD engine (such as Gromacs / Lammps).
         This paramter is used to determine the index in the output file.
     '''
+    auto_batch_size = AutoBatchSize()
     # init models
-    dp_models = [DeepPot(model) for model in models]
+    dp_models = [DeepPot(model, auto_batch_size=auto_batch_size) for model in models]
 
     # check type maps
     tmaps = [dp.get_type_map() for dp in dp_models]
@@ -195,13 +197,12 @@ def make_model_devi(
     nframes_tot = 0
     devis = []
     for data in data_sets:
-        coords = data["coord"]
-        boxs = data["box"]
-        atypes = data["type"]
-        for coord, box, atype in zip(coords, boxs, atypes):
-            devi = calc_model_devi(np.array([coord]), np.array([box]), atype, dp_models, nopbc=nopbc)
-            nframes_tot += 1
-            devis.append(devi)
+        coord = data["coord"]
+        box = data["box"]
+        atype = data["type"][0] 
+        devi = calc_model_devi(coord, box, atype, dp_models, nopbc=nopbc)
+        nframes_tot += coord.shape[0]
+        devis.append(devi)
     devis = np.vstack(devis)
     devis[:, 0] = np.arange(nframes_tot) * frequency
     write_model_devi_out(devis, output)
diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py
@@ -79,7 +79,7 @@ def execute(self, callable: Callable, start_index: int, natoms: int) -> Tuple[in
             n_tot = n_batch * natoms
             self.maximum_working_batch_size = max(self.maximum_working_batch_size, n_tot)
             # adjust the next batch size
-            if n_tot >= self.current_batch_size and self.current_batch_size * self.factor < self.minimal_not_working_batch_size:
+            if n_tot + natoms > self.current_batch_size and self.current_batch_size * self.factor < self.minimal_not_working_batch_size:
                 self._adjust_batch_size(self.factor)
             return n_batch, result