deepmodeling
diff --git a/‎dpdata/plugins/vasp.py
Lines changed: 3 additions & 1 deletion b/‎dpdata/plugins/vasp.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎dpdata/vasp/outcar.py
Lines changed: 30 additions & 26 deletions b/‎dpdata/vasp/outcar.py
Lines changed: 30 additions & 26 deletions
@@ -56,6 +56,7 @@ class VASPOutcarFormat(Format):
     @Format.post("rot_lower_triangular")
     def from_labeled_system(self, file_name, begin=0, step=1, **kwargs):
         data = {}
+        ml = kwargs.get("ml", False)
         data['atom_names'], \
             data['atom_numbs'], \
             data['atom_types'], \
@@ -64,7 +65,7 @@ def from_labeled_system(self, file_name, begin=0, step=1, **kwargs):
             data['energies'], \
             data['forces'], \
             tmp_virial, \
-            = dpdata.vasp.outcar.get_frames(file_name, begin=begin, step=step)
+            = dpdata.vasp.outcar.get_frames(file_name, begin=begin, step=step, ml=ml)
         if tmp_virial is not None:
             data['virials'] = tmp_virial
         # scale virial to the unit of eV
@@ -107,3 +108,4 @@ def from_labeled_system(self, file_name, begin=0, step=1, **kwargs):
             data['virials'][ii] *= v_pref * vol
         data = uniq_atom_names(data)
         return data
+        
@@ -1,7 +1,7 @@
 import numpy as np
 import re
 
-def system_info (lines, type_idx_zero = False) :
+def system_info(lines, type_idx_zero = False):
     atom_names = []
     atom_numbs = None
     nelm = None
@@ -30,7 +30,7 @@ def system_info (lines, type_idx_zero = False) :
     assert(atom_numbs is not None), "cannot find ion type info in OUTCAR"
     atom_names = atom_names[:len(atom_numbs)]
     atom_types = []
-    for idx,ii in enumerate(atom_numbs) :
+    for idx,ii in enumerate(atom_numbs):
         for jj in range(ii) :
             if type_idx_zero :
                 atom_types.append(idx)
@@ -39,18 +39,20 @@ def system_info (lines, type_idx_zero = False) :
     return atom_names, atom_numbs, np.array(atom_types, dtype = int), nelm
 
 
-def get_outcar_block(fp) :
+def get_outcar_block(fp, ml = False):
     blk = []
+    energy_token = ['free  energy   TOTEN', 'free  energy ML TOTEN']
+    ml_index = int(ml)
     for ii in fp :
         if not ii :
             return blk
         blk.append(ii.rstrip('\n'))
-        if 'free  energy   TOTEN' in ii:
+        if energy_token[ml_index] in ii:
             return blk
     return blk
 
 # we assume that the force is printed ...
-def get_frames (fname, begin = 0, step = 1) :
+def get_frames(fname, begin = 0, step = 1, ml = False):
     fp = open(fname)
     blk = get_outcar_block(fp)
 
@@ -66,7 +68,7 @@ def get_frames (fname, begin = 0, step = 1) :
     cc = 0
     while len(blk) > 0 :
         if cc >= begin and (cc - begin) % step == 0 :
-            coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm)
+            coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm, ml)
             if is_converge : 
                 if len(coord) == 0:
                     break
@@ -76,9 +78,10 @@ def get_frames (fname, begin = 0, step = 1) :
                 all_forces.append(force)
                 if virial is not None :
                     all_virials.append(virial)
-        blk = get_outcar_block(fp)
+
+        blk = get_outcar_block(fp, ml)
         cc += 1
-        
+
     if len(all_virials) == 0 :
         all_virials = None
     else :
@@ -87,36 +90,39 @@ def get_frames (fname, begin = 0, step = 1) :
     return atom_names, atom_numbs, atom_types, np.array(all_cells), np.array(all_coords), np.array(all_energies), np.array(all_forces), all_virials
 
 
-def analyze_block(lines, ntot, nelm) :
+def analyze_block(lines, ntot, nelm, ml = False):
     coord = []
     cell = []
     energy = None
     force = []
     virial = None
     is_converge = True
     sc_index = 0
-    for idx,ii in enumerate(lines) :
-        if 'Iteration' in ii:
+    #select different searching tokens based on the ml label
+    energy_token = ['free  energy   TOTEN', 'free  energy ML TOTEN']
+    energy_index = [4, 5]
+    viral_token = ['FORCE on cell =-STRESS in cart. coord.  units', 'ML FORCE']
+    viral_index = [14, 4]
+    cell_token = ['VOLUME and BASIS', 'ML FORCE']
+    cell_index = [5, 12]
+    ml_index = int(ml)
+    for idx,ii in enumerate(lines):
+        #if set ml == True, is_converged will always be True
+        if ('Iteration' in ii) and (not ml):
             sc_index = int(ii.split()[3][:-1])
             if sc_index >= nelm:
                 is_converge = False
-        elif 'free  energy   TOTEN' in ii:
-            energy = float(ii.split()[4])
+        elif energy_token[ml_index] in ii:
+            energy = float(ii.split()[energy_index[ml_index]])
             assert((force is not None) and len(coord) > 0 and len(cell) > 0)
-            # all_coords.append(coord)
-            # all_cells.append(cell)
-            # all_energies.append(energy)
-            # all_forces.append(force)
-            # if virial is not None :
-            #     all_virials.append(virial)
             return coord, cell, energy, force, virial, is_converge
-        elif 'VOLUME and BASIS' in ii:
+        elif cell_token[ml_index] in ii:
             for dd in range(3) :
-                tmp_l = lines[idx+5+dd]
+                tmp_l = lines[idx+cell_index[ml_index]+dd]
                 cell.append([float(ss) 
                              for ss in tmp_l.replace('-',' -').split()[0:3]])
-        elif 'in kB' in ii:
-            tmp_v = [float(ss) for ss in ii.split()[2:8]]
+        elif viral_token[ml_index] in ii:
+            tmp_v = [float(ss) for ss in lines[idx+viral_index[ml_index]].split()[2:8]]
             virial = np.zeros([3,3])
             virial[0][0] = tmp_v[0]
             virial[1][1] = tmp_v[1]
@@ -127,9 +133,7 @@ def analyze_block(lines, ntot, nelm) :
             virial[2][1] = tmp_v[4]
             virial[0][2] = tmp_v[5]
             virial[2][0] = tmp_v[5]
-        elif 'TOTAL-FORCE' in ii and ("ML" not in ii):
-            # use the lines with " POSITION                                       TOTAL-FORCE (eV/Angst)"
-            # exclude the lines with "  POSITION                                       TOTAL-FORCE (eV/Angst) (ML)"
+        elif 'TOTAL-FORCE' in ii and (("ML" in ii) == ml):
             for jj in range(idx+2, idx+2+ntot) :
                 tmp_l = lines[jj]
                 info = [float(ss) for ss in tmp_l.split()]