Skip to content

Commit 2625e79

Browse files
authored
Merge pull request #61 from felix5572/devel
support equi/gap/xyz
2 parents b1a8e1e + d87c34a commit 2625e79

20 files changed

+314
-7
lines changed

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,33 @@ The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any),
5151

5252
The `System` or `LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`:
5353

54+
For `quip/gap xyz` files, a single .xyz file may contain many configurations with different numbers of atoms and different atom types.
55+
56+
The class `dpdata.MultiSystems` can read data from this kind of file.
57+
Currently, the class `dpdata.MultiSystems` only supports the quip/gap xyz file format.
58+
59+
The following commands relating to `Class dpdata.MultiSystems` may be useful.
60+
```python
61+
# load data
62+
63+
xyz_multi_systems = dpdata.MultiSystems.from_file('tests/xyz/xyz_unittest.xyz','quip/gap/xyz')
64+
65+
# print the multi_system information
66+
print(xyz_multi_systems)
67+
print(xyz_multi_systems.systems) # returns a dictionary
68+
69+
# print the system information
70+
print(xyz_multi_systems.systems['B1C9'].data)
71+
72+
# dump a system's data to ./my_work_dir/B1C9_raw folder
73+
xyz_multi_systems.systems['B1C9'].to_deepmd_raw('./my_work_dir/B1C9_raw')
74+
75+
# dump all systems
76+
xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
77+
78+
79+
```
80+
5481
| Software| format | multi frames | labeled | class | format key |
5582
| ------- | :--- | :---: | :---: | :--- | :--- |
5683
| vasp | poscar | False | False | System | 'vasp/poscar' |
@@ -70,6 +97,7 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
7097
| QE | log | False | True | LabeledSystem | 'qe/pw/scf' |
7198
| QE | log | True | False | System | 'qe/cp/traj' |
7299
| QE | log | True | True | LabeledSystem | 'qe/cp/traj' |
100+
|quip/gap|xyz|True|True|MultiSystems|'quip/gap/xyz'|
73101

74102
## Access data
75103
These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example

dpdata/system.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#%%
12
import os
23
import numpy as np
34
import dpdata.lammps.lmp
@@ -18,6 +19,7 @@
1819
from monty.json import MSONable
1920
from monty.serialization import loadfn,dumpfn
2021
from dpdata.periodic_table import Element
22+
from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems
2123

2224
class System (MSONable) :
2325
'''
@@ -312,8 +314,8 @@ def append(self, system) :
312314
for ii in ['atom_numbs', 'atom_names'] :
313315
assert(system.data[ii] == self.data[ii])
314316
for ii in ['atom_types','orig'] :
315-
eq = (system.data[ii] == self.data[ii])
316-
assert(eq.all())
317+
eq = [v1==v2 for v1,v2 in zip(system.data[ii], self.data[ii])]
318+
assert(all(eq))
317319
for ii in ['coords', 'cells'] :
318320
self.data[ii] = np.concatenate((self.data[ii], system[ii]), axis = 0)
319321
return True
@@ -903,7 +905,7 @@ def sort_atom_types(self):
903905
class MultiSystems:
904906
'''A set containing several systems.'''
905907

906-
def __init__(self, *systems, type_map=None):
908+
def __init__(self, *systems,type_map=None):
907909
"""
908910
Parameters
909911
----------
@@ -942,6 +944,22 @@ def __add__(self, others) :
942944
elif isinstance(others, list):
943945
return self.__class__(self, *others)
944946
raise RuntimeError("Unspported data structure")
947+
948+
@classmethod
def from_file(cls, file_name, fmt):
    """
    Alternate constructor: build an empty instance and fill it from a file.

    Parameters
    ----------
    file_name : str
        Path of the file to read.
    fmt : str
        Format key, forwarded unchanged to `load_systems_from_file`.

    Returns
    -------
    A new instance of `cls` on which `load_systems_from_file` has been called.
    """
    loaded = cls()
    loaded.load_systems_from_file(file_name=file_name, fmt=fmt)
    return loaded
953+
954+
def load_systems_from_file(self, file_name=None, fmt=None):
    """
    Read the systems stored in `file_name` and add them to this object.

    Parameters
    ----------
    file_name : str, optional
        Path of the file to read. When it is None nothing is done.
    fmt : str, optional
        Format key of the file. Accepted keys are 'quip/gap/xyz' and 'xyz'.

    Raises
    ------
    RuntimeError
        If `file_name` is given but `fmt` is None, or if `fmt` is not one of
        the accepted keys.
    """
    if file_name is None:
        return
    if fmt is None:
        raise RuntimeError("must specify file format for file {}".format(file_name))
    # A membership test is required here: `fmt == 'quip/gap/xyz' or 'xyz'`
    # is always true because the non-empty literal 'xyz' is truthy, which
    # made the unknown-format error below unreachable.
    if fmt in ('quip/gap/xyz', 'xyz'):
        self.from_quip_gap_xyz_file(file_name)
    else:
        raise RuntimeError("unknown file format for file {} format {},now supported 'quip/gap/xyz'".format(file_name, fmt))
962+
945963

946964
def get_nframes(self) :
947965
"""Returns number of frames in all systems"""
@@ -997,6 +1015,14 @@ def check_atom_names(self, system):
9971015
system.add_atom_names(new_in_self)
9981016
system.sort_atom_names()
9991017

1018+
def from_quip_gap_xyz_file(self, filename):
    """
    Append every frame of a quip/gap xyz file to this object.

    Each item produced by iterating `QuipGapxyzSystems(filename)` is wrapped
    in a `LabeledSystem` (passed as its `data` argument) and handed to
    `self.append`.
    """
    for frame_data in QuipGapxyzSystems(filename):
        self.append(LabeledSystem(data=frame_data))
1024+
1025+
10001026
def to_deepmd_raw(self, folder) :
10011027
"""
10021028
Dump systems in deepmd raw format to `folder` for each system.
@@ -1053,9 +1079,12 @@ def check_LabeledSystem(data):
10531079

10541080
def elements_index_map(elements, standard=False, inverse=False):
    """
    Map the entries of `elements` to their list positions, or the reverse.

    Parameters
    ----------
    elements : list
        Element symbols. When `standard` is true the list is sorted IN PLACE
        with the key `Element(x).Z` before the map is built.
    standard : bool
        Sort `elements` by `Element(x).Z` first.
    inverse : bool
        If true return {position: element}, otherwise {element: position}.

    Returns
    -------
    dict
    """
    if standard:
        elements.sort(key=lambda symbol: Element(symbol).Z)
    positions = range(len(elements))
    pairs = zip(positions, elements) if inverse else zip(elements, positions)
    return dict(pairs)
1087+
1088+
10611089

1090+
# %%

dpdata/xyz/quip_gap_xyz.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/usr/bin/env python3
2+
3+
#%%
4+
# with open('./test.xyz', 'r') as xyz_file:
5+
# lines = xyz_file.readlines()
6+
# print(lines)
7+
#%%
8+
import numpy as np
9+
from collections import OrderedDict
10+
import re
11+
class QuipGapxyzSystems(object):
    """
    Iterator over the frames of a QUIP/GAP extended-xyz file.

    A frame consists of a line holding the atom count, a comment line made of
    `key=value` / `key="quoted value"` entries, and one line per atom.  Each
    iteration step reads one frame from the file and returns the dict built
    by `handle_single_xyz_frame`.

    The file is opened in `__init__` and closed when the frames are
    exhausted, when `close()` is called, or when the object is deleted.
    """

    # A frame starts with a line whose first token is the atom count.
    _ATOM_COUNT_RE = re.compile(r'^\s*(\d+)\s*')
    # key=value or key="quoted value" entries of the comment line; every
    # entry must be followed by whitespace.
    _KEY_VALUE_RE = re.compile(r'(?P<key>\S+)=(?P<quote>[\'\"]?)(?P<value>.*?)(?P=quote)\s+')
    # name:datatype:column-count triples inside the `Properties` entry.
    _PROPERTY_RE = re.compile(r'(?P<key>\w+?):(?P<datatype>[a-zA-Z]):(?P<value>\d+)')
    # Datatype letter required for each per-atom property that is understood.
    _PROPERTY_DATATYPES = {'species': 'S', 'pos': 'R', 'Z': 'I', 'force': 'R'}

    def __init__(self, file_name):
        """Open `file_name` for reading and prepare the frame generator."""
        self.file_object = open(file_name, 'r')
        self.block_generator = self.get_block_generator()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the parsed dict of the next frame; StopIteration at end of file."""
        return self.handle_single_xyz_frame(next(self.block_generator))

    def __del__(self):
        self.close()

    def close(self):
        """Close the underlying file. Safe to call more than once."""
        # `file_object` is missing when open() raised inside __init__.
        file_object = getattr(self, 'file_object', None)
        if file_object is not None:
            file_object.close()

    def get_block_generator(self):
        """
        Yield the raw lines of one frame at a time.

        Lines that do not start with an integer are skipped.  For a line
        starting with the integer `atom_num`, that line plus the following
        `atom_num + 1` lines are yielded as a list of strings.

        Raises
        ------
        RuntimeError
            If the file ends before the frame is complete.
        """
        try:
            while True:
                line = self.file_object.readline()
                if not line:
                    break
                count_match = self._ATOM_COUNT_RE.match(line)
                if count_match is None:
                    continue
                atom_num = int(count_match.group(1))
                lines = [line]
                # one comment line followed by atom_num atom lines
                for _ in range(atom_num + 1):
                    lines.append(self.file_object.readline())
                # readline() returns '' at end of file, so an empty last
                # entry means the frame was truncated.
                if not lines[-1]:
                    raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines))
                yield lines
        finally:
            self.close()

    @staticmethod
    def _parse_3x3(text):
        """Turn nine whitespace-separated numbers into a (1, 3, 3) float32 array."""
        return np.array(text.split()).astype('float32').reshape(1, 3, 3)

    @staticmethod
    def handle_single_xyz_frame(lines):
        """
        Convert the raw lines of one frame into a data dict.

        Parameters
        ----------
        lines : list of str
            The atom-count line, the comment line and one line per atom.

        Returns
        -------
        dict
            Keys: 'atom_names' (list of str, in order of first appearance),
            'atom_numbs' (list of int), 'atom_types' (int array of per-atom
            indices into 'atom_names'), 'cells' (1x3x3 float32, from the
            `Lattice` entry), 'coords' and 'forces' (float32, one leading
            frame axis), 'energies' (float32, length 1, from the `energy`
            entry), 'virials' (1x3x3 float32, or None when there is no
            `virial` entry) and 'orig' ([0, 0, 0]).

        Raises
        ------
        RuntimeError
            If the number of lines does not match the atom count, a property
            has an unexpected datatype or an unknown name, or the `species`
            or `pos` property is missing.
        KeyError
            If the comment line has no `Properties`, `Lattice` or `energy`
            entry.
        """
        atom_num = int(lines[0].strip('\n').strip())
        if len(lines) != atom_num + 2:
            raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines)))

        # The trailing blank guarantees that the last entry is followed by
        # whitespace, which the key/value pattern requires.
        comment_line = lines[1].strip('\n').strip() + ' '
        field_dict = {}
        for entry in QuipGapxyzSystems._KEY_VALUE_RE.finditer(comment_line):
            field_dict[entry.group('key')] = entry.group('value')

        prop_list = [m.groupdict() for m in QuipGapxyzSystems._PROPERTY_RE.finditer(field_dict['Properties'])]

        # String table with one row per atom; the properties occupy
        # consecutive column ranges in the order given by `Properties`.
        data_array = np.array([line.split() for line in lines[2:]])
        columns = {}
        used_column = 0
        for prop in prop_list:
            key = prop['key']
            expected = QuipGapxyzSystems._PROPERTY_DATATYPES.get(key)
            if expected is None:
                raise RuntimeError("unknown field {}".format(key))
            if prop['datatype'] != expected:
                raise RuntimeError("datatype for {} must be '{}' instead of {}".format(key, expected, prop['datatype']))
            field_length = int(prop['value'])
            columns[key] = data_array[:, used_column:used_column + field_length]
            used_column += field_length

        if 'species' not in columns:
            raise RuntimeError("type_array can't be None type, check .xyz file")
        if 'pos' not in columns:
            raise RuntimeError("coords_array can't be None type, check .xyz file")

        # Number the species in order of first appearance and count them.
        type_index = OrderedDict()
        type_counts = OrderedDict()
        atom_type_list = []
        for raw_name in columns['species'].flatten():
            name = str(raw_name)
            if name not in type_index:
                type_index[name] = len(type_index)
                type_counts[name] = 0
            type_counts[name] += 1
            atom_type_list.append(type_index[name])

        if field_dict.get('virial', None):
            virials = QuipGapxyzSystems._parse_3x3(field_dict['virial'])
        else:
            virials = None

        info_dict = {}
        info_dict['atom_names'] = list(type_counts.keys())
        info_dict['atom_numbs'] = list(type_counts.values())
        info_dict['atom_types'] = np.array(atom_type_list).astype(int)
        info_dict['cells'] = QuipGapxyzSystems._parse_3x3(field_dict['Lattice'])
        info_dict['coords'] = np.array([columns['pos']]).astype('float32')
        info_dict['energies'] = np.array([field_dict['energy']]).astype('float32')
        # NOTE(review): when the frame has no `force` property this is
        # np.array([None]) converted to float32; that long-standing behaviour
        # is kept unchanged here.
        info_dict['forces'] = np.array([columns.get('force')]).astype('float32')
        info_dict['virials'] = virials
        info_dict['orig'] = [0, 0, 0]
        return info_dict

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
long_description=readme,
2525
long_description_content_type="text/markdown",
2626
url="https://github.com/deepmodeling/dpdata",
27-
packages=['dpdata', 'dpdata/vasp', 'dpdata/lammps', 'dpdata/md', 'dpdata/deepmd', 'dpdata/qe', 'dpdata/siesta', 'dpdata/gaussian', 'dpdata/cp2k'],
27+
packages=['dpdata', 'dpdata/vasp', 'dpdata/lammps', 'dpdata/md', 'dpdata/deepmd', 'dpdata/qe', 'dpdata/siesta', 'dpdata/gaussian', 'dpdata/cp2k','dpdata/xyz'],
2828
package_data={'dpdata':['*.json']},
2929
classifiers=[
3030
"Programming Language :: Python :: 3.6",

tests/test_quip_gap_xyz.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import os
2+
import numpy as np
3+
import unittest
4+
from context import dpdata
5+
from comp_sys import CompLabeledSys
6+
7+
class TestQuipGapxyz(unittest.TestCase, CompLabeledSys):
8+
def setUp (self) :
9+
self.multi_systems = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz')
10+
self.system_1 = self.multi_systems.systems['B1C9']
11+
self.system_2 = dpdata.LabeledSystem('xyz/B1C9', fmt='deepmd')
12+
self.places = 6
13+
self.e_places = 6
14+
self.f_places = 6
15+
self.v_places = 4
16+
17+
class TestQuipGapxyz2(unittest.TestCase, CompLabeledSys):
18+
def setUp (self) :
19+
self.system_temp0 = dpdata.MultiSystems.from_file(file_name='xyz/xyz_unittest.xyz', fmt='quip/gap/xyz')
20+
self.system_1 = self.system_temp0.systems['B5C7']
21+
self.system_temp1 = dpdata.LabeledSystem('xyz/B1C9', fmt='deepmd')
22+
self.system_temp2 = dpdata.LabeledSystem('xyz/B5C7', fmt='deepmd')
23+
self.system_temp3 = dpdata.MultiSystems(self.system_temp2, self.system_temp1)
24+
self.system_2 = self.system_temp3.systems['B5C7']
25+
self.places = 6
26+
self.e_places = 6
27+
self.f_places = 6
28+
self.v_places = 4
29+
30+
if __name__ == '__main__':
31+
unittest.main()

tests/xyz/B1C9/box.raw

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
9.217081069946289062e+00 0.000000000000000000e+00 0.000000000000000000e+00 4.859999762629740871e-07 6.431267261505126953e+00 0.000000000000000000e+00 4.201562881469726562e+00 4.099999983964153216e-08 2.205334901809692383e+00
2+
5.512073516845703125e+00 0.000000000000000000e+00 0.000000000000000000e+00 4.160000059982849052e-07 5.512073516845703125e+00 0.000000000000000000e+00 2.529999960643181112e-07 2.529999960643181112e-07 3.347159147262573242e+00

tests/xyz/B1C9/coord.raw

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
1.238022994995117188e+01 3.215630054473876953e+00 2.131030082702636719e+00 1.296782970428466797e+01 4.328790187835693359e+00 2.141720056533813477e+00 1.296782970428466797e+01 2.102469921112060547e+00 2.141720056533813477e+00 7.954239845275878906e+00 1.034819960594177246e+00 2.102900028228759766e+00 7.954239845275878906e+00 5.396450042724609375e+00 2.102900028228759766e+00 4.896999895572662354e-01 5.378680229187011719e+00 3.689999878406524658e-02 4.896999895572662354e-01 1.052590012550354004e+00 3.689999878406524658e-02 2.719000101089477539e+00 1.711969971656799316e+00 1.720999926328659058e-01 2.719000101089477539e+00 4.719299793243408203e+00 1.720999926328659058e-01 9.699810028076171875e+00 3.215630054473876953e+00 5.539500117301940918e-01
2+
4.085810184478759766e+00 4.576230049133300781e+00 2.942379951477050781e+00 1.426270008087158203e+00 9.358400106430053711e-01 2.942379951477050781e+00 9.358400106430053711e-01 4.085810184478759766e+00 2.942379951477050781e+00 4.576230049133300781e+00 1.426270008087158203e+00 2.942379951477050781e+00 2.756040096282958984e+00 2.756040096282958984e+00 3.207600116729736328e-01 2.949359893798828125e+00 1.197110056877136230e+00 3.320899963378906250e+00 2.562710046768188477e+00 4.314960002899169922e+00 3.320899963378906250e+00 4.314960002899169922e+00 2.949359893798828125e+00 3.320899963378906250e+00 1.197110056877136230e+00 2.562710046768188477e+00 3.320899963378906250e+00 2.756040096282958984e+00 2.756040096282958984e+00 1.720729947090148926e+00

tests/xyz/B1C9/energy.raw

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-5.834249877929687500e+01
2+
-5.639742660522460938e+01

tests/xyz/B1C9/force.raw

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-9.355509757995605469e+00 -0.000000000000000000e+00 -2.236460000276565552e-01 7.056536197662353516e+00 1.119171047210693359e+01 1.800871014595031738e+00 7.056534767150878906e+00 -1.119171047210693359e+01 1.800871014595031738e+00 1.710104942321777344e+00 -3.803577899932861328e+00 -1.240210011601448059e-01 1.710106015205383301e+00 3.803577899932861328e+00 -1.240210011601448059e-01 4.354149997234344482e-01 7.533950209617614746e-01 -6.208680272102355957e-01 4.354149997234344482e-01 -7.533950209617614746e-01 -6.208680272102355957e-01 -3.430412054061889648e+00 5.665700137615203857e-02 -4.403569996356964111e-01 -3.430412054061889648e+00 -5.665700137615203857e-02 -4.403569996356964111e-01 -2.187779903411865234e+00 -0.000000000000000000e+00 -1.007603049278259277e+00
2+
3.989300131797790527e-02 -1.872578978538513184e+00 9.720240235328674316e-01 -3.989300131797790527e-02 1.872578978538513184e+00 9.720240235328674316e-01 1.872578978538513184e+00 3.989300131797790527e-02 9.720240235328674316e-01 -1.872578978538513184e+00 -3.989300131797790527e-02 9.720240235328674316e-01 -0.000000000000000000e+00 -0.000000000000000000e+00 -1.597074985504150391e+00 7.413330078125000000e-01 -2.438414096832275391e+00 -2.333703041076660156e+00 -7.413340210914611816e-01 2.438414096832275391e+00 -2.333703041076660156e+00 2.438414096832275391e+00 7.413340210914611816e-01 -2.333703041076660156e+00 -2.438414096832275391e+00 -7.413340210914611816e-01 -2.333703041076660156e+00 9.999999974752427079e-07 9.999999974752427079e-07 7.043791770935058594e+00

tests/xyz/B1C9/type.raw

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
0
2+
1
3+
1
4+
1
5+
1
6+
1
7+
1
8+
1
9+
1
10+
1

0 commit comments

Comments
 (0)