Skip to content

Commit 8ef6758

Browse files
authored
Merge pull request #63 from felix5572/devel
support MultiSystems from_dir && add more test cases for quip/gap/xyz
2 parents 2625e79 + c9cc927 commit 8ef6758

File tree

6 files changed

+162
-24
lines changed

6 files changed

+162
-24
lines changed

README.md

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,24 @@ The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any),
5151

5252
The `System` or `LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`:
5353

54-
For `quip/gap xyz` files, single .xyz file may include many different configurations with different atom numbers and atom type.
54+
The class `dpdata.MultiSystems` can read data from a directory which may contain many files of different systems, or from a single xyz file which contains different systems.
5555

56-
The Class `dpdata.MultiSystems` can read data this kind of file.
57-
Now Class dpdata.MultiSystems Only support quip/gap xyz format file.
56+
Use `dpdata.MultiSystems.from_dir` to read from a directory; `dpdata.MultiSystems` will walk the directory
57+
recursively and find all files with the specified file_name. It supports all the file formats that `dpdata.LabeledSystem` supports.
58+
59+
Use `dpdata.MultiSystems.from_file` to read from a single file. Currently only the quip/gap/xyz format is supported.
60+
61+
For example, for `quip/gap xyz` files, a single .xyz file may contain many different configurations with different numbers of atoms and different atom types.
5862

5963
The following commands relating to `Class dpdata.MultiSystems` may be useful.
6064
```python
6165
# load data
6266

63-
xyz_multi_systems = dpdata.MultiSystems.from_file('tests/xyz/xyz_unittest.xyz','quip/gap/xyz')
67+
xyz_multi_systems = dpdata.MultiSystems.from_file(file_name='tests/xyz/xyz_unittest.xyz',fmt='quip/gap/xyz')
68+
vasp_multi_systems = dpdata.MultiSystems.from_dir(dir_name='./mgal_outcar', file_name='OUTCAR', fmt='vasp/outcar')
69+
70+
# use wildcard
71+
vasp_multi_systems = dpdata.MultiSystems.from_dir(dir_name='./mgal_outcar', file_name='*OUTCAR', fmt='vasp/outcar')
6472

6573
# print the multi_system information
6674
print(xyz_multi_systems)

dpdata/system.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#%%
22
import os
3+
import glob
34
import numpy as np
45
import dpdata.lammps.lmp
56
import dpdata.lammps.dump
@@ -950,7 +951,16 @@ def from_file(cls,file_name,fmt):
950951
multi_systems = cls()
951952
multi_systems.load_systems_from_file(file_name=file_name,fmt=fmt)
952953
return multi_systems
953-
954+
955+
@classmethod
956+
def from_dir(cls,dir_name, file_name, fmt='auto'):
957+
multi_systems = cls()
958+
target_file_list = glob.glob('./{}/**/{}'.format(dir_name, file_name), recursive=True)
959+
for target_file in target_file_list:
960+
multi_systems.append(LabeledSystem(file_name=target_file, fmt=fmt))
961+
return multi_systems
962+
963+
954964
def load_systems_from_file(self, file_name=None, fmt=None):
955965
if file_name is not None:
956966
if fmt is None:
@@ -1015,10 +1025,10 @@ def check_atom_names(self, system):
10151025
system.add_atom_names(new_in_self)
10161026
system.sort_atom_names()
10171027

1018-
def from_quip_gap_xyz_file(self,filename):
1019-
# quip_gap_xyz_systems = QuipGapxyzSystems(filename)
1028+
def from_quip_gap_xyz_file(self,file_name):
1029+
# quip_gap_xyz_systems = QuipGapxyzSystems(file_name)
10201030
# print(next(quip_gap_xyz_systems))
1021-
for info_dict in QuipGapxyzSystems(filename):
1031+
for info_dict in QuipGapxyzSystems(file_name):
10221032
system=LabeledSystem(data=info_dict)
10231033
self.append(system)
10241034

dpdata/xyz/quip_gap_xyz.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,4 @@
11
#!/usr/bin/env python3
2-
3-
#%%
4-
# with open('./test.xyz', 'r') as xyz_file:
5-
# lines = xyz_file.readlines()
6-
# print(lines)
72
#%%
83
import numpy as np
94
from collections import OrderedDict
@@ -44,20 +39,16 @@ def handle_single_xyz_frame(lines):
4439
if len(lines) != atom_num + 2:
4540
raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines)))
4641
data_format_line = lines[1].strip('\n').strip()+str(' ')
47-
p1 = re.compile(r'(?P<key>\S+)=(?P<quote>[\'\"]?)(?P<value>.*?)(?P=quote)\s+')
48-
p2 = re.compile(r'(?P<key>\w+?):(?P<datatype>[a-zA-Z]):(?P<value>\d+)')
49-
field_list = [kv_dict.groupdict() for kv_dict in p1.finditer(data_format_line)]
50-
field_dict = {}
51-
for item in field_list:
52-
field_dict[item['key']]=item['value']
53-
data_format_line = lines[1]
54-
data_format_list= [m.groupdict() for m in p1.finditer(data_format_line)]
42+
field_value_pattern= re.compile(r'(?P<key>\S+)=(?P<quote>[\'\"]?)(?P<value>.*?)(?P=quote)\s+')
43+
prop_pattern = re.compile(r'(?P<key>\w+?):(?P<datatype>[a-zA-Z]):(?P<value>\d+)')
44+
45+
data_format_list= [kv_dict.groupdict() for kv_dict in field_value_pattern.finditer(data_format_line)]
5546
field_dict = {}
5647
for item in data_format_list:
5748
field_dict[item['key']]=item['value']
5849

5950
Properties = field_dict['Properties']
60-
prop_list = [m.groupdict() for m in p2.finditer(Properties)]
51+
prop_list = [kv_dict.groupdict() for kv_dict in prop_pattern.finditer(Properties)]
6152

6253
data_lines = []
6354
for line in lines[2:]:
@@ -127,6 +118,7 @@ def handle_single_xyz_frame(lines):
127118
virials = np.array([np.array(list(filter(bool,field_dict['virial'].split(' ')))).reshape(3,3)]).astype('float32')
128119
else:
129120
virials = None
121+
130122
info_dict = {}
131123
info_dict['atom_names'] = list(type_num_array[:,0])
132124
info_dict['atom_numbs'] = list(type_num_array[:,1].astype(int))

tests/test_quip_gap_xyz.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from context import dpdata
55
from comp_sys import CompLabeledSys
66

7-
class TestQuipGapxyz(unittest.TestCase, CompLabeledSys):
7+
class TestQuipGapxyz1(unittest.TestCase, CompLabeledSys):
88
def setUp (self) :
99
self.multi_systems = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz')
1010
self.system_1 = self.multi_systems.systems['B1C9']
@@ -17,7 +17,7 @@ def setUp (self) :
1717
class TestQuipGapxyz2(unittest.TestCase, CompLabeledSys):
1818
def setUp (self) :
1919
self.system_temp0 = dpdata.MultiSystems.from_file(file_name='xyz/xyz_unittest.xyz', fmt='quip/gap/xyz')
20-
self.system_1 = self.system_temp0.systems['B5C7']
20+
self.system_1 = self.system_temp0.systems['B5C7'] # .sort_atom_types()
2121
self.system_temp1 = dpdata.LabeledSystem('xyz/B1C9', fmt='deepmd')
2222
self.system_temp2 = dpdata.LabeledSystem('xyz/B5C7', fmt='deepmd')
2323
self.system_temp3 = dpdata.MultiSystems(self.system_temp2, self.system_temp1)
@@ -27,5 +27,54 @@ def setUp (self) :
2727
self.f_places = 6
2828
self.v_places = 4
2929

30+
class TestQuipGapxyzsort1(unittest.TestCase, CompLabeledSys):
31+
def setUp (self) :
32+
self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.sort.xyz','quip/gap/xyz')
33+
self.system_1 = self.multi_systems_1.systems['B5C7']
34+
self.system_1.sort_atom_types()
35+
self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz')
36+
self.system_2 = self.multi_systems_2.systems['B5C7']
37+
self.places = 6
38+
self.e_places = 6
39+
self.f_places = 6
40+
self.v_places = 4
41+
42+
class TestQuipGapxyzsort2(unittest.TestCase, CompLabeledSys):
43+
def setUp (self) :
44+
self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.sort.xyz','quip/gap/xyz')
45+
self.system_1 = self.multi_systems_1.systems['B1C9']
46+
self.system_1.sort_atom_types()
47+
self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz')
48+
self.system_2 = self.multi_systems_2.systems['B1C9']
49+
self.places = 6
50+
self.e_places = 6
51+
self.f_places = 6
52+
self.v_places = 4
53+
54+
class TestQuipGapxyzfield(unittest.TestCase, CompLabeledSys):
55+
def setUp (self) :
56+
self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.field.xyz','quip/gap/xyz')
57+
self.system_1 = self.multi_systems_1.systems['B1C9']
58+
self.system_1.sort_atom_types()
59+
self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz')
60+
self.system_2 = self.multi_systems_2.systems['B1C9']
61+
self.places = 6
62+
self.e_places = 6
63+
self.f_places = 6
64+
self.v_places = 4
65+
66+
class TestQuipGapxyzfield2(unittest.TestCase, CompLabeledSys):
67+
def setUp (self) :
68+
self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.field.xyz','quip/gap/xyz')
69+
self.system_1 = self.multi_systems_1.systems['B5C7']
70+
self.system_1.sort_atom_types()
71+
self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz')
72+
self.system_2 = self.multi_systems_2.systems['B5C7']
73+
self.places = 6
74+
self.e_places = 6
75+
self.f_places = 6
76+
self.v_places = 4
77+
78+
3079
if __name__ == '__main__':
3180
unittest.main()

tests/xyz/xyz_unittest.field.xyz

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# change field structure,table structure add quote and blank line
2+
12
3+
energy=0.2397023e+01 virial="159.582203018 1.23282341824e-05 0.187835167689 1.23282341824e-05 164.488614479 9.04070506711e-06 0.187835167689 9.04070506711e-06 300.596373102" Lattice="2.405388711 0.0 0.0 6.67e-07 8.830761667 0.0 2.145883783 6.3e-08 2.066179013" Properties=force:R:3:pos:R:3:species:S:1:Z:I:1
4+
-7.25565900000 -8.82919400000 4.68743200000 3.19567000000 0.97965000000 1.52250000000 B 5
5+
7.25565700000 -8.82919400000 -4.68743200000 1.35561000000 0.97965000000 0.54368000000 B 5
6+
7.25565900000 8.82919400000 -4.68743200000 1.35561000000 7.85111000000 0.54368000000 B 5
7+
-7.25565700000 8.82919400000 4.68743200000 3.19567000000 7.85111000000 1.52250000000 B 5
8+
-0.00000000000 0 +0.0 2.27564000000 4.41538000000 1.03309000000 B 5
9+
-14.39490800000 -19.12936600000 -4.67857200000 1.95905000000 6.23505000000 1.53936000000 C 6
10+
14.39490500000 -19.12936600000 4.67857200000 2.59222000000 6.23505000000 0.52682000000 C 6
11+
14.39490800000 19.12936600000 4.67857200000 2.59222000000 2.59571000000 0.52682000000 C 6
12+
-14.39490500000 19.12936600000 -4.67857200000 1.95905000000 2.59571000000 1.53936000000 C 6
13+
-0.00000000000 -4.51935800000 0 2.27564000000 1.56002000000 1.03309000000 C 6
14+
0.00000000000 4.51935800000 0 2.27564000000 7.27074000000 1.03309000000 C 6
15+
-0.00000000000 -0.00000000000 0 2.27564000000 0.00000000000 1.03309000000 C 6
16+
17+
18+
10
19+
virial=" -3.68535041825 1.63204257089e-06 -4.28008468355 1.63204257089e-06 18.1630123797 0.0 -4.28008468355 0.0 3.03073243091 " Lattice=" 9.217080809 0.0 0.0 4.86e-07 6.431267224 0.0 4.201562981 4.1e-08 2.205334915 " Properties='species:S:1:pos:R:3:Z:I:1:force:R:3 ' energy='-58.342497 '
20+
B 12.38023000000 3.21563000000 2.13103000000 5 -9.35551000000 -0.00000000000 -0.22364600000
21+
C 12.96783000000 4.32879000000 2.14172000000 6 7.05653600000 11.19171000000 1.80087100000
22+
C 12.96783000000 2.10247000000 2.14172000000 6 7.05653500000 -11.19171000000 1.80087100000
23+
C 7.95424000000 1.03482000000 2.10290000000 6 1.71010500000 -3.80357800000 -0.12402100000
24+
C 7.95424000000 5.39645000000 2.10290000000 6 1.71010600000 3.80357800000 -0.12402100000
25+
C 0.48970000000 5.37868000000 0.03690000000 6 0.43541500000 0.75339500000 -0.62086800000
26+
C 0.48970000000 1.05259000000 0.03690000000 6 0.43541500000 -0.75339500000 -0.62086800000
27+
C 2.71900000000 1.71197000000 0.17210000000 6 -3.43041200000 0.05665700000 -0.44035700000
28+
C 2.71900000000 4.71930000000 0.17210000000 6 -3.43041200000 -0.05665700000 -0.44035700000
29+
C 9.69981000000 3.21563000000 0.55395000000 6 -2.18778000000 -0.00000000000 -1.00760300000
30+
10
31+
energy=-56.397425 virial="-5.61261501333 -6.34809444383e-07 6.34809444383e-07 -6.34809444383e-07 -5.61261501333 6.34809444383e-07 6.34809444383e-07 6.34809444383e-07 4.91130356636" Lattice="5.512073672 0.0 0.0 4.16e-07 5.512073672 0.0 2.53e-07 2.53e-07 3.347159197" Properties=species:S:1:pos:R:3:Z:I:1:force:R:3
32+
B 4.08581000000 4.57623000000 2.94238000000 5 0.03989300000 -1.87257900000 0.97202400000
33+
C 1.42627000000 0.93584000000 2.94238000000 5 -0.03989300000 1.87257900000 0.97202400000
34+
C 0.93584000000 4.08581000000 2.94238000000 5 1.87257900000 0.03989300000 0.97202400000
35+
C 4.57623000000 1.42627000000 2.94238000000 5 -1.87257900000 -0.03989300000 0.97202400000
36+
C 2.75604000000 2.75604000000 0.32076000000 5 -0.00000000000 -0.00000000000 -1.59707500000
37+
C 2.94936000000 1.19711000000 3.32090000000 6 0.74133300000 -2.43841400000 -2.33370300000
38+
C 2.56271000000 4.31496000000 3.32090000000 6 -0.74133400000 2.43841400000 -2.33370300000
39+
C 4.31496000000 2.94936000000 3.32090000000 6 2.43841400000 0.74133400000 -2.33370300000
40+
C 1.19711000000 2.56271000000 3.32090000000 6 -2.43841400000 -0.74133400000 -2.33370300000
41+
C 2.75604000000 2.75604000000 1.72073000000 6 0.00000100000 0.00000100000 7.04379200000

tests/xyz/xyz_unittest.sort.xyz

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
12
2+
energy=2.397023 virial="159.582203018 1.23282341824e-05 0.187835167689 1.23282341824e-05 164.488614479 9.04070506711e-06 0.187835167689 9.04070506711e-06 300.596373102" Lattice="2.405388711 0.0 0.0 6.67e-07 8.830761667 0.0 2.145883783 6.3e-08 2.066179013" Properties=species:S:1:pos:R:3:Z:I:1:force:R:3
3+
C 1.95905000000 6.23505000000 1.53936000000 6 -14.39490800000 -19.12936600000 -4.67857200000
4+
B 3.19567000000 0.97965000000 1.52250000000 5 -7.25565900000 -8.82919400000 4.68743200000
5+
B 1.35561000000 0.97965000000 0.54368000000 5 7.25565700000 -8.82919400000 -4.68743200000
6+
B 1.35561000000 7.85111000000 0.54368000000 5 7.25565900000 8.82919400000 -4.68743200000
7+
C 2.59222000000 6.23505000000 0.52682000000 6 14.39490500000 -19.12936600000 4.67857200000
8+
C 2.59222000000 2.59571000000 0.52682000000 6 14.39490800000 19.12936600000 4.67857200000
9+
C 1.95905000000 2.59571000000 1.53936000000 6 -14.39490500000 19.12936600000 -4.67857200000
10+
C 2.27564000000 1.56002000000 1.03309000000 6 -0.00000000000 -4.51935800000 0.00000000000
11+
C 2.27564000000 7.27074000000 1.03309000000 6 0.00000000000 4.51935800000 0.00000000000
12+
B 3.19567000000 7.85111000000 1.52250000000 5 -7.25565700000 8.82919400000 4.68743200000
13+
C 2.27564000000 0.00000000000 1.03309000000 6 -0.00000000000 -0.00000000000 0.00000000000
14+
B 2.27564000000 4.41538000000 1.03309000000 5 -0.00000000000 -0.00000000000 0.00000000000
15+
10
16+
energy=-58.342497 virial="-3.68535041825 1.63204257089e-06 -4.28008468355 1.63204257089e-06 18.1630123797 0.0 -4.28008468355 0.0 3.03073243091" Lattice="9.217080809 0.0 0.0 4.86e-07 6.431267224 0.0 4.201562981 4.1e-08 2.205334915" Properties=species:S:1:pos:R:3:Z:I:1:force:R:3
17+
C 12.96783000000 4.32879000000 2.14172000000 6 7.05653600000 11.19171000000 1.80087100000
18+
C 12.96783000000 2.10247000000 2.14172000000 6 7.05653500000 -11.19171000000 1.80087100000
19+
C 7.95424000000 1.03482000000 2.10290000000 6 1.71010500000 -3.80357800000 -0.12402100000
20+
C 7.95424000000 5.39645000000 2.10290000000 6 1.71010600000 3.80357800000 -0.12402100000
21+
B 12.38023000000 3.21563000000 2.13103000000 5 -9.35551000000 -0.00000000000 -0.22364600000
22+
C 0.48970000000 5.37868000000 0.03690000000 6 0.43541500000 0.75339500000 -0.62086800000
23+
C 0.48970000000 1.05259000000 0.03690000000 6 0.43541500000 -0.75339500000 -0.62086800000
24+
C 2.71900000000 1.71197000000 0.17210000000 6 -3.43041200000 0.05665700000 -0.44035700000
25+
C 2.71900000000 4.71930000000 0.17210000000 6 -3.43041200000 -0.05665700000 -0.44035700000
26+
C 9.69981000000 3.21563000000 0.55395000000 6 -2.18778000000 -0.00000000000 -1.00760300000
27+
10
28+
energy=-56.397425 virial="-5.61261501333 -6.34809444383e-07 6.34809444383e-07 -6.34809444383e-07 -5.61261501333 6.34809444383e-07 6.34809444383e-07 6.34809444383e-07 4.91130356636" Lattice="5.512073672 0.0 0.0 4.16e-07 5.512073672 0.0 2.53e-07 2.53e-07 3.347159197" Properties=species:S:1:pos:R:3:Z:I:1:force:R:3
29+
B 4.08581000000 4.57623000000 2.94238000000 5 0.03989300000 -1.87257900000 0.97202400000
30+
C 1.42627000000 0.93584000000 2.94238000000 5 -0.03989300000 1.87257900000 0.97202400000
31+
C 0.93584000000 4.08581000000 2.94238000000 5 1.87257900000 0.03989300000 0.97202400000
32+
C 4.57623000000 1.42627000000 2.94238000000 5 -1.87257900000 -0.03989300000 0.97202400000
33+
C 2.75604000000 2.75604000000 0.32076000000 5 -0.00000000000 -0.00000000000 -1.59707500000
34+
C 2.94936000000 1.19711000000 3.32090000000 6 0.74133300000 -2.43841400000 -2.33370300000
35+
C 2.56271000000 4.31496000000 3.32090000000 6 -0.74133400000 2.43841400000 -2.33370300000
36+
C 4.31496000000 2.94936000000 3.32090000000 6 2.43841400000 0.74133400000 -2.33370300000
37+
C 1.19711000000 2.56271000000 3.32090000000 6 -2.43841400000 -0.74133400000 -2.33370300000
38+
C 2.75604000000 2.75604000000 1.72073000000 6 0.00000100000 0.00000100000 7.04379200000

0 commit comments

Comments
 (0)