Skip to content

Commit 84aa9c9

Browse files
committed
Merge remote-tracking branch 'upstream/devel' into devel
2 parents fda345f + 30cbb6a commit 84aa9c9

31 files changed

+924
-21
lines changed

README.md

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,41 @@ The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any),
5151

5252
The `System` or `LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`:
5353

54+
The class `dpdata.MultiSystems` can read data from a directory that may contain many files of different systems, or from a single xyz file that contains different systems.
55+
56+
Use `dpdata.MultiSystems.from_dir` to read from a directory; `dpdata.MultiSystems` will walk the directory
57+
recursively and find all files with the specified `file_name`. It supports all the file formats that `dpdata.LabeledSystem` supports.
58+
59+
Use `dpdata.MultiSystems.from_file` to read from a single file. Currently only the quip/gap/xyz file format is supported.
60+
61+
For example, for `quip/gap xyz` files, a single .xyz file may contain many different configurations with different numbers of atoms and different atom types.
62+
63+
The following commands relating to the class `dpdata.MultiSystems` may be useful.
64+
```python
65+
# load data
66+
67+
xyz_multi_systems = dpdata.MultiSystems.from_file(file_name='tests/xyz/xyz_unittest.xyz',fmt='quip/gap/xyz')
68+
vasp_multi_systems = dpdata.MultiSystems.from_dir(dir_name='./mgal_outcar', file_name='OUTCAR', fmt='vasp/outcar')
69+
70+
# use wildcard
71+
vasp_multi_systems = dpdata.MultiSystems.from_dir(dir_name='./mgal_outcar', file_name='*OUTCAR', fmt='vasp/outcar')
72+
73+
# print the multi_system information
74+
print(xyz_multi_systems)
75+
print(xyz_multi_systems.systems) # returns a dictionary
76+
77+
# print the system information
78+
print(xyz_multi_systems.systems['B1C9'].data)
79+
80+
# dump a system's data to ./my_work_dir/B1C9_raw folder
81+
xyz_multi_systems.systems['B1C9'].to_deepmd_raw('./my_work_dir/B1C9_raw')
82+
83+
# dump all systems
84+
xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
85+
86+
87+
```
88+
5489
| Software| format | multi frames | labeled | class | format key |
5590
| ------- | :--- | :---: | :---: | :--- | :--- |
5691
| vasp | poscar | False | False | System | 'vasp/poscar' |
@@ -70,6 +105,7 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
70105
| QE | log | False | True | LabeledSystem | 'qe/pw/scf' |
71106
| QE | log | True | False | System | 'qe/cp/traj' |
72107
| QE | log | True | True | LabeledSystem | 'qe/cp/traj' |
108+
|quip/gap|xyz|True|True|MultiSystems|'quip/gap/xyz'|
73109

74110
## Access data
75111
These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example
@@ -116,3 +152,20 @@ Frame selection can be implemented by
116152
dpdata.LabeledSystem('OUTCAR').sub_system([0,-1]).to_deepmd_raw('dpmd_raw')
117153
```
118154
by which only the first and last frames are dumped to `dpmd_raw`.
155+
156+
## replicate
157+
dpdata will create a super cell of the current atom configuration.
158+
```python
159+
dpdata.System('./POSCAR').replicate((1,2,3,) )
160+
```
161+
The tuple `(1,2,3)` means: do not replicate the atom configuration in the x direction, make 2 copies in the y direction, and make 3 copies in the z direction.
162+
163+
## perturb
164+
By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options for `atom_pert_style` are `uniform` (uniform in a ball) and `const` (uniform on a sphere).
165+
```python
166+
perturbed_system = dpdata.System('./POSCAR').perturb(pert_num=3,
167+
cell_pert_fraction=0.05,
168+
atom_pert_distance=0.6,
169+
atom_pert_style='normal')
170+
print(perturbed_system.data)
171+
```

dpdata/siesta/aiMD_output.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,18 @@ def get_atom_types(fout, atomnums):
9090
atomtype.append(int(covert_type[i]) - 1)
9191
return atomtype
9292

93+
def get_atom_name(fout):
    """Collect the species labels listed in the text file *fout*.

    Every line containing the substring ``'Species number:'`` is split on
    whitespace, and the token immediately following the first ``'Label:'``
    token on that line is recorded.  Lines without ``'Species number:'``,
    or without a ``'Label:'`` token, contribute nothing.

    Parameters
    ----------
    fout : str
        Path of the file to scan (opened in text mode for reading).

    Returns
    -------
    list of str
        The recorded labels, in the order their lines appear in the file.

    Raises
    ------
    IndexError
        If ``'Label:'`` is the last token of a matching line, i.e. no label
        follows it.
    """
    labels = []
    # The context manager guarantees the handle is closed even when parsing
    # raises part-way through the file.
    with open(fout, 'r') as handle:
        for line in handle:
            if 'Species number:' not in line:
                continue
            # Split once per line instead of once per token comparison.
            tokens = line.split()
            for idx, token in enumerate(tokens):
                if token == 'Label:':
                    labels.append(tokens[idx + 1])
                    # Only the first 'Label:' on a line is used.
                    break
    return labels
104+
93105
def get_atom_numbs(atomtypes):
94106
atom_numbs = []
95107
for i in set(atomtypes):
@@ -118,7 +130,7 @@ def covert_dimension(arr, num):
118130

119131
def get_aiMD_frame(fname):
120132
NumberOfSpecies = int(get_single_line_tail(fname, 'redata: Number of Atomic Species')[0])
121-
atom_names = extract_keyword(fname, 'initatom: Reading input for the pseudopotentials and atomic orbitals', NumberOfSpecies, 4, 5, 0, 8)[0].tolist()
133+
atom_names = get_atom_name(fname)
122134
tot_natoms = int(get_single_line_tail(fname, 'Number of atoms', 3)[0])
123135

124136
atom_types = get_atom_types(fname, tot_natoms)

dpdata/siesta/output.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,16 @@ def extract_keyword(fout, keyword, down_line_num, begin_column, column_num):
3838
else:
3939
flag = 0
4040
continue
41-
for i in range(begin_column, column_num):
42-
if not value.split()[i].isalpha():
43-
ret.append(float(value.strip().split()[i]))
44-
else:
45-
ret.append(value.strip().split()[i])
46-
continue
41+
if len(value.split()) >= column_num:
42+
for i in range(begin_column, column_num):
43+
if not value.split()[i].isalpha():
44+
ret.append(float(value.strip().split()[i]))
45+
else:
46+
ret.append(value.strip().split()[i])
47+
## compatible siesta-4.0.2 and siesta-4.1-b4
48+
else:
49+
flag = 0
50+
idx = 0
4751
file.close()
4852
return ret
4953

@@ -55,6 +59,17 @@ def get_atom_types(fout, atomnums):
5559
atomtype.append(int(covert_type[i]) - 1)
5660
return atomtype
5761

62+
def get_atom_name(fout):
    """Return the species labels found in the text file *fout*.

    Only lines containing the substring ``'Species number:'`` are examined.
    Each such line is split on whitespace and the token right after the first
    ``'Label:'`` token is appended to the result; a matching line with no
    ``'Label:'`` token adds nothing.

    Parameters
    ----------
    fout : str
        Path of the file to scan (opened in text mode for reading).

    Returns
    -------
    list of str
        Labels in file order, one per matching line that carries a label.

    Raises
    ------
    IndexError
        If a matching line ends with ``'Label:'`` so that no token follows.
    """
    names = []
    # ``with`` closes the file on every exit path, including an exception
    # raised while a line is being parsed.
    with open(fout, 'r') as stream:
        for text in stream:
            if 'Species number:' in text:
                # One split per line; the fields are reused for the lookup.
                fields = text.split()
                for pos, field in enumerate(fields):
                    if field == 'Label:':
                        names.append(fields[pos + 1])
                        # Stop at the first 'Label:' on the line.
                        break
    return names
5873

5974
def get_atom_numbs(atomtypes):
6075
atom_numbs = []
@@ -79,12 +94,11 @@ def get_virial(fout, cells):
7994

8095
def obtain_frame(fname):
8196
NumberOfSpecies = int(get_single_line_tail(fname, 'redata: Number of Atomic Species')[0])
82-
atom_names = extract_keyword(fname, 'initatom: Reading input for the pseudopotentials and atomic orbitals', NumberOfSpecies, 4, 5)
97+
atom_names = get_atom_name(fname)
8398
tot_natoms = int(get_single_line_tail(fname, 'Number of atoms', 3)[0])
8499
atom_types = get_atom_types(fname, tot_natoms)
85100
atom_numbs = get_atom_numbs(atom_types)
86101
assert (max(atom_types) + 1 == NumberOfSpecies)
87-
88102
cell = extract_keyword(fname, 'outcell: Unit cell vectors (Ang):', 3, 0, 3)
89103
coord = extract_keyword(fname, 'outcoor: Atomic coordinates (Ang):', tot_natoms, 0, 3)
90104
energy = get_single_line_tail(fname, 'siesta: E_KS(eV) =')

0 commit comments

Comments
 (0)