Skip to content

Commit 9b35c81

Browse files
authored
Implement Support for pymatgen.core.Molecule (#200)
* add .github/workflows/mirror_gitee.yml * implement support for pymatgen.core.Molecule * implement support for pymatgen.core.Molecule * Change atom_types in dpdata/pymatgen/molecule.py * Change doc string in dpdata/plugins/pymatgen.py * add dpdata/pymatgen to setup.py * modify molecule.py * modify molecule.py * modify molecule.py * Support of pymatgen.Molecule in dpdata * Update tests/test_pymatgen_molecule.py * Moved "remove_pbc" from pymatgen/molecule.py to system.py
1 parent 3e96c2c commit 9b35c81

File tree

10 files changed

+170
-13
lines changed

10 files changed

+170
-13
lines changed

dpdata/plugins/pymatgen.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from dpdata.format import Format
2+
import dpdata.pymatgen.molecule
3+
import numpy as np
24

35

46
@Format.register("pymatgen/structure")
@@ -22,6 +24,37 @@ def to_system(self, data, **kwargs):
2224
return structures
2325

2426

27+
@Format.register("pymatgen/molecule")
class PyMatgenMoleculeFormat(Format):
    """Format plugin registered as ``"pymatgen/molecule"``.

    Reads a molecule file through pymatgen into dpdata system data and
    converts dpdata system data into pymatgen ``Molecule`` objects.
    """

    @Format.post("remove_pbc")
    def from_system(self, file_name, **kwargs):
        """Load system data from a molecule file.

        Parameters
        ----------
        file_name : str
            Path handed to ``dpdata.pymatgen.molecule.to_system_data``.
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        dict
            The system data built by ``to_system_data``.

        Raises
        ------
        ImportError
            If ``pymatgen.core`` cannot be imported.

        Notes
        -----
        The method is decorated with ``Format.post("remove_pbc")``;
        presumably the registered ``remove_pbc`` post function is applied to
        the loaded system afterwards -- confirm against ``dpdata.format``.
        """
        try:
            # Imported only to turn a missing pymatgen into a clear error.
            from pymatgen.core import Molecule  # noqa: F401
        except ModuleNotFoundError as e:
            raise ImportError('No module pymatgen.Molecule') from e

        return dpdata.pymatgen.molecule.to_system_data(file_name)

    def to_system(self, data, **kwargs):
        """Convert system data to pymatgen ``Molecule`` objects.

        Parameters
        ----------
        data : dict
            System data providing ``'atom_names'``, ``'atom_types'``,
            ``'coords'`` and ``'cells'``.
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        list
            One ``Molecule`` per frame of ``data['coords']``.

        Raises
        ------
        ImportError
            If ``pymatgen.core`` cannot be imported.
        """
        try:
            from pymatgen.core import Molecule
        except ModuleNotFoundError as e:
            raise ImportError('No module pymatgen.Molecule') from e

        # One symbol per coordinate row. Expanding atom_names by atom_numbs
        # would only be right when atoms are grouped by type; atom_types
        # keeps the labels aligned with the coordinates in every case.
        atom_names = data['atom_names']
        species = [atom_names[tt] for tt in data['atom_types']]

        # remove_pbc writes coords/cells in place, so shift a copy and leave
        # the caller's system untouched by the export.
        shifted = dict(data)
        shifted['coords'] = np.array(data['coords'])
        shifted['cells'] = np.array(data['cells'])
        shifted = dpdata.system.remove_pbc(shifted)

        return [Molecule(species, frame) for frame in shifted['coords']]
56+
57+
2558
@Format.register("pymatgen/computedstructureentry")
2659
@Format.register_to("to_pymatgen_ComputedStructureEntry")
2760
class PyMatgenCSEFormat(Format):
@@ -44,3 +77,5 @@ def to_labeled_system(self, data, *args, **kwargs):
4477
entry = ComputedStructureEntry(structure, energy, data=csedata)
4578
entries.append(entry)
4679
return entries
80+
81+

dpdata/pymatgen/__init__.py

Whitespace-only changes.

dpdata/pymatgen/molecule.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import numpy as np
2+
from pymatgen.core import Molecule
3+
from collections import Counter
4+
import dpdata
5+
6+
def to_system_data(file_name, protect_layer = 9) :
    """Build a single-frame dpdata system dict from a molecule file.

    Parameters
    ----------
    file_name : str
        Path passed to ``Molecule.from_file``.
    protect_layer : float, optional
        Not used by this function; kept so existing callers that pass it
        keep working.

    Returns
    -------
    dict
        ``'atom_names'``: element symbols in order of first appearance.
        ``'atom_numbs'``: number of atoms of each name.
        ``'atom_types'``: int array, index into ``'atom_names'`` per site.
        ``'orig'``: ``np.array([0, 0, 0])``.
        ``'coords'``: one-element list holding a copy of the Cartesian
        coordinates.
        ``'cells'``: one-element list holding ``10.0 * np.eye(3)``.
    """
    mol = Molecule.from_file(file_name)
    # Each site is labelled by the first element of its species.
    elem_mol = [str(site.species.elements[0]) for site in mol.sites]
    elem_counter = Counter(elem_mol)
    atom_names = list(elem_counter.keys())
    atom_numbs = list(elem_counter.values())
    # Dict lookup instead of list.index for every atom.
    type_of = {name: idx for idx, name in enumerate(atom_names)}
    atom_types = [type_of[e] for e in elem_mol]

    system = {}
    system['atom_names'] = atom_names
    system['atom_numbs'] = atom_numbs
    system['atom_types'] = np.array(atom_types, dtype = int)
    system['orig'] = np.array([0, 0, 0])
    # Copy so later in-place edits do not touch the Molecule's own array.
    system['coords'] = [np.copy(mol.cart_coords)]
    # Fixed 10.0 * identity cell; presumably a placeholder that the
    # remove_pbc post-processing of the format plugin replaces -- confirm.
    system['cells'] = [10.0 * np.eye(3)]
    return system

dpdata/system.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from monty.serialization import loadfn,dumpfn
1010
from dpdata.periodic_table import Element
1111
from dpdata.amber.mask import pick_by_amber_mask, load_param_file
12+
import dpdata
1213

1314
# ensure all plugins are loaded!
1415
import dpdata.plugins
@@ -418,7 +419,7 @@ def extend(self, systems):
418419
for system in systems:
419420
self.append(system.copy())
420421

421-
422+
422423
def apply_pbc(self) :
423424
"""
424425
Append periodic boundary condition
@@ -428,6 +429,7 @@ def apply_pbc(self) :
428429
self.data['coords'] = np.matmul(ncoord, self.data['cells'])
429430

430431

432+
@post_funcs.register("remove_pbc")
431433
def remove_pbc(self, protect_layer = 9):
432434
"""
433435
This method does NOT delete the definition of the cells, it
@@ -441,19 +443,8 @@ def remove_pbc(self, protect_layer = 9):
441443
protect_layer : the protect layer between the atoms and the cell
442444
boundary
443445
"""
444-
nframes = self.get_nframes()
445-
natoms = self.get_natoms()
446446
assert(protect_layer >= 0), "the protect_layer should be no less than 0"
447-
for ff in range(nframes):
448-
tmpcoord = self.data['coords'][ff]
449-
cog = np.average(tmpcoord, axis = 0)
450-
dist = tmpcoord - np.tile(cog, [natoms, 1])
451-
max_dist = np.max(np.linalg.norm(dist, axis = 1))
452-
h_cell_size = max_dist + protect_layer
453-
cell_size = h_cell_size * 2
454-
shift = np.array([1,1,1]) * h_cell_size - cog
455-
self.data['coords'][ff] = self.data['coords'][ff] + np.tile(shift, [natoms, 1])
456-
self.data['cells'][ff] = cell_size * np.eye(3)
447+
remove_pbc(self.data, protect_layer)
457448

458449
def affine_map(self, trans, f_idx = 0) :
459450
assert(np.linalg.det(trans) != 0)
@@ -1314,3 +1305,18 @@ def elements_index_map(elements,standard=False,inverse=False):
13141305
else:
13151306
return dict(zip(elements,range(len(elements))))
13161307
# %%
1308+
1309+
def remove_pbc(system, protect_layer = 9):
    """Re-box every frame in a cubic cell built around its atoms.

    For each frame the centre of geometry and the largest atom distance
    from it are computed. The half cell size is that distance plus
    ``protect_layer``. The coordinates are translated so the centre of
    geometry lies at the centre of the new cell, and the frame's cell is
    set to ``2 * half_size * np.eye(3)``.

    Parameters
    ----------
    system : dict
        System data with indexable ``'coords'`` and ``'cells'`` entries
        (one item per frame). Both are modified in place.
    protect_layer : float, optional
        Margin added between the outermost atom and the cell boundary.

    Returns
    -------
    dict
        The same ``system`` object that was passed in.
    """
    # Iterating over the frames (instead of reading frame 0 up front) makes
    # a system with zero frames a no-op rather than an IndexError.
    for ff in range(len(system['coords'])):
        frame = np.asarray(system['coords'][ff])
        cog = np.average(frame, axis = 0)
        # Broadcasting subtracts the centre from every atom row.
        max_dist = np.max(np.linalg.norm(frame - cog, axis = 1))
        h_cell_size = max_dist + protect_layer
        shift = np.array([1, 1, 1]) * h_cell_size - cog
        system['coords'][ff] = frame + shift
        system['cells'][ff] = (h_cell_size * 2) * np.eye(3)
    return system

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
'dpdata/abacus',
4242
'dpdata/rdkit',
4343
'dpdata/plugins',
44+
'dpdata/pymatgen',
4445
],
4546
package_data={'dpdata':['*.json']},
4647
classifiers=[

tests/pymatgen/FA-001.vasp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
C1 H5 N2
2+
1.0
3+
2.2504659203492643e+01 0.0000000000000000e+00 0.0000000000000000e+00
4+
0.0000000000000000e+00 2.2504659203492643e+01 0.0000000000000000e+00
5+
0.0000000000000000e+00 0.0000000000000000e+00 2.2504659203492643e+01
6+
C H N
7+
1 5 2
8+
Cartesian
9+
11.2523296017 11.9178548890 11.2108422059
10+
11.2523296017 11.4836546020 13.4873419539
11+
11.2523296017 9.9657546120 12.5961418889
12+
11.2523296017 11.5757547460 9.0233422059
13+
11.2523296017 9.9209545690 10.0419421929
14+
11.2523296017 13.0578542790 11.2108422059
15+
11.2523296017 11.0671545110 12.4584418079
16+
11.2523296017 11.0296546060 9.9897423529

tests/pymatgen/FA-001.xyz

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
8
2+
Lattice="10 0.0 0.0 0.0 10 0.0 0.0 0.0 10" Properties=species:S:1:pos:R:3:Z:I:1
3+
C 3.1742845125747904e-16 5.342900276 5.184000015 6
4+
H 4.568238577398768e-16 4.908699989 7.460499763 1
5+
N 3.938218942178196e-16 4.492199898 6.431599617 7
6+
H 4.022535923897687e-16 3.390799999 6.569299698 1
7+
H 1.834827076007373e-16 5.000800133 2.996500015 1
8+
N 2.4265764993669136e-16 4.454699993 3.962900162 7
9+
H 2.4585396828529157e-16 3.345999956 4.015100002 1
10+
H 3.1742845125747904e-16 6.482899666 5.184000015 1

tests/pymatgen/mol2-new.vasp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
H5 C1 N2
2+
1.0
3+
10.000000 0.000000 0.000000
4+
0.000000 10.000000 0.000000
5+
0.000000 0.000000 10.000000
6+
H C N
7+
5 1 2
8+
direct
9+
0.577383 0.631337 0.500367 H
10+
0.440363 0.395639 0.623009 H
11+
0.523296 0.528867 0.297597 H
12+
0.521491 0.528803 0.702604 H
13+
0.441477 0.395690 0.376400 H
14+
0.521985 0.537275 0.500106 C
15+
0.490941 0.484747 0.383775 N
16+
0.489919 0.484705 0.616136 N

tests/pymatgen/mol2.vasp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
hexagonal-PbI3
2+
1.00000000000000
3+
8.4261398403369476 -0.0207094818066008 -0.0031014314051307
4+
-4.2326852079273163 7.4079517949230027 0.0021007606821090
5+
-0.0033015270376727 0.0018001191064931 7.9750946862221301
6+
N C H
7+
2 1 5
8+
Direct
9+
0.3230984076298932 0.6454769144393403 0.1041651474646342
10+
0.3220267174078323 0.6453279258574751 0.3955245499807875
11+
0.3958479883967422 0.7164380537464232 0.2500334235810864
12+
0.5257412318253072 0.8435919770693032 0.2503606409552951
13+
0.2024793011832460 0.5249247262853158 0.4041424751787161
14+
0.3915235842243226 0.7051542887367509 -0.0038936922276933 0.9961063077723067
15+
0.3896311952726235 0.7049043576463694 0.5039461714481914
16+
0.2036571496859804 0.5250922648687301 0.0949178563751044

tests/test_pymatgen_molecule.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import os
2+
import numpy as np
3+
import unittest
4+
from context import dpdata
5+
6+
class TestPOSCARCart(unittest.TestCase):
    """Tests for loading from and exporting to pymatgen molecules."""

    @staticmethod
    def _max_radius(coords, natoms):
        """Return the largest distance of any atom from the centre of geometry."""
        centre = np.average(coords, axis = 0)
        offsets = coords - np.tile(centre, [natoms, 1])
        return np.max(np.linalg.norm(offsets, axis = 1))

    def setUp(self):
        # Load the xyz fixture and check the per-atom type indices.
        xyz_path = os.path.join('pymatgen', 'FA-001.xyz')
        self.system = dpdata.System()
        self.system.from_pymatgen_molecule(xyz_path)
        expected_types = [0, 1, 2, 1, 1, 2, 1, 1]
        self.assertEqual(list(self.system["atom_types"]), expected_types)

    def test_poscar_to_molecule(self):
        # The export may translate the atoms, but the largest distance from
        # the centre of geometry must stay the same.
        poscar_system = dpdata.System()
        poscar_system.from_vasp_poscar(os.path.join('pymatgen', 'mol2.vasp'))
        natoms = len(poscar_system['coords'][0])
        radius_before = self._max_radius(poscar_system['coords'][0], natoms)

        mols = poscar_system.to("pymatgen/molecule")
        radius_after = self._max_radius(mols[-1].cart_coords, natoms)
        self.assertAlmostEqual(radius_before, radius_after)


if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)