Skip to content

Commit 11157ec

Browse files
author
Musiha Mukta
committed
fix cif format for ranking
1 parent 18e8df9 commit 11157ec

File tree

1 file changed

+148
-0
lines changed

1 file changed

+148
-0
lines changed

scripts/rank.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import pymatgen as mg
2+
import numpy as np
3+
from pyxtal import pyxtal
4+
from pyxtal.util import parse_cif
5+
from optparse import OptionParser
6+
import pymatgen.analysis.structure_matcher as sm
7+
8+
import warnings
9+
warnings.filterwarnings("ignore")
10+
11+
def new_struc(xtal, xtals, max_num=100):
12+
"""
13+
check if this is a new structure
14+
15+
Args:
16+
xtal: input structure
17+
xtals: list of reference structures
18+
19+
Return:
20+
`None` or the id of matched structure
21+
"""
22+
if len(xtals) > max_num:
23+
start = len(xtals) - max_num
24+
else:
25+
start = 0
26+
sg1 = xtal.group.number
27+
pmg_s1 = xtal.to_pymatgen()
28+
pmg_s1.remove_species("H")
29+
vol1 = pmg_s1.lattice.volume
30+
31+
for xtal2 in xtals[start:]:
32+
sg2 = xtal2.group.number
33+
if sg1 == sg2:
34+
pmg_s2 = xtal2.to_pymatgen()
35+
vol2 = pmg_s2.lattice.volume
36+
if abs(vol1-vol2)/vol1<5e-2:
37+
pmg_s2.remove_species("H")
38+
if sm.StructureMatcher().fit(pmg_s1, pmg_s2):
39+
return False
40+
return True
41+
42+
43+
parser = OptionParser()
44+
parser.add_option("-f", "--cif", dest="cif", default="WFS-gaff.cif",
45+
help="cif file name, optional")
46+
parser.add_option("-r", "--rank", dest="rank", default='energy',
47+
help="ranking criteria: default is energy")
48+
parser.add_option("-s", "--n1", dest="n1", type=int, default=0,
49+
help="starting id, optional")
50+
parser.add_option("-e", "--n2", dest="n2", type=int, default=-1,
51+
help="ennding id, optional")
52+
parser.add_option("-c", "--cut", dest="cut", type=int,
53+
help="cutoff number, optional")
54+
parser.add_option("--dmax", dest="dmax", type=float, default=10.0,
55+
help="maximum density in g/cm^3, optional")
56+
57+
(options, args) = parser.parse_args()
58+
rank = options.rank
59+
n1 = options.n1
60+
n2 = options.n2
61+
output1 = 'Ranked.cif'
62+
63+
"""
64+
Read the smile from the following contents
65+
-------Global Crystal Structure Prediction------
66+
smile : CC(=O)OC1=CC=CC=C1C(=O)O
67+
"""
68+
69+
with open(options.cif, 'r') as f:
70+
lines = f.readlines()
71+
smiles = []
72+
for l in lines:
73+
if 'smile' in l:
74+
smile_str = l.split(':')[1].strip()
75+
smiles = [smile_str + '.smi']
76+
break
77+
print(smiles)
78+
79+
cifs, engs = parse_cif(options.cif, eng=True)
80+
print("Total Number of Structures:", len(cifs))
81+
if options.cut is None:
82+
cut = len(cifs)
83+
else:
84+
cut = options.cut
85+
86+
if options.rank == 'energy':
87+
engs = np.array(engs)
88+
ids = np.argsort(engs)
89+
else:
90+
cifs, sims = parse_cif(options.cif, sim=True)
91+
sims = np.array(sims)
92+
ids = np.argsort(-1*sims)
93+
sims = [sims[id] for id in ids]
94+
95+
cifs = [cifs[id] for id in ids]
96+
engs = engs[ids]
97+
eng0 = engs.min()
98+
99+
if n2 == -1:
100+
cifs = cifs[n1:]
101+
engs = engs[n1:]
102+
ids = ids[n1:]
103+
else:
104+
n2 = min(n2, len(cifs))
105+
cifs = cifs[n1:n2]
106+
engs = engs[n1:n2]
107+
ids = ids[n1:n2]
108+
print("Index", n1, n2, cut, len(cifs))
109+
with open(output1, 'w') as f: f.write(l)
110+
111+
xtals = []
112+
count = 0
113+
with open(output1, 'a+') as f:
114+
for id, cif in enumerate(cifs):
115+
pmg = mg.core.Structure.from_str(cif, fmt='cif')
116+
try:
117+
xtal = pyxtal(molecular=True)
118+
xtal.from_seed(pmg, molecules = smiles)
119+
xtal.energy = engs[id]
120+
if new_struc(xtal, xtals, 100):
121+
xtals.append(xtal)
122+
spg = xtal.group.number
123+
den = xtal.get_density()
124+
eng = engs[id]
125+
label = f"{count}-d{den:.3f}-spg{spg}-e{eng:.3f}"
126+
if den < options.dmax:
127+
cif_lines = xtal.to_file(header=label).splitlines()
128+
# Extract energy from label
129+
energy_from_label = float(label.split('-e')[1])
130+
energy_line = f"#Energy: {energy_from_label:.3f} eV/cell\n"
131+
output_lines = []
132+
energy_replaced = False
133+
for line in cif_lines:
134+
stripped = line.strip()
135+
# Replace any existing energy line
136+
if stripped.startswith("#Energy:"):
137+
output_lines.append(energy_line)
138+
energy_replaced = True
139+
continue
140+
output_lines.append(line + '\n')
141+
f.writelines(output_lines)
142+
print(f"{ids[id]:6d} {label} {(eng - eng0)*96.485:6.2f}")
143+
count += 1
144+
if count == cut:
145+
print("Stop", count)
146+
break
147+
except:
148+
print("Problem in reading")

0 commit comments

Comments
 (0)