Skip to content

Commit 0c0b3e6

Browse files
committed
Merge remote-tracking branch 'origin/master' into HEAD
2 parents 6237c27 + 843ef16 commit 0c0b3e6

File tree

3 files changed

+34
-12
lines changed

3 files changed

+34
-12
lines changed

atomate/vasp/database.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from pymatgen.electronic_structure.bandstructure import BandStructure, BandStructureSymmLine
1616
from pymatgen.electronic_structure.dos import CompleteDos
17-
from pymatgen.io.vasp import Chgcar
1817

1918
import gridfs
2019
from pymongo import ASCENDING, DESCENDING
@@ -71,7 +70,9 @@ def build_indexes(self, indexes=None, background=True):
7170
def insert_task(self, task_doc, use_gridfs=False):
7271
"""
7372
Inserts a task document (e.g., as returned by Drone.assimilate()) into the database.
74-
Handles putting DOS and band structure into GridFS as needed.
73+
Handles putting DOS, band structure and charge density into GridFS as needed.
74+
During testing, a percentage of runs on some clusters had corrupted AECCAR files even when everything else about the calculation looked OK.
75+
So we do a quick check here and only record the AECCARs if they are valid
7576
7677
Args:
7778
task_doc: (dict) the task document
@@ -83,6 +84,7 @@ def insert_task(self, task_doc, use_gridfs=False):
8384
bs = None
8485
chgcar = None
8586
aeccar0 = None
87+
write_aeccar = False
8688

8789
# move dos BS and CHGCAR from doc to gridfs
8890
if use_gridfs and "calcs_reversed" in task_doc:
@@ -99,15 +101,20 @@ def insert_task(self, task_doc, use_gridfs=False):
99101
chgcar = json.dumps(task_doc["calcs_reversed"][0]["chgcar"], cls=MontyEncoder)
100102
del task_doc["calcs_reversed"][0]["chgcar"]
101103

102-
if "aeccar0" in task_doc["calcs_reversed"][0]: # only store idx=0 DOS
104+
if "aeccar0" in task_doc["calcs_reversed"][0]:
103105
aeccar0 = task_doc["calcs_reversed"][0]["aeccar0"]
104-
aeccar0 = json.dumps(task_doc["calcs_reversed"][0]["aeccar0"], cls=MontyEncoder)
105-
del task_doc["calcs_reversed"][0]["aeccar0"]
106-
try:
107-
# aeccar2 should also be in the task_doc
106+
aeccar2 = task_doc["calcs_reversed"][0]["aeccar2"]
107+
# check if the aeccar is valid before insertion
108+
if (aeccar0.data['total'] + aeccar2.data['total']).min() < 0:
109+
logger.warning(f"The AECCAR seems to be corrupted for task_in directory {task_doc['dir_name']}\nSkipping storage of AECCARs")
110+
write_aeccar = False
111+
else:
112+
# overwrite the aeccar variable with their string representations to be inserted in GridFS
113+
aeccar0 = json.dumps(task_doc["calcs_reversed"][0]["aeccar0"], cls=MontyEncoder)
108114
aeccar2 = json.dumps(task_doc["calcs_reversed"][0]["aeccar2"], cls=MontyEncoder)
109-
except:
110-
raise KeyError('aeccar2 data is missing from task_doc')
115+
write_aeccar = True
116+
117+
del task_doc["calcs_reversed"][0]["aeccar0"]
111118
del task_doc["calcs_reversed"][0]["aeccar2"]
112119

113120
# insert the task document
@@ -136,7 +143,7 @@ def insert_task(self, task_doc, use_gridfs=False):
136143
self.collection.update_one({"task_id": t_id}, {"$set": {"calcs_reversed.0.chgcar_fs_id": chgcar_gfs_id}})
137144

138145
# insert the AECCARs file into gridfs and update the task documents
139-
if aeccar0:
146+
if write_aeccar:
140147
aeccar0_gfs_id, compression_type = self.insert_gridfs(aeccar0, "aeccar0_fs", task_id=t_id)
141148
self.collection.update_one(
142149
{"task_id": t_id}, {"$set": {"calcs_reversed.0.aeccar0_compression": compression_type}})
@@ -169,6 +176,10 @@ def retrieve_task(self, task_id):
169176
if 'chgcar_fs_id' in calc:
170177
chgcar = self.get_chgcar(task_id)
171178
calc["chgcar"] = chgcar
179+
if 'aeccar0_fs_id' in calc:
180+
aeccar = self.get_aeccar(task_id)
181+
calc["aeccar0"] = aeccar['aeccar0']
182+
calc["aeccar2"] = aeccar['aeccar2']
172183
return task_doc
173184

174185
def insert_gridfs(self, d, collection="fs", compress=True, oid=None, task_id=None):
@@ -244,11 +255,12 @@ def get_chgcar(self, task_id):
244255
chgcar= json.loads(chgcar_json, cls=MontyDecoder)
245256
return chgcar
246257

247-
def get_aeccar(self, task_id):
258+
def get_aeccar(self, task_id, check_valid = True):
248259
"""
249260
Read the AECCAR0 + AECCAR2 grid_fs data into a Chgcar object
250261
Args:
251262
task_id(int or str): the task_id containing the gridfs metadata
263+
check_valid (bool): check that the summed AECCAR0 + AECCAR2 'total' density has no negative values
252264
Returns:
253265
dict: {'aeccar0': aeccar0, 'aeccar2': aeccar2}, the two decoded Chgcar objects keyed by name
254266
"""
@@ -262,6 +274,9 @@ def get_aeccar(self, task_id):
262274
aeccar_json = zlib.decompress(fs.get(fs_id).read())
263275
aeccar2 = json.loads(aeccar_json, cls=MontyDecoder)
264276

277+
if check_valid and (aeccar0.data['total'] + aeccar2.data['total']).min() < 0:
278+
ValueError(f"The AECCAR seems to be corrupted for task_id = {task_id}")
279+
265280
return {'aeccar0': aeccar0, 'aeccar2': aeccar2}
266281

267282
def reset(self):

atomate/vasp/drones.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import json
1717
import glob
1818
import traceback
19-
import gzip
2019

2120
from monty.io import zopen
2221
from monty.json import jsanitize

atomate/vasp/workflows/tests/test_vasp_workflows.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,14 @@ def test_chgcar_db_read_write(self):
298298
dcc = mmdb.get_aeccar(task_id=t_id)
299299
self.assertAlmostEqual(dcc['aeccar0'].data['total'].sum()/cc.ngridpts, 23.253588293583313, 4)
300300
self.assertAlmostEqual(dcc['aeccar2'].data['total'].sum()/cc.ngridpts, 8.01314480789829, 4)
301+
# check the retrieve_task function for the same fake calculation
302+
ret_task = mmdb.retrieve_task(t_id)
303+
ret_chgcar = ret_task['calcs_reversed'][0]['chgcar']
304+
ret_aeccar0 = ret_task['calcs_reversed'][0]['aeccar0']
305+
ret_aeccar2 = ret_task['calcs_reversed'][0]['aeccar2']
306+
ret_aeccar = ret_aeccar0 + ret_aeccar2
307+
self.assertAlmostEqual(ret_chgcar.data['total'].sum()/ret_chgcar.ngridpts, 8.0, 4)
308+
self.assertAlmostEqual(ret_aeccar.data['total'].sum()/ret_aeccar.ngridpts, 31.2667331015, 4)
301309

302310
def test_chgcar_db_read(self):
303311
# add the workflow

0 commit comments

Comments
 (0)