Skip to content

Commit 0bc541e

Browse files
committed
checking and retrieving of AECCAR
Added positive definite checking of AECCAR and retrieve_task support for chgcar
1 parent 457bbd8 commit 0bc541e

File tree

2 files changed

+34
-10
lines changed

2 files changed

+34
-10
lines changed

atomate/vasp/database.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ def build_indexes(self, indexes=None, background=True):
7171
def insert_task(self, task_doc, use_gridfs=False):
7272
"""
7373
Inserts a task document (e.g., as returned by Drone.assimilate()) into the database.
74-
Handles putting DOS and band structure into GridFS as needed.
74+
Handles putting DOS, band structure and charge density into GridFS as needed.
75+
During testing, a percentage of runs on some clusters had corrupted AECCAR files even when everything else about the calculation looked OK.
76+
So we do a quick check here and only record the AECCARs if they are valid.
7577
7678
Args:
7779
task_doc: (dict) the task document
@@ -83,6 +85,7 @@ def insert_task(self, task_doc, use_gridfs=False):
8385
bs = None
8486
chgcar = None
8587
aeccar0 = None
88+
write_aeccar = False
8689

8790
# move dos BS and CHGCAR from doc to gridfs
8891
if use_gridfs and "calcs_reversed" in task_doc:
@@ -99,15 +102,20 @@ def insert_task(self, task_doc, use_gridfs=False):
99102
chgcar = json.dumps(task_doc["calcs_reversed"][0]["chgcar"], cls=MontyEncoder)
100103
del task_doc["calcs_reversed"][0]["chgcar"]
101104

102-
if "aeccar0" in task_doc["calcs_reversed"][0]: # only store idx=0 DOS
105+
if "aeccar0" in task_doc["calcs_reversed"][0]:
103106
aeccar0 = task_doc["calcs_reversed"][0]["aeccar0"]
104-
aeccar0 = json.dumps(task_doc["calcs_reversed"][0]["aeccar0"], cls=MontyEncoder)
105-
del task_doc["calcs_reversed"][0]["aeccar0"]
106-
try:
107-
# aeccar2 should also be in the task_doc
107+
aeccar2 = task_doc["calcs_reversed"][0]["aeccar2"]
108+
# check if the aeccar is valid before insertion
109+
if (aeccar0.data['total'] + aeccar2.data['total']).min() < 0:
110+
logger.warning(f"The AECCAR seems to be corrupted for task_in directory {task_doc['dir_name']}\nSkipping storage of AECCARs")
111+
write_aeccar = False
112+
else:
113+
# overwrite the aeccar variable with their string representations to be inserted in GridFS
114+
aeccar0 = json.dumps(task_doc["calcs_reversed"][0]["aeccar0"], cls=MontyEncoder)
108115
aeccar2 = json.dumps(task_doc["calcs_reversed"][0]["aeccar2"], cls=MontyEncoder)
109-
except:
110-
raise KeyError('aeccar2 data is missing from task_doc')
116+
write_aeccar = True
117+
118+
del task_doc["calcs_reversed"][0]["aeccar0"]
111119
del task_doc["calcs_reversed"][0]["aeccar2"]
112120

113121
# insert the task document
@@ -136,7 +144,7 @@ def insert_task(self, task_doc, use_gridfs=False):
136144
self.collection.update_one({"task_id": t_id}, {"$set": {"calcs_reversed.0.chgcar_fs_id": chgcar_gfs_id}})
137145

138146
# insert the AECCARs file into gridfs and update the task documents
139-
if aeccar0:
147+
if write_aeccar:
140148
aeccar0_gfs_id, compression_type = self.insert_gridfs(aeccar0, "aeccar0_fs", task_id=t_id)
141149
self.collection.update_one(
142150
{"task_id": t_id}, {"$set": {"calcs_reversed.0.aeccar0_compression": compression_type}})
@@ -169,6 +177,10 @@ def retrieve_task(self, task_id):
169177
if 'chgcar_fs_id' in calc:
170178
chgcar = self.get_chgcar(task_id)
171179
calc["chgcar"] = chgcar
180+
if 'aeccar0_fs_id' in calc:
181+
aeccar = self.get_aeccar(task_id)
182+
calc["aeccar0"] = aeccar['aeccar0']
183+
calc["aeccar2"] = aeccar['aeccar2']
172184
return task_doc
173185

174186
def insert_gridfs(self, d, collection="fs", compress=True, oid=None, task_id=None):
@@ -244,11 +256,12 @@ def get_chgcar(self, task_id):
244256
chgcar= json.loads(chgcar_json, cls=MontyDecoder)
245257
return chgcar
246258

247-
def get_aeccar(self, task_id):
259+
def get_aeccar(self, task_id, check_valid = True):
248260
"""
249261
Read the AECCAR0 + AECCAR2 grid_fs data into a Chgcar object
250262
Args:
251263
task_id(int or str): the task_id containing the gridfs metadata
264+
check_valid (bool): make sure that the aeccar is positive definite
252265
Returns:
253266
(aeccar0, aeccar2): Chgcar objects
254267
"""
@@ -262,6 +275,9 @@ def get_aeccar(self, task_id):
262275
aeccar_json = zlib.decompress(fs.get(fs_id).read())
263276
aeccar2 = json.loads(aeccar_json, cls=MontyDecoder)
264277

278+
if check_valid and (aeccar0.data['total'] + aeccar2.data['total']).min() < 0:
279+
ValueError(f"The AECCAR seems to be corrupted for task_id = {task_id}")
280+
265281
return {'aeccar0': aeccar0, 'aeccar2': aeccar2}
266282

267283
def reset(self):

atomate/vasp/workflows/tests/test_vasp_workflows.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,14 @@ def test_chgcar_db_read_write(self):
298298
dcc = mmdb.get_aeccar(task_id=t_id)
299299
self.assertAlmostEqual(dcc['aeccar0'].data['total'].sum()/cc.ngridpts, 23.253588293583313, 4)
300300
self.assertAlmostEqual(dcc['aeccar2'].data['total'].sum()/cc.ngridpts, 8.01314480789829, 4)
301+
# check the retrieve_task function for the same fake calculation
302+
ret_task = mmdb.retrieve_task(t_id)
303+
ret_chgcar = ret_task['calcs_reversed'][0]['chgcar']
304+
ret_aeccar0 = ret_task['calcs_reversed'][0]['aeccar0']
305+
ret_aeccar2 = ret_task['calcs_reversed'][0]['aeccar2']
306+
ret_aeccar = ret_aeccar0 + ret_aeccar2
307+
self.assertAlmostEqual(ret_chgcar.data['total'].sum()/ret_chgcar.ngridpts, 8.0, 4)
308+
self.assertAlmostEqual(ret_aeccar.data['total'].sum()/ret_aeccar.ngridpts, 31.2667331015, 4)
301309

302310
def test_chgcar_db_read(self):
303311
# add the workflow

0 commit comments

Comments
 (0)