Skip to content

Commit 7e20b71

Browse files
authored
Merge pull request #276 from jmmshn/master
Small update for the writing and retrieving of AECCARs
2 parents 246d684 + 2d672ea commit 7e20b71

File tree

2 files changed

+35
-11
lines changed

2 files changed

+35
-11
lines changed

atomate/vasp/database.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ def build_indexes(self, indexes=None, background=True):
7070
def insert_task(self, task_doc, use_gridfs=False):
7171
"""
7272
Inserts a task document (e.g., as returned by Drone.assimilate()) into the database.
73-
Handles putting DOS and band structure into GridFS as needed.
73+
Handles putting DOS, band structure and charge density into GridFS as needed.
74+
During testing, a percentage of runs on some clusters had corrupted AECCAR files even when everything else about the calculation looked OK.
75+
So we do a quick check here and only record the AECCARs if they are valid
7476
7577
Args:
7678
task_doc: (dict) the task document
@@ -82,6 +84,7 @@ def insert_task(self, task_doc, use_gridfs=False):
8284
bs = None
8385
chgcar = None
8486
aeccar0 = None
87+
write_aeccar = False
8588

8689
# move dos BS and CHGCAR from doc to gridfs
8790
if use_gridfs and "calcs_reversed" in task_doc:
@@ -98,15 +101,20 @@ def insert_task(self, task_doc, use_gridfs=False):
98101
chgcar = json.dumps(task_doc["calcs_reversed"][0]["chgcar"], cls=MontyEncoder)
99102
del task_doc["calcs_reversed"][0]["chgcar"]
100103

101-
if "aeccar0" in task_doc["calcs_reversed"][0]: # only store idx=0 DOS
104+
if "aeccar0" in task_doc["calcs_reversed"][0]:
102105
aeccar0 = task_doc["calcs_reversed"][0]["aeccar0"]
103-
aeccar0 = json.dumps(task_doc["calcs_reversed"][0]["aeccar0"], cls=MontyEncoder)
104-
del task_doc["calcs_reversed"][0]["aeccar0"]
105-
try:
106-
# aeccar2 should also be in the task_doc
106+
aeccar2 = task_doc["calcs_reversed"][0]["aeccar2"]
107+
# check if the aeccar is valid before insertion
108+
if (aeccar0.data['total'] + aeccar2.data['total']).min() < 0:
109+
logger.warning(f"The AECCAR seems to be corrupted for task_in directory {task_doc['dir_name']}\nSkipping storage of AECCARs")
110+
write_aeccar = False
111+
else:
112+
# overwrite the aeccar variable with their string representations to be inserted in GridFS
113+
aeccar0 = json.dumps(task_doc["calcs_reversed"][0]["aeccar0"], cls=MontyEncoder)
107114
aeccar2 = json.dumps(task_doc["calcs_reversed"][0]["aeccar2"], cls=MontyEncoder)
108-
except:
109-
raise KeyError('aeccar2 data is missing from task_doc')
115+
write_aeccar = True
116+
117+
del task_doc["calcs_reversed"][0]["aeccar0"]
110118
del task_doc["calcs_reversed"][0]["aeccar2"]
111119

112120
# insert the task document
@@ -135,7 +143,7 @@ def insert_task(self, task_doc, use_gridfs=False):
135143
self.collection.update_one({"task_id": t_id}, {"$set": {"calcs_reversed.0.chgcar_fs_id": chgcar_gfs_id}})
136144

137145
# insert the AECCARs file into gridfs and update the task documents
138-
if aeccar0:
146+
if write_aeccar:
139147
aeccar0_gfs_id, compression_type = self.insert_gridfs(aeccar0, "aeccar0_fs", task_id=t_id)
140148
self.collection.update_one(
141149
{"task_id": t_id}, {"$set": {"calcs_reversed.0.aeccar0_compression": compression_type}})
@@ -168,6 +176,10 @@ def retrieve_task(self, task_id):
168176
if 'chgcar_fs_id' in calc:
169177
chgcar = self.get_chgcar(task_id)
170178
calc["chgcar"] = chgcar
179+
if 'aeccar0_fs_id' in calc:
180+
aeccar = self.get_aeccar(task_id)
181+
calc["aeccar0"] = aeccar['aeccar0']
182+
calc["aeccar2"] = aeccar['aeccar2']
171183
return task_doc
172184

173185
def insert_gridfs(self, d, collection="fs", compress=True, oid=None, task_id=None):
@@ -243,13 +255,14 @@ def get_chgcar(self, task_id):
243255
chgcar= json.loads(chgcar_json, cls=MontyDecoder)
244256
return chgcar
245257

246-
def get_aeccar(self, task_id):
258+
def get_aeccar(self, task_id, check_valid = True):
247259
"""
248260
Read the AECCAR0 + AECCAR2 grid_fs data into a Chgcar object
249261
Args:
250262
task_id(int or str): the task_id containing the gridfs metadata
263+
check_valid (bool): check that the summed AECCAR0 + AECCAR2 total density has no negative values
251264
Returns:
252-
(aeccar0, aeccar2): Chgcar objects
265+
{"aeccar0" : Chgcar, "aeccar2" : Chgcar}: dict of Chgcar objects
253266
"""
254267
m_task = self.collection.find_one({"task_id": task_id}, {"calcs_reversed": 1})
255268
fs_id = m_task['calcs_reversed'][0]['aeccar0_fs_id']
@@ -261,6 +274,9 @@ def get_aeccar(self, task_id):
261274
aeccar_json = zlib.decompress(fs.get(fs_id).read())
262275
aeccar2 = json.loads(aeccar_json, cls=MontyDecoder)
263276

277+
if check_valid and (aeccar0.data['total'] + aeccar2.data['total']).min() < 0:
278+
ValueError(f"The AECCAR seems to be corrupted for task_id = {task_id}")
279+
264280
return {'aeccar0': aeccar0, 'aeccar2': aeccar2}
265281

266282
def reset(self):

atomate/vasp/workflows/tests/test_vasp_workflows.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,14 @@ def test_chgcar_db_read_write(self):
298298
dcc = mmdb.get_aeccar(task_id=t_id)
299299
self.assertAlmostEqual(dcc['aeccar0'].data['total'].sum()/cc.ngridpts, 23.253588293583313, 4)
300300
self.assertAlmostEqual(dcc['aeccar2'].data['total'].sum()/cc.ngridpts, 8.01314480789829, 4)
301+
# check the retrieve_task function for the same fake calculation
302+
ret_task = mmdb.retrieve_task(t_id)
303+
ret_chgcar = ret_task['calcs_reversed'][0]['chgcar']
304+
ret_aeccar0 = ret_task['calcs_reversed'][0]['aeccar0']
305+
ret_aeccar2 = ret_task['calcs_reversed'][0]['aeccar2']
306+
ret_aeccar = ret_aeccar0 + ret_aeccar2
307+
self.assertAlmostEqual(ret_chgcar.data['total'].sum()/ret_chgcar.ngridpts, 8.0, 4)
308+
self.assertAlmostEqual(ret_aeccar.data['total'].sum()/ret_aeccar.ngridpts, 31.2667331015, 4)
301309

302310
def test_chgcar_db_read(self):
303311
# add the workflow

0 commit comments

Comments
 (0)