14
14
15
15
from pymatgen .electronic_structure .bandstructure import BandStructure , BandStructureSymmLine
16
16
from pymatgen .electronic_structure .dos import CompleteDos
17
- from pymatgen .io .vasp import Chgcar
18
17
19
18
import gridfs
20
19
from pymongo import ASCENDING , DESCENDING
@@ -71,7 +70,9 @@ def build_indexes(self, indexes=None, background=True):
71
70
def insert_task (self , task_doc , use_gridfs = False ):
72
71
"""
73
72
Inserts a task document (e.g., as returned by Drone.assimilate()) into the database.
74
- Handles putting DOS and band structure into GridFS as needed.
73
+ Handles putting DOS, band structure and charge density into GridFS as needed.
74
+ During testing, a percentage of runs on some clusters had corrupted AECCAR files even when everything else about the calculation looked OK.
75
+ So we do a quick check here and only record the AECCARs if they are valid.
75
76
76
77
Args:
77
78
task_doc: (dict) the task document
@@ -83,6 +84,7 @@ def insert_task(self, task_doc, use_gridfs=False):
83
84
bs = None
84
85
chgcar = None
85
86
aeccar0 = None
87
+ write_aeccar = False
86
88
87
89
# move dos BS and CHGCAR from doc to gridfs
88
90
if use_gridfs and "calcs_reversed" in task_doc :
@@ -99,15 +101,20 @@ def insert_task(self, task_doc, use_gridfs=False):
99
101
chgcar = json .dumps (task_doc ["calcs_reversed" ][0 ]["chgcar" ], cls = MontyEncoder )
100
102
del task_doc ["calcs_reversed" ][0 ]["chgcar" ]
101
103
102
- if "aeccar0" in task_doc ["calcs_reversed" ][0 ]: # only store idx=0 DOS
104
+ if "aeccar0" in task_doc ["calcs_reversed" ][0 ]:
103
105
aeccar0 = task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
104
- aeccar0 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar0" ], cls = MontyEncoder )
105
- del task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
106
- try :
107
- # aeccar2 should also be in the task_doc
106
+ aeccar2 = task_doc ["calcs_reversed" ][0 ]["aeccar2" ]
107
+ # check if the aeccar is valid before insertion
108
+ if (aeccar0 .data ['total' ] + aeccar2 .data ['total' ]).min () < 0 :
109
+ logger .warning (f"The AECCAR seems to be corrupted for task_in directory { task_doc ['dir_name' ]} \n Skipping storage of AECCARs" )
110
+ write_aeccar = False
111
+ else :
112
+ # overwrite the aeccar variables with their JSON string representations to be inserted in GridFS
113
+ aeccar0 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar0" ], cls = MontyEncoder )
108
114
aeccar2 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar2" ], cls = MontyEncoder )
109
- except :
110
- raise KeyError ('aeccar2 data is missing from task_doc' )
115
+ write_aeccar = True
116
+
117
+ del task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
111
118
del task_doc ["calcs_reversed" ][0 ]["aeccar2" ]
112
119
113
120
# insert the task document
@@ -136,7 +143,7 @@ def insert_task(self, task_doc, use_gridfs=False):
136
143
self .collection .update_one ({"task_id" : t_id }, {"$set" : {"calcs_reversed.0.chgcar_fs_id" : chgcar_gfs_id }})
137
144
138
145
# insert the AECCARs file into gridfs and update the task documents
139
- if aeccar0 :
146
+ if write_aeccar :
140
147
aeccar0_gfs_id , compression_type = self .insert_gridfs (aeccar0 , "aeccar0_fs" , task_id = t_id )
141
148
self .collection .update_one (
142
149
{"task_id" : t_id }, {"$set" : {"calcs_reversed.0.aeccar0_compression" : compression_type }})
@@ -169,6 +176,10 @@ def retrieve_task(self, task_id):
169
176
if 'chgcar_fs_id' in calc :
170
177
chgcar = self .get_chgcar (task_id )
171
178
calc ["chgcar" ] = chgcar
179
+ if 'aeccar0_fs_id' in calc :
180
+ aeccar = self .get_aeccar (task_id )
181
+ calc ["aeccar0" ] = aeccar ['aeccar0' ]
182
+ calc ["aeccar2" ] = aeccar ['aeccar2' ]
172
183
return task_doc
173
184
174
185
def insert_gridfs (self , d , collection = "fs" , compress = True , oid = None , task_id = None ):
@@ -244,11 +255,12 @@ def get_chgcar(self, task_id):
244
255
chgcar = json .loads (chgcar_json , cls = MontyDecoder )
245
256
return chgcar
246
257
247
- def get_aeccar (self , task_id ):
258
+ def get_aeccar (self , task_id , check_valid = True ):
248
259
"""
249
260
Read the AECCAR0 + AECCAR2 grid_fs data into a Chgcar object
250
261
Args:
251
262
task_id(int or str): the task_id containing the gridfs metadata
263
+ check_valid (bool): check that the summed AECCAR0 + AECCAR2 total density has no negative values
252
264
Returns:
253
265
{'aeccar0': aeccar0, 'aeccar2': aeccar2}: dict of Chgcar objects
254
266
"""
@@ -262,6 +274,9 @@ def get_aeccar(self, task_id):
262
274
aeccar_json = zlib .decompress (fs .get (fs_id ).read ())
263
275
aeccar2 = json .loads (aeccar_json , cls = MontyDecoder )
264
276
277
+ if check_valid and (aeccar0 .data ['total' ] + aeccar2 .data ['total' ]).min () < 0 :
278
+ ValueError (f"The AECCAR seems to be corrupted for task_id = { task_id } " )
279
+
265
280
return {'aeccar0' : aeccar0 , 'aeccar2' : aeccar2 }
266
281
267
282
def reset (self ):
0 commit comments