@@ -70,7 +70,9 @@ def build_indexes(self, indexes=None, background=True):
70
70
def insert_task (self , task_doc , use_gridfs = False ):
71
71
"""
72
72
Inserts a task document (e.g., as returned by Drone.assimilate()) into the database.
73
- Handles putting DOS and band structure into GridFS as needed.
73
+ Handles putting DOS, band structure and charge density into GridFS as needed.
74
+ During testing, a percentage of runs on some clusters had corrupted AECCAR files even when everything else about the calculation looked OK.
75
+ So we do a quick check here and only record the AECCARs if they are valid (i.e. the sum AECCAR0 + AECCAR2 has no negative values).
74
76
75
77
Args:
76
78
task_doc: (dict) the task document
@@ -82,6 +84,7 @@ def insert_task(self, task_doc, use_gridfs=False):
82
84
bs = None
83
85
chgcar = None
84
86
aeccar0 = None
87
+ write_aeccar = False
85
88
86
89
# move dos BS and CHGCAR from doc to gridfs
87
90
if use_gridfs and "calcs_reversed" in task_doc :
@@ -98,15 +101,20 @@ def insert_task(self, task_doc, use_gridfs=False):
98
101
chgcar = json .dumps (task_doc ["calcs_reversed" ][0 ]["chgcar" ], cls = MontyEncoder )
99
102
del task_doc ["calcs_reversed" ][0 ]["chgcar" ]
100
103
101
- if "aeccar0" in task_doc ["calcs_reversed" ][0 ]: # only store idx=0 DOS
104
+ if "aeccar0" in task_doc ["calcs_reversed" ][0 ]:
102
105
aeccar0 = task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
103
- aeccar0 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar0" ], cls = MontyEncoder )
104
- del task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
105
- try :
106
- # aeccar2 should also be in the task_doc
106
+ aeccar2 = task_doc ["calcs_reversed" ][0 ]["aeccar2" ]
107
+ # check if the aeccar is valid before insertion
108
+ if (aeccar0 .data ['total' ] + aeccar2 .data ['total' ]).min () < 0 :
109
+ logger .warning (f"The AECCAR seems to be corrupted for task_in directory { task_doc ['dir_name' ]} \n Skipping storage of AECCARs" )
110
+ write_aeccar = False
111
+ else :
112
+ # overwrite the aeccar variables with their string representations to be inserted in GridFS
113
+ aeccar0 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar0" ], cls = MontyEncoder )
107
114
aeccar2 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar2" ], cls = MontyEncoder )
108
- except :
109
- raise KeyError ('aeccar2 data is missing from task_doc' )
115
+ write_aeccar = True
116
+
117
+ del task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
110
118
del task_doc ["calcs_reversed" ][0 ]["aeccar2" ]
111
119
112
120
# insert the task document
@@ -135,7 +143,7 @@ def insert_task(self, task_doc, use_gridfs=False):
135
143
self .collection .update_one ({"task_id" : t_id }, {"$set" : {"calcs_reversed.0.chgcar_fs_id" : chgcar_gfs_id }})
136
144
137
145
# insert the AECCARs file into gridfs and update the task documents
138
- if aeccar0 :
146
+ if write_aeccar :
139
147
aeccar0_gfs_id , compression_type = self .insert_gridfs (aeccar0 , "aeccar0_fs" , task_id = t_id )
140
148
self .collection .update_one (
141
149
{"task_id" : t_id }, {"$set" : {"calcs_reversed.0.aeccar0_compression" : compression_type }})
@@ -168,6 +176,10 @@ def retrieve_task(self, task_id):
168
176
if 'chgcar_fs_id' in calc :
169
177
chgcar = self .get_chgcar (task_id )
170
178
calc ["chgcar" ] = chgcar
179
+ if 'aeccar0_fs_id' in calc :
180
+ aeccar = self .get_aeccar (task_id )
181
+ calc ["aeccar0" ] = aeccar ['aeccar0' ]
182
+ calc ["aeccar2" ] = aeccar ['aeccar2' ]
171
183
return task_doc
172
184
173
185
def insert_gridfs (self , d , collection = "fs" , compress = True , oid = None , task_id = None ):
@@ -243,13 +255,14 @@ def get_chgcar(self, task_id):
243
255
chgcar = json .loads (chgcar_json , cls = MontyDecoder )
244
256
return chgcar
245
257
246
- def get_aeccar (self , task_id ):
258
+ def get_aeccar (self , task_id , check_valid = True ):
247
259
"""
248
260
Read the AECCAR0 and AECCAR2 grid_fs data into Chgcar objects
249
261
Args:
250
262
task_id(int or str): the task_id containing the gridfs metadata
263
+ check_valid (bool): if True, check that the summed AECCAR0 + AECCAR2 data has no negative values
251
264
Returns:
252
- ( aeccar0, aeccar2): Chgcar objects
265
+ {" aeccar0" : Chgcar, " aeccar2" : Chgcar}: dict of Chgcar objects
253
266
"""
254
267
m_task = self .collection .find_one ({"task_id" : task_id }, {"calcs_reversed" : 1 })
255
268
fs_id = m_task ['calcs_reversed' ][0 ]['aeccar0_fs_id' ]
@@ -261,6 +274,9 @@ def get_aeccar(self, task_id):
261
274
aeccar_json = zlib .decompress (fs .get (fs_id ).read ())
262
275
aeccar2 = json .loads (aeccar_json , cls = MontyDecoder )
263
276
277
+ if check_valid and (aeccar0 .data ['total' ] + aeccar2 .data ['total' ]).min () < 0 :
278
+ ValueError (f"The AECCAR seems to be corrupted for task_id = { task_id } " )
279
+
264
280
return {'aeccar0' : aeccar0 , 'aeccar2' : aeccar2 }
265
281
266
282
def reset (self ):
0 commit comments