@@ -71,7 +71,9 @@ def build_indexes(self, indexes=None, background=True):
71
71
def insert_task (self , task_doc , use_gridfs = False ):
72
72
"""
73
73
Inserts a task document (e.g., as returned by Drone.assimilate()) into the database.
74
- Handles putting DOS and band structure into GridFS as needed.
74
+ Handles putting DOS, band structure and charge density into GridFS as needed.
75
+ During testing, a percentage of runs on some clusters produced corrupted AECCAR files even when everything else about the calculation looked OK.
76
+ So we do a quick check here and only record the AECCARs if they are valid
75
77
76
78
Args:
77
79
task_doc: (dict) the task document
@@ -83,6 +85,7 @@ def insert_task(self, task_doc, use_gridfs=False):
83
85
bs = None
84
86
chgcar = None
85
87
aeccar0 = None
88
+ write_aeccar = False
86
89
87
90
# move dos BS and CHGCAR from doc to gridfs
88
91
if use_gridfs and "calcs_reversed" in task_doc :
@@ -99,15 +102,20 @@ def insert_task(self, task_doc, use_gridfs=False):
99
102
chgcar = json .dumps (task_doc ["calcs_reversed" ][0 ]["chgcar" ], cls = MontyEncoder )
100
103
del task_doc ["calcs_reversed" ][0 ]["chgcar" ]
101
104
102
- if "aeccar0" in task_doc ["calcs_reversed" ][0 ]: # only store idx=0 DOS
105
+ if "aeccar0" in task_doc ["calcs_reversed" ][0 ]:
103
106
aeccar0 = task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
104
- aeccar0 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar0" ], cls = MontyEncoder )
105
- del task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
106
- try :
107
- # aeccar2 should also be in the task_doc
107
+ aeccar2 = task_doc ["calcs_reversed" ][0 ]["aeccar2" ]
108
+ # check if the aeccar is valid before insertion
109
+ if (aeccar0 .data ['total' ] + aeccar2 .data ['total' ]).min () < 0 :
110
+ logger .warning (f"The AECCAR seems to be corrupted for task_in directory { task_doc ['dir_name' ]} \n Skipping storage of AECCARs" )
111
+ write_aeccar = False
112
+ else :
113
+ # overwrite the aeccar variables with their JSON string representations to be inserted in GridFS
114
+ aeccar0 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar0" ], cls = MontyEncoder )
108
115
aeccar2 = json .dumps (task_doc ["calcs_reversed" ][0 ]["aeccar2" ], cls = MontyEncoder )
109
- except :
110
- raise KeyError ('aeccar2 data is missing from task_doc' )
116
+ write_aeccar = True
117
+
118
+ del task_doc ["calcs_reversed" ][0 ]["aeccar0" ]
111
119
del task_doc ["calcs_reversed" ][0 ]["aeccar2" ]
112
120
113
121
# insert the task document
@@ -136,7 +144,7 @@ def insert_task(self, task_doc, use_gridfs=False):
136
144
self .collection .update_one ({"task_id" : t_id }, {"$set" : {"calcs_reversed.0.chgcar_fs_id" : chgcar_gfs_id }})
137
145
138
146
# insert the AECCARs file into gridfs and update the task documents
139
- if aeccar0 :
147
+ if write_aeccar :
140
148
aeccar0_gfs_id , compression_type = self .insert_gridfs (aeccar0 , "aeccar0_fs" , task_id = t_id )
141
149
self .collection .update_one (
142
150
{"task_id" : t_id }, {"$set" : {"calcs_reversed.0.aeccar0_compression" : compression_type }})
@@ -169,6 +177,10 @@ def retrieve_task(self, task_id):
169
177
if 'chgcar_fs_id' in calc :
170
178
chgcar = self .get_chgcar (task_id )
171
179
calc ["chgcar" ] = chgcar
180
+ if 'aeccar0_fs_id' in calc :
181
+ aeccar = self .get_aeccar (task_id )
182
+ calc ["aeccar0" ] = aeccar ['aeccar0' ]
183
+ calc ["aeccar2" ] = aeccar ['aeccar2' ]
172
184
return task_doc
173
185
174
186
def insert_gridfs (self , d , collection = "fs" , compress = True , oid = None , task_id = None ):
@@ -244,11 +256,12 @@ def get_chgcar(self, task_id):
244
256
chgcar = json .loads (chgcar_json , cls = MontyDecoder )
245
257
return chgcar
246
258
247
- def get_aeccar (self , task_id ):
259
+ def get_aeccar (self , task_id , check_valid = True ):
248
260
"""
249
261
Read the AECCAR0 + AECCAR2 grid_fs data into a Chgcar object
250
262
Args:
251
263
task_id(int or str): the task_id containing the gridfs metadata
264
+ check_valid (bool): check that the summed AECCAR0 + AECCAR2 'total' data contains no negative values
252
265
Returns:
253
266
{'aeccar0': aeccar0, 'aeccar2': aeccar2}: dict of Chgcar objects
254
267
"""
@@ -262,6 +275,9 @@ def get_aeccar(self, task_id):
262
275
aeccar_json = zlib .decompress (fs .get (fs_id ).read ())
263
276
aeccar2 = json .loads (aeccar_json , cls = MontyDecoder )
264
277
278
+ if check_valid and (aeccar0 .data ['total' ] + aeccar2 .data ['total' ]).min () < 0 :
279
+ ValueError (f"The AECCAR seems to be corrupted for task_id = { task_id } " )
280
+
265
281
return {'aeccar0' : aeccar0 , 'aeccar2' : aeccar2 }
266
282
267
283
def reset (self ):
0 commit comments