Skip to content

Commit a49cdd0

Browse files
Osmos (#440)
* assign container rank via registration order * use temp variable * re-raise cancel errors * re-throw cancelled exceptions * fixed typo --------- Co-authored-by: Joshua Stillerman <jas@mit.edu>
1 parent 824d008 commit a49cdd0

File tree

4 files changed

+23
-26
lines changed

4 files changed

+23
-26
lines changed

hsds/basenode.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ async def docker_update_dn_info(app):
236236
log.error("HEAD node seems to be down.")
237237
app["dn_urls"] = []
238238
app["dn_ids"] = []
239+
except HTTPServiceUnavailable:
240+
log.warn("Head ServiceUnavailable")
239241
except OSError:
240242
log.error("failed to register")
241243
app["dn_urls"] = []

hsds/datanode_lib.py

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,7 @@ async def write_s3_obj(app, obj_id, bucket=None):
149149
bucket = domain_bucket
150150

151151
if obj_id in pending_s3_write:
152-
msg = f"write_s3_key - not expected for key {obj_id} to be in "
153-
msg += "pending_s3_write map"
152+
msg = f"write_s3_key - not expected for key {obj_id} to be in pending_s3_write map"
154153
log.error(msg)
155154
raise KeyError(msg)
156155

@@ -172,12 +171,10 @@ async def write_s3_obj(app, obj_id, bucket=None):
172171
# timestamp is first element of two-tuple
173172
last_update_time = dirty_ids[obj_id][0]
174173
else:
175-
msg = f"write_s3_obj - {obj_id} not in dirty_ids, "
176-
msg += "assuming flush write"
174+
msg = f"write_s3_obj - {obj_id} not in dirty_ids, assuming flush write"
177175
log.debug(msg)
178176
if last_update_time > now:
179-
msg = f"last_update time {last_update_time} is in the future for "
180-
msg += f"obj_id: {obj_id}"
177+
msg = f"last_update time {last_update_time} is in the future for obj_id: {obj_id}"
181178
log.error(msg)
182179
raise ValueError(msg)
183180

@@ -198,8 +195,7 @@ async def write_s3_obj(app, obj_id, bucket=None):
198195
dset_id = getDatasetId(obj_id)
199196
if dset_id in filter_map:
200197
filter_ops = filter_map[dset_id]
201-
msg = f"write_s3_obj: got filter_op: {filter_ops} "
202-
msg += f"for dset: {dset_id}"
198+
msg = f"write_s3_obj: got filter_op: {filter_ops} for dset: {dset_id}"
203199
log.debug(msg)
204200
else:
205201
filter_ops = None
@@ -237,13 +233,11 @@ async def write_s3_obj(app, obj_id, bucket=None):
237233
# meta data update
238234
# check for object in meta cache
239235
if obj_id not in meta_cache:
240-
msg = f"write_s3_obj: expected to find obj_id: {obj_id} "
241-
msg += "in meta cache"
236+
msg = f"write_s3_obj: expected to find obj_id: {obj_id} in meta cache"
242237
log.error(msg)
243238
raise KeyError(f"{obj_id} not found in meta cache")
244239
if not meta_cache.isDirty(obj_id):
245-
msg = f"write_s3_obj: expected meta cache obj {obj_id} "
246-
msg == "to be dirty"
240+
msg = f"write_s3_obj: expected meta cache obj {obj_id} to be dirty"
247241
log.error(msg)
248242
raise ValueError("bad dirty state for obj")
249243
obj_json = meta_cache[obj_id]
@@ -264,8 +258,7 @@ async def write_s3_obj(app, obj_id, bucket=None):
264258
else:
265259
timestamp = 0
266260
if timestamp > last_update_time:
267-
msg = f"write_s3_obj: {obj_id} got updated while s3 "
268-
msg += "write was in progress"
261+
msg = f"write_s3_obj: {obj_id} got updated while s3 write was in progress"
269262
log.info(msg)
270263
else:
271264
log.debug(f"write_s3obj: clear dirty for {obj_id} ")
@@ -279,11 +272,10 @@ async def write_s3_obj(app, obj_id, bucket=None):
279272

280273
finally:
281274
# clear pending_s3_write item
282-
log.debug(f"write_s3_obj finally block, success={success}")
275+
log.debug(f"write_s3_obj {obj_id} finally block, success={success}")
283276
if obj_id in pending_s3_write:
284277
if pending_s3_write[obj_id] != now:
285-
msg = "pending_s3_write timestamp got updated unexpectedly "
286-
msg += f"for {obj_id}"
278+
msg = f"pending_s3_write timestamp got updated unexpectedly for {obj_id}"
287279
log.error(msg)
288280
del pending_s3_write[obj_id]
289281
# clear task
@@ -1259,10 +1251,9 @@ def callback(future):
12591251

12601252
if obj_id in pending_s3_write:
12611253
pending_time = s3sync_start - pending_s3_write[obj_id]
1262-
msg = f"s3sync - key {obj_id} has been pending for "
1263-
msg += f"{pending_time:.3f}"
1254+
msg = f"s3sync - key {obj_id} has been pending for {pending_time:.3f}"
12641255
log.debug(msg)
1265-
if s3sync_start - pending_s3_write[obj_id] > s3_sync_task_timeout:
1256+
if pending_time > s3_sync_task_timeout:
12661257
msg = f"s3sync - obj {obj_id} has been in pending_s3_write "
12671258
msg += f"for {pending_time:.3f} seconds, restarting"
12681259
log.warn(msg)

hsds/util/fileClient.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,8 @@ async def get_object(self, key, bucket=None, offset=0, length=-1):
166166
except CancelledError as cle:
167167
self._file_stats_increment("error_count")
168168
msg = f"CancelledError for get file obj {key}: {cle}"
169-
log.error(msg)
170-
raise HTTPInternalServerError()
169+
log.warn(msg)
170+
raise
171171
except Exception as e:
172172
self._file_stats_increment("error_count")
173173
msg = f"Unexpected Exception {type(e)} get get_object {key}: {e}"
@@ -227,8 +227,8 @@ async def put_object(self, key, data, bucket=None):
227227
except CancelledError as cle:
228228
# file_stats_increment(app, "error_count")
229229
msg = f"CancelledError for put file obj {key}: {cle}"
230-
log.error(msg)
231-
raise HTTPInternalServerError()
230+
log.warn(msg)
231+
raise
232232

233233
except Exception as e:
234234
# file_stats_increment(app, "error_count")
@@ -274,8 +274,8 @@ async def delete_object(self, key, bucket=None):
274274
except CancelledError as cle:
275275
self._file_stats_increment("error_count")
276276
msg = f"CancelledError deleting file obj {key}: {cle}"
277-
log.error(msg)
278-
raise HTTPInternalServerError()
277+
log.warn(msg)
278+
raise
279279

280280
except Exception as e:
281281
self._file_stats_increment("error_count")

hsds/util/idUtil.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,5 +536,9 @@ def getDataNodeUrl(app, obj_id):
536536
raise HTTPServiceUnavailable()
537537
dn_number = getObjPartition(obj_id, dn_node_count)
538538
url = dn_urls[dn_number]
539+
if not url:
540+
msg = "Service not ready (no DN url set)"
541+
log.warn(msg)
542+
raise HTTPServiceUnavailable()
539543
log.debug(f"got dn_url: {url} for obj_id: {obj_id}")
540544
return url

0 commit comments

Comments
 (0)