Skip to content

Commit bfa9f0c

Browse files
authored
Nullreq (#444)
* forward compat changes for hsds 1.0 storage format, support null requests
* ignore /db key in folder crawl results
* import pkg_resources on demand
* fix for domains href
1 parent 18374bb commit bfa9f0c

File tree

9 files changed

+126
-37
lines changed

9 files changed

+126
-37
lines changed

hsds/chunk_crawl.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,7 @@ async def write_chunk_hyperslab(
8484
msg = f"write_chunk_hyperslab, chunk_id: {chunk_id}, slices: {slices}, "
8585
msg += f"bucket: {bucket}"
8686
log.info(msg)
87-
if "layout" not in dset_json:
88-
log.error(f"No layout found in dset_json: {dset_json}")
89-
raise HTTPInternalServerError()
87+
9088
partition_chunk_id = getChunkIdForPartition(chunk_id, dset_json)
9189
if partition_chunk_id != chunk_id:
9290
log.debug(f"using partition_chunk_id: {partition_chunk_id}")

hsds/datanode_lib.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from .util.domainUtil import isValidDomain, getBucketForDomain
2929
from .util.attrUtil import getRequestCollectionName
3030
from .util.httpUtil import http_post
31-
from .util.dsetUtil import getChunkLayout, getFilterOps, getShapeDims
31+
from .util.dsetUtil import getChunkLayout, getFilterOps, getLayoutClass, getShapeDims
3232
from .util.dsetUtil import getChunkInitializer, getSliceQueryParam, getFilters
3333
from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex
3434
from .util.arrayUtil import arrayToBytes, bytesToArray, jsonToArray
@@ -999,8 +999,7 @@ async def get_chunk(
999999
dims = getChunkLayout(dset_json)
10001000
type_json = dset_json["type"]
10011001
dt = createDataType(type_json)
1002-
layout_json = dset_json["layout"]
1003-
layout_class = layout_json.get("class")
1002+
layout_class = getLayoutClass(dset_json)
10041003
chunk_dims = getChunkLayout(dset_json)
10051004
fill_value = getFillValue(dset_json)
10061005

hsds/domain_sn.py

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def getIdList(objs, marker=None, limit=None):
148148
return ret_ids
149149

150150

151-
async def get_domains(request):
151+
async def get_domains(request, include_hrefs=False):
152152
"""This method is called by GET_Domains and GET_Domain"""
153153
app = request.app
154154
params = request.rel_url.query
@@ -251,6 +251,8 @@ async def get_domains(request):
251251
if s3key[-1] != "/":
252252
log.debug(f"get_domains - ignoring key: {s3key}")
253253
continue
254+
if prefix == '/' and s3key == "db":
255+
log.debug("get_domains - ignoring db key at top level")
254256
if len(s3key) > 1 and s3key[-2] == "/":
255257
# trim off double slash
256258
s3key = s3key[:-1]
@@ -376,7 +378,16 @@ async def get_domains(request):
376378
if domain in crawler._domain_dict:
377379
domain_json = crawler._domain_dict[domain]
378380
# mixin domain name
381+
log.debug(f"get_domains - adding name: {domain} to domain json")
379382
domain_json["name"] = domain
383+
if include_hrefs:
384+
hrefs = []
385+
if domain[-1] == "/":
386+
domain = domain[:-1]
387+
# add an href to get sub-domains if this is a folder
388+
href = getHref(request, "/", domain=domain)
389+
hrefs.append({"rel": "domain", "href": href})
390+
domain_json["hrefs"] = hrefs
380391
domains.append(domain_json)
381392
else:
382393
if not query:
@@ -400,7 +411,16 @@ async def GET_Domains(request):
400411

401412
domains = await get_domains(request)
402413

414+
for domain in domains:
415+
domain_name = domain["name"]
416+
href = getHref(request, "/", domain=domain_name)
417+
domain["hrefs"] = [{"rel": "domain", "href": href}]
418+
403419
rsp_json = {"domains": domains}
420+
hrefs = []
421+
folder_path = getDomainFromRequest(request, validate=False)
422+
href = getHref(request, "/domains", domain=folder_path)
423+
hrefs.append({"rel": "self", "href": href})
404424
rsp_json["hrefs"] = []
405425
resp = await jsonResponse(request, rsp_json)
406426
log.response(request, resp=resp)
@@ -447,7 +467,7 @@ async def GET_Domain(request):
447467
bucket = getBucketForDomain(domain)
448468
log.debug(f"GET_Domain domain: {domain} bucket: {bucket}")
449469

450-
if not bucket:
470+
if not bucket and domain:
451471
# no bucket defined, raise 400
452472
msg = "Bucket not provided"
453473
log.warn(msg)
@@ -460,11 +480,21 @@ async def GET_Domain(request):
460480
verbose = True
461481

462482
if not domain:
483+
if "host" in params:
484+
msg = "Passing domain path as 'host' parameter is no longer supported"
485+
log.warn(msg)
486+
raise HTTPBadRequest(reason=msg)
487+
# This is a special case since the top-level folder isn't an
488+
# actual domain (i.e. doesn't have corresponding domain json in S3)
463489
log.info("no domain passed in, returning all top-level domains")
464490
# no domain passed in, return top-level domains for this request
465-
domains = await get_domains(request)
491+
domains = await get_domains(request, include_hrefs=True)
466492
rsp_json = {"domains": domains}
467-
rsp_json["hrefs"] = []
493+
hrefs = []
494+
href = getHref(request, "/")
495+
hrefs.append({"rel": "self", "href": href})
496+
497+
rsp_json["hrefs"] = hrefs
468498
resp = await jsonResponse(request, rsp_json)
469499
log.response(request, resp=resp)
470500
return resp
@@ -482,6 +512,7 @@ async def GET_Domain(request):
482512
# if h5path is passed in, return object info for that path
483513
# (if exists)
484514
h5path = params["h5path"]
515+
log.debug(f"h5path: {h5path}")
485516

486517
# select which object to perform path search under
487518
base_id = parent_id if parent_id else domain_json["root"]
@@ -506,6 +537,7 @@ async def GET_Domain(request):
506537

507538
# client may not know class of object retrieved via path
508539
obj_json["class"] = getObjectClass(obj_id)
540+
log.debug(f"got object json: {obj_json}")
509541

510542
hrefs = []
511543
hrefs.append({"rel": "self", "href": getHref(request, "/")})
@@ -553,10 +585,6 @@ async def GET_Domain(request):
553585
if domain_objs:
554586
rsp_json["domain_objs"] = domain_objs
555587

556-
# include domain class if present
557-
# if "class" in domain_json:
558-
# rsp_json["class"] = domain_json["class"]
559-
560588
# include dn_ids if requested
561589
if "getdnids" in params and params["getdnids"]:
562590
rsp_json["dn_ids"] = app["dn_ids"]
@@ -573,6 +601,10 @@ async def GET_Domain(request):
573601
hrefs.append({"rel": "typebase", "href": href})
574602
href = getHref(request, "/groups/" + root_uuid)
575603
hrefs.append({"rel": "root", "href": href})
604+
else:
605+
# add an href to get sub-domains if this is a folder
606+
href = getHref(request, "/domains") + '/'
607+
hrefs.append({"rel": "domains", "href": href})
576608

577609
hrefs.append({"rel": "acls", "href": getHref(request, "/acls")})
578610
parent_domain = getParentDomain(domain)
@@ -587,9 +619,12 @@ async def GET_Domain(request):
587619

588620
rsp_json["hrefs"] = hrefs
589621
# mixin limits, version
590-
domain_json["limits"] = getLimits()
591-
domain_json["compressors"] = getCompressors()
592-
domain_json["version"] = getVersion()
622+
if "root" in domain_json:
623+
log.debug('getting limits, compressors, version for domain')
624+
domain_json["limits"] = getLimits()
625+
domain_json["compressors"] = getCompressors()
626+
domain_json["version"] = getVersion()
627+
log.debug(f"returning domain json: {domain_json}")
593628
resp = await jsonResponse(request, rsp_json)
594629
log.response(request, resp=resp)
595630
return resp

hsds/node_runner.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import pkg_resources
21
import site
32
import sys
43
from . import config
@@ -8,7 +7,7 @@
87

98

109
def removeSitePackages():
11-
10+
import pkg_resources
1211
# site_packages = "/var/lang/lib/python3.9/site-packages"
1312
# but this is removing: "/home/sbx_user1051/.local/lib/python3.9/site-packages" on lambda?
1413
site_packages = site.getusersitepackages()

hsds/servicenode_lib.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,10 @@ async def getDomainResponse(app, domain_json, bucket=None, verbose=False):
179179

180180
# pass back config parameters the client may care about
181181

182-
rsp_json["limits"] = getLimits()
183-
rsp_json["compressors"] = getCompressors()
184-
rsp_json["version"] = getVersion()
182+
if "root" in domain_json:
183+
rsp_json["limits"] = getLimits()
184+
rsp_json["compressors"] = getCompressors()
185+
rsp_json["version"] = getVersion()
185186
rsp_json["lastModified"] = lastModified
186187
return rsp_json
187188

hsds/util/chunkUtil.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,18 @@
99
PRIMES = [29, 31, 37, 41, 43, 47, 53, 59, 61, 67] # for chunk partitioning
1010

1111

12+
def _getLayout(dset_json):
13+
"""Get layout for the given dataset json. If layout is not found, try to
14+
get it from creationProperties.
15+
"""
16+
if "layout" in dset_json:
17+
return dset_json["layout"]
18+
elif "creationProperties" in dset_json and "layout" in dset_json["creationProperties"]:
19+
return dset_json["creationProperties"]["layout"]
20+
else:
21+
return None
22+
23+
1224
def getChunkSize(layout, type_size):
1325
"""Return chunk size given layout.
1426
i.e. just the product of the values in the list.
@@ -429,11 +441,12 @@ def getPartitionKey(chunk_id, partition_count):
429441

430442
def getChunkIdForPartition(chunk_id, dset_json):
431443
"""Return the partition specific chunk id for given chunk"""
432-
if "layout" not in dset_json:
444+
445+
layout_json = _getLayout(dset_json)
446+
if layout_json is None:
433447
msg = "No layout found in dset_json"
434448
log.error(msg)
435449
raise KeyError(msg)
436-
layout_json = dset_json["layout"]
437450
if "partition_count" in layout_json:
438451
partition_count = layout_json["partition_count"]
439452
partition = getChunkPartition(chunk_id)

hsds/util/dsetUtil.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -893,12 +893,37 @@ def getDsetMaxDims(dset_json):
893893
return maxdims
894894

895895

896+
def _getLayout(dset_json):
897+
"""Get layout for the given dataset json. If layout is not found, try to
898+
get it from creationProperties.
899+
"""
900+
if "layout" in dset_json:
901+
return dset_json["layout"]
902+
elif "creationProperties" in dset_json and "layout" in dset_json["creationProperties"]:
903+
return dset_json["creationProperties"]["layout"]
904+
else:
905+
return None
906+
907+
908+
def getLayoutClass(dset_json):
909+
"""Get layout class for the given dataset json. Throw 500 if no layout found"""
910+
layout_json = _getLayout(dset_json)
911+
if layout_json is None:
912+
log.error("No layout found in dset_json")
913+
raise HTTPInternalServerError()
914+
if "class" not in layout_json:
915+
log.error(f"Expected class key for layout: {layout_json}")
916+
raise HTTPInternalServerError()
917+
return layout_json["class"]
918+
919+
896920
def getChunkLayout(dset_json):
897921
"""Get chunk layout. Throw 500 if used with non-H5D_CHUNKED layout"""
898-
if "layout" not in dset_json:
922+
923+
layout_json = _getLayout(dset_json)
924+
if layout_json is None:
899925
log.error("No layout found in dset_json")
900926
raise HTTPInternalServerError()
901-
layout_json = dset_json["layout"]
902927
if "class" not in layout_json:
903928
log.error(f"Expected class key for layout: {layout_json}")
904929
raise HTTPInternalServerError()

tests/integ/dataset_test.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,6 @@ def testGet(self):
265265
"id",
266266
"shape",
267267
"hrefs",
268-
"layout",
269268
"creationProperties",
270269
"attributeCount",
271270
"created",
@@ -289,7 +288,13 @@ def testGet(self):
289288
self.assertEqual(shape["dims"], [10, 10])
290289
self.assertEqual(shape["maxdims"], [10, 10])
291290

292-
layout = rspJson["layout"]
291+
if "layout" in rspJson:
292+
layout = rspJson["layout"]
293+
else:
294+
cpl = rspJson["creationProperties"]
295+
self.assertTrue("layout" in cpl)
296+
layout = cpl["layout"]
297+
293298
self.assertEqual(layout["class"], "H5D_CHUNKED")
294299
self.assertEqual(layout["dims"], [10, 10])
295300
self.assertTrue("partition_count" not in layout)
@@ -359,7 +364,6 @@ def testGetByPath(self):
359364
"id",
360365
"shape",
361366
"hrefs",
362-
"layout",
363367
"creationProperties",
364368
"attributeCount",
365369
"created",
@@ -381,7 +385,6 @@ def testGetByPath(self):
381385
"id",
382386
"shape",
383387
"hrefs",
384-
"layout",
385388
"creationProperties",
386389
"attributeCount",
387390
"created",
@@ -443,7 +446,6 @@ def testGetVerbose(self):
443446
"id",
444447
"shape",
445448
"hrefs",
446-
"layout",
447449
"creationProperties",
448450
"attributeCount",
449451
"created",

tests/integ/domain_test.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def testGetDomain(self):
140140
attr_count += 1
141141
self.assertEqual(attr_count, 4)
142142

143-
# passing domain via the host header is deprecated
143+
# passing domain via the host parameter is deprecated
144144
# Previously this returned 200, now it is a 400
145145
del headers["X-Hdf-domain"]
146146
params = {"host": domain}
@@ -856,9 +856,26 @@ def testGetNotFound(self):
856856

857857
def testNullDomain(self):
858858
headers = helper.getRequestHeaders()
859-
req = helper.getEndpoint() + "/"
860-
rsp = self.session.get(req, headers=headers)
861-
self.assertTrue(rsp.status_code in (400, 404))
859+
endpoint = helper.getEndpoint()
860+
861+
# should get the same result with or without a trailing slash
862+
for req in (endpoint, endpoint + '/'):
863+
rsp = self.session.get(req, headers=headers)
864+
self.assertEqual(rsp.status_code, 200)
865+
rspJson = json.loads(rsp.text)
866+
867+
for name in (
868+
"domains",
869+
"hrefs",
870+
):
871+
self.assertTrue(name in rspJson)
872+
domains = rspJson["domains"]
873+
self.assertTrue(len(domains) >= 1) # should be at least the "home" domain
874+
875+
for item in domains:
876+
self.assertTrue("name" in item)
877+
name = item["name"]
878+
self.assertTrue(name.startswith("/")) # should be an absolute path
862879

863880
def testInvalidDomain(self):
864881
domain = "bad_domain.h5"
@@ -1402,8 +1419,8 @@ def testGetDomainsVerbose(self):
14021419
now = time.time()
14031420
self.assertTrue(rspJson["created"] < now - 10)
14041421
self.assertTrue(rspJson["lastModified"] < now - 10)
1405-
self.assertEqual(len(rspJson["hrefs"]), 3)
1406-
self.assertTrue(rspJson["owner"])
1422+
self.assertEqual(len(rspJson["hrefs"]), 4)
1423+
self.assertTrue("owner" in rspJson)
14071424
self.assertEqual(rspJson["class"], "folder")
14081425

14091426
# get domains in folder

0 commit comments

Comments
 (0)