Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions hsds/chunk_crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,7 @@ async def write_chunk_hyperslab(
msg = f"write_chunk_hyperslab, chunk_id: {chunk_id}, slices: {slices}, "
msg += f"bucket: {bucket}"
log.info(msg)
if "layout" not in dset_json:
log.error(f"No layout found in dset_json: {dset_json}")
raise HTTPInternalServerError()

partition_chunk_id = getChunkIdForPartition(chunk_id, dset_json)
if partition_chunk_id != chunk_id:
log.debug(f"using partition_chunk_id: {partition_chunk_id}")
Expand Down
5 changes: 2 additions & 3 deletions hsds/datanode_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .util.domainUtil import isValidDomain, getBucketForDomain
from .util.attrUtil import getRequestCollectionName
from .util.httpUtil import http_post
from .util.dsetUtil import getChunkLayout, getFilterOps, getShapeDims
from .util.dsetUtil import getChunkLayout, getFilterOps, getLayoutClass, getShapeDims
from .util.dsetUtil import getChunkInitializer, getSliceQueryParam, getFilters
from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex
from .util.arrayUtil import arrayToBytes, bytesToArray, jsonToArray
Expand Down Expand Up @@ -999,8 +999,7 @@ async def get_chunk(
dims = getChunkLayout(dset_json)
type_json = dset_json["type"]
dt = createDataType(type_json)
layout_json = dset_json["layout"]
layout_class = layout_json.get("class")
layout_class = getLayoutClass(dset_json)
chunk_dims = getChunkLayout(dset_json)
fill_value = getFillValue(dset_json)

Expand Down
57 changes: 46 additions & 11 deletions hsds/domain_sn.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def getIdList(objs, marker=None, limit=None):
return ret_ids


async def get_domains(request):
async def get_domains(request, include_hrefs=False):
"""This method is called by GET_Domains and GET_Domain"""
app = request.app
params = request.rel_url.query
Expand Down Expand Up @@ -251,6 +251,8 @@ async def get_domains(request):
if s3key[-1] != "/":
log.debug(f"get_domains - ignoring key: {s3key}")
continue
if prefix == '/' and s3key == "db":
log.debug("get_domains - ignoring db key at top level")
if len(s3key) > 1 and s3key[-2] == "/":
# trim off double slash
s3key = s3key[:-1]
Expand Down Expand Up @@ -376,7 +378,16 @@ async def get_domains(request):
if domain in crawler._domain_dict:
domain_json = crawler._domain_dict[domain]
# mixin domain name
log.debug(f"get_domains - adding name: {domain} to domain json")
domain_json["name"] = domain
if include_hrefs:
hrefs = []
if domain[-1] == "/":
domain = domain[:-1]
# add an href to get sub-domains if this is a folder
href = getHref(request, "/", domain=domain)
hrefs.append({"rel": "domain", "href": href})
domain_json["hrefs"] = hrefs
domains.append(domain_json)
else:
if not query:
Expand All @@ -400,7 +411,16 @@ async def GET_Domains(request):

domains = await get_domains(request)

for domain in domains:
domain_name = domain["name"]
href = getHref(request, "/", domain=domain_name)
domain["hrefs"] = [{"rel": "domain", "href": href}]

rsp_json = {"domains": domains}
hrefs = []
folder_path = getDomainFromRequest(request, validate=False)
href = getHref(request, "/domains", domain=folder_path)
hrefs.append({"rel": "self", "href": href})
rsp_json["hrefs"] = []
resp = await jsonResponse(request, rsp_json)
log.response(request, resp=resp)
Expand Down Expand Up @@ -447,7 +467,7 @@ async def GET_Domain(request):
bucket = getBucketForDomain(domain)
log.debug(f"GET_Domain domain: {domain} bucket: {bucket}")

if not bucket:
if not bucket and domain:
# no bucket defined, raise 400
msg = "Bucket not provided"
log.warn(msg)
Expand All @@ -460,11 +480,21 @@ async def GET_Domain(request):
verbose = True

if not domain:
if "host" in params:
msg = "Passing domain path as 'host' parameter is no longer supported"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
# This is a special case since the top-level folder isn't an
# actual domain (i.e. doesn't have corresponding domain json in S3)
log.info("no domain passed in, returning all top-level domains")
# no domain passed in, return top-level domains for this request
domains = await get_domains(request)
domains = await get_domains(request, include_hrefs=True)
rsp_json = {"domains": domains}
rsp_json["hrefs"] = []
hrefs = []
href = getHref(request, "/")
hrefs.append({"rel": "self", "href": href})

rsp_json["hrefs"] = hrefs
resp = await jsonResponse(request, rsp_json)
log.response(request, resp=resp)
return resp
Expand All @@ -482,6 +512,7 @@ async def GET_Domain(request):
# if h5path is passed in, return object info for that path
# (if exists)
h5path = params["h5path"]
log.debug(f"h5path: {h5path}")

# select which object to perform path search under
base_id = parent_id if parent_id else domain_json["root"]
Expand All @@ -506,6 +537,7 @@ async def GET_Domain(request):

# client may not know class of object retrieved via path
obj_json["class"] = getObjectClass(obj_id)
log.debug(f"got object json: {obj_json}")

hrefs = []
hrefs.append({"rel": "self", "href": getHref(request, "/")})
Expand Down Expand Up @@ -553,10 +585,6 @@ async def GET_Domain(request):
if domain_objs:
rsp_json["domain_objs"] = domain_objs

# include domain class if present
# if "class" in domain_json:
# rsp_json["class"] = domain_json["class"]

# include dn_ids if requested
if "getdnids" in params and params["getdnids"]:
rsp_json["dn_ids"] = app["dn_ids"]
Expand All @@ -573,6 +601,10 @@ async def GET_Domain(request):
hrefs.append({"rel": "typebase", "href": href})
href = getHref(request, "/groups/" + root_uuid)
hrefs.append({"rel": "root", "href": href})
else:
# add an href to get sub-domains if this is a folder
href = getHref(request, "/domains") + '/'
hrefs.append({"rel": "domains", "href": href})

hrefs.append({"rel": "acls", "href": getHref(request, "/acls")})
parent_domain = getParentDomain(domain)
Expand All @@ -587,9 +619,12 @@ async def GET_Domain(request):

rsp_json["hrefs"] = hrefs
# mixin limits, version
domain_json["limits"] = getLimits()
domain_json["compressors"] = getCompressors()
domain_json["version"] = getVersion()
if "root" in domain_json:
log.debug('getting limits, compressors, version for domain')
domain_json["limits"] = getLimits()
domain_json["compressors"] = getCompressors()
domain_json["version"] = getVersion()
log.debug(f"returning domain json: {domain_json}")
resp = await jsonResponse(request, rsp_json)
log.response(request, resp=resp)
return resp
Expand Down
3 changes: 1 addition & 2 deletions hsds/node_runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import pkg_resources
import site
import sys
from . import config
Expand All @@ -8,7 +7,7 @@


def removeSitePackages():

import pkg_resources
# site_packages = "/var/lang/lib/python3.9/site-packages"
# but this is removing: "/home/sbx_user1051/.local/lib/python3.9/site-packages" on lambda?
site_packages = site.getusersitepackages()
Expand Down
7 changes: 4 additions & 3 deletions hsds/servicenode_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,10 @@ async def getDomainResponse(app, domain_json, bucket=None, verbose=False):

# pass back config parameters the client may care about

rsp_json["limits"] = getLimits()
rsp_json["compressors"] = getCompressors()
rsp_json["version"] = getVersion()
if "root" in domain_json:
rsp_json["limits"] = getLimits()
rsp_json["compressors"] = getCompressors()
rsp_json["version"] = getVersion()
rsp_json["lastModified"] = lastModified
return rsp_json

Expand Down
17 changes: 15 additions & 2 deletions hsds/util/chunkUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@
PRIMES = [29, 31, 37, 41, 43, 47, 53, 59, 61, 67] # for chunk partitioning


def _getLayout(dset_json):
"""Get layout for the given dataset json. If layout is not found, try to
get it from creationProperties.
"""
if "layout" in dset_json:
return dset_json["layout"]
elif "creationProperties" in dset_json and "layout" in dset_json["creationProperties"]:
return dset_json["creationProperties"]["layout"]
else:
return None


def getChunkSize(layout, type_size):
"""Return chunk size given layout.
i.e. just the product of the values in the list.
Expand Down Expand Up @@ -429,11 +441,12 @@ def getPartitionKey(chunk_id, partition_count):

def getChunkIdForPartition(chunk_id, dset_json):
"""Return the partition specific chunk id for given chunk"""
if "layout" not in dset_json:

layout_json = _getLayout(dset_json)
if layout_json is None:
msg = "No layout found in dset_json"
log.error(msg)
raise KeyError(msg)
layout_json = dset_json["layout"]
if "partition_count" in layout_json:
partition_count = layout_json["partition_count"]
partition = getChunkPartition(chunk_id)
Expand Down
29 changes: 27 additions & 2 deletions hsds/util/dsetUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,12 +893,37 @@ def getDsetMaxDims(dset_json):
return maxdims


def _getLayout(dset_json):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would eventually be nice to re-organize the utils to avoid having to duplicate helpers like this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's been done already in the h5json branch - all the common util functions are moved to the hdf5-json package.

"""Get layout for the given dataset json. If layout is not found, try to
get it from creationProperties.
"""
if "layout" in dset_json:
return dset_json["layout"]
elif "creationProperties" in dset_json and "layout" in dset_json["creationProperties"]:
return dset_json["creationProperties"]["layout"]
else:
return None


def getLayoutClass(dset_json):
    """Return the layout class for the given dataset json.

    Raises HTTPInternalServerError (500) when no layout can be located
    or the layout json has no "class" key.
    """
    layout_json = _getLayout(dset_json)
    if layout_json is not None and "class" in layout_json:
        return layout_json["class"]
    # error path: log which precondition failed, then return a 500
    if layout_json is None:
        log.error("No layout found in dset_json")
    else:
        log.error(f"Expected class key for layout: {layout_json}")
    raise HTTPInternalServerError()


def getChunkLayout(dset_json):
"""Get chunk layout. Throw 500 if used with non-H5D_CHUNKED layout"""
if "layout" not in dset_json:

layout_json = _getLayout(dset_json)
if layout_json is None:
log.error("No layout found in dset_json")
raise HTTPInternalServerError()
layout_json = dset_json["layout"]
if "class" not in layout_json:
log.error(f"Expected class key for layout: {layout_json}")
raise HTTPInternalServerError()
Expand Down
12 changes: 7 additions & 5 deletions tests/integ/dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,6 @@ def testGet(self):
"id",
"shape",
"hrefs",
"layout",
"creationProperties",
"attributeCount",
"created",
Expand All @@ -289,7 +288,13 @@ def testGet(self):
self.assertEqual(shape["dims"], [10, 10])
self.assertEqual(shape["maxdims"], [10, 10])

layout = rspJson["layout"]
if "layout" in rspJson:
layout = rspJson["layout"]
else:
cpl = rspJson["creationProperties"]
self.assertTrue("layout" in cpl)
layout = cpl["layout"]

self.assertEqual(layout["class"], "H5D_CHUNKED")
self.assertEqual(layout["dims"], [10, 10])
self.assertTrue("partition_count" not in layout)
Expand Down Expand Up @@ -359,7 +364,6 @@ def testGetByPath(self):
"id",
"shape",
"hrefs",
"layout",
"creationProperties",
"attributeCount",
"created",
Expand All @@ -381,7 +385,6 @@ def testGetByPath(self):
"id",
"shape",
"hrefs",
"layout",
"creationProperties",
"attributeCount",
"created",
Expand Down Expand Up @@ -443,7 +446,6 @@ def testGetVerbose(self):
"id",
"shape",
"hrefs",
"layout",
"creationProperties",
"attributeCount",
"created",
Expand Down
29 changes: 23 additions & 6 deletions tests/integ/domain_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def testGetDomain(self):
attr_count += 1
self.assertEqual(attr_count, 4)

# passing domain via the host header is deprecated
# passing domain via the host parameters is deprecated
# Previously this returned 200, now it is a 400
del headers["X-Hdf-domain"]
params = {"host": domain}
Expand Down Expand Up @@ -856,9 +856,26 @@ def testGetNotFound(self):

def testNullDomain(self):
headers = helper.getRequestHeaders()
req = helper.getEndpoint() + "/"
rsp = self.session.get(req, headers=headers)
self.assertTrue(rsp.status_code in (400, 404))
endpoint = helper.getEndpoint()

# should get the same result with or without a trailing slash
for req in (endpoint, endpoint + '/'):
rsp = self.session.get(req, headers=headers)
self.assertEqual(rsp.status_code, 200)
rspJson = json.loads(rsp.text)

for name in (
"domains",
"hrefs",
):
self.assertTrue(name in rspJson)
domains = rspJson["domains"]
self.assertTrue(len(domains) >= 1) # should be at least the "home" domain

for item in domains:
self.assertTrue("name" in item)
name = item["name"]
self.assertTrue(name.startswith("/")) # should be an absolute path

def testInvalidDomain(self):
domain = "bad_domain.h5"
Expand Down Expand Up @@ -1402,8 +1419,8 @@ def testGetDomainsVerbose(self):
now = time.time()
self.assertTrue(rspJson["created"] < now - 10)
self.assertTrue(rspJson["lastModified"] < now - 10)
self.assertEqual(len(rspJson["hrefs"]), 3)
self.assertTrue(rspJson["owner"])
self.assertEqual(len(rspJson["hrefs"]), 4)
self.assertTrue("owner" in rspJson)
self.assertEqual(rspJson["class"], "folder")

# get dommains in folder
Expand Down
Loading