From 571fb4faab21037a60147d4f4d40a93a4d51eeca Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 1 May 2025 11:02:33 -0500 Subject: [PATCH 1/2] Enhance dataset representation in HDF5IO and update array display logic in generate_array_html_repr to handle LINDI datasets --- src/hdmf/backends/hdf5/h5tools.py | 49 +++++++++++++++++++++---------- src/hdmf/utils.py | 2 +- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 493c4057f..ade2131d3 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -1538,23 +1538,40 @@ def generate_dataset_html(dataset): """Generates an html representation for a dataset for the HDF5IO class""" array_info_dict = get_basic_array_info(dataset) - if isinstance(dataset, h5py.Dataset): + if isinstance(dataset, str): + dataset_type = "String data" + # For string data from LINDI, add basic info about the string + string_info_dict = { + "Size": len(dataset), + "Type": "string" + } + array_info_dict.update(string_info_dict) + elif isinstance(dataset, h5py.Dataset): dataset_type = "HDF5 dataset" - # get info from hdf5 dataset - compressed_size = dataset.id.get_storage_size() - if hasattr(dataset, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0 - uncompressed_size = dataset.nbytes - else: - uncompressed_size = dataset.size * dataset.dtype.itemsize - compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined" - - hdf5_info_dict = { - "Chunk shape": dataset.chunks, - "Compression": dataset.compression, - "Compression opts": dataset.compression_opts, - "Compression ratio": compression_ratio, - } - array_info_dict.update(hdf5_info_dict) + + array_info_dict.update({ + "Chunk shape": dataset.chunks, + }) + + if hasattr(dataset, "id") and hasattr(dataset.id, "get_storage_size"): + compressed_size = dataset.id.get_storage_size() + + # get info from hdf5 dataset + if hasattr(dataset, "nbytes"): + uncompressed_size = dataset.nbytes + else: + uncompressed_size = dataset.size * dataset.dtype.itemsize + compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined" + array_info_dict.update({"Compression ratio": compression_ratio}) + + try: + array_info_dict.update({ + "Compression": dataset.compression, + "Compression opts": dataset.compression_opts, + }) + except: + pass # doesn't work for LINDI + elif isinstance(dataset, np.ndarray): dataset_type = "NumPy array" diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index c21382a2a..b14b4f30a 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -929,7 +929,7 @@ def html_table(item_dicts) -> str: # Heuristic for displaying data array_is_small = array_size_bytes < 1024 * 0.1 # 10 % a kilobyte to display the array if array_is_small: - repr_html += "
" + str(np.asarray(array)) + repr_html += "
" + str(array[()]) return repr_html From 13cb60a166dc52187bdc83eb8bd969f3fdbd421e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 May 2025 16:07:29 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/hdmf/backends/hdf5/h5tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index ade2131d3..d469f5b80 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -1555,7 +1555,7 @@ def generate_dataset_html(dataset): if hasattr(dataset, "id") and hasattr(dataset.id, "get_storage_size"): compressed_size = dataset.id.get_storage_size() - + # get info from hdf5 dataset if hasattr(dataset, "nbytes"): uncompressed_size = dataset.nbytes