Skip to content

Commit 0441a4b

Browse files
Allow use of filters that expand chunks by a large factor (#5939)
* Change new chunk indexing methods to always encode chunk size as a 64-bit (size of lengths) integer, when using the 2.0 file format.
* Add CHANGELOG.md note
* Spelling
* Fix errors in parallel build
* Committing clang-format changes
* More parallel fixes.
* Committing clang-format changes
* Another parallel fix
* Fix parallel for real this time I hope
* Update function descriptions in dsets.c
* Fix spelling
---------
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 5e03b3a commit 0441a4b

File tree

15 files changed

+1501
-967
lines changed

15 files changed

+1501
-967
lines changed

release_docs/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,8 @@ The Virtual Dataset Global Heap Block format has been updated to version 1 to su
253253

254254
Use of the shared strings option for Virtual Datasets reduces memory overhead and optimizes dataset close operations.
255255

256+
The chunked dataset file format has been updated to always use 64 bits to encode the size of filtered chunks. This will allow data filters that expand the chunks by a large amount to still work. Chunk sizes are still limited to `2^32 - 1`. This new format is only used when the lower bound of the HDF5 library version bounds is set to 2.0 or later.
257+
256258
### The `H5Dread_chunk()` signature has changed
257259

258260
A new parameter, `nalloc`, has been added to `H5Dread_chunk()`. This parameter is a pointer to a variable that holds the size of the buffer `buf`. If `*nalloc` is not large enough to hold the entire chunk being read, no data is read. On exit, the value of this variable is set to the buffer size needed to read the chunk.

src/H5Dbtree.c

Lines changed: 37 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
/* Local Macros */
4242
/****************/
4343

44-
#define H5D_BTREE_IDX_IS_OPEN(idx_info) (NULL != (idx_info)->storage->u.btree.shared)
44+
#define H5D_BTREE_IDX_IS_OPEN(idx_info) (NULL != (idx_info)->layout->storage.u.chunk.u.btree.shared)
4545

4646
/******************/
4747
/* Local Typedefs */
@@ -819,13 +819,13 @@ H5D__btree_idx_init(const H5D_chk_idx_info_t *idx_info, const H5S_t H5_ATTR_UNUS
819819
assert(idx_info->f);
820820
assert(idx_info->pline);
821821
assert(idx_info->layout);
822-
assert(idx_info->storage);
823822
assert(H5_addr_defined(dset_ohdr_addr));
824823

825-
idx_info->storage->u.btree.dset_ohdr_addr = dset_ohdr_addr;
824+
idx_info->layout->storage.u.chunk.u.btree.dset_ohdr_addr = dset_ohdr_addr;
826825

827826
/* Allocate the shared structure */
828-
if (H5D__btree_shared_create(idx_info->f, idx_info->storage, idx_info->layout) < 0)
827+
if (H5D__btree_shared_create(idx_info->f, &idx_info->layout->storage.u.chunk,
828+
&idx_info->layout->u.chunk) < 0)
829829
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTINIT, FAIL, "can't create wrapper for shared B-tree info");
830830

831831
done:
@@ -861,15 +861,14 @@ H5D__btree_idx_create(const H5D_chk_idx_info_t *idx_info)
861861
assert(idx_info->f);
862862
assert(idx_info->pline);
863863
assert(idx_info->layout);
864-
assert(idx_info->storage);
865-
assert(!H5_addr_defined(idx_info->storage->idx_addr));
864+
assert(!H5_addr_defined(idx_info->layout->storage.u.chunk.idx_addr));
866865

867866
/* Initialize "user" data for B-tree callbacks, etc. */
868-
udata.layout = idx_info->layout;
869-
udata.storage = idx_info->storage;
867+
udata.layout = &idx_info->layout->u.chunk;
868+
udata.storage = &idx_info->layout->storage.u.chunk;
870869

871870
/* Create the v1 B-tree for the chunk index */
872-
if (H5B_create(idx_info->f, H5B_BTREE, &udata, &(idx_info->storage->idx_addr) /*out*/) < 0)
871+
if (H5B_create(idx_info->f, H5B_BTREE, &udata, &(idx_info->layout->storage.u.chunk.idx_addr) /*out*/) < 0)
873872
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't create B-tree");
874873

875874
done:
@@ -929,8 +928,8 @@ H5D__btree_idx_is_open(const H5D_chk_idx_info_t *idx_info, bool *is_open)
929928
FUNC_ENTER_PACKAGE_NOERR
930929

931930
assert(idx_info);
932-
assert(idx_info->storage);
933-
assert(H5D_CHUNK_IDX_BTREE == idx_info->storage->idx_type);
931+
assert(idx_info->layout);
932+
assert(H5D_CHUNK_IDX_BTREE == idx_info->layout->storage.u.chunk.idx_type);
934933
assert(is_open);
935934

936935
*is_open = H5D_BTREE_IDX_IS_OPEN(idx_info);
@@ -979,15 +978,14 @@ H5D__btree_idx_insert(const H5D_chk_idx_info_t *idx_info, H5D_chunk_ud_t *udata,
979978
assert(idx_info->f);
980979
assert(idx_info->pline);
981980
assert(idx_info->layout);
982-
assert(idx_info->storage);
983-
assert(H5_addr_defined(idx_info->storage->idx_addr));
981+
assert(H5_addr_defined(idx_info->layout->storage.u.chunk.idx_addr));
984982
assert(udata);
985983

986984
/*
987985
* Create the chunk if it doesn't exist, or reallocate the chunk if
988986
* its size changed.
989987
*/
990-
if (H5B_insert(idx_info->f, H5B_BTREE, idx_info->storage->idx_addr, udata) < 0)
988+
if (H5B_insert(idx_info->f, H5B_BTREE, idx_info->layout->storage.u.chunk.idx_addr, udata) < 0)
991989
HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to allocate chunk");
992990

993991
done:
@@ -1017,14 +1015,13 @@ H5D__btree_idx_get_addr(const H5D_chk_idx_info_t *idx_info, H5D_chunk_ud_t *udat
10171015
assert(idx_info->f);
10181016
assert(idx_info->pline);
10191017
assert(idx_info->layout);
1020-
assert(idx_info->layout->ndims > 0);
1021-
assert(idx_info->storage);
1022-
assert(H5_addr_defined(idx_info->storage->idx_addr));
1018+
assert(idx_info->layout->u.chunk.ndims > 0);
1019+
assert(H5_addr_defined(idx_info->layout->storage.u.chunk.idx_addr));
10231020
assert(udata);
10241021

10251022
/* Go get the chunk information from the B-tree */
10261023
found = false;
1027-
if (H5B_find(idx_info->f, H5B_BTREE, idx_info->storage->idx_addr, &found, udata) < 0)
1024+
if (H5B_find(idx_info->f, H5B_BTREE, idx_info->layout->storage.u.chunk.idx_addr, &found, udata) < 0)
10281025
HGOTO_ERROR(H5E_DATASET, H5E_CANTFIND, FAIL, "can't check for chunk in B-tree");
10291026

10301027
done:
@@ -1115,20 +1112,19 @@ H5D__btree_idx_iterate(const H5D_chk_idx_info_t *idx_info, H5D_chunk_cb_func_t c
11151112
assert(idx_info->f);
11161113
assert(idx_info->pline);
11171114
assert(idx_info->layout);
1118-
assert(idx_info->storage);
1119-
assert(H5_addr_defined(idx_info->storage->idx_addr));
1115+
assert(H5_addr_defined(idx_info->layout->storage.u.chunk.idx_addr));
11201116
assert(chunk_cb);
11211117
assert(chunk_udata);
11221118

11231119
/* Initialize userdata */
11241120
memset(&udata, 0, sizeof udata);
1125-
udata.common.layout = idx_info->layout;
1126-
udata.common.storage = idx_info->storage;
1121+
udata.common.layout = &idx_info->layout->u.chunk;
1122+
udata.common.storage = &idx_info->layout->storage.u.chunk;
11271123
udata.cb = chunk_cb;
11281124
udata.udata = chunk_udata;
11291125

11301126
/* Iterate over existing chunks */
1131-
if ((ret_value = H5B_iterate(idx_info->f, H5B_BTREE, idx_info->storage->idx_addr,
1127+
if ((ret_value = H5B_iterate(idx_info->f, H5B_BTREE, idx_info->layout->storage.u.chunk.idx_addr,
11321128
H5D__btree_idx_iterate_cb, &udata)) < 0)
11331129
HERROR(H5E_DATASET, H5E_BADITER, "unable to iterate over chunk B-tree");
11341130

@@ -1155,14 +1151,13 @@ H5D__btree_idx_remove(const H5D_chk_idx_info_t *idx_info, H5D_chunk_common_ud_t
11551151
assert(idx_info->f);
11561152
assert(idx_info->pline);
11571153
assert(idx_info->layout);
1158-
assert(idx_info->storage);
1159-
assert(H5_addr_defined(idx_info->storage->idx_addr));
1154+
assert(H5_addr_defined(idx_info->layout->storage.u.chunk.idx_addr));
11601155
assert(udata);
11611156

11621157
/* Remove the chunk from the v1 B-tree index and release the space for the
11631158
* chunk (in the B-tree callback).
11641159
*/
1165-
if (H5B_remove(idx_info->f, H5B_BTREE, idx_info->storage->idx_addr, udata) < 0)
1160+
if (H5B_remove(idx_info->f, H5B_BTREE, idx_info->layout->storage.u.chunk.idx_addr, udata) < 0)
11661161
HGOTO_ERROR(H5E_DATASET, H5E_CANTDELETE, FAIL, "unable to remove chunk entry");
11671162

11681163
done:
@@ -1192,23 +1187,22 @@ H5D__btree_idx_delete(const H5D_chk_idx_info_t *idx_info)
11921187
assert(idx_info->f);
11931188
assert(idx_info->pline);
11941189
assert(idx_info->layout);
1195-
assert(idx_info->storage);
11961190

11971191
/* Check if the index data structure has been allocated */
1198-
if (H5_addr_defined(idx_info->storage->idx_addr)) {
1192+
if (H5_addr_defined(idx_info->layout->storage.u.chunk.idx_addr)) {
11991193
H5O_storage_chunk_t tmp_storage; /* Local copy of storage info */
12001194
H5D_chunk_common_ud_t udata; /* User data for B-tree operations */
12011195

12021196
/* Set up temporary chunked storage info */
1203-
tmp_storage = *idx_info->storage;
1197+
tmp_storage = idx_info->layout->storage.u.chunk;
12041198

12051199
/* Set up the shared structure */
1206-
if (H5D__btree_shared_create(idx_info->f, &tmp_storage, idx_info->layout) < 0)
1200+
if (H5D__btree_shared_create(idx_info->f, &tmp_storage, &idx_info->layout->u.chunk) < 0)
12071201
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't create wrapper for shared B-tree info");
12081202

12091203
/* Set up B-tree user data */
12101204
memset(&udata, 0, sizeof udata);
1211-
udata.layout = idx_info->layout;
1205+
udata.layout = &idx_info->layout->u.chunk;
12121206
udata.storage = &tmp_storage;
12131207

12141208
/* Delete entire B-tree */
@@ -1246,25 +1240,25 @@ H5D__btree_idx_copy_setup(const H5D_chk_idx_info_t *idx_info_src, const H5D_chk_
12461240
assert(idx_info_src->f);
12471241
assert(idx_info_src->pline);
12481242
assert(idx_info_src->layout);
1249-
assert(idx_info_src->storage);
12501243
assert(idx_info_dst);
12511244
assert(idx_info_dst->f);
12521245
assert(idx_info_dst->pline);
12531246
assert(idx_info_dst->layout);
1254-
assert(idx_info_dst->storage);
1255-
assert(!H5_addr_defined(idx_info_dst->storage->idx_addr));
1247+
assert(!H5_addr_defined(idx_info_dst->layout->storage.u.chunk.idx_addr));
12561248

12571249
/* Create shared B-tree info for each file */
1258-
if (H5D__btree_shared_create(idx_info_src->f, idx_info_src->storage, idx_info_src->layout) < 0)
1250+
if (H5D__btree_shared_create(idx_info_src->f, &idx_info_src->layout->storage.u.chunk,
1251+
&idx_info_src->layout->u.chunk) < 0)
12591252
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTINIT, FAIL, "can't create wrapper for source shared B-tree info");
1260-
if (H5D__btree_shared_create(idx_info_dst->f, idx_info_dst->storage, idx_info_dst->layout) < 0)
1253+
if (H5D__btree_shared_create(idx_info_dst->f, &idx_info_dst->layout->storage.u.chunk,
1254+
&idx_info_dst->layout->u.chunk) < 0)
12611255
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTINIT, FAIL,
12621256
"can't create wrapper for destination shared B-tree info");
12631257

12641258
/* Create the root of the B-tree that describes chunked storage in the dest. file */
12651259
if (H5D__btree_idx_create(idx_info_dst) < 0)
12661260
HGOTO_ERROR(H5E_IO, H5E_CANTINIT, FAIL, "unable to initialize chunked storage");
1267-
assert(H5_addr_defined(idx_info_dst->storage->idx_addr));
1261+
assert(H5_addr_defined(idx_info_dst->layout->storage.u.chunk.idx_addr));
12681262

12691263
done:
12701264
FUNC_LEAVE_NOAPI_TAG(ret_value)
@@ -1323,16 +1317,16 @@ H5D__btree_idx_size(const H5D_chk_idx_info_t *idx_info, hsize_t *index_size)
13231317
assert(idx_info->f);
13241318
assert(idx_info->pline);
13251319
assert(idx_info->layout);
1326-
assert(idx_info->storage);
13271320
assert(index_size);
13281321

13291322
/* Initialize B-tree node user-data */
13301323
memset(&udata, 0, sizeof udata);
1331-
udata.layout = idx_info->layout;
1332-
udata.storage = idx_info->storage;
1324+
udata.layout = &idx_info->layout->u.chunk;
1325+
udata.storage = &idx_info->layout->storage.u.chunk;
13331326

13341327
/* Get metadata information for B-tree */
1335-
if (H5B_get_info(idx_info->f, H5B_BTREE, idx_info->storage->idx_addr, &bt_info, NULL, &udata) < 0)
1328+
if (H5B_get_info(idx_info->f, H5B_BTREE, idx_info->layout->storage.u.chunk.idx_addr, &bt_info, NULL,
1329+
&udata) < 0)
13361330
HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to iterate over chunk B-tree");
13371331

13381332
/* Set the size of the B-tree */
@@ -1408,12 +1402,11 @@ H5D__btree_idx_dest(const H5D_chk_idx_info_t *idx_info)
14081402
assert(idx_info->f);
14091403
assert(idx_info->pline);
14101404
assert(idx_info->layout);
1411-
assert(idx_info->storage);
14121405

14131406
/* Free the raw B-tree node buffer */
1414-
if (NULL == idx_info->storage->u.btree.shared)
1407+
if (NULL == idx_info->layout->storage.u.chunk.u.btree.shared)
14151408
HGOTO_ERROR(H5E_IO, H5E_CANTFREE, FAIL, "ref-counted page nil");
1416-
if (H5UC_DEC(idx_info->storage->u.btree.shared) < 0)
1409+
if (H5UC_DEC(idx_info->layout->storage.u.chunk.u.btree.shared) < 0)
14171410
HGOTO_ERROR(H5E_IO, H5E_CANTFREE, FAIL, "unable to decrement ref-counted page");
14181411

14191412
done:

0 commit comments

Comments
 (0)