Skip to content

Commit 71f6b42

Browse files
authored
Delay VDS mapping decoding until after the layout is copied to the dataset struct (#5779)
* Delay decoding VDS global heap block until the dataset is initialized, so the layout in the ohdr does not have the VDS info. Also decode VDS info before layout message copy.
* Add macro for searching for VDS source names in a hash table and adding them if not found to reduce code duplication.
1 parent 016639a commit 71f6b42

File tree

5 files changed

+400
-432
lines changed

5 files changed

+400
-432
lines changed

release_docs/RELEASE.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,17 @@ New Features
217217

218218
Library:
219219
--------
220+
- Improved performance of opening a virtual dataset with many mappings
221+
222+
When opening a virtual dataset, the library would previously decode
223+
the mappings in the object header package, then copy them to the dataset
224+
struct, then copy them to the internal DCPL. Copying the VDS mappings
225+
could be very expensive if there were many mappings. Changed this to delay
226+
decoding the mappings until the dataset is initialized, and delay copying the layout
227+
to the DCPL until it is needed. This results in only the decoding and no
228+
copies in most use cases, as opposed to the decoding and two copies with
229+
the previous code.
230+
220231
- Aligned the CMake compiler wrappers with the old Autotools versions
221232

222233
The versions of h5cc, h5fc, h5c++, etc. generated by CMake were missing

src/H5Dpkg.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,43 @@
111111
(DINFO)->type_info.dst_type_size); \
112112
}
113113

114+
/* Macro to add a virtual dataset source file or dataset name to a hash table for storing these names */
115+
#define H5D_VIRTUAL_FIND_OR_ADD_NAME(NAME_TYPE, LAYOUT, STR, STRLEN, ENT, ERR) \
116+
do { \
117+
/* Check for source name in hash table. While this normally shouldn't be \
118+
* necessary if it is version 1 or greater and it is at least as long as "size of \
119+
* lengths", we should still check since if we don't and it's not shared in the \
120+
* file for whatever reason it could cause the library to insert a duplicate key \
121+
* if it rebuilds the hash table. */ \
122+
H5O_storage_virtual_ent_t *_tmp_ent = NULL; /* Temporary VDS entry pointer */ \
123+
\
124+
if ((ENT) > (LAYOUT)->storage.u.virt.list) \
125+
HASH_FIND(hh_source_##NAME_TYPE, (LAYOUT)->storage.u.virt.source_##NAME_TYPE##_hash_table, STR, \
126+
STRLEN, _tmp_ent); \
127+
if (_tmp_ent) { \
128+
/* Found source name in previous mapping, use link to that mapping's source name */ \
129+
assert(_tmp_ent >= (LAYOUT)->storage.u.virt.list && _tmp_ent < (ENT)); \
130+
(ENT)->source_##NAME_TYPE##_orig = (size_t)(_tmp_ent - (LAYOUT)->storage.u.virt.list); \
131+
(ENT)->source_##NAME_TYPE##_name = _tmp_ent->source_##NAME_TYPE##_name; \
132+
} \
133+
else { \
134+
/* Did not find source name, copy it to the entry and add it to the hash table */ \
135+
if (NULL == ((ENT)->source_##NAME_TYPE##_name = (char *)H5MM_malloc((STRLEN) + 1))) \
136+
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, ERR, "unable to allocate memory for source name"); \
137+
(ENT)->source_##NAME_TYPE##_orig = SIZE_MAX; \
138+
H5MM_memcpy((ENT)->source_##NAME_TYPE##_name, STR, (STRLEN) + 1); \
139+
\
140+
/* Add to source name hash table. If we eventually make the library \
141+
* resilient to repeated strings not stored shared in memory, possibly by \
142+
* permanently disabling the hash table, or marking it as needing a careful \
143+
* rebuild, we can avoid this step if the version is 1 or greater and the name \
144+
* is at least as long as "size of lengths". See comment above about HASH_FIND \
145+
* line. */ \
146+
HASH_ADD_KEYPTR(hh_source_##NAME_TYPE, (LAYOUT)->storage.u.virt.source_##NAME_TYPE##_hash_table, \
147+
(ENT)->source_##NAME_TYPE##_name, STRLEN, ENT); \
148+
} \
149+
} while (0)
150+
114151
/****************************/
115152
/* Package Private Typedefs */
116153
/****************************/
@@ -768,6 +805,7 @@ H5_DLL herr_t H5D__compact_copy(H5F_t *f_src, H5O_storage_compact_t *storage_src
768805

769806
/* Functions that operate on virtual dataset storage */
770807
H5_DLL herr_t H5D__virtual_store_layout(H5F_t *f, H5O_layout_t *layout);
808+
H5_DLL herr_t H5D__virtual_load_layout(H5F_t *f, H5O_layout_t *layout);
771809
H5_DLL herr_t H5D__virtual_copy_layout(H5O_layout_t *layout);
772810
H5_DLL herr_t H5D__virtual_set_extent_unlim(const H5D_t *dset);
773811
H5_DLL herr_t H5D__virtual_reset_layout(H5O_layout_t *layout);

0 commit comments

Comments
 (0)