A zero-dependency, WebAssembly-powered library for reading and writing HDF5 files from JavaScript
(built on the HDF5 C API)
The built binaries (esm and node) will be attached to the latest release as h5wasm-{version}.tgz
The wasm-compiled libraries libhdf5.a, libhdf5_cpp.a ... and the related include/ folder are retrieved from libhdf5-wasm during the build.
Instead of importing a namespace with "*", it is now possible to import the main h5wasm components as an object, from the default export:
// in hdf5_hl.ts:
export const h5wasm = {
File,
Group,
Dataset,
Datatype,
DatasetRegion,
ready,
ACCESS_MODES
}

The Emscripten filesystem is used for file operations, and it can be accessed after the WASM is loaded, as shown below.
import h5wasm from "https://cdn.jsdelivr.net/npm/h5wasm@latest/dist/esm/hdf5_hl.js";
// the WASM loads asynchronously, and you can get the module like this:
const Module = await h5wasm.ready;
// then you can get the FileSystem object from the Module:
const { FS } = Module;
// Or, you can directly get the FS if you don't care about the rest
// of the module:
// const { FS } = await h5wasm.ready;
let response = await fetch("https://ncnr.nist.gov/pub/ncnrdata/vsans/202003/24845/data/sans59510.nxs.ngv");
let ab = await response.arrayBuffer();
FS.writeFile("sans59510.nxs.ngv", new Uint8Array(ab));
// use mode "r" for reading. All modes can be found in h5wasm.ACCESS_MODES
let f = new h5wasm.File("sans59510.nxs.ngv", "r");
// File {path: "/", file_id: 72057594037927936n, filename: "sans59510.nxs.ngv", mode: "r"}

Since ESM is not supported in all web worker contexts (e.g. Firefox), an additional ./dist/iife/h5wasm.js is provided in the package for h5wasm>=0.4.8; it can be loaded in a worker and used as in the example below (which uses the WORKERFS file system for random access on local files):
// worker.js
onmessage = async function(e) {
const { FS } = await h5wasm.ready;
// send in a file opened from an <input type="file" />
const f_in = e.data[0];
FS.mkdir('/work');
FS.mount(FS.filesystems.WORKERFS, { files: [f_in] }, '/work');
const f = new h5wasm.File(`/work/${f_in.name}`, 'r');
console.log(f);
}
self.importScripts('../dist/iife/h5wasm.js');

npm i h5wasm or yarn add h5wasm
then in your file
// index.js
import h5wasm from "h5wasm";
const { FS } = await h5wasm.ready;
let f = new h5wasm.File("test.h5", "w");
f.create_dataset({name: "text_data", data: ["this", "that"]});
// ...

Note: you must configure your build system to target >= ES2020 (for BigInt support).
The host filesystem is made available through Emscripten "NODERAWFS=1".
Enabling BigInt support may be required for nodejs < 16
npm i h5wasm
node
const h5wasm = await import("h5wasm/node");
await h5wasm.ready;
let f = new h5wasm.File("/home/brian/Downloads/sans59510.nxs.ngv", "r");
/*
File {
path: '/',
file_id: 72057594037927936n,
filename: '/home/brian/Downloads/sans59510.nxs.ngv',
mode: 'r'
}
*/

(All examples are written in ESM - for TypeScript some type casting is probably required, as get returns either Group or Dataset.)
new h5wasm.File(filename, mode?, options?)

| Argument | Type | Default | Description |
|---|---|---|---|
filename |
string |
— | Path to the HDF5 file |
mode |
string |
"r" |
Access mode (see table below) |
options.track_order |
boolean |
false |
Preserve insertion order of groups and attributes |
options.libver |
string | [string, string] |
— | Library version bound(s) for new objects (see libver) |
Available modes:
| Mode | Description |
|---|---|
"r" |
Read-only |
"a" |
Read/write (file must exist) |
"w" |
Create / truncate |
"x" |
Create, fail if exists |
"Sa" |
SWMR append |
"Sr" |
SWMR read |
let f = new h5wasm.File("sans59510.nxs.ngv", "r");
// list keys:
f.keys()
// ["entry"]
f.get("entry/instrument").keys()
// ["attenuator","beam","beam_monitor_low","beam_monitor_norm","beam_stop_C2","beam_stop_C3","collimator","converging_pinholes","detector_B","detector_FB","detector_FL","detector_FR","detector_FT","detector_MB","detector_ML","detector_MR","detector_MT","lenses","local_contact","name","sample_aperture","sample_aperture_2","sample_table","source","source_aperture","type"]
let data = f.get("entry/instrument/detector_MR/data")
// Dataset {path: "/entry/instrument/detector_MR/data", file_id: 72057594037927936n}
data.metadata
/*
{
"signed": true,
"vlen": false,
"littleEndian": true,
"type": 0,
"size": 4,
"shape": [
48,
128
],
"total_size": 6144
}
*/
// for convenience, these are extracted from metadata:
data.dtype
// "<i"
data.shape
// (2) [48, 128]
// data are loaded into a matching TypedArray in javascript if one exists, otherwise raw bytes are returned (there is no Float16Array, for instance). In this case the matching type is Int32Array
data.value
/*
Int32Array(6144) [0, 0, 0, 2, 2, 2, 3, 1, 1, 7, 3, 5, 7, 8, 9, 21, 43, 38, 47, 8, 8, 7, 3, 6, 1, 7, 3, 7, 47, 94, 91, 99, 76, 81, 86, 112, 98, 103, 85, 100, 83, 122, 111, 123, 136, 129, 134, 164, 130, 164, 176, 191, 200, 211, 237, 260, 304, 198, 32, 9, 5, 2, 6, 5, 8, 6, 25, 219, 341, 275, 69, 11, 4, 5, 5, 45, 151, 154, 141, 146, 108, 107, 105, 113, 99, 101, 96, 84, 86, 77, 78, 107, 73, 80, 105, 65, 75, 79, 62, 31, …]
*/
// take a slice from 0:10 on axis 0, keeping all of axis 1:
// (slicing is done through libhdf5 instead of in the javascript library - should be very efficient)
data.slice([[0,10],[]])
/*
Int32Array(1280) [0, 0, 0, 2, 2, 2, 3, 1, 1, 7, 3, 5, 7, 8, 9, 21, 43, 38, 47, 8, 8, 7, 3, 6, 1, 7, 3, 7, 47, 94, 91, 99, 76, 81, 86, 112, 98, 103, 85, 100, 83, 122, 111, 123, 136, 129, 134, 164, 130, 164, 176, 191, 200, 211, 237, 260, 304, 198, 32, 9, 5, 2, 6, 5, 8, 6, 25, 219, 341, 275, 69, 11, 4, 5, 5, 45, 151, 154, 141, 146, 108, 107, 105, 113, 99, 101, 96, 84, 86, 77, 78, 107, 73, 80, 105, 65, 75, 79, 62, 31, …]
*/
// Convert to nested Array, with JSON-compatible elements:
data.to_array()
/*
[
[
0, 0, 0, 2, 2, 2, 3, 1, 1, 7, 3, 5,
7, 8, 9, 21, 43, 38, 47, 8, 8, 7, 3, 6,
1, 7, 3, 7, 47, 94, 91, 99, 76, 81, 86, 112,
98, 103, 85, 100, 83, 122, 111, 123, 136, 129, 134, 164,
130, 164, 176, 191, 200, 211, 237, 260, 304, 198, 32, 9,
5, 2, 6, 5, 8, 6, 25, 219, 341, 275, 69, 11,
4, 5, 5, 45, 151, 154, 141, 146, 108, 107, 105, 113,
99, 101, 96, 84, 86, 77, 78, 107, 73, 80, 105, 65,
75, 79, 62, 31,
... 28 more items
],
[
0, 0, 2, 2, 4, 1, 2, 7, 2, 3, 2, 5,
6, 3, 6, 24, 37, 42, 25, 8, 3, 5, 4, 8,
2, 6, 7, 9, 61, 81, 81, 89, 104, 110, 82, 82,
104, 92, 97, 99, 104, 115, 106, 128, 134, 111, 125, 123,
159, 155, 182, 228, 227, 242, 283, 290, 295, 114, 11, 6,
5, 6, 8, 4, 4, 10, 59, 401, 401, 168, 10, 6,
6, 4, 10, 37, 150, 152, 146, 121, 125, 117, 122, 88,
100, 97, 86, 79, 90, 87, 78, 87, 87, 87, 84, 76,
76, 66, 51, 11,
... 28 more items
],
... 46 more items
*/

let new_file = new h5wasm.File("myfile.h5", "w");
new_file.create_group("entry");
// shape and dtype will match input if omitted
new_file.get("entry").create_dataset({name: "auto", data: [3.1, 4.1, 0.0, -1.0]});
new_file.get("entry/auto").shape
// [4]
new_file.get("entry/auto").dtype
// "<d"
new_file.get("entry/auto").value
// Float64Array(4) [3.1, 4.1, 0, -1]
// make float array instead of double (shape will still match input if it is set to null)
new_file.get("entry").create_dataset({name: "data", data: [3.1, 4.1, 0.0, -1.0], shape: null, dtype: '<f'});
new_file.get("entry/data").shape
// [4]
new_file.get("entry/data").value
//Float32Array(4) [3.0999999046325684, 4.099999904632568, 0, -1]
// create a dataset with shape=[2,2]
// The dataset stored in the HDF5 file with the correct shape,
// but no attempt is made to make a 2x2 array out of it in javascript
new_file.get("entry").create_dataset({name: "square_data", data: [3.1, 4.1, 0.0, -1.0], shape: [2,2], dtype: '<d'});
new_file.get("entry/square_data").shape
// (2) [2, 2]
new_file.get("entry/square_data").value
//Float64Array(4) [3.1, 4.1, 0, -1]
// create a dataset with compression
const long_data = [...new Array(1000000)].map((_, i) => i);
new_file.get("entry").create_dataset({name: "compressed", data: long_data, shape: [1000, 1000], dtype: '<f', chunks: [100,100], compression: 9});
// equivalent to:
// new_file.get("entry").create_dataset({name: "compressed", data: long_data, shape: [1000, 1000], dtype: '<f', chunks: [100,100], compression: 'gzip', compression_opts: [9]});
new_file.get("entry/compressed").filters
// [{id: 1, name: 'deflate'}]
new_file.get("entry/compressed").slice([[2,3]]);
// Float32Array(1000) [ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, … ]
// create an attribute (creates a VLEN string by default for a string)
new_file.get("entry").create_attribute("myattr", "a string");
Object.keys(new_file.get("entry").attrs)
// ["myattr"]
new_file.get("entry").attrs["myattr"]
// {value: "a string", shape: Array(0), dtype: "S"}
new_file.get("entry").create_attribute("fixed", ["hello", "you"], null, "S5")
new_file.get("entry").attrs["fixed"]
/*
{
"value": [
"hello",
"you"
],
"shape": [
2
],
"dtype": "S5"
}
*/
// close the file - reading and writing will no longer work.
// calls H5Fclose on the file_id.
new_file.close()

You can write HDF5 Compound Datatypes (H5T_COMPOUND) for both datasets and attributes by passing a JavaScript Map representing a Structure of Arrays (SoA).
The keys of the Map become the compound member names, and the values (which should be arrays or TypedArrays) represent the columns of data. Under the hood, h5wasm automatically interleaves these columns into the flat Array of Structures (AoS) byte buffer that HDF5 expects.
If you omit the dtype, the library will automatically guess the appropriate HDF5 types based on the provided TypedArrays:
let new_file = new h5wasm.File("myfile.h5", "w");
// 1. Create a Structure of Arrays (SoA) using a Map
const particle_data = new Map([
['id', new Int32Array([10, 20, 30])],
['velocity', new Float64Array([1.1, 2.2, 3.3])]
]);
// 2. Write the compound dataset
new_file.create_dataset({ name: "particles", data: particle_data });
// 3. Read it back (returns an Array of Structures row-format)
new_file.get("particles").value;
/*
[
[10, 1.1],
[20, 2.2],
[30, 3.3]
]
*/

You can enforce a specific memory layout by providing an explicit dtype array. This is passed as an array of [name, type_string] tuples:
const explicit_dtype = [
["id", "<b"], // Int8
["velocity", "<f"] // Float32
];
new_file.create_dataset({
name: "particles_explicit",
data: particle_data,
dtype: explicit_dtype,
});

Attributes share the exact same API. Just pass a Map to create_attribute:
const attr_data = new Map([
['x', new Int16Array([100, 200])],
['y', new Float32Array([1.125, 2.25])]
]);
new_file.get("particles").create_attribute("my_compound_attr", attr_data);

You can create deeply nested compound datatypes by simply nesting Map objects within one another:
const nested_data = new Map([
['id', new Uint8Array([1, 2])],
['position', new Map([
['x', new Float64Array([10.1, 20.1])],
['y', new Float64Array([10.2, 20.2])],
['z', new Float64Array([10.3, 20.3])]
])]
]);
new_file.create_dataset({ name: "nested_particles", data: nested_data });
new_file.get("nested_particles").value;
/*
[
[1, [10.1, 10.2, 10.3]],
[2, [20.1, 20.2, 20.3]]
]
*/

You can overwrite subsets of an existing compound dataset using write_slice. Simply pass a Map containing the updated columnar arrays sized exactly to the dimensions of the slice you are replacing:
// Overwrite indices 1 and 2 (a slice of length 2)
const slice_data = new Map([
['id', new Int32Array([99, 100])],
['velocity', new Float64Array([9.9, 10.0])]
]);
new_file.get("particles").write_slice([[1, 3]], slice_data);

One can also open an existing file and write to it:
let f = new h5wasm.File("myfile.h5", "a");
f.create_attribute("new_attr", "something wicked this way comes");
f.close()

SWMR requires a file created with at least libver: "v110" and a chunked, extensible dataset.
const PATH = join(".", "test", "tmp");
const FILEPATH = join(PATH, "swmr_test.h5");
const INITIAL_DATA = new Float32Array([1.0, 2.0, 3.0]);
const APPEND_DATA = new Float32Array([4.0, 5.0, 6.0]);
// Create file with SWMR-compatible format (v110 minimum)
const f_write = new h5wasm.File(FILEPATH, "w", { libver: "v110" });
// Create an extensible chunked dataset (required for SWMR)
const dset_write = f_write.create_dataset({
name: "data",
data: INITIAL_DATA,
maxshape: [null],
chunks: [10]
});
// Switch to SWMR write mode:
// It is important that you create the dataset before starting SWMR mode
f_write.start_swmr_write();
// Open for SWMR read
const f_read = new h5wasm.File(FILEPATH, "Sr");
const dset_read = f_read.get("data");
// Extend the dataset and write new values
dset_write.resize([6]);
dset_write.write_slice([[3, 6]], new Float32Array([4, 5, 6]));
f_write.flush();
// The read handle still sees the old shape until refreshed
dset_read.shape;
// [3]
dset_read.refresh();
dset_read.shape;
// [6]
dset_read.value;
// Float32Array(6) [1, 2, 3, 4, 5, 6]
f_write.close();
f_read.close();
// NOTE: To append to a closed file, you can re-open the file with "Sa" mode,
// or you can open it in "a" mode and then call start_swmr_write().
// You can't create a new file in "Sa" mode directly.

let new_file = new h5wasm.File("myfile.h5", "w");
new_file.create_group("entry");
new_file.get("entry").create_dataset({name: "auto", data: [3.1, 4.1, 0.0, -1.0]});
// create a soft link in root:
new_file.create_soft_link("/entry/auto", "my_soft_link");
new_file.get("my_soft_link").value;
// Float64Array(4) [3.1, 4.1, 0, -1]
// create a hard link:
new_file.create_hard_link("/entry/auto", "my_hard_link");
new_file.get("my_hard_link").value;
// Float64Array(4) [3.1, 4.1, 0, -1]
// create an external link:
new_file.create_external_link("other_file.h5", "other_dataset", "my_external_link");
new_file.get_external_link("my_external_link");
// {filename: "other_file.h5", obj_path: "other_dataset"}
// create a soft link in a group:
new_file.create_group("links");
const links_group = new_file.get("links");
links_group.create_soft_link("/entry/auto", "soft_link");
new_file.get("/links/soft_link").value;
// Float64Array(4) [3.1, 4.1, 0, -1]
new_file.get_link("/links/soft_link");
// "/entry/auto"
new_file.get_link("/entry/auto");
// null // (null is returned if the path is not a symbolic link);
new_file.close()

HDF5 supports controlling the minimum and maximum library version used when writing objects to a file, via libver. This can be set using the FileOptions object when opening a file.
Valid libver string values are: "earliest", "v108", "v110", "v112", "v114", "v200", and "latest".
// Require at least HDF5 v1.10 format features:
const f = new h5wasm.File("myfile.h5", "w", { libver: "v110" });
f.create_dataset({ name: "data", data: new Float32Array([1, 2, 3]) });
f.close();
// Read back the actual bounds stored in the file:
const f_read = new h5wasm.File("myfile.h5", "r");
f_read.libver;
// ["v110", "v110"]
f_read.close();

// Allow any format from v1.10 up to the latest supported version:
const f = new h5wasm.File("myfile.h5", "w", { libver: ["v110", "latest"] });
f.create_dataset({ name: "data", data: new Float32Array([1, 2, 3]) });
f.close();

Optional, to support uploads and downloads:
import {uploader, download, UPLOADED_FILES} from "https://cdn.jsdelivr.net/npm/h5wasm@latest/dist/esm/file_handlers.js";
//
// Attach to a file input element:
// will save to Module.FS (memfs) with the name of the uploaded file
document.getElementById("upload_selector").onchange = uploader;
// file can be found with
let f = new h5wasm.File(UPLOADED_FILES[UPLOADED_FILES.length -1], "r");
let new_file = new h5wasm.File("myfile.h5", "w");
new_file.create_group("entry");
// shape and dtype will match input if omitted
new_file.get("entry").create_dataset({name: "auto", data: [3.1, 4.1, 0.0, -1.0]});
// this will download a snapshot of the HDF5 in its current state, with the same name
// (in this case, a file named "myfile.h5" would be downloaded)
download(new_file);

To persist the Emscripten virtual filesystem between sessions, use IDBFS (syncs with browser IndexedDB), e.g.
// create a local mount of the IndexedDB filesystem:
FS.mount(FS.filesystems.IDBFS, {}, "/home/web_user")
// to read from the browser IndexedDB into the active filesystem:
FS.syncfs(true, (e) => {console.log(e)});
// to push all current files in /home/web_user to IndexedDB, e.g. when closing your application:
FS.syncfs(false, (e) => {console.log(e)})