
Commit c61c478

Feat/netcdf write (#57)
* feat: add writing to netcdf files
* feat: add zarr writing
* fix: remove histogram and md5 data
* fix: write zarrs without computing data (avoids ever storing the full dataset in memory)
* fix: write netcdfs without computing dask arrays
* fix: move zarr to shard not split (requires updating the Dask and zarr libraries; sharding is a recent Dask addition)
* fix: make zarr chunks and shards align
* docs: simplify comments in zarr/_writer.py
* fix: make zarr units match OME spec
* perf: collect and write split netcdf in parallel
* perf: default to uncompressed netcdf (also document that NetCDF compression level 0 is no compression)
* style: refactor netcdf/_writer.py nested if
* style: use netcdf setncatts where appropriate
* fix: remove erroneous inclusion of zarr in deps
* feat: expose zarr create_array kwargs (also removes the hardcoded compression level keyword arg)
* refactor: prepare for multiple OME Zarr versions
* feat: add direct access to zarr shards and chunks
* fix: Dask Zarr write chunk warnings
* fix: netcdf/_writer.py indentation issue
* fix: mistakes in CHANGELOG.md
* fix: netcdf/_writer.py indentation mistake
1 parent: 9f92247

File tree

13 files changed: +2428 -470 lines changed


CHANGELOG.md

Lines changed: 9 additions & 0 deletions
@@ -7,7 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Add netcdf writing via the `anu_ctlab_io.netcdf.dataset_to_netcdf()` function
+- Add zarr writing via the `anu_ctlab_io.zarr.dataset_to_zarr()` function (defaults to OME-Zarr format)
+- Add `Dataset.to_path()` to write to both zarr and netcdf formats
 - Add badges to docs/introduction.rst
+- Add `py312-dask-dev` test environment to test against fixed Dask warning logic
+- Added warning suppression for false-positive Dask PerformanceWarning when writing Zarr arrays
+
+### Changed
 
 ## [1.1.0] - 2025-12-15
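A rough usage sketch of the zarr writer listed above; the package-level `Dataset` re-export and the input filename are assumptions, and the only call shape shown in this commit is `dataset_to_zarr(dataset, path, **kwargs)`:

```python
from anu_ctlab_io import Dataset          # assumed package-level re-export of the Dataset class
from anu_ctlab_io.zarr import dataset_to_zarr

# Read an existing dataset (hypothetical path), then write it back out.
# Per the changelog entry above, the writer defaults to the OME-Zarr layout.
dataset = Dataset.from_path("tomo_example_nc")
dataset_to_zarr(dataset, "tomo_example.zarr")
```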

src/anu_ctlab_io/_dataset.py

Lines changed: 63 additions & 0 deletions
@@ -19,6 +19,10 @@ def from_path(
     ) -> Self:
         pass
 
+    @abstractmethod
+    def to_path(self, path: Path, *, filetype: str = "auto", **kwargs: Any) -> None:
+        pass
+
     @property
     @abstractmethod
     def voxel_size(self) -> tuple[np.float32, np.float32, np.float32]: ...

@@ -159,6 +163,65 @@ def from_path(
             )
         )
 
+    def to_path(
+        self,
+        path: Path | str,
+        *,
+        filetype: str = "auto",
+        **kwargs: Any,
+    ) -> None:
+        """Writes the :any:`Dataset` to the given ``path``.
+
+        The data will be written in one of the ANU mass data storage formats, and the
+        optional extras required for the specific file format must be installed.
+
+        :param path: The ``path`` to write data to.
+        :param filetype: The format to write ("NetCDF", "zarr", or "auto"). If "auto", format is inferred from path extension.
+            When inferring, NetCDF is assumed for paths ending in ``.nc`` or ``_nc``, and Zarr for paths ending in ``.zarr``.
+            If datatype is present in filename (e.g., "tomo_output"), NetCDF is assumed.
+        :param kwargs: Additional keyword arguments passed to the format-specific writer.
+        """
+        if isinstance(path, str):
+            path = Path(path)
+
+        match filetype:
+            case "NetCDF":
+                netcdf_mod = self._import_with_extra("anu_ctlab_io.netcdf", "netcdf")
+                netcdf_mod.dataset_to_netcdf(self, path, **kwargs)
+                return
+            case "zarr":
+                zarr_mod = self._import_with_extra("anu_ctlab_io.zarr", "zarr")
+                zarr_mod.dataset_to_zarr(self, path, **kwargs)
+                return
+            case "auto":
+                # Check for explicit extensions
+                if path.name.endswith(".nc") or path.name.endswith("_nc"):
+                    netcdf_mod = self._import_with_extra(
+                        "anu_ctlab_io.netcdf", "netcdf"
+                    )
+                    netcdf_mod.dataset_to_netcdf(self, path, **kwargs)
+                    return
+
+                if path.name.endswith(".zarr"):
+                    zarr_mod = self._import_with_extra("anu_ctlab_io.zarr", "zarr")
+                    zarr_mod.dataset_to_zarr(self, path, **kwargs)
+                    return
+
+                # Check if datatype is in filename (Mango convention)
+                if self._datatype is not None:
+                    datatype_str = str(self._datatype)
+                    if datatype_str in path.name:
+                        netcdf_mod = self._import_with_extra(
+                            "anu_ctlab_io.netcdf", "netcdf"
+                        )
+                        netcdf_mod.dataset_to_netcdf(self, path, **kwargs)
+                        return
+
+        raise ValueError(
+            "Unable to determine output format from given `path`, perhaps specify `filetype`?",
+            path,
+        )
+
     @property
     def voxel_size(self) -> tuple[np.float32, np.float32, np.float32]:
         """The voxel size of the data in the dataset's native unit."""

src/anu_ctlab_io/_voxel_properties.py

Lines changed: 19 additions & 0 deletions
@@ -59,6 +59,25 @@ def __hash__(self) -> int:
     def __str__(self) -> str:
         return self.value
 
+    def to_full_name(self) -> str:
+        """Return the full unit name for OME-Zarr metadata.
+
+        OME-Zarr specification requires full unit names (e.g., "millimeter")
+        rather than abbreviated forms (e.g., "mm").
+
+        :return: Full unit name as string.
+        """
+        full_names = {
+            VoxelUnit.M: "meter",
+            VoxelUnit.CM: "centimeter",
+            VoxelUnit.MM: "millimeter",
+            VoxelUnit.UM: "micrometer",
+            VoxelUnit.NM: "nanometer",
+            VoxelUnit.ANGSTROM: "angstrom",
+            VoxelUnit.VOXEL: "voxel",
+        }
+        return full_names[self]
+
     def _conversion_factor(self, target_unit: Self) -> float:
         """Get the conversion factor from this unit to the target unit.
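A small sketch of the new helper; the package-level import and the abbreviated enum value are assumptions based on the surrounding diff:

```python
from anu_ctlab_io import VoxelUnit  # assumed re-export; defined in _voxel_properties.py

unit = VoxelUnit.UM
print(str(unit))            # the abbreviated form stored as the enum value (e.g. "um")
print(unit.to_full_name())  # "micrometer", the full name the OME-Zarr spec expects
```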

src/anu_ctlab_io/netcdf/__init__.py

Lines changed: 7 additions & 3 deletions
@@ -1,4 +1,4 @@
-"""Read data from the ANU CTLab netcdf data format.
+"""Read and write data from/to the ANU CTLab netcdf data format.
 
 This is an optional extra module, and must be explicitly installed to be used (e.g., ``pip install anu_ctlab_io[netcdf]``)."""
 

@@ -21,7 +21,9 @@
 ):
     raise ImportError("Neither netCDF4 nor h5netcdf could be imported.")
 
-__all__ = ["dataset_from_netcdf"]
+from anu_ctlab_io.netcdf._writer import dataset_to_netcdf
+
+__all__ = ["dataset_from_netcdf", "dataset_to_netcdf"]
 
 
 def dataset_from_netcdf(

@@ -42,7 +44,9 @@ def dataset_from_netcdf(
     dataset.attrs = _update_attrs(dataset.attrs, parse_history)
     return Dataset(
         data=dataset.data.data,
-        dimension_names=tuple(map(str, dataset.dims)),
+        dimension_names=tuple(
+            map(str, dataset.data.dims)
+        ),  # Use dims from data var only
         datatype=datatype,
         voxel_unit=VoxelUnit.from_str(dataset.attrs["voxel_unit"]),
         voxel_size=dataset.attrs["voxel_size"],
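A hedged round-trip sketch using the functions this module now exports; the input filename is hypothetical and `dataset_from_netcdf` is assumed to take a path as its first argument:

```python
from anu_ctlab_io.netcdf import dataset_from_netcdf, dataset_to_netcdf

# Read a NetCDF volume and write an uncompressed copy; this commit makes
# uncompressed output the NetCDF default (compression level 0 means no compression).
dataset = dataset_from_netcdf("tomoHiRes_nc")        # hypothetical input path
dataset_to_netcdf(dataset, "tomoHiRes_copy.nc")
```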
