From 6d879301d89dc4d45b6b9038f884ef10eece3131 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 19 Nov 2024 13:38:02 -0500 Subject: [PATCH 01/15] split optional deps --- pyproject.toml | 49 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 77998076..918cecb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,30 +23,59 @@ dynamic = ["version"] dependencies = [ "xarray>=2024.10.0", "numpy>=2.0.0", - "packaging", "universal-pathlib", "numcodecs", "ujson", + "packaging", ] [project.optional-dependencies] +# non-kerchunk readers hdf_reader = [ "fsspec", "h5py", "hdf5plugin", "imagecodecs", "imagecodecs-numcodecs==2024.6.1", - "numcodecs" +] +# kerchunk-based readers +hdf5_reader = [ + "kerchunk>=0.25.0", + "h5py", +] +netcdf3_reader = [ + "kerchunk>=0.25.0", + "scipy", +] +fits_reader = [ + "kerchunk>=0.25.0", + "astropy", +] +# un-implemented readers +# tiff_reader = [ +# "kerchunk>=0.25.0", +# "tifffile", +# ] +# grib_reader = ["kerchunk>=0.25.0"] +# zarr_reader = ["zarr==3.0.0b2"] +all_readers = [ + "virtualizarr[hdf]", + "virtualizarr[hdf5]", + "virtualizarr[netcdf3]", + "virtualizarr[fits]", + "virtualizarr[tiff]", + "virtualizarr[grib]", + "virtualizarr[zarr]", +] +# writers +icechunk = ["icechunk"] +all = [ + "virtualizarr[all_readers]", + "virtualizarr[icechunk]", ] test = [ "codecov", - "fastparquet", - "fsspec", - "h5py", - "kerchunk>=0.2.5", "mypy", - "netcdf4", - "numcodecs", "pandas-stubs", "pooch", "pre-commit", @@ -55,11 +84,9 @@ test = [ "pytest", "ruff", "s3fs", - "scipy", - "virtualizarr[hdf_reader]" + "virtualizarr[all]" ] - [project.urls] Home = "https://github.com/TomNicholas/VirtualiZarr" Documentation = "https://github.com/TomNicholas/VirtualiZarr/blob/main/README.md" From 5b144773894dee44cbbdf7c53c8d16fb91b63c30 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 19 Nov 2024 13:39:36 -0500 Subject: [PATCH 02/15] comment out 
un-implemented readers --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 918cecb5..573026c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,9 +63,9 @@ all_readers = [ "virtualizarr[hdf5]", "virtualizarr[netcdf3]", "virtualizarr[fits]", - "virtualizarr[tiff]", - "virtualizarr[grib]", - "virtualizarr[zarr]", +# "virtualizarr[tiff]", +# "virtualizarr[grib]", +# "virtualizarr[zarr]", ] # writers icechunk = ["icechunk"] From 850c38a5e54702b2a7316efaa937eb801017b85c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 19 Nov 2024 13:49:19 -0500 Subject: [PATCH 03/15] remove _reader suffix --- pyproject.toml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 573026c1..d381d672 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ [project.optional-dependencies] # non-kerchunk readers -hdf_reader = [ +hdf = [ "fsspec", "h5py", "hdf5plugin", @@ -39,25 +39,25 @@ hdf_reader = [ "imagecodecs-numcodecs==2024.6.1", ] # kerchunk-based readers -hdf5_reader = [ +hdf5 = [ "kerchunk>=0.25.0", "h5py", ] -netcdf3_reader = [ +netcdf3 = [ "kerchunk>=0.25.0", "scipy", ] -fits_reader = [ +fits = [ "kerchunk>=0.25.0", "astropy", ] # un-implemented readers -# tiff_reader = [ +# tiff = [ # "kerchunk>=0.25.0", # "tifffile", # ] -# grib_reader = ["kerchunk>=0.25.0"] -# zarr_reader = ["zarr==3.0.0b2"] +# grib = ["kerchunk>=0.25.0"] +# zarr = ["zarr==3.0.0b2"] all_readers = [ "virtualizarr[hdf]", "virtualizarr[hdf5]", From aeecf8a03fb0bff06de7a7d87d3479b8b2168fb6 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 20 Nov 2024 15:58:00 -0500 Subject: [PATCH 04/15] add kerchunk as writer to cover fastparquet dependency --- pyproject.toml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d381d672..3dd78cf2 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ ] [project.optional-dependencies] -# non-kerchunk readers +# non-kerchunk-based readers hdf = [ "fsspec", "h5py", @@ -69,9 +69,15 @@ all_readers = [ ] # writers icechunk = ["icechunk"] +# technically also a reader, as fastparquet is also required to read parquet-formatted kerchunk references +kerchunk = ["fastparquet"] +all_writers = [ + "virtualizarr[icechunk]", + "virtualizarr[kerchunk]", +] all = [ "virtualizarr[all_readers]", - "virtualizarr[icechunk]", + "virtualizarr[all_writers]", ] test = [ "codecov", From a633a0cadd7aca5ef5bd753f70ccd293f408b568 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 20 Nov 2024 17:14:55 -0500 Subject: [PATCH 05/15] embed the pyproject.toml in the installation docs --- docs/installation.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/docs/installation.md b/docs/installation.md index 03272a2f..9418630f 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -12,6 +12,31 @@ and on conda-forge: conda install -c conda-forge virtualizarr ``` +## Optional dependencies + +VirtualiZarr has many optional dependencies, split into those for reading various file formats, and those for writing virtual references out to different formats. + +Optional dependencies can be installed in groups via pip, e.g. to read HDF files and write virtual references to icechunk you could install all necessary dependencies via: + +```shell +pip install "virtualizarr[hdf, icechunk]" +``` + +The full list of optional dependencies can be seen in the `pyproject.toml` file: + +```{literalinclude} ../pyproject.toml +:start-at: "[project.optional-dependencies]" +:end-before: "test =" + +``` + +The compound groups allow you to e.g. install every file reader via + +```shell +pip install "virtualizarr[all_readers]" +``` + +The basic `pip install virtualizarr` will only install the minimal required dependencies, and so may not be particularly useful on its own. 
## Install Test Dependencies From 4d77fa1811d90d25eec2b24faedbd83d764d830d Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 20 Nov 2024 17:16:31 -0500 Subject: [PATCH 06/15] notes on test and docs deps --- docs/installation.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index 9418630f..eb40cc91 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -40,13 +40,16 @@ The basic `pip install virtualizarr` will only install the minimal required depe ## Install Test Dependencies +For local development you will want to install the test dependencies so that you can run all the tests in the test suite: + ```shell pip install '-e .[test]' ``` - ## Install Docs Dependencies +To build the documentation locally you will need further dependencies: + ```shell pip install '-e .[docs]' ``` From 32966e56a05f3c5d598aadec4f97a6ea20901c07 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 20 Nov 2024 17:17:58 -0500 Subject: [PATCH 07/15] wording --- docs/installation.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index eb40cc91..ade5a9f8 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -16,7 +16,7 @@ conda install -c conda-forge virtualizarr VirtualiZarr has many optional dependencies, split into those for reading various file formats, and those for writing virtual references out to different formats. -Optional dependencies can be installed in groups via pip, e.g. to read HDF files and write virtual references to icechunk you could install all necessary dependencies via: +Optional dependencies can be installed in groups via pip. 
For example to read HDF files and write virtual references to icechunk you could install all necessary dependencies via: ```shell pip install "virtualizarr[hdf, icechunk]" @@ -27,10 +27,9 @@ The full list of optional dependencies can be seen in the `pyproject.toml` file: ```{literalinclude} ../pyproject.toml :start-at: "[project.optional-dependencies]" :end-before: "test =" - ``` -The compound groups allow you to e.g. install every file reader via +The compound groups allow you to install multiple sets of dependencies at once, e.g. install every file reader via ```shell pip install "virtualizarr[all_readers]" From aa347618b7b973bd79c8d030ce185120bbd093b1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Nov 2024 22:18:11 +0000 Subject: [PATCH 08/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index ade5a9f8..f1c15c80 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -29,7 +29,7 @@ The full list of optional dependencies can be seen in the `pyproject.toml` file: :end-before: "test =" ``` -The compound groups allow you to install multiple sets of dependencies at once, e.g. install every file reader via +The compound groups allow you to install multiple sets of dependencies at once, e.g. 
install every file reader via ```shell pip install "virtualizarr[all_readers]" From cc8a2188fa78a609e75ba026477c36be7bd7f9e4 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Wed, 20 Nov 2024 17:19:38 -0500 Subject: [PATCH 09/15] release note --- docs/releases.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/releases.rst b/docs/releases.rst index bde41778..07846920 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -9,6 +9,9 @@ v1.1.1 (unreleased) New Features ~~~~~~~~~~~~ +- Optional dependencies can now be installed in groups via pip. See the installation docs. + (:pull:`309`) By `Tom Nicholas <https://github.com/TomNicholas>`_. + Breaking changes ~~~~~~~~~~~~~~~~ From 2cbcd2f92a1ea2767bb2296ab6eccf9c25bc2cd0 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 3 Feb 2025 15:23:07 -0500 Subject: [PATCH 10/15] update kerchunk version --- pyproject.toml | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a415c816..a973011c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,19 +43,19 @@ hdf5 = [ "h5py", ] netcdf3 = [ - "kerchunk>=0.25.0", + "kerchunk>=0.2.8", "scipy", ] fits = [ - "kerchunk>=0.25.0", + "kerchunk>=0.2.8", "astropy", ] # un-implemented readers # tiff = [ -# "kerchunk>=0.25.0", +# "kerchunk>=0.2.8", # "tifffile", # ] -# grib = ["kerchunk>=0.25.0"] +# grib = ["kerchunk>=0.2.8",] # zarr = ["zarr==3.0.0b2"] all_readers = [ "virtualizarr[hdf]", @@ -67,7 +67,9 @@ all_readers = [ # "virtualizarr[zarr]", ] # writers -icechunk = ["icechunk"] +icechunk = [ + "icechunk>=0.1.0a12", +] # technically also a reader, as fastparquet is also required to read parquet-formatted kerchunk references kerchunk = ["fastparquet"] all_writers = [ @@ -78,17 +80,6 @@ all = [ "virtualizarr[all_readers]", "virtualizarr[all_writers]", ] -netcdf3 = [ - "kerchunk>=0.2.8", - "scipy", -] -fits = [ - "kerchunk>=0.2.8", - "astropy", -] -icechunk = [ - "icechunk>=0.1.0a12", -] test = [ "codecov", "mypy", From 
af52c4c47f4dc198d1575f2c1a98ab8f250cbcff Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 3 Feb 2025 15:26:07 -0500 Subject: [PATCH 11/15] separate remote options --- pyproject.toml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a973011c..a1bf0d41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,9 +29,15 @@ dependencies = [ ] [project.optional-dependencies] +remote = [ + "fsspec", + "requests", + "aiohttp", + "s3fs", +] # non-kerchunk-based readers hdf = [ - "fsspec", + "virtualizarr[remote]", "h5py", "hdf5plugin", "imagecodecs", @@ -39,23 +45,30 @@ hdf = [ ] # kerchunk-based readers hdf5 = [ + "virtualizarr[remote]", "kerchunk>=0.25.0", "h5py", ] netcdf3 = [ + "virtualizarr[remote]", "kerchunk>=0.2.8", "scipy", ] fits = [ + "virtualizarr[remote]", "kerchunk>=0.2.8", "astropy", ] # un-implemented readers # tiff = [ +# "virtualizarr[remote]", # "kerchunk>=0.2.8", # "tifffile", # ] -# grib = ["kerchunk>=0.2.8",] +# grib = [ +# "virtualizarr[remote]", +# "kerchunk>=0.2.8", +# ] # zarr = ["zarr==3.0.0b2"] all_readers = [ "virtualizarr[hdf]", From 65c4af1026640347c134f00a6eaaac6548779bc8 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 3 Feb 2025 15:27:21 -0500 Subject: [PATCH 12/15] remove sneaky git thing --- docs/releases.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/releases.rst b/docs/releases.rst index 5eae303c..2bbc74cf 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -84,7 +84,6 @@ as well as many other bugfixes and documentation improvements. New Features ~~~~~~~~~~~~ ->>>>>>> main - Add a ``virtual_backend_kwargs`` keyword argument to file readers and to ``open_virtual_dataset``, to allow reader-specific options to be passed down. (:pull:`315`) By `Tom Nicholas `_. - Added append functionality to `to_icechunk` (:pull:`272`) By `Aimee Barciauskas `_. 
From a81a845270d15aa80433c7dd5beb8f8bebfce422 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 3 Feb 2025 14:24:06 -0700 Subject: [PATCH 13/15] Update pyproject.toml Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a1bf0d41..9ed6046d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ hdf = [ # kerchunk-based readers hdf5 = [ "virtualizarr[remote]", - "kerchunk>=0.25.0", + "kerchunk>=0.2.5", "h5py", ] netcdf3 = [ From a34843bbd3d9e78332049a8b8df315bc99e57c5c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 3 Feb 2025 16:24:52 -0500 Subject: [PATCH 14/15] update kerchunk version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9ed6046d..29445bdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ hdf = [ # kerchunk-based readers hdf5 = [ "virtualizarr[remote]", - "kerchunk>=0.2.5", + "kerchunk>=0.2.8", "h5py", ] netcdf3 = [ From d79e348b4f1b457bef2a9be21289dc7f12901313 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 3 Feb 2025 17:12:23 -0500 Subject: [PATCH 15/15] remove commented-out readers --- pyproject.toml | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 29445bdd..e53262c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,12 +29,14 @@ dependencies = [ ] [project.optional-dependencies] +# for creating virtual datasets from objects on remote storage remote = [ "fsspec", "requests", "aiohttp", "s3fs", ] + # non-kerchunk-based readers hdf = [ "virtualizarr[remote]", @@ -43,6 +45,7 @@ hdf = [ "imagecodecs", "imagecodecs-numcodecs==2024.6.1", ] + # kerchunk-based readers hdf5 = [ "virtualizarr[remote]", @@ -59,31 +62,17 @@ fits = [ "kerchunk>=0.2.8", "astropy", ] -# un-implemented readers -# tiff = [ -# 
"virtualizarr[remote]", -# "kerchunk>=0.2.8", -# "tifffile", -# ] -# grib = [ -# "virtualizarr[remote]", -# "kerchunk>=0.2.8", -# ] -# zarr = ["zarr==3.0.0b2"] all_readers = [ "virtualizarr[hdf]", "virtualizarr[hdf5]", "virtualizarr[netcdf3]", "virtualizarr[fits]", -# "virtualizarr[tiff]", -# "virtualizarr[grib]", -# "virtualizarr[zarr]", ] + # writers icechunk = [ "icechunk>=0.1.0a12", ] -# technically also a reader, as fastparquet is also required to read parquet-formatted kerchunk references kerchunk = ["fastparquet"] all_writers = [ "virtualizarr[icechunk]", @@ -93,6 +82,7 @@ all = [ "virtualizarr[all_readers]", "virtualizarr[all_writers]", ] + test = [ "codecov", "mypy",