From 4bd91bf6c23f2ffded55a2e4a9afa728700ad671 Mon Sep 17 00:00:00 2001 From: malmans2 Date: Wed, 10 Dec 2025 11:44:56 +0100 Subject: [PATCH 1/2] Set preferred chunksizes to one file per chunk --- pyproject.toml | 1 + tests/test_20_open_dataset.py | 12 ++++++++++++ uv.lock | 31 +++++++++++++++++++++++++++++++ xarray_esgf/client.py | 9 +++++++-- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 616c5a5..43f5924 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ classifiers = [ ] dependencies = [ "aiohttp", + "cftime>=1.6.5", "dask", "esgpull>=0.9.3", "h5netcdf", diff --git a/tests/test_20_open_dataset.py b/tests/test_20_open_dataset.py index a8d6652..98f9b1a 100644 --- a/tests/test_20_open_dataset.py +++ b/tests/test_20_open_dataset.py @@ -30,8 +30,20 @@ def test_open_dataset(tmp_path: Path, index_node: str, download: bool) -> None: download=download, chunks={}, ) + assert (esgpull_path / "data" / "CMIP6").exists() is download + # Chunks + for dim in ds.dims: + assert not ds[dim].chunks + assert ds.chunksizes == { + "experiment_id": (1, 1), + "time": (12, 12), + "lat": (256,), + "lon": (512,), + "bnds": (2,), + } + # Dims assert ds.sizes == { "experiment_id": 2, diff --git a/uv.lock b/uv.lock index 798c2cf..592aab0 100644 --- a/uv.lock +++ b/uv.lock @@ -277,6 +277,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, ] +[[package]] +name = "cftime" +version = "1.6.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/65/dc/470ffebac2eb8c54151eb893055024fe81b1606e7c6ff8449a588e9cd17f/cftime-1.6.5.tar.gz", hash = "sha256:8225fed6b9b43fb87683ebab52130450fc1730011150d3092096a90e54d1e81e", size = 326605, upload-time = "2025-10-13T18:56:26.352Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/c1/e8cb7f78a3f87295450e7300ebaecf83076d96a99a76190593d4e1d2be40/cftime-1.6.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:eef25caed5ebd003a38719bd3ff8847cd52ef2ea56c3ebdb2c9345ba131fc7c5", size = 504175, upload-time = "2025-10-13T18:56:06.398Z" }, + { url = "https://files.pythonhosted.org/packages/50/1a/86e1072b09b2f9049bb7378869f64b6747f96a4f3008142afed8955b52a4/cftime-1.6.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c87d2f3b949e45463e559233c69e6a9cf691b2b378c1f7556166adfabbd1c6b0", size = 485980, upload-time = "2025-10-13T18:56:08.669Z" }, + { url = "https://files.pythonhosted.org/packages/35/28/d3177b60da3f308b60dee2aef2eb69997acfab1e863f0bf0d2a418396ce5/cftime-1.6.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:82cb413973cc51b55642b3a1ca5b28db5b93a294edbef7dc049c074b478b4647", size = 1591166, upload-time = "2025-10-13T19:39:14.109Z" }, + { url = "https://files.pythonhosted.org/packages/d1/fd/a7266970312df65e68b5641b86e0540a739182f5e9c62eec6dbd29f18055/cftime-1.6.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85ba8e7356d239cfe56ef7707ac30feaf67964642ac760a82e507ee3c5db4ac4", size = 1642614, upload-time = "2025-10-13T18:56:09.815Z" }, + { url = "https://files.pythonhosted.org/packages/c4/73/f0035a4bc2df8885bb7bd5fe63659686ea1ec7d0cc74b4e3d50e447402e5/cftime-1.6.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:456039af7907a3146689bb80bfd8edabd074c7f3b4eca61f91b9c2670addd7ad", size = 1688090, upload-time = "2025-10-13T18:56:11.442Z" }, + { url = "https://files.pythonhosted.org/packages/88/15/8856a0ab76708553ff597dd2e617b088c734ba87dc3fd395e2b2f3efffe8/cftime-1.6.5-cp312-cp312-win_amd64.whl", hash = "sha256:da84534c43699960dc980a9a765c33433c5de1a719a4916748c2d0e97a071e44", size = 464840, upload-time = "2025-10-13T18:56:12.506Z" }, + { url = "https://files.pythonhosted.org/packages/2e/60/74ea344b3b003fada346ed98a6899085d6fd4c777df608992d90c458fda6/cftime-1.6.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4aba66fd6497711a47c656f3a732c2d1755ad15f80e323c44a8716ebde39ddd5", size = 502453, upload-time = "2025-10-13T18:56:13.545Z" }, + { url = "https://files.pythonhosted.org/packages/1e/14/adb293ac6127079b49ff11c05cf3d5ce5c1f17d097f326dc02d74ddfcb6e/cftime-1.6.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:89e7cba699242366e67d6fb5aee579440e791063f92a93853610c91647167c0d", size = 484541, upload-time = "2025-10-13T18:56:14.612Z" }, + { url = "https://files.pythonhosted.org/packages/4f/74/bb8a4566af8d0ef3f045d56c462a9115da4f04b07c7fbbf2b4875223eebd/cftime-1.6.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2f1eb43d7a7b919ec99aee709fb62ef87ef1cf0679829ef93d37cc1c725781e9", size = 1591014, upload-time = "2025-10-13T19:39:15.346Z" }, + { url = "https://files.pythonhosted.org/packages/ba/08/52f06ff2f04d376f9cd2c211aefcf2b37f1978e43289341f362fc99f6a0e/cftime-1.6.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e02a1d80ffc33fe469c7db68aa24c4a87f01da0c0c621373e5edadc92964900b", size = 1633625, upload-time = "2025-10-13T18:56:15.745Z" }, + { url = "https://files.pythonhosted.org/packages/cf/33/03e0b23d58ea8fab94ecb4f7c5b721e844a0800c13694876149d98830a73/cftime-1.6.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18ab754805233cdd889614b2b3b86a642f6d51a57a1ec327c48053f3414f87d8", size = 1684269, upload-time = "2025-10-13T18:56:17.04Z" }, + { url = "https://files.pythonhosted.org/packages/a4/60/a0cfba63847b43599ef1cdbbf682e61894994c22b9a79fd9e1e8c7e9de41/cftime-1.6.5-cp313-cp313-win_amd64.whl", hash = "sha256:6c27add8f907f4a4cd400e89438f2ea33e2eb5072541a157a4d013b7dbe93f9c", size = 465364, upload-time = "2025-10-13T18:56:18.05Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6c/a9618f589688358e279720f5c0fe67ef0077fba07334ce26895403ebc260/cftime-1.6.5-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c69ce3bdae6a322cbb44e9ebc20770d47748002fb9d68846a1e934f1bd5daf0b", size = 502725, upload-time = "2025-10-13T18:56:19.424Z" }, + { url = "https://files.pythonhosted.org/packages/d8/e3/da3c36398bfb730b96248d006cabaceed87e401ff56edafb2a978293e228/cftime-1.6.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e62e9f2943e014c5ef583245bf2e878398af131c97e64f8cd47c1d7baef5c4e2", size = 485445, upload-time = "2025-10-13T18:56:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/32/93/b05939e5abd14bd1ab69538bbe374b4ee2a15467b189ff895e9a8cdaddf6/cftime-1.6.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7da5fdaa4360d8cb89b71b8ded9314f2246aa34581e8105c94ad58d6102d9e4f", size = 1584434, upload-time = "2025-10-13T19:39:17.084Z" }, + { url = "https://files.pythonhosted.org/packages/7f/89/648397f9936e0b330999c4e776ebf296ec3c6a65f9901687dbca4ab820da/cftime-1.6.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bff865b4ea4304f2744a1ad2b8149b8328b321dd7a2b9746ef926d229bd7cd49", size = 1609812, upload-time = "2025-10-13T18:56:21.971Z" }, + { url = "https://files.pythonhosted.org/packages/e7/0f/901b4835aa67ad3e915605d4e01d0af80a44b114eefab74ae33de6d36933/cftime-1.6.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e552c5d1c8a58f25af7521e49237db7ca52ed2953e974fe9f7c4491e95fdd36c", size = 1669768, upload-time = "2025-10-13T18:56:24.027Z" }, + { url = "https://files.pythonhosted.org/packages/22/d5/e605e4b28363e7a9ae98ed12cabbda5b155b6009270e6a231d8f10182a17/cftime-1.6.5-cp314-cp314-win_amd64.whl", hash = "sha256:e645b095dc50a38ac454b7e7f0742f639e7d7f6b108ad329358544a6ff8c9ba2", size = 463818, upload-time = "2025-10-13T18:56:25.376Z" }, +] + [[package]] name = "chardet" version = "5.2.0" @@ -1912,6 +1941,7 @@ version = "0.0.1" source = { editable = "." } dependencies = [ { name = "aiohttp" }, + { name = "cftime" }, { name = "dask" }, { name = "esgpull" }, { name = "h5netcdf" }, @@ -1933,6 +1963,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "aiohttp" }, + { name = "cftime", specifier = ">=1.6.5" }, { name = "dask" }, { name = "esgpull", specifier = ">=0.9.3" }, { name = "h5netcdf" }, diff --git a/xarray_esgf/client.py b/xarray_esgf/client.py index 5bbbbff..90d7ca5 100644 --- a/xarray_esgf/client.py +++ b/xarray_esgf/client.py @@ -132,11 +132,11 @@ def open_dataset( ): ds = xr.open_dataset( self._client.fs[file].drs if download else file.url, - chunks={}, + chunks=-1, engine="h5netcdf", drop_variables=drop_variables, ) - grouped_objects[file.dataset_id].append(ds) + grouped_objects[file.dataset_id].append(ds.drop_encoding()) combined_datasets = {} for dataset_id, datasets in grouped_objects.items(): @@ -168,4 +168,9 @@ def open_dataset( if isinstance(obj, DataArray): obj = obj.to_dataset() obj.attrs["dataset_ids"] = sorted(grouped_objects) + + for name, var in obj.variables.items(): + if name not in obj.dims: + var.encoding["preferred_chunks"] = dict(var.chunksizes) + return obj From 020db86d355f419d465fcb90e72f49362d0ea3e0 Mon Sep 17 00:00:00 2001 From: malmans2 Date: Wed, 10 Dec 2025 11:45:44 +0100 Subject: [PATCH 2/2] unpin cftime --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 43f5924..c3d701d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ ] dependencies = [ "aiohttp", - "cftime>=1.6.5", + "cftime", "dask", "esgpull>=0.9.3", "h5netcdf", diff --git a/uv.lock b/uv.lock index 592aab0..717e3c6 100644 --- a/uv.lock +++ b/uv.lock @@ -1963,7 +1963,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "aiohttp" }, - { name = "cftime", specifier = ">=1.6.5" }, + { name = "cftime" }, { name = "dask" }, { name = "esgpull", specifier = ">=0.9.3" }, { name = "h5netcdf" },