From 07df9649d070acf05d13b20458f0ea8f5e6bf5f9 Mon Sep 17 00:00:00 2001 From: malmans2 Date: Thu, 4 Dec 2025 11:09:32 +0100 Subject: [PATCH] handle fix variables --- notebooks/quick_start.ipynb | 55 ----------------------------------- tests/test_20_open_dataset.py | 34 +++++++--------------- xarray_esgf/client.py | 25 +++++++++------- 3 files changed, 24 insertions(+), 90 deletions(-) delete mode 100644 notebooks/quick_start.ipynb diff --git a/notebooks/quick_start.ipynb b/notebooks/quick_start.ipynb deleted file mode 100644 index e280bcf..0000000 --- a/notebooks/quick_start.ipynb +++ /dev/null @@ -1,55 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "import xarray as xr\n", - "\n", - "ESGPULL_PATH = None # Set path to download data\n", - "if ESGPULL_PATH is None:\n", - " msg = \"Please set path to download data, otherwise it will use ~/.esgpull\"\n", - " raise ValueError(msg)\n", - "\n", - "ds = xr.open_dataset(\n", - " {\n", - " \"project\": \"CMIP6\",\n", - " \"experiment_id\": \"ssp*\",\n", - " \"source_id\": \"EC-Earth3-CC\",\n", - " \"frequency\": \"mon\",\n", - " \"variable_id\": [\"tas\", \"pr\"],\n", - " \"variant_label\": \"r1i1p1f1\",\n", - " },\n", - " concat_dims=\"experiment_id\",\n", - " esgpull_path=ESGPULL_PATH,\n", - " index_node=\"esgf.ceda.ac.uk\",\n", - " engine=\"esgf\",\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/test_20_open_dataset.py b/tests/test_20_open_dataset.py index e44371d..a8d6652 100644 --- a/tests/test_20_open_dataset.py +++ b/tests/test_20_open_dataset.py @@ -17,6 +17,8 @@ def test_open_dataset(tmp_path: Path, index_node: str, download: bool) -> None: '"pr_Amon_EC-Earth3-CC_ssp245_r1i1p1f1_gr_202001-202012.nc"', '"pr_Amon_EC-Earth3-CC_ssp585_r1i1p1f1_gr_201901-201912.nc"', '"pr_Amon_EC-Earth3-CC_ssp585_r1i1p1f1_gr_202001-202012.nc"', + '"CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp245.r1i1p1f1.fx.areacella.gr.v20210113.areacella_fx_EC-Earth3-CC_ssp245_r1i1p1f1_gr.nc"', + '"CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.r1i1p1f1.fx.areacella.gr.v20210113.areacella_fx_EC-Earth3-CC_ssp585_r1i1p1f1_gr.nc"', ] } ds = xr.open_dataset( @@ -41,6 +43,7 @@ def test_open_dataset(tmp_path: Path, index_node: str, download: bool) -> None: # Coords assert set(ds.coords) == { + "areacella", "experiment_id", "height", "lat", @@ -50,12 +53,11 @@ def test_open_dataset(tmp_path: Path, index_node: str, download: bool) -> None: "time", "time_bnds", } - assert set(ds[["lat_bnds", "lon_bnds", "time_bnds"]].dims) == { - "bnds", - "lat", - "lon", - "time", - } + assert all( + "experiment_id" not in coord.dims + for name, coord in ds.coords.items() + if name != "experiment_id" + ) # Data vars assert set(ds.data_vars) == {"tas", "pr"} @@ -64,24 +66,8 @@ def test_open_dataset(tmp_path: Path, index_node: str, download: bool) -> None: assert ds.dataset_ids == [ "CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp245.r1i1p1f1.Amon.pr.gr.v20210113", "CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp245.r1i1p1f1.Amon.tas.gr.v20210113", + "CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp245.r1i1p1f1.fx.areacella.gr.v20210113", "CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.r1i1p1f1.Amon.pr.gr.v20210113", "CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.r1i1p1f1.Amon.tas.gr.v20210113", + "CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3-CC.ssp585.r1i1p1f1.fx.areacella.gr.v20210113", ] - - -def test_open_dataset_check_dims(tmp_path: Path) -> None: - esgpull_path = tmp_path / "esgpull" - selection = { - "query": [ - '"tos_Amon_EC-Earth3-CC_ssp245_r1i1p1f1_gr_201501-201512.nc"', - '"tos_Omon_EC-Earth3-CC_ssp245_r1i1p1f1_gn_201501-201512.nc"', - ] - } - with pytest.raises(ValueError, match="Dimensions do not match"): - xr.open_dataset( - selection, # type: ignore[arg-type] - esgpull_path=esgpull_path, - engine="esgf", - download=True, - chunks={}, - ) diff --git a/xarray_esgf/client.py b/xarray_esgf/client.py index 40b9c5d..5bbbbff 100644 --- a/xarray_esgf/client.py +++ b/xarray_esgf/client.py @@ -141,22 +141,25 @@ def open_dataset( combined_datasets = {} for dataset_id, datasets in grouped_objects.items(): dataset_id_dict = dataset_id_to_dict(dataset_id) - ds = xr.concat( - datasets, - dim="time", - data_vars="minimal", - coords="minimal", - compat="override", - combine_attrs="drop_conflicts", - ) + if len(datasets) == 1: + (ds,) = datasets + else: + ds = xr.concat( + datasets, + dim="time", + data_vars="minimal", + coords="minimal", + compat="override", + combine_attrs="drop_conflicts", + ) ds = ds.set_coords([ - name for name, da in ds.variables.items() if "bnds" in da.dims + name + for name, da in ds.variables.items() + if "bnds" in da.dims or "time" not in da.dims ]) ds = ds.expand_dims({dim: [dataset_id_dict[dim]] for dim in concat_dims}) combined_datasets[dataset_id] = ds - check_dimensions(combined_datasets) - obj = xr.combine_by_coords( combined_datasets.values(), join="exact",