Skip to content

Commit 503f22a

Browse files
authored
Merge pull request #24 from Climate-REF/add-pmp
Add datasets that are not yet published on obs4MIPs
2 parents 47b0159 + ffd2486 commit 503f22a

14 files changed

Lines changed: 774 additions & 45 deletions

File tree

.github/actions/regenerate/action.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ runs:
55
steps:
66
- uses: ./.github/actions/setup
77
with:
8-
python-version: 3.12
98
cache-esgf: true
109

1110
- name: Verify registry

.github/actions/setup/action.yml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,11 @@ runs:
1212
- name: Install pixi
1313
uses: prefix-dev/setup-pixi@v0.8.3
1414
with:
15-
pixi-version: "latest"
15+
pixi-version: "v0.40.2"
1616
cache: true
17-
- name: Install the project
18-
shell: bash
19-
run: |
20-
# Only installs if the lock file is up-to-date with the manifest
21-
pixi install --locked
17+
# Frozen is needed as the ref git dependency was not playing nice with a fully locked environment
18+
frozen: true
19+
log-level: "v"
2220
- name: Cache downloaded ESGF data
2321
uses: actions/cache@v4
2422
if: ${{ inputs.cache-esgf == 'true' }}

changelog/24.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add datasets from PMP that are not yet published on obs4MIPs

pixi.lock

Lines changed: 642 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ xesmf = ">=0.8.7,<0.9"
2929
[tool.pixi.pypi-dependencies]
3030
# Add any dependencies that aren't available on conda-forge here
3131
ref_sample_data = { path = ".", editable = true }
32+
# TODO: Pin a release
33+
# This rev includes the PMP reference data
34+
cmip-ref = { git = "https://github.com/Climate-REF/climate-ref", subdirectory = "packages/ref", rev = "7ea9c966fc44b91e4b0e3d8b31f6f2c3f1445677" }
3235

3336
[tool.pixi.feature.dev.dependencies]
3437
ruff = "*"

registry.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,5 @@ CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/fx/areacella/gn/v20210318/
5151
obs4MIPs/NASA-JPL/AIRS-2-1/ta/gn/v20201110/taNobs_AIRS-2-1_gn_200209-201609.nc 3489895fc6cdd936ae64fa64fa221474e50f6b6bf347458c82d9a61f945f2d9d
5252
obs4MIPs/NASA-JPL/AIRS-2-1/ta/gn/v20201110/taStderr_AIRS-2-1_gn_200209-201609.nc 81e12ba5c6b058ace93737a3b69b317d2beb17e07fd6aa9f709b3e528ebfb4a2
5353
obs4MIPs/NASA-JPL/AIRS-2-1/ta/gn/v20201110/ta_AIRS-2-1_gn_200209-201609.nc a72d7172cd0c9df9eb0199082b196655490e5628fbb6a61ed1e7f8f83c610c0b
54+
obs4REF/obs4MIPs_PCMDI_monthly/MOHC/HadISST-1-1/mon/ts/gn/v20210727/ts_mon_HadISST-1-1_PCMDI_gn_187001-201907.nc 4f9a9270d001fc30488b49cdafe28e77db88e78e981ab580f0fae209f849a2da
55+
obs4REF/obs4MIPs_PCMDI_monthly/NOAA-ESRL-PSD/20CR/mon/psl/gn/v20210727/psl_mon_20CR_PCMDI_gn_187101-201212.nc 357e8915cc2ad30af1dd02cbecfb55f3083c13f54a11912e2f28396ccc84bd9c

scripts/fetch_test_data.py

Lines changed: 3 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,45 +2,16 @@
22
from pathlib import Path
33
from typing import Annotated
44

5-
import pandas as pd
65
import pooch
76
import typer
87
import xarray as xr
98

10-
from ref_sample_data import CMIP6Request, DataRequest, Obs4MIPsRequest
9+
from ref_sample_data import CMIP6Request, DataRequest, Obs4MIPsRequest, Obs4REFRequest
1110

1211
OUTPUT_PATH = Path("data")
1312
app = typer.Typer()
1413

1514

16-
def deduplicate_datasets(datasets: pd.DataFrame) -> pd.DataFrame:
17-
"""
18-
Deduplicate a dataset collection.
19-
20-
Uses the metadata from the first dataset in each group,
21-
but expands the time range to the min/max timespan of the group.
22-
23-
Parameters
24-
----------
25-
datasets
26-
The dataset collection
27-
28-
Returns
29-
-------
30-
pd.DataFrame
31-
The deduplicated dataset collection spanning the times requested
32-
"""
33-
34-
def _deduplicate_group(group: pd.DataFrame) -> pd.DataFrame:
35-
first = group.iloc[0].copy()
36-
first.time_start = group.time_start.min()
37-
first.time_end = group.time_end.max()
38-
39-
return first
40-
41-
return datasets.groupby("key").apply(_deduplicate_group, include_groups=False).reset_index()
42-
43-
4415
def process_sample_data_request(
4516
request: DataRequest, decimate: bool, output_directory: Path, quiet: bool
4617
) -> None:
@@ -61,7 +32,6 @@ def process_sample_data_request(
6132
Whether to suppress progress messages
6233
"""
6334
datasets = request.fetch_datasets()
64-
datasets = deduplicate_datasets(datasets)
6535

6636
for _, dataset in datasets.iterrows():
6737
for ds_filename in dataset["files"]:
@@ -183,6 +153,8 @@ def process_sample_data_request(
183153
remove_ensembles=False,
184154
time_span=("2002", "2016"),
185155
),
156+
# All obs4MIPs datasets that are not yet published
157+
Obs4REFRequest(),
186158
]
187159

188160

src/ref_sample_data/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@
1010
from .data_request.base import DataRequest
1111
from .data_request.cmip6 import CMIP6Request
1212
from .data_request.obs4mips import Obs4MIPsRequest
13+
from .data_request.obs4ref import Obs4REFRequest
1314

14-
__all__ = ["CMIP6Request", "DataRequest", "Obs4MIPsRequest"]
15+
__all__ = ["CMIP6Request", "DataRequest", "Obs4MIPsRequest", "Obs4REFRequest"]

0 commit comments

Comments
 (0)