Skip to content

Commit 136911d

Browse files
authored
Preserve time_bnds Variable in Ocean CMORisation Workflow (#144)
* update ocean cmoriser unit test * create supergrid unit test * trim test * add test for time_bnds in unit test * add var and dim check in ocean test * add variables check in ocean cmoriser to ensure time_bnds is in the ds and generate corresponding test * pre-commit fix * solve some conflicts * add time_bnds calculation mechanism if there is no time_bnds in raw data, and add corresponding tests * pre-commit fix * unit test for utilities.py * unit test for utilities.py * fix test_ocean.py after making a slight change to unit generation in time_bnds * pre-commit fix * add warning for missing bounds * add warning for missing bounds * solve conflict
1 parent 0eb8d0e commit 136911d

File tree

5 files changed

+1712
-6
lines changed

5 files changed

+1712
-6
lines changed

src/access_moppy/ocean.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
from pathlib import Path
23
from typing import Any, Dict, List, Optional, Union
34

@@ -7,6 +8,7 @@
78
from access_moppy.base import CMIP6_CMORiser
89
from access_moppy.derivations import custom_functions, evaluate_expression
910
from access_moppy.ocean_supergrid import Supergrid
11+
from access_moppy.utilities import calculate_time_bounds
1012
from access_moppy.vocabulary_processors import CMIP6Vocabulary
1113

1214

@@ -60,10 +62,10 @@ def _get_dim_rename(self):
6062
def select_and_process_variables(self):
6163
"""Select and process variables for the CMOR output."""
6264
input_vars = self.mapping[self.cmor_name]["model_variables"]
63-
time_bnds = ["time_bnds"]
65+
bnds_required = ["time_bnds"]
6466
calc = self.mapping[self.cmor_name]["calculation"]
6567

66-
required_vars = set(input_vars + time_bnds)
68+
required_vars = set(input_vars + bnds_required)
6769
self.load_dataset(required_vars=required_vars)
6870

6971
dim_rename = self._get_dim_rename()
@@ -93,14 +95,33 @@ def select_and_process_variables(self):
9395
)
9496

9597
self.grid_type, self.symmetric = self.infer_grid_type()
96-
# Drop all other data variables except the CMOR variable
97-
self.ds = self.ds[[self.cmor_name, time_bnds[0]]]
98+
99+
# Check and calculate time_bnds if missing
100+
if bnds_required[0] not in self.ds:
101+
# Warn user that bounds are missing and will be calculated automatically
102+
warnings.warn(
103+
f"'{bnds_required[0]}' not found in raw data. Automatically calculating bounds for '{bnds_required[0]}' coordinate.",
104+
UserWarning,
105+
stacklevel=2,
106+
)
107+
try:
108+
calculated_bnds = calculate_time_bounds(
109+
self.ds, time_coord="time", bnds_name="nv"
110+
)
111+
self.ds[bnds_required[0]] = calculated_bnds
112+
except Exception as e:
113+
raise ValueError(
114+
f"time_bnds is required for CMIP6 compliance but was not found "
115+
f"in the dataset and could not be calculated: {e}"
116+
)
117+
118+
self.ds = self.ds[[self.cmor_name, bnds_required[0]]]
98119

99120
# Drop unused coordinates
100121
used_coords = set()
101122
dims = list(self.ds[self.cmor_name].dims)
102-
if time_bnds[0] in self.ds:
103-
dims = list(dict.fromkeys(dims + list(self.ds[time_bnds[0]].dims)))
123+
if bnds_required[0] in self.ds:
124+
dims = list(dict.fromkeys(dims + list(self.ds[bnds_required[0]].dims)))
104125
for dim in dims:
105126
if dim in self.ds.coords:
106127
used_coords.add(dim)

tests/mocks/mock_data.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,175 @@ def create_mock_3d_ocean_dataset(
524524
return ds
525525

526526

527+
def create_mock_om2_dataset(nt=12, ny=300, nx=360):
    """
    Create a mock ACCESS-OM2 ocean dataset with B-grid coordinates.
    Uses xt_ocean/yt_ocean for T-grid points.

    The dataset holds a random ``surface_temp`` field on
    ``(time, yt_ocean, xt_ocean)`` and a ``time_bnds`` variable on
    ``(time, nv)``.

    Parameters
    ----------
    nt : int
        Number of monthly time steps, starting in January 1850. Values
        larger than 12 continue into the following years.
    ny : int
        Number of points along ``yt_ocean``.
    nx : int
        Number of points along ``xt_ocean``.

    Returns
    -------
    xr.Dataset
        Mock dataset with ``surface_temp`` and ``time_bnds``.
    """
    import cftime

    xt_ocean = np.linspace(0.5, 359.5, nx)
    yt_ocean = np.linspace(-89.5, 89.5, ny)

    # Mid-month timestamps. The step index is split into a year offset and a
    # month so that nt > 12 rolls into later years instead of requesting an
    # invalid month 13 from cftime.
    time = [
        cftime.DatetimeProlepticGregorian(1850 + step // 12, step % 12 + 1, 15)
        for step in range(nt)
    ]

    data = np.random.rand(nt, ny, nx).astype(np.float32)

    # Time bounds: consecutive month-long intervals expressed as running day
    # counts. NOTE: both the starting offset and the month lengths use fixed
    # 365-day years (February is always 28 days), so no leap days are counted.
    days_per_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    base_days = (1850 - 1) * 365
    time_bnds = np.zeros((nt, 2))
    cumulative = base_days
    for i in range(nt):
        month_length = days_per_month[i % 12]
        time_bnds[i, 0] = cumulative
        time_bnds[i, 1] = cumulative + month_length
        cumulative += month_length

    ds = xr.Dataset(
        data_vars={
            "surface_temp": (
                ["time", "yt_ocean", "xt_ocean"],
                data,
                {
                    "long_name": "Conservative temperature",
                    "units": "K",
                    "_FillValue": np.float32(-1e20),
                    "standard_name": "sea_surface_temperature",
                },
            ),
            "time_bnds": (["time", "nv"], time_bnds),
        },
        coords={
            "xt_ocean": (
                "xt_ocean",
                xt_ocean,
                {"long_name": "tcell longitude", "units": "degrees_E"},
            ),
            "yt_ocean": (
                "yt_ocean",
                yt_ocean,
                {"long_name": "tcell latitude", "units": "degrees_N"},
            ),
            "time": (
                "time",
                time,
                {
                    "units": "days since 0001-01-01 00:00:00",
                    "calendar": "proleptic_gregorian",
                    "bounds": "time_bnds",
                },
            ),
            "nv": ("nv", [1.0, 2.0]),
        },
        attrs={
            "title": "ACCESS-OM2",
            "grid_type": "mosaic",
        },
    )
    return ds
595+
596+
597+
def create_mock_om3_dataset(nt=12, ny=300, nx=360):
    """
    Create a mock ACCESS-OM3 ocean dataset with C-grid coordinates.
    Uses xh/yh for T-grid (tracer) points.

    The dataset holds a random ``tos`` field on ``(time, yh, xh)``. Unlike
    the returned ``time`` coordinate's OM2 counterpart in this module, no
    ``time_bnds`` variable is created here.

    Parameters
    ----------
    nt : int
        Number of monthly time steps, starting in January 1850. Values
        larger than 12 continue into the following years.
    ny : int
        Number of points along ``yh``.
    nx : int
        Number of points along ``xh``.

    Returns
    -------
    xr.Dataset
        Mock dataset with the ``tos`` variable.
    """
    import cftime

    xh = np.linspace(0.5, 359.5, nx)
    yh = np.linspace(-89.5, 89.5, ny)

    # Mid-month timestamps. The step index is split into a year offset and a
    # month so that nt > 12 rolls into later years instead of requesting an
    # invalid month 13 from cftime.
    time = [
        cftime.DatetimeProlepticGregorian(1850 + step // 12, step % 12 + 1, 15)
        for step in range(nt)
    ]

    data = np.random.rand(nt, ny, nx).astype(np.float32)

    ds = xr.Dataset(
        data_vars={
            "tos": (
                ["time", "yh", "xh"],
                data,
                {
                    "long_name": "Sea Surface Temperature",
                    "units": "degC",
                    "_FillValue": np.float32(-1e20),
                },
            ),
        },
        coords={
            "xh": (
                "xh",
                xh,
                {"long_name": "h point nominal longitude", "units": "degrees_E"},
            ),
            "yh": (
                "yh",
                yh,
                {"long_name": "h point nominal latitude", "units": "degrees_N"},
            ),
            "time": (
                "time",
                time,
                {
                    "units": "days since 0001-01-01 00:00:00",
                    "calendar": "proleptic_gregorian",
                },
            ),
        },
        attrs={"title": "ACCESS-OM3"},
    )
    return ds
648+
649+
650+
def create_mock_supergrid_dataset(ny=7, nx=9):
    """
    Build a small mock supergrid dataset for use in tests.

    A supergrid interleaves cell centers and cell corners, so for
    ``ny`` x ``nx`` tracer cells it spans ``(2*ny+1, 2*nx+1)`` points.

    Parameters
    ----------
    ny : int
        Number of tracer cells in y direction
    nx : int
        Number of tracer cells in x direction

    Returns
    -------
    xr.Dataset
        Mock supergrid with 2D ``x`` and ``y`` variables on ``(nyp, nxp)``
    """
    # Point counts along each supergrid axis.
    n_rows = 2 * ny + 1
    n_cols = 2 * nx + 1

    # Evenly spaced axes: x covers 0..360, y covers -90..90.
    x_axis = np.linspace(0, 360, n_cols)
    y_axis = np.linspace(-90, 90, n_rows)

    # Expand the 1D axes into 2D fields of shape (n_rows, n_cols).
    x_field, y_field = np.meshgrid(x_axis, y_axis)

    grid_dims = ["nyp", "nxp"]
    variables = {
        "x": (grid_dims, x_field),
        "y": (grid_dims, y_field),
    }
    index_coords = {
        "nyp": np.arange(n_rows),
        "nxp": np.arange(n_cols),
    }
    global_attrs = {"title": "Mock Supergrid for Testing"}

    return xr.Dataset(variables, coords=index_coords, attrs=global_attrs)
694+
695+
527696
def create_chunked_dataset(chunks=None, **kwargs):
528697
"""Create a chunked dataset for testing dask operations."""
529698
if chunks is None:

0 commit comments

Comments
 (0)