Skip to content

Commit a2c2aa6

Browse files
Use int metadata instead of str for certain keys for lazy loaded fdb (#858)
* Use int metadata instead of str for certain keys for lazy loaded fdb
1 parent 36f2372 commit a2c2aa6

File tree

3 files changed

+148
-2
lines changed

3 files changed

+148
-2
lines changed

docs/release_notes/version_0.18_updates.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
Version 0.18 Updates
22
/////////////////////////
33

4+
5+
6+
Version 0.18.3
7+
===============
8+
9+
Fixes
10+
++++++++
11+
12+
- Fixed an issue where using the :ref:`data-sources-fdb` source with the ``lazy=True`` option resulted in string values instead of int values for the following metadata keys: "level", "levelist", "date", "time", "step" (:pr:`858`).
13+
14+
415
Version 0.18.2
516
===============
617

src/earthkit/data/sources/fdb.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,14 @@ def get(self, request):
116116

117117

118118
class FDBRequestMapper(RequestMapper):
119+
_CONVERT_MAP = {
120+
"date": int,
121+
"time": int,
122+
"step": int,
123+
"levelist": int,
124+
"level": int,
125+
}
126+
119127
def __init__(self, request, fdb_kwargs=None, **kwargs):
120128
super().__init__(request, **kwargs)
121129
self.fdb_kwargs = fdb_kwargs or {}
@@ -131,8 +139,18 @@ def _build(self):
131139
r = []
132140
fdb = pyfdb.FDB(**self.fdb_kwargs)
133141
for el in fdb.list(self.request, True, True):
134-
r.append(el["keys"])
142+
data = el["keys"]
143+
r.append(self._convert(data))
135144
return r
136145

146+
@staticmethod
def _convert(data):
    """Convert selected values of ``data`` to the types in ``_CONVERT_MAP``.

    The conversion is done in place. Keys of ``data`` that are not listed in
    ``FDBRequestMapper._CONVERT_MAP`` are left untouched. A value that
    cannot be converted is also left as it is, so a single unexpected value
    does not abort the whole conversion.

    Parameters
    ----------
    data: dict
        Mapping of metadata keys to values. Modified in place.

    Returns
    -------
    dict
        The same ``data`` object that was passed in.
    """
    for key, converter in FDBRequestMapper._CONVERT_MAP.items():
        if key not in data:
            continue
        try:
            data[key] = converter(data[key])
        except (TypeError, ValueError):
            # Not every value is a plain integer string (e.g. a step range
            # like "0-6"); keep the original value rather than failing.
            continue

    return data
154+
137155

138156
source = FDBSource

tests/lazy/test_lazy_fdb.py

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,17 @@ def make_fdb(path):
5555
return ds, config
5656

5757

58+
def compare_coord(ds, coord_name, ref_values):
    """Assert that the coordinate ``coord_name`` of ``ds`` equals ``ref_values``.

    Parameters
    ----------
    ds
        Object with a ``coords`` mapping; each entry must expose its data
        through a ``values`` attribute.
    coord_name: str
        Name of the coordinate to check.
    ref_values: sequence
        Expected coordinate values, in order.

    Raises
    ------
    AssertionError
        If the coordinate is missing, has a different number of values than
        ``ref_values``, or any value differs from its reference.
    """
    assert coord_name in ds.coords, f"Coordinate '{coord_name}' not found"
    values = ds.coords[coord_name].values

    # zip() stops at the shorter input, so without this check missing or
    # extra coordinate values would go unnoticed.
    assert len(values) == len(
        ref_values
    ), f"Coordinate '{coord_name}' has {len(values)} values, expected {len(ref_values)}"

    for v, v_ref in zip(values, ref_values):
        assert v == v_ref, f"Coordinate '{coord_name}' value {v} != {v_ref}"
62+
63+
5864
@pytest.mark.skipif(NO_FDB, reason="No access to FDB")
5965
@pytest.mark.cache
6066
def test_lazy_fdb():
6167
with temp_directory() as tmpdir:
62-
ds, config = make_fdb(os.path.join(tmpdir, "_fdb"))
68+
ds_in, config = make_fdb(os.path.join(tmpdir, "_fdb"))
6369

6470
ds = from_source("fdb", TEST_GRIB_REQUEST, config=config, stream=False, lazy=True)
6571
assert len(ds) == 32
@@ -74,16 +80,127 @@ def test_lazy_fdb():
7480
"relative_humidity",
7581
]
7682

83+
assert ds[0].metadata(["date", "time", "step", "levelist", "level"]) == [20240603, 0, 0, 500, 500]
84+
assert ds[1].metadata(["date", "time", "step", "levelist", "level"]) == [20240603, 0, 0, 500, 500]
85+
7786
assert ds[0].metadata("step_timedelta") == datetime.timedelta(hours=0)
7887
assert ds[4].metadata("step_timedelta") == datetime.timedelta(hours=6)
7988

89+
# compare all the fields
90+
ds_in_sorted = ds_in.order_by(["shortName", "date", "time", "step", "levelist"])
91+
ds_sorted = ds.order_by(["shortName", "date", "time", "step", "levelist"])
92+
t = ds_in_sorted.sel(shortName="t")
93+
r = ds_in_sorted.sel(shortName="r")
94+
t_fdb = ds_sorted.sel(shortName="t")
95+
r_fdb = ds_sorted.sel(shortName="r")
96+
97+
assert len(t) == 16
98+
assert len(r) == 16
99+
assert len(t_fdb) == 16
100+
assert len(r_fdb) == 16
101+
102+
assert t.metadata(["shortName", "date", "time", "step", "levelist"]) == t_fdb.metadata(
103+
["shortName", "date", "time", "step", "levelist"]
104+
)
105+
assert r.metadata(["shortName", "date", "time", "step", "levelist"]) == r_fdb.metadata(
106+
["shortName", "date", "time", "step", "levelist"]
107+
)
108+
109+
assert np.allclose(t.to_numpy(), t_fdb.to_numpy().reshape(16, 19, 36))
110+
assert np.allclose(r.to_numpy(), r_fdb.to_numpy().reshape(16, 19, 36))
111+
80112
assert not hasattr(ds, "path")
81113
assert not hasattr(ds[0], "path")
82114

115+
# --------------------
116+
# Xarray tests
117+
# --------------------
118+
83119
a = ds.to_xarray(time_dim_mode="forecast")
120+
121+
# TODO: use methods from xr_engine tests for comparison
122+
compare_coord(
123+
a,
124+
"forecast_reference_time",
125+
[
126+
np.datetime64("2024-06-03T00:00:00.000000000"),
127+
np.datetime64("2024-06-03T12:00:00.000000000"),
128+
np.datetime64("2024-06-04T00:00:00.000000000"),
129+
np.datetime64("2024-06-04T12:00:00.000000000"),
130+
],
131+
)
132+
compare_coord(a, "step", [np.timedelta64(0, "h"), np.timedelta64(6, "h")])
133+
compare_coord(a, "level", [500, 700])
134+
84135
assert a["t"].values.shape == (4, 2, 2, 19, 36)
85136
assert a["r"].values.shape == (4, 2, 2, 19, 36)
86137

138+
# Compare a few fields manually
139+
ref = [
140+
{
141+
"xr": (
142+
"r",
143+
{
144+
"xr_forecast_reference_time": 0,
145+
"xr_step": 0,
146+
"level": 0,
147+
},
148+
{
149+
"forecast_reference_time": np.datetime64("2024-06-03T00:00:00.000000000"),
150+
"step": np.timedelta64(0, "h"),
151+
"level": 500,
152+
},
153+
),
154+
"grib": (3, {"date": 20240603, "time": 0, "step": 0, "level": 500, "shortName": "r"}),
155+
},
156+
{
157+
"xr": (
158+
"r",
159+
{
160+
"xr_forecast_reference_time": 1,
161+
"xr_step": 0,
162+
"level": 1,
163+
},
164+
{
165+
"forecast_reference_time": np.datetime64("2024-06-03T12:00:00.000000000"),
166+
"step": np.timedelta64(0, "h"),
167+
"level": 700,
168+
},
169+
),
170+
"grib": (9, {"date": 20240603, "time": 1200, "step": 0, "level": 700, "shortName": "r"}),
171+
},
172+
]
173+
174+
for r in ref:
175+
xr_var, xr_sel, xr_meta = r["xr"]
176+
grib_idx, grib_meta = r["grib"]
177+
178+
f = a[xr_var].isel(
179+
forecast_reference_time=xr_sel["xr_forecast_reference_time"],
180+
step=xr_sel["xr_step"],
181+
level=xr_sel["level"],
182+
)
183+
for k, v in xr_meta.items():
184+
assert f[k].values == v, f"{k} {f[k].values} != {v}"
185+
186+
g = ds_in[grib_idx]
187+
for k, v in grib_meta.items():
188+
assert g.metadata(k) == v, f"{k} {g.metadata(k)} != {v}"
189+
190+
assert np.allclose(f.values, g.to_numpy())
191+
192+
# compare all the field values
193+
ds_in_sorted = ds_in.order_by(["shortName", "date", "time", "step", "levelist"])
194+
t = ds_in_sorted.sel(shortName="t")
195+
r = ds_in_sorted.sel(shortName="r")
196+
197+
assert len(t) == 16
198+
assert len(r) == 16
199+
200+
assert np.allclose(t.to_numpy(), a["t"].values.reshape(16, 19, 36))
201+
assert np.allclose(r.to_numpy(), a["r"].values.reshape(16, 19, 36))
202+
203+
# test aggregation
87204
m = a.mean("step").load()
88205
assert m["t"].values.shape == (4, 2, 19, 36)
89206
assert m["r"].values.shape == (4, 2, 19, 36)

0 commit comments

Comments
 (0)