Skip to content

Commit b44dce5

Browse files
perf(gribjump): fix two performance bottlenecks in the gribjump source
Avoid recomputing per-field data by caching/precomputing:
- Cache the reference latitudes/longitudes when fetch_coords_from_fdb=True, to avoid re-reading the reference field's geography for every retrieved field.
- Pre-convert index lists to ranges once, to avoid repeated calls to ExtractionRequest.from_indices.
1 parent 4acd81b commit b44dce5

File tree

2 files changed

+19
-6
lines changed

2 files changed

+19
-6
lines changed

src/earthkit/data/sources/gribjump.py

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -242,6 +242,12 @@ def from_mars_requests(
242242
ranges = mask_to_ranges(mask)
243243
mask = None
244244

245+
if indices is not None:
246+
# We do the same small optimization for indices. Optimally, we
247+
# would do similar optimizations in pygribjump and remove this.
248+
ranges = [(i, i + 1) for i in indices]
249+
indices = None
250+
245251
extraction_requests = [build_extraction_request(req, ranges, mask, indices) for req in mars_requests]
246252
return cls(extraction_requests)
247253

@@ -286,7 +292,11 @@ def __init__(
286292
# These attributes are set lazily after loading the data.
287293
self._loaded = False
288294
self._grid_indices = None
295+
296+
# Cached reference metadata for coordinates
289297
self._reference_metadata: Optional[GribMetadata] = None
298+
self._latitudes: Optional[np.ndarray] = None
299+
self._longitudes: Optional[np.ndarray] = None
290300

291301
super().__init__(fields=None)
292302

@@ -334,10 +344,10 @@ def _load(self):
334344

335345
def _load_reference_metadata(self):
336346
"""Loads the reference metadata from the FDB retriever if available."""
337-
if self._fdb_retriever is None:
338-
return None
339347
if self._reference_metadata is not None:
340348
return self._reference_metadata
349+
if self._fdb_retriever is None:
350+
return None
341351

342352
fields = self._fdb_retriever.get(self._requests[0].request)
343353
metadatas = fields.metadata()
@@ -355,9 +365,13 @@ def _enrich_metadata_with_coordinates(self, indices: np.ndarray, metadata: UserM
355365
if (reference_metadata := self._load_reference_metadata()) is None:
356366
return metadata
357367

358-
reference_geography = reference_metadata.geography
359-
grid_latitudes = reference_geography.latitudes()[indices]
360-
grid_longitudes = reference_geography.longitudes()[indices]
368+
if self._latitudes is None or self._longitudes is None:
369+
self._latitudes = reference_metadata.geography.latitudes()
370+
self._longitudes = reference_metadata.geography.longitudes()
371+
372+
grid_latitudes = self._latitudes[indices]
373+
grid_longitudes = self._longitudes[indices]
374+
361375
metadata = metadata.override(
362376
{
363377
"latitudes": grid_latitudes,

tests/sources/test_gribjump.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,6 @@ def ds_expected_with_coords():
192192
"levelist": "1000",
193193
"levtype": "pl",
194194
"stream": "oper",
195-
"param": "129",
196195
"time": "1200",
197196
"type": "fc",
198197
"Conventions": "CF-1.8",

0 commit comments

Comments
 (0)