Skip to content

Commit 11d45e4

Browse files
committed
Fix: drop non-common global attributes and data variables
1 parent b6a615b commit 11d45e4

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed

kaleidoscope/reader.py

Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,7 @@ def _open(self, data_id: str | Path) -> Dataset:
116116
"""This method does not belong to public API."""
117117
kwargs = {}
118118
if isinstance(data_id, str) and "*" in data_id:
119+
pr = Preprocessor()
119120
ds = xr.open_mfdataset(
120121
data_id,
121122
chunks=self._chunks,
@@ -131,7 +132,9 @@ def _open(self, data_id: str | Path) -> Dataset:
131132
backend_kwargs=kwargs,
132133
combine="nested",
133134
concat_dim="i",
135+
preprocess=pr,
134136
)
137+
ds = pr.drop(ds)
135138
else:
136139
ds = xr.open_dataset(
137140
data_id,
@@ -206,3 +209,91 @@ def _concat_characters(self) -> bool:
206209
def _inline_array(self) -> bool:
207210
"""This method does not belong to public API."""
208211
return self._config[_KEY_INLINE_ARRAY] == "true"
212+
213+
214+
class Preprocessor:
    """
    A preprocessor to find the names of global attributes and data
    variables, which are not common to all datasets.

    An instance is called once per dataset and records the names it
    sees. Afterwards, `drop` removes the names recorded as non-common
    from a dataset.
    """

    _all_attrs: list[str]
    """The list of all global attributes."""
    _all_vars: list[str]
    """The list of all data variables."""
    _drop_attrs: list[str]
    """The list of non-common global attributes to drop."""
    _drop_vars: list[str]
    """The list of non-common data variables to drop."""
    _attrs_seen: bool
    """Whether the global attributes of a first dataset were recorded."""
    _vars_seen: bool
    """Whether the data variables of a first dataset were recorded."""

    def __init__(self):
        """Creates a preprocessor instance."""
        self._all_vars = []
        self._all_attrs = []
        self._drop_vars = []
        self._drop_attrs = []
        self._attrs_seen = False
        self._vars_seen = False

    def __call__(self, ds: Dataset) -> Dataset:
        """
        Returns the dataset supplied as argument unmodified.

        When consecutively called for multiple datasets, finds the names
        of global attributes and data variables, which are not common to
        all datasets.
        """
        self._process_attrs(ds)
        self._process_vars(ds)
        return ds

    def _process_attrs(self, ds):
        """This method does not belong to public API."""
        self._record(
            ds.attrs, self._all_attrs, self._drop_attrs, self._attrs_seen
        )
        self._attrs_seen = True

    def _process_vars(self, ds):
        """This method does not belong to public API."""
        self._record(
            ds.data_vars, self._all_vars, self._drop_vars, self._vars_seen
        )
        self._vars_seen = True

    @staticmethod
    def _record(names, all_names, drop_names, seen):
        """
        This method does not belong to public API.

        Updates `all_names` and `drop_names` in place with the keys of
        the mapping `names`. The flag `seen` tells whether an earlier
        dataset was recorded already.
        """
        if not seen:
            # The first dataset defines the initial set of names. An
            # explicit flag is used instead of testing `all_names` for
            # emptiness, because a first dataset without any names would
            # otherwise make the next dataset look like the first one.
            all_names.extend(names)
            return
        # Names known from earlier datasets, but missing in this one.
        for name in all_names:
            if name not in names and name not in drop_names:
                drop_names.append(name)
        # Names new in this dataset, hence missing in earlier ones.
        for name in names:
            if name not in all_names:
                all_names.append(name)
                if name not in drop_names:
                    drop_names.append(name)

    def drop(self, ds: Dataset) -> Dataset:
        """
        Returns a dataset with all non-common attributes
        and data variables dropped.
        """
        return self.drop_attrs(self.drop_vars(ds))

    def drop_attrs(self, ds: Dataset) -> Dataset:
        """
        Returns a dataset with all non-common attributes
        dropped.

        The attributes are removed in place from the dataset supplied
        as argument. Attributes not present are ignored.
        """
        for attr in self._drop_attrs:
            ds.attrs.pop(attr, None)
        return ds

    def drop_vars(self, ds: Dataset) -> Dataset:
        """
        Returns a dataset with all non-common data variables
        dropped.

        Data variables not present are ignored, like non-present
        attributes are ignored when dropping attributes.
        """
        if not self._drop_vars:
            return ds
        return ds.drop_vars(self._drop_vars, errors="ignore")

0 commit comments

Comments
 (0)