@@ -116,6 +116,7 @@ def _open(self, data_id: str | Path) -> Dataset:
116116 """This method does not belong to public API."""
117117 kwargs = {}
118118 if isinstance (data_id , str ) and "*" in data_id :
119+ pr = Preprocessor ()
119120 ds = xr .open_mfdataset (
120121 data_id ,
121122 chunks = self ._chunks ,
@@ -131,7 +132,9 @@ def _open(self, data_id: str | Path) -> Dataset:
131132 backend_kwargs = kwargs ,
132133 combine = "nested" ,
133134 concat_dim = "i" ,
135+ preprocess = pr ,
134136 )
137+ ds = pr .drop (ds )
135138 else :
136139 ds = xr .open_dataset (
137140 data_id ,
@@ -206,3 +209,91 @@ def _concat_characters(self) -> bool:
def _inline_array(self) -> bool:
    """
    Tell whether the inline-array option is switched on.

    This method does not belong to public API.

    :return: `True` if the configuration entry stored under
        `_KEY_INLINE_ARRAY` equals the string `"true"`.
    """
    configured = self._config[_KEY_INLINE_ARRAY]
    return configured == "true"
212+
213+
class Preprocessor:
    """
    A preprocessor to find the names of global attributes and data
    variables, which are not common to all datasets.

    An instance is called once per dataset and accumulates state across
    calls. Afterwards, `drop` removes every attribute and data variable
    that was missing from at least one of the datasets seen.

    NOTE(review): because state is accumulated across calls, the
    instance presumably has to be invoked sequentially within a single
    process; confirm it is never combined with a parallel (delayed)
    preprocessing step.
    """

    _all_attrs: list[str]
    """The list of all global attributes."""
    _all_vars: list[str]
    """The list of all data variables."""
    _drop_attrs: list[str]
    """The list of non-common global attributes to drop."""
    _drop_vars: list[str]
    """The list of non-common data variables to drop."""
    _attrs_seen: bool
    """Whether global attributes of at least one dataset were processed."""
    _vars_seen: bool
    """Whether data variables of at least one dataset were processed."""

    def __init__(self):
        """Creates a preprocessor instance."""
        self._all_vars = []
        self._all_attrs = []
        self._drop_vars = []
        self._drop_attrs = []
        # Explicit flags instead of testing the lists for emptiness: a
        # first dataset without any attributes (or data variables) must
        # still count as "seen", otherwise names that only appear in
        # later datasets would wrongly be regarded as common.
        self._attrs_seen = False
        self._vars_seen = False

    def __call__(self, ds: Dataset) -> Dataset:
        """
        Returns the dataset supplied as argument unmodified.

        When consecutively called for multiple datasets, finds the names
        of global attributes and data variables, which are not common to
        all datasets.

        :param ds: The dataset to inspect.
        :return: The very same dataset object.
        """
        self._process_attrs(ds)
        self._process_vars(ds)
        return ds

    def _process_attrs(self, ds: Dataset) -> None:
        """This method does not belong to public API."""
        self._merge(
            ds.attrs, self._all_attrs, self._drop_attrs, self._attrs_seen
        )
        self._attrs_seen = True

    def _process_vars(self, ds: Dataset) -> None:
        """This method does not belong to public API."""
        self._merge(
            ds.data_vars, self._all_vars, self._drop_vars, self._vars_seen
        )
        self._vars_seen = True

    @staticmethod
    def _merge(
        names, known: list[str], dropped: list[str], seen_before: bool
    ) -> None:
        """
        Merges the names of one dataset into the bookkeeping lists.

        This method does not belong to public API.

        :param names: The names present in the current dataset. Must
            support iteration over the names and membership tests.
        :param known: All names encountered so far, updated in place.
        :param dropped: The non-common names, updated in place.
        :param seen_before: Whether a dataset was processed before the
            current one.
        """
        if seen_before:
            # Names known from earlier datasets but absent here are not
            # common to all datasets.
            for name in known:
                if name not in names and name not in dropped:
                    dropped.append(name)
        for name in names:
            if name not in known:
                known.append(name)
                # A name appearing for the first time in a later dataset
                # is missing from every earlier dataset.
                if seen_before and name not in dropped:
                    dropped.append(name)

    def drop(self, ds: Dataset) -> Dataset:
        """
        Returns a dataset with all non-common attributes
        and data variables dropped.

        :param ds: The dataset to reduce.
        :return: The result of `drop_vars` with its non-common
            attributes removed.
        """
        return self.drop_attrs(self.drop_vars(ds))

    def drop_attrs(self, ds: Dataset) -> Dataset:
        """
        Returns a dataset with all non-common attributes
        dropped.

        The attributes are removed in place from the dataset supplied
        as argument, which is then returned.

        :param ds: The dataset to modify.
        :return: The same dataset object.
        """
        for attr in self._drop_attrs:
            # Tolerate attributes that are already absent.
            ds.attrs.pop(attr, None)
        return ds

    def drop_vars(self, ds: Dataset) -> Dataset:
        """
        Returns a dataset with all non-common data variables
        dropped.

        :param ds: The dataset to reduce.
        :return: The result of `ds.drop_vars` for the non-common data
            variables, or the dataset supplied as argument if there is
            nothing to drop.
        """
        return ds.drop_vars(self._drop_vars) if self._drop_vars else ds
0 commit comments