88
99import json
1010import uuid
11+ from typing import Callable
1112
13+ GEOMETRY_TYPES = ["POINT" , "LINESTRING" , "POLYGON" , "MIXED" ]
1214QUERY_NAME_TRANSLATION = str .maketrans ({x : "" for x in "., -_/" })
1315
1416
@@ -28,7 +30,6 @@ def __dir__(self):
2830 return self .keys ()
2931
3032 def _repr_html_ (self , inside = False ):
31-
3233 children = ""
3334 for key in self .keys ():
3435 if isinstance (self [key ], Dataset ):
@@ -118,6 +119,139 @@ def query_name(self, name: str) -> Dataset:
118119
119120 raise ValueError (f"No matching item found for the query '{ name } '." )
120121
def filter(
    self,
    keyword: str | None = None,
    name: str | None = None,
    geometry_type: str | None = None,
    function: Callable[[Dataset], bool] | None = None,
) -> Bunch:
    """Return a subset of the :class:`Bunch` matching the filter conditions

    Each :class:`Dataset` within a :class:`Bunch` is checked against one or
    more specified conditions and kept if all of them are satisfied, or removed
    if at least one condition is not met. Nested :class:`Bunch` objects are
    filtered recursively and are dropped from the result when none of their
    items match. If no condition is given, every :class:`Dataset` is kept.

    Parameters
    ----------
    keyword : str (optional)
        Condition returns ``True`` if the ``keyword`` string is present in any
        string value in a :class:`Dataset` object.
        The comparison is not case sensitive.
    name : str (optional)
        Condition returns ``True`` if the ``name`` string is present in
        the name attribute of a :class:`Dataset` object.
        The comparison is not case sensitive.
    geometry_type : str (optional)
        Condition returns ``True`` if :meth:`Dataset.geometry_type`
        matches the ``geometry_type``.
        Possible options are ``["Point", "LineString", "Polygon", "Mixed"]``.
        The comparison is not case sensitive.
    function : callable (optional)
        Custom function taking a :class:`Dataset` as an argument and returning
        bool. If ``function`` is given, other parameters are ignored.

    Returns
    -------
    filtered : Bunch

    Examples
    --------
    >>> from geodatasets import data

    You can filter all Point datasets:

    >>> points = data.filter(geometry_type="Point")

    Or all datasets with ``chicago`` in the name:

    >>> chicago_datasets = data.filter(name="chicago")

    You can use keyword search to find all datasets in a CSV format:

    >>> csv_datasets = data.filter(keyword="csv")

    You can combine multiple conditions to find datasets with ``chicago`` in
    the name and of Polygon geometry type:

    >>> chicago_polygons = data.filter(name="chicago", geometry_type="Polygon")

    You can also pass a custom function that takes a :class:`Dataset` and returns
    a boolean value. You can then find all datasets with ``nrows`` smaller than
    100:

    >>> def small_data(dataset):
    ...     if hasattr(dataset, "nrows") and dataset.nrows < 100:
    ...         return True
    ...     return False
    >>> small = data.filter(function=small_data)
    """

    if function is not None:
        # A custom predicate takes precedence; the other arguments are ignored.
        predicate = function
    else:
        # Normalise the queries once instead of once per inspected value.
        keyword_query = None if keyword is None else keyword.lower()
        name_query = None if name is None else name.lower()
        geometry_query = (
            None
            if geometry_type is None
            # Strip separators so that e.g. "Line String" equals "LINESTRING".
            else geometry_type.translate(QUERY_NAME_TRANSLATION).upper()
        )

        def predicate(dataset):
            # Only string values take part in the keyword search.
            if keyword_query is not None and not any(
                isinstance(value, str) and keyword_query in value.lower()
                for value in dataset.values()
            ):
                return False
            if name_query is not None and name_query not in dataset.name.lower():
                return False
            if (
                geometry_query is not None
                and dataset.geometry_type.upper() != geometry_query
            ):
                return False
            return True

    def _filter_bunch(bunch):
        selected = Bunch()
        for key, value in bunch.items():
            # Dataset must be tested first: anything that is not a Dataset is
            # treated as a nested Bunch and filtered recursively.
            if isinstance(value, Dataset):
                if predicate(value):
                    selected[key] = value
            else:
                nested = _filter_bunch(value)
                # Skip nested bunches that ended up empty.
                if nested:
                    selected[key] = nested
        return selected

    return _filter_bunch(self)
254+
121255
122256class Dataset (Bunch ):
123257 """
@@ -178,7 +312,6 @@ def _repr_html_(self, inside=False):
178312
179313
180314def _load_json (f ):
181-
182315 data = json .loads (f )
183316
184317 items = Bunch ()
0 commit comments