diff --git a/examples/!category_filter.ipynb b/examples/!category_filter.ipynb new file mode 100644 index 00000000..b8510b37 --- /dev/null +++ b/examples/!category_filter.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9ffdeba2", + "metadata": {}, + "outputs": [], + "source": [ + "# no intention of actually keeping this notebook in the repo, once it's all working good I'll make up a new doc about the category_filter\n", + "# I have added it for now to demonstrate that I've got the data filter filter_categories functionality working for numeric inputs\n", + "# looking at the deck gl docs: https://deck.gl/docs/api-reference/extensions/data-filter-extension#layer-properties\n", + "# it appears that we should be able to use strings for the categories but when I try to use string data the layer simply doesnt work\n", + "\n", + "import geopandas as gpd\n", + "import ipywidgets\n", + "import pyarrow as pa # noqa\n", + "from shapely.geometry import Point\n", + "\n", + "import lonboard\n", + "from lonboard.basemap import CartoBasemap\n", + "from lonboard.layer_extension import DataFilterExtension\n", + "\n", + "cat_col = \"int_col\"\n", + "# int_col: works\n", + "# float_col: works\n", + "# str_col: does NOT work :(\n", + "# as is it will throw an arro3 ValueError: Expected object with __arrow_c_array__ method or implementing buffer protocol.\n", + "# we can avoid the arro3 exception by using pyarrow as the input to get_filter_category when we create the layer:\n", + "# `get_filter_category=pa.array(gdf[cat_col])`\n", + "# but the layer doesn't display and throws a lot of the following WebGL error:\n", + "# GL_INVALID_OPERATION: Vertex shader input type does not match the type of the bound vertex attribute\n", + "\n", + "\n", + "d = {\n", + " \"int_col\": [0, 1, 2, 3, 4, 5],\n", + " \"float_col\": [0.0, 1.5, 0.0, 1.5, 0.0, 1.5],\n", + " \"str_col\": [\"even\", \"odd\", \"even\", \"odd\", \"even\", \"odd\"],\n", + " 
\"geometry\": [\n", + " Point(0, 0),\n", + " Point(1, 1),\n", + " Point(2, 2),\n", + " Point(3, 3),\n", + " Point(4, 4),\n", + " Point(5, 5),\n", + " ],\n", + "}\n", + "gdf = gpd.GeoDataFrame(d, crs=\"EPSG:4326\")\n", + "\n", + "point_layer = lonboard.ScatterplotLayer.from_geopandas(\n", + " gdf,\n", + " get_fill_color=(0, 255, 0),\n", + " radius_min_pixels=10,\n", + " extensions=[\n", + " DataFilterExtension(filter_size=0, category_size=1),\n", + " ], # no range filter, just a category\n", + " get_filter_category=gdf[cat_col], # use the cat column for the filter category\n", + ")\n", + "\n", + "m = lonboard.Map(layers=[point_layer], basemap_style=CartoBasemap.DarkMatter)\n", + "\n", + "filter_enabled_w = ipywidgets.Checkbox(\n", + " value=True,\n", + " description=\"Filter Enabled\",\n", + ")\n", + "\n", + "\n", + "def on_filter_enabled_change(change): # noqa\n", + " # when we change the checkbox, toggle filtering on the layer\n", + " point_layer.filter_enabled = filter_enabled_w.value\n", + "\n", + "\n", + "filter_enabled_w.observe(on_filter_enabled_change, names=\"value\")\n", + "\n", + "cat_selector = ipywidgets.SelectMultiple( # make a select multiple so we can see interaction on the map\n", + " options=list(gdf[cat_col].unique()),\n", + " value=[list(gdf[cat_col].unique())[0]],\n", + " description=\"Category\",\n", + " disabled=False,\n", + ")\n", + "\n", + "\n", + "def on_cat_selector_change(change) -> None: # noqa\n", + " # when we change the selector, update the filter on the layer.\n", + " point_layer.filter_categories = cat_selector.value\n", + "\n", + "\n", + "cat_selector.observe(on_cat_selector_change, names=\"value\")\n", + "\n", + "ipywidgets.VBox([m, filter_enabled_w, cat_selector])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce25544c", + "metadata": {}, + "outputs": [], + "source": [ + "point_layer2 = lonboard.ScatterplotLayer.from_geopandas(\n", + " gdf,\n", + " get_fill_color=(0, 255, 0),\n", + " 
radius_min_pixels=10,\n", + " extensions=[\n", + " DataFilterExtension(filter_size=1, category_size=0),\n", + " ], # no category filter, just a range\n", + " get_filter_value=gdf[\"int_col\"], # use the int_col for the filter value\n", + ")\n", + "\n", + "m2 = lonboard.Map(layers=[point_layer2], basemap_style=CartoBasemap.DarkMatter)\n", + "point_layer2.filter_range = [0, 5]\n", + "m2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef71f006", + "metadata": {}, + "outputs": [], + "source": [ + "point_layer2.filter_range = [1, 4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10e27c5d", + "metadata": {}, + "outputs": [], + "source": [ + "point_layer3 = lonboard.ScatterplotLayer.from_geopandas(\n", + " gdf,\n", + " get_fill_color=(0, 255, 0),\n", + " radius_min_pixels=10,\n", + " extensions=[\n", + " DataFilterExtension(filter_size=1, category_size=1),\n", + " ], # both a range filter and a category filter\n", + " get_filter_category=gdf[\n", + " \"float_col\"\n", + " ], # use the float column for the filter category\n", + " get_filter_value=gdf[\"int_col\"], # use the int column for the filter value\n", + ")\n", + "\n", + "point_layer3.filter_categories = [1.5]\n", + "point_layer3.filter_range = [0, 3]\n", + "m3 = lonboard.Map(layers=[point_layer3], basemap_style=CartoBasemap.DarkMatter)\n", + "m3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3f48195", + "metadata": {}, + "outputs": [], + "source": [ + "point_layer3.filter_range = [0, 5]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "lonboard_category_filter", + "language": "python", + "name": "lonboard_category_filter" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} 
diff --git a/lonboard/layer_extension.py b/lonboard/layer_extension.py
index 306a3c94..8e07fad2 100644
--- a/lonboard/layer_extension.py
+++ b/lonboard/layer_extension.py
@@ -5,6 +5,7 @@
 from lonboard._base import BaseExtension
 from lonboard.traits import (
     DashArrayAccessor,
+    FilterCategoryAccessor,
     FilterValueAccessor,
     FloatAccessor,
     PointAccessor,
@@ -353,10 +354,13 @@ class DataFilterExtension(BaseExtension):
         "filter_transform_size": t.Bool(default_value=True).tag(sync=True),
         "filter_transform_color": t.Bool(default_value=True).tag(sync=True),
         "get_filter_value": FilterValueAccessor(default_value=None, allow_none=True),
-        "get_filter_category": FilterValueAccessor(default_value=None, allow_none=True),
+        "get_filter_category": FilterCategoryAccessor(
+            default_value=None,
+            allow_none=True,
+        ),
     }
 
-    filter_size = t.Int(None, min=1, max=4, allow_none=True).tag(sync=True)
+    filter_size = t.Int(1, min=1, max=4, allow_none=True).tag(sync=True)
     """The size of the filter (number of columns to filter by).
 
     The data filter can show/hide data based on 1-4 numeric properties of each object.
@@ -371,7 +375,7 @@ class DataFilterExtension(BaseExtension):
     The category filter can show/hide data based on 1-4 properties of each object.
 
     - Type: `int`. This is required if using category-based filtering.
-    - Default 0.
+    - Default None.
     """
 
 
diff --git a/lonboard/traits.py b/lonboard/traits.py
index fba25a4d..c7929965 100644
--- a/lonboard/traits.py
+++ b/lonboard/traits.py
@@ -823,6 +823,196 @@ def validate(
         return value.rechunk(max_chunksize=obj._rows_per_chunk)
 
 
+class FilterCategoryAccessor(FixedErrorTraitType):
+    """Validate input for `get_filter_category`.
+
+    A trait to validate input for the `get_filter_category` accessor added by the
+    [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension], which can
+    have between 1 and 4 values per row.
+
+    Various input is allowed:
+
+    - An `int` or `float` (a `str` scalar is also accepted by validation). This will
+      be used as the value for all objects. The `category_size` of the
+      [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
+      must be 1.
+    - A `tuple` or `list`. It is returned unchanged; its length must match the
+      `category_size` of the
+      [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance.
+    - A one-dimensional numpy `ndarray` with a numeric data type. Each value in the
+      array will be used as the value for the object at the same row index. The
+      `category_size` of the
+      [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
+      must be 1.
+    - A two-dimensional numpy `ndarray` with a numeric data type. Each row of the
+      array will be used as the values for the object at the same row index. The
+      `category_size` of the
+      [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
+      must match the size of the second dimension of the array.
+    - A pandas `Series` with a numeric data type. Each value in the array will be used
+      as the value for the object at the same row index. The `category_size` of the
+      [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
+      must be 1.
+    - A pyarrow [`FloatArray`][pyarrow.FloatArray], [`DoubleArray`][pyarrow.DoubleArray]
+      or [`ChunkedArray`][pyarrow.ChunkedArray] containing either a `FloatArray` or
+      `DoubleArray`. Each value in the array will be used as the value for the object
+      at the same row index. The `category_size` of the
+      [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
+      must be 1.
+    - A pyarrow [`FixedSizeListArray`][pyarrow.FixedSizeListArray] or
+      [`ChunkedArray`][pyarrow.ChunkedArray] containing `FixedSizeListArray`s. The
+      `category_size` of the
+      [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
+      must match the list size.
+
+    Instead of a pyarrow object you can pass any corresponding Arrow data structure
+    from a library that implements the [Arrow PyCapsule
+    Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html).
+    """
+
+    default_value = None
+    info_text = "a value or numpy ndarray or Arrow array representing an array of data"
+
+    def __init__(
+        self: TraitType,
+        *args: Any,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(*args, **kwargs)
+        self.tag(sync=True, **ACCESSOR_SERIALIZATION)
+
+    def _pandas_to_numpy(
+        self,
+        obj: BaseArrowLayer,
+        value: Any,
+        category_size: int,
+    ) -> np.ndarray:
+        """Convert a pandas `Series` to numpy; requires `category_size == 1`."""
+        # A pandas Series can technically contain Python list objects, but for
+        # simplicity only a single category per row is supported for Series input.
+        if category_size != 1:
+            self.error(obj, value, info="category_size==1 with pandas Series")
+
+        return np.asarray(value)
+
+    def _numpy_to_arrow(
+        self,
+        obj: BaseArrowLayer,
+        value: Any,
+        category_size: int,
+    ) -> ChunkedArray:
+        """Wrap a 1-D or 2-D numeric numpy array in a chunked fixed-size-list."""
+        # Reject non-numeric dtypes up front (e.g. object arrays from a pandas string
+        # column) so the caller gets a trait error rather than a failure from inside
+        # the Arrow conversion.
+        if value.dtype.kind not in "biuf":
+            self.error(obj, value, info="numpy array with a numeric dtype")
+
+        if value.ndim == 1:
+            if category_size != 1:
+                self.error(obj, value, info="category_size==1 with 1-D numpy array")
+        elif value.ndim == 2:
+            if value.shape[1] != category_size:
+                self.error(
+                    obj,
+                    value,
+                    info=(
+                        f"category_size ({category_size}) to match 2nd dimension of "
+                        "numpy array"
+                    ),
+                )
+        else:
+            self.error(obj, value, info="1-D or 2-D numpy array")
+
+        # Flatten in row-major order so that row i of the input becomes list i of the
+        # fixed-size-list array; both branches build the chunked array the same way.
+        array = fixed_size_list_array(value.ravel("C"), category_size)
+        return ChunkedArray([array])
+
+    def validate(
+        self,
+        obj: BaseArrowLayer,
+        value: Any,
+    ) -> str | float | tuple | list | ChunkedArray:
+        """Validate `value` against the `category_size` of the layer's data filter.
+
+        Scalars, tuples and lists are returned unchanged; numpy, pandas and Arrow
+        input is returned as a `ChunkedArray`. Any mismatch is reported through
+        `self.error`.
+        """
+        # Find the data filter extension in the attributes of the parent object so we
+        # can validate against the category size.
+        data_filter_extensions = [
+            ext
+            for ext in obj.extensions
+            if ext._extension_type == "data-filter"  # type: ignore
+        ]
+        # Raise explicitly instead of asserting: assertions are stripped under `-O`.
+        if len(data_filter_extensions) != 1:
+            self.error(
+                obj,
+                value,
+                info="exactly one DataFilterExtension in the layer's extensions",
+            )
+        category_size = data_filter_extensions[0].category_size  # type: ignore
+
+        if isinstance(value, (int, float, str)):
+            if category_size != 1:
+                self.error(obj, value, info="category_size==1 with scalar value")
+            return value
+
+        if isinstance(value, (tuple, list)):
+            if category_size != len(value):
+                self.error(
+                    obj,
+                    value,
+                    info=f"category_size ({category_size}) to match length of tuple/list",
+                )
+            return value
+
+        # Detect a pandas Series by module and class name, without importing pandas.
+        if (
+            value.__class__.__module__.startswith("pandas")
+            and value.__class__.__name__ == "Series"
+        ):
+            value = self._pandas_to_numpy(obj, value, category_size)
+
+        if isinstance(value, np.ndarray):
+            value = self._numpy_to_arrow(obj, value, category_size)
+        elif hasattr(value, "__arrow_c_array__"):
+            value = ChunkedArray([Array.from_arrow(value)])
+        elif hasattr(value, "__arrow_c_stream__"):
+            value = ChunkedArray.from_arrow(value)
+        else:
+            self.error(obj, value)
+
+        assert isinstance(value, ChunkedArray)
+
+        # Allowed inputs are either a FixedSizeListArray or a plain array.
+        if not DataType.is_fixed_size_list(value.type):
+            if category_size != 1:
+                self.error(
+                    obj,
+                    value,
+                    info="category_size==1 with non-FixedSizeList type arrow array",
+                )
+
+            return value
+
+        # We have a FixedSizeListArray
+        if category_size != value.type.list_size:
+            self.error(
+                obj,
+                value,
+                info=(
+                    f"category_size ({category_size}) to match list size of "
+                    "FixedSizeList arrow array"
+                ),
+            )
+
+        return value.rechunk(max_chunksize=obj._rows_per_chunk)
+
+
 class NormalAccessor(FixedErrorTraitType):
     """A representation of a deck.gl "normal" accessor.
 
diff --git a/src/model/extension.ts b/src/model/extension.ts index 7ae75f7b..d6ffbec0 100644 --- a/src/model/extension.ts +++ b/src/model/extension.ts @@ -147,19 +147,15 @@ export class DataFilterExtension extends BaseExtensionModel { } extensionInstance(): _DataFilterExtension | null { - if (isDefined(this.filterSize)) { - const props = { - ...(isDefined(this.filterSize) ? { filterSize: this.filterSize } : {}), - }; - // console.log("ext props", props); - return new _DataFilterExtension(props); - } else if (isDefined(this.categorySize)) { + if (isDefined(this.filterSize) || isDefined(this.categorySize)) { const props = { + ...(isDefined(this.filterSize) + ? { filterSize: this.filterSize != null ? this.filterSize : 0 } + : {}), ...(isDefined(this.categorySize) - ? { categorySize: this.categorySize } + ? { categorySize: this.categorySize != null ? this.categorySize : 0 } : {}), }; - // console.log("ext props", props); return new _DataFilterExtension(props); } else { return null;