Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 141 additions & 0 deletions examples/!category_filter.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9ffdeba2",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f45976dba1f541268fa76936193821c3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Map(basemap_style=<CartoBasemap.DarkMatter: 'https://basemaps.cartocdn.com/gl/dark-matter-gl-st…"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# no intention of actually keeping this notebook in the repo, once it's all working good I'll make up a new doc about the category_filter\n",
"# I have added it for now to demonstrate that I've got the data filter filter_categories functionality working for numeric inputs\n",
"# looking at the deck gl docs: https://deck.gl/docs/api-reference/extensions/data-filter-extension#layer-properties\n",
"# it appears that we should be able to use strings for the categories but when I try to use string data the layer simply doesnt work\n",
"\n",
"import geopandas as gpd\n",
"import ipywidgets\n",
"import pyarrow as pa # noqa\n",
"from shapely.geometry import Point\n",
"\n",
"import lonboard\n",
"from lonboard.basemap import CartoBasemap\n",
"from lonboard.layer_extension import DataFilterExtension\n",
"\n",
"cat_col = \"int_col\"\n",
"# int_col: works\n",
"# float_col: works\n",
"# str_col: does NOT work :(\n",
"# as is it will throw an arro3 ValueError: Expected object with __arrow_c_array__ method or implementing buffer protocol.\n",
"# we can avoid the arro3 exception by using pyarrow as the input to get_filter_category when we create the layer:\n",
"# `get_filter_category=pa.array(gdf[cat_col])`\n",
"# but the layer doesn't display and throws a lot of the following WebGL error:\n",
"# GL_INVALID_OPERATION: Vertex shader input type does not match the type of the bound vertex attribute\n",
"\n",
"\n",
"d = {\n",
" \"int_col\": [0, 1, 2, 3, 4, 5],\n",
" \"float_col\": [0.0, 1.5, 0.0, 1.5, 0.0, 1.5],\n",
" \"str_col\": [\"even\", \"odd\", \"even\", \"odd\", \"even\", \"odd\"],\n",
" \"geometry\": [\n",
" Point(0, 0),\n",
" Point(1, 1),\n",
" Point(2, 2),\n",
" Point(3, 3),\n",
" Point(4, 4),\n",
" Point(5, 5),\n",
" ],\n",
"}\n",
"gdf = gpd.GeoDataFrame(d, crs=\"EPSG:4326\")\n",
"\n",
"point_layer = lonboard.ScatterplotLayer.from_geopandas(\n",
" gdf,\n",
" get_fill_color=(0, 255, 0),\n",
" radius_min_pixels=10,\n",
" extensions=[\n",
" DataFilterExtension(filter_size=0, category_size=1),\n",
" ], # no range filter, just a category\n",
" get_filter_category=gdf[cat_col], # use the cat column for the filter category\n",
")\n",
"\n",
"m = lonboard.Map(layers=[point_layer], basemap_style=CartoBasemap.DarkMatter)\n",
"\n",
"filter_enabled_w = ipywidgets.Checkbox(\n",
" value=True,\n",
" description=\"Filter Enabled\",\n",
")\n",
"\n",
"\n",
"def on_filter_enabled_change(change): # noqa\n",
" # when we change the checkbox, toggle filtering on the layer\n",
" point_layer.filter_enabled = filter_enabled_w.value\n",
"\n",
"\n",
"filter_enabled_w.observe(on_filter_enabled_change, names=\"value\")\n",
"\n",
"cat_selector = ipywidgets.SelectMultiple( # make a select multiple so we can see interaction on the map\n",
" options=list(gdf[cat_col].unique()),\n",
" value=[list(gdf[cat_col].unique())[0]],\n",
" description=\"Category\",\n",
" disabled=False,\n",
")\n",
"\n",
"\n",
"def on_cat_selector_change(change) -> None: # noqa\n",
" # when we change the selector, update the filter on the layer.\n",
" point_layer.filter_categories = cat_selector.value\n",
"\n",
"\n",
"cat_selector.observe(on_cat_selector_change, names=\"value\")\n",
"\n",
"ipywidgets.VBox([m, filter_enabled_w, cat_selector])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce25544c",
"metadata": {},
"outputs": [],
"source": [
"gdf[\"number\"].values[0].item()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "lonboard_category_filter",
"language": "python",
"name": "lonboard_category_filter"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
10 changes: 7 additions & 3 deletions lonboard/layer_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from lonboard._base import BaseExtension
from lonboard.traits import (
DashArrayAccessor,
FilterCategoryAccessor,
FilterValueAccessor,
FloatAccessor,
PointAccessor,
Expand Down Expand Up @@ -353,10 +354,13 @@ class DataFilterExtension(BaseExtension):
"filter_transform_size": t.Bool(default_value=True).tag(sync=True),
"filter_transform_color": t.Bool(default_value=True).tag(sync=True),
"get_filter_value": FilterValueAccessor(default_value=None, allow_none=True),
"get_filter_category": FilterValueAccessor(default_value=None, allow_none=True),
"get_filter_category": FilterCategoryAccessor(
default_value=None,
allow_none=True,
),
}

filter_size = t.Int(None, min=1, max=4, allow_none=True).tag(sync=True)
filter_size = t.Int(None, min=0, max=4, allow_none=True).tag(sync=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The JS docs say

Set to 0 to disable numeric filtering

I think instead of exposing that same behavior to Python, we should override None to mean "disabled". So when the Python side passes in None it translates to passing 0 into JS.

Similarly, we should change the default value here to 1 to match JS, now that None doesn't mean "undefined" but rather "defined and null"

"""The size of the filter (number of columns to filter by).

The data filter can show/hide data based on 1-4 numeric properties of each object.
Expand All @@ -365,7 +369,7 @@ class DataFilterExtension(BaseExtension):
- Default 1.
"""

category_size = t.Int(None, min=1, max=4, allow_none=True).tag(sync=True)
category_size = t.Int(None, min=0, max=4, allow_none=True).tag(sync=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto for the same behavior as above, though the default here can be None

"""The size of the category filter (number of columns to filter by).

The category filter can show/hide data based on 1-4 properties of each object.
Expand Down
172 changes: 172 additions & 0 deletions lonboard/traits.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,178 @@ def validate(
return value.rechunk(max_chunksize=obj._rows_per_chunk)


class FilterCategoryAccessor(FixedErrorTraitType):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some tests for this? There are some example tests in test_traits.py and you can look at #917 for more examples.

It might be worth making test_traits into a folder and having a file specifically for test_traits/test_filter_extension.py

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ran out of time tonight, but I'll see what I can do in the next couple evenings, or maybe this weekend

"""Validate input for `get_filter_category`.

A trait to validate input for the `get_filter_category` accessor added by the
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension], which can
have between 1 and 4 values per row.


Various input is allowed:

- An `int` or `float`. This will be used as the value for all objects. The
`category_size` of the
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must be 1.
- A one-dimensional numpy `ndarray` with a numeric data type. Each value in the array will
be used as the value for the object at the same row index. The `category_size` of
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must be 1.
- A two-dimensional numpy `ndarray` with a numeric data type. Each value in the array will
be used as the value for the object at the same row index. The `category_size` of
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must match the size of the second dimension of the array.
- A pandas `Series` with a numeric data type. Each value in the array will be used as
the value for the object at the same row index. The `category_size` of the
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must be 1.
- A pyarrow [`FloatArray`][pyarrow.FloatArray], [`DoubleArray`][pyarrow.DoubleArray]
or [`ChunkedArray`][pyarrow.ChunkedArray] containing either a `FloatArray` or
`DoubleArray`. Each value in the array will be used as the value for the object at
the same row index. The `category_size` of the
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must be 1.

Alternatively, you can pass any corresponding Arrow data structure from a library
that implements the [Arrow PyCapsule
Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html).
- A pyarrow [`FixedSizeListArray`][pyarrow.FixedSizeListArray] or
[`ChunkedArray`][pyarrow.ChunkedArray] containing `FixedSizeListArray`s. The `category_size` of
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance
must match the list size.

Alternatively, you can pass any corresponding Arrow data structure from a library
that implements the [Arrow PyCapsule
Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html).
"""

default_value = None
info_text = "a value or numpy ndarray or Arrow array representing an array of data"

def __init__(self: TraitType, *args: Any, **kwargs: Any) -> None:
    """Create the trait and tag it as a synced accessor.

    All positional and keyword arguments are forwarded unchanged to the
    parent trait constructor.
    """
    super().__init__(*args, **kwargs)
    # Tag the trait for widget sync, together with the shared accessor
    # serialization tags.
    self.tag(sync=True, **ACCESSOR_SERIALIZATION)

def _pandas_to_numpy(
    self,
    obj: BaseArrowLayer,
    value: Any,
    category_size: int,
) -> np.ndarray:
    """Convert a pandas Series into a numpy array.

    A Series is only accepted when `category_size` is 1. A Series could
    technically hold Python list objects, but that is disallowed here for
    simplicity; any other `category_size` is reported through `self.error`.
    """
    series_is_allowed = category_size == 1
    if not series_is_allowed:
        self.error(obj, value, info="category_size==1 with pandas Series")

    return np.asarray(value)

def _numpy_to_arrow(
    self,
    obj: BaseArrowLayer,
    value: Any,
    category_size: int,
) -> ChunkedArray:
    """Convert a 1-D or 2-D numpy array into a single-chunk `ChunkedArray`.

    - A 1-D array requires `category_size == 1`.
    - A 2-D array requires its second dimension to equal `category_size`.
    - Any other dimensionality is rejected.

    Violations are reported through `self.error`. Both accepted shapes go
    through the same construction path: the values are wrapped with
    `fixed_size_list_array(value, category_size)` and the resulting array is
    placed in a one-element list of chunks.
    """
    ndim = len(value.shape)

    if ndim == 1:
        if category_size != 1:
            self.error(obj, value, info="category_size==1 with 1-D numpy array")
    elif ndim == 2:
        if value.shape[1] != category_size:
            self.error(
                obj,
                value,
                info=(
                    f"category_size ({category_size}) to match 2nd dimension of numpy array"
                ),
            )
    else:
        self.error(obj, value, info="1-D or 2-D numpy array")

    # Always pass a list of chunks so the 1-D and 2-D cases build the
    # ChunkedArray the same way.
    array = fixed_size_list_array(value, category_size)
    return ChunkedArray([array])

def validate(
    self,
    obj: BaseArrowLayer,
    value: Any,
) -> str | float | tuple | list | ChunkedArray:
    """Validate a `get_filter_category` value against the layer's `category_size`.

    The `category_size` is read from the single extension in `obj.extensions`
    whose `_extension_type` is `"data-filter"`.

    Accepted inputs:

    - `int`, `float` or `str` scalar: returned unchanged; requires
      `category_size == 1`.
    - `tuple` or `list`: returned unchanged; its length must equal
      `category_size`.
    - pandas `Series`: converted with `_pandas_to_numpy`, then handled as a
      numpy array.
    - numpy `ndarray`: converted with `_numpy_to_arrow`.
    - Object exposing `__arrow_c_array__`: wrapped in a single-chunk
      `ChunkedArray`.
    - Object exposing `__arrow_c_stream__`: imported with
      `ChunkedArray.from_arrow`.

    For Arrow data, a non-FixedSizeList array requires `category_size == 1`
    and is returned as-is. A FixedSizeList array must have a list size equal
    to `category_size` and is returned rechunked to `obj._rows_per_chunk`.

    Every violation is reported through `self.error`.
    """
    # Find the data filter extension in the attributes of the parent object so
    # we can validate against the category size.
    data_filter_extensions = [
        ext
        for ext in obj.extensions
        if ext._extension_type == "data-filter"  # type: ignore
    ]
    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, which would turn a missing extension into an opaque
    # IndexError on the lookup below.
    if len(data_filter_extensions) != 1:
        self.error(
            obj,
            value,
            info="a layer with exactly one DataFilterExtension",
        )
    category_size = data_filter_extensions[0].category_size  # type: ignore

    if isinstance(value, (int, float, str)):
        if category_size != 1:
            self.error(obj, value, info="category_size==1 with scalar value")
        return value

    if isinstance(value, (tuple, list)):
        if category_size != len(value):
            self.error(
                obj,
                value,
                info=f"category_size ({category_size}) to match length of tuple/list",
            )
        return value

    # pandas Series, detected by class module/name so pandas need not be
    # imported here.
    if (
        value.__class__.__module__.startswith("pandas")
        and value.__class__.__name__ == "Series"
    ):
        value = self._pandas_to_numpy(obj, value, category_size)

    if isinstance(value, np.ndarray):
        value = self._numpy_to_arrow(obj, value, category_size)
    elif hasattr(value, "__arrow_c_array__"):
        value = ChunkedArray([Array.from_arrow(value)])
    elif hasattr(value, "__arrow_c_stream__"):
        value = ChunkedArray.from_arrow(value)
    else:
        self.error(obj, value)

    # Type narrowing only: every branch above either produced a ChunkedArray
    # or reported an error.
    assert isinstance(value, ChunkedArray)

    # Allowed inputs are either a FixedSizeListArray or a plain array.
    if not DataType.is_fixed_size_list(value.type):
        if category_size != 1:
            self.error(
                obj,
                value,
                info="category_size==1 with non-FixedSizeList type arrow array",
            )

        return value

    # We have a FixedSizeListArray.
    if category_size != value.type.list_size:
        self.error(
            obj,
            value,
            info=(
                f"category_size ({category_size}) to match list size of "
                "FixedSizeList arrow array"
            ),
        )

    return value.rechunk(max_chunksize=obj._rows_per_chunk)


class NormalAccessor(FixedErrorTraitType):
"""A representation of a deck.gl "normal" accessor.

Expand Down
12 changes: 9 additions & 3 deletions src/model/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,19 +146,25 @@ export class DataFilterExtension extends BaseExtensionModel {
}

extensionInstance(): _DataFilterExtension | null {
if (isDefined(this.filterSize)) {
if (isDefined(this.filterSize) && isDefined(this.categorySize)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we avoid having this extra case? Can we leave it to the user to define their filter sizes and category sizes correctly? I.e. why would this case need to be different from the case below it?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, whoops! If I change that && to an || then we won't need either of the other else if blocks at all since in the creation of props it's checking isDefined too

const props = {
...(isDefined(this.filterSize) ? { filterSize: this.filterSize } : {}),
...(isDefined(this.categorySize)
? { categorySize: this.categorySize }
: {}),
};
return new _DataFilterExtension(props);
} else if (isDefined(this.filterSize)) {
const props = {
...(isDefined(this.filterSize) ? { filterSize: this.filterSize } : {}),
};
// console.log("ext props", props);
return new _DataFilterExtension(props);
} else if (isDefined(this.categorySize)) {
const props = {
...(isDefined(this.categorySize)
? { categorySize: this.categorySize }
: {}),
};
// console.log("ext props", props);
return new _DataFilterExtension(props);
} else {
return null;
Expand Down