-
Notifications
You must be signed in to change notification settings - Fork 39
fix: DataFilterExtension get_filter_category #884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
d37c8ce
53abc41
8ff247e
5c7f72e
a1f519b
e5281be
732fcf2
2403779
e4625c2
647a111
b0086ad
daf0a51
fa6d94b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9ffdeba2", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"application/vnd.jupyter.widget-view+json": { | ||
"model_id": "f45976dba1f541268fa76936193821c3", | ||
"version_major": 2, | ||
"version_minor": 0 | ||
}, | ||
"text/plain": [ | ||
"VBox(children=(Map(basemap_style=<CartoBasemap.DarkMatter: 'https://basemaps.cartocdn.com/gl/dark-matter-gl-st…" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# no intention of actually keeping this notebook in the repo, once it's all working good I'll make up a new doc about the category_filter\n", | ||
"# I have added it for now to demonstrate that I've got the data filter filter_categories functionality working for numeric inputs\n", | ||
"# looking at the deck gl docs: https://deck.gl/docs/api-reference/extensions/data-filter-extension#layer-properties\n", | ||
"# it appears that we should be able to use strings for the categories but when I try to use string data the layer simply doesnt work\n", | ||
"\n", | ||
"import geopandas as gpd\n", | ||
"import ipywidgets\n", | ||
"import pyarrow as pa # noqa\n", | ||
"from shapely.geometry import Point\n", | ||
"\n", | ||
"import lonboard\n", | ||
"from lonboard.basemap import CartoBasemap\n", | ||
"from lonboard.layer_extension import DataFilterExtension\n", | ||
"\n", | ||
"cat_col = \"int_col\"\n", | ||
"# int_col: works\n", | ||
"# float_col: works\n", | ||
"# str_col: does NOT work :(\n", | ||
"# as is it will throw an arro3 ValueError: Expected object with __arrow_c_array__ method or implementing buffer protocol.\n", | ||
"# we can avoid the arro3 exception by using pyarrow as the input to get_filter_category when we create the layer:\n", | ||
"# `get_filter_category=pa.array(gdf[cat_col])`\n", | ||
"# but the layer doesn't display and throws a lot of the following WebGL error:\n", | ||
"# GL_INVALID_OPERATION: Vertex shader input type does not match the type of the bound vertex attribute\n", | ||
"\n", | ||
"\n", | ||
"d = {\n", | ||
" \"int_col\": [0, 1, 2, 3, 4, 5],\n", | ||
" \"float_col\": [0.0, 1.5, 0.0, 1.5, 0.0, 1.5],\n", | ||
" \"str_col\": [\"even\", \"odd\", \"even\", \"odd\", \"even\", \"odd\"],\n", | ||
" \"geometry\": [\n", | ||
" Point(0, 0),\n", | ||
" Point(1, 1),\n", | ||
" Point(2, 2),\n", | ||
" Point(3, 3),\n", | ||
" Point(4, 4),\n", | ||
" Point(5, 5),\n", | ||
" ],\n", | ||
"}\n", | ||
"gdf = gpd.GeoDataFrame(d, crs=\"EPSG:4326\")\n", | ||
"\n", | ||
"point_layer = lonboard.ScatterplotLayer.from_geopandas(\n", | ||
" gdf,\n", | ||
" get_fill_color=(0, 255, 0),\n", | ||
" radius_min_pixels=10,\n", | ||
" extensions=[\n", | ||
" DataFilterExtension(filter_size=0, category_size=1),\n", | ||
" ], # no range filter, just a category\n", | ||
" get_filter_category=gdf[cat_col], # use the cat column for the filter category\n", | ||
")\n", | ||
"\n", | ||
"m = lonboard.Map(layers=[point_layer], basemap_style=CartoBasemap.DarkMatter)\n", | ||
"\n", | ||
"filter_enabled_w = ipywidgets.Checkbox(\n", | ||
" value=True,\n", | ||
" description=\"Filter Enabled\",\n", | ||
")\n", | ||
"\n", | ||
"\n", | ||
"def on_filter_enabled_change(change): # noqa\n", | ||
" # when we change the checkbox, toggle filtering on the layer\n", | ||
" point_layer.filter_enabled = filter_enabled_w.value\n", | ||
"\n", | ||
"\n", | ||
"filter_enabled_w.observe(on_filter_enabled_change, names=\"value\")\n", | ||
"\n", | ||
"cat_selector = ipywidgets.SelectMultiple( # make a select multiple so we can see interaction on the map\n", | ||
" options=list(gdf[cat_col].unique()),\n", | ||
" value=[list(gdf[cat_col].unique())[0]],\n", | ||
" description=\"Category\",\n", | ||
" disabled=False,\n", | ||
")\n", | ||
"\n", | ||
"\n", | ||
"def on_cat_selector_change(change) -> None: # noqa\n", | ||
" # when we change the selector, update the filter on the layer.\n", | ||
" point_layer.filter_categories = cat_selector.value\n", | ||
"\n", | ||
"\n", | ||
"cat_selector.observe(on_cat_selector_change, names=\"value\")\n", | ||
"\n", | ||
"ipywidgets.VBox([m, filter_enabled_w, cat_selector])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "ce25544c", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"gdf[\"number\"].values[0].item()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "lonboard_category_filter", | ||
"language": "python", | ||
"name": "lonboard_category_filter" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
from lonboard._base import BaseExtension | ||
from lonboard.traits import ( | ||
DashArrayAccessor, | ||
FilterCategoryAccessor, | ||
FilterValueAccessor, | ||
FloatAccessor, | ||
PointAccessor, | ||
|
@@ -353,10 +354,13 @@ class DataFilterExtension(BaseExtension): | |
"filter_transform_size": t.Bool(default_value=True).tag(sync=True), | ||
"filter_transform_color": t.Bool(default_value=True).tag(sync=True), | ||
"get_filter_value": FilterValueAccessor(default_value=None, allow_none=True), | ||
"get_filter_category": FilterValueAccessor(default_value=None, allow_none=True), | ||
"get_filter_category": FilterCategoryAccessor( | ||
default_value=None, | ||
allow_none=True, | ||
), | ||
} | ||
|
||
filter_size = t.Int(None, min=1, max=4, allow_none=True).tag(sync=True) | ||
filter_size = t.Int(None, min=0, max=4, allow_none=True).tag(sync=True) | ||
"""The size of the filter (number of columns to filter by). | ||
|
||
The data filter can show/hide data based on 1-4 numeric properties of each object. | ||
|
@@ -365,7 +369,7 @@ class DataFilterExtension(BaseExtension): | |
- Default 1. | ||
""" | ||
|
||
category_size = t.Int(None, min=1, max=4, allow_none=True).tag(sync=True) | ||
category_size = t.Int(None, min=0, max=4, allow_none=True).tag(sync=True) | ||
|
||
"""The size of the category filter (number of columns to filter by). | ||
|
||
The category filter can show/hide data based on 1-4 properties of each object. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -823,6 +823,178 @@ def validate( | |
return value.rechunk(max_chunksize=obj._rows_per_chunk) | ||
|
||
|
||
class FilterCategoryAccessor(FixedErrorTraitType): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add some tests for this? There are some example tests in […]. It might be worth making […]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ran out of time tonight, but I'll see what I can do in the next couple of evenings, or maybe this weekend. |
||
"""Validate input for `get_filter_category`. | ||
|
||
A trait to validate input for the `get_filter_category` accessor added by the | ||
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension], which can | ||
have between 1 and 4 values per row. | ||
|
||
|
||
Various input is allowed: | ||
|
||
- An `int`, `float` or `str`. This will be used as the value for all objects. The | ||
`category_size` of the | ||
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance | ||
must be 1. | ||
- A one-dimensional numpy `ndarray` with a numeric data type. Each value in the array will | ||
be used as the value for the object at the same row index. The `category_size` of | ||
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance | ||
must be 1. | ||
- A two-dimensional numpy `ndarray` with a numeric data type. Each value in the array will | ||
be used as the value for the object at the same row index. The `category_size` of | ||
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance | ||
must match the size of the second dimension of the array. | ||
- A pandas `Series` with a numeric data type. Each value in the array will be used as | ||
the value for the object at the same row index. The `category_size` of the | ||
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance | ||
must be 1. | ||
- A pyarrow [`FloatArray`][pyarrow.FloatArray], [`DoubleArray`][pyarrow.DoubleArray] | ||
or [`ChunkedArray`][pyarrow.ChunkedArray] containing either a `FloatArray` or | ||
`DoubleArray`. Each value in the array will be used as the value for the object at | ||
the same row index. The `category_size` of the | ||
[`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance | ||
must be 1. | ||
|
||
Alternatively, you can pass any corresponding Arrow data structure from a library | ||
that implements the [Arrow PyCapsule | ||
Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html). | ||
- A pyarrow [`FixedSizeListArray`][pyarrow.FixedSizeListArray] or | ||
[`ChunkedArray`][pyarrow.ChunkedArray] containing `FixedSizeListArray`s. The `category_size` of | ||
the [`DataFilterExtension`][lonboard.layer_extension.DataFilterExtension] instance | ||
must match the list size. | ||
|
||
Alternatively, you can pass any corresponding Arrow data structure from a library | ||
that implements the [Arrow PyCapsule | ||
Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html). | ||
""" | ||
|
||
default_value = None | ||
info_text = "a value or numpy ndarray or Arrow array representing an array of data" | ||
|
||
def __init__(
    self: TraitType,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Initialise the trait and tag it for widget synchronisation.

    All positional and keyword arguments are forwarded unchanged to the parent
    trait type's constructor.
    """
    super().__init__(*args, **kwargs)

    # Tag with ``sync=True`` plus every entry of ``ACCESSOR_SERIALIZATION``.
    self.tag(sync=True, **ACCESSOR_SERIALIZATION)
|
||
def _pandas_to_numpy(
    self,
    obj: BaseArrowLayer,
    value: Any,
    category_size: int,
) -> np.ndarray:
    """Convert a pandas Series into a numpy ndarray.

    A Series is only accepted when ``category_size`` is 1; any other size is
    reported through ``self.error``.
    """
    # A Series may technically hold Python lists per row, but that case is
    # deliberately not supported, so only a single category column is allowed.
    single_column = category_size == 1
    if not single_column:
        self.error(obj, value, info="category_size==1 with pandas Series")

    as_ndarray = np.asarray(value)
    return as_ndarray
|
||
def _numpy_to_arrow(
    self,
    obj: BaseArrowLayer,
    value: Any,
    category_size: int,
) -> ChunkedArray:
    """Convert a 1-D or 2-D numpy array into a single-chunk FixedSizeList array.

    Args:
        obj: The layer being validated; only used for error reporting.
        value: The numpy array to convert.
        category_size: The ``category_size`` of the layer's data filter
            extension. Must be 1 for a 1-D array and must equal the size of
            the second dimension for a 2-D array.

    Returns:
        A ``ChunkedArray`` with one chunk, built by ``fixed_size_list_array``
        with a list size of ``category_size``.

    Raises:
        Whatever ``self.error`` raises when the array is not 1-D or 2-D, or
        when its shape does not agree with ``category_size``.
    """
    ndim = len(value.shape)
    if ndim == 1:
        if category_size != 1:
            self.error(obj, value, info="category_size==1 with 1-D numpy array")
    elif ndim == 2:
        if value.shape[1] != category_size:
            self.error(
                obj,
                value,
                info=(
                    f"category_size ({category_size}) to match 2nd dimension of numpy array"
                ),
            )
    else:
        self.error(obj, value, info="1-D or 2-D numpy array")

    # Flatten in row-major order so that each run of `category_size` consecutive
    # values forms exactly one row of the fixed-size list. This is a no-op for
    # 1-D input.
    flat_values = value.ravel("C")
    array = fixed_size_list_array(flat_values, category_size)

    # Both the 1-D and 2-D cases wrap the array the same way.
    return ChunkedArray([array])
|
||
def validate(
    self,
    obj: BaseArrowLayer,
    value: Any,
) -> str | float | tuple | list | ChunkedArray:
    """Validate and normalise input for the `get_filter_category` accessor.

    Args:
        obj: The layer that owns this trait. Its ``extensions`` must contain
            exactly one extension whose ``_extension_type`` is
            ``"data-filter"``; that extension's ``category_size`` is what the
            input is checked against.
        value: The user-supplied accessor value.

    Returns:
        - Scalars (``int``, ``float``, ``str``) and ``tuple``/``list`` values
          are returned unchanged once their size has been checked.
        - A pandas Series, numpy array or Arrow array/stream is converted to a
          ``ChunkedArray`` rechunked to ``obj._rows_per_chunk``.

    Raises:
        Whatever ``self.error`` raises when the input type is unsupported or
        its size does not agree with ``category_size``.
    """
    # Find the data filter extension on the parent layer so the input can be
    # validated against its category size.
    data_filter_extension = [
        ext
        for ext in obj.extensions
        if ext._extension_type == "data-filter"  # type: ignore
    ]
    assert len(data_filter_extension) == 1
    category_size = data_filter_extension[0].category_size  # type: ignore

    # A scalar is applied to every object, so only one category column is allowed.
    if isinstance(value, (int, float, str)):
        if category_size != 1:
            self.error(obj, value, info="category_size==1 with scalar value")
        return value

    # A tuple/list provides one constant value per category column.
    if isinstance(value, (tuple, list)):
        if category_size != len(value):
            self.error(
                obj,
                value,
                info=f"category_size ({category_size}) to match length of tuple/list",
            )
        return value

    # pandas Series, detected by class name so pandas need not be imported here.
    if (
        value.__class__.__module__.startswith("pandas")
        and value.__class__.__name__ == "Series"
    ):
        value = self._pandas_to_numpy(obj, value, category_size)

    if isinstance(value, np.ndarray):
        value = self._numpy_to_arrow(obj, value, category_size)
    elif hasattr(value, "__arrow_c_array__"):
        value = ChunkedArray([Array.from_arrow(value)])
    elif hasattr(value, "__arrow_c_stream__"):
        value = ChunkedArray.from_arrow(value)
    else:
        self.error(obj, value)

    assert isinstance(value, ChunkedArray)

    # Allowed inputs are either a FixedSizeListArray or a plain array.
    if DataType.is_fixed_size_list(value.type):
        if category_size != value.type.list_size:
            self.error(
                obj,
                value,
                info=(
                    f"category_size ({category_size}) to match list size of "
                    "FixedSizeList arrow array"
                ),
            )
    elif category_size != 1:
        self.error(
            obj,
            value,
            info="category_size==1 with non-FixedSizeList type arrow array",
        )

    # Rechunk on every array path (previously only FixedSizeList input was
    # rechunked), so plain arrays also respect the layer's rows-per-chunk limit.
    return value.rechunk(max_chunksize=obj._rows_per_chunk)
|
||
|
||
class NormalAccessor(FixedErrorTraitType): | ||
"""A representation of a deck.gl "normal" accessor. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -146,19 +146,25 @@ export class DataFilterExtension extends BaseExtensionModel { | |
} | ||
|
||
extensionInstance(): _DataFilterExtension | null { | ||
if (isDefined(this.filterSize)) { | ||
if (isDefined(this.filterSize) && isDefined(this.categorySize)) { | ||
|
||
const props = { | ||
...(isDefined(this.filterSize) ? { filterSize: this.filterSize } : {}), | ||
...(isDefined(this.categorySize) | ||
? { categorySize: this.categorySize } | ||
: {}), | ||
}; | ||
return new _DataFilterExtension(props); | ||
} else if (isDefined(this.filterSize)) { | ||
const props = { | ||
...(isDefined(this.filterSize) ? { filterSize: this.filterSize } : {}), | ||
}; | ||
// console.log("ext props", props); | ||
return new _DataFilterExtension(props); | ||
} else if (isDefined(this.categorySize)) { | ||
const props = { | ||
...(isDefined(this.categorySize) | ||
? { categorySize: this.categorySize } | ||
: {}), | ||
}; | ||
// console.log("ext props", props); | ||
return new _DataFilterExtension(props); | ||
} else { | ||
return null; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The JS docs say
I think instead of exposing that same behavior to Python, we should override `None` to mean "disabled". So when the Python side passes in `None`, it translates to passing `0` into JS.
Similarly, we should change the default value here to `1` to match JS, now that `None` doesn't mean "undefined" but rather "defined and null".