Skip to content

Commit 370d992

Browse files
authored
rerun_py.dataframe: add support for .filter_index_values (#7670)
### What - Add a new type helper for IndexValues (in general some variation of this will be useful for other APIs) - Added a dependency on `numpy` package for ArrayLike functionality - `numpy` has an old dep on ndarray. - This has already been fixed but not yet released: PyO3/rust-numpy#439 - ChunkedArrays are sadly more complicated (see the note in: https://docs.rs/arrow/latest/arrow/pyarrow/index.html) ### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using examples from latest `main` build: [rerun.io/viewer](https://rerun.io/viewer/pr/7670?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [rerun.io/viewer](https://rerun.io/viewer/pr/7670?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! * [x] I have noted any breaking changes to the log API in `CHANGELOG.md` and the migration guide - [PR Build Summary](https://build.rerun.io/pr/7670) - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html) To run all checks from `main`, comment on the PR with `@rerun-bot full-check`.
1 parent 8df29e2 commit 370d992

File tree

8 files changed

+280
-45
lines changed

8 files changed

+280
-45
lines changed

Cargo.lock

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3588,6 +3588,19 @@ version = "1.0.9"
35883588
source = "registry+https://github.com/rust-lang/crates.io-index"
35893589
checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c"
35903590

3591+
[[package]]
3592+
name = "ndarray"
3593+
version = "0.15.6"
3594+
source = "registry+https://github.com/rust-lang/crates.io-index"
3595+
checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
3596+
dependencies = [
3597+
"matrixmultiply",
3598+
"num-complex",
3599+
"num-integer",
3600+
"num-traits",
3601+
"rawpointer",
3602+
]
3603+
35913604
[[package]]
35923605
name = "ndarray"
35933606
version = "0.16.1"
@@ -3609,7 +3622,7 @@ version = "0.15.0"
36093622
source = "registry+https://github.com/rust-lang/crates.io-index"
36103623
checksum = "f093b3db6fd194718dcdeea6bd8c829417deae904e3fcc7732dabcd4416d25d8"
36113624
dependencies = [
3612-
"ndarray",
3625+
"ndarray 0.16.1",
36133626
"rand",
36143627
"rand_distr",
36153628
]
@@ -3846,6 +3859,21 @@ version = "0.4.0"
38463859
source = "registry+https://github.com/rust-lang/crates.io-index"
38473860
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
38483861

3862+
[[package]]
3863+
name = "numpy"
3864+
version = "0.21.0"
3865+
source = "registry+https://github.com/rust-lang/crates.io-index"
3866+
checksum = "ec170733ca37175f5d75a5bea5911d6ff45d2cd52849ce98b685394e4f2f37f4"
3867+
dependencies = [
3868+
"libc",
3869+
"ndarray 0.15.6",
3870+
"num-complex",
3871+
"num-integer",
3872+
"num-traits",
3873+
"pyo3",
3874+
"rustc-hash",
3875+
]
3876+
38493877
[[package]]
38503878
name = "objc"
38513879
version = "0.2.7"
@@ -5457,7 +5485,7 @@ dependencies = [
54575485
"document-features",
54585486
"itertools 0.13.0",
54595487
"libc",
5460-
"ndarray",
5488+
"ndarray 0.16.1",
54615489
"ndarray-rand",
54625490
"once_cell",
54635491
"parking_lot",
@@ -5647,7 +5675,7 @@ dependencies = [
56475675
"bytemuck",
56485676
"egui",
56495677
"half 2.3.1",
5650-
"ndarray",
5678+
"ndarray 0.16.1",
56515679
"re_chunk_store",
56525680
"re_data_ui",
56535681
"re_log_types",
@@ -5798,7 +5826,7 @@ dependencies = [
57985826
"linked-hash-map",
57995827
"mime_guess2",
58005828
"mint",
5801-
"ndarray",
5829+
"ndarray 0.16.1",
58025830
"nohash-hasher",
58035831
"once_cell",
58045832
"ply-rs",
@@ -6015,7 +6043,7 @@ dependencies = [
60156043
"indexmap 2.1.0",
60166044
"itertools 0.13.0",
60176045
"linked-hash-map",
6018-
"ndarray",
6046+
"ndarray 0.16.1",
60196047
"nohash-hasher",
60206048
"once_cell",
60216049
"parking_lot",
@@ -6289,6 +6317,7 @@ dependencies = [
62896317
"infer",
62906318
"itertools 0.13.0",
62916319
"mimalloc",
6320+
"numpy",
62926321
"once_cell",
62936322
"parking_lot",
62946323
"pyo3",
@@ -6461,7 +6490,7 @@ dependencies = [
64616490
"clap",
64626491
"half 2.3.1",
64636492
"image",
6464-
"ndarray",
6493+
"ndarray 0.16.1",
64656494
"re_log",
64666495
"rerun",
64676496
]
@@ -6533,7 +6562,7 @@ version = "0.19.0-alpha.1+dev"
65336562
dependencies = [
65346563
"anyhow",
65356564
"clap",
6536-
"ndarray",
6565+
"ndarray 0.16.1",
65376566
"re_log",
65386567
"rerun",
65396568
]
@@ -7016,7 +7045,7 @@ name = "snippets"
70167045
version = "0.19.0-alpha.1+dev"
70177046
dependencies = [
70187047
"itertools 0.13.0",
7019-
"ndarray",
7048+
"ndarray 0.16.1",
70207049
"rand",
70217050
"rand_distr",
70227051
"re_build_tools",
@@ -7188,7 +7217,7 @@ dependencies = [
71887217
"clap",
71897218
"glam",
71907219
"itertools 0.13.0",
7191-
"ndarray",
7220+
"ndarray 0.16.1",
71927221
"ndarray-rand",
71937222
"rand",
71947223
"re_log",

Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ memory-stats = "1.1"
195195
# This version is not pinned to avoid creating version requirement conflicts,
196196
# but other packages pin it to exactly "=0.1.37"
197197
mimalloc = "0.1.37"
198-
mime_guess2 = "2.0" # infer MIME type by file extension, and map mime to file extension
198+
mime_guess2 = "2.0" # infer MIME type by file extension, and map mime to file extension
199199
mint = "0.5.9"
200200
re_mp4 = "0.1.0"
201201
natord = "1.0.9"
@@ -206,6 +206,9 @@ nohash-hasher = "0.2"
206206
notify = { version = "6.1.1", features = ["macos_kqueue"] }
207207
num-derive = "0.4"
208208
num-traits = "0.2"
209+
# TODO(#7676) This pulls in an older ndarray. Remove it from the skip list in `deny.toml` and
210+
# close the issue when updating to 0.22.
211+
numpy = "0.21"
209212
once_cell = "1.17" # No lazy_static - use `std::sync::OnceLock` or `once_cell` instead
210213
ordered-float = "4.2"
211214
parking_lot = "0.12"

deny.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ skip = [
5555
{ name = "hashbrown" }, # Old version used by polar-rs
5656
{ name = "libloading" }, # Old version used by ash (vulkan binding), newer version used by khronos-egl
5757
{ name = "memoffset" }, # Small crate
58+
{ name = "ndarray" }, # Needed by `numpy<0.22` in `rerun_py`
5859
{ name = "prettyplease" }, # Old version being used by prost
5960
{ name = "pulldown-cmark" }, # Build-dependency via `ply-rs` (!). TODO(emilk): use a better crate for .ply parsing
6061
{ name = "raw-window-handle" }, # Pretty small crate; some crates still on old version

rerun_py/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ infer.workspace = true
6767
# TODO(#5875): `mimalloc` starts leaking OS pages starting with `0.1.38`.
6868
# When the bug is fixed, change this back to `mimalloc = { workspace = true, …`.
6969
mimalloc = { version = "=0.1.37", features = ["local_dynamic_tls"] }
70+
numpy.workspace = true
7071
once_cell.workspace = true
7172
parking_lot.workspace = true
7273
pyo3 = { workspace = true, features = ["abi3-py38"] }

rerun_py/rerun_bindings/rerun_bindings.pyi

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ from typing import Optional, Sequence
33

44
import pyarrow as pa
55

6-
from .types import AnyColumn, ComponentLike, ViewContentsLike
6+
from .types import AnyColumn, ComponentLike, IndexValuesLike, ViewContentsLike
77

88
class IndexColumnDescriptor:
99
"""A column containing the index values for when the component data was updated."""
@@ -57,6 +57,16 @@ class RecordingView:
5757
"""Filter the view to only include data between the given index time values."""
5858
...
5959

60+
def filter_index_values(self, values: IndexValuesLike) -> RecordingView:
61+
"""
62+
Filter the view to only include data at the given index values.
63+
64+
This requires index values to be a precise match. Index values in Rerun are
65+
represented as i64 sequence counts or nanoseconds. This API does not expose an interface
66+
in floating point seconds, as the numerical conversion would risk false mismatches.
67+
"""
68+
...
69+
6070
def select(self, *args: AnyColumn, columns: Optional[Sequence[AnyColumn]] = None) -> pa.RecordBatchReader: ...
6171

6272
class Recording:

rerun_py/rerun_bindings/types.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,27 @@
22

33
from typing import TYPE_CHECKING, Sequence, TypeAlias, Union
44

5+
import numpy as np
6+
import numpy.typing as npt
7+
import pyarrow as pa
8+
59
if TYPE_CHECKING:
610
from rerun._baseclasses import ComponentMixin
711

812
from .rerun_bindings import (
913
ComponentColumnDescriptor as ComponentColumnDescriptor,
1014
ComponentColumnSelector as ComponentColumnSelector,
11-
TimeColumnDescriptor as TimeColumnDescriptor,
12-
TimeColumnSelector as TimeColumnSelector,
15+
IndexColumnSelector as IndexColumnDescriptor,
16+
IndexColumnSelector as IndexColumnSelector,
1317
)
1418

1519
ComponentLike: TypeAlias = Union[str, type["ComponentMixin"]]
1620

1721
AnyColumn: TypeAlias = Union[
18-
"TimeColumnDescriptor",
1922
"ComponentColumnDescriptor",
20-
"TimeColumnSelector",
2123
"ComponentColumnSelector",
24+
"IndexColumnDescriptor",
25+
"IndexColumnSelector",
2226
]
2327

2428
AnyComponentColumn: TypeAlias = Union[
@@ -30,3 +34,5 @@
3034
str,
3135
dict[str, Union[AnyColumn, Sequence[ComponentLike]]],
3236
]
37+
38+
IndexValuesLike: TypeAlias = Union[npt.NDArray[np.int_], pa.Int64Array]

rerun_py/src/dataframe.rs

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
use std::collections::{BTreeMap, BTreeSet};
66

77
use arrow::{
8-
array::{RecordBatchIterator, RecordBatchReader},
8+
array::{make_array, Array, ArrayData, Int64Array, RecordBatchIterator, RecordBatchReader},
99
pyarrow::PyArrowType,
1010
};
11+
use numpy::PyArrayMethods as _;
1112
use pyo3::{
1213
exceptions::{PyRuntimeError, PyTypeError, PyValueError},
1314
prelude::*,
@@ -195,6 +196,108 @@ impl AnyComponentColumn {
195196
}
196197
}
197198

199+
#[derive(FromPyObject)]
200+
enum IndexValuesLike<'py> {
201+
PyArrow(PyArrowType<ArrayData>),
202+
NumPy(numpy::PyArrayLike1<'py, i64>),
203+
204+
// Catch all to support ChunkedArray and other types
205+
#[pyo3(transparent)]
206+
CatchAll(Bound<'py, PyAny>),
207+
}
208+
209+
impl<'py> IndexValuesLike<'py> {
210+
fn to_index_values(&self) -> PyResult<BTreeSet<re_chunk_store::TimeInt>> {
211+
match self {
212+
Self::PyArrow(array) => {
213+
let array = make_array(array.0.clone());
214+
215+
let int_array = array.as_any().downcast_ref::<Int64Array>().ok_or_else(|| {
216+
PyTypeError::new_err("pyarrow.Array for IndexValuesLike must be of type int64.")
217+
})?;
218+
219+
let values: BTreeSet<re_chunk_store::TimeInt> = int_array
220+
.iter()
221+
.map(|v| {
222+
v.map_or_else(
223+
|| re_chunk_store::TimeInt::STATIC,
224+
// The use of temporal here should be fine even if the data is
225+
// not actually temporal. The important thing is we are converting
226+
// from an i64 input
227+
re_chunk_store::TimeInt::new_temporal,
228+
)
229+
})
230+
.collect();
231+
232+
if values.len() != int_array.len() {
233+
return Err(PyValueError::new_err("Index values must be unique."));
234+
}
235+
236+
Ok(values)
237+
}
238+
Self::NumPy(array) => {
239+
let values: BTreeSet<re_chunk_store::TimeInt> = array
240+
.readonly()
241+
.as_array()
242+
.iter()
243+
// The use of temporal here should be fine even if the data is
244+
// not actually temporal. The important thing is we are converting
245+
// from an i64 input
246+
.map(|v| re_chunk_store::TimeInt::new_temporal(*v))
247+
.collect();
248+
249+
if values.len() != array.len()? {
250+
return Err(PyValueError::new_err("Index values must be unique."));
251+
}
252+
253+
Ok(values)
254+
}
255+
Self::CatchAll(any) => {
256+
// If any has the `.chunks` attribute, we can try each chunk as a pyarrow array
257+
if let Ok(chunks) = any.getattr("chunks") {
258+
let mut values = BTreeSet::new();
259+
for chunk in chunks.iter()? {
260+
let chunk = chunk?.extract::<PyArrowType<ArrayData>>()?;
261+
let array = make_array(chunk.0.clone());
262+
263+
let int_array =
264+
array.as_any().downcast_ref::<Int64Array>().ok_or_else(|| {
265+
PyTypeError::new_err(
266+
"pyarrow.Array for IndexValuesLike must be of type int64.",
267+
)
268+
})?;
269+
270+
values.extend(
271+
int_array
272+
.iter()
273+
.map(|v| {
274+
v.map_or_else(
275+
|| re_chunk_store::TimeInt::STATIC,
276+
// The use of temporal here should be fine even if the data is
277+
// not actually temporal. The important thing is we are converting
278+
// from an i64 input
279+
re_chunk_store::TimeInt::new_temporal,
280+
)
281+
})
282+
.collect::<BTreeSet<_>>(),
283+
);
284+
}
285+
286+
if values.len() != any.len()? {
287+
return Err(PyValueError::new_err("Index values must be unique."));
288+
}
289+
290+
Ok(values)
291+
} else {
292+
Err(PyTypeError::new_err(
293+
"IndexValuesLike must be a pyarrow.Array, pyarrow.ChunkedArray, or numpy.ndarray",
294+
))
295+
}
296+
}
297+
}
298+
}
299+
}
300+
198301
struct ComponentLike(re_sdk::ComponentName);
199302

200303
impl FromPyObject<'_> for ComponentLike {
@@ -438,6 +541,18 @@ impl PyRecordingView {
438541
query_expression,
439542
})
440543
}
544+
545+
fn filter_index_values(&self, values: IndexValuesLike<'_>) -> PyResult<Self> {
546+
let values = values.to_index_values()?;
547+
548+
let mut query_expression = self.query_expression.clone();
549+
query_expression.filtered_index_values = Some(values);
550+
551+
Ok(Self {
552+
recording: self.recording.clone(),
553+
query_expression,
554+
})
555+
}
441556
}
442557

443558
impl PyRecording {

0 commit comments

Comments
 (0)