From 331502b0579e151b1696a11a451e9de0cff5b11a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jochen=20G=C3=B6rtler?=
Date: Thu, 25 Sep 2025 18:14:01 +0200
Subject: [PATCH 01/12] Initial commit for `TransformSink`

---
 crates/top/re_sdk/src/lib.rs      |  2 +-
 crates/top/re_sdk/src/log_sink.rs | 37 +++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/crates/top/re_sdk/src/lib.rs b/crates/top/re_sdk/src/lib.rs
index cb77eaf1bd1a..042df0b78189 100644
--- a/crates/top/re_sdk/src/lib.rs
+++ b/crates/top/re_sdk/src/lib.rs
@@ -85,7 +85,7 @@ pub mod sink {
     pub use crate::binary_stream_sink::{BinaryStreamSink, BinaryStreamStorage};
     pub use crate::log_sink::{
         BufferedSink, CallbackSink, IntoMultiSink, LogSink, MemorySink, MemorySinkStorage,
-        MultiSink, SinkFlushError,
+        MultiSink, SinkFlushError, TransformSink,
     };
     pub use crate::log_sink::{GrpcSink, GrpcSinkConnectionFailure, GrpcSinkConnectionState};
 
diff --git a/crates/top/re_sdk/src/log_sink.rs b/crates/top/re_sdk/src/log_sink.rs
index a4b99ae4a4e2..ad7accf2229c 100644
--- a/crates/top/re_sdk/src/log_sink.rs
+++ b/crates/top/re_sdk/src/log_sink.rs
@@ -593,3 +593,40 @@ impl LogSink for GrpcSink {
         self
     }
 }
+
+// ----------------------------------------------------------------------------
+
+trait Transformation: Send + Sync + 'static {
+    fn apply(&self, msg: LogMsg) -> Vec<LogMsg>;
+}
+
+/// A sink which can transform a `LogMsg` and forward the result to an underlying `LogSink`.
+pub struct TransformSink<S: LogSink, T: Transformation> {
+    sink: S,
+    transform: T,
+}
+
+impl<S: LogSink, T: Transformation> TransformSink<S, T> {
+    /// Create a new `TransformSink` with the given transform function.
+    #[inline]
+    pub fn new(sink: S, transform: T) -> Self {
+        Self { sink, transform }
+    }
+}
+
+impl<S: LogSink, T: Transformation> LogSink for TransformSink<S, T> {
+    fn send(&self, msg: re_log_types::LogMsg) {
+        self.sink.send_all(self.transform.apply(msg))
+    }
+
+    fn flush_blocking(
+        &self,
+        timeout: std::time::Duration,
+    ) -> Result<(), crate::sink::SinkFlushError> {
+        self.sink.flush_blocking(timeout)
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+}

From d0e0ffe77d583ac7842840a08a038e9eddb4668c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jochen=20G=C3=B6rtler?=
Date: Fri, 26 Sep 2025 16:13:10 +0200
Subject: [PATCH 02/12] Minimal example

---
 Cargo.lock                          |  14 +-
 crates/top/re_sdk/src/lib.rs        |   2 +-
 crates/top/re_sdk/src/log_sink.rs   |  22 ++-
 examples/rust/transform/Cargo.toml  |  17 ++
 examples/rust/transform/README.md   |   8 +
 examples/rust/transform/src/main.rs | 249 ++++++++++++++++++++++++++++
 6 files changed, 305 insertions(+), 7 deletions(-)
 create mode 100644 examples/rust/transform/Cargo.toml
 create mode 100644 examples/rust/transform/README.md
 create mode 100644 examples/rust/transform/src/main.rs

diff --git a/Cargo.lock b/Cargo.lock
index 749e1b727729..9c37135b6de0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1753,7 +1753,7 @@
 checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81"
 dependencies = [
  "serde",
  "termcolor",
- "unicode-width 0.1.14",
+ "unicode-width 0.2.1",
 ]
 
 [[package]]
@@ -3026,7 +3026,7 @@
 version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412"
 dependencies = [
- "libloading 0.7.4",
+ "libloading 0.8.8",
 ]
 
 [[package]]
@@ -11243,6 +11243,16 @@
 dependencies = [
  "windows-targets 0.52.6",
 ]
 
+[[package]]
+name = "transform"
+version = "0.26.0-alpha.1+dev"
+dependencies = [
+ "anyhow",
+ "arrow",
+ "clap",
+ "rerun",
+]
+
 [[package]]
 name = "try-lock"
 version = "0.2.5"
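
A minimal sketch of how the pieces from the first patch compose, as it would
look inside `log_sink.rs` (the `Transformation` trait is still private at this
point); `Passthrough` is an illustrative name, not part of the patch:

    struct Passthrough;

    impl Transformation for Passthrough {
        fn apply(&self, msg: LogMsg) -> Vec<LogMsg> {
            vec![msg] // forward every message unchanged
        }
    }

    // Wrap any existing `LogSink`:
    // let sink = TransformSink::new(GrpcSink::default(), Passthrough);
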
diff --git a/crates/top/re_sdk/src/lib.rs b/crates/top/re_sdk/src/lib.rs
index 042df0b78189..d403e5941cf1 100644
--- a/crates/top/re_sdk/src/lib.rs
+++ b/crates/top/re_sdk/src/lib.rs
@@ -85,7 +85,7 @@ pub mod sink {
     pub use crate::binary_stream_sink::{BinaryStreamSink, BinaryStreamStorage};
     pub use crate::log_sink::{
         BufferedSink, CallbackSink, IntoMultiSink, LogSink, MemorySink, MemorySinkStorage,
-        MultiSink, SinkFlushError, TransformSink,
+        MultiSink, Pipeline, PipelineTransform, SinkFlushError,
     };
     pub use crate::log_sink::{GrpcSink, GrpcSinkConnectionFailure, GrpcSinkConnectionState};
 
diff --git a/crates/top/re_sdk/src/log_sink.rs b/crates/top/re_sdk/src/log_sink.rs
index ad7accf2229c..78299f872eb3 100644
--- a/crates/top/re_sdk/src/log_sink.rs
+++ b/crates/top/re_sdk/src/log_sink.rs
@@ -595,18 +595,32 @@ impl LogSink for GrpcSink {
 }
 
 // ----------------------------------------------------------------------------
+//
+// TODO:
+// * Move to own file.
+// * Better names.
 
-trait Transformation: Send + Sync + 'static {
+pub trait PipelineTransform: Send + Sync + 'static {
     fn apply(&self, msg: LogMsg) -> Vec<LogMsg>;
+
+    fn to_sink<S: LogSink>(self, sink: S) -> Pipeline<S, Self>
+    where
+        Self: Sized,
+    {
+        Pipeline {
+            sink,
+            transform: self,
+        }
+    }
 }
 
 /// A sink which can transform a `LogMsg` and forward the result to an underlying `LogSink`.
-pub struct TransformSink<S: LogSink, T: Transformation> {
+pub struct Pipeline<S: LogSink, T: PipelineTransform> {
     sink: S,
     transform: T,
 }
 
-impl<S: LogSink, T: Transformation> TransformSink<S, T> {
+impl<S: LogSink, T: PipelineTransform> Pipeline<S, T> {
     /// Create a new `TransformSink` with the given transform function.
     #[inline]
     pub fn new(sink: S, transform: T) -> Self {
@@ -614,7 +628,7 @@ impl<S: LogSink, T: Transformation> TransformSink<S, T> {
     }
 }
 
-impl<S: LogSink, T: Transformation> LogSink for TransformSink<S, T> {
+impl<S: LogSink, T: PipelineTransform> LogSink for Pipeline<S, T> {
     fn send(&self, msg: re_log_types::LogMsg) {
         self.sink.send_all(self.transform.apply(msg))
     }
diff --git a/examples/rust/transform/Cargo.toml b/examples/rust/transform/Cargo.toml
new file mode 100644
index 000000000000..d933f66f894f
--- /dev/null
+++ b/examples/rust/transform/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "transform"
+version = "0.26.0-alpha.1+dev"
+edition = "2024"
+rust-version = "1.88"
+license = "MIT OR Apache-2.0"
+publish = false
+
+[dependencies]
+rerun = { path = "../../../crates/top/rerun", features = [
+    "web_viewer",
+    "clap",
+] }
+
+anyhow = "1.0"
+arrow.workspace = true
+clap = { version = "4.0", features = ["derive"] }
diff --git a/examples/rust/transform/README.md b/examples/rust/transform/README.md
new file mode 100644
index 000000000000..cfcb6b7bb813
--- /dev/null
+++ b/examples/rust/transform/README.md
@@ -0,0 +1,8 @@
+
+
+Demonstrates how to transform log messages before forwarding them to the sink of the SDK.
+```bash
+cargo run -p transform
+```
diff --git a/examples/rust/transform/src/main.rs b/examples/rust/transform/src/main.rs
new file mode 100644
index 000000000000..53fe9ed8c120
--- /dev/null
+++ b/examples/rust/transform/src/main.rs
@@ -0,0 +1,249 @@
+use std::sync::Arc;
+
+use arrow::{
+    array::{Array, Float32Array, Float64Array, ListArray, StructArray},
+    datatypes::{DataType, Field},
+};
+use rerun::{
+    ComponentDescriptor, DynamicArchetype, RecordingStream, Scalars, TextDocument, TimeCell,
+    dataframe::{EntityPathFilter, ResolvedEntityPathFilter},
+    external::re_log,
+    log::{Chunk, ChunkId, LogMsg},
+    sink::{GrpcSink, PipelineTransform},
+};
+
+#[derive(Debug, clap::Parser)]
+#[clap(author, version, about)]
+struct Args {
+    #[command(flatten)]
+    rerun: rerun::clap::RerunArgs,
+
+    /// The filepaths to be loaded and logged.
+    filepaths: Vec<std::path::PathBuf>,
+}
+
+// TODO: Is this the right API?
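+//
+// A `PerChunkTransform` pairs an entity-path filter with a closure that maps
+// one decoded `Chunk` to zero or more replacement chunks. The pipeline below
+// decodes each incoming `ArrowMsg`, applies every transform whose filter
+// matches the chunk's entity path, and re-encodes the results; messages
+// without a matching transform pass through unchanged.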
+type ChunkFunc = Box<dyn Fn(&Chunk) -> Vec<Chunk> + Send + Sync>;
+
+pub struct PerChunkTransform {
+    /// The entity path to apply the transformation to.
+    pub filter: ResolvedEntityPathFilter,
+
+    /// A closure that outputs a list of chunks
+    pub func: ChunkFunc,
+}
+
+pub struct PerChunkPiplineTransform {
+    transforms: Vec<PerChunkTransform>,
+}
+
+impl PipelineTransform for PerChunkPiplineTransform {
+    fn apply(&self, msg: LogMsg) -> Vec<LogMsg> {
+        match &msg {
+            LogMsg::SetStoreInfo(_) | LogMsg::BlueprintActivationCommand(_) => {
+                vec![msg]
+            }
+            LogMsg::ArrowMsg(store_id, arrow_msg) => match Chunk::from_arrow_msg(arrow_msg) {
+                Ok(chunk) => {
+                    let mut relevant = self
+                        .transforms
+                        .iter()
+                        .filter(|transform| transform.filter.matches(chunk.entity_path()))
+                        .peekable();
+                    if relevant.peek().is_some() {
+                        relevant
+                            .flat_map(|transform| (*transform.func)(&chunk))
+                            .filter_map(|transformed| match transformed.to_arrow_msg() {
+                                Ok(arrow_msg) => {
+                                    Some(LogMsg::ArrowMsg(store_id.clone(), arrow_msg))
+                                }
+                                Err(err) => {
+                                    re_log::error_once!(
+                                        "failed to create log message from chunk: {err}"
+                                    );
+                                    None
+                                }
+                            })
+                            .collect()
+                    } else {
+                        vec![msg]
+                    }
+                }
+
+                Err(err) => {
+                    re_log::error_once!("Failed to convert arrow message to chunk: {err}");
+                    vec![msg]
+                }
+            },
+        }
+    }
+}
+
+fn main() -> anyhow::Result<()> {
+    re_log::setup_logging();
+
+    use clap::Parser as _;
+    let args = Args::parse();
+
+    let instruction_transform = PerChunkTransform {
+        filter: "/instructions"
+            .parse::<EntityPathFilter>()?
+            .resolve_without_substitutions(), // TODO: call the right thing here.
+        func: Box::new(|chunk: &rerun::log::Chunk| {
+            let mut components = chunk.components().clone();
+
+            let maybe_array = components
+                .get(&ComponentDescriptor {
+                    archetype: Some("com.Example.Instruction".into()),
+                    component: "com.Example.Instruction:text".into(),
+                    component_type: None,
+                })
+                .cloned();
+            if let Some(array) = maybe_array {
+                components.insert(TextDocument::descriptor_text(), array);
+            }
+
+            let mut new_chunk = chunk.clone().components_removed().with_id(ChunkId::new());
+            for (component_descr, array) in components.iter() {
+                new_chunk
+                    .add_component(component_descr.clone(), array.clone())
+                    .unwrap();
+            }
+            vec![new_chunk]
+        }),
+    };
+
+    let gello_a_transform = PerChunkTransform {
+        filter: "/nested"
+            .parse::<EntityPathFilter>()?
+            .resolve_without_substitutions(), // TODO: call the right thing here.
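+        // The closure below unpacks the `List[Struct]` column, pulls out the
+        // child field "b", and re-wraps it with the original offsets and null
+        // mask so that row alignment with the chunk's timelines is preserved.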
+        func: Box::new(|chunk: &rerun::log::Chunk| {
+            let mut components = chunk.components().clone();
+
+            let maybe_array = components
+                .get(&ComponentDescriptor {
+                    archetype: Some("com.Example.Nested".into()),
+                    component: "com.Example.Nested:payload".into(),
+                    component_type: None,
+                })
+                .cloned();
+
+            if let Some(list_struct_array) = maybe_array {
+                let list_array = list_struct_array
+                    .as_any()
+                    .downcast_ref::<ListArray>()
+                    .unwrap();
+
+                let struct_array = list_array
+                    .values()
+                    .as_any()
+                    .downcast_ref::<StructArray>()
+                    .unwrap();
+
+                let child_b_array = struct_array.column_by_name("b").unwrap();
+
+                let field = Arc::new(Field::new_list_field(
+                    child_b_array.data_type().clone(),
+                    true,
+                ));
+
+                let new_list_array = ListArray::new(
+                    field,
+                    list_array.offsets().clone(), // Use ListArray's offsets
+                    child_b_array.clone(),        // Values from field "b"
+                    list_array.nulls().cloned(),  // Preserve null mask
+                );
+
+                components.insert(Scalars::descriptor_scalars(), new_list_array);
+            }
+
+            let mut new_chunk = chunk.clone().components_removed().with_id(ChunkId::new());
+            for (component_descr, array) in components.iter() {
+                new_chunk
+                    .add_component(component_descr.clone(), array.clone())
+                    .unwrap();
+            }
+            vec![new_chunk]
+        }),
+    };
+
+    let transform = PerChunkPiplineTransform {
+        transforms: vec![instruction_transform, gello_a_transform],
+    }
+    .to_sink(GrpcSink::default());
+
+    let (rec, _serve_guard) = args.rerun.init("rerun_example_transform")?;
+    // TODO: There should be a way to do this in one go.
+    rec.set_sink(Box::new(transform));
+    run(&rec, &args)?;
+
+    Ok(())
+}
+
+fn run(rec: &rerun::RecordingStream, args: &Args) -> anyhow::Result<()> {
+    let prefix = Some("log_file_example".into());
+
+    if args.filepaths.is_empty() {
+        log_instructions(rec)?;
+        log_structs_with_scalars(rec)?;
+        return Ok(());
+    }
+
+    for filepath in &args.filepaths {
+        let filepath = filepath.as_path();
+
+        // …or using its contents if you already have them loaded for some reason.
+        if filepath.is_file() {
+            let contents = std::fs::read(filepath)?;
+            rec.log_file_from_contents(
+                filepath,
+                std::borrow::Cow::Borrowed(&contents),
+                prefix.clone(),
+                true, /* static */
+            )?;
+        }
+    }
+
+    Ok(())
+}
+
+fn log_instructions(rec: &RecordingStream) -> anyhow::Result<()> {
+    rec.set_time("tick", TimeCell::from_sequence(1));
+    rec.log(
+        "instructions",
+        &DynamicArchetype::new("com.Example.Instruction").with_component_from_data(
+            "text",
+            Arc::new(arrow::array::StringArray::from(vec![
+                "This is a nice instruction text.",
+            ])),
+        ),
+    )?;
+
+    Ok(())
+}
+
+fn log_structs_with_scalars(rec: &RecordingStream) -> anyhow::Result<()> {
+    for x in 0..10 {
+        let a = Float32Array::from(vec![1.0 * x as f32, 2.0 + x as f32, 3.0 + x as f32]);
+        let b = Float64Array::from(vec![5.0 * x as f64, 6.0 + x as f64, 7.0 + x as f64]);
+
+        let struct_array = StructArray::from(vec![
+            (
+                Arc::new(Field::new("a", DataType::Float32, false)),
+                Arc::new(a) as Arc<dyn Array>,
+            ),
+            (
+                Arc::new(Field::new("b", DataType::Float64, false)),
+                Arc::new(b) as Arc<dyn Array>,
+            ),
+        ]);
+        rec.set_time("tick", TimeCell::from_sequence(x));
+        rec.log(
+            "nested",
+            &DynamicArchetype::new("com.Example.Nested")
+                .with_component_from_data("payload", Arc::new(struct_array)),
+        )?
+    }
+
+    Ok(())
+}
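
The `PipelineTransform` trait above is deliberately small: anything that maps
one `LogMsg` to zero or more messages can be chained in front of a sink via
`to_sink`. A minimal sketch of the simplest possible transform, one that
swallows every message (`DropAll` is an illustrative name, not part of this
series):

    struct DropAll;

    impl PipelineTransform for DropAll {
        fn apply(&self, _msg: LogMsg) -> Vec<LogMsg> {
            Vec::new() // an empty vec drops the message entirely
        }
    }

    // let sink = DropAll.to_sink(GrpcSink::default());
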
From 5aa672a2b31a54cc62e9a9cbc02f938eb360c64e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jochen=20G=C3=B6rtler?=
Date: Wed, 1 Oct 2025 11:00:51 +0200
Subject: [PATCH 03/12] rename

---
 examples/rust/transform/src/main.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/rust/transform/src/main.rs b/examples/rust/transform/src/main.rs
index 53fe9ed8c120..e39b730e8acb 100644
--- a/examples/rust/transform/src/main.rs
+++ b/examples/rust/transform/src/main.rs
@@ -113,7 +113,7 @@ fn main() -> anyhow::Result<()> {
         }),
     };
 
-    let gello_a_transform = PerChunkTransform {
+    let destructure_transform = PerChunkTransform {
         filter: "/nested"
             .parse::<EntityPathFilter>()?
             .resolve_without_substitutions(), // TODO: call the right thing here.
@@ -168,7 +168,7 @@ fn main() -> anyhow::Result<()> {
     };
 
     let transform = PerChunkPiplineTransform {
-        transforms: vec![instruction_transform, gello_a_transform],
+        transforms: vec![instruction_transform, destructure_transform],
     }
     .to_sink(GrpcSink::default());
 

From fbe7a4d13a1a569a5473b3503572adb8686de154 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jochen=20G=C3=B6rtler?=
Date: Mon, 29 Sep 2025 13:28:09 +0200
Subject: [PATCH 04/12] Implement initial version of column-wise
 transformations

---
 examples/rust/transform/src/main.rs | 270 ++++++++++++++++++++++++++--
 1 file changed, 258 insertions(+), 12 deletions(-)

diff --git a/examples/rust/transform/src/main.rs b/examples/rust/transform/src/main.rs
index e39b730e8acb..b7aae40e371a 100644
--- a/examples/rust/transform/src/main.rs
+++ b/examples/rust/transform/src/main.rs
@@ -1,14 +1,15 @@
 use std::sync::Arc;
 
 use arrow::{
-    array::{Array, Float32Array, Float64Array, ListArray, StructArray},
+    array::{Array, Float32Array, Float64Array, ListArray, StringArray, StructArray},
     datatypes::{DataType, Field},
 };
 use rerun::{
-    ComponentDescriptor, DynamicArchetype, RecordingStream, Scalars, TextDocument, TimeCell,
+    ComponentDescriptor, ComponentIdentifier, DynamicArchetype, EntityPath, RecordingStream,
+    Scalars, SeriesLines, SeriesPoints, TextDocument, TimeCell,
     dataframe::{EntityPathFilter, ResolvedEntityPathFilter},
-    external::re_log,
-    log::{Chunk, ChunkId, LogMsg},
+    external::{nohash_hasher::IntMap, re_log},
+    log::{Chunk, ChunkComponents, ChunkId, LogMsg},
     sink::{GrpcSink, PipelineTransform},
 };
 
@@ -79,12 +80,152 @@ impl PipelineTransform for PerChunkPiplineTransform {
     }
 }
 
+type ComponentBatchFunc = Box<
+    dyn Fn(Arc<dyn Array>, &EntityPath) -> Vec<(EntityPath, ComponentDescriptor, Arc<dyn Array>)>
+        + Send
+        + Sync,
+>;
+
+pub struct ComponentBatchTransform {
+    /// The entity path to apply the transformation to.
+    pub filter: ResolvedEntityPathFilter,
+
+    /// The component that we want to select.
+ pub component: ComponentIdentifier, + + /// A closure that outputs a list of chunks + pub func: ComponentBatchFunc, +} + +pub struct ComponentBatchPipelineTransform { + transforms: Vec, +} + +impl ComponentBatchTransform { + pub fn new( + entity_path_filter: EntityPathFilter, + component: impl Into, + func: F, + ) -> Self + where + F: Fn( + Arc, + &EntityPath, + ) -> Vec<(EntityPath, ComponentDescriptor, Arc)> + + Send + + Sync + + 'static, + { + Self { + filter: entity_path_filter.resolve_without_substitutions(), + component: component.into(), + func: Box::new(func), + } + } +} + +fn apply_to_chunk(transform: &ComponentBatchTransform, chunk: &Chunk) -> Vec { + let found = chunk + .components() + .iter() + .find(|(descr, _array)| descr.component == transform.component); + + // TODO: This means we drop chunks that belong to the same entity but don't have the component. + let Some((_component_descr, outer_array)) = found else { + return Default::default(); + }; + + let inner_array = outer_array.values(); + + // TODO: + // * unwrap array + // * Guarantee that there is only one component descr + let mut builders = IntMap::default(); + let results = (transform.func)(inner_array.clone(), chunk.entity_path()); + for (entity_path, component_descr, new_array) in results { + let components = builders + .entry(entity_path) + .or_insert_with(ChunkComponents::default); + + if components.contains_component(&component_descr) { + re_log::warn_once!( + "Replacing duplicated component {}", + component_descr.component + ); + } + + components.insert( + component_descr, + ListArray::new( + Field::new_list_field(new_array.data_type().clone(), true).into(), + outer_array.offsets().clone(), + // TODO: box from the start + new_array.into(), + outer_array.nulls().cloned(), + ), + ); + } + + builders + .into_iter() + .filter_map(|(entity_path, components)| { + Chunk::from_auto_row_ids( + ChunkId::new(), + entity_path.clone(), + chunk.timelines().clone(), + components, + ) + .inspect_err(|err| { + re_log::error_once!("Failed to build chunk at entity path '{entity_path}': {err}") + }) + .ok() + }) + .collect() +} + +impl PipelineTransform for ComponentBatchPipelineTransform { + fn apply(&self, msg: LogMsg) -> Vec { + match &msg { + LogMsg::SetStoreInfo(_) | LogMsg::BlueprintActivationCommand(_) => { + vec![msg] + } + LogMsg::ArrowMsg(store_id, arrow_msg) => match Chunk::from_arrow_msg(arrow_msg) { + Ok(chunk) => { + let mut relevant = self + .transforms + .iter() + .filter(|transform| transform.filter.matches(chunk.entity_path())) + .peekable(); + if relevant.peek().is_some() { + relevant + .flat_map(|transform| apply_to_chunk(transform, &chunk)) + .filter_map(|transformed| match transformed.to_arrow_msg() { + Ok(arrow_msg) => { + Some(LogMsg::ArrowMsg(store_id.clone(), arrow_msg)) + } + Err(err) => { + re_log::error_once!( + "failed to create log message from chunk: {err}" + ); + None + } + }) + .collect() + } else { + vec![msg] + } + } + Err(err) => { + re_log::error_once!("Failed to convert arrow message to chunk: {err}"); + vec![msg] + } + }, + } + } +} + +fn per_chunk_pipeline() -> anyhow::Result { let instruction_transform = PerChunkTransform { filter: "/instructions" .parse::()? 
@@ -167,10 +308,99 @@ fn main() -> anyhow::Result<()> { }), }; - let transform = PerChunkPiplineTransform { + Ok(PerChunkPiplineTransform { transforms: vec![instruction_transform, destructure_transform], - } - .to_sink(GrpcSink::default()); + }) +} + +fn per_column_pipline() -> anyhow::Result { + // Takes an existing component that has the right backing data and apply a new component descriptor too it. + // TODO: For these simple cases, we could have premade constructors that hide the closure. This could also lead to more efficient Python mappings. + let instruction_transform = ComponentBatchTransform::new( + "/instructions".parse()?, + "com.Example.Instruction:text", + |array, entity_path| vec![(entity_path.clone(), TextDocument::descriptor_text(), array)], + ); + + // Extracts two fields from a struct, and adds them to new sub-entities as scalars. + let destructure_transform = ComponentBatchTransform::new( + "/nested".parse()?, + "com.Example.Nested:payload", + |array, entity_path| { + let struct_array = array.as_any().downcast_ref::().unwrap(); + + let child_a_array = struct_array.column_by_name("a").unwrap(); + let child_a_array = arrow::compute::cast(child_a_array, &DataType::Float64).unwrap(); + + let child_b_array = struct_array.column_by_name("b").unwrap(); + + vec![ + ( + entity_path.join(&EntityPath::parse_forgiving("a")), + Scalars::descriptor_scalars(), + child_a_array, + ), + ( + entity_path.join(&EntityPath::parse_forgiving("b")), + Scalars::descriptor_scalars(), + child_b_array.clone(), + ), + ] + }, + ); + + let flag_transform = ComponentBatchTransform::new( + "/flag".parse()?, + "com.Example.Flag:flag", + |array, entity_path| { + let flag_array = array.as_any().downcast_ref::().unwrap(); + + let scalar_array: Float64Array = flag_array + .iter() + .map(|s| { + s.map(|v| match v { + "ACTIVE" => 1.0, + "INACTIVE" => 2.0, + _ => f64::NAN, + // _ => 0.0, + }) + }) + .collect(); + + vec![ + ( + entity_path.clone(), + Scalars::descriptor_scalars(), + Arc::new(scalar_array), + ), + // TODO: Very sad that we need to log this multiple times. We need static chunks without timelines. + ( + entity_path.clone(), + SeriesPoints::descriptor_marker_sizes(), + Arc::new(Float32Array::from(vec![5.0; 10])), + ), + ( + entity_path.clone(), + SeriesLines::descriptor_widths(), + Arc::new(Float32Array::from(vec![3.0; 10])), + ), + ] + }, + ); + + Ok(ComponentBatchPipelineTransform { + transforms: vec![instruction_transform, destructure_transform, flag_transform], + }) +} + +fn main() -> anyhow::Result<()> { + re_log::setup_logging(); + + use clap::Parser as _; + let args = Args::parse(); + + // let transform = per_chunk_pipeline()?.to_sink(GrpcSink::default()); + let transform = per_column_pipline()?.to_sink(GrpcSink::default()); let (rec, _serve_guard) = args.rerun.init("rerun_example_transform")?; // TODO: There should be a way to do this in one go. 
@@ -186,6 +416,7 @@ fn run(rec: &rerun::RecordingStream, args: &Args) -> anyhow::Result<()> { if args.filepaths.is_empty() { log_instructions(rec)?; log_structs_with_scalars(rec)?; + log_flag(rec)?; return Ok(()); } @@ -207,6 +438,21 @@ fn run(rec: &rerun::RecordingStream, args: &Args) -> anyhow::Result<()> { Ok(()) } +fn log_flag(rec: &RecordingStream) -> anyhow::Result<()> { + let flags = ["ACTIVE", "ACTIVE", "INACTIVE", "UNKNOWN"]; + for x in 0..10i64 { + let flag = StringArray::from(vec![flags[x as usize % flags.len()]]); + rec.set_time("tick", TimeCell::from_sequence(x)); + rec.log( + "flag", + &DynamicArchetype::new("com.Example.Flag") + .with_component_from_data("flag", Arc::new(flag)), + )? + } + + Ok(()) +} + fn log_instructions(rec: &RecordingStream) -> anyhow::Result<()> { rec.set_time("tick", TimeCell::from_sequence(1)); rec.log( @@ -223,7 +469,7 @@ fn log_instructions(rec: &RecordingStream) -> anyhow::Result<()> { } fn log_structs_with_scalars(rec: &RecordingStream) -> anyhow::Result<()> { - for x in 0..10 { + for x in 0..10i64 { let a = Float32Array::from(vec![1.0 * x as f32, 2.0 + x as f32, 3.0 + x as f32]); let b = Float64Array::from(vec![5.0 * x as f64, 6.0 + x as f64, 7.0 + x as f64]); From ef2ab016fb518f359f0069fd6b5dc496d8112cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Tue, 30 Sep 2025 09:26:24 +0200 Subject: [PATCH 05/12] Add nullability test case --- Cargo.lock | 1 + examples/rust/transform/Cargo.toml | 3 + examples/rust/transform/src/main.rs | 334 +++++++++++++++++++++++----- 3 files changed, 282 insertions(+), 56 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9c37135b6de0..fa9f95aec133 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11250,6 +11250,7 @@ dependencies = [ "anyhow", "arrow", "clap", + "insta", "rerun", ] diff --git a/examples/rust/transform/Cargo.toml b/examples/rust/transform/Cargo.toml index d933f66f894f..615d9ab92115 100644 --- a/examples/rust/transform/Cargo.toml +++ b/examples/rust/transform/Cargo.toml @@ -15,3 +15,6 @@ rerun = { path = "../../../crates/top/rerun", features = [ anyhow = "1.0" arrow.workspace = true clap = { version = "4.0", features = ["derive"] } + +[dev-dependencies] +insta.workspace = true diff --git a/examples/rust/transform/src/main.rs b/examples/rust/transform/src/main.rs index b7aae40e371a..3dbc2e9c1fc9 100644 --- a/examples/rust/transform/src/main.rs +++ b/examples/rust/transform/src/main.rs @@ -1,14 +1,21 @@ -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; use arrow::{ - array::{Array, Float32Array, Float64Array, ListArray, StringArray, StructArray}, + array::{ + Array, FixedSizeListBuilder, Float32Array, Float32Builder, Float64Array, Float64Builder, + ListArray, ListBuilder, StringArray, StringBuilder, StructArray, StructBuilder, + }, datatypes::{DataType, Field}, }; use rerun::{ ComponentDescriptor, ComponentIdentifier, DynamicArchetype, EntityPath, RecordingStream, - Scalars, SeriesLines, SeriesPoints, TextDocument, TimeCell, - dataframe::{EntityPathFilter, ResolvedEntityPathFilter}, - external::{nohash_hasher::IntMap, re_log}, + Scalars, SeriesLines, SeriesPoints, StoreId, TextDocument, TimeCell, TimeColumn, Timeline, + dataframe::{EntityPathFilter, ResolvedEntityPathFilter, TimelineName}, + external::{ + nohash_hasher::IntMap, + re_format_arrow::{self, RecordBatchFormatOpts}, + re_log, + }, log::{Chunk, ChunkComponents, ChunkId, LogMsg}, sink::{GrpcSink, PipelineTransform}, }; @@ -80,11 +87,44 @@ impl PipelineTransform for PerChunkPiplineTransform { } } -type 
ComponentBatchFunc = Box< - dyn Fn(Arc, &EntityPath) -> Vec<(EntityPath, ComponentDescriptor, Arc)> - + Send - + Sync, ->; +// TODO: This looks like a weird love-child between `SerializedComponentColumn` and `ComponentColumnDescriptor`. +struct TransformedColumn { + entity_path: EntityPath, + component_descr: ComponentDescriptor, + // TODO: This is currently still expecting the inner column. + component_data: Arc, + is_static: bool, +} + +impl TransformedColumn { + pub fn new( + entity_path: EntityPath, + component_descr: ComponentDescriptor, + array: Arc, + ) -> Self { + Self { + entity_path, + component_descr, + component_data: array, + is_static: false, + } + } + pub fn new_static( + entity_path: EntityPath, + component_descr: ComponentDescriptor, + array: Arc, + ) -> Self { + Self { + entity_path, + component_descr, + component_data: array, + is_static: true, + } + } +} + +type ComponentBatchFunc = + Box, &EntityPath) -> Vec + Send + Sync>; pub struct ComponentBatchTransform { /// The entity path to apply the transformation to. @@ -108,13 +148,7 @@ impl ComponentBatchTransform { func: F, ) -> Self where - F: Fn( - Arc, - &EntityPath, - ) -> Vec<(EntityPath, ComponentDescriptor, Arc)> - + Send - + Sync - + 'static, + F: Fn(Arc, &EntityPath) -> Vec + Send + Sync + 'static, { Self { filter: entity_path_filter.resolve_without_substitutions(), @@ -140,27 +174,26 @@ fn apply_to_chunk(transform: &ComponentBatchTransform, chunk: &Chunk) -> Vec Vec anyhow::Result { }) } -fn per_column_pipline() -> anyhow::Result { - // Takes an existing component that has the right backing data and apply a new component descriptor too it. - // TODO: For these simple cases, we could have premade constructors that hide the closure. This could also lead to more efficient Python mappings. - let instruction_transform = ComponentBatchTransform::new( - "/instructions".parse()?, - "com.Example.Instruction:text", - |array, entity_path| vec![(entity_path.clone(), TextDocument::descriptor_text(), array)], - ); - - // Extracts two fields from a struct, and adds them to new sub-entities as scalars. - let destructure_transform = ComponentBatchTransform::new( - "/nested".parse()?, - "com.Example.Nested:payload", +// Extracts two fields from a struct, and adds them to new sub-entities as scalars. +fn destructure_transform( + entity_path_filter: EntityPathFilter, + component: impl Into, +) -> ComponentBatchTransform { + ComponentBatchTransform::new( + entity_path_filter, + component.into(), |array, entity_path| { let struct_array = array.as_any().downcast_ref::().unwrap(); @@ -335,20 +367,39 @@ fn per_column_pipline() -> anyhow::Result { let child_b_array = struct_array.column_by_name("b").unwrap(); vec![ - ( + TransformedColumn::new( entity_path.join(&EntityPath::parse_forgiving("a")), Scalars::descriptor_scalars(), child_a_array, ), - ( + TransformedColumn::new( entity_path.join(&EntityPath::parse_forgiving("b")), Scalars::descriptor_scalars(), child_b_array.clone(), ), ] }, + ) +} + +fn per_column_pipline() -> anyhow::Result { + // Takes an existing component that has the right backing data and apply a new component descriptor too it. + // TODO: For these simple cases, we could have premade constructors that hide the closure. This could also lead to more efficient Python mappings. 
+ let instruction_transform = ComponentBatchTransform::new( + "/instructions".parse()?, + "com.Example.Instruction:text", + |array, entity_path| { + vec![TransformedColumn { + entity_path: entity_path.clone(), + component_descr: TextDocument::descriptor_text(), + component_data: array, + is_static: false, + }] + }, ); + let destructure_transform = + destructure_transform("/nested".parse().unwrap(), "com.Example.Nested:payload"); let flag_transform = ComponentBatchTransform::new( "/flag".parse()?, "com.Example.Flag:flag", @@ -368,18 +419,18 @@ fn per_column_pipline() -> anyhow::Result { .collect(); vec![ - ( + TransformedColumn::new( entity_path.clone(), Scalars::descriptor_scalars(), Arc::new(scalar_array), ), - // TODO: Very sad that we need to log this multiple times. We need static chunks without timelines. - ( + TransformedColumn::new_static( entity_path.clone(), SeriesPoints::descriptor_marker_sizes(), + // TODO: get rid of the 10 here Arc::new(Float32Array::from(vec![5.0; 10])), ), - ( + TransformedColumn::new_static( entity_path.clone(), SeriesLines::descriptor_widths(), Arc::new(Float32Array::from(vec![3.0; 10])), @@ -417,6 +468,7 @@ fn run(rec: &rerun::RecordingStream, args: &Args) -> anyhow::Result<()> { log_instructions(rec)?; log_structs_with_scalars(rec)?; log_flag(rec)?; + log_columns_with_nullability(rec)?; return Ok(()); } @@ -493,3 +545,173 @@ fn log_structs_with_scalars(rec: &RecordingStream) -> anyhow::Result<()> { Ok(()) } + +fn log_columns_with_nullability(rec: &RecordingStream) -> anyhow::Result<()> { + let chunk = nullability_chunk(); + rec.send_chunk(chunk); + Ok(()) +} + +/// Creates a chunk that contains all sorts of validity, nullability, and empty lists. +// ┌──────────────┬──────────┐ +// │ [{a:0,b:0}] │ ["zero"] │ +// ├──────────────┼──────────┤ +// │[{a:1,b:null}]│ ["one"] │ +// ├──────────────┼──────────┤ +// │ [] │ [] │ +// ├──────────────┼──────────┤ +// │ null │["three"] │ +// ├──────────────┼──────────┤ +// │ [{a:4,b:4}] │ null │ +// ├──────────────┼──────────┤ +// │ [null] │ ["five"] │ +// └──────────────┴──────────┘ +fn nullability_chunk() -> Chunk { + let mut struct_column_builder = ListBuilder::new(StructBuilder::new( + [ + Arc::new(Field::new("a", DataType::Float32, true)), + Arc::new(Field::new("b", DataType::Float64, true)), + ], + vec![ + Box::new(Float32Builder::new()), + Box::new(Float64Builder::new()), + ], + )); + let mut string_column_builder = ListBuilder::new(StringBuilder::new()); + + // row 0 + struct_column_builder + .values() + .field_builder::(0) + .unwrap() + .append_value(0.0); + struct_column_builder + .values() + .field_builder::(1) + .unwrap() + .append_value(0.0); + struct_column_builder.values().append(true); + struct_column_builder.append(true); + + string_column_builder.values().append_value("zero"); + string_column_builder.append(true); + + // row 1 + struct_column_builder + .values() + .field_builder::(0) + .unwrap() + .append_value(1.0); + struct_column_builder + .values() + .field_builder::(1) + .unwrap() + .append_null(); + struct_column_builder.values().append(true); + struct_column_builder.append(true); + + string_column_builder.values().append_value("one"); + string_column_builder.append(true); + + // row 2 + struct_column_builder.append(true); // empty list + + string_column_builder.append(true); // empty list + + // row 3 + struct_column_builder.append(false); // null + + string_column_builder.values().append_value("three"); + string_column_builder.append(true); + + // row 4 + struct_column_builder + .values() + 
.field_builder::(0) + .unwrap() + .append_value(4.0); + struct_column_builder + .values() + .field_builder::(1) + .unwrap() + .append_value(4.0); + struct_column_builder.values().append(true); + struct_column_builder.append(true); + + string_column_builder.append(false); // null + + // row 5 + struct_column_builder + .values() + .field_builder::(0) + .unwrap() + .append_null(); // placeholder for null struct + struct_column_builder + .values() + .field_builder::(1) + .unwrap() + .append_null(); // placeholder for null struct + struct_column_builder.values().append(false); // null struct element + struct_column_builder.append(true); + + string_column_builder.values().append_value("five"); + string_column_builder.append(true); + + let struct_column = struct_column_builder.finish(); + let string_column = string_column_builder.finish(); + + let components = [ + (ComponentDescriptor::partial("structs"), struct_column), + (ComponentDescriptor::partial("strings"), string_column), + ] + .into_iter(); + + let time_column = TimeColumn::new_sequence("tick", [0, 1, 2, 3, 4, 5]); + + let chunk = Chunk::from_auto_row_ids( + ChunkId::new(), + "nullability".into(), + [(TimelineName::new("tick"), time_column)] + .into_iter() + .collect(), + components.collect(), + ) + .unwrap(); + + chunk +} + +const FORMAT_OPTS: RecordBatchFormatOpts = RecordBatchFormatOpts { + transposed: false, + width: Some(240usize), + include_metadata: false, + include_column_metadata: true, + trim_field_names: true, + trim_metadata_keys: true, + trim_metadata_values: true, + redact_non_deterministic: true, +}; + +#[test] +fn test_destructure() { + let chunk = nullability_chunk(); + println!("{chunk}"); + let arrow_msg = nullability_chunk().to_arrow_msg().unwrap(); + let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg); + + let pipeline = ComponentBatchPipelineTransform { + transforms: vec![destructure_transform( + "nullability".parse().unwrap(), + "structs", + )], + }; + + let mut res = pipeline.apply(msg); + assert_eq!(res.len(), 2); + + let transformed_batch = res[0].arrow_record_batch_mut().unwrap(); + insta::assert_snapshot!(re_format_arrow::format_record_batch_opts( + transformed_batch, + &FORMAT_OPTS, + )) +} From e5a222264c504bcd38d354297dccd3035b01a9cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Tue, 30 Sep 2025 10:05:25 +0200 Subject: [PATCH 06/12] better testing --- examples/rust/transform/src/main.rs | 249 +++++++++++++----- .../transform__test__destructure_cast.snap | 35 +++ .../transform__test__destructure_only.snap | 35 +++ 3 files changed, 247 insertions(+), 72 deletions(-) create mode 100644 examples/rust/transform/src/snapshots/transform__test__destructure_cast.snap create mode 100644 examples/rust/transform/src/snapshots/transform__test__destructure_only.snap diff --git a/examples/rust/transform/src/main.rs b/examples/rust/transform/src/main.rs index 3dbc2e9c1fc9..2dd5178c0925 100644 --- a/examples/rust/transform/src/main.rs +++ b/examples/rust/transform/src/main.rs @@ -350,14 +350,25 @@ fn per_chunk_pipeline() -> anyhow::Result { }) } -// Extracts two fields from a struct, and adds them to new sub-entities as scalars. -fn destructure_transform( - entity_path_filter: EntityPathFilter, - component: impl Into, -) -> ComponentBatchTransform { - ComponentBatchTransform::new( - entity_path_filter, - component.into(), +fn per_column_pipline() -> anyhow::Result { + // Takes an existing component that has the right backing data and apply a new component descriptor too it. 
+ // TODO: For these simple cases, we could have premade constructors that hide the closure. This could also lead to more efficient Python mappings. + let instruction_transform = ComponentBatchTransform::new( + "/instructions".parse()?, + "com.Example.Instruction:text", + |array, entity_path| { + vec![TransformedColumn { + entity_path: entity_path.clone(), + component_descr: TextDocument::descriptor_text(), + component_data: array, + is_static: false, + }] + }, + ); + + let destructure_transform = ComponentBatchTransform::new( + "/nested".parse().unwrap(), + "com.Example.Nested:payload", |array, entity_path| { let struct_array = array.as_any().downcast_ref::().unwrap(); @@ -379,27 +390,8 @@ fn destructure_transform( ), ] }, - ) -} - -fn per_column_pipline() -> anyhow::Result { - // Takes an existing component that has the right backing data and apply a new component descriptor too it. - // TODO: For these simple cases, we could have premade constructors that hide the closure. This could also lead to more efficient Python mappings. - let instruction_transform = ComponentBatchTransform::new( - "/instructions".parse()?, - "com.Example.Instruction:text", - |array, entity_path| { - vec![TransformedColumn { - entity_path: entity_path.clone(), - component_descr: TextDocument::descriptor_text(), - component_data: array, - is_static: false, - }] - }, ); - let destructure_transform = - destructure_transform("/nested".parse().unwrap(), "com.Example.Nested:payload"); let flag_transform = ComponentBatchTransform::new( "/flag".parse()?, "com.Example.Flag:flag", @@ -553,19 +545,21 @@ fn log_columns_with_nullability(rec: &RecordingStream) -> anyhow::Result<()> { } /// Creates a chunk that contains all sorts of validity, nullability, and empty lists. -// ┌──────────────┬──────────┐ -// │ [{a:0,b:0}] │ ["zero"] │ -// ├──────────────┼──────────┤ -// │[{a:1,b:null}]│ ["one"] │ -// ├──────────────┼──────────┤ -// │ [] │ [] │ -// ├──────────────┼──────────┤ -// │ null │["three"] │ -// ├──────────────┼──────────┤ -// │ [{a:4,b:4}] │ null │ -// ├──────────────┼──────────┤ -// │ [null] │ ["five"] │ -// └──────────────┴──────────┘ +// ┌──────────────┬───────────┐ +// │ [{a:0,b:0}] │ ["zero"] │ +// ├──────────────┼───────────┤ +// │[{a:1,b:null}]│["one","1"]│ +// ├──────────────┼───────────┤ +// │ [] │ [] │ +// ├──────────────┼───────────┤ +// │ null │ ["three"] │ +// ├──────────────┼───────────┤ +// │ [{a:4,b:4}] │ null │ +// ├──────────────┼───────────┤ +// │ [null] │ ["five"] │ +// ├──────────────┼───────────┤ +// │ [{a:6,b:6}] │ [null] │ +// └──────────────┴───────────┘ fn nullability_chunk() -> Chunk { let mut struct_column_builder = ListBuilder::new(StructBuilder::new( [ @@ -611,6 +605,7 @@ fn nullability_chunk() -> Chunk { struct_column_builder.append(true); string_column_builder.values().append_value("one"); + string_column_builder.values().append_value("1"); string_column_builder.append(true); // row 2 @@ -657,6 +652,23 @@ fn nullability_chunk() -> Chunk { string_column_builder.values().append_value("five"); string_column_builder.append(true); + // row 6 + struct_column_builder + .values() + .field_builder::(0) + .unwrap() + .append_value(6.0); + struct_column_builder + .values() + .field_builder::(1) + .unwrap() + .append_value(6.0); + struct_column_builder.values().append(true); + struct_column_builder.append(true); + + string_column_builder.values().append_null(); + string_column_builder.append(true); + let struct_column = struct_column_builder.finish(); let string_column = 
string_column_builder.finish(); @@ -666,9 +678,9 @@ fn nullability_chunk() -> Chunk { ] .into_iter(); - let time_column = TimeColumn::new_sequence("tick", [0, 1, 2, 3, 4, 5]); + let time_column = TimeColumn::new_sequence("tick", [0, 1, 2, 3, 4, 5, 6]); - let chunk = Chunk::from_auto_row_ids( + Chunk::from_auto_row_ids( ChunkId::new(), "nullability".into(), [(TimelineName::new("tick"), time_column)] @@ -676,42 +688,135 @@ fn nullability_chunk() -> Chunk { .collect(), components.collect(), ) - .unwrap(); - - chunk + .unwrap() } -const FORMAT_OPTS: RecordBatchFormatOpts = RecordBatchFormatOpts { - transposed: false, - width: Some(240usize), - include_metadata: false, - include_column_metadata: true, - trim_field_names: true, - trim_metadata_keys: true, - trim_metadata_values: true, - redact_non_deterministic: true, -}; +#[cfg(test)] +mod test { + use super::*; + use rerun::external::re_format_arrow::RecordBatchFormatOpts; + + const FORMAT_OPTS: RecordBatchFormatOpts = RecordBatchFormatOpts { + transposed: false, + width: Some(240usize), + include_metadata: true, + include_column_metadata: true, + trim_field_names: true, + trim_metadata_keys: true, + trim_metadata_values: true, + redact_non_deterministic: true, + }; -#[test] -fn test_destructure() { - let chunk = nullability_chunk(); - println!("{chunk}"); - let arrow_msg = nullability_chunk().to_arrow_msg().unwrap(); - let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg); + #[test] + fn test_destructure_cast() { + let chunk = nullability_chunk(); + println!("{chunk}"); + let arrow_msg = nullability_chunk().to_arrow_msg().unwrap(); + let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg); - let pipeline = ComponentBatchPipelineTransform { - transforms: vec![destructure_transform( + let destructure_transform = ComponentBatchTransform::new( "nullability".parse().unwrap(), "structs", - )], - }; + |array, entity_path| { + let struct_array = array.as_any().downcast_ref::().unwrap(); + + let child_a_array = struct_array.column_by_name("a").unwrap(); + let child_a_array = + arrow::compute::cast(child_a_array, &DataType::Float64).unwrap(); + + vec![TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("a")), + Scalars::descriptor_scalars(), + child_a_array, + )] + }, + ); + + let pipeline = ComponentBatchPipelineTransform { + transforms: vec![destructure_transform], + }; + + let mut res = pipeline.apply(msg.clone()); + assert_eq!(res.len(), 1); + + let transformed_batch = res[0].arrow_record_batch_mut().unwrap(); + insta::assert_snapshot!( + "destructure_cast", + re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,) + ); + } + + #[test] + fn test_destructure() { + let chunk = nullability_chunk(); + println!("{chunk}"); + let arrow_msg = nullability_chunk().to_arrow_msg().unwrap(); + let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg); + + let destructure_transform = ComponentBatchTransform::new( + "nullability".parse().unwrap(), + "structs", + |array, entity_path| { + let struct_array = array.as_any().downcast_ref::().unwrap(); + + let child_b_array = struct_array.column_by_name("b").unwrap(); - let mut res = pipeline.apply(msg); - assert_eq!(res.len(), 2); + vec![TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("b")), + Scalars::descriptor_scalars(), + child_b_array.clone(), + )] + }, + ); + + let pipeline = ComponentBatchPipelineTransform { + transforms: vec![destructure_transform], + }; + + let mut res = pipeline.apply(msg); + assert_eq!(res.len(), 
1); + + let transformed_batch = res[0].arrow_record_batch_mut().unwrap(); + insta::assert_snapshot!( + "destructure_only", + re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,) + ) + } + + #[test] + fn test_inner_count() { + let chunk = nullability_chunk(); + println!("{chunk}"); + let arrow_msg = nullability_chunk().to_arrow_msg().unwrap(); + let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg); + + let destructure_transform = ComponentBatchTransform::new( + "nullability".parse().unwrap(), + "strings", + |array, entity_path| { + let struct_array = array.as_any().downcast_ref::().unwrap(); - let transformed_batch = res[0].arrow_record_batch_mut().unwrap(); - insta::assert_snapshot!(re_format_arrow::format_record_batch_opts( - transformed_batch, - &FORMAT_OPTS, - )) + let child_b_array = struct_array.column_by_name("b").unwrap(); + + vec![TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("b")), + Scalars::descriptor_scalars(), + child_b_array.clone(), + )] + }, + ); + + let pipeline = ComponentBatchPipelineTransform { + transforms: vec![destructure_transform], + }; + + let mut res = pipeline.apply(msg); + assert_eq!(res.len(), 1); + + let transformed_batch = res[0].arrow_record_batch_mut().unwrap(); + insta::assert_snapshot!( + "inner_count", + re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,) + ) + } } diff --git a/examples/rust/transform/src/snapshots/transform__test__destructure_cast.snap b/examples/rust/transform/src/snapshots/transform__test__destructure_cast.snap new file mode 100644 index 000000000000..63d9c3a91587 --- /dev/null +++ b/examples/rust/transform/src/snapshots/transform__test__destructure_cast.snap @@ -0,0 +1,35 @@ +--- +source: examples/rust/transform/src/main.rs +expression: "re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,)" +--- +┌───────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /nullability/a │ +│ * heap_size_bytes: [**REDACTED**] │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────┬────────────────────────────┐ │ +│ │ RowId ┆ tick ┆ Scalars:scalars │ │ +│ │ --- ┆ --- ┆ --- │ │ +│ │ type: FixedSizeBinary[16] ┆ type: i64 ┆ type: List[nullable f64] │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ archetype: Scalars │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: Scalars:scalars │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: Scalar │ │ +│ │ kind: control ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════╪════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 0 ┆ [0.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ [1.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2 ┆ [] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 3 ┆ null │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 4 ┆ [4.0] │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 5 ┆ [null] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 6 ┆ [6.0] │ │ +│ └───────────────────────────────────────────────┴──────────────────┴────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/examples/rust/transform/src/snapshots/transform__test__destructure_only.snap b/examples/rust/transform/src/snapshots/transform__test__destructure_only.snap new file mode 100644 index 000000000000..ea1bce60bf3f --- /dev/null +++ b/examples/rust/transform/src/snapshots/transform__test__destructure_only.snap @@ -0,0 +1,35 @@ +--- +source: examples/rust/transform/src/main.rs +expression: "re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,)" +--- +┌───────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /nullability/b │ +│ * heap_size_bytes: [**REDACTED**] │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────┬────────────────────────────┐ │ +│ │ RowId ┆ tick ┆ Scalars:scalars │ │ +│ │ --- ┆ --- ┆ --- │ │ +│ │ type: FixedSizeBinary[16] ┆ type: i64 ┆ type: List[nullable f64] │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ archetype: Scalars │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ component: Scalars:scalars │ │ +│ │ is_sorted: true ┆ kind: index ┆ component_type: Scalar │ │ +│ │ kind: control ┆ ┆ kind: data │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════╪════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 0 ┆ [0.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ [null] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2 ┆ [] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 3 ┆ null │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 4 ┆ [4.0] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 5 ┆ [null] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 6 ┆ [6.0] │ │ +│ └───────────────────────────────────────────────┴──────────────────┴────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────────────────────────────────────┘ From 7de07df91295d8a4d8564a08e4056e0159435f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Tue, 30 Sep 2025 15:52:45 +0200 Subject: [PATCH 07/12] More tests and examples --- examples/rust/transform/src/main.rs | 334 ++++++++---------- .../transform__test__inner_count.snap | 35 ++ 2 files changed, 181 insertions(+), 188 deletions(-) create mode 100644 examples/rust/transform/src/snapshots/transform__test__inner_count.snap diff --git a/examples/rust/transform/src/main.rs 
b/examples/rust/transform/src/main.rs index 2dd5178c0925..a85bc7d65ced 100644 --- a/examples/rust/transform/src/main.rs +++ b/examples/rust/transform/src/main.rs @@ -2,20 +2,18 @@ use std::{collections::HashMap, sync::Arc}; use arrow::{ array::{ - Array, FixedSizeListBuilder, Float32Array, Float32Builder, Float64Array, Float64Builder, - ListArray, ListBuilder, StringArray, StringBuilder, StructArray, StructBuilder, + Array, Float32Array, Float32Builder, Float64Array, Float64Builder, ListArray, ListBuilder, + StringArray, StringBuilder, StructArray, StructBuilder, }, datatypes::{DataType, Field}, + ipc::ListArgs, }; use rerun::{ ComponentDescriptor, ComponentIdentifier, DynamicArchetype, EntityPath, RecordingStream, - Scalars, SeriesLines, SeriesPoints, StoreId, TextDocument, TimeCell, TimeColumn, Timeline, + Scalars, SerializedComponentColumn, SeriesLines, SeriesPoints, TextDocument, TimeCell, + TimeColumn, dataframe::{EntityPathFilter, ResolvedEntityPathFilter, TimelineName}, - external::{ - nohash_hasher::IntMap, - re_format_arrow::{self, RecordBatchFormatOpts}, - re_log, - }, + external::re_log, log::{Chunk, ChunkComponents, ChunkId, LogMsg}, sink::{GrpcSink, PipelineTransform}, }; @@ -87,44 +85,55 @@ impl PipelineTransform for PerChunkPiplineTransform { } } +fn extract_field(list_array: ListArray, column_name: &str) -> ListArray { + let (_, offsets, values, nulls) = list_array.into_parts(); + let struct_array = values.as_any().downcast_ref::().unwrap(); + let column = struct_array.column_by_name(column_name).unwrap(); + ListArray::new( + Arc::new(Field::new_list_field(column.data_type().clone(), true)), + offsets, + column.clone(), + nulls, + ) +} + +fn cast_component_batch(list_array: ListArray, to_inner_type: &DataType) -> ListArray { + let (field, offsets, ref array, nulls) = list_array.into_parts(); + let res = arrow::compute::cast(array, to_inner_type).unwrap(); + ListArray::new( + Arc::new(Field::new_list_field(res.data_type().clone(), true)), + offsets, + res, + nulls, + ) +} + // TODO: This looks like a weird love-child between `SerializedComponentColumn` and `ComponentColumnDescriptor`. struct TransformedColumn { entity_path: EntityPath, - component_descr: ComponentDescriptor, - // TODO: This is currently still expecting the inner column. - component_data: Arc, + column: SerializedComponentColumn, is_static: bool, } impl TransformedColumn { - pub fn new( - entity_path: EntityPath, - component_descr: ComponentDescriptor, - array: Arc, - ) -> Self { + pub fn new(entity_path: EntityPath, column: SerializedComponentColumn) -> Self { Self { entity_path, - component_descr, - component_data: array, + column, is_static: false, } } - pub fn new_static( - entity_path: EntityPath, - component_descr: ComponentDescriptor, - array: Arc, - ) -> Self { + pub fn new_static(entity_path: EntityPath, column: SerializedComponentColumn) -> Self { Self { entity_path, - component_descr, - component_data: array, + column, is_static: true, } } } type ComponentBatchFunc = - Box, &EntityPath) -> Vec + Send + Sync>; + Box Vec + Send + Sync>; pub struct ComponentBatchTransform { /// The entity path to apply the transformation to. 
@@ -148,7 +157,7 @@ impl ComponentBatchTransform { func: F, ) -> Self where - F: Fn(Arc, &EntityPath) -> Vec + Send + Sync + 'static, + F: Fn(ListArray, &EntityPath) -> Vec + Send + Sync + 'static, { Self { filter: entity_path_filter.resolve_without_substitutions(), @@ -165,38 +174,28 @@ fn apply_to_chunk(transform: &ComponentBatchTransform, chunk: &Chunk) -> Vec anyhow::Result { - let instruction_transform = PerChunkTransform { - filter: "/instructions" - .parse::()? - .resolve_without_substitutions(), // TODO: call the right thing here. - func: Box::new(|chunk: &rerun::log::Chunk| { - let mut components = chunk.components().clone(); - - let maybe_array = components - .get(&ComponentDescriptor { - archetype: Some("com.Example.Instruction".into()), - component: "com.Example.Instruction:text".into(), - component_type: None, - }) - .cloned(); - if let Some(array) = maybe_array { - components.insert(TextDocument::descriptor_text(), array); - } - - let mut new_chunk = chunk.clone().components_removed().with_id(ChunkId::new()); - for (component_descr, array) in components.iter() { - new_chunk - .add_component(component_descr.clone(), array.clone()) - .unwrap(); - } - vec![new_chunk] - }), - }; - - let destructure_transform = PerChunkTransform { - filter: "/nested" - .parse::()? - .resolve_without_substitutions(), // TODO: call the right thing here. - func: Box::new(|chunk: &rerun::log::Chunk| { - let mut components = chunk.components().clone(); - - let maybe_array = components - .get(&ComponentDescriptor { - archetype: Some("com.Example.Nested".into()), - component: "com.Example.Nested:payload".into(), - component_type: None, - }) - .cloned(); - - if let Some(list_struct_array) = maybe_array { - let list_array = list_struct_array - .as_any() - .downcast_ref::() - .unwrap(); - - let struct_array = list_array - .values() - .as_any() - .downcast_ref::() - .unwrap(); - - let child_b_array = struct_array.column_by_name("b").unwrap(); - - let field = Arc::new(Field::new_list_field( - child_b_array.data_type().clone(), - true, - )); - - let new_list_array = ListArray::new( - field, - list_array.offsets().clone(), // Use ListArray's offsets - child_b_array.clone(), // Values from field "b" - list_array.nulls().cloned(), // Preserve null mask - ); - - components.insert(Scalars::descriptor_scalars(), new_list_array); - } - - let mut new_chunk = chunk.clone().components_removed().with_id(ChunkId::new()); - for (component_descr, array) in components.iter() { - new_chunk - .add_component(component_descr.clone(), array.clone()) - .unwrap(); - } - vec![new_chunk] - }), - }; - - Ok(PerChunkPiplineTransform { - transforms: vec![instruction_transform, destructure_transform], - }) -} - fn per_column_pipline() -> anyhow::Result { - // Takes an existing component that has the right backing data and apply a new component descriptor too it. - // TODO: For these simple cases, we could have premade constructors that hide the closure. This could also lead to more efficient Python mappings. 
let instruction_transform = ComponentBatchTransform::new( "/instructions".parse()?, "com.Example.Instruction:text", |array, entity_path| { vec![TransformedColumn { entity_path: entity_path.clone(), - component_descr: TextDocument::descriptor_text(), - component_data: array, + column: SerializedComponentColumn { + descriptor: TextDocument::descriptor_text(), + list_array: array, + }, is_static: false, }] }, @@ -370,23 +281,25 @@ fn per_column_pipline() -> anyhow::Result { "/nested".parse().unwrap(), "com.Example.Nested:payload", |array, entity_path| { - let struct_array = array.as_any().downcast_ref::().unwrap(); - - let child_a_array = struct_array.column_by_name("a").unwrap(); - let child_a_array = arrow::compute::cast(child_a_array, &DataType::Float64).unwrap(); + let list_array_a = extract_field(array.clone(), "a"); + let list_array_a = cast_component_batch(list_array_a, &DataType::Float64); - let child_b_array = struct_array.column_by_name("b").unwrap(); + let list_array_b = extract_field(array, "b"); vec![ TransformedColumn::new( entity_path.join(&EntityPath::parse_forgiving("a")), - Scalars::descriptor_scalars(), - child_a_array, + SerializedComponentColumn { + descriptor: Scalars::descriptor_scalars(), + list_array: list_array_a, + }, ), TransformedColumn::new( entity_path.join(&EntityPath::parse_forgiving("b")), - Scalars::descriptor_scalars(), - child_b_array.clone(), + SerializedComponentColumn { + descriptor: Scalars::descriptor_scalars(), + list_array: list_array_b, + }, ), ] }, @@ -395,8 +308,9 @@ fn per_column_pipline() -> anyhow::Result { let flag_transform = ComponentBatchTransform::new( "/flag".parse()?, "com.Example.Flag:flag", - |array, entity_path| { - let flag_array = array.as_any().downcast_ref::().unwrap(); + |list_array, entity_path| { + let (_, offsets, values, nulls) = list_array.into_parts(); + let flag_array = values.as_any().downcast_ref::().unwrap(); let scalar_array: Float64Array = flag_array .iter() @@ -404,29 +318,45 @@ fn per_column_pipline() -> anyhow::Result { s.map(|v| match v { "ACTIVE" => 1.0, "INACTIVE" => 2.0, - _ => f64::NAN, - // _ => 0.0, + _ => 0.0, }) }) .collect(); + let list_array = ListArray::new( + Arc::new(Field::new_list_field( + scalar_array.data_type().clone(), + true, + )), + offsets, + Arc::new(scalar_array), + nulls, + ); + + let series_points = SeriesPoints::new() + .with_marker_sizes([5.0]) + .columns_of_unit_batches() + .unwrap() + .next() + .unwrap(); + + let series_lines = SeriesLines::new() + .with_widths([3.0]) + .columns_of_unit_batches() + .unwrap() + .next() + .unwrap(); + vec![ TransformedColumn::new( entity_path.clone(), - Scalars::descriptor_scalars(), - Arc::new(scalar_array), - ), - TransformedColumn::new_static( - entity_path.clone(), - SeriesPoints::descriptor_marker_sizes(), - // TODO: get rid of the 10 here - Arc::new(Float32Array::from(vec![5.0; 10])), - ), - TransformedColumn::new_static( - entity_path.clone(), - SeriesLines::descriptor_widths(), - Arc::new(Float32Array::from(vec![3.0; 10])), + SerializedComponentColumn { + list_array, + descriptor: Scalars::descriptor_scalars(), + }, ), + TransformedColumn::new_static(entity_path.clone(), series_points), + TransformedColumn::new_static(entity_path.clone(), series_lines), ] }, ); @@ -694,7 +624,11 @@ fn nullability_chunk() -> Chunk { #[cfg(test)] mod test { use super::*; - use rerun::external::re_format_arrow::RecordBatchFormatOpts; + use arrow::array::{FixedSizeListBuilder, Int32Builder}; + use rerun::{ + StoreId, + external::re_format_arrow::{self, 
RecordBatchFormatOpts}, + }; const FORMAT_OPTS: RecordBatchFormatOpts = RecordBatchFormatOpts { transposed: false, @@ -717,17 +651,16 @@ mod test { let destructure_transform = ComponentBatchTransform::new( "nullability".parse().unwrap(), "structs", - |array, entity_path| { - let struct_array = array.as_any().downcast_ref::().unwrap(); - - let child_a_array = struct_array.column_by_name("a").unwrap(); - let child_a_array = - arrow::compute::cast(child_a_array, &DataType::Float64).unwrap(); + |list_array, entity_path| { + let list_array = extract_field(list_array, "a"); + let list_array = cast_component_batch(list_array, &DataType::Float64); vec![TransformedColumn::new( entity_path.join(&EntityPath::parse_forgiving("a")), - Scalars::descriptor_scalars(), - child_a_array, + SerializedComponentColumn { + list_array, + descriptor: Scalars::descriptor_scalars(), + }, )] }, ); @@ -756,15 +689,15 @@ mod test { let destructure_transform = ComponentBatchTransform::new( "nullability".parse().unwrap(), "structs", - |array, entity_path| { - let struct_array = array.as_any().downcast_ref::().unwrap(); - - let child_b_array = struct_array.column_by_name("b").unwrap(); + |list_array, entity_path| { + let list_array = extract_field(list_array, "b"); vec![TransformedColumn::new( entity_path.join(&EntityPath::parse_forgiving("b")), - Scalars::descriptor_scalars(), - child_b_array.clone(), + SerializedComponentColumn { + list_array, + descriptor: Scalars::descriptor_scalars(), + }, )] }, ); @@ -790,24 +723,49 @@ mod test { let arrow_msg = nullability_chunk().to_arrow_msg().unwrap(); let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg); - let destructure_transform = ComponentBatchTransform::new( + let count_transform = ComponentBatchTransform::new( "nullability".parse().unwrap(), "strings", - |array, entity_path| { - let struct_array = array.as_any().downcast_ref::().unwrap(); - - let child_b_array = struct_array.column_by_name("b").unwrap(); + |list_array, entity_path| { + // We keep the original `list_array` around for better comparability. 
+ let original_list_array = list_array.clone(); + let mut builder = ListBuilder::new(Int32Builder::new()); + + for maybe_array in list_array.iter() { + match maybe_array { + None => builder.append_null(), + Some(component_batch_array) => { + builder + .values() + .append_value(component_batch_array.len() as i32); + builder.append(true); + } + } + } - vec![TransformedColumn::new( - entity_path.join(&EntityPath::parse_forgiving("b")), - Scalars::descriptor_scalars(), - child_b_array.clone(), - )] + let list_array = builder.finish(); + + vec![ + TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("b_count")), + SerializedComponentColumn { + list_array, + descriptor: ComponentDescriptor::partial("counts"), + }, + ), + TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("b_count")), + SerializedComponentColumn { + list_array: original_list_array, + descriptor: ComponentDescriptor::partial("original"), + }, + ), + ] }, ); let pipeline = ComponentBatchPipelineTransform { - transforms: vec![destructure_transform], + transforms: vec![count_transform], }; let mut res = pipeline.apply(msg); diff --git a/examples/rust/transform/src/snapshots/transform__test__inner_count.snap b/examples/rust/transform/src/snapshots/transform__test__inner_count.snap new file mode 100644 index 000000000000..1f7a7249cd9c --- /dev/null +++ b/examples/rust/transform/src/snapshots/transform__test__inner_count.snap @@ -0,0 +1,35 @@ +--- +source: examples/rust/transform/src/main.rs +expression: "re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,)" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /nullability/b_count │ +│ * heap_size_bytes: [**REDACTED**] │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────┬──────────────────────────┬───────────────────────────┐ │ +│ │ RowId ┆ tick ┆ counts ┆ original │ │ +│ │ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: FixedSizeBinary[16] ┆ type: i64 ┆ type: List[nullable i32] ┆ type: List[nullable Utf8] │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ index_name: tick ┆ component: counts ┆ component: original │ │ +│ │ ARROW:extension:name: TUID ┆ is_sorted: true ┆ kind: data ┆ kind: data │ │ +│ │ is_sorted: true ┆ kind: index ┆ ┆ │ │ +│ │ kind: control ┆ ┆ ┆ │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════╪══════════════════════════╪═══════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 0 ┆ [1] ┆ [zero] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ [2] ┆ [one, 1] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 2 ┆ [0] ┆ [] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 3 ┆ [1] ┆ [three] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 4 ┆ null ┆ null │ │ +│ 
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 5 ┆ [1] ┆ [five] │ │ +│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ +│ │ row_[**REDACTED**] ┆ 6 ┆ [1] ┆ [null] │ │ +│ └───────────────────────────────────────────────┴──────────────────┴──────────────────────────┴───────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ From 39acd87bd8889730933d9f757ac1dc890e5cf33b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Tue, 30 Sep 2025 15:58:42 +0200 Subject: [PATCH 08/12] Add `-` formatting option for redacted values --- crates/store/re_chunk/src/chunk.rs | 2 +- crates/store/re_chunk/tests/formatting.rs | 31 +------------- .../formatting__format_chunk_redacted.snap | 42 +++++++++---------- crates/store/re_dataframe/src/query.rs | 3 +- crates/store/re_format_arrow/src/lib.rs | 5 ++- crates/store/re_sorbet/src/chunk_batch.rs | 2 +- crates/store/re_sorbet/src/sorbet_batch.rs | 2 +- crates/utils/re_mcap/src/layers/protobuf.rs | 31 +------------- 8 files changed, 31 insertions(+), 87 deletions(-) diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs index 8d664059021c..ab7c0da49398 100644 --- a/crates/store/re_chunk/src/chunk.rs +++ b/crates/store/re_chunk/src/chunk.rs @@ -1247,7 +1247,7 @@ impl std::fmt::Display for Chunk { re_log::error_once!("couldn't display Chunk: {err}"); std::fmt::Error })?; - re_format_arrow::format_record_batch_with_width(&batch, f.width()).fmt(f) + re_format_arrow::format_record_batch_with_width(&batch, f.width(), f.sign_minus()).fmt(f) } } diff --git a/crates/store/re_chunk/tests/formatting.rs b/crates/store/re_chunk/tests/formatting.rs index 79bd43c56b0f..04890fcf1205 100644 --- a/crates/store/re_chunk/tests/formatting.rs +++ b/crates/store/re_chunk/tests/formatting.rs @@ -55,40 +55,11 @@ fn format_chunk() -> anyhow::Result<()> { Ok(()) } -/// Wrapper struct to help with `insta` snapshot tests. 
-struct ChunkRedacted(Chunk); - -impl std::fmt::Display for ChunkRedacted { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let batch = self.0.to_record_batch().map_err(|err| { - re_log::error_once!("couldn't display Chunk: {err}"); - std::fmt::Error - })?; - re_format_arrow::format_record_batch_opts( - &batch, - &re_format_arrow::RecordBatchFormatOpts { - transposed: false, - width: f.width(), - include_metadata: true, - include_column_metadata: true, - trim_field_names: false, - trim_metadata_keys: false, - trim_metadata_values: false, - redact_non_deterministic: true, - }, - ) - .fmt(f) - } -} - #[test] fn format_chunk_redacted() -> anyhow::Result<()> { let chunk = create_chunk()?; - insta::assert_snapshot!( - "format_chunk_redacted", - format!("{:240}", ChunkRedacted(chunk)) - ); + insta::assert_snapshot!("format_chunk_redacted", format!("{:-240}", chunk)); Ok(()) } diff --git a/crates/store/re_chunk/tests/snapshots/formatting__format_chunk_redacted.snap b/crates/store/re_chunk/tests/snapshots/formatting__format_chunk_redacted.snap index 83a0081047a1..309824048fd6 100644 --- a/crates/store/re_chunk/tests/snapshots/formatting__format_chunk_redacted.snap +++ b/crates/store/re_chunk/tests/snapshots/formatting__format_chunk_redacted.snap @@ -1,24 +1,24 @@ --- source: crates/store/re_chunk/tests/formatting.rs -expression: "format!(\"{:240}\", ChunkRedacted(chunk))" +expression: "format!(\"{:-240}\", chunk)" --- -┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ METADATA: │ -│ * rerun:entity_path: /this/that │ -│ * rerun:heap_size_bytes: [**REDACTED**] │ -│ * rerun:id: [**REDACTED**] │ -│ * sorbet:version: [**REDACTED**] │ -├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ ┌──────────────────────────────────────────────┬────────────────────────────┬───────────────────────────────┬───────────────────────────────────────┬──────────────────────────────────────────┐ │ -│ │ rerun.controls.RowId ┆ frame_nr ┆ log_time ┆ my_index ┆ example.MyPoints:colors │ │ -│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ -│ │ type: FixedSizeBinary[16] ┆ type: i64 ┆ type: Timestamp(ns) ┆ type: List[nullable u64] ┆ type: List[nullable u32] │ │ -│ │ ARROW:extension:metadata: ┆ rerun:index_name: frame_nr ┆ rerun:index_name: log_time ┆ rerun:component: my_index ┆ rerun:archetype: example.MyPoints │ │ -│ │ {"namespace":"row"} ┆ rerun:is_sorted: true ┆ rerun:is_sorted: true ┆ rerun:component_type: example.MyIndex ┆ rerun:component: example.MyPoints:colors │ │ -│ │ ARROW:extension:name: rerun.datatypes.TUID ┆ rerun:kind: index ┆ rerun:kind: index ┆ rerun:kind: data ┆ rerun:component_type: example.MyColor │ │ -│ │ rerun:is_sorted: true ┆ ┆ ┆ ┆ rerun:kind: data │ │ -│ │ rerun:kind: control ┆ ┆ ┆ ┆ │ │ -│ ╞══════════════════════════════════════════════╪════════════════════════════╪═══════════════════════════════╪═══════════════════════════════════════╪══════════════════════════════════════════╡ │ -│ │ row_[**REDACTED**] ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ -│ └──────────────────────────────────────────────┴────────────────────────────┴───────────────────────────────┴───────────────────────────────────────┴──────────────────────────────────────────┘ │ 
-└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /this/that │ +│ * heap_size_bytes: [**REDACTED**] │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌──────────────────────────────────────────────┬──────────────────────┬───────────────────────────────┬─────────────────────────────────┬────────────────────────────────────┐ │ +│ │ RowId ┆ frame_nr ┆ log_time ┆ my_index ┆ example.MyPoints:colors │ │ +│ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ +│ │ type: FixedSizeBinary[16] ┆ type: i64 ┆ type: Timestamp(ns) ┆ type: List[nullable u64] ┆ type: List[nullable u32] │ │ +│ │ ARROW:extension:metadata: ┆ index_name: frame_nr ┆ index_name: log_time ┆ component: my_index ┆ archetype: example.MyPoints │ │ +│ │ {"namespace":"row"} ┆ is_sorted: true ┆ is_sorted: true ┆ component_type: example.MyIndex ┆ component: example.MyPoints:colors │ │ +│ │ ARROW:extension:name: TUID ┆ kind: index ┆ kind: index ┆ kind: data ┆ component_type: example.MyColor │ │ +│ │ is_sorted: true ┆ ┆ ┆ ┆ kind: data │ │ +│ │ kind: control ┆ ┆ ┆ ┆ │ │ +│ ╞══════════════════════════════════════════════╪══════════════════════╪═══════════════════════════════╪═════════════════════════════════╪════════════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ +│ └──────────────────────────────────────────────┴──────────────────────┴───────────────────────────────┴─────────────────────────────────┴────────────────────────────────────┘ │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs index 964ce6ee80f8..8b6af6208331 100644 --- a/crates/store/re_dataframe/src/query.rs +++ b/crates/store/re_dataframe/src/query.rs @@ -1368,7 +1368,8 @@ mod tests { #[inline] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let width = 200; - re_format_arrow::format_record_batch_with_width(&self.0, Some(width)).fmt(f) + re_format_arrow::format_record_batch_with_width(&self.0, Some(width), f.sign_minus()) + .fmt(f) } } diff --git a/crates/store/re_format_arrow/src/lib.rs b/crates/store/re_format_arrow/src/lib.rs index b3b49b740600..7633cd8f53c8 100644 --- a/crates/store/re_format_arrow/src/lib.rs +++ b/crates/store/re_format_arrow/src/lib.rs @@ -200,7 +200,7 @@ impl Default for RecordBatchFormatOpts { /// Nicely format this record batch in a way that fits the terminal. pub fn format_record_batch(batch: &arrow::array::RecordBatch) -> Table { - format_record_batch_with_width(batch, None) + format_record_batch_with_width(batch, None, false) } /// Nicely format this record batch using the specified options. 
@@ -223,6 +223,7 @@ pub fn format_record_batch_opts( pub fn format_record_batch_with_width( batch: &arrow::array::RecordBatch, width: Option, + redact_non_deterministic: bool, ) -> Table { format_dataframe_with_metadata( &batch.schema_ref().metadata.clone().into_iter().collect(), // HashMap -> BTreeMap @@ -236,7 +237,7 @@ pub fn format_record_batch_with_width( trim_field_names: true, trim_metadata_keys: true, trim_metadata_values: true, - redact_non_deterministic: false, + redact_non_deterministic, }, ) } diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs index b394485928b9..d653b52debde 100644 --- a/crates/store/re_sorbet/src/chunk_batch.rs +++ b/crates/store/re_sorbet/src/chunk_batch.rs @@ -114,7 +114,7 @@ impl ChunkBatch { impl std::fmt::Display for ChunkBatch { #[inline] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - re_format_arrow::format_record_batch_with_width(self, f.width()).fmt(f) + re_format_arrow::format_record_batch_with_width(self, f.width(), f.sign_minus()).fmt(f) } } diff --git a/crates/store/re_sorbet/src/sorbet_batch.rs b/crates/store/re_sorbet/src/sorbet_batch.rs index ca0e976b2c92..977cb27d8841 100644 --- a/crates/store/re_sorbet/src/sorbet_batch.rs +++ b/crates/store/re_sorbet/src/sorbet_batch.rs @@ -129,7 +129,7 @@ impl SorbetBatch { impl std::fmt::Display for SorbetBatch { #[inline] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - re_format_arrow::format_record_batch_with_width(self, f.width()).fmt(f) + re_format_arrow::format_record_batch_with_width(self, f.width(), f.sign_minus()).fmt(f) } } diff --git a/crates/utils/re_mcap/src/layers/protobuf.rs b/crates/utils/re_mcap/src/layers/protobuf.rs index 3749e62f5360..064a54666fdd 100644 --- a/crates/utils/re_mcap/src/layers/protobuf.rs +++ b/crates/utils/re_mcap/src/layers/protobuf.rs @@ -547,32 +547,6 @@ mod test { chunks } - /// Wrapper to help with creating nicely formatted chunks to use with `insta`. - struct ChunkRedacted<'a>(&'a Chunk); - - impl std::fmt::Display for ChunkRedacted<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let batch = self.0.to_record_batch().map_err(|err| { - re_log::error_once!("couldn't display Chunk: {err}"); - std::fmt::Error - })?; - re_format_arrow::format_record_batch_opts( - &batch, - &re_format_arrow::RecordBatchFormatOpts { - transposed: false, - width: f.width(), - include_metadata: true, - include_column_metadata: true, - trim_field_names: true, - trim_metadata_keys: true, - trim_metadata_values: true, - redact_non_deterministic: true, - }, - ) - .fmt(f) - } - } - #[test] fn two_simple_rows() { // Writing to the MCAP buffer. 
@@ -615,9 +589,6 @@ mod test {
         let chunks = run_layer(&summary, buffer.as_slice());
         assert_eq!(chunks.len(), 1);
 
-        insta::assert_snapshot!(
-            "two_simple_rows",
-            format!("{:240}", ChunkRedacted(&chunks[0]))
-        );
+        insta::assert_snapshot!("two_simple_rows", format!("{:-240}", &chunks[0]));
     }
 }

From f5bcb12a1d3dcb408769b7f9aa63320c3a5e7b29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jochen=20G=C3=B6rtler?=
Date: Wed, 1 Oct 2025 09:46:12 +0200
Subject: [PATCH 09/12] cleanup

---
 Cargo.lock                                     |  22 +-
 crates/top/re_sdk/src/lenses.rs                | 541 ++++++++++++
 crates/top/re_sdk/src/lib.rs                   |   9 +-
 crates/top/re_sdk/src/log_sink.rs              |  51 --
 ...e_sdk__lenses__test__destructure_cast.snap  |   4 +-
 ...e_sdk__lenses__test__destructure_only.snap  |   4 +-
 .../re_sdk__lenses__test__inner_count.snap     |   4 +-
 examples/rust/{transform => lenses}/Cargo.toml |   2 +-
 examples/rust/{transform => lenses}/README.md  |   4 +-
 examples/rust/lenses/src/main.rs               | 192 +++++
 examples/rust/transform/src/main.rs            | 780 ------------------
 11 files changed, 762 insertions(+), 851 deletions(-)
 create mode 100644 crates/top/re_sdk/src/lenses.rs
 rename examples/rust/transform/src/snapshots/transform__test__destructure_cast.snap => crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__destructure_cast.snap (97%)
 rename examples/rust/transform/src/snapshots/transform__test__destructure_only.snap => crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__destructure_only.snap (97%)
 rename examples/rust/transform/src/snapshots/transform__test__inner_count.snap => crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__inner_count.snap (98%)
 rename examples/rust/{transform => lenses}/Cargo.toml (95%)
 rename examples/rust/{transform => lenses}/README.md (82%)
 create mode 100644 examples/rust/lenses/src/main.rs
 delete mode 100644 examples/rust/transform/src/main.rs

diff --git a/Cargo.lock b/Cargo.lock
index fa9f95aec133..e371ae4b1eb1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4959,6 +4959,17 @@ version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
 
+[[package]]
+name = "lenses"
+version = "0.26.0-alpha.1+dev"
+dependencies = [
+ "anyhow",
+ "arrow",
+ "clap",
+ "insta",
+ "rerun",
+]
+
 [[package]]
 name = "lexical-core"
 version = "1.0.5"
@@ -11243,17 +11254,6 @@ dependencies = [
  "windows-targets 0.52.6",
 ]
 
-[[package]]
-name = "transform"
-version = "0.26.0-alpha.1+dev"
-dependencies = [
- "anyhow",
- "arrow",
- "clap",
- "insta",
- "rerun",
-]
-
 [[package]]
 name = "try-lock"
 version = "0.2.5"

diff --git a/crates/top/re_sdk/src/lenses.rs b/crates/top/re_sdk/src/lenses.rs
new file mode 100644
index 000000000000..8147803485d0
--- /dev/null
+++ b/crates/top/re_sdk/src/lenses.rs
@@ -0,0 +1,541 @@
+use re_chunk::{
+    Chunk, ChunkComponents, ChunkId, ComponentIdentifier, EntityPath,
+    external::arrow::array::ListArray,
+};
+use re_log_types::{EntityPathFilter, LogMsg, ResolvedEntityPathFilter};
+use re_types::SerializedComponentColumn;
+
+use crate::sink::LogSink;
+
+/// A sink which can transform a `LogMsg` and forward the result to an underlying backing `LogSink`.
+///
+/// The sink will only forward components that are matched by a lens specified via [`Self::with_lens`].
+pub struct LensesSink<S> {
+    sink: S,
+    registry: LensRegistry,
+}
+
+impl<S: LogSink> LensesSink<S> {
+    /// Create a new sink with the given lenses.
+    pub fn new(sink: S) -> Self {
+        Self {
+            sink,
+            registry: Default::default(),
+        }
+    }
+
+    /// Adds a [`Lens`] to this sink.
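+    ///
+    /// A minimal usage sketch (`my_lens` is a hypothetical, pre-built [`Lens`];
+    /// any backing `LogSink` such as `GrpcSink` works):
+    ///
+    /// ```ignore
+    /// let sink = LensesSink::new(GrpcSink::default()).with_lens(my_lens);
+    /// rec.set_sink(Box::new(sink));
+    /// ```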
+    pub fn with_lens(mut self, lens: Lens) -> Self {
+        self.registry.lenses.push(lens);
+        self
+    }
+}
+
+impl<S: LogSink> LogSink for LensesSink<S> {
+    fn send(&self, msg: re_log_types::LogMsg) {
+        match &msg {
+            LogMsg::SetStoreInfo(_) | LogMsg::BlueprintActivationCommand(_) => {
+                self.sink.send(msg);
+            }
+            LogMsg::ArrowMsg(store_id, arrow_msg) => match Chunk::from_arrow_msg(arrow_msg) {
+                Ok(chunk) => {
+                    let new_chunks = self.registry.apply(&chunk);
+                    // TODO(grtlr): Should we use `self.sink.send_all` here?
+                    for new_chunk in new_chunks {
+                        match new_chunk.to_arrow_msg() {
+                            Ok(arrow_msg) => {
+                                self.sink
+                                    .send(LogMsg::ArrowMsg(store_id.clone(), arrow_msg));
+                            }
+                            Err(err) => {
+                                re_log::error_once!(
+                                    "failed to create log message from chunk: {err}"
+                                );
+                            }
+                        }
+                    }
+                }
+
+                Err(err) => {
+                    re_log::error_once!("Failed to convert arrow message to chunk: {err}");
+                    self.sink.send(msg);
+                }
+            },
+        }
+    }
+
+    fn flush_blocking(
+        &self,
+        timeout: std::time::Duration,
+    ) -> Result<(), crate::sink::SinkFlushError> {
+        self.sink.flush_blocking(timeout)
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+}
+
+/// TODO: Better defintions + pub / private distinctions.
+pub struct TransformedColumn {
+    /// TODO
+    pub entity_path: EntityPath,
+    /// TODO
+    pub column: SerializedComponentColumn,
+    /// TODO
+    pub is_static: bool,
+}
+
+impl TransformedColumn {
+    /// TODO
+    pub fn new(entity_path: EntityPath, column: SerializedComponentColumn) -> Self {
+        Self {
+            entity_path,
+            column,
+            is_static: false,
+        }
+    }
+
+    /// TODO
+    pub fn new_static(entity_path: EntityPath, column: SerializedComponentColumn) -> Self {
+        Self {
+            entity_path,
+            column,
+            is_static: true,
+        }
+    }
+}
+
+type LensFunc = Box<dyn Fn(ListArray, &EntityPath) -> Vec<TransformedColumn> + Send + Sync>;
+
+/// TODO
+pub struct Lens {
+    /// The entity path to apply the transformation to.
+    pub filter: ResolvedEntityPathFilter,
+
+    /// The component that we want to select.
+    pub component: ComponentIdentifier,
+
+    /// A closure that outputs a list of chunks
+    pub func: LensFunc,
+}
+
+#[derive(Default)]
+struct LensRegistry {
+    lenses: Vec<Lens>,
+}
+
+impl LensRegistry {
+    fn relevant(&self, chunk: &Chunk) -> impl Iterator<Item = &Lens> {
+        self.lenses
+            .iter()
+            .filter(|transform| transform.filter.matches(chunk.entity_path()))
+    }
+
+    /// TODO: This will drop component columns that are not relevant.
+    ///
+    /// Retaining some of the original data could be done via identity lenses, or via multi sinks.
+    pub fn apply(&self, chunk: &Chunk) -> Vec<Chunk> {
+        self.relevant(chunk)
+            .flat_map(|transform| transform.apply(chunk))
+            .collect()
+    }
+}
+
+impl Lens {
+    /// TODO
+    pub fn new<F>(
+        entity_path_filter: EntityPathFilter,
+        component: impl Into<ComponentIdentifier>,
+        func: F,
+    ) -> Self
+    where
+        F: Fn(ListArray, &EntityPath) -> Vec<TransformedColumn> + Send + Sync + 'static,
+    {
+        Self {
+            filter: entity_path_filter.resolve_without_substitutions(),
+            component: component.into(),
+            func: Box::new(func),
+        }
+    }
+
+    fn apply(&self, chunk: &Chunk) -> Vec<Chunk> {
+        let found = chunk
+            .components()
+            .iter()
+            .find(|(descr, _array)| descr.component == self.component);
+
+        // TODO: This means we drop chunks that belong to the same entity but don't have the component.
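+        // Bail out early: without the selected component there is nothing for
+        // this lens to transform.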
+        let Some((_component_descr, list_array)) = found else {
+            return Default::default();
+        };
+
+        // TODO:
+        // * unwrap array
+        // * Guarantee that there is only one component descr
+        let mut builders = ahash::HashMap::default();
+        let results = (self.func)(list_array.clone(), chunk.entity_path());
+        for transformed in results {
+            let components = builders
+                .entry((transformed.entity_path, transformed.is_static))
+                .or_insert_with(ChunkComponents::default);
+
+            if components.contains_component(&transformed.column.descriptor) {
+                re_log::warn_once!(
+                    "Replacing duplicated component {}",
+                    transformed.column.descriptor.component
+                );
+            }
+
+            components.insert(transformed.column.descriptor, transformed.column.list_array);
+        }
+
+        builders
+            .into_iter()
+            .filter_map(|((entity_path, is_static), components)| {
+                let timelines = if is_static {
+                    Default::default()
+                } else {
+                    chunk.timelines().clone()
+                };
+
+                // TODO: In case of static, should we use sparse rows instead?
+                Chunk::from_auto_row_ids(ChunkId::new(), entity_path.clone(), timelines, components)
+                    .inspect_err(|err| {
+                        re_log::error_once!(
+                            "Failed to build chunk at entity path '{entity_path}': {err}"
+                        );
+                    })
+                    .ok()
+            })
+            .collect()
+    }
+}
+
+/// Provides commonly used transformations of Arrow arrays.
+///
+/// # Experimental
+///
+/// This is an experimental API and may change in future releases.
+pub mod op {
+
+    // TODO(grtlr): Make this into proper objects, with APIs similar to Datafusion's UDFs.
+
+    use std::sync::Arc;
+
+    use re_chunk::external::arrow::{
+        array::{ListArray, StructArray},
+        compute,
+        datatypes::{DataType, Field},
+    };
+
+    /// TODO
+    pub fn extract_field(list_array: ListArray, column_name: &str) -> ListArray {
+        let (_field, offsets, values, nulls) = list_array.into_parts();
+        let struct_array = values.as_any().downcast_ref::<StructArray>().unwrap();
+        let column = struct_array.column_by_name(column_name).unwrap();
+        ListArray::new(
+            Arc::new(Field::new_list_field(column.data_type().clone(), true)),
+            offsets,
+            column.clone(),
+            nulls,
+        )
+    }
+
+    /// TODO
+    pub fn cast_component_batch(list_array: ListArray, to_inner_type: &DataType) -> ListArray {
+        let (_field, offsets, ref array, nulls) = list_array.into_parts();
+        let res = compute::cast(array, to_inner_type).unwrap();
+        ListArray::new(
+            Arc::new(Field::new_list_field(res.data_type().clone(), true)),
+            offsets,
+            res,
+            nulls,
+        )
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::sync::Arc;
+
+    use re_chunk::{
+        TimeColumn, TimelineName,
+        external::arrow::{
+            array::{
+                Float32Builder, Float64Builder, Int32Builder, ListBuilder, StringBuilder,
+                StructBuilder,
+            },
+            datatypes::{DataType, Field},
+        },
+    };
+    use re_types::{ComponentDescriptor, archetypes::Scalars};
+
+    use super::*;
+
+    /// Creates a chunk that contains all sorts of validity, nullability, and empty lists.
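+    //
+    // In the diagram below, the left column is the `structs` component
+    // (elements of `{a: f32, b: f64}`), the right column the `strings` component.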
+    // ┌──────────────┬───────────┐
+    // │ [{a:0,b:0}]  │ ["zero"]  │
+    // ├──────────────┼───────────┤
+    // │[{a:1,b:null}]│["one","1"]│
+    // ├──────────────┼───────────┤
+    // │      []      │    []     │
+    // ├──────────────┼───────────┤
+    // │     null     │ ["three"] │
+    // ├──────────────┼───────────┤
+    // │ [{a:4,b:4}]  │   null    │
+    // ├──────────────┼───────────┤
+    // │    [null]    │ ["five"]  │
+    // ├──────────────┼───────────┤
+    // │ [{a:6,b:6}]  │  [null]   │
+    // └──────────────┴───────────┘
+    fn nullability_chunk() -> Chunk {
+        let mut struct_column_builder = ListBuilder::new(StructBuilder::new(
+            [
+                Arc::new(Field::new("a", DataType::Float32, true)),
+                Arc::new(Field::new("b", DataType::Float64, true)),
+            ],
+            vec![
+                Box::new(Float32Builder::new()),
+                Box::new(Float64Builder::new()),
+            ],
+        ));
+        let mut string_column_builder = ListBuilder::new(StringBuilder::new());
+
+        // row 0
+        struct_column_builder
+            .values()
+            .field_builder::<Float32Builder>(0)
+            .unwrap()
+            .append_value(0.0);
+        struct_column_builder
+            .values()
+            .field_builder::<Float64Builder>(1)
+            .unwrap()
+            .append_value(0.0);
+        struct_column_builder.values().append(true);
+        struct_column_builder.append(true);
+
+        string_column_builder.values().append_value("zero");
+        string_column_builder.append(true);
+
+        // row 1
+        struct_column_builder
+            .values()
+            .field_builder::<Float32Builder>(0)
+            .unwrap()
+            .append_value(1.0);
+        struct_column_builder
+            .values()
+            .field_builder::<Float64Builder>(1)
+            .unwrap()
+            .append_null();
+        struct_column_builder.values().append(true);
+        struct_column_builder.append(true);
+
+        string_column_builder.values().append_value("one");
+        string_column_builder.values().append_value("1");
+        string_column_builder.append(true);
+
+        // row 2
+        struct_column_builder.append(true); // empty list
+
+        string_column_builder.append(true); // empty list
+
+        // row 3
+        struct_column_builder.append(false); // null
+
+        string_column_builder.values().append_value("three");
+        string_column_builder.append(true);
+
+        // row 4
+        struct_column_builder
+            .values()
+            .field_builder::<Float32Builder>(0)
+            .unwrap()
+            .append_value(4.0);
+        struct_column_builder
+            .values()
+            .field_builder::<Float64Builder>(1)
+            .unwrap()
+            .append_value(4.0);
+        struct_column_builder.values().append(true);
+        struct_column_builder.append(true);
+
+        string_column_builder.append(false); // null
+
+        // row 5
+        struct_column_builder
+            .values()
+            .field_builder::<Float32Builder>(0)
+            .unwrap()
+            .append_null(); // placeholder for null struct
+        struct_column_builder
+            .values()
+            .field_builder::<Float64Builder>(1)
+            .unwrap()
+            .append_null(); // placeholder for null struct
+        struct_column_builder.values().append(false); // null struct element
+        struct_column_builder.append(true);
+
+        string_column_builder.values().append_value("five");
+        string_column_builder.append(true);
+
+        // row 6
+        struct_column_builder
+            .values()
+            .field_builder::<Float32Builder>(0)
+            .unwrap()
+            .append_value(6.0);
+        struct_column_builder
+            .values()
+            .field_builder::<Float64Builder>(1)
+            .unwrap()
+            .append_value(6.0);
+        struct_column_builder.values().append(true);
+        struct_column_builder.append(true);
+
+        string_column_builder.values().append_null();
+        string_column_builder.append(true);
+
+        let struct_column = struct_column_builder.finish();
+        let string_column = string_column_builder.finish();
+
+        let components = [
+            (ComponentDescriptor::partial("structs"), struct_column),
+            (ComponentDescriptor::partial("strings"), string_column),
+        ]
+        .into_iter();
+
+        let time_column = TimeColumn::new_sequence("tick", [0, 1, 2, 3, 4, 5, 6]);
+
+        Chunk::from_auto_row_ids(
+            ChunkId::new(),
+            "nullability".into(),
+            std::iter::once((TimelineName::new("tick"),
time_column)).collect(), + components.collect(), + ) + .unwrap() + } + + #[test] + fn test_destructure_cast() { + let original_chunk = nullability_chunk(); + println!("{original_chunk}"); + + let destructure = Lens::new( + "nullability".parse().unwrap(), + "structs", + |list_array, entity_path| { + let list_array = op::extract_field(list_array, "a"); + let list_array = op::cast_component_batch(list_array, &DataType::Float64); + + vec![TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("a")), + SerializedComponentColumn { + list_array, + descriptor: Scalars::descriptor_scalars(), + }, + )] + }, + ); + + let pipeline = LensRegistry { + lenses: vec![destructure], + }; + + let res = pipeline.apply(&original_chunk); + assert_eq!(res.len(), 1); + + let chunk = &res[0]; + insta::assert_snapshot!("destructure_cast", format!("{chunk:-240}")); + } + + #[test] + fn test_destructure() { + let original_chunk = nullability_chunk(); + println!("{original_chunk}"); + + let destructure = Lens::new( + "nullability".parse().unwrap(), + "structs", + |list_array, entity_path| { + let list_array = op::extract_field(list_array, "b"); + + vec![TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("b")), + SerializedComponentColumn { + list_array, + descriptor: Scalars::descriptor_scalars(), + }, + )] + }, + ); + + let pipeline = LensRegistry { + lenses: vec![destructure], + }; + + let res = pipeline.apply(&original_chunk); + assert_eq!(res.len(), 1); + + let chunk = &res[0]; + insta::assert_snapshot!("destructure_only", format!("{chunk:-240}")); + } + + #[test] + fn test_inner_count() { + let original_chunk = nullability_chunk(); + println!("{original_chunk}"); + + let count = Lens::new( + "nullability".parse().unwrap(), + "strings", + |list_array, entity_path| { + // We keep the original `list_array` around for better comparability. 
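+                // It is re-emitted below under the `original` descriptor,
+                // next to the derived `counts` column.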
+ let original_list_array = list_array.clone(); + let mut builder = ListBuilder::new(Int32Builder::new()); + + for maybe_array in list_array.iter() { + match maybe_array { + None => builder.append_null(), + Some(component_batch_array) => { + builder + .values() + .append_value(component_batch_array.len() as i32); + builder.append(true); + } + } + } + + let list_array = builder.finish(); + + vec![ + TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("b_count")), + SerializedComponentColumn { + list_array, + descriptor: ComponentDescriptor::partial("counts"), + }, + ), + TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("b_count")), + SerializedComponentColumn { + list_array: original_list_array, + descriptor: ComponentDescriptor::partial("original"), + }, + ), + ] + }, + ); + + let pipeline = LensRegistry { + lenses: vec![count], + }; + + let res = pipeline.apply(&original_chunk); + assert_eq!(res.len(), 1); + + let chunk = &res[0]; + insta::assert_snapshot!("inner_count", format!("{chunk:-240}")); + } +} diff --git a/crates/top/re_sdk/src/lib.rs b/crates/top/re_sdk/src/lib.rs index d403e5941cf1..7dd064978f3d 100644 --- a/crates/top/re_sdk/src/lib.rs +++ b/crates/top/re_sdk/src/lib.rs @@ -85,7 +85,7 @@ pub mod sink { pub use crate::binary_stream_sink::{BinaryStreamSink, BinaryStreamStorage}; pub use crate::log_sink::{ BufferedSink, CallbackSink, IntoMultiSink, LogSink, MemorySink, MemorySinkStorage, - MultiSink, Pipeline, PipelineTransform, SinkFlushError, + MultiSink, SinkFlushError, }; pub use crate::log_sink::{GrpcSink, GrpcSinkConnectionFailure, GrpcSinkConnectionState}; @@ -116,6 +116,13 @@ pub use re_types::{ SerializedComponentColumn, }; +/// Transformation and reinterpretation of components. +/// +/// # Experimental +/// +/// This is an experimental API and may change in future releases. +pub mod lenses; + pub use re_byte_size::SizeBytes; #[cfg(feature = "data_loaders")] diff --git a/crates/top/re_sdk/src/log_sink.rs b/crates/top/re_sdk/src/log_sink.rs index 78299f872eb3..a4b99ae4a4e2 100644 --- a/crates/top/re_sdk/src/log_sink.rs +++ b/crates/top/re_sdk/src/log_sink.rs @@ -593,54 +593,3 @@ impl LogSink for GrpcSink { self } } - -// ---------------------------------------------------------------------------- -// -// TODO: -// * Move to own file. -// * Better names. - -pub trait PipelineTransform: Send + Sync + 'static { - fn apply(&self, msg: LogMsg) -> Vec; - - fn to_sink(self, sink: S) -> Pipeline - where - Self: Sized, - { - Pipeline { - sink, - transform: self, - } - } -} - -/// A sink which can transform a `LogMsg` and forward the result to an underlying backing `LogSink`. -pub struct Pipeline { - sink: S, - transform: T, -} - -impl Pipeline { - /// Create a new `TransformSink` with the given transform function. 
- #[inline] - pub fn new(sink: S, transform: T) -> Self { - Self { sink, transform } - } -} - -impl LogSink for Pipeline { - fn send(&self, msg: re_log_types::LogMsg) { - self.sink.send_all(self.transform.apply(msg)) - } - - fn flush_blocking( - &self, - timeout: std::time::Duration, - ) -> Result<(), crate::sink::SinkFlushError> { - self.sink.flush_blocking(timeout) - } - - fn as_any(&self) -> &dyn std::any::Any { - self - } -} diff --git a/examples/rust/transform/src/snapshots/transform__test__destructure_cast.snap b/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__destructure_cast.snap similarity index 97% rename from examples/rust/transform/src/snapshots/transform__test__destructure_cast.snap rename to crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__destructure_cast.snap index 63d9c3a91587..11ada273145a 100644 --- a/examples/rust/transform/src/snapshots/transform__test__destructure_cast.snap +++ b/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__destructure_cast.snap @@ -1,6 +1,6 @@ --- -source: examples/rust/transform/src/main.rs -expression: "re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,)" +source: crates/top/re_sdk/src/lenses.rs +expression: "format!(\"{chunk:-240}\")" --- ┌───────────────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ diff --git a/examples/rust/transform/src/snapshots/transform__test__destructure_only.snap b/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__destructure_only.snap similarity index 97% rename from examples/rust/transform/src/snapshots/transform__test__destructure_only.snap rename to crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__destructure_only.snap index ea1bce60bf3f..69225f41bd13 100644 --- a/examples/rust/transform/src/snapshots/transform__test__destructure_only.snap +++ b/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__destructure_only.snap @@ -1,6 +1,6 @@ --- -source: examples/rust/transform/src/main.rs -expression: "re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,)" +source: crates/top/re_sdk/src/lenses.rs +expression: "format!(\"{chunk:-240}\")" --- ┌───────────────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ diff --git a/examples/rust/transform/src/snapshots/transform__test__inner_count.snap b/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__inner_count.snap similarity index 98% rename from examples/rust/transform/src/snapshots/transform__test__inner_count.snap rename to crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__inner_count.snap index 1f7a7249cd9c..3e9203ab9d43 100644 --- a/examples/rust/transform/src/snapshots/transform__test__inner_count.snap +++ b/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__inner_count.snap @@ -1,6 +1,6 @@ --- -source: examples/rust/transform/src/main.rs -expression: "re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,)" +source: crates/top/re_sdk/src/lenses.rs +expression: "format!(\"{chunk:-240}\")" --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ METADATA: │ diff --git a/examples/rust/transform/Cargo.toml b/examples/rust/lenses/Cargo.toml similarity index 95% rename from examples/rust/transform/Cargo.toml rename to examples/rust/lenses/Cargo.toml index 615d9ab92115..8a6772e99816 100644 --- a/examples/rust/transform/Cargo.toml +++ b/examples/rust/lenses/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = 
"transform" +name = "lenses" version = "0.26.0-alpha.1+dev" edition = "2024" rust-version = "1.88" diff --git a/examples/rust/transform/README.md b/examples/rust/lenses/README.md similarity index 82% rename from examples/rust/transform/README.md rename to examples/rust/lenses/README.md index cfcb6b7bb813..efeb98aa3bf4 100644 --- a/examples/rust/transform/README.md +++ b/examples/rust/lenses/README.md @@ -2,7 +2,9 @@ title = "Transform recording stream example" --> + + Demonstrates how to transform log messages before forwarding them to the sink the SDK. ```bash -cargo run -p transform +cargo run -p lenses ``` diff --git a/examples/rust/lenses/src/main.rs b/examples/rust/lenses/src/main.rs new file mode 100644 index 000000000000..ccc99d004795 --- /dev/null +++ b/examples/rust/lenses/src/main.rs @@ -0,0 +1,192 @@ +use std::sync::Arc; + +use arrow::{ + array::{Array, Float32Array, Float64Array, ListArray, StringArray, StructArray}, + datatypes::{DataType, Field}, +}; +use rerun::{ + DynamicArchetype, EntityPath, RecordingStream, Scalars, SerializedComponentColumn, SeriesLines, + SeriesPoints, TextDocument, TimeCell, + external::re_log, + lenses::{Lens, LensesSink, TransformedColumn, op}, + sink::GrpcSink, +}; + +fn lens_instruction() -> anyhow::Result { + Ok(Lens::new( + "/instructions".parse()?, + "com.Example.Instruction:text", + |array, entity_path| { + vec![TransformedColumn { + entity_path: entity_path.clone(), + column: SerializedComponentColumn { + descriptor: TextDocument::descriptor_text(), + list_array: array, + }, + is_static: false, + }] + }, + )) +} + +fn lens_destructure() -> anyhow::Result { + Ok(Lens::new( + "/nested".parse().unwrap(), + "com.Example.Nested:payload", + |array, entity_path| { + let list_array_a = op::extract_field(array.clone(), "a"); + let list_array_a = op::cast_component_batch(list_array_a, &DataType::Float64); + + let list_array_b = op::extract_field(array, "b"); + + vec![ + TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("a")), + SerializedComponentColumn { + descriptor: Scalars::descriptor_scalars(), + list_array: list_array_a, + }, + ), + TransformedColumn::new( + entity_path.join(&EntityPath::parse_forgiving("b")), + SerializedComponentColumn { + descriptor: Scalars::descriptor_scalars(), + list_array: list_array_b, + }, + ), + ] + }, + )) +} + +fn lens_flag() -> anyhow::Result { + Ok(Lens::new( + "/flag".parse()?, + "com.Example.Flag:flag", + |list_array, entity_path| { + let (_, offsets, values, nulls) = list_array.into_parts(); + let flag_array = values.as_any().downcast_ref::().unwrap(); + + let scalar_array: Float64Array = flag_array + .iter() + .map(|s| { + s.map(|v| match v { + "ACTIVE" => 1.0, + "INACTIVE" => 2.0, + _ => 0.0, + }) + }) + .collect(); + + let list_array = ListArray::new( + Arc::new(Field::new_list_field( + scalar_array.data_type().clone(), + true, + )), + offsets, + Arc::new(scalar_array), + nulls, + ); + + let series_points = SeriesPoints::new() + .with_marker_sizes([5.0]) + .columns_of_unit_batches() + .unwrap() + .next() + .unwrap(); + + let series_lines = SeriesLines::new() + .with_widths([3.0]) + .columns_of_unit_batches() + .unwrap() + .next() + .unwrap(); + + vec![ + TransformedColumn::new( + entity_path.clone(), + SerializedComponentColumn { + list_array, + descriptor: Scalars::descriptor_scalars(), + }, + ), + TransformedColumn::new_static(entity_path.clone(), series_points), + TransformedColumn::new_static(entity_path.clone(), series_lines), + ] + }, + )) +} + +fn main() -> anyhow::Result<()> { + 
re_log::setup_logging(); + + let lenses_sink = LensesSink::new(GrpcSink::default()) + .with_lens(lens_instruction()?) + .with_lens(lens_destructure()?) + .with_lens(lens_flag()?); + + let rec = rerun::RecordingStreamBuilder::new("rerun_example_lenses").spawn()?; + rec.set_sink(Box::new(lenses_sink)); + + log_instructions(&rec)?; + log_structs_with_scalars(&rec)?; + log_flag(&rec)?; + + Ok(()) +} + +fn log_flag(rec: &RecordingStream) -> anyhow::Result<()> { + let flags = ["ACTIVE", "ACTIVE", "INACTIVE", "UNKNOWN"]; + for x in 0..10i64 { + let flag = StringArray::from(vec![flags[x as usize % flags.len()]]); + rec.set_time("tick", TimeCell::from_sequence(x)); + rec.log( + "flag", + &DynamicArchetype::new("com.Example.Flag") + .with_component_from_data("flag", Arc::new(flag)), + )? + } + + Ok(()) +} + +fn log_instructions(rec: &RecordingStream) -> anyhow::Result<()> { + rec.set_time("tick", TimeCell::from_sequence(1)); + rec.log( + "instructions", + &DynamicArchetype::new("com.Example.Instruction").with_component_from_data( + "text", + Arc::new(arrow::array::StringArray::from(vec![ + "This is a nice instruction text.", + ])), + ), + )?; + + Ok(()) +} + +fn log_structs_with_scalars(rec: &RecordingStream) -> anyhow::Result<()> { + for x in 0..10i64 { + let a = Float32Array::from(vec![1.0 * x as f32, 2.0 + x as f32, 3.0 + x as f32]); + let b = Float64Array::from(vec![5.0 * x as f64, 6.0 + x as f64, 7.0 + x as f64]); + + let struct_array = StructArray::from(vec![ + ( + Arc::new(Field::new("a", DataType::Float32, false)), + Arc::new(a) as Arc, + ), + ( + Arc::new(Field::new("b", DataType::Float64, false)), + Arc::new(b) as Arc, + ), + ]); + rec.set_time("tick", TimeCell::from_sequence(x)); + rec.log( + "nested", + &DynamicArchetype::new("com.Example.Nested") + .with_component_from_data("payload", Arc::new(struct_array)), + )? + } + + Ok(()) +} diff --git a/examples/rust/transform/src/main.rs b/examples/rust/transform/src/main.rs deleted file mode 100644 index a85bc7d65ced..000000000000 --- a/examples/rust/transform/src/main.rs +++ /dev/null @@ -1,780 +0,0 @@ -use std::{collections::HashMap, sync::Arc}; - -use arrow::{ - array::{ - Array, Float32Array, Float32Builder, Float64Array, Float64Builder, ListArray, ListBuilder, - StringArray, StringBuilder, StructArray, StructBuilder, - }, - datatypes::{DataType, Field}, - ipc::ListArgs, -}; -use rerun::{ - ComponentDescriptor, ComponentIdentifier, DynamicArchetype, EntityPath, RecordingStream, - Scalars, SerializedComponentColumn, SeriesLines, SeriesPoints, TextDocument, TimeCell, - TimeColumn, - dataframe::{EntityPathFilter, ResolvedEntityPathFilter, TimelineName}, - external::re_log, - log::{Chunk, ChunkComponents, ChunkId, LogMsg}, - sink::{GrpcSink, PipelineTransform}, -}; - -#[derive(Debug, clap::Parser)] -#[clap(author, version, about)] -struct Args { - #[command(flatten)] - rerun: rerun::clap::RerunArgs, - - /// The filepaths to be loaded and logged. - filepaths: Vec, -} - -// TODO: Is this the right API. -type ChunkFunc = Box Vec + Send + Sync>; - -pub struct PerChunkTransform { - /// The entity path to apply the transformation to. 
- pub filter: ResolvedEntityPathFilter, - - /// A closure that outputs a list of chunks - pub func: ChunkFunc, -} - -pub struct PerChunkPiplineTransform { - transforms: Vec, -} - -impl PipelineTransform for PerChunkPiplineTransform { - fn apply(&self, msg: LogMsg) -> Vec { - match &msg { - LogMsg::SetStoreInfo(_) | LogMsg::BlueprintActivationCommand(_) => { - vec![msg] - } - LogMsg::ArrowMsg(store_id, arrow_msg) => match Chunk::from_arrow_msg(arrow_msg) { - Ok(chunk) => { - let mut relevant = self - .transforms - .iter() - .filter(|transform| transform.filter.matches(chunk.entity_path())) - .peekable(); - if relevant.peek().is_some() { - relevant - .flat_map(|transform| (*transform.func)(&chunk)) - .filter_map(|transformed| match transformed.to_arrow_msg() { - Ok(arrow_msg) => { - Some(LogMsg::ArrowMsg(store_id.clone(), arrow_msg)) - } - Err(err) => { - re_log::error_once!( - "failed to create log message from chunk: {err}" - ); - None - } - }) - .collect() - } else { - vec![msg] - } - } - - Err(err) => { - re_log::error_once!("Failed to convert arrow message to chunk: {err}"); - vec![msg] - } - }, - } - } -} - -fn extract_field(list_array: ListArray, column_name: &str) -> ListArray { - let (_, offsets, values, nulls) = list_array.into_parts(); - let struct_array = values.as_any().downcast_ref::().unwrap(); - let column = struct_array.column_by_name(column_name).unwrap(); - ListArray::new( - Arc::new(Field::new_list_field(column.data_type().clone(), true)), - offsets, - column.clone(), - nulls, - ) -} - -fn cast_component_batch(list_array: ListArray, to_inner_type: &DataType) -> ListArray { - let (field, offsets, ref array, nulls) = list_array.into_parts(); - let res = arrow::compute::cast(array, to_inner_type).unwrap(); - ListArray::new( - Arc::new(Field::new_list_field(res.data_type().clone(), true)), - offsets, - res, - nulls, - ) -} - -// TODO: This looks like a weird love-child between `SerializedComponentColumn` and `ComponentColumnDescriptor`. -struct TransformedColumn { - entity_path: EntityPath, - column: SerializedComponentColumn, - is_static: bool, -} - -impl TransformedColumn { - pub fn new(entity_path: EntityPath, column: SerializedComponentColumn) -> Self { - Self { - entity_path, - column, - is_static: false, - } - } - pub fn new_static(entity_path: EntityPath, column: SerializedComponentColumn) -> Self { - Self { - entity_path, - column, - is_static: true, - } - } -} - -type ComponentBatchFunc = - Box Vec + Send + Sync>; - -pub struct ComponentBatchTransform { - /// The entity path to apply the transformation to. - pub filter: ResolvedEntityPathFilter, - - /// The component that we want to select. - pub component: ComponentIdentifier, - - /// A closure that outputs a list of chunks - pub func: ComponentBatchFunc, -} - -pub struct ComponentBatchPipelineTransform { - transforms: Vec, -} - -impl ComponentBatchTransform { - pub fn new( - entity_path_filter: EntityPathFilter, - component: impl Into, - func: F, - ) -> Self - where - F: Fn(ListArray, &EntityPath) -> Vec + Send + Sync + 'static, - { - Self { - filter: entity_path_filter.resolve_without_substitutions(), - component: component.into(), - func: Box::new(func), - } - } -} - -fn apply_to_chunk(transform: &ComponentBatchTransform, chunk: &Chunk) -> Vec { - let found = chunk - .components() - .iter() - .find(|(descr, _array)| descr.component == transform.component); - - // TODO: This means we drop chunks that belong to the same entity but don't have the component. 
- let Some((_component_descr, list_array)) = found else { - return Default::default(); - }; - - // TODO: - // * unwrap array - // * Guarantee that there is only one component descr - let mut builders = HashMap::new(); // TODO: Use ahash - let results = (transform.func)(list_array.clone(), chunk.entity_path()); - for transformed in results { - let components = builders - .entry((transformed.entity_path, transformed.is_static)) - .or_insert_with(ChunkComponents::default); - - if components.contains_component(&transformed.column.descriptor) { - re_log::warn_once!( - "Replacing duplicated component {}", - transformed.column.descriptor.component - ); - } - - components.insert(transformed.column.descriptor, transformed.column.list_array); - } - - builders - .into_iter() - .filter_map(|((entity_path, is_static), components)| { - let timelines = if is_static { - Default::default() - } else { - chunk.timelines().clone() - }; - - // TODO: In case of static, should we use sparse rows instead? - Chunk::from_auto_row_ids(ChunkId::new(), entity_path.clone(), timelines, components) - .inspect_err(|err| { - re_log::error_once!( - "Failed to build chunk at entity path '{entity_path}': {err}" - ) - }) - .ok() - }) - .collect() -} - -impl PipelineTransform for ComponentBatchPipelineTransform { - fn apply(&self, msg: LogMsg) -> Vec { - match &msg { - LogMsg::SetStoreInfo(_) | LogMsg::BlueprintActivationCommand(_) => { - vec![msg] - } - LogMsg::ArrowMsg(store_id, arrow_msg) => match Chunk::from_arrow_msg(arrow_msg) { - Ok(chunk) => { - let mut relevant = self - .transforms - .iter() - .filter(|transform| transform.filter.matches(chunk.entity_path())) - .peekable(); - if relevant.peek().is_some() { - relevant - .flat_map(|transform| apply_to_chunk(transform, &chunk)) - .filter_map(|transformed| match transformed.to_arrow_msg() { - Ok(arrow_msg) => { - Some(LogMsg::ArrowMsg(store_id.clone(), arrow_msg)) - } - Err(err) => { - re_log::error_once!( - "failed to create log message from chunk: {err}" - ); - None - } - }) - .collect() - } else { - vec![msg] - } - } - - Err(err) => { - re_log::error_once!("Failed to convert arrow message to chunk: {err}"); - vec![msg] - } - }, - } - } -} - -fn per_column_pipline() -> anyhow::Result { - let instruction_transform = ComponentBatchTransform::new( - "/instructions".parse()?, - "com.Example.Instruction:text", - |array, entity_path| { - vec![TransformedColumn { - entity_path: entity_path.clone(), - column: SerializedComponentColumn { - descriptor: TextDocument::descriptor_text(), - list_array: array, - }, - is_static: false, - }] - }, - ); - - let destructure_transform = ComponentBatchTransform::new( - "/nested".parse().unwrap(), - "com.Example.Nested:payload", - |array, entity_path| { - let list_array_a = extract_field(array.clone(), "a"); - let list_array_a = cast_component_batch(list_array_a, &DataType::Float64); - - let list_array_b = extract_field(array, "b"); - - vec![ - TransformedColumn::new( - entity_path.join(&EntityPath::parse_forgiving("a")), - SerializedComponentColumn { - descriptor: Scalars::descriptor_scalars(), - list_array: list_array_a, - }, - ), - TransformedColumn::new( - entity_path.join(&EntityPath::parse_forgiving("b")), - SerializedComponentColumn { - descriptor: Scalars::descriptor_scalars(), - list_array: list_array_b, - }, - ), - ] - }, - ); - - let flag_transform = ComponentBatchTransform::new( - "/flag".parse()?, - "com.Example.Flag:flag", - |list_array, entity_path| { - let (_, offsets, values, nulls) = list_array.into_parts(); - let flag_array 
= values.as_any().downcast_ref::<StringArray>().unwrap();
-
-            let scalar_array: Float64Array = flag_array
-                .iter()
-                .map(|s| {
-                    s.map(|v| match v {
-                        "ACTIVE" => 1.0,
-                        "INACTIVE" => 2.0,
-                        _ => 0.0,
-                    })
-                })
-                .collect();
-
-            let list_array = ListArray::new(
-                Arc::new(Field::new_list_field(
-                    scalar_array.data_type().clone(),
-                    true,
-                )),
-                offsets,
-                Arc::new(scalar_array),
-                nulls,
-            );
-
-            let series_points = SeriesPoints::new()
-                .with_marker_sizes([5.0])
-                .columns_of_unit_batches()
-                .unwrap()
-                .next()
-                .unwrap();
-
-            let series_lines = SeriesLines::new()
-                .with_widths([3.0])
-                .columns_of_unit_batches()
-                .unwrap()
-                .next()
-                .unwrap();
-
-            vec![
-                TransformedColumn::new(
-                    entity_path.clone(),
-                    SerializedComponentColumn {
-                        list_array,
-                        descriptor: Scalars::descriptor_scalars(),
-                    },
-                ),
-                TransformedColumn::new_static(entity_path.clone(), series_points),
-                TransformedColumn::new_static(entity_path.clone(), series_lines),
-            ]
-        },
-    );
-
-    Ok(ComponentBatchPipelineTransform {
-        transforms: vec![instruction_transform, destructure_transform, flag_transform],
-    })
-}
-
-fn main() -> anyhow::Result<()> {
-    re_log::setup_logging();
-
-    use clap::Parser as _;
-    let args = Args::parse();
-
-    // let transform = per_chunk_pipeline()?.to_sink(GrpcSink::default());
-    let transform = per_column_pipline()?.to_sink(GrpcSink::default());
-
-    let (rec, _serve_guard) = args.rerun.init("rerun_example_transform")?;
-    // TODO: There should be a way to do this in one go.
-    rec.set_sink(Box::new(transform));
-    run(&rec, &args)?;
-
-    Ok(())
-}
-
-fn run(rec: &rerun::RecordingStream, args: &Args) -> anyhow::Result<()> {
-    let prefix = Some("log_file_example".into());
-
-    if args.filepaths.is_empty() {
-        log_instructions(rec)?;
-        log_structs_with_scalars(rec)?;
-        log_flag(rec)?;
-        log_columns_with_nullability(rec)?;
-        return Ok(());
-    }
-
-    for filepath in &args.filepaths {
-        let filepath = filepath.as_path();
-
-        // …or using its contents if you already have them loaded for some reason.
-        if filepath.is_file() {
-            let contents = std::fs::read(filepath)?;
-            rec.log_file_from_contents(
-                filepath,
-                std::borrow::Cow::Borrowed(&contents),
-                prefix.clone(),
-                true, /* static */
-            )?;
-        }
-    }
-
-    Ok(())
-}
-
-fn log_flag(rec: &RecordingStream) -> anyhow::Result<()> {
-    let flags = ["ACTIVE", "ACTIVE", "INACTIVE", "UNKNOWN"];
-    for x in 0..10i64 {
-        let flag = StringArray::from(vec![flags[x as usize % flags.len()]]);
-        rec.set_time("tick", TimeCell::from_sequence(x));
-        rec.log(
-            "flag",
-            &DynamicArchetype::new("com.Example.Flag")
-                .with_component_from_data("flag", Arc::new(flag)),
-        )?
-    }
-
-    Ok(())
-}
-
-fn log_instructions(rec: &RecordingStream) -> anyhow::Result<()> {
-    rec.set_time("tick", TimeCell::from_sequence(1));
-    rec.log(
-        "instructions",
-        &DynamicArchetype::new("com.Example.Instruction").with_component_from_data(
-            "text",
-            Arc::new(arrow::array::StringArray::from(vec![
-                "This is a nice instruction text.",
-            ])),
-        ),
-    )?;
-
-    Ok(())
-}
-
-fn log_structs_with_scalars(rec: &RecordingStream) -> anyhow::Result<()> {
-    for x in 0..10i64 {
-        let a = Float32Array::from(vec![1.0 * x as f32, 2.0 + x as f32, 3.0 + x as f32]);
-        let b = Float64Array::from(vec![5.0 * x as f64, 6.0 + x as f64, 7.0 + x as f64]);
-
-        let struct_array = StructArray::from(vec![
-            (
-                Arc::new(Field::new("a", DataType::Float32, false)),
-                Arc::new(a) as Arc<dyn Array>,
-            ),
-            (
-                Arc::new(Field::new("b", DataType::Float64, false)),
-                Arc::new(b) as Arc<dyn Array>,
-            ),
-        ]);
-        rec.set_time("tick", TimeCell::from_sequence(x));
-        rec.log(
-            "nested",
-            &DynamicArchetype::new("com.Example.Nested")
-                .with_component_from_data("payload", Arc::new(struct_array)),
-        )?
-    }
-
-    Ok(())
-}
-
-fn log_columns_with_nullability(rec: &RecordingStream) -> anyhow::Result<()> {
-    let chunk = nullability_chunk();
-    rec.send_chunk(chunk);
-    Ok(())
-}
-
-/// Creates a chunk that contains all sorts of validity, nullability, and empty lists.
-// ┌──────────────┬───────────┐
-// │ [{a:0,b:0}]  │ ["zero"]  │
-// ├──────────────┼───────────┤
-// │[{a:1,b:null}]│["one","1"]│
-// ├──────────────┼───────────┤
-// │      []      │    []     │
-// ├──────────────┼───────────┤
-// │     null     │ ["three"] │
-// ├──────────────┼───────────┤
-// │ [{a:4,b:4}]  │   null    │
-// ├──────────────┼───────────┤
-// │    [null]    │ ["five"]  │
-// ├──────────────┼───────────┤
-// │ [{a:6,b:6}]  │  [null]   │
-// └──────────────┴───────────┘
-fn nullability_chunk() -> Chunk {
-    let mut struct_column_builder = ListBuilder::new(StructBuilder::new(
-        [
-            Arc::new(Field::new("a", DataType::Float32, true)),
-            Arc::new(Field::new("b", DataType::Float64, true)),
-        ],
-        vec![
-            Box::new(Float32Builder::new()),
-            Box::new(Float64Builder::new()),
-        ],
-    ));
-    let mut string_column_builder = ListBuilder::new(StringBuilder::new());
-
-    // row 0
-    struct_column_builder
-        .values()
-        .field_builder::<Float32Builder>(0)
-        .unwrap()
-        .append_value(0.0);
-    struct_column_builder
-        .values()
-        .field_builder::<Float64Builder>(1)
-        .unwrap()
-        .append_value(0.0);
-    struct_column_builder.values().append(true);
-    struct_column_builder.append(true);
-
-    string_column_builder.values().append_value("zero");
-    string_column_builder.append(true);
-
-    // row 1
-    struct_column_builder
-        .values()
-        .field_builder::<Float32Builder>(0)
-        .unwrap()
-        .append_value(1.0);
-    struct_column_builder
-        .values()
-        .field_builder::<Float64Builder>(1)
-        .unwrap()
-        .append_null();
-    struct_column_builder.values().append(true);
-    struct_column_builder.append(true);
-
-    string_column_builder.values().append_value("one");
-    string_column_builder.values().append_value("1");
-    string_column_builder.append(true);
-
-    // row 2
-    struct_column_builder.append(true); // empty list
-
-    string_column_builder.append(true); // empty list
-
-    // row 3
-    struct_column_builder.append(false); // null
-
-    string_column_builder.values().append_value("three");
-    string_column_builder.append(true);
-
-    // row 4
-    struct_column_builder
-        .values()
-        .field_builder::<Float32Builder>(0)
-        .unwrap()
-        .append_value(4.0);
-    struct_column_builder
-        .values()
-        .field_builder::<Float64Builder>(1)
-        .unwrap()
-        .append_value(4.0);
-    struct_column_builder.values().append(true);
-    struct_column_builder.append(true);
-
-    string_column_builder.append(false); // null
-
-    // row 5
-    struct_column_builder
-        .values()
-        .field_builder::<Float32Builder>(0)
-        .unwrap()
-        .append_null(); // placeholder for null struct
-    struct_column_builder
-        .values()
-        .field_builder::<Float64Builder>(1)
-        .unwrap()
-        .append_null(); // placeholder for null struct
-    struct_column_builder.values().append(false); // null struct element
-    struct_column_builder.append(true);
-
-    string_column_builder.values().append_value("five");
-    string_column_builder.append(true);
-
-    // row 6
-    struct_column_builder
-        .values()
-        .field_builder::<Float32Builder>(0)
-        .unwrap()
-        .append_value(6.0);
-    struct_column_builder
-        .values()
-        .field_builder::<Float64Builder>(1)
-        .unwrap()
-        .append_value(6.0);
-    struct_column_builder.values().append(true);
-    struct_column_builder.append(true);
-
-    string_column_builder.values().append_null();
-    string_column_builder.append(true);
-
-    let struct_column = struct_column_builder.finish();
-    let string_column = string_column_builder.finish();
-
-    let components = [
-        (ComponentDescriptor::partial("structs"), struct_column),
-        (ComponentDescriptor::partial("strings"), string_column),
-    ]
-    .into_iter();
-
-    let time_column = TimeColumn::new_sequence("tick", [0, 1, 2, 3, 4, 5, 6]);
-
-    Chunk::from_auto_row_ids(
-        ChunkId::new(),
-        "nullability".into(),
-        [(TimelineName::new("tick"), time_column)]
-            .into_iter()
-            .collect(),
-        components.collect(),
-    )
-    .unwrap()
-}
-
-#[cfg(test)]
-mod test {
-    use super::*;
-    use arrow::array::{FixedSizeListBuilder, Int32Builder};
-    use rerun::{
-        StoreId,
-        external::re_format_arrow::{self, RecordBatchFormatOpts},
-    };
-
-    const FORMAT_OPTS: RecordBatchFormatOpts = RecordBatchFormatOpts {
-        transposed: false,
-        width: Some(240usize),
-        include_metadata: true,
-        include_column_metadata: true,
-        trim_field_names: true,
-        trim_metadata_keys: true,
-        trim_metadata_values: true,
-        redact_non_deterministic: true,
-    };
-
-    #[test]
-    fn test_destructure_cast() {
-        let chunk = nullability_chunk();
-        println!("{chunk}");
-        let arrow_msg = nullability_chunk().to_arrow_msg().unwrap();
-        let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg);
-
-        let destructure_transform = ComponentBatchTransform::new(
-            "nullability".parse().unwrap(),
-            "structs",
-            |list_array, entity_path| {
-                let list_array = extract_field(list_array, "a");
-                let list_array = cast_component_batch(list_array, &DataType::Float64);
-
-                vec![TransformedColumn::new(
-                    entity_path.join(&EntityPath::parse_forgiving("a")),
-                    SerializedComponentColumn {
-                        list_array,
-                        descriptor: Scalars::descriptor_scalars(),
-                    },
-                )]
-            },
-        );
-
-        let pipeline = ComponentBatchPipelineTransform {
-            transforms: vec![destructure_transform],
-        };
-
-        let mut res = pipeline.apply(msg.clone());
-        assert_eq!(res.len(), 1);
-
-        let transformed_batch = res[0].arrow_record_batch_mut().unwrap();
-        insta::assert_snapshot!(
-            "destructure_cast",
-            re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,)
-        );
-    }
-
-    #[test]
-    fn test_destructure() {
-        let chunk = nullability_chunk();
-        println!("{chunk}");
-        let arrow_msg = nullability_chunk().to_arrow_msg().unwrap();
-        let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg);
-
-        let destructure_transform = ComponentBatchTransform::new(
-            "nullability".parse().unwrap(),
-            "structs",
-            |list_array, entity_path| {
-                let list_array = extract_field(list_array, "b");
-
-                vec![TransformedColumn::new(
-                    entity_path.join(&EntityPath::parse_forgiving("b")),
-                    SerializedComponentColumn {
-                        list_array,
-                        descriptor:
Scalars::descriptor_scalars(), - }, - )] - }, - ); - - let pipeline = ComponentBatchPipelineTransform { - transforms: vec![destructure_transform], - }; - - let mut res = pipeline.apply(msg); - assert_eq!(res.len(), 1); - - let transformed_batch = res[0].arrow_record_batch_mut().unwrap(); - insta::assert_snapshot!( - "destructure_only", - re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,) - ) - } - - #[test] - fn test_inner_count() { - let chunk = nullability_chunk(); - println!("{chunk}"); - let arrow_msg = nullability_chunk().to_arrow_msg().unwrap(); - let msg = LogMsg::ArrowMsg(StoreId::empty_recording(), arrow_msg); - - let count_transform = ComponentBatchTransform::new( - "nullability".parse().unwrap(), - "strings", - |list_array, entity_path| { - // We keep the original `list_array` around for better comparability. - let original_list_array = list_array.clone(); - let mut builder = ListBuilder::new(Int32Builder::new()); - - for maybe_array in list_array.iter() { - match maybe_array { - None => builder.append_null(), - Some(component_batch_array) => { - builder - .values() - .append_value(component_batch_array.len() as i32); - builder.append(true); - } - } - } - - let list_array = builder.finish(); - - vec![ - TransformedColumn::new( - entity_path.join(&EntityPath::parse_forgiving("b_count")), - SerializedComponentColumn { - list_array, - descriptor: ComponentDescriptor::partial("counts"), - }, - ), - TransformedColumn::new( - entity_path.join(&EntityPath::parse_forgiving("b_count")), - SerializedComponentColumn { - list_array: original_list_array, - descriptor: ComponentDescriptor::partial("original"), - }, - ), - ] - }, - ); - - let pipeline = ComponentBatchPipelineTransform { - transforms: vec![count_transform], - }; - - let mut res = pipeline.apply(msg); - assert_eq!(res.len(), 1); - - let transformed_batch = res[0].arrow_record_batch_mut().unwrap(); - insta::assert_snapshot!( - "inner_count", - re_format_arrow::format_record_batch_opts(transformed_batch, &FORMAT_OPTS,) - ) - } -} From c773761cf5a66ec22973d03d1e071f6c65f8e27a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jochen=20G=C3=B6rtler?= Date: Wed, 1 Oct 2025 10:14:41 +0200 Subject: [PATCH 10/12] add docs --- crates/top/re_sdk/src/lenses.rs | 44 ++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/crates/top/re_sdk/src/lenses.rs b/crates/top/re_sdk/src/lenses.rs index 8147803485d0..21815c824a2c 100644 --- a/crates/top/re_sdk/src/lenses.rs +++ b/crates/top/re_sdk/src/lenses.rs @@ -76,18 +76,21 @@ impl LogSink for LensesSink { } } -/// TODO: Better defintions + pub / private distinctions. +/// A transformed column result from applying a lens operation. +/// +/// Contains the output of a lens transformation, including the new entity path, +/// the serialized component data, and whether the data should be treated as static. pub struct TransformedColumn { - /// TODO + /// The entity path where this transformed column should be logged. pub entity_path: EntityPath, - /// TODO + /// The serialized component column containing the transformed data. pub column: SerializedComponentColumn, - /// TODO + /// Whether this column represents static data. pub is_static: bool, } impl TransformedColumn { - /// TODO + /// Creates a new transformed column. pub fn new(entity_path: EntityPath, column: SerializedComponentColumn) -> Self { Self { entity_path, @@ -96,7 +99,7 @@ impl TransformedColumn { } } - /// TODO + /// Creates a new static transformed column. 
     pub fn new_static(entity_path: EntityPath, column: SerializedComponentColumn) -> Self {
         Self {
             entity_path,
@@ -108,7 +111,11 @@ impl TransformedColumn {
 
 type LensFunc = Box<dyn Fn(ListArray, &EntityPath) -> Vec<TransformedColumn> + Send + Sync>;
 
-/// TODO
+/// A lens that transforms component data from one form to another.
+///
+/// Lenses allow you to extract, transform, and restructure component data
+/// as it flows through the logging pipeline. They are applied to chunks
+/// that match the specified entity path filter and contain the target component.
 pub struct Lens {
     /// The entity path to apply the transformation to.
     pub filter: ResolvedEntityPathFilter,
@@ -132,9 +139,11 @@ impl LensRegistry {
             .filter(|transform| transform.filter.matches(chunk.entity_path()))
     }
 
-    /// TODO: This will drop component columns that are not relevant.
+    /// Applies all relevant lenses to a chunk and returns the transformed chunks.
     ///
-    /// Retaining some of the original data could be done via idenity lenses, or via multi sinks.
+    /// This will only transform component columns that match registered lenses.
+    /// Other component columns are dropped. To retain original data, use identity
+    /// lenses or multi-sink configurations.
     pub fn apply(&self, chunk: &Chunk) -> Vec<Chunk> {
         self.relevant(chunk)
             .flat_map(|transform| transform.apply(chunk))
@@ -143,7 +152,12 @@ impl Lens {
-    /// TODO
+    /// Creates a new lens with the specified filter, component, and transformation function.
+    ///
+    /// # Arguments
+    /// * `entity_path_filter` - Filter to match entity paths this lens should apply to
+    /// * `component` - The component identifier to transform
+    /// * `func` - Transformation function that takes a ListArray and EntityPath and returns transformed columns
     pub fn new(
         entity_path_filter: EntityPathFilter,
         component: impl Into<ComponentIdentifier>,
@@ -229,7 +243,10 @@ pub mod op {
         datatypes::{DataType, Field},
     };
 
-    /// TODO
+    /// Extracts a specific field from a struct component within a ListArray.
+    ///
+    /// Takes a ListArray containing StructArrays and extracts the specified field,
+    /// returning a new ListArray containing only that field's data.
     pub fn extract_field(list_array: ListArray, column_name: &str) -> ListArray {
         let (_field, offsets, values, nulls) = list_array.into_parts();
         let struct_array = values.as_any().downcast_ref::<StructArray>().unwrap();
         let column = struct_array.column_by_name(column_name).unwrap();
         ListArray::new(
             Arc::new(Field::new_list_field(column.data_type().clone(), true)),
             offsets,
@@ -242,7 +259,10 @@ pub mod op {
         )
     }
 
-    /// TODO
+    /// Casts the inner array of a ListArray to a different data type.
+    ///
+    /// Performs type casting on the component data within the ListArray,
+    /// preserving the list structure while changing the inner data type.
     pub fn cast_component_batch(list_array: ListArray, to_inner_type: &DataType) -> ListArray {
         let (_field, offsets, ref array, nulls) = list_array.into_parts();
         let res = compute::cast(array, to_inner_type).unwrap();

From b19428fd0c433b6d1b435b8821862b2e9b300905 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jochen=20G=C3=B6rtler?=
Date: Wed, 1 Oct 2025 10:14:54 +0200
Subject: [PATCH 11/12] remove unwraps

---
 crates/top/re_sdk/src/lenses.rs | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/crates/top/re_sdk/src/lenses.rs b/crates/top/re_sdk/src/lenses.rs
index 21815c824a2c..d7f074b9ce93 100644
--- a/crates/top/re_sdk/src/lenses.rs
+++ b/crates/top/re_sdk/src/lenses.rs
@@ -247,10 +247,23 @@ pub mod op {
     ///
     /// Takes a ListArray containing StructArrays and extracts the specified field,
     /// returning a new ListArray containing only that field's data.
+    /// Returns an all-null `ListArray` of the same length if the extraction fails.
     pub fn extract_field(list_array: ListArray, column_name: &str) -> ListArray {
-        let (_field, offsets, values, nulls) = list_array.into_parts();
-        let struct_array = values.as_any().downcast_ref::<StructArray>().unwrap();
-        let column = struct_array.column_by_name(column_name).unwrap();
+        let (field, offsets, values, nulls) = list_array.into_parts();
+        let struct_array = match values.as_any().downcast_ref::<StructArray>() {
+            Some(array) => array,
+            None => {
+                re_log::error_once!("Expected StructArray in ListArray, but found a different type");
+                return ListArray::new_null(field, offsets.len() - 1);
+            }
+        };
+        let column = match struct_array.column_by_name(column_name) {
+            Some(col) => col,
+            None => {
+                re_log::error_once!("Field '{}' not found in struct", column_name);
+                return ListArray::new_null(field, offsets.len() - 1);
+            }
+        };
         ListArray::new(
             Arc::new(Field::new_list_field(column.data_type().clone(), true)),
             offsets,
@@ -263,9 +276,16 @@ pub mod op {
     ///
     /// Performs type casting on the component data within the ListArray,
     /// preserving the list structure while changing the inner data type.
+    /// Returns an all-null `ListArray` of the same length if the cast fails.
     pub fn cast_component_batch(list_array: ListArray, to_inner_type: &DataType) -> ListArray {
-        let (_field, offsets, ref array, nulls) = list_array.into_parts();
-        let res = compute::cast(array, to_inner_type).unwrap();
+        let (field, offsets, ref array, nulls) = list_array.into_parts();
+        let res = match compute::cast(array, to_inner_type) {
+            Ok(casted) => casted,
+            Err(err) => {
+                re_log::error_once!("Failed to cast array to {:?}: {}", to_inner_type, err);
+                return ListArray::new_null(field, offsets.len() - 1);
+            }
+        };
         ListArray::new(
             Arc::new(Field::new_list_field(res.data_type().clone(), true)),
             offsets,

From b6bd58be5e2af8a98c252c82352a1c471301caad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jochen=20G=C3=B6rtler?=
Date: Wed, 1 Oct 2025 10:52:52 +0200
Subject: [PATCH 12/12] add test case for static chunk

---
 crates/top/re_sdk/src/lenses.rs               | 50 +++++++++++++++++++
 .../re_sdk__lenses__test__single_static.snap  | 23 +++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__single_static.snap

diff --git a/crates/top/re_sdk/src/lenses.rs b/crates/top/re_sdk/src/lenses.rs
index d7f074b9ce93..9cac80503f62 100644
--- a/crates/top/re_sdk/src/lenses.rs
+++ b/crates/top/re_sdk/src/lenses.rs
@@ -578,4 +578,54 @@ mod test {
         let chunk = &res[0];
         insta::assert_snapshot!("inner_count", format!("{chunk:-240}"));
     }
+
+    #[test]
+    fn test_static_chunk_creation() {
+        let original_chunk = nullability_chunk();
+
+        let static_lens_a = Lens::new(
+            "nullability".parse().unwrap(),
+            "strings",
+            |_, entity_path| {
+                let mut metadata_builder_a = ListBuilder::new(StringBuilder::new());
+                metadata_builder_a
+                    .values()
+                    .append_value("static_metadata_a");
+                metadata_builder_a.append(true);
+
+                let mut metadata_builder_b = ListBuilder::new(StringBuilder::new());
+                metadata_builder_b
+                    .values()
+                    .append_value("static_metadata_b");
+                metadata_builder_b.append(true);
+
+                vec![
+                    TransformedColumn::new_static(
+                        entity_path.join(&EntityPath::parse_forgiving("static")),
+                        SerializedComponentColumn {
+                            list_array: metadata_builder_a.finish(),
+                            descriptor: ComponentDescriptor::partial("static_metadata_a"),
+                        },
+                    ),
+                    TransformedColumn::new_static(
+                        entity_path.join(&EntityPath::parse_forgiving("static")),
+                        SerializedComponentColumn {
+                            list_array: metadata_builder_b.finish(),
+                            descriptor: ComponentDescriptor::partial("static_metadata_b"),
+ }, + ), + ] + }, + ); + + let pipeline = LensRegistry { + lenses: vec![static_lens_a], + }; + + let res = pipeline.apply(&original_chunk); + assert_eq!(res.len(), 1); + + let chunk = &res[0]; + insta::assert_snapshot!("single_static", format!("{chunk:-240}")); + } } diff --git a/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__single_static.snap b/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__single_static.snap new file mode 100644 index 000000000000..0cd38c20a176 --- /dev/null +++ b/crates/top/re_sdk/src/snapshots/re_sdk__lenses__test__single_static.snap @@ -0,0 +1,23 @@ +--- +source: crates/top/re_sdk/src/lenses.rs +expression: "format!(\"{chunk:-240}\")" +--- +┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ METADATA: │ +│ * entity_path: /nullability/static │ +│ * heap_size_bytes: [**REDACTED**] │ +│ * id: [**REDACTED**] │ +│ * version: [**REDACTED**] │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ ┌───────────────────────────────────────────────┬──────────────────────────────┬──────────────────────────────┐ │ +│ │ RowId ┆ static_metadata_a ┆ static_metadata_b │ │ +│ │ --- ┆ --- ┆ --- │ │ +│ │ type: FixedSizeBinary[16] ┆ type: List[nullable Utf8] ┆ type: List[nullable Utf8] │ │ +│ │ ARROW:extension:metadata: {"namespace":"row"} ┆ component: static_metadata_a ┆ component: static_metadata_b │ │ +│ │ ARROW:extension:name: TUID ┆ kind: data ┆ kind: data │ │ +│ │ is_sorted: true ┆ ┆ │ │ +│ │ kind: control ┆ ┆ │ │ +│ ╞═══════════════════════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ +│ │ row_[**REDACTED**] ┆ [static_metadata_a] ┆ [static_metadata_b] │ │ +│ └───────────────────────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
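
Editor's note on the series so far: patches 10-12 leave the lens pipeline in a usable shape. A `Lens` pairs an entity-path filter and a component with a transform function, a `LensRegistry` fans each chunk out over the matching lenses, and `LensesSink` applies the registry before forwarding to an inner sink. Below is a minimal usage sketch, not code from this series: the `use` paths for the lens types and a `LensesSink::new(inner_sink, registry)` constructor are assumptions (neither the public exports nor such a constructor appear in these patches), and the `"sensors/**"` filter and `"payload"` component are purely illustrative.

```rust
// Reviewer's sketch. The import paths for `Lens`, `LensRegistry`, `LensesSink`,
// `TransformedColumn`, `SerializedComponentColumn`, and `op` are assumptions,
// as is the `LensesSink::new(inner, registry)` constructor.
use rerun::external::arrow::datatypes::DataType;
use rerun::sink::GrpcSink;
use rerun::{EntityPath, Scalars};

fn main() -> anyhow::Result<()> {
    // Extract field "a" from a struct-typed component batch and republish it
    // as float64 scalars under the child path `<entity>/a`, mirroring the
    // `test_destructure_cast` test above.
    let lens = Lens::new(
        "sensors/**".parse()?,
        "payload",
        |list_array, entity_path| {
            let list_array = op::extract_field(list_array, "a");
            let list_array = op::cast_component_batch(list_array, &DataType::Float64);
            vec![TransformedColumn::new(
                entity_path.join(&EntityPath::parse_forgiving("a")),
                SerializedComponentColumn {
                    list_array,
                    descriptor: Scalars::descriptor_scalars(),
                },
            )]
        },
    );

    let registry = LensRegistry { lenses: vec![lens] };

    // Wrap the transport sink with the lens pipeline; the constructor is assumed.
    let rec = rerun::RecordingStreamBuilder::new("rerun_example_lenses").buffered()?;
    rec.set_sink(Box::new(LensesSink::new(GrpcSink::default(), registry)));

    Ok(())
}
```

Note that `LensRegistry::apply` drops any component column that no lens matches (per the docs added in patch 10), so a deployment that also wants the raw data would pair this with an identity lens or a multi-sink setup.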