Skip to content

Commit 789aba4

Browse files
authored
Add re_arrow_combinators::reshape::RowMajorToColumnMajor (#12031)
Required, for example, to read matrices from variable-sized, row-major protobuf lists, e.g. the 3x3 or 3x4 camera matrices found in the `foxglove.CameraCalibration` schema. To do that, we need both a cast to a fixed-size list and a reshaping transformation. --- This is a prerequisite for #12035. Contributes to: https://linear.app/rerun/issue/RR-3045 https://linear.app/rerun/issue/RR-2327
1 parent 0b28573 commit 789aba4

File tree

5 files changed

+221
-4
lines changed

5 files changed

+221
-4
lines changed

crates/store/re_arrow_combinators/src/cast.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
//! Transforms that cast arrays to different types.
22
3+
use std::sync::Arc;
4+
35
use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray};
46
use arrow::compute::cast;
7+
use arrow::datatypes::Field;
58

69
use crate::{Error, Transform};
710

@@ -83,3 +86,54 @@ where
8386
.cloned()
8487
}
8588
}
89+
90+
/// Casts a `ListArray` to a `FixedSizeListArray` with the specified value length.
///
/// The source `ListArray` must have lists of exactly that length (or null).
#[derive(Clone, Debug)]
pub struct ListToFixedSizeList {
    /// Number of elements every non-null list is required to contain.
    ///
    /// Stored as `i32` so it can be handed to the fixed-size list constructor unchanged.
    value_length: i32,
}

impl ListToFixedSizeList {
    /// Create a new `ListToFixedSizeList` transformation with an expected value length.
    ///
    /// `value_length` is the exact number of elements each non-null source list must have.
    pub fn new(value_length: i32) -> Self {
        Self { value_length }
    }
}
104+
105+
impl Transform for ListToFixedSizeList {
106+
type Source = arrow::array::ListArray;
107+
type Target = arrow::array::FixedSizeListArray;
108+
109+
fn transform(&self, source: &Self::Source) -> Result<Self::Target, Error> {
110+
// Check that each list has exactly the expected length (or is null).
111+
let offsets = source.value_offsets();
112+
let expected_length = self.value_length as usize;
113+
for list_index in 0..source.len() {
114+
if source.is_valid(list_index) {
115+
let start = offsets[list_index] as usize;
116+
let end = offsets[list_index + 1] as usize;
117+
let list_length = end - start;
118+
if list_length != expected_length {
119+
return Err(Error::UnexpectedListValueLength {
120+
expected: expected_length,
121+
actual: list_length,
122+
});
123+
}
124+
}
125+
}
126+
127+
// Build the FixedSizeListArray.
128+
let field = Arc::new(Field::new_list_field(
129+
source.value_type().clone(),
130+
source.is_nullable(),
131+
));
132+
Ok(arrow::array::FixedSizeListArray::try_new(
133+
field,
134+
self.value_length,
135+
source.values().clone(),
136+
source.nulls().cloned(),
137+
)?)
138+
}
139+
}

crates/store/re_arrow_combinators/src/error.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ pub enum Error {
4343
#[error("List contains unexpected value type: expected {expected}, got {actual:?}")]
4444
UnexpectedListValueType { expected: String, actual: DataType },
4545

46+
#[error("Expected list with {expected} elements, got {actual}")]
47+
UnexpectedListValueLength { expected: usize, actual: usize },
48+
4649
#[error("Fixed-size list contains unexpected value type: expected {expected}, got {actual:?}")]
4750
UnexpectedFixedSizeListValueType { expected: String, actual: DataType },
4851

crates/store/re_arrow_combinators/src/reshape.rs

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
33
use std::sync::Arc;
44

5-
use arrow::array::{Array, ArrayRef, FixedSizeListArray, ListArray, StructArray, UInt32Array};
5+
use arrow::array::{
6+
Array, ArrayRef, FixedSizeListArray, ListArray, StructArray, UInt32Array, UInt64Array,
7+
};
68
use arrow::buffer::{NullBuffer, OffsetBuffer};
79
use arrow::datatypes::Field;
810

@@ -383,3 +385,77 @@ impl Transform for Explode {
383385
))
384386
}
385387
}
388+
389+
/// Rearranges the elements of every list in a `FixedSizeListArray` from flat row-major
/// matrix order into flat column-major order.
///
/// Each source list must hold exactly `output_rows * output_columns` values.
#[derive(Clone, Debug)]
pub struct RowMajorToColumnMajor {
    /// Number of matrix rows.
    output_rows: usize,
    /// Number of matrix columns.
    output_columns: usize,
    /// For each position in a column-major list, the row-major position to read from.
    permutation_per_list: Vec<usize>,
}

impl RowMajorToColumnMajor {
    /// Builds the transformation for matrices of shape `output_rows` x `output_columns`.
    pub fn new(output_rows: usize, output_columns: usize) -> Self {
        let mut permutation_per_list = Vec::with_capacity(output_rows * output_columns);
        // Walk the matrix column by column, recording where each element sits in row-major order.
        permutation_per_list.extend((0..output_columns).flat_map(|column| {
            (0..output_rows).map(move |row| row * output_columns + column)
        }));

        Self {
            output_rows,
            output_columns,
            permutation_per_list,
        }
    }
}
417+
418+
impl Transform for RowMajorToColumnMajor {
419+
type Source = FixedSizeListArray;
420+
type Target = FixedSizeListArray;
421+
422+
fn transform(&self, source: &Self::Source) -> Result<Self::Target, Error> {
423+
// First, check that the input array has the expected value length.
424+
let expected_list_size = self.output_rows * self.output_columns;
425+
let value_length = source.value_length() as usize;
426+
if value_length != expected_list_size {
427+
return Err(Error::UnexpectedListValueLength {
428+
expected: expected_list_size,
429+
actual: value_length,
430+
});
431+
}
432+
433+
// Create indices for extracting column-major values as row-major, for all input lists.
434+
let total_values = source.values().len();
435+
let indices_to_take: UInt64Array = (0..total_values)
436+
.map(|value_index| {
437+
let list_index = value_index / expected_list_size;
438+
let value_index_within_list = value_index % expected_list_size;
439+
let next_index_to_take = list_index * expected_list_size
440+
+ self.permutation_per_list[value_index_within_list];
441+
next_index_to_take as u64
442+
})
443+
.collect();
444+
445+
// Reorder values into a new FixedSizeListArray.
446+
// We explicitly allow `take` here because we care about nulls.
447+
#[expect(clippy::disallowed_methods)]
448+
let reordered_values = arrow::compute::take(source.values(), &indices_to_take, None)?;
449+
450+
let field = Arc::new(Field::new_list_field(
451+
source.value_type().clone(),
452+
source.is_nullable(),
453+
));
454+
Ok(FixedSizeListArray::new(
455+
field,
456+
source.value_length(),
457+
reordered_values,
458+
source.nulls().cloned(),
459+
))
460+
}
461+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
source: crates/utils/re_arrow_combinators/tests/transform.rs
3+
expression: "format!(\"{}\", DisplayRB(result.clone()))"
4+
---
5+
┌──────────────────────────────────────────────────┐
6+
col
7+
---
8+
type: nullable FixedSizeList[nullable i32; 12] │
9+
╞══════════════════════════════════════════════════╡
10+
│ [1, 4, 7, 10, null, 5, 8, 11, 3, 6, null, 12] │
11+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
12+
null
13+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
14+
│ [13, 16, 19, 22, 14, 17, 20, 23, 15, 18, 21, 24] │
15+
└──────────────────────────────────────────────────┘

crates/store/re_arrow_combinators/tests/transform.rs

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,16 @@ use std::sync::Arc;
66

77
use re_arrow_combinators::{
88
Transform as _,
9-
cast::PrimitiveCast,
9+
cast::{ListToFixedSizeList, PrimitiveCast},
1010
map::{MapFixedSizeList, MapList, MapPrimitive, ReplaceNull},
11-
reshape::{Flatten, GetField, StructToFixedList},
11+
reshape::{Flatten, GetField, RowMajorToColumnMajor, StructToFixedList},
1212
};
1313

1414
use arrow::{
15-
array::{Float32Array, Float64Array, Float64Builder, ListArray, ListBuilder, StructBuilder},
15+
array::{
16+
Float32Array, Float64Array, Float64Builder, Int32Builder, ListArray, ListBuilder,
17+
StructBuilder,
18+
},
1619
datatypes::{DataType, Field, Fields},
1720
};
1821

@@ -295,3 +298,69 @@ fn test_flatten_multiple_elements() {
295298
format!("{}", DisplayRB(result.clone()))
296299
);
297300
}
301+
302+
#[test]
fn test_row_major_to_col_major() {
    // One entry per list: `Some(elements)` is a 4x3 matrix in row-major order,
    // `None` is an invalid / null list.
    let matrices: [Option<[Option<i32>; 12]>; 3] = [
        // Matrix with some null elements.
        Some([
            // Row 0
            Some(1),
            None,
            Some(3),
            // Row 1
            Some(4),
            Some(5),
            Some(6),
            // Row 2
            Some(7),
            Some(8),
            None,
            // Row 3
            Some(10),
            Some(11),
            Some(12),
        ]),
        None,
        // Matrix without null elements.
        Some([
            // Row 0
            Some(13),
            Some(14),
            Some(15),
            // Row 1
            Some(16),
            Some(17),
            Some(18),
            // Row 2
            Some(19),
            Some(20),
            Some(21),
            // Row 3
            Some(22),
            Some(23),
            Some(24),
        ]),
    ];

    let mut list_builder = ListBuilder::new(Int32Builder::new());
    for matrix in matrices {
        match matrix {
            Some(elements) => {
                for element in elements {
                    list_builder.values().append_option(element);
                }
                list_builder.append(true);
            }
            None => {
                // Add dummy values for Arrow's fixed-size requirements.
                // See: https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html#representation
                for _ in 0..12 {
                    list_builder.values().append_value(0);
                }
                list_builder.append(false);
            }
        }
    }
    let input_array = list_builder.finish();

    // Cast to `FixedSizeListArray` and convert to column-major order.
    let as_fixed_size_lists = ListToFixedSizeList::new(12)
        .transform(&input_array)
        .unwrap();
    let result = RowMajorToColumnMajor::new(4, 3)
        .transform(&as_fixed_size_lists)
        .unwrap();

    insta::assert_snapshot!(
        "row_major_to_col_major",
        format!("{}", DisplayRB(result.clone()))
    );
}

0 commit comments

Comments
 (0)