Skip to content

Commit 7cf07e1

Browse files
committed
Add re_arrow_combinators::reshape::RowMajorToColumnMajor transformation
Required, for example, to convert camera matrices read from variable-sized, row-major protobuf lists, e.g. the 3x3 or 3x4 camera matrices found in the `foxglove.CameraCalibration` schema. To do that, we need both a cast to a fixed-size list and a reshaping transformation. Contributes to: https://linear.app/rerun/issue/RR-3045 https://linear.app/rerun/issue/RR-2327
1 parent 34400e3 commit 7cf07e1

File tree

5 files changed

+221
-4
lines changed

5 files changed

+221
-4
lines changed

crates/utils/re_arrow_combinators/src/cast.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,56 @@ where
8383
.cloned()
8484
}
8585
}
86+
87+
/// Casts a `ListArray` to a `FixedSizeListArray` with the specified value length.
///
/// Every valid (non-null) list in the source `ListArray` must contain exactly
/// `value_length` elements; otherwise the transformation fails with
/// `Error::UnexpectedListValueLength`.
#[derive(Clone, Debug)]
pub struct ListToFixedSizeList {
    /// Number of elements expected in every list.
    ///
    /// Stored as `i32` because it is handed to `FixedSizeListArray::try_new` as-is.
    value_length: i32,
}

impl ListToFixedSizeList {
    /// Create a new `ListToFixedSizeList` transformation with an expected value length.
    pub fn new(value_length: i32) -> Self {
        Self { value_length }
    }
}
101+
102+
impl Transform for ListToFixedSizeList {
103+
type Source = arrow::array::ListArray;
104+
type Target = arrow::array::FixedSizeListArray;
105+
106+
fn transform(&self, source: &Self::Source) -> Result<Self::Target, Error> {
107+
// Check that each list has exactly the expected length (or is null).
108+
let offsets = source.value_offsets();
109+
let expected_length = self.value_length as usize;
110+
for i in 0..source.len() {
111+
if source.is_valid(i) {
112+
let start = offsets[i] as usize;
113+
let end = offsets[i + 1] as usize;
114+
let length = end - start;
115+
if length != expected_length {
116+
return Err(Error::UnexpectedListValueLength {
117+
expected: expected_length,
118+
actual: length,
119+
});
120+
}
121+
}
122+
}
123+
124+
// We know that `source` is a `ListArray` by it's type. But Arrow won't expose its field directly.
125+
let field = match source.data_type() {
126+
arrow::datatypes::DataType::List(f) => f.clone(),
127+
_ => unreachable!(),
128+
};
129+
130+
// Build the FixedSizeListArray.
131+
Ok(arrow::array::FixedSizeListArray::try_new(
132+
field,
133+
self.value_length,
134+
source.values().clone(),
135+
source.nulls().cloned(),
136+
)?)
137+
}
138+
}

crates/utils/re_arrow_combinators/src/error.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ pub enum Error {
4343
#[error("List contains unexpected value type: expected {expected}, got {actual:?}")]
4444
UnexpectedListValueType { expected: String, actual: DataType },
4545

46+
#[error("Expected list with {expected} elements, got {actual}")]
47+
UnexpectedListValueLength { expected: usize, actual: usize },
48+
4649
#[error("Fixed-size list contains unexpected value type: expected {expected}, got {actual:?}")]
4750
UnexpectedFixedSizeListValueType { expected: String, actual: DataType },
4851

crates/utils/re_arrow_combinators/src/reshape.rs

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
33
use std::sync::Arc;
44

5-
use arrow::array::{Array, ArrayRef, FixedSizeListArray, ListArray, StructArray, UInt32Array};
5+
use arrow::array::{
6+
Array, ArrayRef, FixedSizeListArray, ListArray, StructArray, UInt32Array, UInt64Array,
7+
};
68
use arrow::buffer::{NullBuffer, OffsetBuffer};
79
use arrow::datatypes::Field;
810

@@ -383,3 +385,78 @@ impl Transform for Explode {
383385
))
384386
}
385387
}
388+
389+
/// Converts the lists of a `FixedSizeListArray` from flat row-major matrix layout
/// to flat column-major matrix layout.
///
/// Each list of the source array is interpreted as a matrix with `output_rows` rows and
/// `output_columns` columns, so its value length must be `output_rows * output_columns`.
#[derive(Clone, Debug)]
pub struct RowMajorToColumnMajor {
    /// Number of matrix rows.
    output_rows: usize,
    /// Number of matrix columns.
    output_columns: usize,
    /// For every position of a column-major list, the row-major position to read from.
    permutation_per_list: Vec<usize>,
}

impl RowMajorToColumnMajor {
    /// Create a new row-major to column-major transformation for the desired output shape.
    pub fn new(output_rows: usize, output_columns: usize) -> Self {
        let mut permutation_per_list = Vec::with_capacity(output_rows * output_columns);

        // Walk the matrix column by column (the output order) and record where each
        // element lives in the row-major input.
        permutation_per_list.extend((0..output_columns).flat_map(|column| {
            (0..output_rows).map(move |row| row * output_columns + column)
        }));

        Self {
            output_rows,
            output_columns,
            permutation_per_list,
        }
    }
}
417+
418+
impl Transform for RowMajorToColumnMajor {
419+
type Source = FixedSizeListArray;
420+
type Target = FixedSizeListArray;
421+
422+
fn transform(&self, source: &Self::Source) -> Result<Self::Target, Error> {
423+
// First, check that the input array has the expected value length.
424+
let expected_list_size = self.output_rows * self.output_columns;
425+
let value_length = source.value_length() as usize;
426+
if value_length != expected_list_size {
427+
return Err(Error::UnexpectedListValueLength {
428+
expected: expected_list_size,
429+
actual: value_length,
430+
});
431+
}
432+
433+
// Create indices for extracting column-major values as row-major, for all input lists.
434+
let total_values = source.values().len();
435+
let indices_to_take: UInt64Array = (0..total_values)
436+
.map(|value_index| {
437+
let list_index = value_index / expected_list_size;
438+
let value_index_within_list = value_index % expected_list_size;
439+
let next_index_to_take = list_index * expected_list_size
440+
+ self.permutation_per_list[value_index_within_list];
441+
next_index_to_take as u64
442+
})
443+
.collect();
444+
445+
// Reorder values into a new FixedSizeListArray.
446+
// We explicitly allow `take` here because we care about nulls.
447+
#[expect(clippy::disallowed_methods)]
448+
let reordered_values = arrow::compute::take(source.values(), &indices_to_take, None)?;
449+
450+
let field = Arc::new(Field::new(
451+
"item",
452+
source.values().data_type().clone(),
453+
true,
454+
));
455+
Ok(FixedSizeListArray::new(
456+
field,
457+
source.value_length(),
458+
reordered_values,
459+
source.nulls().cloned(),
460+
))
461+
}
462+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
source: crates/utils/re_arrow_combinators/tests/transform.rs
3+
expression: "format!(\"{}\", DisplayRB(result.clone()))"
4+
---
5+
┌──────────────────────────────────────────────────┐
6+
col
7+
---
8+
type: nullable FixedSizeList[nullable i32; 12] │
9+
╞══════════════════════════════════════════════════╡
10+
│ [1, 4, 7, 10, null, 5, 8, 11, 3, 6, null, 12] │
11+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
12+
null
13+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
14+
│ [13, 16, 19, 22, 14, 17, 20, 23, 15, 18, 21, 24] │
15+
└──────────────────────────────────────────────────┘

crates/utils/re_arrow_combinators/tests/transform.rs

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,16 @@ use std::sync::Arc;
66

77
use re_arrow_combinators::{
88
Transform as _,
9-
cast::PrimitiveCast,
9+
cast::{ListToFixedSizeList, PrimitiveCast},
1010
map::{MapFixedSizeList, MapList, MapPrimitive, ReplaceNull},
11-
reshape::{Flatten, GetField, StructToFixedList},
11+
reshape::{Flatten, GetField, RowMajorToColumnMajor, StructToFixedList},
1212
};
1313

1414
use arrow::{
15-
array::{Float32Array, Float64Array, Float64Builder, ListArray, ListBuilder, StructBuilder},
15+
array::{
16+
Float32Array, Float64Array, Float64Builder, Int32Builder, ListArray, ListBuilder,
17+
StructBuilder,
18+
},
1619
datatypes::{DataType, Field, Fields},
1720
};
1821

@@ -295,3 +298,69 @@ fn test_flatten_multiple_elements() {
295298
format!("{}", DisplayRB(result.clone()))
296299
);
297300
}
301+
302+
/// Casts a list array of 4x3 row-major matrices to fixed-size lists and checks the
/// column-major reordering, including null elements and a null list, against a snapshot.
#[test]
fn test_row_major_to_col_major() {
    let mut outer_builder = ListBuilder::new(Int32Builder::new());

    // First list represents a 4x3 matrix in row-major order with some null elements.
    let matrix_with_nulls: [Option<i32>; 12] = [
        // Row 0
        Some(1),
        None,
        Some(3),
        // Row 1
        Some(4),
        Some(5),
        Some(6),
        // Row 2
        Some(7),
        Some(8),
        None,
        // Row 3
        Some(10),
        Some(11),
        Some(12),
    ];
    for element in matrix_with_nulls {
        outer_builder.values().append_option(element);
    }
    outer_builder.append(true);

    // Second list is invalid / null.
    // Add dummy values for Arrow's fixed-size requirements.
    // See: https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeListArray.html#representation
    for _ in 0..12 {
        outer_builder.values().append_value(0);
    }
    outer_builder.append(false);

    // Third list represents a 4x3 matrix in row-major order without null elements:
    // rows are [13, 14, 15], [16, 17, 18], [19, 20, 21] and [22, 23, 24].
    for element in 13..=24 {
        outer_builder.values().append_value(element);
    }
    outer_builder.append(true);

    let list_array = outer_builder.finish();

    // Cast to `FixedSizeListArray` and convert to column-major order.
    let to_fixed_size = ListToFixedSizeList::new(12);
    let fixed_size_list_array = to_fixed_size.transform(&list_array).unwrap();

    let to_column_major = RowMajorToColumnMajor::new(4, 3);
    let result = to_column_major.transform(&fixed_size_list_array).unwrap();

    insta::assert_snapshot!(
        "row_major_to_col_major",
        format!("{}", DisplayRB(result.clone()))
    );
}

0 commit comments

Comments
 (0)