Skip to content

Commit cb45d5f

Browse files
authored
Represent enums from protobuf messages as StructArray (#11458)
### What

In robotics, enums are often used to represent state. Currently the viewer can't really handle categorical values in plots. To make it easier to create step functions, we now pass the enum's numeric `number` along with its name.
1 parent 8586d47 commit cb45d5f

File tree

2 files changed

+54
-18
lines changed

2 files changed

+54
-18
lines changed

crates/utils/re_mcap/src/layers/protobuf.rs

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,18 @@ fn append_value(
245245
let value = enum_descriptor
246246
.get_value(*x)
247247
.ok_or_else(|| ProtobufError::UnknownEnumNumber(*x))?;
248-
downcast_err::<StringBuilder>(builder, val)?.append_value(value.name());
248+
249+
let struct_builder = downcast_err::<StructBuilder>(builder, val)?;
250+
let field_builders = struct_builder.field_builders_mut();
251+
252+
// First field is "name" (String)
253+
downcast_err::<StringBuilder>(field_builders[0].as_mut(), val)?
254+
.append_value(value.name());
255+
256+
// Second field is "value" (Int32)
257+
downcast_err::<Int32Builder>(field_builders[1].as_mut(), val)?.append_value(*x);
258+
259+
struct_builder.append(true);
249260
}
250261
}
251262

@@ -287,11 +298,18 @@ fn arrow_builder_from_field(descr: &FieldDescriptor) -> Box<dyn ArrayBuilder> {
287298
Box::new(struct_builder_from_message(&message_descriptor)) as Box<dyn ArrayBuilder>
288299
}
289300
Kind::Enum(_) => {
290-
// TODO(grtlr): It would be great to improve our `enum` support. Using `Utf8`
291-
// means a lot of excess memory / storage usage. Ideally we would use something
292-
// like `StringDictionary`, but it's not clear right now how this works with
293-
// `dyn ArrayBuilder` and sharing entries across lists.
294-
Box::new(StringBuilder::new())
301+
// Create a struct with "name" (String) and "value" (Int32) fields.
302+
// We can't use `DictionaryArray` because `concat` does not re-key, and there
303+
// could be protobuf schema evolution with different enum values across chunks.
304+
let fields = Fields::from(vec![
305+
Field::new("name", DataType::Utf8, false),
306+
Field::new("value", DataType::Int32, false),
307+
]);
308+
let field_builders: Vec<Box<dyn ArrayBuilder>> = vec![
309+
Box::new(StringBuilder::new()),
310+
Box::new(Int32Builder::new()),
311+
];
312+
Box::new(StructBuilder::new(fields, field_builders))
295313
}
296314
};
297315

@@ -303,7 +321,20 @@ fn arrow_builder_from_field(descr: &FieldDescriptor) -> Box<dyn ArrayBuilder> {
303321
}
304322

305323
fn arrow_field_from(descr: &FieldDescriptor) -> Field {
306-
Field::new(descr.name(), datatype_from(descr), true)
324+
let mut field = Field::new(descr.name(), datatype_from(descr), true);
325+
326+
// Add extension metadata for enum types
327+
if matches!(descr.kind(), Kind::Enum(_)) {
328+
field = field.with_metadata(
329+
std::iter::once((
330+
"ARROW:extension:name".to_owned(),
331+
"rerun.datatypes.ProtobufEnum".to_owned(),
332+
))
333+
.collect(),
334+
);
335+
}
336+
337+
field
307338
}
308339

309340
fn datatype_from(descr: &FieldDescriptor) -> DataType {
@@ -325,8 +356,13 @@ fn datatype_from(descr: &FieldDescriptor) -> DataType {
325356
DataType::Struct(fields)
326357
}
327358
Kind::Enum(_) => {
328-
// TODO(grtlr): Explanation see above.
329-
DataType::Utf8
359+
// Struct with "name" (String) and "value" (Int32) fields.
360+
// See comment in arrow_builder_from_field for why we use a struct.
361+
let fields = Fields::from(vec![
362+
Field::new("name", DataType::Utf8, false),
363+
Field::new("value", DataType::Int32, false),
364+
]);
365+
DataType::Struct(fields)
330366
}
331367
};
332368

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
source: crates/utils/re_mcap/src/layers/protobuf.rs
3-
expression: "format!(\"{:240}\", ChunkRedacted(&chunks[0]))"
3+
expression: "format!(\"{:-240}\", &chunks[0])"
44
---
55
┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
66
METADATA: │
@@ -12,15 +12,15 @@ expression: "format!(\"{:240}\", ChunkRedacted(&chunks[0]))"
1212
│ ┌─────────────────────────────────────┬─────────────────────────────┬─────────────────────────────┬───────────────────────────────────┬────────────────────────────────────┬─────────────────────────────────────┐ │
1313
│ │ RowIdlog_timepublish_timecom.example.Person:idcom.example.Person:namecom.example.Person:status │ │
1414
│ │ ------------------ │ │
15-
│ │ type: FixedSizeBinary[16] ┆ type: nullable Duration(ns) ┆ type: nullable Duration(ns) ┆ type: nullable List[nullable i32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable Utf8] │ │
16-
│ │ ARROW:extension:metadata: ┆ index_name: log_timeindex_name: publish_timearchetype: com.example.Personarchetype: com.example.Personarchetype: com.example.Person │ │
17-
│ │ {"namespace":"row"} ┆ is_sorted: trueis_sorted: truecomponent: com.example.Person:idcomponent: com.example.Person:namecomponent: │ │
18-
│ │ ARROW:extension:name: TUIDkind: indexkind: indexkind: datakind: datacom.example.Person:status │ │
19-
│ │ is_sorted: true ┆ ┆ ┆ ┆ ┆ kind: data │ │
20-
│ │ kind: control ┆ ┆ ┆ ┆ ┆ │ │
15+
│ │ type: FixedSizeBinary[16] ┆ type: nullable Duration(ns) ┆ type: nullable Duration(ns) ┆ type: nullable List[nullable i32] ┆ type: nullable List[nullable Utf8] ┆ type: nullable List[nullable │ │
16+
│ │ ARROW:extension:metadata: ┆ index_name: log_timeindex_name: publish_timearchetype: com.example.Personarchetype: com.example.PersonStruct[2]] │ │
17+
│ │ {"namespace":"row"} ┆ is_sorted: trueis_sorted: truecomponent: com.example.Person:idcomponent: com.example.Person:namearchetype: com.example.Person │ │
18+
│ │ ARROW:extension:name: TUIDkind: indexkind: indexkind: datakind: datacomponent: │ │
19+
│ │ is_sorted: true ┆ ┆ ┆ ┆ ┆ com.example.Person:status │ │
20+
│ │ kind: control ┆ ┆ ┆ ┆ ┆ kind: data │ │
2121
│ ╞═════════════════════════════════════╪═════════════════════════════╪═════════════════════════════╪═══════════════════════════════════╪════════════════════════════════════╪═════════════════════════════════════╡ │
22-
│ │ row_[**REDACTED**] ┆ PT0.000000042SPT0.000000042S ┆ [0] ┆ [Bob] ┆ [INACTIVE] │ │
22+
│ │ row_[**REDACTED**] ┆ PT0.000000042SPT0.000000042S ┆ [0] ┆ [Bob] ┆ [{name: INACTIVE, value: 2}] │ │
2323
│ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │
24-
│ │ row_[**REDACTED**] ┆ PT0.000000043SPT0.000000043S ┆ [123] ┆ [Alice] ┆ [UNKNOWN] │ │
24+
│ │ row_[**REDACTED**] ┆ PT0.000000043SPT0.000000043S ┆ [123] ┆ [Alice] ┆ [{name: UNKNOWN, value: 0}] │ │
2525
│ └─────────────────────────────────────┴─────────────────────────────┴─────────────────────────────┴───────────────────────────────────┴────────────────────────────────────┴─────────────────────────────────────┘ │
2626
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

0 commit comments

Comments
 (0)