@@ -245,7 +245,18 @@ fn append_value(
245
245
let value = enum_descriptor
246
246
. get_value ( * x)
247
247
. ok_or_else ( || ProtobufError :: UnknownEnumNumber ( * x) ) ?;
248
- downcast_err :: < StringBuilder > ( builder, val) ?. append_value ( value. name ( ) ) ;
248
+
249
+ let struct_builder = downcast_err :: < StructBuilder > ( builder, val) ?;
250
+ let field_builders = struct_builder. field_builders_mut ( ) ;
251
+
252
+ // First field is "name" (String)
253
+ downcast_err :: < StringBuilder > ( field_builders[ 0 ] . as_mut ( ) , val) ?
254
+ . append_value ( value. name ( ) ) ;
255
+
256
+ // Second field is "value" (Int32)
257
+ downcast_err :: < Int32Builder > ( field_builders[ 1 ] . as_mut ( ) , val) ?. append_value ( * x) ;
258
+
259
+ struct_builder. append ( true ) ;
249
260
}
250
261
}
251
262
@@ -287,11 +298,18 @@ fn arrow_builder_from_field(descr: &FieldDescriptor) -> Box<dyn ArrayBuilder> {
287
298
Box :: new ( struct_builder_from_message ( & message_descriptor) ) as Box < dyn ArrayBuilder >
288
299
}
289
300
Kind :: Enum ( _) => {
290
- // TODO(grtlr): It would be great to improve our `enum` support. Using `Utf8`
291
- // means a lot of excess memory / storage usage. Ideally we would use something
292
- // like `StringDictionary`, but it's not clear right now how this works with
293
- // `dyn ArrayBuilder` and sharing entries across lists.
294
- Box :: new ( StringBuilder :: new ( ) )
301
+ // Create a struct with "name" (String) and "value" (Int32) fields.
302
+ // We can't use `DictionaryArray` because `concat` does not re-key, and there
303
+ // could be protobuf schema evolution with different enum values across chunks.
304
+ let fields = Fields :: from ( vec ! [
305
+ Field :: new( "name" , DataType :: Utf8 , false ) ,
306
+ Field :: new( "value" , DataType :: Int32 , false ) ,
307
+ ] ) ;
308
+ let field_builders: Vec < Box < dyn ArrayBuilder > > = vec ! [
309
+ Box :: new( StringBuilder :: new( ) ) ,
310
+ Box :: new( Int32Builder :: new( ) ) ,
311
+ ] ;
312
+ Box :: new ( StructBuilder :: new ( fields, field_builders) )
295
313
}
296
314
} ;
297
315
@@ -303,7 +321,20 @@ fn arrow_builder_from_field(descr: &FieldDescriptor) -> Box<dyn ArrayBuilder> {
303
321
}
304
322
305
323
fn arrow_field_from ( descr : & FieldDescriptor ) -> Field {
306
- Field :: new ( descr. name ( ) , datatype_from ( descr) , true )
324
+ let mut field = Field :: new ( descr. name ( ) , datatype_from ( descr) , true ) ;
325
+
326
+ // Add extension metadata for enum types
327
+ if matches ! ( descr. kind( ) , Kind :: Enum ( _) ) {
328
+ field = field. with_metadata (
329
+ std:: iter:: once ( (
330
+ "ARROW:extension:name" . to_owned ( ) ,
331
+ "rerun.datatypes.ProtobufEnum" . to_owned ( ) ,
332
+ ) )
333
+ . collect ( ) ,
334
+ ) ;
335
+ }
336
+
337
+ field
307
338
}
308
339
309
340
fn datatype_from ( descr : & FieldDescriptor ) -> DataType {
@@ -325,8 +356,13 @@ fn datatype_from(descr: &FieldDescriptor) -> DataType {
325
356
DataType :: Struct ( fields)
326
357
}
327
358
Kind :: Enum ( _) => {
328
- // TODO(grtlr): Explanation see above.
329
- DataType :: Utf8
359
+ // Struct with "name" (String) and "value" (Int32) fields.
360
+ // See comment in arrow_builder_from_field for why we use a struct.
361
+ let fields = Fields :: from ( vec ! [
362
+ Field :: new( "name" , DataType :: Utf8 , false ) ,
363
+ Field :: new( "value" , DataType :: Int32 , false ) ,
364
+ ] ) ;
365
+ DataType :: Struct ( fields)
330
366
}
331
367
} ;
332
368
0 commit comments