Skip to content

Commit 48502a8

Browse files
feat: Adaptive array builders for struct & time types (open-telemetry#573)
Adds adaptive array builder implementations for `Struct`, `TimestampNano` and `DurationNano`.

Some small cleanup items:
- Changes the `append_value_checked` method on `AdaptiveArrayBuilder` to just be `append_value` by having `AdaptiveArrayBuilder` implement the `CheckedArrayAppend` trait. This makes the method call more similar to how it is called on the underlying arrow builders (e.g. for `FixedSizeBinaryBuilder`, the method is just called `append_value`: https://docs.rs/arrow/latest/arrow/array/struct.FixedSizeBinaryBuilder.html#method.append_value).
- Removes some unnecessary `'static` lifetime constraints in the `array::test` module.
- Makes some methods on `AdaptiveBooleanArrayBuilder` public which should not have been private.
1 parent 6c0cd6a commit 48502a8

File tree

3 files changed

+563
-20
lines changed

3 files changed

+563
-20
lines changed

rust/otel-arrow-rust/src/encode/record/array.rs

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ use arrow::array::{
2020
StringDictionaryBuilder,
2121
};
2222
use arrow::datatypes::{
23-
ArrowDictionaryKeyType, DataType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type,
24-
Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
23+
ArrowDictionaryKeyType, DataType, DurationNanosecondType, Float32Type, Float64Type, Int8Type,
24+
Int16Type, Int32Type, Int64Type, TimestampNanosecondType, UInt8Type, UInt16Type, UInt32Type,
25+
UInt64Type,
2526
};
2627
use arrow::error::ArrowError;
2728

@@ -40,6 +41,7 @@ pub mod dictionary;
4041
pub mod fixed_size_binary;
4142
pub mod primitive;
4243
pub mod string;
44+
pub mod structs;
4345

4446
/// This is the base trait that array builders should implement to build the array.
4547
///
@@ -116,6 +118,7 @@ pub struct ArrayOptions {
116118

117119
pub struct AdaptiveArrayBuilder<TArgs, TN, TD8, TD16> {
118120
dictionary_options: Option<DictionaryOptions>,
121+
nullable: bool,
119122
inner: Option<MaybeDictionaryBuilder<TN, TD8, TD16>>,
120123

121124
// these are the args that will be used to create the underlying builder. In most cases this
@@ -154,6 +157,7 @@ where
154157

155158
Self {
156159
dictionary_options: options.dictionary_options,
160+
nullable: options.nullable,
157161
inner,
158162
inner_args: args,
159163
}
@@ -232,7 +236,7 @@ where
232236
}
233237
}
234238

235-
impl<T, TArgs, TN, TD8, TD16> AdaptiveArrayBuilder<TArgs, TN, TD8, TD16>
239+
impl<T, TArgs, TN, TD8, TD16> CheckedArrayAppend for AdaptiveArrayBuilder<TArgs, TN, TD8, TD16>
236240
where
237241
TArgs: Clone,
238242
TN: CheckedArrayAppend<Native = T> + ArrayBuilderConstructor<Args = TArgs>,
@@ -248,9 +252,11 @@ where
248252
+ ConvertToNativeHelper,
249253
<TD16 as ConvertToNativeHelper>::Accessor: NullableArrayAccessor<Native = T> + 'static,
250254
{
255+
type Native = T;
256+
251257
/// Try to append a value to the underlying builder. This method may return an error if
252258
/// the value is not valid.
253-
fn append_value_checked(&mut self, value: &T) -> Result<(), ArrowError> {
259+
fn append_value(&mut self, value: &T) -> Result<(), ArrowError> {
254260
self.initialize_inner();
255261
let inner = self
256262
.inner
@@ -269,7 +275,7 @@ where
269275
let mut native = TN::new(self.inner_args.clone());
270276
dictionary_builder.to_native_checked(&mut native)?;
271277
self.inner = Some(MaybeDictionaryBuilder::Native(native));
272-
self.append_value_checked(value)
278+
self.append_value(value)
273279
}
274280
Err(checked::DictionaryBuilderError::CheckedBuilderError {
275281
source: arrow_error,
@@ -333,6 +339,8 @@ pub type Int8ArrayBuilder = PrimitiveArrayBuilder<Int8Type>;
333339
pub type Int16ArrayBuilder = PrimitiveArrayBuilder<Int16Type>;
334340
pub type Int32ArrayBuilder = PrimitiveArrayBuilder<Int32Type>;
335341
pub type Int64ArrayBuilder = PrimitiveArrayBuilder<Int64Type>;
342+
pub type TimestampNanosecondArrayBuilder = PrimitiveArrayBuilder<TimestampNanosecondType>;
343+
pub type DurationNanosecondArrayBuilder = PrimitiveArrayBuilder<DurationNanosecondType>;
336344

337345
#[cfg(test)]
338346
pub mod test {
@@ -341,7 +349,7 @@ pub mod test {
341349
use std::sync::Arc;
342350

343351
use arrow::array::{DictionaryArray, StringArray, UInt8Array, UInt8DictionaryArray};
344-
use arrow::datatypes::DataType;
352+
use arrow::datatypes::{DataType, TimeUnit};
345353

346354
fn test_array_builder_generic<T, TArgs, TN, TD8, TD16>(
347355
array_builder_factory: &impl Fn(ArrayOptions) -> AdaptiveArrayBuilder<TArgs, TN, TD8, TD16>,
@@ -353,11 +361,11 @@ pub mod test {
353361
+ ArrayBuilderConstructor<Args = TArgs>
354362
+ ConvertToNativeHelper
355363
+ UpdateDictionaryIndexInto<TD16>,
356-
<TD8 as ConvertToNativeHelper>::Accessor: NullableArrayAccessor<Native = T> + 'static,
364+
<TD8 as ConvertToNativeHelper>::Accessor: NullableArrayAccessor<Native = T>,
357365
TD16: DictionaryBuilder<UInt16Type>
358366
+ ArrayBuilderConstructor<Args = TArgs>
359367
+ ConvertToNativeHelper,
360-
<TD16 as ConvertToNativeHelper>::Accessor: NullableArrayAccessor<Native = T> + 'static,
368+
<TD16 as ConvertToNativeHelper>::Accessor: NullableArrayAccessor<Native = T>,
361369
{
362370
// tests some common behaviours of checked & unchecked array builders:
363371

@@ -512,6 +520,16 @@ pub mod test {
512520
vec![b"a".to_vec(), b"b".to_vec()],
513521
DataType::Binary,
514522
);
523+
test_array_append_generic(
524+
TimestampNanosecondArrayBuilder::new,
525+
vec![0, 1],
526+
DataType::Timestamp(TimeUnit::Nanosecond, None),
527+
);
528+
test_array_append_generic(
529+
DurationNanosecondArrayBuilder::new,
530+
vec![0, 1],
531+
DataType::Duration(TimeUnit::Nanosecond),
532+
);
515533
}
516534

517535
fn test_checked_array_builder_generic<T, TArgs, TN, TD8, TD16>(
@@ -545,9 +563,9 @@ pub mod test {
545563
}),
546564
});
547565

548-
assert!(builder.append_value_checked(&values[0]).is_ok());
549-
assert!(builder.append_value_checked(&values[0]).is_ok());
550-
assert!(builder.append_value_checked(&values[1]).is_ok());
566+
assert!(builder.append_value(&values[0]).is_ok());
567+
assert!(builder.append_value(&values[0]).is_ok());
568+
assert!(builder.append_value(&values[1]).is_ok());
551569

552570
let result = builder.finish().unwrap();
553571
assert_eq!(
@@ -578,8 +596,8 @@ pub mod test {
578596
dictionary_options: None,
579597
nullable: false,
580598
});
581-
assert!(builder.append_value_checked(&values[0]).is_ok());
582-
assert!(builder.append_value_checked(&values[1]).is_ok());
599+
assert!(builder.append_value(&values[0]).is_ok());
600+
assert!(builder.append_value(&values[1]).is_ok());
583601
let result = builder.finish().unwrap();
584602
assert_eq!(result.len(), 2);
585603
let array = result
@@ -597,8 +615,8 @@ pub mod test {
597615
}),
598616
nullable: false,
599617
});
600-
assert!(builder.append_value_checked(&values[0]).is_ok());
601-
assert!(builder.append_value_checked(&values[1]).is_ok());
618+
assert!(builder.append_value(&values[0]).is_ok());
619+
assert!(builder.append_value(&values[1]).is_ok());
602620
let result = builder.finish().unwrap();
603621
assert_eq!(result.len(), 2);
604622
let array = result
@@ -616,7 +634,7 @@ pub mod test {
616634
}),
617635
nullable: false,
618636
});
619-
let result = builder.append_value_checked(&invalid_values[0]);
637+
let result = builder.append_value(&invalid_values[0]);
620638
let err = result.unwrap_err();
621639
assert!(matches!(err, ArrowError::InvalidArgumentError(_)))
622640
}

rust/otel-arrow-rust/src/encode/record/array/boolean.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@ use super::{ArrayBuilder, ArrayBuilderConstructor};
1717
/// This is implemented a bit differently than for other types because `Boolean` is the one datatype
1818
/// where it would never really make sense to have it in a dictionary.
1919
pub struct AdaptiveBooleanArrayBuilder {
20+
pub nullable: bool,
2021
inner: Option<BooleanBuilder>,
2122
}
2223

2324
pub struct BooleanBuilderOptions {
24-
nullable: bool,
25+
pub nullable: bool,
2526
}
2627

2728
impl AdaptiveBooleanArrayBuilder {
@@ -32,10 +33,13 @@ impl AdaptiveBooleanArrayBuilder {
3233
Some(BooleanBuilder::new())
3334
};
3435

35-
Self { inner }
36+
Self {
37+
inner,
38+
nullable: options.nullable,
39+
}
3640
}
3741

38-
fn append_value(&mut self, value: bool) {
42+
pub fn append_value(&mut self, value: bool) {
3943
if self.inner.is_none() {
4044
// TODO -- when we handle nulls here we need to keep track of how many
4145
// nulls have been appended before the first value, and prefix this
@@ -52,7 +56,7 @@ impl AdaptiveBooleanArrayBuilder {
5256
inner.append_value(value);
5357
}
5458

55-
fn finish(&mut self) -> Option<ArrayRef> {
59+
pub fn finish(&mut self) -> Option<ArrayRef> {
5660
self.inner
5761
.as_mut()
5862
.map(|inner| Arc::new(inner.finish()) as ArrayRef)

0 commit comments

Comments
 (0)