Skip to content

Commit e4b68e6

Browse files
authored
Add append_non_nulls to StructBuilder (apache#9430)
# Which issue does this PR close?

- Closes apache#9429

I'm doing some performance optimization, and noticed that we have a loop adding one value to the null mask at a time. Instead, I'd suggest adding `append_non_nulls` to do this at once.

```
append_non_nulls(n) vs append(true) in a loop (with bitmap allocated)
┌───────────┬───────────────────┬─────────────────────┬─────────┐
│ n         │ append(true) loop │ append_non_nulls(n) │ speedup │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 100       │ 251 ns            │ 73 ns               │ ~3x     │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 1,000     │ 2.0 µs            │ 94 ns               │ ~21x    │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 10,000    │ 19.3 µs           │ 119 ns              │ ~162x   │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 100,000   │ 191 µs            │ 348 ns              │ ~549x   │
├───────────┼───────────────────┼─────────────────────┼─────────┤
│ 1,000,000 │ 1.90 ms           │ 3.5 µs              │ ~543x   │
└───────────┴───────────────────┴─────────────────────┴─────────┘
```

# Rationale for this change

It adds a new public API in favor of performance improvements.

# What changes are included in this PR?

A new public API.

# Are these changes tested?

Yes, with new unit tests.

# Are there any user-facing changes?

Just a new convenient API.
1 parent bee4595 commit e4b68e6

1 file changed

Lines changed: 62 additions & 0 deletions

File tree

arrow-array/src/builder/struct_builder.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,12 @@ impl StructBuilder {
213213
self.null_buffer_builder.append(is_valid);
214214
}
215215

216+
/// Appends `n` non-null entries into the builder.
217+
#[inline]
218+
pub fn append_non_nulls(&mut self, n: usize) {
219+
self.null_buffer_builder.append_n_non_nulls(n);
220+
}
221+
216222
/// Appends a null element to the struct.
217223
#[inline]
218224
pub fn append_null(&mut self) {
@@ -727,4 +733,60 @@ mod tests {
727733
assert!(a1.is_valid(0));
728734
assert!(a1.is_null(1));
729735
}
736+
737+
#[test]
738+
fn test_append_non_nulls() {
739+
let int_builder = Int32Builder::new();
740+
let fields = vec![Field::new("f1", DataType::Int32, false)];
741+
let field_builders = vec![Box::new(int_builder) as Box<dyn ArrayBuilder>];
742+
743+
let mut builder = StructBuilder::new(fields, field_builders);
744+
builder
745+
.field_builder::<Int32Builder>(0)
746+
.unwrap()
747+
.append_slice(&[1, 2, 3, 4, 5]);
748+
builder.append_non_nulls(5);
749+
750+
let arr = builder.finish();
751+
assert_eq!(arr.len(), 5);
752+
assert_eq!(arr.null_count(), 0);
753+
for i in 0..5 {
754+
assert!(arr.is_valid(i));
755+
}
756+
}
757+
758+
#[test]
759+
fn test_append_non_nulls_with_nulls() {
760+
let mut builder = StructBuilder::new(Fields::empty(), vec![]);
761+
builder.append_null();
762+
builder.append_non_nulls(3);
763+
builder.append_nulls(2);
764+
builder.append_non_nulls(1);
765+
766+
let arr = builder.finish();
767+
assert_eq!(arr.len(), 7);
768+
assert_eq!(arr.null_count(), 3);
769+
assert!(arr.is_null(0));
770+
assert!(arr.is_valid(1));
771+
assert!(arr.is_valid(2));
772+
assert!(arr.is_valid(3));
773+
assert!(arr.is_null(4));
774+
assert!(arr.is_null(5));
775+
assert!(arr.is_valid(6));
776+
}
777+
778+
#[test]
779+
fn test_append_non_nulls_zero() {
780+
let mut builder = StructBuilder::new(Fields::empty(), vec![]);
781+
builder.append_non_nulls(0);
782+
assert_eq!(builder.len(), 0);
783+
784+
builder.append(true);
785+
builder.append_non_nulls(0);
786+
assert_eq!(builder.len(), 1);
787+
788+
let arr = builder.finish();
789+
assert_eq!(arr.len(), 1);
790+
assert_eq!(arr.null_count(), 0);
791+
}
730792
}

0 commit comments

Comments
 (0)