Skip to content

Commit 237065b

Browse files
alamb and Jefffrey authored
Avoid clones in make_array for StructArray and GenericByteViewArray (#9114)
# Which issue does this PR close?

- Part of #9061
- Broken out of #9058

# Rationale for this change

The current implementation of `make_array` for StructArray and GenericByteViewArray clones `ArrayData`, which allocates a new Vec. This is unnecessary given that `make_array` is passed an owned ArrayData.

# What changes are included in this PR?

1. Add a new API to ArrayData to break it down into parts (`into_parts`)
2. Use that API to avoid cloning while constructing StructArray and GenericByteViewArray

# Are these changes tested?

Yes, by CI

# Are there any user-facing changes?

A few fewer allocations when creating arrays

---------

Co-authored-by: Jeffrey Vo <jeffrey.vo.australia@gmail.com>
1 parent 02af070 commit 237065b

File tree

3 files changed

+46
-14
lines changed

3 files changed

+46
-14
lines changed

arrow-array/src/array/byte_view_array.rs

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -967,15 +967,16 @@ impl<'a, T: ByteViewType + ?Sized> IntoIterator for &'a GenericByteViewArray<T>
967967
}
968968

969969
impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
970-
fn from(value: ArrayData) -> Self {
971-
let views = value.buffers()[0].clone();
972-
let views = ScalarBuffer::new(views, value.offset(), value.len());
973-
let buffers = value.buffers()[1..].to_vec().into();
970+
fn from(data: ArrayData) -> Self {
971+
let (_data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
972+
let views = buffers.remove(0); // need to maintain order of remaining buffers
973+
let buffers = Arc::from(buffers);
974+
let views = ScalarBuffer::new(views, offset, len);
974975
Self {
975976
data_type: T::DATA_TYPE,
976977
views,
977978
buffers,
978-
nulls: value.nulls().cloned(),
979+
nulls,
979980
phantom: Default::default(),
980981
}
981982
}

arrow-array/src/array/struct_array.rs

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -347,25 +347,26 @@ impl StructArray {
347347

348348
impl From<ArrayData> for StructArray {
349349
fn from(data: ArrayData) -> Self {
350-
let parent_offset = data.offset();
351-
let parent_len = data.len();
350+
let (data_type, len, nulls, offset, _buffers, child_data) = data.into_parts();
352351

353-
let fields = data
354-
.child_data()
355-
.iter()
352+
let parent_offset = offset;
353+
let parent_len = len;
354+
355+
let fields = child_data
356+
.into_iter()
356357
.map(|cd| {
357358
if parent_offset != 0 || parent_len != cd.len() {
358359
make_array(cd.slice(parent_offset, parent_len))
359360
} else {
360-
make_array(cd.clone())
361+
make_array(cd)
361362
}
362363
})
363364
.collect();
364365

365366
Self {
366-
len: data.len(),
367-
data_type: data.data_type().clone(),
368-
nulls: data.nulls().cloned(),
367+
len,
368+
data_type,
369+
nulls,
369370
fields,
370371
}
371372
}

arrow-data/src/data.rs

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,9 @@ impl ArrayData {
309309
///
310310
/// Note: This is a low level API and most users of the arrow crate should create
311311
/// arrays using the builders found in [arrow_array](https://docs.rs/arrow-array)
312+
/// or [`ArrayDataBuilder`].
313+
///
314+
/// See also [`Self::into_parts`] to recover the fields
312315
pub fn try_new(
313316
data_type: DataType,
314317
len: usize,
@@ -351,6 +354,33 @@ impl ArrayData {
351354
Ok(new_self)
352355
}
353356

357+
/// Return the constituent parts of this ArrayData
358+
///
359+
/// This is the inverse of [`ArrayData::try_new`].
360+
///
361+
/// Returns `(data_type, len, nulls, offset, buffers, child_data)`
362+
pub fn into_parts(
363+
self,
364+
) -> (
365+
DataType,
366+
usize,
367+
Option<NullBuffer>,
368+
usize,
369+
Vec<Buffer>,
370+
Vec<ArrayData>,
371+
) {
372+
let Self {
373+
data_type,
374+
len,
375+
nulls,
376+
offset,
377+
buffers,
378+
child_data,
379+
} = self;
380+
381+
(data_type, len, nulls, offset, buffers, child_data)
382+
}
383+
354384
/// Returns a builder to construct a [`ArrayData`] instance of the same [`DataType`]
355385
#[inline]
356386
pub const fn builder(data_type: DataType) -> ArrayDataBuilder {

0 commit comments

Comments
 (0)