Skip to content

Commit f92385d

Browse files
committed
[arrow] Minimize allocation in GenericByteViewArray::slice()
Use the suggested Arc<[Buffer]> for the ViewArray's buffer storage instead of an owned Vec<Buffer>, so that the clone performed by slice() does not allocate.
1 parent 116ae12 commit f92385d

File tree

1 file changed

+32
-15
lines changed

1 file changed

+32
-15
lines changed

arrow-array/src/array/byte_view_array.rs

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ use super::ByteArrayType;
165165
pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
166166
data_type: DataType,
167167
views: ScalarBuffer<u128>,
168-
buffers: Vec<Buffer>,
168+
buffers: Arc<[Buffer]>,
169169
phantom: PhantomData<T>,
170170
nulls: Option<NullBuffer>,
171171
}
@@ -188,7 +188,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
188188
/// # Panics
189189
///
190190
/// Panics if [`GenericByteViewArray::try_new`] returns an error
191-
pub fn new(views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>) -> Self {
191+
pub fn new<U>(views: ScalarBuffer<u128>, buffers: U, nulls: Option<NullBuffer>) -> Self
192+
where
193+
U: Into<Arc<[Buffer]>>,
194+
{
192195
Self::try_new(views, buffers, nulls).unwrap()
193196
}
194197

@@ -198,11 +201,16 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
198201
///
199202
/// * `views.len() != nulls.len()`
200203
/// * [ByteViewType::validate] fails
201-
pub fn try_new(
204+
pub fn try_new<U>(
202205
views: ScalarBuffer<u128>,
203-
buffers: Vec<Buffer>,
206+
buffers: U,
204207
nulls: Option<NullBuffer>,
205-
) -> Result<Self, ArrowError> {
208+
) -> Result<Self, ArrowError>
209+
where
210+
U: Into<Arc<[Buffer]>>,
211+
{
212+
let buffers: Arc<[Buffer]> = buffers.into();
213+
206214
T::validate(&views, &buffers)?;
207215

208216
if let Some(n) = nulls.as_ref() {
@@ -230,11 +238,14 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
230238
/// # Safety
231239
///
232240
/// Safe if [`Self::try_new`] would not error
233-
pub unsafe fn new_unchecked(
241+
pub unsafe fn new_unchecked<U>(
234242
views: ScalarBuffer<u128>,
235-
buffers: Vec<Buffer>,
243+
buffers: U,
236244
nulls: Option<NullBuffer>,
237-
) -> Self {
245+
) -> Self
246+
where
247+
U: Into<Arc<[Buffer]>>,
248+
{
238249
if cfg!(feature = "force_validate") {
239250
return Self::new(views, buffers, nulls);
240251
}
@@ -243,7 +254,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
243254
data_type: T::DATA_TYPE,
244255
phantom: Default::default(),
245256
views,
246-
buffers,
257+
buffers: buffers.into(),
247258
nulls,
248259
}
249260
}
@@ -253,7 +264,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
253264
Self {
254265
data_type: T::DATA_TYPE,
255266
views: vec![0; len].into(),
256-
buffers: vec![],
267+
buffers: vec![].into(),
257268
nulls: Some(NullBuffer::new_null(len)),
258269
phantom: Default::default(),
259270
}
@@ -279,7 +290,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
279290
}
280291

281292
/// Deconstruct this array into its constituent parts
282-
pub fn into_parts(self) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
293+
pub fn into_parts(self) -> (ScalarBuffer<u128>, Arc<[Buffer]>, Option<NullBuffer>) {
283294
(self.views, self.buffers, self.nulls)
284295
}
285296

@@ -884,12 +895,15 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
884895
}
885896

886897
fn shrink_to_fit(&mut self) {
898+
/*
887899
self.views.shrink_to_fit();
888900
self.buffers.iter_mut().for_each(|b| b.shrink_to_fit());
889901
self.buffers.shrink_to_fit();
890902
if let Some(nulls) = &mut self.nulls {
891903
nulls.shrink_to_fit();
892904
}
905+
*/
906+
todo!()
893907
}
894908

895909
fn offset(&self) -> usize {
@@ -944,7 +958,7 @@ impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
944958
fn from(value: ArrayData) -> Self {
945959
let views = value.buffers()[0].clone();
946960
let views = ScalarBuffer::new(views, value.offset(), value.len());
947-
let buffers = value.buffers()[1..].to_vec();
961+
let buffers = value.buffers()[1..].to_vec().into();
948962
Self {
949963
data_type: T::DATA_TYPE,
950964
views,
@@ -1012,12 +1026,15 @@ where
10121026
}
10131027

10141028
impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
1015-
fn from(mut array: GenericByteViewArray<T>) -> Self {
1029+
fn from(array: GenericByteViewArray<T>) -> Self {
10161030
let len = array.len();
1017-
array.buffers.insert(0, array.views.into_inner());
1031+
1032+
let mut buffers = array.buffers.iter().cloned().collect::<Vec<_>>();
1033+
buffers.insert(0, array.views.into_inner());
1034+
10181035
let builder = ArrayDataBuilder::new(T::DATA_TYPE)
10191036
.len(len)
1020-
.buffers(array.buffers)
1037+
.buffers(buffers)
10211038
.nulls(array.nulls);
10221039

10231040
unsafe { builder.build_unchecked() }

0 commit comments

Comments
 (0)