Skip to content

Commit f4fcd77

Browse files
committed
Avoid clones while creating Arrays from ArrayData
1 parent 814ee42 commit f4fcd77

16 files changed

+206
-142
lines changed

arrow-array/src/array/boolean_array.rs

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -387,24 +387,21 @@ impl From<Vec<Option<bool>>> for BooleanArray {
387387

388388
impl From<ArrayData> for BooleanArray {
389389
fn from(data: ArrayData) -> Self {
390+
let (data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
390391
assert_eq!(
391-
data.data_type(),
392-
&DataType::Boolean,
393-
"BooleanArray expected ArrayData with type {} got {}",
392+
data_type,
394393
DataType::Boolean,
395-
data.data_type()
394+
"BooleanArray expected ArrayData with type Boolean got {data_type:?}",
396395
);
397396
assert_eq!(
398-
data.buffers().len(),
397+
buffers.len(),
399398
1,
400399
"BooleanArray data should contain a single buffer only (values buffer)"
401400
);
402-
let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
401+
let buffer = buffers.pop().expect("checked above");
402+
let values = BooleanBuffer::new(buffer, offset, len);
403403

404-
Self {
405-
values,
406-
nulls: data.nulls().cloned(),
407-
}
404+
Self { values, nulls }
408405
}
409406
}
410407

arrow-array/src/array/byte_array.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -540,30 +540,34 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
540540

541541
impl<T: ByteArrayType> From<ArrayData> for GenericByteArray<T> {
542542
fn from(data: ArrayData) -> Self {
543+
let (data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
543544
assert_eq!(
544-
data.data_type(),
545-
&Self::DATA_TYPE,
545+
data_type,
546+
Self::DATA_TYPE,
546547
"{}{}Array expects DataType::{}",
547548
T::Offset::PREFIX,
548549
T::PREFIX,
549550
Self::DATA_TYPE
550551
);
551552
assert_eq!(
552-
data.buffers().len(),
553+
buffers.len(),
553554
2,
554555
"{}{}Array data should contain 2 buffers only (offsets and values)",
555556
T::Offset::PREFIX,
556557
T::PREFIX,
557558
);
559+
// buffers are offset then value, so pop in reverse
560+
let value_data = buffers.pop().expect("checked above");
561+
let offset_buffer = buffers.pop().expect("checked above");
562+
558563
// SAFETY:
559564
// ArrayData is valid, and verified type above
560-
let value_offsets = unsafe { get_offsets(&data) };
561-
let value_data = data.buffers()[1].clone();
565+
let value_offsets = unsafe { get_offsets(offset_buffer, offset, len) };
562566
Self {
563567
value_offsets,
564568
value_data,
565569
data_type: T::DATA_TYPE,
566-
nulls: data.nulls().cloned(),
570+
nulls,
567571
}
568572
}
569573
}

arrow-array/src/array/byte_view_array.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -941,15 +941,15 @@ impl<'a, T: ByteViewType + ?Sized> IntoIterator for &'a GenericByteViewArray<T>
941941
}
942942

943943
impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
944-
fn from(value: ArrayData) -> Self {
945-
let views = value.buffers()[0].clone();
946-
let views = ScalarBuffer::new(views, value.offset(), value.len());
947-
let buffers = value.buffers()[1..].to_vec();
944+
fn from(data: ArrayData) -> Self {
945+
let (_data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
946+
let views = buffers.remove(0); // need to maintain order of remaining buffers
947+
let views = ScalarBuffer::new(views, offset, len);
948948
Self {
949949
data_type: T::DATA_TYPE,
950950
views,
951951
buffers,
952-
nulls: value.nulls().cloned(),
952+
nulls,
953953
phantom: Default::default(),
954954
}
955955
}

arrow-array/src/array/dictionary_array.rs

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use crate::{
2626
use arrow_buffer::bit_util::set_bit;
2727
use arrow_buffer::buffer::NullBuffer;
2828
use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder};
29-
use arrow_data::ArrayData;
29+
use arrow_data::{ArrayData, ArrayDataBuilder};
3030
use arrow_schema::{ArrowError, DataType};
3131
use std::any::Any;
3232
use std::sync::Arc;
@@ -583,18 +583,21 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
583583
/// Constructs a `DictionaryArray` from an array data reference.
584584
impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
585585
fn from(data: ArrayData) -> Self {
586+
let (data_type, len, nulls, offset, buffers, mut child_data) = data.into_parts();
587+
586588
assert_eq!(
587-
data.buffers().len(),
589+
buffers.len(),
588590
1,
589591
"DictionaryArray data should contain a single buffer only (keys)."
590592
);
591593
assert_eq!(
592-
data.child_data().len(),
594+
child_data.len(),
593595
1,
594596
"DictionaryArray should contain a single child array (values)."
595597
);
598+
let cd = child_data.pop().expect("checked above");
596599

597-
if let DataType::Dictionary(key_data_type, _) = data.data_type() {
600+
if let DataType::Dictionary(key_data_type, _) = &data_type {
598601
assert_eq!(
599602
&T::DATA_TYPE,
600603
key_data_type.as_ref(),
@@ -603,17 +606,17 @@ impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
603606
key_data_type
604607
);
605608

606-
let values = make_array(data.child_data()[0].clone());
607-
let data_type = data.data_type().clone();
609+
let values = make_array(cd);
608610

609611
// create a zero-copy of the keys' data
610612
// SAFETY:
611613
// ArrayData is valid and verified type above
612-
613614
let keys = PrimitiveArray::<T>::from(unsafe {
614-
data.into_builder()
615-
.data_type(T::DATA_TYPE)
616-
.child_data(vec![])
615+
ArrayDataBuilder::new(T::DATA_TYPE)
616+
.buffers(buffers)
617+
.nulls(nulls)
618+
.offset(offset)
619+
.len(len)
617620
.build_unchecked()
618621
});
619622

arrow-array/src/array/fixed_size_binary_array.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -497,24 +497,25 @@ impl FixedSizeBinaryArray {
497497

498498
impl From<ArrayData> for FixedSizeBinaryArray {
499499
fn from(data: ArrayData) -> Self {
500+
let (data_type, len, nulls, offset, buffers, _child_data) = data.into_parts();
501+
500502
assert_eq!(
501-
data.buffers().len(),
503+
buffers.len(),
502504
1,
503505
"FixedSizeBinaryArray data should contain 1 buffer only (values)"
504506
);
505-
let value_length = match data.data_type() {
506-
DataType::FixedSizeBinary(len) => *len,
507+
let value_length = match data_type {
508+
DataType::FixedSizeBinary(len) => len,
507509
_ => panic!("Expected data type to be FixedSizeBinary"),
508510
};
509511

510512
let size = value_length as usize;
511-
let value_data =
512-
data.buffers()[0].slice_with_length(data.offset() * size, data.len() * size);
513+
let value_data = buffers[0].slice_with_length(offset * size, len * size);
513514

514515
Self {
515-
data_type: data.data_type().clone(),
516-
nulls: data.nulls().cloned(),
517-
len: data.len(),
516+
data_type,
517+
nulls,
518+
len,
518519
value_data,
519520
value_length,
520521
}

arrow-array/src/array/fixed_size_list_array.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -429,8 +429,10 @@ impl FixedSizeListArray {
429429

430430
impl From<ArrayData> for FixedSizeListArray {
431431
fn from(data: ArrayData) -> Self {
432-
let value_length = match data.data_type() {
433-
DataType::FixedSizeList(_, len) => *len,
432+
let (data_type, len, nulls, offset, _buffers, child_data) = data.into_parts();
433+
434+
let value_length = match data_type {
435+
DataType::FixedSizeList(_, len) => len,
434436
data_type => {
435437
panic!(
436438
"FixedSizeListArray data should contain a FixedSizeList data type, got {data_type}"
@@ -439,14 +441,13 @@ impl From<ArrayData> for FixedSizeListArray {
439441
};
440442

441443
let size = value_length as usize;
442-
let values =
443-
make_array(data.child_data()[0].slice(data.offset() * size, data.len() * size));
444+
let values = make_array(child_data[0].slice(offset * size, len * size));
444445
Self {
445-
data_type: data.data_type().clone(),
446+
data_type,
446447
values,
447-
nulls: data.nulls().cloned(),
448+
nulls,
448449
value_length,
449-
len: data.len(),
450+
len,
450451
}
451452
}
452453
}

arrow-array/src/array/list_array.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -479,23 +479,26 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<
479479

480480
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
481481
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
482-
if data.buffers().len() != 1 {
482+
let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
483+
484+
if buffers.len() != 1 {
483485
return Err(ArrowError::InvalidArgumentError(format!(
484486
"ListArray data should contain a single buffer only (value offsets), had {}",
485-
data.buffers().len()
487+
buffers.len()
486488
)));
487489
}
490+
let buffer = buffers.pop().expect("checked above");
488491

489-
if data.child_data().len() != 1 {
492+
if child_data.len() != 1 {
490493
return Err(ArrowError::InvalidArgumentError(format!(
491494
"ListArray should contain a single child array (values array), had {}",
492-
data.child_data().len()
495+
child_data.len()
493496
)));
494497
}
495498

496-
let values = data.child_data()[0].clone();
499+
let values = child_data.pop().expect("checked above");
497500

498-
if let Some(child_data_type) = Self::get_type(data.data_type()) {
501+
if let Some(child_data_type) = Self::get_type(&data_type) {
499502
if values.data_type() != child_data_type {
500503
return Err(ArrowError::InvalidArgumentError(format!(
501504
"[Large]ListArray's child datatype {:?} does not \
@@ -506,19 +509,18 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
506509
}
507510
} else {
508511
return Err(ArrowError::InvalidArgumentError(format!(
509-
"[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
510-
data.data_type()
512+
"[Large]ListArray's datatype must be [Large]ListArray(). It is {data_type:?}",
511513
)));
512514
}
513515

514516
let values = make_array(values);
515517
// SAFETY:
516518
// ArrayData is valid, and verified type above
517-
let value_offsets = unsafe { get_offsets(&data) };
519+
let value_offsets = unsafe { get_offsets(buffer, offset, len) };
518520

519521
Ok(Self {
520-
data_type: data.data_type().clone(),
521-
nulls: data.nulls().cloned(),
522+
data_type,
523+
nulls,
522524
values,
523525
value_offsets,
524526
})

arrow-array/src/array/list_view_array.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -574,23 +574,25 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewAr
574574

575575
impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
576576
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
577-
if data.buffers().len() != 2 {
577+
let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
578+
579+
if buffers.len() != 2 {
578580
return Err(ArrowError::InvalidArgumentError(format!(
579581
"ListViewArray data should contain two buffers (value offsets & value sizes), had {}",
580-
data.buffers().len()
582+
buffers.len()
581583
)));
582584
}
583585

584-
if data.child_data().len() != 1 {
586+
if child_data.len() != 1 {
585587
return Err(ArrowError::InvalidArgumentError(format!(
586588
"ListViewArray should contain a single child array (values array), had {}",
587-
data.child_data().len()
589+
child_data.len()
588590
)));
589591
}
590592

591-
let values = data.child_data()[0].clone();
593+
let values = child_data.pop().expect("checked above");
592594

593-
if let Some(child_data_type) = Self::get_type(data.data_type()) {
595+
if let Some(child_data_type) = Self::get_type(&data_type) {
594596
if values.data_type() != child_data_type {
595597
return Err(ArrowError::InvalidArgumentError(format!(
596598
"{}ListViewArray's child datatype {:?} does not \
@@ -605,18 +607,21 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
605607
"{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}",
606608
OffsetSize::PREFIX,
607609
OffsetSize::PREFIX,
608-
data.data_type()
610+
data_type
609611
)));
610612
}
611613

612614
let values = make_array(values);
613615
// ArrayData is valid, and verified type above
614-
let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
615-
let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len());
616+
// buffer[0] is offsets, buffer[1] is sizes
617+
let sizes_buffer = buffers.pop().expect("checked above");
618+
let offsets_buffer = buffers.pop().expect("checked above");
619+
let value_offsets = ScalarBuffer::new(offsets_buffer, offset, len);
620+
let value_sizes = ScalarBuffer::new(sizes_buffer, offset, len);
616621

617622
Ok(Self {
618-
data_type: data.data_type().clone(),
619-
nulls: data.nulls().cloned(),
623+
data_type,
624+
nulls,
620625
values,
621626
value_offsets,
622627
value_sizes,

arrow-array/src/array/map_array.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -272,28 +272,29 @@ impl From<MapArray> for ArrayData {
272272

273273
impl MapArray {
274274
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
275-
if !matches!(data.data_type(), DataType::Map(_, _)) {
275+
let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
276+
277+
if !matches!(data_type, DataType::Map(_, _)) {
276278
return Err(ArrowError::InvalidArgumentError(format!(
277-
"MapArray expected ArrayData with DataType::Map got {}",
278-
data.data_type()
279+
"MapArray expected ArrayData with DataType::Map got {data_type}",
279280
)));
280281
}
281282

282-
if data.buffers().len() != 1 {
283+
if buffers.len() != 1 {
283284
return Err(ArrowError::InvalidArgumentError(format!(
284285
"MapArray data should contain a single buffer only (value offsets), had {}",
285-
data.len()
286+
buffers.len(),
286287
)));
287288
}
289+
let buffer = buffers.pop().expect("checked above");
288290

289-
if data.child_data().len() != 1 {
291+
if child_data.len() != 1 {
290292
return Err(ArrowError::InvalidArgumentError(format!(
291293
"MapArray should contain a single child array (values array), had {}",
292-
data.child_data().len()
294+
child_data.len()
293295
)));
294296
}
295-
296-
let entries = data.child_data()[0].clone();
297+
let entries = child_data.pop().expect("checked above");
297298

298299
if let DataType::Struct(fields) = entries.data_type() {
299300
if fields.len() != 2 {
@@ -312,11 +313,11 @@ impl MapArray {
312313

313314
// SAFETY:
314315
// ArrayData is valid, and verified type above
315-
let value_offsets = unsafe { get_offsets(&data) };
316+
let value_offsets = unsafe { get_offsets(buffer, offset, len) };
316317

317318
Ok(Self {
318-
data_type: data.data_type().clone(),
319-
nulls: data.nulls().cloned(),
319+
data_type,
320+
nulls,
320321
entries,
321322
value_offsets,
322323
})

0 commit comments

Comments
 (0)