Skip to content

Commit f01bcc9

Browse files
committed
Avoid clones while creating Arrays from ArrayData
1 parent 372b59c commit f01bcc9

File tree

13 files changed

+161
-128
lines changed

13 files changed

+161
-128
lines changed

arrow-array/src/array/boolean_array.rs

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -389,24 +389,21 @@ impl From<Vec<Option<bool>>> for BooleanArray {
389389

390390
impl From<ArrayData> for BooleanArray {
391391
fn from(data: ArrayData) -> Self {
392+
let (data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
392393
assert_eq!(
393-
data.data_type(),
394-
&DataType::Boolean,
395-
"BooleanArray expected ArrayData with type {} got {}",
394+
data_type,
396395
DataType::Boolean,
397-
data.data_type()
396+
"BooleanArray expected ArrayData with type Boolean got {data_type:?}",
398397
);
399398
assert_eq!(
400-
data.buffers().len(),
399+
buffers.len(),
401400
1,
402401
"BooleanArray data should contain a single buffer only (values buffer)"
403402
);
404-
let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
403+
let buffer = buffers.pop().expect("checked above");
404+
let values = BooleanBuffer::new(buffer, offset, len);
405405

406-
Self {
407-
values,
408-
nulls: data.nulls().cloned(),
409-
}
406+
Self { values, nulls }
410407
}
411408
}
412409

arrow-array/src/array/byte_array.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -542,30 +542,34 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
542542

543543
impl<T: ByteArrayType> From<ArrayData> for GenericByteArray<T> {
544544
fn from(data: ArrayData) -> Self {
545+
let (data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
545546
assert_eq!(
546-
data.data_type(),
547-
&Self::DATA_TYPE,
547+
data_type,
548+
Self::DATA_TYPE,
548549
"{}{}Array expects DataType::{}",
549550
T::Offset::PREFIX,
550551
T::PREFIX,
551552
Self::DATA_TYPE
552553
);
553554
assert_eq!(
554-
data.buffers().len(),
555+
buffers.len(),
555556
2,
556557
"{}{}Array data should contain 2 buffers only (offsets and values)",
557558
T::Offset::PREFIX,
558559
T::PREFIX,
559560
);
561+
// buffers are offset then value, so pop in reverse
562+
let value_data = buffers.pop().expect("checked above");
563+
let offset_buffer = buffers.pop().expect("checked above");
564+
560565
// SAFETY:
561566
// ArrayData is valid, and verified type above
562-
let value_offsets = unsafe { get_offsets(&data) };
563-
let value_data = data.buffers()[1].clone();
567+
let value_offsets = unsafe { get_offsets(offset_buffer, offset, len) };
564568
Self {
565569
value_offsets,
566570
value_data,
567571
data_type: T::DATA_TYPE,
568-
nulls: data.nulls().cloned(),
572+
nulls,
569573
}
570574
}
571575
}

arrow-array/src/array/dictionary_array.rs

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use crate::{
2626
use arrow_buffer::bit_util::set_bit;
2727
use arrow_buffer::buffer::NullBuffer;
2828
use arrow_buffer::{ArrowNativeType, BooleanBuffer, BooleanBufferBuilder};
29-
use arrow_data::ArrayData;
29+
use arrow_data::{ArrayData, ArrayDataBuilder};
3030
use arrow_schema::{ArrowError, DataType};
3131
use std::any::Any;
3232
use std::sync::Arc;
@@ -583,18 +583,21 @@ impl<K: ArrowDictionaryKeyType> DictionaryArray<K> {
583583
/// Constructs a `DictionaryArray` from an array data reference.
584584
impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
585585
fn from(data: ArrayData) -> Self {
586+
let (data_type, len, nulls, offset, buffers, mut child_data) = data.into_parts();
587+
586588
assert_eq!(
587-
data.buffers().len(),
589+
buffers.len(),
588590
1,
589591
"DictionaryArray data should contain a single buffer only (keys)."
590592
);
591593
assert_eq!(
592-
data.child_data().len(),
594+
child_data.len(),
593595
1,
594596
"DictionaryArray should contain a single child array (values)."
595597
);
598+
let cd = child_data.pop().expect("checked above");
596599

597-
if let DataType::Dictionary(key_data_type, _) = data.data_type() {
600+
if let DataType::Dictionary(key_data_type, _) = &data_type {
598601
assert_eq!(
599602
&T::DATA_TYPE,
600603
key_data_type.as_ref(),
@@ -603,17 +606,17 @@ impl<T: ArrowDictionaryKeyType> From<ArrayData> for DictionaryArray<T> {
603606
key_data_type
604607
);
605608

606-
let values = make_array(data.child_data()[0].clone());
607-
let data_type = data.data_type().clone();
609+
let values = make_array(cd);
608610

609611
// create a zero-copy of the keys' data
610612
// SAFETY:
611613
// ArrayData is valid and verified type above
612-
613614
let keys = PrimitiveArray::<T>::from(unsafe {
614-
data.into_builder()
615-
.data_type(T::DATA_TYPE)
616-
.child_data(vec![])
615+
ArrayDataBuilder::new(T::DATA_TYPE)
616+
.buffers(buffers)
617+
.nulls(nulls)
618+
.offset(offset)
619+
.len(len)
617620
.build_unchecked()
618621
});
619622

arrow-array/src/array/fixed_size_binary_array.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -497,24 +497,25 @@ impl FixedSizeBinaryArray {
497497

498498
impl From<ArrayData> for FixedSizeBinaryArray {
499499
fn from(data: ArrayData) -> Self {
500+
let (data_type, len, nulls, offset, buffers, _child_data) = data.into_parts();
501+
500502
assert_eq!(
501-
data.buffers().len(),
503+
buffers.len(),
502504
1,
503505
"FixedSizeBinaryArray data should contain 1 buffer only (values)"
504506
);
505-
let value_length = match data.data_type() {
506-
DataType::FixedSizeBinary(len) => *len,
507+
let value_length = match data_type {
508+
DataType::FixedSizeBinary(len) => len,
507509
_ => panic!("Expected data type to be FixedSizeBinary"),
508510
};
509511

510512
let size = value_length as usize;
511-
let value_data =
512-
data.buffers()[0].slice_with_length(data.offset() * size, data.len() * size);
513+
let value_data = buffers[0].slice_with_length(offset * size, len * size);
513514

514515
Self {
515-
data_type: data.data_type().clone(),
516-
nulls: data.nulls().cloned(),
517-
len: data.len(),
516+
data_type,
517+
nulls,
518+
len,
518519
value_data,
519520
value_length,
520521
}

arrow-array/src/array/fixed_size_list_array.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -429,8 +429,10 @@ impl FixedSizeListArray {
429429

430430
impl From<ArrayData> for FixedSizeListArray {
431431
fn from(data: ArrayData) -> Self {
432-
let value_length = match data.data_type() {
433-
DataType::FixedSizeList(_, len) => *len,
432+
let (data_type, len, nulls, offset, _buffers, child_data) = data.into_parts();
433+
434+
let value_length = match data_type {
435+
DataType::FixedSizeList(_, len) => len,
434436
data_type => {
435437
panic!(
436438
"FixedSizeListArray data should contain a FixedSizeList data type, got {data_type}"
@@ -439,14 +441,13 @@ impl From<ArrayData> for FixedSizeListArray {
439441
};
440442

441443
let size = value_length as usize;
442-
let values =
443-
make_array(data.child_data()[0].slice(data.offset() * size, data.len() * size));
444+
let values = make_array(child_data[0].slice(offset * size, len * size));
444445
Self {
445-
data_type: data.data_type().clone(),
446+
data_type,
446447
values,
447-
nulls: data.nulls().cloned(),
448+
nulls,
448449
value_length,
449-
len: data.len(),
450+
len,
450451
}
451452
}
452453
}

arrow-array/src/array/list_array.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -479,23 +479,26 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<
479479

480480
impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
481481
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
482-
if data.buffers().len() != 1 {
482+
let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
483+
484+
if buffers.len() != 1 {
483485
return Err(ArrowError::InvalidArgumentError(format!(
484486
"ListArray data should contain a single buffer only (value offsets), had {}",
485-
data.buffers().len()
487+
buffers.len()
486488
)));
487489
}
490+
let buffer = buffers.pop().expect("checked above");
488491

489-
if data.child_data().len() != 1 {
492+
if child_data.len() != 1 {
490493
return Err(ArrowError::InvalidArgumentError(format!(
491494
"ListArray should contain a single child array (values array), had {}",
492-
data.child_data().len()
495+
child_data.len()
493496
)));
494497
}
495498

496-
let values = data.child_data()[0].clone();
499+
let values = child_data.pop().expect("checked above");
497500

498-
if let Some(child_data_type) = Self::get_type(data.data_type()) {
501+
if let Some(child_data_type) = Self::get_type(&data_type) {
499502
if values.data_type() != child_data_type {
500503
return Err(ArrowError::InvalidArgumentError(format!(
501504
"[Large]ListArray's child datatype {:?} does not \
@@ -506,19 +509,18 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
506509
}
507510
} else {
508511
return Err(ArrowError::InvalidArgumentError(format!(
509-
"[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
510-
data.data_type()
512+
"[Large]ListArray's datatype must be [Large]ListArray(). It is {data_type:?}",
511513
)));
512514
}
513515

514516
let values = make_array(values);
515517
// SAFETY:
516518
// ArrayData is valid, and verified type above
517-
let value_offsets = unsafe { get_offsets(&data) };
519+
let value_offsets = unsafe { get_offsets(buffer, offset, len) };
518520

519521
Ok(Self {
520-
data_type: data.data_type().clone(),
521-
nulls: data.nulls().cloned(),
522+
data_type,
523+
nulls,
522524
values,
523525
value_offsets,
524526
})

arrow-array/src/array/list_view_array.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -576,23 +576,25 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewAr
576576

577577
impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
578578
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
579-
if data.buffers().len() != 2 {
579+
let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
580+
581+
if buffers.len() != 2 {
580582
return Err(ArrowError::InvalidArgumentError(format!(
581583
"ListViewArray data should contain two buffers (value offsets & value sizes), had {}",
582-
data.buffers().len()
584+
buffers.len()
583585
)));
584586
}
585587

586-
if data.child_data().len() != 1 {
588+
if child_data.len() != 1 {
587589
return Err(ArrowError::InvalidArgumentError(format!(
588590
"ListViewArray should contain a single child array (values array), had {}",
589-
data.child_data().len()
591+
child_data.len()
590592
)));
591593
}
592594

593-
let values = data.child_data()[0].clone();
595+
let values = child_data.pop().expect("checked above");
594596

595-
if let Some(child_data_type) = Self::get_type(data.data_type()) {
597+
if let Some(child_data_type) = Self::get_type(&data_type) {
596598
if values.data_type() != child_data_type {
597599
return Err(ArrowError::InvalidArgumentError(format!(
598600
"{}ListViewArray's child datatype {:?} does not \
@@ -607,18 +609,21 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
607609
"{}ListViewArray's datatype must be {}ListViewArray(). It is {:?}",
608610
OffsetSize::PREFIX,
609611
OffsetSize::PREFIX,
610-
data.data_type()
612+
data_type
611613
)));
612614
}
613615

614616
let values = make_array(values);
615617
// ArrayData is valid, and verified type above
616-
let value_offsets = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
617-
let value_sizes = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len());
618+
// buffer[0] is offsets, buffer[1] is sizes
619+
let sizes_buffer = buffers.pop().expect("checked above");
620+
let offsets_buffer = buffers.pop().expect("checked above");
621+
let value_offsets = ScalarBuffer::new(offsets_buffer, offset, len);
622+
let value_sizes = ScalarBuffer::new(sizes_buffer, offset, len);
618623

619624
Ok(Self {
620-
data_type: data.data_type().clone(),
621-
nulls: data.nulls().cloned(),
625+
data_type,
626+
nulls,
622627
values,
623628
value_offsets,
624629
value_sizes,

arrow-array/src/array/map_array.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -272,28 +272,29 @@ impl From<MapArray> for ArrayData {
272272

273273
impl MapArray {
274274
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
275-
if !matches!(data.data_type(), DataType::Map(_, _)) {
275+
let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
276+
277+
if !matches!(data_type, DataType::Map(_, _)) {
276278
return Err(ArrowError::InvalidArgumentError(format!(
277-
"MapArray expected ArrayData with DataType::Map got {}",
278-
data.data_type()
279+
"MapArray expected ArrayData with DataType::Map got {data_type}",
279280
)));
280281
}
281282

282-
if data.buffers().len() != 1 {
283+
if buffers.len() != 1 {
283284
return Err(ArrowError::InvalidArgumentError(format!(
284285
"MapArray data should contain a single buffer only (value offsets), had {}",
285-
data.len()
286+
buffers.len(),
286287
)));
287288
}
289+
let buffer = buffers.pop().expect("checked above");
288290

289-
if data.child_data().len() != 1 {
291+
if child_data.len() != 1 {
290292
return Err(ArrowError::InvalidArgumentError(format!(
291293
"MapArray should contain a single child array (values array), had {}",
292-
data.child_data().len()
294+
child_data.len()
293295
)));
294296
}
295-
296-
let entries = data.child_data()[0].clone();
297+
let entries = child_data.pop().expect("checked above");
297298

298299
if let DataType::Struct(fields) = entries.data_type() {
299300
if fields.len() != 2 {
@@ -312,11 +313,11 @@ impl MapArray {
312313

313314
// SAFETY:
314315
// ArrayData is valid, and verified type above
315-
let value_offsets = unsafe { get_offsets(&data) };
316+
let value_offsets = unsafe { get_offsets(buffer, offset, len) };
316317

317318
Ok(Self {
318-
data_type: data.data_type().clone(),
319-
nulls: data.nulls().cloned(),
319+
data_type,
320+
nulls,
320321
entries,
321322
value_offsets,
322323
})

0 commit comments

Comments
 (0)