Skip to content

Commit 3594454

Browse files
committed
fix: performance regression for filtering ListView
Signed-off-by: Andrew Duffy <[email protected]>
1 parent 35d12a3 commit 3594454

File tree

1 file changed

+49
-15
lines changed

1 file changed

+49
-15
lines changed

vortex-array/src/arrays/listview/rebuild.rs

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use num_traits::FromPrimitive;
5+
use vortex_buffer::BufferMut;
56
use vortex_dtype::{IntegerPType, Nullability, match_each_integer_ptype};
67
use vortex_error::VortexExpect;
78
use vortex_scalar::Scalar;
89

9-
use crate::arrays::ListViewArray;
10-
use crate::builders::{ArrayBuilder, ListViewBuilder};
10+
use crate::arrays::{ChunkedArray, ListViewArray};
11+
use crate::builders::ArrayBuilder;
1112
use crate::vtable::ValidityHelper;
12-
use crate::{Array, compute};
13+
use crate::{Array, IntoArray, ToCanonical, compute};
1314

1415
/// Modes for rebuilding a [`ListViewArray`].
1516
pub enum ListViewRebuildMode {
@@ -87,22 +88,55 @@ impl ListViewArray {
8788
.as_list_element_opt()
8889
.vortex_expect("somehow had a canonical list that was not a list");
8990

90-
let mut builder = ListViewBuilder::<O, S>::with_capacity(
91-
element_dtype.clone(),
92-
self.dtype().nullability(),
93-
self.elements().len(),
94-
self.len(),
95-
);
91+
// Upfront canonicalize the list elements, we're going to be doing a lot of
92+
// slicing with them.
93+
let elements_canonical = self.elements().to_canonical().into_array();
94+
let offsets_canonical = self.offsets().to_primitive();
95+
let sizes_canonical = self.offsets().to_primitive();
96+
97+
let offsets_canonical = offsets_canonical.as_slice::<O>();
98+
let sizes_canonical = sizes_canonical.as_slice::<S>();
99+
100+
let mut offsets = BufferMut::<u32>::with_capacity(self.len());
101+
let mut sizes = BufferMut::<S>::with_capacity(self.len());
102+
103+
let mut chunks = Vec::with_capacity(self.len());
104+
105+
let mut n_elements = 0u32;
96106

97-
for i in 0..self.len() {
98-
let list = self.scalar_at(i);
107+
for index in 0..self.len() {
108+
if !self.is_valid(index) {
109+
offsets.push(offsets.last().copied().unwrap_or_default());
110+
sizes.push(S::zero());
111+
continue;
112+
}
99113

100-
builder
101-
.append_scalar(&list)
102-
.vortex_expect("was unable to extend the `ListViewBuilder`")
114+
let offset = offsets_canonical[index];
115+
let size = sizes_canonical[index];
116+
117+
let start = offset.as_();
118+
let stop = start + size.as_();
119+
120+
chunks.push(elements_canonical.slice(start..stop));
121+
offsets.push(n_elements);
122+
sizes.push(size);
123+
124+
n_elements += size.to_u32().vortex_expect("to_u32");
103125
}
104126

105-
builder.finish_into_listview()
127+
let offsets = offsets.into_array();
128+
let sizes = sizes.into_array();
129+
130+
// SAFETY: all chunks were sliced from the same array so have same DType.
131+
let elements =
132+
unsafe { ChunkedArray::new_unchecked(chunks, element_dtype.as_ref().clone()) };
133+
134+
ListViewArray::new(
135+
elements.to_canonical().into_array(),
136+
offsets,
137+
sizes,
138+
self.validity.clone(),
139+
)
106140
}
107141

108142
/// Rebuilds a [`ListViewArray`] by trimming any unused / unreferenced leading and trailing

0 commit comments

Comments
 (0)