Skip to content

Commit 604c2f5

Browse files
authored
fix: Slicing empty chunked arrays (#1870)
first finding of the new file-targeted fuzzer (which is still WIP).
1 parent c10d553 commit 604c2f5

File tree

4 files changed

+55
-13
lines changed

4 files changed

+55
-13
lines changed

vortex-array/src/array/chunked/compute/compare.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ impl CompareFn<ChunkedArray> for ChunkedEncoding {
1515
let mut idx = 0;
1616
let mut compare_chunks = Vec::with_capacity(lhs.nchunks());
1717

18-
for chunk in lhs.chunks() {
18+
for chunk in lhs.chunks().filter(|c| !c.is_empty()) {
1919
let sliced = slice(rhs, idx, idx + chunk.len())?;
2020
let cmp_result = compare(&chunk, &sliced, operator)?;
21-
compare_chunks.push(cmp_result);
2221

22+
compare_chunks.push(cmp_result);
2323
idx += chunk.len();
2424
}
2525

@@ -32,3 +32,24 @@ impl CompareFn<ChunkedArray> for ChunkedEncoding {
3232
))
3333
}
3434
}
35+
36+
#[cfg(test)]
37+
mod tests {
38+
39+
use super::*;
40+
use crate::array::StructArray;
41+
use crate::validity::Validity;
42+
43+
#[test]
44+
fn empty_compare() {
45+
let base = StructArray::try_new([].into(), [].into(), 0, Validity::NonNullable)
46+
.unwrap()
47+
.into_array();
48+
let chunked =
49+
ChunkedArray::try_new(vec![base.clone(), base.clone()], base.dtype().clone()).unwrap();
50+
let chunked_empty = ChunkedArray::try_new(vec![], base.dtype().clone()).unwrap();
51+
52+
let r = compare(&chunked, &chunked_empty, Operator::Eq).unwrap();
53+
assert!(r.is_empty());
54+
}
55+
}

vortex-array/src/array/chunked/compute/slice.rs

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1-
use vortex_error::VortexResult;
1+
use vortex_error::{vortex_bail, VortexResult};
22

33
use crate::array::chunked::ChunkedArray;
44
use crate::array::ChunkedEncoding;
55
use crate::compute::{slice, SliceFn};
6-
use crate::{ArrayDType, ArrayData, IntoArrayData};
6+
use crate::{ArrayDType, ArrayData, ArrayLen, IntoArrayData};
77

88
impl SliceFn<ChunkedArray> for ChunkedEncoding {
99
fn slice(&self, array: &ChunkedArray, start: usize, stop: usize) -> VortexResult<ArrayData> {
1010
let (offset_chunk, offset_in_first_chunk) = array.find_chunk_idx(start);
1111
let (length_chunk, length_in_last_chunk) = array.find_chunk_idx(stop);
1212

13+
if array.is_empty() && (start != 0 || stop != 0) {
14+
vortex_bail!(ComputeError: "Empty chunked array can't be sliced from {start} to {stop}");
15+
} else if array.is_empty() {
16+
return Ok(ChunkedArray::try_new(vec![], array.dtype().clone())?.into_array());
17+
}
18+
1319
if length_chunk == offset_chunk {
1420
let chunk = array.chunk(offset_chunk)?;
1521
return Ok(ChunkedArray::try_new(
@@ -67,44 +73,52 @@ mod tests {
6773
}
6874

6975
#[test]
70-
pub fn slice_middle() {
76+
fn slice_middle() {
7177
assert_equal_slices(
7278
slice(chunked_array().as_ref(), 2, 5).unwrap(),
7379
&[3u64, 4, 5],
7480
)
7581
}
7682

7783
#[test]
78-
pub fn slice_begin() {
84+
fn slice_begin() {
7985
assert_equal_slices(slice(chunked_array().as_ref(), 1, 3).unwrap(), &[2u64, 3]);
8086
}
8187

8288
#[test]
83-
pub fn slice_aligned() {
89+
fn slice_aligned() {
8490
assert_equal_slices(
8591
slice(chunked_array().as_ref(), 3, 6).unwrap(),
8692
&[4u64, 5, 6],
8793
);
8894
}
8995

9096
#[test]
91-
pub fn slice_many_aligned() {
97+
fn slice_many_aligned() {
9298
assert_equal_slices(
9399
slice(chunked_array().as_ref(), 0, 6).unwrap(),
94100
&[1u64, 2, 3, 4, 5, 6],
95101
);
96102
}
97103

98104
#[test]
99-
pub fn slice_end() {
105+
fn slice_end() {
100106
assert_equal_slices(slice(chunked_array().as_ref(), 7, 8).unwrap(), &[8u64]);
101107
}
102108

103109
#[test]
104-
pub fn slice_exactly_end() {
110+
fn slice_exactly_end() {
105111
assert_equal_slices(
106112
slice(chunked_array().as_ref(), 6, 9).unwrap(),
107113
&[7u64, 8, 9],
108114
);
109115
}
116+
117+
#[test]
118+
fn slice_empty() {
119+
let chunked = ChunkedArray::try_new(vec![], PType::U32.into()).unwrap();
120+
let sliced = slice(chunked, 0, 0).unwrap();
121+
122+
assert!(sliced.is_empty());
123+
}
110124
}

vortex-array/src/array/chunked/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ impl ChunkedArray {
128128
self.chunk(c).unwrap_or_else(|e| {
129129
vortex_panic!(
130130
e,
131-
"ChunkedArray: chunks: chunk {} should exist (nchunks: {})",
131+
"ChunkedArray: chunks: chunk {} should exist (num chunks: {})",
132132
c,
133133
self.nchunks()
134134
)

vortex-array/src/compute/compare.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use vortex_scalar::Scalar;
88

99
use crate::arrow::{Datum, FromArrowArray};
1010
use crate::encoding::Encoding;
11-
use crate::{ArrayDType, ArrayData};
11+
use crate::{ArrayDType, ArrayData, Canonical, IntoArrayData};
1212

1313
#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd)]
1414
pub enum Operator {
@@ -112,6 +112,13 @@ pub fn compare(
112112
vortex_bail!("Compare operations only support arrays of the same type");
113113
}
114114

115+
let result_dtype =
116+
DType::Bool((left.dtype().is_nullable() || right.dtype().is_nullable()).into());
117+
118+
if left.is_empty() {
119+
return Ok(Canonical::empty(&result_dtype)?.into_array());
120+
}
121+
115122
// Always try to put constants on the right-hand side so encodings can optimise themselves.
116123
if left.is_constant() && !right.is_constant() {
117124
return compare(right, left, operator.swap());
@@ -152,7 +159,7 @@ pub fn compare(
152159
);
153160
debug_assert_eq!(
154161
result.dtype(),
155-
&DType::Bool((left.dtype().is_nullable() || right.dtype().is_nullable()).into()),
162+
&result_dtype,
156163
"Compare dtype mismatch {}",
157164
right.encoding().id()
158165
);

0 commit comments

Comments
 (0)