Skip to content

Commit fa5644e

Browse files
committed
implement DictArray batched execute
Signed-off-by: Connor Tsui <[email protected]>
1 parent 9af4e14 commit fa5644e

File tree

3 files changed

+487
-12
lines changed

3 files changed

+487
-12
lines changed

vortex-array/src/arrays/dict/vtable/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,21 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use vortex_buffer::BufferHandle;
5+
use vortex_compute::take::Take;
56
use vortex_dtype::DType;
67
use vortex_dtype::Nullability;
78
use vortex_dtype::PType;
89
use vortex_error::VortexResult;
910
use vortex_error::vortex_bail;
1011
use vortex_error::vortex_err;
12+
use vortex_vector::Vector;
1113

1214
use super::DictArray;
1315
use super::DictMetadata;
1416
use crate::DeserializeMetadata;
1517
use crate::ProstMetadata;
1618
use crate::SerializeMetadata;
19+
use crate::execution::ExecutionCtx;
1720
use crate::serde::ArrayChildren;
1821
use crate::vtable;
1922
use crate::vtable::ArrayId;
@@ -31,6 +34,9 @@ mod visitor;
3134

3235
vtable!(Dict);
3336

37+
#[cfg(test)]
38+
mod tests;
39+
3440
#[derive(Debug)]
3541
pub struct DictVTable;
3642

@@ -108,4 +114,10 @@ impl VTable for DictVTable {
108114
DictArray::new_unchecked(codes, values).set_all_values_referenced(all_values_referenced)
109115
})
110116
}
117+
118+
/// Executes a dictionary array in batch form.
///
/// Both children are batch-executed with the same `ctx`; the executed values are then
/// indexed by the executed codes via `take`, and the taken vector is returned.
///
/// # Errors
///
/// Propagates any error returned while batch-executing the values or the codes.
fn batch_execute(array: &DictArray, ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
119+
let values = array.values().batch_execute(ctx)?;
120+
// NOTE(review): `into_primitive()` is not followed by `?`, so it is not fallible here;
// presumably dictionary codes are always primitive — confirm.
let codes = array.codes().batch_execute(ctx)?.into_primitive();
121+
Ok(values.take(&codes))
122+
}
111123
}
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Tests for the optimized boolean take in dictionary batch execution.
5+
6+
use vortex_buffer::buffer;
7+
use vortex_session::VortexSession;
8+
use vortex_vector::VectorOps;
9+
use vortex_vector::bool::BoolVector;
10+
11+
use crate::IntoArray;
12+
use crate::arrays::BoolArray;
13+
use crate::arrays::PrimitiveArray;
14+
use crate::arrays::dict::DictArray;
15+
16+
/// Helper that builds a `DictArray` from `codes` and `values`, executes it with an empty
/// `VortexSession`, and returns the result converted with `into_bool()`.
///
/// Panics (via `unwrap`) if the dictionary cannot be constructed or execution fails.
///
/// NOTE(review): this calls `execute`, not `batch_execute` directly; presumably `execute`
/// dispatches to the vtable's `batch_execute` — confirm.
17+
fn bool_dict_execute(codes: impl IntoArray, values: impl IntoArray) -> BoolVector {
18+
let dict = DictArray::try_new(codes.into_array(), values.into_array()).unwrap();
19+
let session = VortexSession::empty();
20+
dict.execute(&session).unwrap().into_bool()
21+
}
22+
23+
/// Helper to assert that a `BoolVector` matches a slice of expected `Option<bool>` values,
/// comparing element by element (the tests use `None` for an expected null entry).
///
/// Panics with the offending index and both values on the first mismatch.
24+
fn assert_bool_vectors_eq(result: &BoolVector, expected: &[Option<bool>]) {
25+
// Compare lengths first so every index visited below also exists in `result`.
assert_eq!(result.len(), expected.len(), "length mismatch");
26+
for (i, expected_val) in expected.iter().enumerate() {
27+
let result_val = result.scalar_at(i);
28+
assert_eq!(
29+
result_val.value(),
30+
*expected_val,
31+
"mismatch at index {i}: result={:?}, expected={expected_val:?}",
32+
result_val.value()
33+
);
34+
}
35+
}
36+
37+
/// Test case: single true, single false (the common case).
38+
/// values = [false, true], codes = [0, 1, 0, 1, 1]
39+
/// expected = [false, true, false, true, true]
40+
#[test]
41+
fn test_bool_dict_single_true_single_false() {
42+
let values = BoolArray::from_iter([false, true]);
43+
let codes = buffer![0u8, 1, 0, 1, 1];
44+
45+
let result = bool_dict_execute(codes, values);
46+
assert_bool_vectors_eq(
47+
&result,
48+
&[Some(false), Some(true), Some(false), Some(true), Some(true)],
49+
);
50+
}
51+
52+
/// Test case: all values are true (single true, no false).
53+
/// values = [true], codes = [0, 0, 0]
54+
/// expected = [true, true, true]
55+
#[test]
56+
fn test_bool_dict_all_true() {
57+
let values = BoolArray::from_iter([true]);
58+
let codes = buffer![0u8, 0, 0];
59+
60+
let result = bool_dict_execute(codes, values);
61+
assert_bool_vectors_eq(&result, &[Some(true), Some(true), Some(true)]);
62+
}
63+
64+
/// Test case: all values are false (single false, no true).
65+
/// values = [false], codes = [0, 0, 0]
66+
/// expected = [false, false, false]
67+
#[test]
68+
fn test_bool_dict_all_false() {
69+
let values = BoolArray::from_iter([false]);
70+
let codes = buffer![0u8, 0, 0];
71+
72+
let result = bool_dict_execute(codes, values);
73+
assert_bool_vectors_eq(&result, &[Some(false), Some(false), Some(false)]);
74+
}
75+
76+
/// Test case: multiple true values, no false values.
77+
/// values = [true, true], codes = [0, 1, 0, 1]
78+
/// expected = [true, true, true, true]
79+
#[test]
80+
fn test_bool_dict_multiple_true_no_false() {
81+
let values = BoolArray::from_iter([true, true]);
82+
let codes = buffer![0u8, 1, 0, 1];
83+
84+
let result = bool_dict_execute(codes, values);
85+
assert_bool_vectors_eq(&result, &[Some(true), Some(true), Some(true), Some(true)]);
86+
}
87+
88+
/// Test case: no true values, multiple false values.
89+
/// values = [false, false], codes = [0, 1, 0, 1]
90+
/// expected = [false, false, false, false]
91+
#[test]
92+
fn test_bool_dict_no_true_multiple_false() {
93+
let values = BoolArray::from_iter([false, false]);
94+
let codes = buffer![0u8, 1, 0, 1];
95+
96+
let result = bool_dict_execute(codes, values);
97+
assert_bool_vectors_eq(
98+
&result,
99+
&[Some(false), Some(false), Some(false), Some(false)],
100+
);
101+
}
102+
103+
/// Test case: single true, multiple false values.
104+
/// values = [true, false, false], codes = [0, 1, 2, 0, 1]
105+
/// expected = [true, false, false, true, false]
106+
#[test]
107+
fn test_bool_dict_single_true_multiple_false() {
108+
let values = BoolArray::from_iter([true, false, false]);
109+
let codes = buffer![0u8, 1, 2, 0, 1];
110+
111+
let result = bool_dict_execute(codes, values);
112+
assert_bool_vectors_eq(
113+
&result,
114+
&[
115+
Some(true),
116+
Some(false),
117+
Some(false),
118+
Some(true),
119+
Some(false),
120+
],
121+
);
122+
}
123+
124+
/// Test case: multiple true values, single false.
125+
/// values = [true, true, false], codes = [0, 1, 2, 0, 2]
126+
/// expected = [true, true, false, true, false]
127+
#[test]
128+
fn test_bool_dict_multiple_true_single_false() {
129+
let values = BoolArray::from_iter([true, true, false]);
130+
let codes = buffer![0u8, 1, 2, 0, 2];
131+
132+
let result = bool_dict_execute(codes, values);
133+
assert_bool_vectors_eq(
134+
&result,
135+
&[Some(true), Some(true), Some(false), Some(true), Some(false)],
136+
);
137+
}
138+
139+
/// Test case: multiple of both true and false (fallback case).
/// NOTE(review): "fallback" presumably refers to the take path used when neither `true`
/// nor `false` occurs exactly once in the dictionary; that code is not in this file — confirm.
140+
/// values = [true, false, true, false], codes = [0, 1, 2, 3, 0]
141+
/// expected = [true, false, true, false, true]
142+
#[test]
143+
fn test_bool_dict_multiple_of_both() {
144+
let values = BoolArray::from_iter([true, false, true, false]);
145+
let codes = buffer![0u8, 1, 2, 3, 0];
146+
147+
let result = bool_dict_execute(codes, values);
148+
assert_bool_vectors_eq(
149+
&result,
150+
&[Some(true), Some(false), Some(true), Some(false), Some(true)],
151+
);
152+
}
153+
154+
/// Test case: nullable codes (null code produces null output).
155+
/// values = [false, true], codes = [0, null, 1]
156+
/// expected = [false, null, true]
157+
#[test]
158+
fn test_bool_dict_nullable_codes() {
159+
let values = BoolArray::from_iter([false, true]);
160+
let codes = PrimitiveArray::from_option_iter([Some(0u8), None, Some(1)]);
161+
162+
let result = bool_dict_execute(codes, values);
163+
assert_bool_vectors_eq(&result, &[Some(false), None, Some(true)]);
164+
}
165+
166+
/// Test case: nullable values (code pointing to null produces null output).
167+
/// values = [null, true], codes = [0, 1, 0]
168+
/// expected = [null, true, null]
169+
#[test]
170+
fn test_bool_dict_nullable_values() {
171+
let values = BoolArray::from_iter([None, Some(true)]);
172+
let codes = buffer![0u8, 1, 0];
173+
174+
let result = bool_dict_execute(codes, values);
175+
assert_bool_vectors_eq(&result, &[None, Some(true), None]);
176+
}
177+
178+
/// Test case: all values are null (no valid true or false).
179+
/// values = [null, null], codes = [0, 1, 0]
180+
/// expected = [null, null, null]
181+
#[test]
182+
fn test_bool_dict_all_values_null() {
183+
let values = BoolArray::from_iter([None::<bool>, None]);
184+
let codes = buffer![0u8, 1, 0];
185+
186+
let result = bool_dict_execute(codes, values);
187+
assert_bool_vectors_eq(&result, &[None, None, None]);
188+
}
189+
190+
/// Test case: single non-null true with null values.
/// NOTE(review): uses the same dictionary values as `test_bool_dict_nullable_values`;
/// only the codes differ (here the valid entry is referenced twice in a row).
191+
/// values = [null, true], codes = [0, 1, 1, 0]
192+
/// expected = [null, true, true, null]
193+
#[test]
194+
fn test_bool_dict_single_true_with_nulls() {
195+
let values = BoolArray::from_iter([None, Some(true)]);
196+
let codes = buffer![0u8, 1, 1, 0];
197+
198+
let result = bool_dict_execute(codes, values);
199+
assert_bool_vectors_eq(&result, &[None, Some(true), Some(true), None]);
200+
}
201+
202+
/// Test case: single non-null false with null values.
203+
/// values = [null, false], codes = [0, 1, 1, 0]
204+
/// expected = [null, false, false, null]
205+
#[test]
206+
fn test_bool_dict_single_false_with_nulls() {
207+
let values = BoolArray::from_iter([None, Some(false)]);
208+
let codes = buffer![0u8, 1, 1, 0];
209+
210+
let result = bool_dict_execute(codes, values);
211+
assert_bool_vectors_eq(&result, &[None, Some(false), Some(false), None]);
212+
}
213+
214+
/// Test case: both nullable codes and nullable values.
215+
/// values = [null, true, false], codes = [0, null, 1, 2]
216+
/// expected = [null, null, true, false]
217+
#[test]
218+
fn test_bool_dict_nullable_codes_and_values() {
219+
let values = BoolArray::from_iter([None, Some(true), Some(false)]);
220+
let codes = PrimitiveArray::from_option_iter([Some(0u8), None, Some(1), Some(2)]);
221+
222+
let result = bool_dict_execute(codes, values);
223+
assert_bool_vectors_eq(&result, &[None, None, Some(true), Some(false)]);
224+
}
225+
226+
/// Test case: empty codes.
227+
/// values = [true, false], codes = []
228+
/// expected = []
229+
#[test]
230+
fn test_bool_dict_empty_codes() {
231+
let values = BoolArray::from_iter([true, false]);
232+
// Presumably `buffer![elem; n]` mirrors `vec![elem; n]`, giving a zero-length buffer
// whose element type is fixed to `u8` by the `0u8` literal — confirm against the macro.
let codes = buffer![0u8; 0];
233+
234+
let result = bool_dict_execute(codes, values);
235+
assert_bool_vectors_eq(&result, &[]);
236+
}
237+
238+
/// Test case: larger index types (u32).
239+
/// values = [false, true], codes = [0u32, 1, 0, 1]
240+
/// expected = [false, true, false, true]
241+
#[test]
242+
fn test_bool_dict_u32_codes() {
243+
let values = BoolArray::from_iter([false, true]);
244+
let codes = buffer![0u32, 1, 0, 1];
245+
246+
let result = bool_dict_execute(codes, values);
247+
assert_bool_vectors_eq(&result, &[Some(false), Some(true), Some(false), Some(true)]);
248+
}
249+
250+
/// Test case: single true at non-zero index.
251+
/// values = [false, false, true], codes = [0, 1, 2, 2, 0]
252+
/// expected = [false, false, true, true, false]
253+
#[test]
254+
fn test_bool_dict_single_true_at_nonzero_index() {
255+
let values = BoolArray::from_iter([false, false, true]);
256+
let codes = buffer![0u8, 1, 2, 2, 0];
257+
258+
let result = bool_dict_execute(codes, values);
259+
assert_bool_vectors_eq(
260+
&result,
261+
&[
262+
Some(false),
263+
Some(false),
264+
Some(true),
265+
Some(true),
266+
Some(false),
267+
],
268+
);
269+
}
270+
271+
/// Test case: single false at non-zero index.
272+
/// values = [true, true, false], codes = [0, 1, 2, 2, 0]
273+
/// expected = [true, true, false, false, true]
274+
#[test]
275+
fn test_bool_dict_single_false_at_nonzero_index() {
276+
let values = BoolArray::from_iter([true, true, false]);
277+
let codes = buffer![0u8, 1, 2, 2, 0];
278+
279+
let result = bool_dict_execute(codes, values);
280+
assert_bool_vectors_eq(
281+
&result,
282+
&[Some(true), Some(true), Some(false), Some(false), Some(true)],
283+
);
284+
}

0 commit comments

Comments
 (0)