Skip to content

Commit 1fcbdd4

Browse files
committed
perf[dtp]: push down compare over dtp constant second subsecond
Signed-off-by: Joe Isaacs <[email protected]>
1 parent fb976a1 commit 1fcbdd4

File tree

1 file changed

+306
-2
lines changed
  • encodings/datetime-parts/src/compute

1 file changed

+306
-2
lines changed

encodings/datetime-parts/src/compute/rules.rs

Lines changed: 306 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,32 @@
44
use vortex_array::Array;
55
use vortex_array::ArrayRef;
66
use vortex_array::IntoArray;
7+
use vortex_array::arrays::AnyScalarFn;
78
use vortex_array::arrays::ConstantArray;
89
use vortex_array::arrays::ConstantVTable;
910
use vortex_array::arrays::FilterArray;
1011
use vortex_array::arrays::FilterVTable;
12+
use vortex_array::arrays::ScalarFnArray;
13+
use vortex_array::builtins::ArrayBuiltins;
14+
use vortex_array::expr::Between;
15+
use vortex_array::expr::Binary;
1116
use vortex_array::matchers::Exact;
17+
use vortex_array::optimizer::ArrayOptimizer;
1218
use vortex_array::optimizer::rules::ArrayParentReduceRule;
1319
use vortex_array::optimizer::rules::ParentRuleSet;
20+
use vortex_dtype::DType;
21+
use vortex_dtype::datetime::TemporalMetadata;
1422
use vortex_error::VortexExpect;
1523
use vortex_error::VortexResult;
1624

1725
use crate::DateTimePartsArray;
1826
use crate::DateTimePartsVTable;
27+
use crate::timestamp;
1928

20-
pub(crate) const PARENT_RULES: ParentRuleSet<DateTimePartsVTable> =
21-
ParentRuleSet::new(&[ParentRuleSet::lift(&DTPFilterPushDownRule)]);
29+
pub(crate) const PARENT_RULES: ParentRuleSet<DateTimePartsVTable> = ParentRuleSet::new(&[
30+
ParentRuleSet::lift(&DTPFilterPushDownRule),
31+
ParentRuleSet::lift(&DTPComparisonPushDownRule),
32+
]);
2233

2334
/// Push the filter into the days column of a DateTimeParts array; we could extend this to other fields
2435
/// but it's less clear whether that is beneficial.
@@ -61,3 +72,296 @@ impl ArrayParentReduceRule<DateTimePartsVTable> for DTPFilterPushDownRule {
6172
.map(|x| Some(x.into_array()))
6273
}
6374
}
75+
76+
/// Push down comparison operators (Binary and Between) to the days column when both seconds
77+
/// and subseconds are constant zero on both sides of the comparison.
78+
///
79+
/// When a DateTimeParts array has constant zero for seconds and subseconds, and is being
80+
/// compared against a constant timestamp that also has zero seconds and subseconds,
81+
/// we can push the comparison down to just compare the days.
82+
///
83+
/// For example: `dtp <= 2013-07-31` where dtp has seconds=0 and subseconds=0,
84+
/// and the RHS timestamp is exactly at midnight (no time component),
85+
/// becomes: `dtp.days <= 15917` (the day number for 2013-07-31).
86+
#[derive(Debug)]
87+
struct DTPComparisonPushDownRule;
88+
89+
impl ArrayParentReduceRule<DateTimePartsVTable> for DTPComparisonPushDownRule {
90+
type Parent = AnyScalarFn;
91+
92+
fn parent(&self) -> AnyScalarFn {
93+
AnyScalarFn
94+
}
95+
96+
fn reduce_parent(
97+
&self,
98+
child: &DateTimePartsArray,
99+
parent: &ScalarFnArray,
100+
child_idx: usize,
101+
) -> VortexResult<Option<ArrayRef>> {
102+
// Only handle comparison operations (Binary comparisons or Between)
103+
if !is_comparison_op(parent) {
104+
return Ok(None);
105+
}
106+
107+
// Check that DTP's seconds and subseconds are constant zero
108+
if !is_constant_zero(child.seconds()) || !is_constant_zero(child.subseconds()) {
109+
return Ok(None);
110+
}
111+
112+
let children = parent.children();
113+
let days = child.days();
114+
115+
// Build new children: replace DTP with days, replace constant timestamps with days constants
116+
let mut new_children = Vec::with_capacity(children.len());
117+
for (idx, c) in children.iter().enumerate() {
118+
if idx == child_idx {
119+
// This is the DTP child - replace with days
120+
new_children.push(days.clone());
121+
} else {
122+
// Must be a constant timestamp at midnight
123+
let Some(days_value) = try_extract_days_constant(c) else {
124+
return Ok(None);
125+
};
126+
let len = days.len();
127+
let target_dtype = days.dtype();
128+
let constant = ConstantArray::new(days_value, len).into_array();
129+
new_children.push(constant.cast(target_dtype.clone())?);
130+
}
131+
}
132+
133+
let result =
134+
ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, parent.len())?
135+
.into_array()
136+
.optimize()?;
137+
138+
Ok(Some(result))
139+
}
140+
}
141+
142+
/// Check if the scalar function is a comparison operation we can push down.
143+
fn is_comparison_op(parent: &ScalarFnArray) -> bool {
144+
parent
145+
.scalar_fn()
146+
.as_opt::<Binary>()
147+
.is_some_and(|c| c.maybe_cmp_operator().is_some())
148+
|| parent.scalar_fn().is::<Between>()
149+
}
150+
151+
/// Try to extract the days value from a constant timestamp.
152+
/// Returns None if the constant is not a timestamp or has non-zero seconds/subseconds.
153+
fn try_extract_days_constant(array: &ArrayRef) -> Option<i64> {
154+
let constant = array.as_constant()?;
155+
156+
// Extract the timestamp value
157+
let timestamp = constant
158+
.as_extension()
159+
.storage()
160+
.as_primitive()
161+
.as_::<i64>()?;
162+
163+
// Get the time unit from the dtype
164+
let DType::Extension(ext_dtype) = constant.dtype() else {
165+
return None;
166+
};
167+
168+
let temporal_metadata = TemporalMetadata::try_from(ext_dtype.as_ref()).ok()?;
169+
let ts_parts = timestamp::split(timestamp, temporal_metadata.time_unit()).ok()?;
170+
171+
// Only allow pushdown if seconds and subseconds are zero
172+
if ts_parts.seconds != 0 || ts_parts.subseconds != 0 {
173+
return None;
174+
}
175+
176+
Some(ts_parts.days)
177+
}
178+
179+
/// Check if an array is a constant with value zero.
180+
fn is_constant_zero(array: &ArrayRef) -> bool {
181+
array
182+
.as_opt::<ConstantVTable>()
183+
.is_some_and(|c| c.scalar().is_zero())
184+
}
185+
186+
#[cfg(test)]
mod tests {
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::arrays::ScalarFnArrayExt;
    use vortex_array::arrays::TemporalArray;
    use vortex_array::compute::BetweenOptions;
    use vortex_array::compute::StrictComparison;
    // `Operator::Lte` is used by the tests below; it is not provided by `super::*`.
    use vortex_array::expr::Operator;
    use vortex_array::optimizer::ArrayOptimizer;
    use vortex_array::validity::Validity;
    use vortex_buffer::Buffer;
    use vortex_buffer::buffer;
    use vortex_dtype::datetime::TimeUnit;
    use vortex_scalar::Scalar;

    use super::*;

    const SECONDS_PER_DAY: i64 = 86400;

    /// Number of `time_unit` ticks in one second. Panics for `TimeUnit::Days`, which these
    /// timestamp helpers do not support.
    fn ticks_per_second(time_unit: TimeUnit) -> i64 {
        match time_unit {
            TimeUnit::Seconds => 1,
            TimeUnit::Milliseconds => 1_000,
            TimeUnit::Microseconds => 1_000_000,
            TimeUnit::Nanoseconds => 1_000_000_000,
            TimeUnit::Days => panic!("Days not supported"),
        }
    }

    /// Create a constant timestamp array of length `len` holding the raw value `timestamp`
    /// expressed in `time_unit`, with no time zone.
    fn timestamp_constant(timestamp: i64, time_unit: TimeUnit, len: usize) -> ArrayRef {
        // A one-element temporal array is built only to obtain the matching extension dtype.
        let temporal = TemporalArray::new_timestamp(
            PrimitiveArray::new(buffer![timestamp], Validity::NonNullable).into_array(),
            time_unit,
            None,
        );
        let scalar = Scalar::extension(temporal.ext_dtype(), timestamp.into());
        ConstantArray::new(scalar, len).into_array()
    }

    /// Create a DTP array with the given day values (all at midnight).
    fn dtp_at_midnight(days: &[i64], time_unit: TimeUnit) -> DateTimePartsArray {
        let ticks_per_day = SECONDS_PER_DAY * ticks_per_second(time_unit);
        let timestamps: Vec<i64> = days.iter().map(|day| day * ticks_per_day).collect();
        let buffer: Buffer<i64> = timestamps.into();
        let temporal = TemporalArray::new_timestamp(
            PrimitiveArray::new(buffer, Validity::NonNullable).into_array(),
            time_unit,
            None,
        );
        DateTimePartsArray::try_from(temporal).unwrap()
    }

    /// Create a constant timestamp array at midnight for the given day.
    fn midnight_constant(day: i64, time_unit: TimeUnit, len: usize) -> ArrayRef {
        let timestamp = day * SECONDS_PER_DAY * ticks_per_second(time_unit);
        timestamp_constant(timestamp, time_unit, len)
    }

    /// Create a constant timestamp array `seconds` seconds past midnight of the given day.
    fn non_midnight_constant(day: i64, seconds: i64, time_unit: TimeUnit, len: usize) -> ArrayRef {
        let timestamp = (day * SECONDS_PER_DAY + seconds) * ticks_per_second(time_unit);
        timestamp_constant(timestamp, time_unit, len)
    }

    #[test]
    fn test_binary_comparison_pushdown() {
        // DTP with days [0, 1, 2] at midnight
        let dtp = dtp_at_midnight(&[0, 1, 2], TimeUnit::Seconds);
        let len = dtp.len();

        // Compare: dtp <= day 1 (midnight)
        let constant = midnight_constant(1, TimeUnit::Seconds, len);
        let comparison = Binary
            .try_new_array(len, Operator::Lte, [dtp.into_array(), constant])
            .unwrap();

        // Optimize should push down to days
        let optimized = comparison.optimize().unwrap();

        // NOTE(review): this only inspects the root of the optimized tree, which is the
        // comparison itself, so it presumably also holds when no pushdown happened. Consider
        // asserting on the comparison's children instead.
        assert!(
            !optimized.is::<DateTimePartsVTable>(),
            "Expected pushdown to remove DTP from expression"
        );

        // Verify correctness: days [0, 1, 2] <= 1 should give [true, true, false]
        assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 2);
    }

    #[test]
    fn test_between_pushdown() {
        // DTP with days [0, 1, 2, 3, 4] at midnight
        let dtp = dtp_at_midnight(&[0, 1, 2, 3, 4], TimeUnit::Seconds);
        let len = dtp.len();

        // Between: 1 <= dtp <= 3
        let lower = midnight_constant(1, TimeUnit::Seconds, len);
        let upper = midnight_constant(3, TimeUnit::Seconds, len);

        let between = Between
            .try_new_array(
                len,
                BetweenOptions {
                    lower_strict: StrictComparison::NonStrict,
                    upper_strict: StrictComparison::NonStrict,
                },
                [dtp.into_array(), lower, upper],
            )
            .unwrap();

        // Optimize should push down to days
        let optimized = between.optimize().unwrap();

        // Verify correctness: days [0, 1, 2, 3, 4] between 1 and 3 should give
        // [false, true, true, true, false]
        assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 3);
    }

    #[test]
    fn test_no_pushdown_non_midnight_constant() {
        // DTP with days [0, 1, 2] at midnight
        let dtp = dtp_at_midnight(&[0, 1, 2], TimeUnit::Seconds);
        let len = dtp.len();

        // Compare against non-midnight constant (day 1 at noon)
        let constant = non_midnight_constant(1, 43200, TimeUnit::Seconds, len);
        let comparison = Binary
            .try_new_array(len, Operator::Lte, [dtp.into_array(), constant])
            .unwrap();

        // Optimize should NOT push down (constant has non-zero seconds)
        let optimized = comparison.optimize().unwrap();

        // The pushdown does not apply here, so only verify that the result is still correct:
        // days [0, 1, 2] at midnight <= day 1 at noon: [true, true, false]
        assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 2);
    }

    #[test]
    fn test_no_pushdown_non_zero_dtp_seconds() {
        // Create a DTP with non-zero seconds (not at midnight)
        let timestamps: Buffer<i64> = vec![
            3600,                       // day 0 + 1 hour
            SECONDS_PER_DAY + 3600,     // day 1 + 1 hour
            2 * SECONDS_PER_DAY + 3600, // day 2 + 1 hour
        ]
        .into();
        let temporal = TemporalArray::new_timestamp(
            PrimitiveArray::new(timestamps, Validity::NonNullable).into_array(),
            TimeUnit::Seconds,
            None,
        );
        let dtp = DateTimePartsArray::try_from(temporal).unwrap();
        let len = dtp.len();

        // Compare against midnight constant
        let constant = midnight_constant(1, TimeUnit::Seconds, len);
        let comparison = Binary
            .try_new_array(len, Operator::Lte, [dtp.into_array(), constant])
            .unwrap();

        // Should still compute correctly (just not optimized via pushdown)
        let optimized = comparison.optimize().unwrap();
        // timestamps at 1am on days [0, 1, 2] <= day 1 midnight: [true, false, false]
        assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 1);
    }
}

0 commit comments

Comments
 (0)