Skip to content

Commit 8113798

Browse files
perf[dtp]: push down compare over dtp constant second subsecond (#5793)
If compare(DTP) operators only on the day, you can replace with a compare over the day. Signed-off-by: Joe Isaacs <[email protected]>
1 parent aa7a891 commit 8113798

File tree

1 file changed

+303
-2
lines changed
  • encodings/datetime-parts/src/compute

1 file changed

+303
-2
lines changed

encodings/datetime-parts/src/compute/rules.rs

Lines changed: 303 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,32 @@
44
use vortex_array::Array;
55
use vortex_array::ArrayRef;
66
use vortex_array::IntoArray;
7+
use vortex_array::arrays::AnyScalarFn;
78
use vortex_array::arrays::ConstantArray;
89
use vortex_array::arrays::ConstantVTable;
910
use vortex_array::arrays::FilterArray;
1011
use vortex_array::arrays::FilterVTable;
12+
use vortex_array::arrays::ScalarFnArray;
13+
use vortex_array::builtins::ArrayBuiltins;
14+
use vortex_array::expr::Between;
15+
use vortex_array::expr::Binary;
1116
use vortex_array::matchers::Exact;
17+
use vortex_array::optimizer::ArrayOptimizer;
1218
use vortex_array::optimizer::rules::ArrayParentReduceRule;
1319
use vortex_array::optimizer::rules::ParentRuleSet;
20+
use vortex_dtype::DType;
21+
use vortex_dtype::datetime::TemporalMetadata;
1422
use vortex_error::VortexExpect;
1523
use vortex_error::VortexResult;
1624

1725
use crate::DateTimePartsArray;
1826
use crate::DateTimePartsVTable;
27+
use crate::timestamp;
1928

20-
pub(crate) const PARENT_RULES: ParentRuleSet<DateTimePartsVTable> =
21-
ParentRuleSet::new(&[ParentRuleSet::lift(&DTPFilterPushDownRule)]);
29+
pub(crate) const PARENT_RULES: ParentRuleSet<DateTimePartsVTable> = ParentRuleSet::new(&[
30+
ParentRuleSet::lift(&DTPFilterPushDownRule),
31+
ParentRuleSet::lift(&DTPComparisonPushDownRule),
32+
]);
2233

2334
/// Push the filter into the days column of a date time parts, we could extend this to other fields
2435
/// but its less clear if that is beneficial.
@@ -61,3 +72,293 @@ impl ArrayParentReduceRule<DateTimePartsVTable> for DTPFilterPushDownRule {
6172
.map(|x| Some(x.into_array()))
6273
}
6374
}
75+
76+
/// Push down comparison operators (Binary and Between) to the days column when both seconds
77+
/// and subseconds are constant zero on both sides of the comparison.
78+
///
79+
/// When a DateTimeParts array has constant zero for seconds and subseconds, and is being
80+
/// compared against a constant timestamp that also has zero seconds and subseconds,
81+
/// we can push the comparison down to just compare the days.
82+
///
83+
/// For example: `dtp <= 2013-07-31` where dtp has seconds=0 and subseconds=0,
84+
/// and the RHS timestamp is exactly at midnight (no time component),
85+
/// becomes: `dtp.days <= 15917` (the day number for 2013-07-31).
86+
#[derive(Debug)]
87+
struct DTPComparisonPushDownRule;
88+
89+
impl ArrayParentReduceRule<DateTimePartsVTable> for DTPComparisonPushDownRule {
90+
type Parent = AnyScalarFn;
91+
92+
fn parent(&self) -> AnyScalarFn {
93+
AnyScalarFn
94+
}
95+
96+
fn reduce_parent(
97+
&self,
98+
child: &DateTimePartsArray,
99+
parent: &ScalarFnArray,
100+
child_idx: usize,
101+
) -> VortexResult<Option<ArrayRef>> {
102+
// Only handle comparison operations (Binary comparisons or Between)
103+
if parent
104+
.scalar_fn()
105+
.as_opt::<Binary>()
106+
.is_none_or(|c| c.maybe_cmp_operator().is_none())
107+
&& !parent.scalar_fn().is::<Between>()
108+
{
109+
return Ok(None);
110+
}
111+
112+
// Check that DTP's seconds and subseconds are constant zero
113+
if !is_constant_zero(child.seconds()) || !is_constant_zero(child.subseconds()) {
114+
return Ok(None);
115+
}
116+
117+
let children = parent.children();
118+
let days = child.days();
119+
120+
// Build new children: replace DTP with days, replace constant timestamps with days constants
121+
let mut new_children = Vec::with_capacity(children.len());
122+
for (idx, c) in children.iter().enumerate() {
123+
if idx == child_idx {
124+
// This is the DTP child - replace with days
125+
new_children.push(days.clone());
126+
} else {
127+
// Must be a constant timestamp at midnight
128+
let Some(days_value) = try_extract_days_constant(c) else {
129+
return Ok(None);
130+
};
131+
let len = days.len();
132+
let target_dtype = days.dtype();
133+
let constant = ConstantArray::new(days_value, len).into_array();
134+
new_children.push(constant.cast(target_dtype.clone())?);
135+
}
136+
}
137+
138+
let result =
139+
ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, parent.len())?
140+
.into_array()
141+
.optimize()?;
142+
143+
Ok(Some(result))
144+
}
145+
}
146+
147+
/// Try to extract the days value from a constant timestamp.
148+
/// Returns None if the constant is not a timestamp or has non-zero seconds/subseconds.
149+
fn try_extract_days_constant(array: &ArrayRef) -> Option<i64> {
150+
let constant = array.as_constant()?;
151+
152+
// Extract the timestamp value
153+
let timestamp = constant
154+
.as_extension()
155+
.storage()
156+
.as_primitive()
157+
.as_::<i64>()?;
158+
159+
// Get the time unit from the dtype
160+
let DType::Extension(ext_dtype) = constant.dtype() else {
161+
return None;
162+
};
163+
164+
let temporal_metadata = TemporalMetadata::try_from(ext_dtype.as_ref()).ok()?;
165+
let ts_parts = timestamp::split(timestamp, temporal_metadata.time_unit()).ok()?;
166+
167+
// Only allow pushdown if seconds and subseconds are zero
168+
if ts_parts.seconds != 0 || ts_parts.subseconds != 0 {
169+
return None;
170+
}
171+
172+
Some(ts_parts.days)
173+
}
174+
175+
/// Check if an array is a constant with value zero.
176+
fn is_constant_zero(array: &ArrayRef) -> bool {
177+
array
178+
.as_opt::<ConstantVTable>()
179+
.is_some_and(|c| c.scalar().is_zero())
180+
}
181+
182+
#[cfg(test)]
183+
mod tests {
184+
use vortex_array::arrays::PrimitiveArray;
185+
use vortex_array::arrays::ScalarFnArrayExt;
186+
use vortex_array::arrays::TemporalArray;
187+
use vortex_array::compute::BetweenOptions;
188+
use vortex_array::compute::StrictComparison;
189+
use vortex_array::expr::Operator;
190+
use vortex_array::optimizer::ArrayOptimizer;
191+
use vortex_array::validity::Validity;
192+
use vortex_buffer::Buffer;
193+
use vortex_buffer::buffer;
194+
use vortex_dtype::datetime::TimeUnit;
195+
use vortex_scalar::Scalar;
196+
197+
use super::*;
198+
199+
const SECONDS_PER_DAY: i64 = 86400;
200+
201+
/// Create a DTP array with the given day values (all at midnight).
202+
fn dtp_at_midnight(days: &[i64], time_unit: TimeUnit) -> DateTimePartsArray {
203+
let multiplier = match time_unit {
204+
TimeUnit::Seconds => 1,
205+
TimeUnit::Milliseconds => 1_000,
206+
TimeUnit::Microseconds => 1_000_000,
207+
TimeUnit::Nanoseconds => 1_000_000_000,
208+
TimeUnit::Days => panic!("Days not supported"),
209+
};
210+
let timestamps: Vec<i64> = days
211+
.iter()
212+
.map(|d| d * SECONDS_PER_DAY * multiplier)
213+
.collect();
214+
let buffer: Buffer<i64> = timestamps.into();
215+
let temporal = TemporalArray::new_timestamp(
216+
PrimitiveArray::new(buffer, Validity::NonNullable).into_array(),
217+
time_unit,
218+
None,
219+
);
220+
DateTimePartsArray::try_from(temporal).unwrap()
221+
}
222+
223+
/// Create a constant timestamp scalar at midnight for the given day.
224+
fn midnight_constant(day: i64, time_unit: TimeUnit, len: usize) -> ArrayRef {
225+
let multiplier = match time_unit {
226+
TimeUnit::Seconds => 1,
227+
TimeUnit::Milliseconds => 1_000,
228+
TimeUnit::Microseconds => 1_000_000,
229+
TimeUnit::Nanoseconds => 1_000_000_000,
230+
TimeUnit::Days => panic!("Days not supported"),
231+
};
232+
let timestamp = day * SECONDS_PER_DAY * multiplier;
233+
let temporal = TemporalArray::new_timestamp(
234+
PrimitiveArray::new(buffer![timestamp], Validity::NonNullable).into_array(),
235+
time_unit,
236+
None,
237+
);
238+
let scalar = Scalar::extension(temporal.ext_dtype(), timestamp.into());
239+
ConstantArray::new(scalar, len).into_array()
240+
}
241+
242+
/// Create a constant timestamp scalar with non-midnight time.
243+
fn non_midnight_constant(day: i64, seconds: i64, time_unit: TimeUnit, len: usize) -> ArrayRef {
244+
let multiplier = match time_unit {
245+
TimeUnit::Seconds => 1,
246+
TimeUnit::Milliseconds => 1_000,
247+
TimeUnit::Microseconds => 1_000_000,
248+
TimeUnit::Nanoseconds => 1_000_000_000,
249+
TimeUnit::Days => panic!("Days not supported"),
250+
};
251+
let timestamp = (day * SECONDS_PER_DAY + seconds) * multiplier;
252+
let temporal = TemporalArray::new_timestamp(
253+
PrimitiveArray::new(buffer![timestamp], Validity::NonNullable).into_array(),
254+
time_unit,
255+
None,
256+
);
257+
let scalar = Scalar::extension(temporal.ext_dtype(), timestamp.into());
258+
ConstantArray::new(scalar, len).into_array()
259+
}
260+
261+
#[test]
262+
fn test_binary_comparison_pushdown() {
263+
// DTP with days [0, 1, 2] at midnight
264+
let dtp = dtp_at_midnight(&[0, 1, 2], TimeUnit::Seconds);
265+
let len = dtp.len();
266+
267+
// Compare: dtp <= day 1 (midnight)
268+
let constant = midnight_constant(1, TimeUnit::Seconds, len);
269+
let comparison = Binary
270+
.try_new_array(len, Operator::Lte, [dtp.into_array(), constant])
271+
.unwrap();
272+
273+
// Optimize should push down to days
274+
let optimized = comparison.optimize().unwrap();
275+
276+
// The result should be a ScalarFn over primitive days, not over DTP
277+
assert!(
278+
!optimized.is::<DateTimePartsVTable>(),
279+
"Expected pushdown to remove DTP from expression"
280+
);
281+
282+
// Verify correctness: days [0, 1, 2] <= 1 should give [true, true, false]
283+
assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 2);
284+
}
285+
286+
#[test]
287+
fn test_between_pushdown() {
288+
// DTP with days [0, 1, 2, 3, 4] at midnight
289+
let dtp = dtp_at_midnight(&[0, 1, 2, 3, 4], TimeUnit::Seconds);
290+
let len = dtp.len();
291+
292+
// Between: 1 <= dtp <= 3
293+
let lower = midnight_constant(1, TimeUnit::Seconds, len);
294+
let upper = midnight_constant(3, TimeUnit::Seconds, len);
295+
296+
let between = Between
297+
.try_new_array(
298+
len,
299+
BetweenOptions {
300+
lower_strict: StrictComparison::NonStrict,
301+
upper_strict: StrictComparison::NonStrict,
302+
},
303+
[dtp.into_array(), lower, upper],
304+
)
305+
.unwrap();
306+
307+
// Optimize should push down to days
308+
let optimized = between.optimize().unwrap();
309+
310+
// Verify correctness: days [0, 1, 2, 3, 4] between 1 and 3 should give [false, true, true, true, false]
311+
assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 3);
312+
}
313+
314+
#[test]
315+
fn test_no_pushdown_non_midnight_constant() {
316+
// DTP with days [0, 1, 2] at midnight
317+
let dtp = dtp_at_midnight(&[0, 1, 2], TimeUnit::Seconds);
318+
let len = dtp.len();
319+
320+
// Compare against non-midnight constant (day 1 at noon)
321+
let constant = non_midnight_constant(1, 43200, TimeUnit::Seconds, len);
322+
let comparison = Binary
323+
.try_new_array(len, Operator::Lte, [dtp.into_array(), constant])
324+
.unwrap();
325+
326+
// Optimize should NOT push down (constant has non-zero seconds)
327+
let optimized = comparison.optimize().unwrap();
328+
329+
// The DTP should still be in the expression tree
330+
// (optimization doesn't apply, so we keep the original structure)
331+
// Just verify it still computes correctly
332+
// days [0, 1, 2] at midnight <= day 1 at noon: [true, true, false]
333+
assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 2);
334+
}
335+
336+
#[test]
337+
fn test_no_pushdown_non_zero_dtp_seconds() {
338+
// Create a DTP with non-zero seconds (not at midnight)
339+
let timestamps: Buffer<i64> = vec![
340+
3600, // day 0 + 1 hour
341+
SECONDS_PER_DAY + 3600, // day 1 + 1 hour
342+
2 * SECONDS_PER_DAY + 3600, // day 2 + 1 hour
343+
]
344+
.into();
345+
let temporal = TemporalArray::new_timestamp(
346+
PrimitiveArray::new(timestamps, Validity::NonNullable).into_array(),
347+
TimeUnit::Seconds,
348+
None,
349+
);
350+
let dtp = DateTimePartsArray::try_from(temporal).unwrap();
351+
let len = dtp.len();
352+
353+
// Compare against midnight constant
354+
let constant = midnight_constant(1, TimeUnit::Seconds, len);
355+
let comparison = Binary
356+
.try_new_array(len, Operator::Lte, [dtp.into_array(), constant])
357+
.unwrap();
358+
359+
// Should still compute correctly (just not optimized via pushdown)
360+
let optimized = comparison.optimize().unwrap();
361+
// timestamps at 1am on days [0, 1, 2] <= day 1 midnight: [true, false, false]
362+
assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 1);
363+
}
364+
}

0 commit comments

Comments
 (0)