|
4 | 4 | use vortex_array::Array; |
5 | 5 | use vortex_array::ArrayRef; |
6 | 6 | use vortex_array::IntoArray; |
| 7 | +use vortex_array::arrays::AnyScalarFn; |
7 | 8 | use vortex_array::arrays::ConstantArray; |
8 | 9 | use vortex_array::arrays::ConstantVTable; |
9 | 10 | use vortex_array::arrays::FilterArray; |
10 | 11 | use vortex_array::arrays::FilterVTable; |
| 12 | +use vortex_array::arrays::ScalarFnArray; |
| 13 | +use vortex_array::builtins::ArrayBuiltins; |
| 14 | +use vortex_array::expr::Between; |
| 15 | +use vortex_array::expr::Binary; |
11 | 16 | use vortex_array::matchers::Exact; |
| 17 | +use vortex_array::optimizer::ArrayOptimizer; |
12 | 18 | use vortex_array::optimizer::rules::ArrayParentReduceRule; |
13 | 19 | use vortex_array::optimizer::rules::ParentRuleSet; |
| 20 | +use vortex_dtype::DType; |
| 21 | +use vortex_dtype::datetime::TemporalMetadata; |
14 | 22 | use vortex_error::VortexExpect; |
15 | 23 | use vortex_error::VortexResult; |
16 | 24 |
|
17 | 25 | use crate::DateTimePartsArray; |
18 | 26 | use crate::DateTimePartsVTable; |
| 27 | +use crate::timestamp; |
19 | 28 |
|
20 | | -pub(crate) const PARENT_RULES: ParentRuleSet<DateTimePartsVTable> = |
21 | | - ParentRuleSet::new(&[ParentRuleSet::lift(&DTPFilterPushDownRule)]); |
| 29 | +pub(crate) const PARENT_RULES: ParentRuleSet<DateTimePartsVTable> = ParentRuleSet::new(&[ |
| 30 | + ParentRuleSet::lift(&DTPFilterPushDownRule), |
| 31 | + ParentRuleSet::lift(&DTPComparisonPushDownRule), |
| 32 | +]); |
22 | 33 |
|
23 | 34 | /// Push the filter into the days column of a date time parts, we could extend this to other fields |
24 | 35 | /// but its less clear if that is beneficial. |
@@ -61,3 +72,293 @@ impl ArrayParentReduceRule<DateTimePartsVTable> for DTPFilterPushDownRule { |
61 | 72 | .map(|x| Some(x.into_array())) |
62 | 73 | } |
63 | 74 | } |
| 75 | + |
| 76 | +/// Push down comparison operators (Binary and Between) to the days column when both seconds |
| 77 | +/// and subseconds are constant zero on both sides of the comparison. |
| 78 | +/// |
| 79 | +/// When a DateTimeParts array has constant zero for seconds and subseconds, and is being |
| 80 | +/// compared against a constant timestamp that also has zero seconds and subseconds, |
| 81 | +/// we can push the comparison down to just compare the days. |
| 82 | +/// |
| 83 | +/// For example: `dtp <= 2013-07-31` where dtp has seconds=0 and subseconds=0, |
| 84 | +/// and the RHS timestamp is exactly at midnight (no time component), |
| 85 | +/// becomes: `dtp.days <= 15917` (the day number for 2013-07-31). |
| 86 | +#[derive(Debug)] |
| 87 | +struct DTPComparisonPushDownRule; |
| 88 | + |
| 89 | +impl ArrayParentReduceRule<DateTimePartsVTable> for DTPComparisonPushDownRule { |
| 90 | + type Parent = AnyScalarFn; |
| 91 | + |
| 92 | + fn parent(&self) -> AnyScalarFn { |
| 93 | + AnyScalarFn |
| 94 | + } |
| 95 | + |
| 96 | + fn reduce_parent( |
| 97 | + &self, |
| 98 | + child: &DateTimePartsArray, |
| 99 | + parent: &ScalarFnArray, |
| 100 | + child_idx: usize, |
| 101 | + ) -> VortexResult<Option<ArrayRef>> { |
| 102 | + // Only handle comparison operations (Binary comparisons or Between) |
| 103 | + if parent |
| 104 | + .scalar_fn() |
| 105 | + .as_opt::<Binary>() |
| 106 | + .is_none_or(|c| c.maybe_cmp_operator().is_none()) |
| 107 | + && !parent.scalar_fn().is::<Between>() |
| 108 | + { |
| 109 | + return Ok(None); |
| 110 | + } |
| 111 | + |
| 112 | + // Check that DTP's seconds and subseconds are constant zero |
| 113 | + if !is_constant_zero(child.seconds()) || !is_constant_zero(child.subseconds()) { |
| 114 | + return Ok(None); |
| 115 | + } |
| 116 | + |
| 117 | + let children = parent.children(); |
| 118 | + let days = child.days(); |
| 119 | + |
| 120 | + // Build new children: replace DTP with days, replace constant timestamps with days constants |
| 121 | + let mut new_children = Vec::with_capacity(children.len()); |
| 122 | + for (idx, c) in children.iter().enumerate() { |
| 123 | + if idx == child_idx { |
| 124 | + // This is the DTP child - replace with days |
| 125 | + new_children.push(days.clone()); |
| 126 | + } else { |
| 127 | + // Must be a constant timestamp at midnight |
| 128 | + let Some(days_value) = try_extract_days_constant(c) else { |
| 129 | + return Ok(None); |
| 130 | + }; |
| 131 | + let len = days.len(); |
| 132 | + let target_dtype = days.dtype(); |
| 133 | + let constant = ConstantArray::new(days_value, len).into_array(); |
| 134 | + new_children.push(constant.cast(target_dtype.clone())?); |
| 135 | + } |
| 136 | + } |
| 137 | + |
| 138 | + let result = |
| 139 | + ScalarFnArray::try_new(parent.scalar_fn().clone(), new_children, parent.len())? |
| 140 | + .into_array() |
| 141 | + .optimize()?; |
| 142 | + |
| 143 | + Ok(Some(result)) |
| 144 | + } |
| 145 | +} |
| 146 | + |
| 147 | +/// Try to extract the days value from a constant timestamp. |
| 148 | +/// Returns None if the constant is not a timestamp or has non-zero seconds/subseconds. |
| 149 | +fn try_extract_days_constant(array: &ArrayRef) -> Option<i64> { |
| 150 | + let constant = array.as_constant()?; |
| 151 | + |
| 152 | + // Extract the timestamp value |
| 153 | + let timestamp = constant |
| 154 | + .as_extension() |
| 155 | + .storage() |
| 156 | + .as_primitive() |
| 157 | + .as_::<i64>()?; |
| 158 | + |
| 159 | + // Get the time unit from the dtype |
| 160 | + let DType::Extension(ext_dtype) = constant.dtype() else { |
| 161 | + return None; |
| 162 | + }; |
| 163 | + |
| 164 | + let temporal_metadata = TemporalMetadata::try_from(ext_dtype.as_ref()).ok()?; |
| 165 | + let ts_parts = timestamp::split(timestamp, temporal_metadata.time_unit()).ok()?; |
| 166 | + |
| 167 | + // Only allow pushdown if seconds and subseconds are zero |
| 168 | + if ts_parts.seconds != 0 || ts_parts.subseconds != 0 { |
| 169 | + return None; |
| 170 | + } |
| 171 | + |
| 172 | + Some(ts_parts.days) |
| 173 | +} |
| 174 | + |
| 175 | +/// Check if an array is a constant with value zero. |
| 176 | +fn is_constant_zero(array: &ArrayRef) -> bool { |
| 177 | + array |
| 178 | + .as_opt::<ConstantVTable>() |
| 179 | + .is_some_and(|c| c.scalar().is_zero()) |
| 180 | +} |
| 181 | + |
| 182 | +#[cfg(test)] |
| 183 | +mod tests { |
| 184 | + use vortex_array::arrays::PrimitiveArray; |
| 185 | + use vortex_array::arrays::ScalarFnArrayExt; |
| 186 | + use vortex_array::arrays::TemporalArray; |
| 187 | + use vortex_array::compute::BetweenOptions; |
| 188 | + use vortex_array::compute::StrictComparison; |
| 189 | + use vortex_array::expr::Operator; |
| 190 | + use vortex_array::optimizer::ArrayOptimizer; |
| 191 | + use vortex_array::validity::Validity; |
| 192 | + use vortex_buffer::Buffer; |
| 193 | + use vortex_buffer::buffer; |
| 194 | + use vortex_dtype::datetime::TimeUnit; |
| 195 | + use vortex_scalar::Scalar; |
| 196 | + |
| 197 | + use super::*; |
| 198 | + |
| 199 | + const SECONDS_PER_DAY: i64 = 86400; |
| 200 | + |
| 201 | + /// Create a DTP array with the given day values (all at midnight). |
| 202 | + fn dtp_at_midnight(days: &[i64], time_unit: TimeUnit) -> DateTimePartsArray { |
| 203 | + let multiplier = match time_unit { |
| 204 | + TimeUnit::Seconds => 1, |
| 205 | + TimeUnit::Milliseconds => 1_000, |
| 206 | + TimeUnit::Microseconds => 1_000_000, |
| 207 | + TimeUnit::Nanoseconds => 1_000_000_000, |
| 208 | + TimeUnit::Days => panic!("Days not supported"), |
| 209 | + }; |
| 210 | + let timestamps: Vec<i64> = days |
| 211 | + .iter() |
| 212 | + .map(|d| d * SECONDS_PER_DAY * multiplier) |
| 213 | + .collect(); |
| 214 | + let buffer: Buffer<i64> = timestamps.into(); |
| 215 | + let temporal = TemporalArray::new_timestamp( |
| 216 | + PrimitiveArray::new(buffer, Validity::NonNullable).into_array(), |
| 217 | + time_unit, |
| 218 | + None, |
| 219 | + ); |
| 220 | + DateTimePartsArray::try_from(temporal).unwrap() |
| 221 | + } |
| 222 | + |
| 223 | + /// Create a constant timestamp scalar at midnight for the given day. |
| 224 | + fn midnight_constant(day: i64, time_unit: TimeUnit, len: usize) -> ArrayRef { |
| 225 | + let multiplier = match time_unit { |
| 226 | + TimeUnit::Seconds => 1, |
| 227 | + TimeUnit::Milliseconds => 1_000, |
| 228 | + TimeUnit::Microseconds => 1_000_000, |
| 229 | + TimeUnit::Nanoseconds => 1_000_000_000, |
| 230 | + TimeUnit::Days => panic!("Days not supported"), |
| 231 | + }; |
| 232 | + let timestamp = day * SECONDS_PER_DAY * multiplier; |
| 233 | + let temporal = TemporalArray::new_timestamp( |
| 234 | + PrimitiveArray::new(buffer![timestamp], Validity::NonNullable).into_array(), |
| 235 | + time_unit, |
| 236 | + None, |
| 237 | + ); |
| 238 | + let scalar = Scalar::extension(temporal.ext_dtype(), timestamp.into()); |
| 239 | + ConstantArray::new(scalar, len).into_array() |
| 240 | + } |
| 241 | + |
| 242 | + /// Create a constant timestamp scalar with non-midnight time. |
| 243 | + fn non_midnight_constant(day: i64, seconds: i64, time_unit: TimeUnit, len: usize) -> ArrayRef { |
| 244 | + let multiplier = match time_unit { |
| 245 | + TimeUnit::Seconds => 1, |
| 246 | + TimeUnit::Milliseconds => 1_000, |
| 247 | + TimeUnit::Microseconds => 1_000_000, |
| 248 | + TimeUnit::Nanoseconds => 1_000_000_000, |
| 249 | + TimeUnit::Days => panic!("Days not supported"), |
| 250 | + }; |
| 251 | + let timestamp = (day * SECONDS_PER_DAY + seconds) * multiplier; |
| 252 | + let temporal = TemporalArray::new_timestamp( |
| 253 | + PrimitiveArray::new(buffer![timestamp], Validity::NonNullable).into_array(), |
| 254 | + time_unit, |
| 255 | + None, |
| 256 | + ); |
| 257 | + let scalar = Scalar::extension(temporal.ext_dtype(), timestamp.into()); |
| 258 | + ConstantArray::new(scalar, len).into_array() |
| 259 | + } |
| 260 | + |
| 261 | + #[test] |
| 262 | + fn test_binary_comparison_pushdown() { |
| 263 | + // DTP with days [0, 1, 2] at midnight |
| 264 | + let dtp = dtp_at_midnight(&[0, 1, 2], TimeUnit::Seconds); |
| 265 | + let len = dtp.len(); |
| 266 | + |
| 267 | + // Compare: dtp <= day 1 (midnight) |
| 268 | + let constant = midnight_constant(1, TimeUnit::Seconds, len); |
| 269 | + let comparison = Binary |
| 270 | + .try_new_array(len, Operator::Lte, [dtp.into_array(), constant]) |
| 271 | + .unwrap(); |
| 272 | + |
| 273 | + // Optimize should push down to days |
| 274 | + let optimized = comparison.optimize().unwrap(); |
| 275 | + |
| 276 | + // The result should be a ScalarFn over primitive days, not over DTP |
| 277 | + assert!( |
| 278 | + !optimized.is::<DateTimePartsVTable>(), |
| 279 | + "Expected pushdown to remove DTP from expression" |
| 280 | + ); |
| 281 | + |
| 282 | + // Verify correctness: days [0, 1, 2] <= 1 should give [true, true, false] |
| 283 | + assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 2); |
| 284 | + } |
| 285 | + |
| 286 | + #[test] |
| 287 | + fn test_between_pushdown() { |
| 288 | + // DTP with days [0, 1, 2, 3, 4] at midnight |
| 289 | + let dtp = dtp_at_midnight(&[0, 1, 2, 3, 4], TimeUnit::Seconds); |
| 290 | + let len = dtp.len(); |
| 291 | + |
| 292 | + // Between: 1 <= dtp <= 3 |
| 293 | + let lower = midnight_constant(1, TimeUnit::Seconds, len); |
| 294 | + let upper = midnight_constant(3, TimeUnit::Seconds, len); |
| 295 | + |
| 296 | + let between = Between |
| 297 | + .try_new_array( |
| 298 | + len, |
| 299 | + BetweenOptions { |
| 300 | + lower_strict: StrictComparison::NonStrict, |
| 301 | + upper_strict: StrictComparison::NonStrict, |
| 302 | + }, |
| 303 | + [dtp.into_array(), lower, upper], |
| 304 | + ) |
| 305 | + .unwrap(); |
| 306 | + |
| 307 | + // Optimize should push down to days |
| 308 | + let optimized = between.optimize().unwrap(); |
| 309 | + |
| 310 | + // Verify correctness: days [0, 1, 2, 3, 4] between 1 and 3 should give [false, true, true, true, false] |
| 311 | + assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 3); |
| 312 | + } |
| 313 | + |
| 314 | + #[test] |
| 315 | + fn test_no_pushdown_non_midnight_constant() { |
| 316 | + // DTP with days [0, 1, 2] at midnight |
| 317 | + let dtp = dtp_at_midnight(&[0, 1, 2], TimeUnit::Seconds); |
| 318 | + let len = dtp.len(); |
| 319 | + |
| 320 | + // Compare against non-midnight constant (day 1 at noon) |
| 321 | + let constant = non_midnight_constant(1, 43200, TimeUnit::Seconds, len); |
| 322 | + let comparison = Binary |
| 323 | + .try_new_array(len, Operator::Lte, [dtp.into_array(), constant]) |
| 324 | + .unwrap(); |
| 325 | + |
| 326 | + // Optimize should NOT push down (constant has non-zero seconds) |
| 327 | + let optimized = comparison.optimize().unwrap(); |
| 328 | + |
| 329 | + // The DTP should still be in the expression tree |
| 330 | + // (optimization doesn't apply, so we keep the original structure) |
| 331 | + // Just verify it still computes correctly |
| 332 | + // days [0, 1, 2] at midnight <= day 1 at noon: [true, true, false] |
| 333 | + assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 2); |
| 334 | + } |
| 335 | + |
| 336 | + #[test] |
| 337 | + fn test_no_pushdown_non_zero_dtp_seconds() { |
| 338 | + // Create a DTP with non-zero seconds (not at midnight) |
| 339 | + let timestamps: Buffer<i64> = vec![ |
| 340 | + 3600, // day 0 + 1 hour |
| 341 | + SECONDS_PER_DAY + 3600, // day 1 + 1 hour |
| 342 | + 2 * SECONDS_PER_DAY + 3600, // day 2 + 1 hour |
| 343 | + ] |
| 344 | + .into(); |
| 345 | + let temporal = TemporalArray::new_timestamp( |
| 346 | + PrimitiveArray::new(timestamps, Validity::NonNullable).into_array(), |
| 347 | + TimeUnit::Seconds, |
| 348 | + None, |
| 349 | + ); |
| 350 | + let dtp = DateTimePartsArray::try_from(temporal).unwrap(); |
| 351 | + let len = dtp.len(); |
| 352 | + |
| 353 | + // Compare against midnight constant |
| 354 | + let constant = midnight_constant(1, TimeUnit::Seconds, len); |
| 355 | + let comparison = Binary |
| 356 | + .try_new_array(len, Operator::Lte, [dtp.into_array(), constant]) |
| 357 | + .unwrap(); |
| 358 | + |
| 359 | + // Should still compute correctly (just not optimized via pushdown) |
| 360 | + let optimized = comparison.optimize().unwrap(); |
| 361 | + // timestamps at 1am on days [0, 1, 2] <= day 1 midnight: [true, false, false] |
| 362 | + assert_eq!(optimized.as_bool_typed().true_count().unwrap(), 1); |
| 363 | + } |
| 364 | +} |
0 commit comments