
Commit 150a3c5

feat(cubesql): Filter push down for date_part(?upper) AND date_part('week') (#10071)

Allow merging the week-granularity date_part filter into the inDateRange. After that, it's possible to push down a complex filter:

    WHERE DATE_PART('year', "order_date") = 2019
      AND DATE_PART('quarter', "order_date") = 2
      AND DATE_PART('month', "order_date") = 4
      AND DATE_PART('week', "order_date") = 15

We are using such filters in the MDX API.

1 parent b7018c7 commit 150a3c5
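For illustration, here is a minimal sketch of the week-merging idea, assuming chrono's ISO-week helpers (from_isoywd_opt). It is not the actual implementation: the utils::try_merge_range_with_date_part helper that the rewrite rule calls lives in a separate changed file that is not shown on this page. The sketch only shows how DATE_PART('week', "order_date") = 15, combined with a 2019-wide range, collapses to 2019-04-08..2019-04-14, matching what the new tests below assert.

    use chrono::{Datelike, NaiveDate, Weekday};

    // Sketch only: map an ISO week number onto an existing inDateRange,
    // assuming the range is confined to a single year.
    fn merge_week_into_range(
        start_date: NaiveDate,
        end_date: NaiveDate,
        week: u32,
    ) -> Option<(NaiveDate, NaiveDate)> {
        // Only merge when the existing range stays within one year
        let year = start_date.year();
        if year != end_date.year() {
            return None;
        }

        // ISO week `week` of that year runs Monday through Sunday
        let week_start = NaiveDate::from_isoywd_opt(year, week, Weekday::Mon)?;
        let week_end = NaiveDate::from_isoywd_opt(year, week, Weekday::Sun)?;

        // A week that does not intersect the original range cannot be merged
        if week_start > end_date || week_end < start_date {
            return None;
        }

        // Clamp to the original range: ISO week 1 of 2019 starts on 2018-12-31,
        // but a 2019-only range yields 2019-01-01..2019-01-06
        Some((week_start.max(start_date), week_end.min(end_date)))
    }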

File tree

3 files changed (+352 -93 lines)


rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 130 additions & 0 deletions
@@ -9541,6 +9541,136 @@ ORDER BY "source"."str0" ASC
         )
     }
 
+    #[tokio::test]
+    async fn test_filter_extract_by_year_and_week() {
+        init_testing_logger();
+
+        async fn assert_week_result(week: i32, start_date: &str, end_date: &str) {
+            let query_plan = convert_select_to_query_plan(
+                format!(r#"
+                    SELECT COUNT(*) AS "count",
+                        EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") AS "yr:completedAt:ok"
+                    FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce"
+                    WHERE EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") = 2019
+                        AND EXTRACT(WEEK FROM "KibanaSampleDataEcommerce"."order_date") = {}
+                    GROUP BY 2
+                "#, week),
+                DatabaseProtocol::PostgreSQL,
+            ).await;
+
+            assert_eq!(
+                query_plan.as_logical_plan().find_cube_scan().request,
+                V1LoadRequestQuery {
+                    measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
+                    dimensions: Some(vec![]),
+                    segments: Some(vec![]),
+                    time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
+                        dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
+                        granularity: Some("year".to_string()),
+                        date_range: Some(json!(vec![start_date, end_date])),
+                    },]),
+                    order: Some(vec![]),
+                    ..Default::default()
+                }
+            )
+        }
+
+        // Test week 1 (first week of 2019)
+        // In 2019, January 1 is a Tuesday, so ISO week 1 starts on Monday, December 31, 2018
+        // But since our range is constrained to 2019, it should be Jan 1-6
+        assert_week_result(1, "2019-01-01", "2019-01-06").await;
+
+        // Test week 15 (mid-April)
+        // Week 15 of 2019 is April 8-14
+        assert_week_result(15, "2019-04-08", "2019-04-14").await;
+
+        // Test week 52 (end of year)
+        // Week 52 of 2019 is December 23-29
+        assert_week_result(52, "2019-12-23", "2019-12-29").await;
+    }
+
+    #[tokio::test]
+    async fn test_filter_extract_by_year_and_week_with_trunc() {
+        init_testing_logger();
+
+        let logical_plan = convert_select_to_query_plan(
+            r#"
+            SELECT
+                COUNT(*) AS "count",
+                EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") AS "yr:completedAt:ok"
+            FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce"
+            WHERE EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") = 2019
+                AND CAST(TRUNC(EXTRACT(WEEK FROM "KibanaSampleDataEcommerce"."order_date")) AS INTEGER) = 15
+            GROUP BY 2
+            "#
+            .to_string(),
+            DatabaseProtocol::PostgreSQL,
+        )
+        .await
+        .as_logical_plan();
+
+        assert_eq!(
+            logical_plan.find_cube_scan().request,
+            V1LoadRequestQuery {
+                measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
+                dimensions: Some(vec![]),
+                segments: Some(vec![]),
+                time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
+                    dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
+                    granularity: Some("year".to_string()),
+                    date_range: Some(json!(vec![
+                        "2019-04-08".to_string(),
+                        "2019-04-14".to_string(),
+                    ])),
+                },]),
+                order: Some(vec![]),
+                ..Default::default()
+            }
+        )
+    }
+
+    #[tokio::test]
+    async fn test_filter_date_part_by_year_quarter_month_week() {
+        init_testing_logger();
+
+        let logical_plan = convert_select_to_query_plan(
+            r#"
+            SELECT
+                COUNT(*) AS "count",
+                DATE_PART('year', "KibanaSampleDataEcommerce"."order_date") AS "yr:completedAt:ok"
+            FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce"
+            WHERE DATE_PART('year', "KibanaSampleDataEcommerce"."order_date") = 2019
+                AND DATE_PART('quarter', "KibanaSampleDataEcommerce"."order_date") = 2
+                AND DATE_PART('month', "KibanaSampleDataEcommerce"."order_date") = 4
+                AND DATE_PART('week', "KibanaSampleDataEcommerce"."order_date") = 15
+            GROUP BY 2
+            "#
+            .to_string(),
+            DatabaseProtocol::PostgreSQL,
+        )
+        .await
+        .as_logical_plan();
+
+        assert_eq!(
+            logical_plan.find_cube_scan().request,
+            V1LoadRequestQuery {
+                measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
+                dimensions: Some(vec![]),
+                segments: Some(vec![]),
+                time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
+                    dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
+                    granularity: Some("year".to_string()),
+                    date_range: Some(json!(vec![
+                        "2019-04-08".to_string(),
+                        "2019-04-14".to_string(),
+                    ])),
+                },]),
+                order: Some(vec![]),
+                ..Default::default()
+            }
+        )
+    }
+
     #[tokio::test]
     async fn test_tableau_filter_extract_by_year() {
         init_testing_logger();

rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs

Lines changed: 11 additions & 92 deletions
@@ -1,4 +1,4 @@
-use super::utils;
+use super::utils::{self, try_merge_range_with_date_part};
 use crate::compile::date_parser::parse_date_str;
 use crate::{
     compile::rewrite::{
@@ -51,7 +51,6 @@ use datafusion::{
 };
 use egg::{Subst, Var};
 use std::{
-    cmp::{max, min},
     collections::HashSet,
     convert::TryInto,
     fmt::Display,
@@ -4295,98 +4294,18 @@ impl FilterRules {
                return false;
            };
 
-            let new_values = match granularity.as_str() {
-                "month" => {
-                    // Check that the range only covers one year
-                    let start_date_year = start_date.year();
-                    if start_date_year != end_date.year() {
-                        return false;
-                    }
-
-                    // Month value must be valid
-                    if !(1..=12).contains(&value) {
-                        return false;
-                    }
-
-                    // Obtain the new range
-                    let Some(new_start_date) =
-                        NaiveDate::from_ymd_opt(start_date_year, value as u32, 1)
-                    else {
-                        return false;
-                    };
-                    let Some(new_end_date) = new_start_date
-                        .checked_add_months(Months::new(1))
-                        .and_then(|date| date.checked_sub_days(Days::new(1)))
-                    else {
-                        return false;
-                    };
-
-                    // If the resulting range is outside of the original range, we can't merge
-                    // the filters
-                    if new_start_date > end_date || new_end_date < start_date {
-                        return false;
-                    }
-
-                    // Preserves existing constraints, for example:
-                    // inDataRange: order_date >= '2019-02-15' AND order_date < '2019-03-10'
-                    // Month filter: EXTRACT(MONTH FROM order_date) = 2 (February)
-                    let new_start_date = max(new_start_date, start_date);
-                    let new_end_date = min(new_end_date, end_date);
-
-                    vec![
-                        new_start_date.format("%Y-%m-%d").to_string(),
-                        new_end_date.format("%Y-%m-%d").to_string(),
-                    ]
-                }
-                "quarter" | "qtr" => {
-                    // Check that the range only covers one year
-                    let start_date_year = start_date.year();
-                    if start_date_year != end_date.year() {
-                        return false;
-                    }
-
-                    // Quarter value must be valid (1-4)
-                    if !(1..=4).contains(&value) {
-                        return false;
-                    }
-
-                    let quarter_start_month = (value - 1) * 3 + 1;
-
-                    // Obtain the new range
-                    let Some(new_start_date) =
-                        NaiveDate::from_ymd_opt(start_date_year, quarter_start_month as u32, 1)
-                    else {
-                        return false;
-                    };
-
-                    let Some(new_end_date) = new_start_date
-                        .checked_add_months(Months::new(3))
-                        .and_then(|date| date.checked_sub_days(Days::new(1)))
-                    else {
-                        return false;
-                    };
-
-                    // Paranoid check, If the resulting range is outside of the original range, we can't merge
-                    // the filters
-                    if new_start_date > end_date || new_end_date < start_date {
-                        return false;
-                    }
-
-                    // Preserves existing constraints, for example:
-                    // inDataRange: order_date >= '2019-04-15' AND order_date < '2019-12-31'
-                    // Month filter: EXTRACT(QUARTER FROM order_date) = 2
-                    let new_start_date = max(new_start_date, start_date);
-                    let new_end_date = min(new_end_date, end_date);
-
-                    vec![
-                        new_start_date.format("%Y-%m-%d").to_string(),
-                        new_end_date.format("%Y-%m-%d").to_string(),
-                    ]
-                }
-                // TODO: handle more granularities
-                _ => return false,
+            // Use the utility function to calculate the date range for the given granularity
+            let Some((new_start_date, new_end_date)) =
+                try_merge_range_with_date_part(start_date, end_date, granularity.as_str(), value)
+            else {
+                return false;
            };
 
+            let new_values = vec![
+                new_start_date.format("%Y-%m-%d").to_string(),
+                new_end_date.format("%Y-%m-%d").to_string(),
+            ];
+
            subst.insert(
                new_values_var,
                egraph.add(LogicalPlanLanguage::FilterMemberValues(FilterMemberValues(
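For reference, a hedged usage sketch of the refactored path. The helper's exact signature is inferred from the call site above (it appears to return an Option of a start/end NaiveDate pair); its implementation lives in super::utils and is not part of this hunk, so the names and the result shown here are assumptions that merely line up with the new tests in mod.rs.

    use chrono::NaiveDate;
    // try_merge_range_with_date_part is imported at the top of filters.rs (first hunk above).

    fn week_merge_example() {
        // Existing inDateRange constraint: the whole of 2019
        let start = NaiveDate::from_ymd_opt(2019, 1, 1).unwrap();
        let end = NaiveDate::from_ymd_opt(2019, 12, 31).unwrap();

        // DATE_PART('week', order_date) = 15 narrows the range to ISO week 15 of 2019
        let merged = try_merge_range_with_date_part(start, end, "week", 15);
        assert_eq!(
            merged,
            Some((
                NaiveDate::from_ymd_opt(2019, 4, 8).unwrap(),
                NaiveDate::from_ymd_opt(2019, 4, 14).unwrap(),
            ))
        );
    }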
