Skip to content

Commit 9387072

Browse files
authored
fix(cubesql): Use date_part => date_part + date_trunc split only with appropriate date_part argument (#8552)
* Add new DatePartToken, DeltaTimeUnitToken and SpecialTimeUnitToken, with parsing as in PostgreSQL
* Use DatePartToken to parse and normalize extract and date_part token during rewrite
* Use DatePartToken to derive proper token for date_trunc after split
1 parent f3b8b19 commit 9387072

14 files changed

+743
-18
lines changed

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 267 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15145,6 +15145,273 @@ ORDER BY "source"."str0" ASC
1514515145
.contains("\\\\\\\\\\\\`"));
1514615146
}
1514715147

15148+
/// Checks planning and execution of `EXTRACT(EPOCH FROM dim_date0)` grouped by the expression.
///
/// The CubeScan request is expected to ask for `MultiTypeCube.dim_date0` as a plain dimension,
/// with no time dimension, and the executed result is compared against an insta snapshot.
/// The test returns early when `Rewriter::sql_push_down_enabled()` is false.
#[tokio::test]
15149+
async fn test_extract_epoch_from_dimension() {
15150+
// Skip the test entirely unless SQL push down is enabled
if !Rewriter::sql_push_down_enabled() {
15151+
return;
15152+
}
15153+
init_testing_logger();
15154+
15155+
let context = TestContext::new(DatabaseProtocol::PostgreSQL).await;
15156+
15157+
// The date column is requested as a regular dimension: no time dimension and no granularity
let expected_cube_scan = V1LoadRequestQuery {
15158+
measures: Some(vec![]),
15159+
segments: Some(vec![]),
15160+
dimensions: Some(vec!["MultiTypeCube.dim_date0".to_string()]),
15161+
time_dimensions: None,
15162+
order: None,
15163+
limit: None,
15164+
offset: None,
15165+
filters: None,
15166+
ungrouped: None,
15167+
};
15168+
15169+
// Register a mocked load response for the expected request: a single row with a timestamp that has a fractional second
context
15170+
.add_cube_load_mock(
15171+
expected_cube_scan.clone(),
15172+
simple_load_response(vec![
15173+
json!({"MultiTypeCube.dim_date0": "2024-12-31T01:02:03.500"}),
15174+
]),
15175+
)
15176+
.await;
15177+
15178+
// DataFusion (DF) types the "extract(EPOCH FROM dim_date0)" expression as Int32 in the schema, but evaluates it to Float64 at execution time:
15179+
// https://github.com/apache/datafusion/blob/e088945c38b74bb1d86dcbb88a69dfc21d59e375/datafusion/functions/src/datetime/date_part.rs#L131-L133
15180+
// https://github.com/cube-js/arrow-datafusion/blob/a78e52154e63bed2b7546bb250959239b020036f/datafusion/expr/src/function.rs#L126-L133
15181+
// Without "+ 0.0" execution fails with "column types must match schema types, expected Int32 but found Float64 at column index 0"
15182+
// TODO: remove "+ 0.0" after upgrading to a newer DataFusion
15183+
15184+
// language=PostgreSQL
15185+
let query = r#"
15186+
SELECT EXTRACT(EPOCH FROM dim_date0) + 0.0 AS result
15187+
FROM MultiTypeCube
15188+
GROUP BY 1
15189+
"#;
15190+
15191+
// The planned CubeScan request must match the expected one
assert_eq!(
15192+
context
15193+
.convert_sql_to_cube_query(&query)
15194+
.await
15195+
.unwrap()
15196+
.as_logical_plan()
15197+
.find_cube_scan()
15198+
.request,
15199+
expected_cube_scan
15200+
);
15201+
15202+
// The snapshot is expected to contain the proper epoch as a floating point value
15203+
insta::assert_snapshot!(context.execute_query(query).await.unwrap());
15204+
}
15205+
15206+
/// Checks planning and execution of `EXTRACT(<token> FROM dim_date0)` for several date part tokens.
///
/// For every `(token, expected_granularity)` pair the CubeScan request is expected to contain
/// `MultiTypeCube.dim_date0` as a time dimension with `expected_granularity` and no plain
/// dimensions; the executed result is stored in a per-token insta snapshot.
/// The test returns early when `Rewriter::sql_push_down_enabled()` is false.
#[tokio::test]
15207+
async fn test_extract_granularity_from_dimension() {
15208+
// Skip the test entirely unless SQL push down is enabled
if !Rewriter::sql_push_down_enabled() {
15209+
return;
15210+
}
15211+
init_testing_logger();
15212+
15213+
let context = TestContext::new(DatabaseProtocol::PostgreSQL).await;
15214+
15215+
// Truncating this date to any of the expected granularities (day, quarter) yields the same date, so the mocked response stays correct
15216+
// At the same time, different tokens should extract different values from it
15217+
let base_date = "2024-10-01T00:00:00.000Z";
15218+
15219+
// TODO: qtr is not supported in EXTRACT for now, probably in sqlparser
15220+
// Pairs of (date part token used in the query, granularity expected in the CubeScan time dimension)
let tokens = [
15221+
("day", "day"),
15222+
("dow", "day"),
15223+
("doy", "day"),
15224+
("quarter", "quarter"),
15225+
// ("qtr", "quarter"),
15226+
];
15227+
15228+
for (token, expected_granularity) in tokens {
15229+
// language=PostgreSQL
15230+
let query = format!(
15231+
r#"
15232+
SELECT EXTRACT({token} FROM dim_date0) AS result
15233+
FROM MultiTypeCube
15234+
GROUP BY 1
15235+
"#
15236+
);
15237+
15238+
// The date column is requested as a time dimension with the expected granularity
let expected_cube_scan = V1LoadRequestQuery {
15239+
measures: Some(vec![]),
15240+
segments: Some(vec![]),
15241+
dimensions: Some(vec![]),
15242+
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
15243+
dimension: "MultiTypeCube.dim_date0".to_string(),
15244+
granularity: Some(expected_granularity.to_string()),
15245+
date_range: None,
15246+
}]),
15247+
order: None,
15248+
limit: None,
15249+
offset: None,
15250+
filters: None,
15251+
ungrouped: None,
15252+
};
15253+
15254+
// The mocked row is keyed by the time dimension member with the granularity appended
context
15255+
.add_cube_load_mock(
15256+
expected_cube_scan.clone(),
15257+
simple_load_response(vec![
15258+
json!({format!("MultiTypeCube.dim_date0.{expected_granularity}"): base_date}),
15259+
]),
15260+
)
15261+
.await;
15262+
15263+
// The planned CubeScan request must match the expected one
assert_eq!(
15264+
context
15265+
.convert_sql_to_cube_query(&query)
15266+
.await
15267+
.unwrap()
15268+
.as_logical_plan()
15269+
.find_cube_scan()
15270+
.request,
15271+
expected_cube_scan
15272+
);
15273+
15274+
// Expect different values for different tokens; each token gets its own named snapshot
15275+
insta::assert_snapshot!(
15276+
format!("extract_{token}_from_dimension"),
15277+
context.execute_query(query).await.unwrap()
15278+
);
15279+
}
15280+
}
15281+
15282+
/// Checks planning and execution of `date_part('epoch', dim_date0)` grouped by the expression.
///
/// The CubeScan request is expected to ask for `MultiTypeCube.dim_date0` as a plain dimension,
/// with no time dimension, and the executed result is compared against an insta snapshot.
/// The test returns early when `Rewriter::sql_push_down_enabled()` is false.
#[tokio::test]
15283+
async fn test_date_part_epoch_from_dimension() {
15284+
// Skip the test entirely unless SQL push down is enabled
if !Rewriter::sql_push_down_enabled() {
15285+
return;
15286+
}
15287+
init_testing_logger();
15288+
15289+
let context = TestContext::new(DatabaseProtocol::PostgreSQL).await;
15290+
15291+
// The date column is requested as a regular dimension: no time dimension and no granularity
let expected_cube_scan = V1LoadRequestQuery {
15292+
measures: Some(vec![]),
15293+
segments: Some(vec![]),
15294+
dimensions: Some(vec!["MultiTypeCube.dim_date0".to_string()]),
15295+
time_dimensions: None,
15296+
order: None,
15297+
limit: None,
15298+
offset: None,
15299+
filters: None,
15300+
ungrouped: None,
15301+
};
15302+
15303+
// Register a mocked load response for the expected request: a single row with a timestamp that has a fractional second
context
15304+
.add_cube_load_mock(
15305+
expected_cube_scan.clone(),
15306+
simple_load_response(vec![
15307+
json!({"MultiTypeCube.dim_date0": "2024-12-31T01:02:03.500"}),
15308+
]),
15309+
)
15310+
.await;
15311+
15312+
// DataFusion (DF) types the epoch extraction (here "date_part('epoch', dim_date0)") as Int32 in the schema, but evaluates it to Float64 at execution time:
15313+
// https://github.com/apache/datafusion/blob/e088945c38b74bb1d86dcbb88a69dfc21d59e375/datafusion/functions/src/datetime/date_part.rs#L131-L133
15314+
// https://github.com/cube-js/arrow-datafusion/blob/a78e52154e63bed2b7546bb250959239b020036f/datafusion/expr/src/function.rs#L126-L133
15315+
// Without "+ 0.0" execution fails with "column types must match schema types, expected Int32 but found Float64 at column index 0"
15316+
// TODO: remove "+ 0.0" after upgrading to a newer DataFusion
15317+
15318+
// language=PostgreSQL
15319+
let query = r#"
15320+
SELECT date_part('epoch', dim_date0) + 0.0 AS result
15321+
FROM MultiTypeCube
15322+
GROUP BY 1
15323+
"#;
15324+
15325+
// The planned CubeScan request must match the expected one
assert_eq!(
15326+
context
15327+
.convert_sql_to_cube_query(&query)
15328+
.await
15329+
.unwrap()
15330+
.as_logical_plan()
15331+
.find_cube_scan()
15332+
.request,
15333+
expected_cube_scan
15334+
);
15335+
15336+
// The snapshot is expected to contain the proper epoch as a floating point value
15337+
insta::assert_snapshot!(context.execute_query(query).await.unwrap());
15338+
}
15339+
15340+
/// Checks planning and execution of `date_part('<token>', dim_date0)` for several date part tokens.
///
/// For every `(token, expected_granularity)` pair the CubeScan request is expected to contain
/// `MultiTypeCube.dim_date0` as a time dimension with `expected_granularity` and no plain
/// dimensions; the executed result is stored in a per-token insta snapshot.
/// The test returns early when `Rewriter::sql_push_down_enabled()` is false.
#[tokio::test]
15341+
async fn test_date_part_granularity_from_dimension() {
15342+
// Skip the test entirely unless SQL push down is enabled
if !Rewriter::sql_push_down_enabled() {
15343+
return;
15344+
}
15345+
init_testing_logger();
15346+
15347+
let context = TestContext::new(DatabaseProtocol::PostgreSQL).await;
15348+
15349+
// Truncating this date to any of the expected granularities (day, quarter) yields the same date, so the mocked response stays correct
15350+
// At the same time, different tokens should extract different values from it
15351+
let base_date = "2024-10-01T00:00:00.000Z";
15352+
15353+
// Pairs of (date part token used in the query, granularity expected in the CubeScan time dimension)
let tokens = [
15354+
("day", "day"),
15355+
("dow", "day"),
15356+
("doy", "day"),
15357+
("quarter", "quarter"),
15358+
("qtr", "quarter"),
15359+
];
15360+
15361+
for (token, expected_granularity) in tokens {
15362+
// language=PostgreSQL
15363+
let query = format!(
15364+
r#"
15365+
SELECT date_part('{token}', dim_date0) AS result
15366+
FROM MultiTypeCube
15367+
GROUP BY 1
15368+
"#
15369+
);
15370+
15371+
// The date column is requested as a time dimension with the expected granularity
let expected_cube_scan = V1LoadRequestQuery {
15372+
measures: Some(vec![]),
15373+
segments: Some(vec![]),
15374+
dimensions: Some(vec![]),
15375+
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
15376+
dimension: "MultiTypeCube.dim_date0".to_string(),
15377+
granularity: Some(expected_granularity.to_string()),
15378+
date_range: None,
15379+
}]),
15380+
order: None,
15381+
limit: None,
15382+
offset: None,
15383+
filters: None,
15384+
ungrouped: None,
15385+
};
15386+
15387+
// The mocked row is keyed by the time dimension member with the granularity appended
context
15388+
.add_cube_load_mock(
15389+
expected_cube_scan.clone(),
15390+
simple_load_response(vec![
15391+
json!({format!("MultiTypeCube.dim_date0.{expected_granularity}"): base_date}),
15392+
]),
15393+
)
15394+
.await;
15395+
15396+
// The planned CubeScan request must match the expected one
assert_eq!(
15397+
context
15398+
.convert_sql_to_cube_query(&query)
15399+
.await
15400+
.unwrap()
15401+
.as_logical_plan()
15402+
.find_cube_scan()
15403+
.request,
15404+
expected_cube_scan
15405+
);
15406+
15407+
// Expect different values for different tokens; each token gets its own named snapshot
15408+
insta::assert_snapshot!(
15409+
format!("date_part_{token}_from_dimension"),
15410+
context.execute_query(query).await.unwrap()
15411+
);
15412+
}
15413+
}
15414+
1514815415
#[tokio::test]
1514915416
async fn test_wrapper_tableau_sunday_week() {
1515015417
if !Rewriter::sql_push_down_enabled() {

0 commit comments

Comments
 (0)