Skip to content

Commit e1b84fc

Browse files
committed
test(cubesql): Add grouped join tests
1 parent 35a7d36 commit e1b84fc

File tree

2 files changed

+364
-0
lines changed

2 files changed

+364
-0
lines changed

rust/cubesql/cubesql/src/compile/test/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub mod test_bi_workarounds;
3030
#[cfg(test)]
3131
pub mod test_cube_join;
3232
#[cfg(test)]
33+
pub mod test_cube_join_grouped;
34+
#[cfg(test)]
3335
pub mod test_df_execution;
3436
#[cfg(test)]
3537
pub mod test_introspection;
Lines changed: 362 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
use datafusion::physical_plan::displayable;
2+
use pretty_assertions::assert_eq;
3+
use regex::Regex;
4+
5+
use crate::compile::{
6+
test::{convert_select_to_query_plan, init_testing_logger, utils::LogicalPlanTestUtils},
7+
DatabaseProtocol, Rewriter,
8+
};
9+
10+
// TODO Test more joins with grouped queries
11+
// Join structure:
12+
// * ungrouped inner join grouped CubeScan
13+
// * ungrouped inner join grouped CubeScan with filters with values
14+
// * ungrouped inner join grouped WrappedSelect
15+
// * ungrouped inner join grouped WrappedSelect with filters with values
16+
// * ungrouped left join grouped
17+
// * grouped left join ungrouped
18+
// * ungrouped join EmptyRelation
19+
// Join condition columns:
20+
// * one dim
21+
// * two dim
22+
// * one measure
23+
// * __cubeJoinField
24+
// * one member expression dim (like ON LOWER(dim) = LOWER(column))
25+
// Join condition predicate:
26+
// * =
27+
// * IS NOT DISTINCT FROM
28+
// * COALESCE + IS NULL
29+
// Grouped query:
30+
// * Grouping
31+
// * Aggregation
32+
// * Filter
33+
// * Sort
34+
// * Limit
35+
// * Wrapper
36+
// On top of join:
37+
// * Grouping
38+
// * Aggregation
39+
// * Filter
40+
// * Limit
41+
// Test long and otherwise bad aliases for columns:
42+
// * in both parts
43+
// * in join condition
44+
// * in expressions on top
45+
// Test long and otherwise bad aliases for tables:
46+
// * for grouped join part
47+
// * for ungrouped join part
48+
// * inside grouped join part
49+
// * inside ungrouped join part
50+
// * for result
51+
52+
/// Simple join between ungrouped and grouped query should plan as a push-to-Cube query
53+
/// with subquery_joins and with concrete member expressions in SQL
54+
#[tokio::test]
55+
async fn test_join_ungrouped_with_grouped() {
56+
if !Rewriter::sql_push_down_enabled() {
57+
return;
58+
}
59+
init_testing_logger();
60+
61+
let query_plan = convert_select_to_query_plan(
62+
// language=PostgreSQL
63+
r#"
64+
SELECT
65+
kibana_grouped.avg_price,
66+
KibanaSampleDataEcommerce.customer_gender AS gender,
67+
AVG(KibanaSampleDataEcommerce.avgPrice) AS price
68+
FROM
69+
KibanaSampleDataEcommerce
70+
INNER JOIN (
71+
SELECT
72+
customer_gender,
73+
AVG(avgPrice) as avg_price
74+
FROM
75+
KibanaSampleDataEcommerce
76+
GROUP BY 1
77+
) kibana_grouped
78+
ON (
79+
(KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender)
80+
)
81+
GROUP BY
82+
1,
83+
2
84+
;
85+
"#
86+
.to_string(),
87+
DatabaseProtocol::PostgreSQL,
88+
)
89+
.await;
90+
91+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
92+
println!(
93+
"Physical plan: {}",
94+
displayable(physical_plan.as_ref()).indent()
95+
);
96+
97+
let request = query_plan
98+
.as_logical_plan()
99+
.find_cube_scan_wrapped_sql()
100+
.request;
101+
102+
assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1);
103+
104+
let subquery = &request.subquery_joins.unwrap()[0];
105+
106+
assert!(!subquery.sql.contains("ungrouped"));
107+
assert_eq!(subquery.join_type, "INNER");
108+
assert!(subquery.on.contains(
109+
r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""#
110+
));
111+
112+
// Measure from top aggregation
113+
assert!(query_plan
114+
.as_logical_plan()
115+
.find_cube_scan_wrapped_sql()
116+
.wrapped_sql
117+
.sql
118+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#));
119+
// Dimension from ungrouped side
120+
assert!(query_plan
121+
.as_logical_plan()
122+
.find_cube_scan_wrapped_sql()
123+
.wrapped_sql
124+
.sql
125+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.customer_gender}\""#));
126+
// Dimension from grouped side
127+
assert!(query_plan
128+
.as_logical_plan()
129+
.find_cube_scan_wrapped_sql()
130+
.wrapped_sql
131+
.sql
132+
.contains(r#"\"expr\":\"\\\"kibana_grouped\\\".\\\"avg_price\\\"\""#));
133+
}
134+
135+
/// Join between ungrouped and grouped query with two columns join condition
136+
/// should plan as a push-to-Cube query with subquery_joins
137+
#[tokio::test]
138+
async fn test_join_ungrouped_with_grouped_two_columns_condition() {
139+
if !Rewriter::sql_push_down_enabled() {
140+
return;
141+
}
142+
init_testing_logger();
143+
144+
let query_plan = convert_select_to_query_plan(
145+
// language=PostgreSQL
146+
r#"
147+
SELECT
148+
AVG(KibanaSampleDataEcommerce.avgPrice) AS price
149+
FROM
150+
KibanaSampleDataEcommerce
151+
INNER JOIN (
152+
SELECT
153+
customer_gender,
154+
notes,
155+
AVG(avgPrice) as avg_price
156+
FROM
157+
KibanaSampleDataEcommerce
158+
GROUP BY 1, 2
159+
) kibana_grouped
160+
ON (
161+
KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender AND KibanaSampleDataEcommerce.notes = kibana_grouped.notes
162+
)
163+
;
164+
"#
165+
.to_string(),
166+
DatabaseProtocol::PostgreSQL,
167+
)
168+
.await;
169+
170+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
171+
println!(
172+
"Physical plan: {}",
173+
displayable(physical_plan.as_ref()).indent()
174+
);
175+
176+
let request = query_plan
177+
.as_logical_plan()
178+
.find_cube_scan_wrapped_sql()
179+
.request;
180+
181+
assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1);
182+
183+
let subquery = &request.subquery_joins.unwrap()[0];
184+
185+
assert!(!subquery.sql.contains("ungrouped"));
186+
assert_eq!(subquery.join_type, "INNER");
187+
assert!(subquery.on.contains(
188+
r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""#
189+
));
190+
assert!(subquery
191+
.on
192+
.contains(r#"${KibanaSampleDataEcommerce.notes} = \"kibana_grouped\".\"notes\""#));
193+
194+
// Measure from top aggregation
195+
assert!(query_plan
196+
.as_logical_plan()
197+
.find_cube_scan_wrapped_sql()
198+
.wrapped_sql
199+
.sql
200+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#));
201+
}
202+
203+
/// Join between ungrouped and grouped query with filter + sort + limit
204+
/// should plan as a push-to-Cube query with subquery_joins
205+
#[tokio::test]
206+
async fn test_join_ungrouped_with_grouped_top1_and_filter() {
207+
if !Rewriter::sql_push_down_enabled() {
208+
return;
209+
}
210+
init_testing_logger();
211+
212+
let query_plan = convert_select_to_query_plan(
213+
// language=PostgreSQL
214+
r#"
215+
SELECT
216+
KibanaSampleDataEcommerce.customer_gender AS customer_gender,
217+
AVG(KibanaSampleDataEcommerce.avgPrice) AS price
218+
FROM
219+
KibanaSampleDataEcommerce
220+
INNER JOIN (
221+
SELECT
222+
customer_gender,
223+
AVG(avgPrice) as avg_price
224+
FROM
225+
KibanaSampleDataEcommerce
226+
WHERE
227+
notes = 'foo'
228+
GROUP BY 1
229+
ORDER BY 2 DESC NULLS LAST
230+
LIMIT 1
231+
) kibana_grouped
232+
ON (
233+
KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender
234+
)
235+
GROUP BY 1
236+
;
237+
"#
238+
.to_string(),
239+
DatabaseProtocol::PostgreSQL,
240+
)
241+
.await;
242+
243+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
244+
println!(
245+
"Physical plan: {}",
246+
displayable(physical_plan.as_ref()).indent()
247+
);
248+
249+
let request = query_plan
250+
.as_logical_plan()
251+
.find_cube_scan_wrapped_sql()
252+
.request;
253+
254+
assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1);
255+
256+
let subquery = &request.subquery_joins.unwrap()[0];
257+
258+
assert!(!subquery.sql.contains("ungrouped"));
259+
let re = Regex::new(
260+
r#""order":\s*\[\s*\[\s*"KibanaSampleDataEcommerce.avgPrice",\s*"desc"\s*\]\s*\]"#,
261+
)
262+
.unwrap();
263+
assert!(re.is_match(&subquery.sql));
264+
assert!(subquery.sql.contains(r#""limit": 1"#));
265+
assert_eq!(subquery.join_type, "INNER");
266+
assert!(subquery.on.contains(
267+
r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""#
268+
));
269+
270+
// Measure from top aggregation
271+
assert!(query_plan
272+
.as_logical_plan()
273+
.find_cube_scan_wrapped_sql()
274+
.wrapped_sql
275+
.sql
276+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#));
277+
}
278+
279+
// TODO add comment
280+
#[tokio::test]
281+
async fn test_superset_topk() {
282+
if !Rewriter::sql_push_down_enabled() {
283+
return;
284+
}
285+
init_testing_logger();
286+
287+
let query_plan = convert_select_to_query_plan(
288+
// language=PostgreSQL
289+
r#"
290+
SELECT DATE_TRUNC('week', order_date) AS __timestamp,
291+
MEASURE(KibanaSampleDataEcommerce.avgPrice) AS avgPrice
292+
FROM KibanaSampleDataEcommerce
293+
JOIN
294+
(SELECT customer_gender AS customer_gender__,
295+
MEASURE(KibanaSampleDataEcommerce.avgPrice) AS mme_inner__
296+
FROM KibanaSampleDataEcommerce
297+
WHERE order_date >= TO_TIMESTAMP('2022-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
298+
AND order_date < TO_TIMESTAMP('2024-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
299+
GROUP BY customer_gender
300+
ORDER BY mme_inner__ DESC
301+
LIMIT 20) AS anon_1 ON customer_gender = customer_gender__
302+
-- filters here are not supported without filter flattening in wrapper
303+
-- TODO enable it when ready
304+
-- WHERE order_date >= TO_TIMESTAMP('2022-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
305+
-- AND order_date < TO_TIMESTAMP('2024-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
306+
GROUP BY DATE_TRUNC('week', order_date)
307+
ORDER BY avgPrice DESC
308+
LIMIT 1000
309+
;
310+
"#
311+
.to_string(),
312+
DatabaseProtocol::PostgreSQL,
313+
)
314+
.await;
315+
316+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
317+
println!(
318+
"Physical plan: {}",
319+
displayable(physical_plan.as_ref()).indent()
320+
);
321+
322+
let wrapped_sql_node = query_plan.as_logical_plan().find_cube_scan_wrapped_sql();
323+
324+
assert_eq!(
325+
wrapped_sql_node
326+
.request
327+
.subquery_joins
328+
.as_ref()
329+
.unwrap()
330+
.len(),
331+
1
332+
);
333+
334+
let subquery = &wrapped_sql_node.request.subquery_joins.unwrap()[0];
335+
336+
assert!(!subquery.sql.contains("ungrouped"));
337+
let re = Regex::new(
338+
r#""order":\s*\[\s*\[\s*"KibanaSampleDataEcommerce.avgPrice",\s*"desc"\s*\]\s*\]"#,
339+
)
340+
.unwrap();
341+
assert!(re.is_match(&subquery.sql));
342+
assert!(subquery.sql.contains(r#""limit": 20"#));
343+
assert_eq!(subquery.join_type, "INNER");
344+
assert!(subquery.on.contains(
345+
r#"${KibanaSampleDataEcommerce.customer_gender} = \"anon_1\".\"customer_gender_\""#
346+
));
347+
348+
// Measure from top aggregation
349+
assert!(wrapped_sql_node
350+
.wrapped_sql
351+
.sql
352+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#));
353+
354+
// Outer sort
355+
assert!(wrapped_sql_node
356+
.wrapped_sql
357+
.sql
358+
.contains(r#"ORDER BY "KibanaSampleDataEcommerce"."measure_kibanasa" DESC NULLS FIRST"#));
359+
360+
// Outer limit
361+
assert!(wrapped_sql_node.wrapped_sql.sql.contains("LIMIT 1000"));
362+
}

0 commit comments

Comments
 (0)