Skip to content

Commit d8cc2cc

Browse files
committed
test(cubesql): Add grouped join tests
1 parent 35a7d36 commit d8cc2cc

File tree

2 files changed

+363
-0
lines changed

2 files changed

+363
-0
lines changed

rust/cubesql/cubesql/src/compile/test/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub mod test_bi_workarounds;
3030
#[cfg(test)]
3131
pub mod test_cube_join;
3232
#[cfg(test)]
33+
pub mod test_cube_join_grouped;
34+
#[cfg(test)]
3335
pub mod test_df_execution;
3436
#[cfg(test)]
3537
pub mod test_introspection;
Lines changed: 361 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,361 @@
1+
use datafusion::physical_plan::displayable;
2+
use pretty_assertions::assert_eq;
3+
use regex::Regex;
4+
5+
use crate::compile::{
6+
test::{convert_select_to_query_plan, init_testing_logger, utils::LogicalPlanTestUtils},
7+
DatabaseProtocol, Rewriter,
8+
};
9+
10+
// TODO Test more joins with grouped queries
11+
// Join structure:
12+
// * ungrouped inner join grouped CubeScan
13+
// * ungrouped inner join grouped CubeScan with filters with values
14+
// * ungrouped inner join grouped WrappedSelect
15+
// * ungrouped inner join grouped WrappedSelect with filters with values
16+
// * ungrouped left join grouped
17+
// * grouped left join ungrouped
18+
// * ungrouped join EmptyRelation
19+
// Join condition columns:
20+
// * one dim
21+
// * two dim
22+
// * one measure
23+
// * __cubeJoinField
24+
// * one member expression dim (like ON LOWER(dim) = LOWER(column))
25+
// Join condition predicate:
26+
// * =
27+
// * IS NOT DISTINCT FROM
28+
// * COALESCE + IS NULL
29+
// Grouped query:
30+
// * Grouping
31+
// * Aggregation
32+
// * Filter
33+
// * Sort
34+
// * Limit
35+
// * Wrapper
36+
// On top of join:
37+
// * Grouping
38+
// * Aggregation
39+
// * Filter
40+
// * Limit
41+
// Test long and otherwise bad aliases for columns:
42+
// * in both parts
43+
// * in join condition
44+
// * in expressions on top
45+
// Test long and otherwise bad aliases for tables:
46+
// * for grouped join part
47+
// * for ungrouped join part
48+
// * inside grouped join part
49+
// * inside ungrouped join part
50+
// * for result
51+
52+
/// Simple join between ungrouped and grouped query should plan as a push-to-Cube query
53+
/// with subquery_joins and with concrete member expressions in SQL
54+
#[tokio::test]
55+
async fn test_join_ungrouped_with_grouped() {
56+
if !Rewriter::sql_push_down_enabled() {
57+
return;
58+
}
59+
init_testing_logger();
60+
61+
let query_plan = convert_select_to_query_plan(
62+
// language=PostgreSQL
63+
r#"
64+
SELECT
65+
kibana_grouped.avg_price,
66+
KibanaSampleDataEcommerce.customer_gender AS gender,
67+
AVG(KibanaSampleDataEcommerce.avgPrice) AS price
68+
FROM
69+
KibanaSampleDataEcommerce
70+
INNER JOIN (
71+
SELECT
72+
customer_gender,
73+
AVG(avgPrice) as avg_price
74+
FROM
75+
KibanaSampleDataEcommerce
76+
GROUP BY 1
77+
) kibana_grouped
78+
ON (
79+
(KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender)
80+
)
81+
GROUP BY
82+
1,
83+
2
84+
;
85+
"#
86+
.to_string(),
87+
DatabaseProtocol::PostgreSQL,
88+
)
89+
.await;
90+
91+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
92+
println!(
93+
"Physical plan: {}",
94+
displayable(physical_plan.as_ref()).indent()
95+
);
96+
97+
let request = query_plan
98+
.as_logical_plan()
99+
.find_cube_scan_wrapped_sql()
100+
.request;
101+
102+
assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1);
103+
104+
let subquery = &request.subquery_joins.unwrap()[0];
105+
106+
assert!(!subquery.sql.contains("ungrouped"));
107+
assert_eq!(subquery.join_type, "INNER");
108+
assert!(subquery.on.contains(
109+
r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""#
110+
));
111+
112+
// Measure from top aggregation
113+
assert!(query_plan
114+
.as_logical_plan()
115+
.find_cube_scan_wrapped_sql()
116+
.wrapped_sql
117+
.sql
118+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#));
119+
// Dimension from ungrouped side
120+
assert!(query_plan
121+
.as_logical_plan()
122+
.find_cube_scan_wrapped_sql()
123+
.wrapped_sql
124+
.sql
125+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.customer_gender}\""#));
126+
// Dimension from grouped side
127+
assert!(query_plan
128+
.as_logical_plan()
129+
.find_cube_scan_wrapped_sql()
130+
.wrapped_sql
131+
.sql
132+
.contains(r#"\"expr\":\"\\\"kibana_grouped\\\".\\\"avg_price\\\"\""#));
133+
}
134+
135+
/// Join between ungrouped and grouped query with two columns join condition
136+
/// should plan as a push-to-Cube query with subquery_joins
137+
#[tokio::test]
138+
async fn test_join_ungrouped_with_grouped_two_columns_condition() {
139+
if !Rewriter::sql_push_down_enabled() {
140+
return;
141+
}
142+
init_testing_logger();
143+
144+
let query_plan = convert_select_to_query_plan(
145+
// language=PostgreSQL
146+
r#"
147+
SELECT
148+
AVG(KibanaSampleDataEcommerce.avgPrice) AS price
149+
FROM
150+
KibanaSampleDataEcommerce
151+
INNER JOIN (
152+
SELECT
153+
customer_gender,
154+
notes,
155+
AVG(avgPrice) as avg_price
156+
FROM
157+
KibanaSampleDataEcommerce
158+
GROUP BY 1, 2
159+
) kibana_grouped
160+
ON (
161+
KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender AND KibanaSampleDataEcommerce.notes = kibana_grouped.notes
162+
)
163+
;
164+
"#
165+
.to_string(),
166+
DatabaseProtocol::PostgreSQL,
167+
)
168+
.await;
169+
170+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
171+
println!(
172+
"Physical plan: {}",
173+
displayable(physical_plan.as_ref()).indent()
174+
);
175+
176+
let request = query_plan
177+
.as_logical_plan()
178+
.find_cube_scan_wrapped_sql()
179+
.request;
180+
181+
assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1);
182+
183+
let subquery = &request.subquery_joins.unwrap()[0];
184+
185+
assert!(!subquery.sql.contains("ungrouped"));
186+
assert_eq!(subquery.join_type, "INNER");
187+
assert!(subquery.on.contains(
188+
r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""#
189+
));
190+
assert!(subquery
191+
.on
192+
.contains(r#"${KibanaSampleDataEcommerce.notes} = \"kibana_grouped\".\"notes\""#));
193+
194+
// Measure from top aggregation
195+
assert!(query_plan
196+
.as_logical_plan()
197+
.find_cube_scan_wrapped_sql()
198+
.wrapped_sql
199+
.sql
200+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#));
201+
}
202+
203+
/// Join between ungrouped and grouped query with filter + sort + limit
204+
/// should plan as a push-to-Cube query with subquery_joins
205+
#[tokio::test]
206+
async fn test_join_ungrouped_with_grouped_top1_and_filter() {
207+
if !Rewriter::sql_push_down_enabled() {
208+
return;
209+
}
210+
init_testing_logger();
211+
212+
let query_plan = convert_select_to_query_plan(
213+
// language=PostgreSQL
214+
r#"
215+
SELECT
216+
KibanaSampleDataEcommerce.customer_gender AS customer_gender,
217+
AVG(KibanaSampleDataEcommerce.avgPrice) AS price
218+
FROM
219+
KibanaSampleDataEcommerce
220+
INNER JOIN (
221+
SELECT
222+
customer_gender,
223+
AVG(avgPrice) as avg_price
224+
FROM
225+
KibanaSampleDataEcommerce
226+
WHERE
227+
notes = 'foo'
228+
GROUP BY 1
229+
ORDER BY 2 DESC NULLS LAST
230+
LIMIT 1
231+
) kibana_grouped
232+
ON (
233+
KibanaSampleDataEcommerce.customer_gender = kibana_grouped.customer_gender
234+
)
235+
GROUP BY 1
236+
;
237+
"#
238+
.to_string(),
239+
DatabaseProtocol::PostgreSQL,
240+
)
241+
.await;
242+
243+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
244+
println!(
245+
"Physical plan: {}",
246+
displayable(physical_plan.as_ref()).indent()
247+
);
248+
249+
let request = query_plan
250+
.as_logical_plan()
251+
.find_cube_scan_wrapped_sql()
252+
.request;
253+
254+
assert_eq!(request.subquery_joins.as_ref().unwrap().len(), 1);
255+
256+
let subquery = &request.subquery_joins.unwrap()[0];
257+
258+
assert!(!subquery.sql.contains("ungrouped"));
259+
let re = Regex::new(
260+
r#""order":\s*\[\s*\[\s*"KibanaSampleDataEcommerce.avgPrice",\s*"desc"\s*\]\s*\]"#,
261+
)
262+
.unwrap();
263+
assert!(re.is_match(&subquery.sql));
264+
assert!(subquery.sql.contains(r#""limit": 1"#));
265+
assert_eq!(subquery.join_type, "INNER");
266+
assert!(subquery.on.contains(
267+
r#"${KibanaSampleDataEcommerce.customer_gender} = \"kibana_grouped\".\"customer_gender\""#
268+
));
269+
270+
// Measure from top aggregation
271+
assert!(query_plan
272+
.as_logical_plan()
273+
.find_cube_scan_wrapped_sql()
274+
.wrapped_sql
275+
.sql
276+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#));
277+
}
278+
279+
#[tokio::test]
280+
async fn test_superset_topk() {
281+
if !Rewriter::sql_push_down_enabled() {
282+
return;
283+
}
284+
init_testing_logger();
285+
286+
let query_plan = convert_select_to_query_plan(
287+
// language=PostgreSQL
288+
r#"
289+
SELECT DATE_TRUNC('week', order_date) AS __timestamp,
290+
MEASURE(KibanaSampleDataEcommerce.avgPrice) AS avgPrice
291+
FROM KibanaSampleDataEcommerce
292+
JOIN
293+
(SELECT customer_gender AS customer_gender__,
294+
MEASURE(KibanaSampleDataEcommerce.avgPrice) AS mme_inner__
295+
FROM KibanaSampleDataEcommerce
296+
WHERE order_date >= TO_TIMESTAMP('2022-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
297+
AND order_date < TO_TIMESTAMP('2024-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
298+
GROUP BY customer_gender
299+
ORDER BY mme_inner__ DESC
300+
LIMIT 20) AS anon_1 ON customer_gender = customer_gender__
301+
-- filters here are not supported without filter flattening in wrapper
302+
-- TODO enable it when ready
303+
-- WHERE order_date >= TO_TIMESTAMP('2022-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
304+
-- AND order_date < TO_TIMESTAMP('2024-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
305+
GROUP BY DATE_TRUNC('week', order_date)
306+
ORDER BY avgPrice DESC
307+
LIMIT 1000
308+
;
309+
"#
310+
.to_string(),
311+
DatabaseProtocol::PostgreSQL,
312+
)
313+
.await;
314+
315+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
316+
println!(
317+
"Physical plan: {}",
318+
displayable(physical_plan.as_ref()).indent()
319+
);
320+
321+
let wrapped_sql_node = query_plan.as_logical_plan().find_cube_scan_wrapped_sql();
322+
323+
assert_eq!(
324+
wrapped_sql_node
325+
.request
326+
.subquery_joins
327+
.as_ref()
328+
.unwrap()
329+
.len(),
330+
1
331+
);
332+
333+
let subquery = &wrapped_sql_node.request.subquery_joins.unwrap()[0];
334+
335+
assert!(!subquery.sql.contains("ungrouped"));
336+
let re = Regex::new(
337+
r#""order":\s*\[\s*\[\s*"KibanaSampleDataEcommerce.avgPrice",\s*"desc"\s*\]\s*\]"#,
338+
)
339+
.unwrap();
340+
assert!(re.is_match(&subquery.sql));
341+
assert!(subquery.sql.contains(r#""limit": 20"#));
342+
assert_eq!(subquery.join_type, "INNER");
343+
assert!(subquery.on.contains(
344+
r#"${KibanaSampleDataEcommerce.customer_gender} = \"anon_1\".\"customer_gender_\""#
345+
));
346+
347+
// Measure from top aggregation
348+
assert!(wrapped_sql_node
349+
.wrapped_sql
350+
.sql
351+
.contains(r#"\"expr\":\"${KibanaSampleDataEcommerce.avgPrice}\""#));
352+
353+
// Outer sort
354+
assert!(wrapped_sql_node
355+
.wrapped_sql
356+
.sql
357+
.contains(r#"ORDER BY "KibanaSampleDataEcommerce"."measure_kibanasa" DESC NULLS FIRST"#));
358+
359+
// Outer limit
360+
assert!(wrapped_sql_node.wrapped_sql.sql.contains("LIMIT 1000"));
361+
}

0 commit comments

Comments
 (0)