Skip to content

Commit 8f66334

Browse files
committed
fix(cubesql): Calculate proper limit and offset for CubeScan in nested limits case
1 parent 119475e commit 8f66334

File tree

3 files changed

+287
-8
lines changed

3 files changed

+287
-8
lines changed

rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,8 @@ impl RewriteRules for MemberRules {
237237
"?members",
238238
"?filters",
239239
"?orders",
240-
"?cube_fetch",
241-
"?offset",
240+
"?inner_fetch",
241+
"?inner_skip",
242242
"?split",
243243
"?can_pushdown_join",
244244
"CubeScanWrapped:false",
@@ -257,7 +257,14 @@ impl RewriteRules for MemberRules {
257257
"CubeScanWrapped:false",
258258
"?ungrouped",
259259
),
260-
self.push_down_limit("?skip", "?fetch", "?new_skip", "?new_fetch"),
260+
self.push_down_limit(
261+
"?skip",
262+
"?fetch",
263+
"?inner_skip",
264+
"?inner_fetch",
265+
"?new_skip",
266+
"?new_fetch",
267+
),
261268
),
262269
// MOD function to binary expr
263270
transforming_rewrite_with_root(
@@ -1559,14 +1566,21 @@ impl MemberRules {
15591566
&self,
15601567
skip_var: &'static str,
15611568
fetch_var: &'static str,
1569+
inner_skip_var: &'static str,
1570+
inner_fetch_var: &'static str,
15621571
new_skip_var: &'static str,
15631572
new_fetch_var: &'static str,
15641573
) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool {
15651574
let skip_var = var!(skip_var);
15661575
let fetch_var = var!(fetch_var);
1576+
let inner_skip_var = var!(inner_skip_var);
1577+
let inner_fetch_var = var!(inner_fetch_var);
15671578
let new_skip_var = var!(new_skip_var);
15681579
let new_fetch_var = var!(new_fetch_var);
15691580
move |egraph, subst| {
1581+
// This transform expects only single value in every (eclass, kind)
1582+
// No two different values of fetch or skip should ever get unified
1583+
15701584
let mut skip_value = None;
15711585
for skip in var_iter!(egraph[subst[skip_var]], LimitSkip) {
15721586
if skip.unwrap_or_default() > 0 {
@@ -1582,18 +1596,55 @@ impl MemberRules {
15821596
}
15831597
}
15841598

1585-
if skip_value.is_some() || fetch_value.is_some() {
1599+
let mut inner_skip_value = None;
1600+
for inner_skip in var_iter!(egraph[subst[inner_skip_var]], CubeScanOffset) {
1601+
inner_skip_value = *inner_skip;
1602+
break;
1603+
}
1604+
1605+
let mut inner_fetch_value = None;
1606+
for inner_fetch in var_iter!(egraph[subst[inner_fetch_var]], CubeScanLimit) {
1607+
inner_fetch_value = *inner_fetch;
1608+
break;
1609+
}
1610+
1611+
let new_skip = match (skip_value, inner_skip_value) {
1612+
(None, None) => None,
1613+
(Some(skip), None) | (None, Some(skip)) => Some(skip),
1614+
(Some(outer_skip), Some(inner_skip)) => Some(outer_skip + inner_skip),
1615+
};
1616+
let new_fetch = match (fetch_value, inner_fetch_value) {
1617+
(None, None) => None,
1618+
// Inner node have no limit, maybe just offset, result limit is same as for outer node
1619+
(Some(outer_fetch), None) => Some(outer_fetch),
1620+
// Outer node have no limit, but may have offset
1621+
// First, inner offset would apply
1622+
// Then inner node would limit rows
1623+
// Then outer offset would apply, which would yield no more than `inner_fetch - outer_skip` rows
1624+
(None, Some(inner_fetch)) => {
1625+
Some(inner_fetch.saturating_sub(skip_value.unwrap_or(0)))
1626+
}
1627+
// Both nodes have a limit
1628+
// First, inner offset would apply
1629+
// Then inner node would limit rows
1630+
// Then outer offset would apply, which would yield no more than `in_limit - out_offset` rows
1631+
// Then outer limit would apply, which would yield no more than minimal of two
1632+
(Some(outer_fetch), Some(inner_fetch)) => Some(usize::min(
1633+
inner_fetch.saturating_sub(skip_value.unwrap_or(0)),
1634+
outer_fetch,
1635+
)),
1636+
};
1637+
1638+
if new_skip.is_some() || new_fetch.is_some() {
15861639
subst.insert(
15871640
new_skip_var,
15881641
egraph.add(LogicalPlanLanguage::CubeScanOffset(CubeScanOffset(
1589-
skip_value,
1642+
new_skip,
15901643
))),
15911644
);
15921645
subst.insert(
15931646
new_fetch_var,
1594-
egraph.add(LogicalPlanLanguage::CubeScanLimit(CubeScanLimit(
1595-
fetch_value,
1596-
))),
1647+
egraph.add(LogicalPlanLanguage::CubeScanLimit(CubeScanLimit(new_fetch))),
15971648
);
15981649

15991650
return true;

rust/cubesql/cubesql/src/compile/test/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub mod test_bi_workarounds;
3030
#[cfg(test)]
3131
pub mod test_cube_join;
3232
#[cfg(test)]
33+
pub mod test_cube_scan;
34+
#[cfg(test)]
3335
pub mod test_df_execution;
3436
#[cfg(test)]
3537
pub mod test_introspection;
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
use cubeclient::models::V1LoadRequestQuery;
2+
use pretty_assertions::assert_eq;
3+
4+
use crate::compile::{
5+
test::{convert_select_to_query_plan, init_testing_logger, utils::LogicalPlanTestUtils},
6+
DatabaseProtocol,
7+
};
8+
9+
/// LIMIT n OFFSET m should be pushed to CubeScan
10+
#[tokio::test]
11+
async fn cubescan_limit_offset() {
12+
init_testing_logger();
13+
14+
let query_plan = convert_select_to_query_plan(
15+
// language=PostgreSQL
16+
r#"
17+
SELECT
18+
customer_gender
19+
FROM
20+
KibanaSampleDataEcommerce
21+
GROUP BY
22+
1
23+
LIMIT 2
24+
OFFSET 3
25+
"#
26+
.to_string(),
27+
DatabaseProtocol::PostgreSQL,
28+
)
29+
.await;
30+
31+
let logical_plan = query_plan.as_logical_plan();
32+
assert_eq!(
33+
logical_plan.find_cube_scan().request,
34+
V1LoadRequestQuery {
35+
measures: Some(vec![]),
36+
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
37+
segments: Some(vec![]),
38+
order: Some(vec![]),
39+
limit: Some(2),
40+
offset: Some(3),
41+
..Default::default()
42+
}
43+
);
44+
}
45+
46+
/// LIMIT over LIMIT should be pushed to single CubeScan
47+
#[tokio::test]
48+
async fn cubescan_limit_limit() {
49+
init_testing_logger();
50+
51+
let variants = vec![
52+
// language=PostgreSQL
53+
r#"
54+
SELECT
55+
customer_gender
56+
FROM (
57+
SELECT
58+
customer_gender
59+
FROM
60+
KibanaSampleDataEcommerce
61+
GROUP BY
62+
1
63+
LIMIT 3
64+
) scan
65+
LIMIT 2
66+
"#,
67+
// language=PostgreSQL
68+
r#"
69+
SELECT
70+
customer_gender
71+
FROM (
72+
SELECT
73+
customer_gender
74+
FROM
75+
KibanaSampleDataEcommerce
76+
GROUP BY
77+
1
78+
LIMIT 2
79+
) scan
80+
LIMIT 3
81+
"#,
82+
];
83+
84+
for variant in variants {
85+
let query_plan =
86+
convert_select_to_query_plan(variant.to_string(), DatabaseProtocol::PostgreSQL).await;
87+
88+
let logical_plan = query_plan.as_logical_plan();
89+
assert_eq!(
90+
logical_plan.find_cube_scan().request,
91+
V1LoadRequestQuery {
92+
measures: Some(vec![]),
93+
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
94+
segments: Some(vec![]),
95+
order: Some(vec![]),
96+
limit: Some(2),
97+
..Default::default()
98+
}
99+
);
100+
}
101+
}
102+
103+
/// OFFSET over OFFSET should be pushed to single CubeScan
104+
#[tokio::test]
105+
async fn cubescan_offset_offset() {
106+
init_testing_logger();
107+
108+
let variants = vec![
109+
// language=PostgreSQL
110+
r#"
111+
SELECT
112+
customer_gender
113+
FROM (
114+
SELECT
115+
customer_gender
116+
FROM
117+
KibanaSampleDataEcommerce
118+
GROUP BY
119+
1
120+
OFFSET 3
121+
) scan
122+
OFFSET 2
123+
"#,
124+
// language=PostgreSQL
125+
r#"
126+
SELECT
127+
customer_gender
128+
FROM (
129+
SELECT
130+
customer_gender
131+
FROM
132+
KibanaSampleDataEcommerce
133+
GROUP BY
134+
1
135+
OFFSET 2
136+
) scan
137+
OFFSET 3
138+
"#,
139+
];
140+
141+
for variant in variants {
142+
let query_plan =
143+
convert_select_to_query_plan(variant.to_string(), DatabaseProtocol::PostgreSQL).await;
144+
145+
let logical_plan = query_plan.as_logical_plan();
146+
assert_eq!(
147+
logical_plan.find_cube_scan().request,
148+
V1LoadRequestQuery {
149+
measures: Some(vec![]),
150+
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
151+
segments: Some(vec![]),
152+
order: Some(vec![]),
153+
offset: Some(5),
154+
..Default::default()
155+
}
156+
);
157+
}
158+
}
159+
160+
/// LIMIT OFFSET over LIMIT OFFSET should be pushed to single CubeScan with a proper values
161+
#[tokio::test]
162+
async fn cubescan_limit_offset_limit_offset() {
163+
init_testing_logger();
164+
165+
let variants = vec![
166+
(
167+
// language=PostgreSQL
168+
r#"
169+
SELECT
170+
customer_gender
171+
FROM (
172+
SELECT
173+
customer_gender
174+
FROM
175+
KibanaSampleDataEcommerce
176+
GROUP BY
177+
1
178+
LIMIT 3
179+
OFFSET 3
180+
) scan
181+
LIMIT 2
182+
OFFSET 2
183+
"#,
184+
1,
185+
),
186+
(
187+
// language=PostgreSQL
188+
r#"
189+
SELECT
190+
customer_gender
191+
FROM (
192+
SELECT
193+
customer_gender
194+
FROM
195+
KibanaSampleDataEcommerce
196+
GROUP BY
197+
1
198+
LIMIT 10
199+
OFFSET 3
200+
) scan
201+
LIMIT 2
202+
OFFSET 2
203+
"#,
204+
2,
205+
),
206+
];
207+
208+
for (variant, limit) in variants {
209+
let query_plan =
210+
convert_select_to_query_plan(variant.to_string(), DatabaseProtocol::PostgreSQL).await;
211+
212+
let logical_plan = query_plan.as_logical_plan();
213+
assert_eq!(
214+
logical_plan.find_cube_scan().request,
215+
V1LoadRequestQuery {
216+
measures: Some(vec![]),
217+
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
218+
segments: Some(vec![]),
219+
order: Some(vec![]),
220+
limit: Some(limit),
221+
offset: Some(5),
222+
..Default::default()
223+
}
224+
);
225+
}
226+
}

0 commit comments

Comments
 (0)