Skip to content

Commit f67418d

Browse files
committed
fix(cubesql): Calculate proper limit and offset for CubeScan in nested limits case
1 parent 74cce28 commit f67418d

File tree

3 files changed

+287
-8
lines changed

3 files changed

+287
-8
lines changed

rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,8 @@ impl RewriteRules for MemberRules {
240240
"?members",
241241
"?filters",
242242
"?orders",
243-
"?cube_fetch",
244-
"?offset",
243+
"?inner_fetch",
244+
"?inner_skip",
245245
"?split",
246246
"?can_pushdown_join",
247247
"CubeScanWrapped:false",
@@ -260,7 +260,14 @@ impl RewriteRules for MemberRules {
260260
"CubeScanWrapped:false",
261261
"?ungrouped",
262262
),
263-
self.push_down_limit("?skip", "?fetch", "?new_skip", "?new_fetch"),
263+
self.push_down_limit(
264+
"?skip",
265+
"?fetch",
266+
"?inner_skip",
267+
"?inner_fetch",
268+
"?new_skip",
269+
"?new_fetch",
270+
),
264271
),
265272
// MOD function to binary expr
266273
transforming_rewrite_with_root(
@@ -1597,14 +1604,21 @@ impl MemberRules {
15971604
&self,
15981605
skip_var: &'static str,
15991606
fetch_var: &'static str,
1607+
inner_skip_var: &'static str,
1608+
inner_fetch_var: &'static str,
16001609
new_skip_var: &'static str,
16011610
new_fetch_var: &'static str,
16021611
) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool {
16031612
let skip_var = var!(skip_var);
16041613
let fetch_var = var!(fetch_var);
1614+
let inner_skip_var = var!(inner_skip_var);
1615+
let inner_fetch_var = var!(inner_fetch_var);
16051616
let new_skip_var = var!(new_skip_var);
16061617
let new_fetch_var = var!(new_fetch_var);
16071618
move |egraph, subst| {
1619+
// This transform expects only single value in every (eclass, kind)
1620+
// No two different values of fetch or skip should ever get unified
1621+
16081622
let mut skip_value = None;
16091623
for skip in var_iter!(egraph[subst[skip_var]], LimitSkip) {
16101624
if skip.unwrap_or_default() > 0 {
@@ -1620,18 +1634,55 @@ impl MemberRules {
16201634
}
16211635
}
16221636

1623-
if skip_value.is_some() || fetch_value.is_some() {
1637+
let mut inner_skip_value = None;
1638+
for inner_skip in var_iter!(egraph[subst[inner_skip_var]], CubeScanOffset) {
1639+
inner_skip_value = *inner_skip;
1640+
break;
1641+
}
1642+
1643+
let mut inner_fetch_value = None;
1644+
for inner_fetch in var_iter!(egraph[subst[inner_fetch_var]], CubeScanLimit) {
1645+
inner_fetch_value = *inner_fetch;
1646+
break;
1647+
}
1648+
1649+
let new_skip = match (skip_value, inner_skip_value) {
1650+
(None, None) => None,
1651+
(Some(skip), None) | (None, Some(skip)) => Some(skip),
1652+
(Some(outer_skip), Some(inner_skip)) => Some(outer_skip + inner_skip),
1653+
};
1654+
let new_fetch = match (fetch_value, inner_fetch_value) {
1655+
(None, None) => None,
1656+
// Inner node has no limit, maybe just offset; result limit is same as for outer node
1657+
(Some(outer_fetch), None) => Some(outer_fetch),
1658+
// Outer node has no limit, but may have offset
1659+
// First, inner offset would apply
1660+
// Then inner node would limit rows
1661+
// Then outer offset would apply, which would yield no more than `inner_fetch - outer_skip` rows
1662+
(None, Some(inner_fetch)) => {
1663+
Some(inner_fetch.saturating_sub(skip_value.unwrap_or(0)))
1664+
}
1665+
// Both nodes have a limit
1666+
// First, inner offset would apply
1667+
// Then inner node would limit rows
1668+
// Then outer offset would apply, which would yield no more than `inner_fetch - outer_skip` rows
1669+
// Then outer limit would apply, which would yield no more than minimal of two
1670+
(Some(outer_fetch), Some(inner_fetch)) => Some(usize::min(
1671+
inner_fetch.saturating_sub(skip_value.unwrap_or(0)),
1672+
outer_fetch,
1673+
)),
1674+
};
1675+
1676+
if new_skip.is_some() || new_fetch.is_some() {
16241677
subst.insert(
16251678
new_skip_var,
16261679
egraph.add(LogicalPlanLanguage::CubeScanOffset(CubeScanOffset(
1627-
skip_value,
1680+
new_skip,
16281681
))),
16291682
);
16301683
subst.insert(
16311684
new_fetch_var,
1632-
egraph.add(LogicalPlanLanguage::CubeScanLimit(CubeScanLimit(
1633-
fetch_value,
1634-
))),
1685+
egraph.add(LogicalPlanLanguage::CubeScanLimit(CubeScanLimit(new_fetch))),
16351686
);
16361687

16371688
return true;

rust/cubesql/cubesql/src/compile/test/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub mod test_bi_workarounds;
3030
#[cfg(test)]
3131
pub mod test_cube_join;
3232
#[cfg(test)]
33+
pub mod test_cube_scan;
34+
#[cfg(test)]
3335
pub mod test_df_execution;
3436
#[cfg(test)]
3537
pub mod test_introspection;
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
use cubeclient::models::V1LoadRequestQuery;
2+
use pretty_assertions::assert_eq;
3+
4+
use crate::compile::{
5+
test::{convert_select_to_query_plan, init_testing_logger, utils::LogicalPlanTestUtils},
6+
DatabaseProtocol,
7+
};
8+
9+
/// LIMIT n OFFSET m should be pushed to CubeScan
10+
#[tokio::test]
11+
async fn cubescan_limit_offset() {
12+
init_testing_logger();
13+
14+
let query_plan = convert_select_to_query_plan(
15+
// language=PostgreSQL
16+
r#"
17+
SELECT
18+
customer_gender
19+
FROM
20+
KibanaSampleDataEcommerce
21+
GROUP BY
22+
1
23+
LIMIT 2
24+
OFFSET 3
25+
"#
26+
.to_string(),
27+
DatabaseProtocol::PostgreSQL,
28+
)
29+
.await;
30+
31+
let logical_plan = query_plan.as_logical_plan();
32+
assert_eq!(
33+
logical_plan.find_cube_scan().request,
34+
V1LoadRequestQuery {
35+
measures: Some(vec![]),
36+
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
37+
segments: Some(vec![]),
38+
order: Some(vec![]),
39+
limit: Some(2),
40+
offset: Some(3),
41+
..Default::default()
42+
}
43+
);
44+
}
45+
46+
/// LIMIT over LIMIT should be pushed to single CubeScan
47+
#[tokio::test]
48+
async fn cubescan_limit_limit() {
49+
init_testing_logger();
50+
51+
let variants = vec![
52+
// language=PostgreSQL
53+
r#"
54+
SELECT
55+
customer_gender
56+
FROM (
57+
SELECT
58+
customer_gender
59+
FROM
60+
KibanaSampleDataEcommerce
61+
GROUP BY
62+
1
63+
LIMIT 3
64+
) scan
65+
LIMIT 2
66+
"#,
67+
// language=PostgreSQL
68+
r#"
69+
SELECT
70+
customer_gender
71+
FROM (
72+
SELECT
73+
customer_gender
74+
FROM
75+
KibanaSampleDataEcommerce
76+
GROUP BY
77+
1
78+
LIMIT 2
79+
) scan
80+
LIMIT 3
81+
"#,
82+
];
83+
84+
for variant in variants {
85+
let query_plan =
86+
convert_select_to_query_plan(variant.to_string(), DatabaseProtocol::PostgreSQL).await;
87+
88+
let logical_plan = query_plan.as_logical_plan();
89+
assert_eq!(
90+
logical_plan.find_cube_scan().request,
91+
V1LoadRequestQuery {
92+
measures: Some(vec![]),
93+
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
94+
segments: Some(vec![]),
95+
order: Some(vec![]),
96+
limit: Some(2),
97+
..Default::default()
98+
}
99+
);
100+
}
101+
}
102+
103+
/// OFFSET over OFFSET should be pushed to single CubeScan
104+
#[tokio::test]
105+
async fn cubescan_offset_offset() {
106+
init_testing_logger();
107+
108+
let variants = vec![
109+
// language=PostgreSQL
110+
r#"
111+
SELECT
112+
customer_gender
113+
FROM (
114+
SELECT
115+
customer_gender
116+
FROM
117+
KibanaSampleDataEcommerce
118+
GROUP BY
119+
1
120+
OFFSET 3
121+
) scan
122+
OFFSET 2
123+
"#,
124+
// language=PostgreSQL
125+
r#"
126+
SELECT
127+
customer_gender
128+
FROM (
129+
SELECT
130+
customer_gender
131+
FROM
132+
KibanaSampleDataEcommerce
133+
GROUP BY
134+
1
135+
OFFSET 2
136+
) scan
137+
OFFSET 3
138+
"#,
139+
];
140+
141+
for variant in variants {
142+
let query_plan =
143+
convert_select_to_query_plan(variant.to_string(), DatabaseProtocol::PostgreSQL).await;
144+
145+
let logical_plan = query_plan.as_logical_plan();
146+
assert_eq!(
147+
logical_plan.find_cube_scan().request,
148+
V1LoadRequestQuery {
149+
measures: Some(vec![]),
150+
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
151+
segments: Some(vec![]),
152+
order: Some(vec![]),
153+
offset: Some(5),
154+
..Default::default()
155+
}
156+
);
157+
}
158+
}
159+
160+
/// LIMIT OFFSET over LIMIT OFFSET should be pushed to a single CubeScan with proper values
161+
#[tokio::test]
162+
async fn cubescan_limit_offset_limit_offset() {
163+
init_testing_logger();
164+
165+
let variants = vec![
166+
(
167+
// language=PostgreSQL
168+
r#"
169+
SELECT
170+
customer_gender
171+
FROM (
172+
SELECT
173+
customer_gender
174+
FROM
175+
KibanaSampleDataEcommerce
176+
GROUP BY
177+
1
178+
LIMIT 3
179+
OFFSET 3
180+
) scan
181+
LIMIT 2
182+
OFFSET 2
183+
"#,
184+
1,
185+
),
186+
(
187+
// language=PostgreSQL
188+
r#"
189+
SELECT
190+
customer_gender
191+
FROM (
192+
SELECT
193+
customer_gender
194+
FROM
195+
KibanaSampleDataEcommerce
196+
GROUP BY
197+
1
198+
LIMIT 10
199+
OFFSET 3
200+
) scan
201+
LIMIT 2
202+
OFFSET 2
203+
"#,
204+
2,
205+
),
206+
];
207+
208+
for (variant, limit) in variants {
209+
let query_plan =
210+
convert_select_to_query_plan(variant.to_string(), DatabaseProtocol::PostgreSQL).await;
211+
212+
let logical_plan = query_plan.as_logical_plan();
213+
assert_eq!(
214+
logical_plan.find_cube_scan().request,
215+
V1LoadRequestQuery {
216+
measures: Some(vec![]),
217+
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
218+
segments: Some(vec![]),
219+
order: Some(vec![]),
220+
limit: Some(limit),
221+
offset: Some(5),
222+
..Default::default()
223+
}
224+
);
225+
}
226+
}

0 commit comments

Comments
 (0)