Skip to content

Commit bcdbe94

Browse files
waralexrom authored and igorlukanin committed
feat(tesseract): Bucketing (#10095)
1 parent 8993ca2 commit bcdbe94

File tree

20 files changed

+458
-117
lines changed

20 files changed

+458
-117
lines changed

packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,6 @@ const DimensionsSchema = Joi.object().pattern(identifierRegex, Joi.alternatives(
715715
}),
716716
inherit(BaseDimension, {
717717
multiStage: Joi.boolean().valid(true),
718-
type: Joi.any().valid('number').required(),
719718
sql: Joi.func().required(),
720719
addGroupBy: Joi.func(),
721720
}),
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
import {
2+
getEnv,
3+
} from '@cubejs-backend/shared';
4+
import { prepareYamlCompiler } from '../../unit/PrepareCompiler';
5+
import { dbRunner } from './PostgresDBRunner';
6+
7+
describe('Multi-Stage Bucketing', () => {
8+
// Raise the Jest timeout for this suite to 200000 (Jest reads this value as milliseconds);
// the tests below execute their queries through dbRunner rather than against mocks.
jest.setTimeout(200000);
9+
10+
// Compiles the inline YAML data model shared by every test in this suite.
//
// The model declares a single `orders` cube backed by 36 literal rows: customers 1-6,
// each with one order in March and one in September of 2023, 2024 and 2025.
// Yearly revenue per customer rises for customers 1, 2 and 5 and falls for customers 3, 4 and 6.
//
// Dimensions: id (primary key), customerId, createdAt (time), plus two multi-stage
// string dimensions that both add customerId to the group-by:
//   - changeType        wraps the revenueChangeType measure in "Revenue is ...".
//   - changeTypeComplex repeats the New/Grow/Down CASE expression inline.
// Measures: count, revenue (sum), revenueYearAgo (revenue time-shifted one year prior on
// createdAt) and revenueChangeType (New/Grow/Down, comparing revenue with revenueYearAgo).
const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(`
11+
cubes:
12+
- name: orders
13+
sql: >
14+
SELECT 1 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1000 AS revenue UNION ALL
15+
SELECT 2 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1100 AS revenue UNION ALL
16+
SELECT 3 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1300 AS revenue UNION ALL
17+
SELECT 4 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1400 AS revenue UNION ALL
18+
SELECT 5 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1600 AS revenue UNION ALL
19+
SELECT 6 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1700 AS revenue UNION ALL
20+
21+
SELECT 7 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2000 AS revenue UNION ALL
22+
SELECT 8 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2100 AS revenue UNION ALL
23+
SELECT 9 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2300 AS revenue UNION ALL
24+
SELECT 10 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2500 AS revenue UNION ALL
25+
SELECT 11 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2700 AS revenue UNION ALL
26+
SELECT 12 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2900 AS revenue UNION ALL
27+
28+
SELECT 13 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 3000 AS revenue UNION ALL
29+
SELECT 14 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 2800 AS revenue UNION ALL
30+
SELECT 15 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 2500 AS revenue UNION ALL
31+
SELECT 16 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 2300 AS revenue UNION ALL
32+
SELECT 17 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 2100 AS revenue UNION ALL
33+
SELECT 18 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 1900 AS revenue UNION ALL
34+
35+
SELECT 19 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 4000 AS revenue UNION ALL
36+
SELECT 20 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 4200 AS revenue UNION ALL
37+
SELECT 21 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 3900 AS revenue UNION ALL
38+
SELECT 22 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 3700 AS revenue UNION ALL
39+
SELECT 23 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 3400 AS revenue UNION ALL
40+
SELECT 24 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 3200 AS revenue UNION ALL
41+
42+
SELECT 25 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 1500 AS revenue UNION ALL
43+
SELECT 26 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 1700 AS revenue UNION ALL
44+
SELECT 27 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 2000 AS revenue UNION ALL
45+
SELECT 28 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 2200 AS revenue UNION ALL
46+
SELECT 29 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 2500 AS revenue UNION ALL
47+
SELECT 30 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 2700 AS revenue UNION ALL
48+
49+
SELECT 31 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 4500 AS revenue UNION ALL
50+
SELECT 32 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 4300 AS revenue UNION ALL
51+
SELECT 33 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 4100 AS revenue UNION ALL
52+
SELECT 34 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 3900 AS revenue UNION ALL
53+
SELECT 35 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 3700 AS revenue UNION ALL
54+
SELECT 36 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 3500 AS revenue
55+
56+
dimensions:
57+
- name: id
58+
sql: ID
59+
type: number
60+
primary_key: true
61+
62+
- name: customerId
63+
sql: customerId
64+
type: number
65+
66+
- name: createdAt
67+
sql: createdAt
68+
type: time
69+
70+
- name: changeType
71+
sql: "CONCAT('Revenue is ', {revenueChangeType})"
72+
multi_stage: true
73+
type: string
74+
add_group_by: [orders.customerId]
75+
76+
- name: changeTypeComplex
77+
sql: >
78+
CASE
79+
WHEN {revenueYearAgo} IS NULL THEN 'New'
80+
WHEN {revenue} > {revenueYearAgo} THEN 'Grow'
81+
ELSE 'Down'
82+
END
83+
multi_stage: true
84+
type: string
85+
add_group_by: [orders.customerId]
86+
87+
88+
measures:
89+
- name: count
90+
type: count
91+
92+
- name: revenue
93+
sql: revenue
94+
type: sum
95+
96+
- name: revenueYearAgo
97+
sql: "{revenue}"
98+
multi_stage: true
99+
type: number
100+
time_shift:
101+
- time_dimension: orders.createdAt
102+
interval: 1 year
103+
type: prior
104+
105+
- name: revenueChangeType
106+
sql: >
107+
CASE
108+
WHEN {revenueYearAgo} IS NULL THEN 'New'
109+
WHEN {revenue} > {revenueYearAgo} THEN 'Grow'
110+
ELSE 'Down'
111+
END
112+
type: string
113+
114+
115+
116+
`);
117+
118+
if (getEnv('nativeSqlPlanner')) {
119+
// Groups count and revenue by the multi-stage `changeType` bucket and by year of createdAt
// for the date range 2024-01-02 .. 2026-01-01, ordered by bucket and then by year.
// Expected: four rows (Down/Grow x 2024/2025), each counting 6 orders, i.e. the three
// customers in that bucket with two orders per year. The revenue totals follow from the
// fixture rows: customers 3, 4, 6 make up "Down" and customers 1, 2, 5 make up "Grow".
it('simple bucketing', async () => dbRunner.runQueryTest({
120+
dimensions: ['orders.changeType'],
121+
measures: ['orders.count', 'orders.revenue'],
122+
timeDimensions: [
123+
{
124+
dimension: 'orders.createdAt',
125+
granularity: 'year',
126+
dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00']
127+
}
128+
],
129+
timezone: 'UTC',
130+
order: [{
131+
id: 'orders.changeType'
132+
}, { id: 'orders.createdAt' }],
133+
}, [
134+
{
135+
orders__change_type: 'Revenue is Down',
136+
orders__created_at_year: '2024-01-01T00:00:00.000Z',
137+
orders__count: '6',
138+
orders__revenue: '20400'
139+
},
140+
{
141+
orders__change_type: 'Revenue is Down',
142+
orders__created_at_year: '2025-01-01T00:00:00.000Z',
143+
orders__count: '6',
144+
orders__revenue: '17800'
145+
},
146+
{
147+
orders__change_type: 'Revenue is Grow',
148+
orders__created_at_year: '2024-01-01T00:00:00.000Z',
149+
orders__count: '6',
150+
orders__revenue: '11700'
151+
},
152+
{
153+
orders__change_type: 'Revenue is Grow',
154+
orders__created_at_year: '2025-01-01T00:00:00.000Z',
155+
orders__count: '6',
156+
orders__revenue: '14100'
157+
}
158+
],
159+
{ joinGraph, cubeEvaluator, compiler }));
160+
161+
// Same grouping as the simple case (changeType bucket x year of createdAt, same date range
// and ordering), but selects the multi-stage `revenueYearAgo` measure alongside `revenue`.
// Expected: within each bucket the 2025 revenue_year_ago equals the 2024 revenue
// (20400 for Down, 11700 for Grow); the 2024 revenue_year_ago values (22800, 9400) are the
// 2023 totals of the same customers, which lie before the queried date range.
it('bucketing with multistage measure', async () => dbRunner.runQueryTest({
162+
dimensions: ['orders.changeType'],
163+
measures: ['orders.revenue', 'orders.revenueYearAgo'],
164+
timeDimensions: [
165+
{
166+
dimension: 'orders.createdAt',
167+
granularity: 'year',
168+
dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00']
169+
}
170+
],
171+
timezone: 'UTC',
172+
order: [{
173+
id: 'orders.changeType'
174+
}, { id: 'orders.createdAt' }],
175+
},
176+
[
177+
{
178+
orders__change_type: 'Revenue is Down',
179+
orders__created_at_year: '2024-01-01T00:00:00.000Z',
180+
orders__revenue: '20400',
181+
orders__revenue_year_ago: '22800'
182+
},
183+
{
184+
orders__change_type: 'Revenue is Down',
185+
orders__created_at_year: '2025-01-01T00:00:00.000Z',
186+
orders__revenue: '17800',
187+
orders__revenue_year_ago: '20400'
188+
},
189+
{
190+
orders__change_type: 'Revenue is Grow',
191+
orders__created_at_year: '2024-01-01T00:00:00.000Z',
192+
orders__revenue: '11700',
193+
orders__revenue_year_ago: '9400'
194+
},
195+
{
196+
orders__change_type: 'Revenue is Grow',
197+
orders__created_at_year: '2025-01-01T00:00:00.000Z',
198+
orders__revenue: '14100',
199+
orders__revenue_year_ago: '11700'
200+
},
201+
],
202+
{ joinGraph, cubeEvaluator, compiler }));
203+
// Buckets by `changeTypeComplex`, the dimension whose SQL spells out the New/Grow/Down CASE
// expression itself instead of referencing the revenueChangeType measure.
// Measures, date range and ordering mirror the multistage-measure test; the expected
// numbers are identical and only the bucket labels differ ('Down'/'Grow' without the
// "Revenue is " prefix).
it('bucketing with complex bucket dimension', async () => dbRunner.runQueryTest({
204+
dimensions: ['orders.changeTypeComplex'],
205+
measures: ['orders.revenue', 'orders.revenueYearAgo'],
206+
timeDimensions: [
207+
{
208+
dimension: 'orders.createdAt',
209+
granularity: 'year',
210+
dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00']
211+
}
212+
],
213+
timezone: 'UTC',
214+
order: [{
215+
id: 'orders.changeTypeComplex'
216+
}, { id: 'orders.createdAt' }],
217+
},
218+
[
219+
{
220+
orders__change_type_complex: 'Down',
221+
orders__created_at_year: '2024-01-01T00:00:00.000Z',
222+
orders__revenue: '20400',
223+
orders__revenue_year_ago: '22800'
224+
},
225+
{
226+
orders__change_type_complex: 'Down',
227+
orders__created_at_year: '2025-01-01T00:00:00.000Z',
228+
orders__revenue: '17800',
229+
orders__revenue_year_ago: '20400'
230+
},
231+
{
232+
orders__change_type_complex: 'Grow',
233+
orders__created_at_year: '2024-01-01T00:00:00.000Z',
234+
orders__revenue: '11700',
235+
orders__revenue_year_ago: '9400'
236+
},
237+
{
238+
orders__change_type_complex: 'Grow',
239+
orders__created_at_year: '2025-01-01T00:00:00.000Z',
240+
orders__revenue: '14100',
241+
orders__revenue_year_ago: '11700'
242+
},
243+
],
244+
{ joinGraph, cubeEvaluator, compiler }));
245+
} else {
246+
// These tests only work with Tesseract (enabled via the nativeSqlPlanner env flag)
247+
// Placeholder registered when the native SQL planner is disabled, so the suite still
// reports one (skipped) entry. Titled after this suite; the previous title,
// 'multi stage over sub query', described an unrelated scenario.
test.skip('multi-stage bucketing', () => { expect(1).toBe(1); });
248+
}
249+
});

packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts

Lines changed: 0 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,85 +1198,6 @@ views:
11981198
{ joinGraph, cubeEvaluator, compiler });
11991199
});
12001200

1201-
it('source product_category_ext and created_at cross join', async () => {
1202-
await dbRunner.runQueryTest({
1203-
dimensions: ['source.product_category_ext'],
1204-
timeDimensions: [
1205-
{
1206-
dimension: 'source.created_at',
1207-
granularity: 'month'
1208-
}
1209-
],
1210-
timezone: 'UTC',
1211-
order: [
1212-
{
1213-
id: 'source.created_at'
1214-
},
1215-
{
1216-
id: 'source.product_category_ext'
1217-
}
1218-
],
1219-
}, [
1220-
{
1221-
source__product_category_ext: 'some category-EUR-EUR',
1222-
source__created_at_month: '2022-01-01T00:00:00.000Z'
1223-
},
1224-
{
1225-
source__product_category_ext: 'some category-USD-USD',
1226-
source__created_at_month: '2022-01-01T00:00:00.000Z'
1227-
},
1228-
{
1229-
source__product_category_ext: 'some category A-EUR-EUR',
1230-
source__created_at_month: '2022-02-01T00:00:00.000Z'
1231-
},
1232-
{
1233-
source__product_category_ext: 'some category A-USD-USD',
1234-
source__created_at_month: '2022-02-01T00:00:00.000Z'
1235-
},
1236-
{
1237-
source__product_category_ext: 'some category B-EUR-EUR',
1238-
source__created_at_month: '2022-02-01T00:00:00.000Z'
1239-
},
1240-
{
1241-
source__product_category_ext: 'some category B-USD-USD',
1242-
source__created_at_month: '2022-02-01T00:00:00.000Z'
1243-
},
1244-
{
1245-
source__product_category_ext: 'some category-EUR-EUR',
1246-
source__created_at_month: '2022-02-01T00:00:00.000Z'
1247-
},
1248-
{
1249-
source__product_category_ext: 'some category-USD-USD',
1250-
source__created_at_month: '2022-02-01T00:00:00.000Z'
1251-
},
1252-
{
1253-
source__product_category_ext: 'some category A-EUR-EUR',
1254-
source__created_at_month: '2022-03-01T00:00:00.000Z'
1255-
},
1256-
{
1257-
source__product_category_ext: 'some category A-USD-USD',
1258-
source__created_at_month: '2022-03-01T00:00:00.000Z'
1259-
},
1260-
{
1261-
source__product_category_ext: 'some category B-EUR-EUR',
1262-
source__created_at_month: '2022-03-01T00:00:00.000Z'
1263-
},
1264-
{
1265-
source__product_category_ext: 'some category B-USD-USD',
1266-
source__created_at_month: '2022-03-01T00:00:00.000Z'
1267-
},
1268-
{
1269-
source__product_category_ext: 'some category B-EUR-EUR',
1270-
source__created_at_month: '2022-04-01T00:00:00.000Z'
1271-
},
1272-
{
1273-
source__product_category_ext: 'some category B-USD-USD',
1274-
source__created_at_month: '2022-04-01T00:00:00.000Z'
1275-
}
1276-
],
1277-
{ joinGraph, cubeEvaluator, compiler });
1278-
});
1279-
12801201
it('source product_category_ext filter', async () => {
12811202
await dbRunner.runQueryTest({
12821203
dimensions: ['source.product_category'],

packages/cubejs-schema-compiler/test/integration/utils/BaseDbRunner.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ export class BaseDbRunner {
3030

3131
const res = await this.testQuery(sqlAndParams);
3232
console.log(JSON.stringify(res));
33+
console.log('!!! res', res);
3334

3435
expect(res).toEqual(
3536
expectedResult

rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/dimension_definition.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ pub struct DimensionDefinitionStatic {
2121
pub owned_by_cube: Option<bool>,
2222
#[serde(rename = "multiStage")]
2323
pub multi_stage: Option<bool>,
24+
#[serde(rename = "addGroupByReferences")]
25+
pub add_group_by_references: Option<Vec<String>>,
2426
#[serde(rename = "subQuery")]
2527
pub sub_query: Option<bool>,
2628
#[serde(rename = "propagateFiltersToSubQuery")]

rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub struct MultiStageSubqueryRef {
99
name: String,
1010
#[builder(default)]
1111
symbols: Vec<Rc<MemberSymbol>>,
12+
schema: Rc<LogicalSchema>,
1213
}
1314

1415
impl MultiStageSubqueryRef {
@@ -19,6 +20,10 @@ impl MultiStageSubqueryRef {
1920
pub fn symbols(&self) -> &Vec<Rc<MemberSymbol>> {
2021
&self.symbols
2122
}
23+
24+
pub fn schema(&self) -> &Rc<LogicalSchema> {
25+
&self.schema
26+
}
2227
}
2328

2429
impl PrettyPrint for MultiStageSubqueryRef {

0 commit comments

Comments
 (0)