Skip to content

Commit 98fd96c

Browse files
mcheshkov authored and marianore-muttdata committed
feat: Rewrite joins from SQL as query-level join hints (cube-js#9561)
This allows the join structure to be preserved in some cases. Consider a query like this:

```
SELECT cube2.dim, cube3.dim
FROM cube1
JOIN cube2 ON cube1.__cubeJoinField = cube2.__cubeJoinField
JOIN cube3 ON cube1.__cubeJoinField = cube3.__cubeJoinField
GROUP BY 1, 2
```

It references members only from `cube2` and `cube3`, but the join structure actually has `cube1` as its root, and `cube1` is completely missing from the query. Now every join generates a separate join hint during rewrite, and these hints are then passed to the JS side as query-level join hints.

Supporting changes:

* Remove unused MergedMembersReplacer
* Remove unused rewrite for CrossJoin on empty CubeScan. A CubeScan with empty members should never manifest during rewrites: it is either `AllMembers` or specific members, but never empty
* Simplify CrossJoin to CubeScan rewrite
* Drop ordering from CubeScan during CrossJoin rewrite, because CROSS JOIN does not guarantee the same ordering as its inputs
* Rewrite join on `__cubeJoinField` directly to CubeScan, without intermediate CrossJoin
1 parent ce463cd commit 98fd96c

File tree

22 files changed

+478
-409
lines changed

22 files changed

+478
-409
lines changed

packages/cubejs-api-gateway/openspec.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,10 @@ components:
369369
- "on"
370370
- joinType
371371
- alias
372+
V1LoadRequestJoinHint:
373+
type: "array"
374+
items:
375+
type: "string"
372376
V1LoadRequestQuery:
373377
type: "object"
374378
properties:
@@ -412,6 +416,10 @@ components:
412416
type: "array"
413417
items:
414418
$ref: "#/components/schemas/V1LoadRequestQueryJoinSubquery"
419+
joinHints:
420+
type: "array"
421+
items:
422+
$ref: "#/components/schemas/V1LoadRequestJoinHint"
415423
V1LoadRequest:
416424
type: "object"
417425
properties:

packages/cubejs-api-gateway/src/query.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ const subqueryJoin = Joi.object().keys({
162162
alias: Joi.string(),
163163
});
164164

165+
const joinHint = Joi.array().items(Joi.string());
166+
165167
const querySchema = Joi.object().keys({
166168
// TODO add member expression alternatives only for SQL API queries?
167169
measures: Joi.array().items(Joi.alternatives(id, memberExpression, parsedMemberExpression)),
@@ -189,6 +191,7 @@ const querySchema = Joi.object().keys({
189191
ungrouped: Joi.boolean(),
190192
responseFormat: Joi.valid('default', 'compact'),
191193
subqueryJoins: Joi.array().items(subqueryJoin),
194+
joinHints: Joi.array().items(joinHint),
192195
});
193196

194197
const normalizeQueryOrder = order => {

packages/cubejs-api-gateway/src/types/query.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ type SubqueryJoins = {
122122
alias: string,
123123
};
124124

125+
type JoinHint = Array<string>;
126+
125127
/**
126128
* Incoming network query data type.
127129
*/
@@ -143,6 +145,8 @@ interface Query {
143145

144146
// TODO incoming query, query with parsed exprs and query with evaluated exprs are all different types
145147
subqueryJoins?: Array<SubqueryJoins>,
148+
149+
joinHints?: Array<JoinHint>
146150
}
147151

148152
/**

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ export class BaseQuery {
279279
multiStageDimensions: this.options.multiStageDimensions,
280280
multiStageTimeDimensions: this.options.multiStageTimeDimensions,
281281
subqueryJoins: this.options.subqueryJoins,
282+
joinHints: this.options.joinHints,
282283
});
283284
this.from = this.options.from;
284285
this.multiStageQuery = this.options.multiStageQuery;
@@ -329,6 +330,7 @@ export class BaseQuery {
329330
const hasMultiStageMeasures = this.fullKeyQueryAggregateMeasures({ hasMultipliedForPreAggregation: true }).multiStageMembers.length > 0;
330331
this.canUseNativeSqlPlannerPreAggregation = hasMultiStageMeasures;
331332
}
333+
this.queryLevelJoinHints = this.options.joinHints ?? [];
332334
this.prebuildJoin();
333335

334336
this.cubeAliasPrefix = this.options.cubeAliasPrefix;
@@ -410,7 +412,10 @@ export class BaseQuery {
410412
*/
411413
get allJoinHints() {
412414
if (!this.collectedJoinHints) {
413-
this.collectedJoinHints = this.collectJoinHints();
415+
this.collectedJoinHints = [
416+
...this.queryLevelJoinHints,
417+
...this.collectJoinHints(),
418+
];
414419
}
415420
return this.collectedJoinHints;
416421
}

packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,4 +641,32 @@ describe('Multiple join paths', () => {
641641
});
642642
}
643643
});
644+
645+
describe('Query level join hints', () => {
646+
it('should respect query level join hints', async () => {
647+
const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, {
648+
measures: [],
649+
dimensions: [
650+
'A.a_id',
651+
'X.x_name_ref',
652+
],
653+
joinHints: [
654+
['A', 'D'],
655+
['D', 'E'],
656+
['E', 'X'],
657+
],
658+
});
659+
660+
const [sql, _params] = query.buildSqlAndParams();
661+
662+
expect(sql).toMatch(/ON 'A' = 'D'/);
663+
expect(sql).toMatch(/ON 'D' = 'E'/);
664+
expect(sql).toMatch(/ON 'E' = 'X'/);
665+
expect(sql).not.toMatch(/ON 'A' = 'B'/);
666+
expect(sql).not.toMatch(/ON 'B' = 'C'/);
667+
expect(sql).not.toMatch(/ON 'C' = 'X'/);
668+
expect(sql).not.toMatch(/ON 'A' = 'F'/);
669+
expect(sql).not.toMatch(/ON 'F' = 'X'/);
670+
});
671+
});
644672
});

rust/cubesql/cubeclient/src/models/v1_load_request_query.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub struct V1LoadRequestQuery {
3030
pub ungrouped: Option<bool>,
3131
#[serde(rename = "subqueryJoins", skip_serializing_if = "Option::is_none")]
3232
pub subquery_joins: Option<Vec<crate::models::V1LoadRequestQueryJoinSubquery>>,
33+
#[serde(rename = "joinHints", skip_serializing_if = "Option::is_none")]
34+
pub join_hints: Option<Vec<Vec<String>>>,
3335
}
3436

3537
impl V1LoadRequestQuery {
@@ -45,6 +47,7 @@ impl V1LoadRequestQuery {
4547
filters: None,
4648
ungrouped: None,
4749
subquery_joins: None,
50+
join_hints: None,
4851
}
4952
}
5053
}

rust/cubesql/cubesql/src/compile/builder.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ impl QueryBuilder {
152152
},
153153
ungrouped: None,
154154
subquery_joins: None,
155+
join_hints: None,
155156
},
156157
meta: self.meta,
157158
}

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3368,6 +3368,8 @@ impl WrappedSelectNode {
33683368
time_dimensions: load_request.time_dimensions.clone(),
33693369
subquery_joins: (!prepared_join_subqueries.is_empty())
33703370
.then_some(prepared_join_subqueries),
3371+
3372+
join_hints: load_request.join_hints.clone(),
33713373
};
33723374

33733375
// TODO time dimensions, filters, segments

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4665,6 +4665,13 @@ ORDER BY "ca_4" ASC
46654665
segments: Some(vec![]),
46664666
dimensions: Some(vec!["Logs.read".to_string()]),
46674667
order: Some(vec![]),
4668+
join_hints: Some(vec![
4669+
vec!["KibanaSampleDataEcommerce".to_string(), "Logs".to_string(),],
4670+
vec![
4671+
"KibanaSampleDataEcommerce".to_string(),
4672+
"NumberCube".to_string(),
4673+
],
4674+
]),
46684675
..Default::default()
46694676
}
46704677
);
@@ -4726,6 +4733,10 @@ ORDER BY
47264733
date_range: None,
47274734
}]),
47284735
order: Some(vec![]),
4736+
join_hints: Some(vec![vec![
4737+
"KibanaSampleDataEcommerce".to_string(),
4738+
"Logs".to_string(),
4739+
],]),
47294740
..Default::default()
47304741
}
47314742
);
@@ -8218,6 +8229,10 @@ ORDER BY "source"."str0" ASC
82188229
segments: Some(vec![]),
82198230
order: Some(vec![]),
82208231
ungrouped: Some(true),
8232+
join_hints: Some(vec![vec![
8233+
"KibanaSampleDataEcommerce".to_string(),
8234+
"Logs".to_string(),
8235+
],]),
82218236
..Default::default()
82228237
}
82238238
)
@@ -9794,6 +9809,10 @@ ORDER BY "source"."str0" ASC
97949809
segments: Some(vec![]),
97959810
order: Some(vec![]),
97969811
ungrouped: Some(true),
9812+
join_hints: Some(vec![vec![
9813+
"Logs".to_string(),
9814+
"KibanaSampleDataEcommerce".to_string(),
9815+
],]),
97979816
..Default::default()
97989817
},
97999818
);
@@ -11845,6 +11864,12 @@ ORDER BY "source"."str0" ASC
1184511864
}).to_string(),
1184611865
]),
1184711866
order: Some(vec![]),
11867+
join_hints: Some(vec![
11868+
vec![
11869+
"KibanaSampleDataEcommerce".to_string(),
11870+
"Logs".to_string(),
11871+
],
11872+
]),
1184811873
..Default::default()
1184911874
}
1185011875
);
@@ -12271,6 +12296,10 @@ ORDER BY "source"."str0" ASC
1227112296
]),
1227212297
segments: Some(vec![]),
1227312298
order: Some(vec![]),
12299+
join_hints: Some(vec![vec![
12300+
"KibanaSampleDataEcommerce".to_string(),
12301+
"Logs".to_string(),
12302+
],]),
1227412303
..Default::default()
1227512304
}
1227612305
)

rust/cubesql/cubesql/src/compile/rewrite/converter.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ use crate::{
1212
AggregateFunctionExprDistinct, AggregateFunctionExprFun, AggregateSplit,
1313
AggregateUDFExprFun, AliasExprAlias, AnyExprAll, AnyExprOp, BetweenExprNegated,
1414
BinaryExprOp, CastExprDataType, ChangeUserMemberValue, ColumnExprColumn,
15-
CubeScanAliasToCube, CubeScanLimit, CubeScanOffset, CubeScanUngrouped, CubeScanWrapped,
16-
DimensionName, EmptyRelationDerivedSourceTableName, EmptyRelationIsWrappable,
17-
EmptyRelationProduceOneRow, FilterMemberMember, FilterMemberOp, FilterMemberValues,
18-
FilterOpOp, GroupingSetExprType, GroupingSetType, InListExprNegated,
19-
InSubqueryExprNegated, JoinJoinConstraint, JoinJoinType, JoinLeftOn,
15+
CubeScanAliasToCube, CubeScanJoinHints, CubeScanLimit, CubeScanOffset,
16+
CubeScanUngrouped, CubeScanWrapped, DimensionName, EmptyRelationDerivedSourceTableName,
17+
EmptyRelationIsWrappable, EmptyRelationProduceOneRow, FilterMemberMember,
18+
FilterMemberOp, FilterMemberValues, FilterOpOp, GroupingSetExprType, GroupingSetType,
19+
InListExprNegated, InSubqueryExprNegated, JoinJoinConstraint, JoinJoinType, JoinLeftOn,
2020
JoinNullEqualsNull, JoinRightOn, LikeExprEscapeChar, LikeExprLikeType, LikeExprNegated,
2121
LikeType, LimitFetch, LimitSkip, LiteralExprValue, LiteralMemberRelation,
2222
LiteralMemberValue, LogicalPlanLanguage, MeasureName, MemberErrorError, OrderAsc,
@@ -2002,6 +2002,12 @@ impl LanguageToLogicalPlanConverter {
20022002
query.ungrouped = Some(true);
20032003
}
20042004

2005+
let join_hints =
2006+
match_data_node!(node_by_id, cube_scan_params[10], CubeScanJoinHints);
2007+
if join_hints.len() > 0 {
2008+
query.join_hints = Some(join_hints);
2009+
}
2010+
20052011
query.order = if !query_order.is_empty() {
20062012
Some(query_order)
20072013
} else {

0 commit comments

Comments
 (0)