Skip to content

Commit 358ee5e

Browse files
committed
[WIP] Add join hints to members adapters
just adding extra join hints on query level does not work because of FKQA and multiplication check in subqueries: we have to know join path for every member, so it should be either explicit or unambigous _for each member_.
1 parent b9b829d commit 358ee5e

File tree

8 files changed

+380
-22
lines changed

8 files changed

+380
-22
lines changed

packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export class BaseDimension {
1010

1111
public readonly isMemberExpression: boolean = false;
1212

13+
public readonly joinHint: Array<string> = [];
14+
1315
public constructor(
1416
protected readonly query: BaseQuery,
1517
public readonly dimension: any
@@ -20,6 +22,18 @@ export class BaseDimension {
2022
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2123
this.expressionName = dimension.expressionName || `${dimension.cubeName}.${dimension.name}`;
2224
this.isMemberExpression = !!dimension.definition;
25+
} else {
26+
// TODO move this `as` to static types
27+
const dimensionPath = dimension as string | null;
28+
if (dimensionPath !== null) {
29+
const parts = dimensionPath.split('.');
30+
if (parts.length > 2) {
31+
// Measure path contains join path
32+
const hint = parts.slice(0, -1);
33+
this.dimension = parts.slice(-2).join('.');
34+
this.joinHint = hint;
35+
}
36+
}
2337
}
2438
}
2539

packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export class BaseMeasure {
1010

1111
public readonly isMemberExpression: boolean = false;
1212

13+
public readonly joinHint: Array<string> = [];
14+
1315
public constructor(
1416
protected readonly query: BaseQuery,
1517
public readonly measure: any
@@ -20,6 +22,16 @@ export class BaseMeasure {
2022
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2123
this.expressionName = measure.expressionName || `${measure.cubeName}.${measure.name}`;
2224
this.isMemberExpression = !!measure.definition;
25+
} else {
26+
// TODO move this `as` to static types
27+
const measurePath = measure as string;
28+
const parts = measurePath.split('.');
29+
if (parts.length > 2) {
30+
// Measure path contains join path
31+
const hint = parts.slice(0, -1);
32+
this.measure = parts.slice(-2).join('.');
33+
this.joinHint = hint;
34+
}
2335
}
2436
}
2537

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -307,16 +307,16 @@ export class BaseQuery {
307307
}
308308

309309
prebuildJoin() {
310-
if (this.useNativeSqlPlanner) {
311-
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
312-
// But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one).
313-
try {
314-
this.join = this.joinGraph.buildJoin(this.allJoinHints);
315-
} catch (e) {
316-
// Ignore
317-
}
318-
} else {
310+
try {
311+
// TODO allJoinHints should contain join hints form pre-agg
319312
this.join = this.joinGraph.buildJoin(this.allJoinHints);
313+
} catch (e) {
314+
if (this.useNativeSqlPlanner) {
315+
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
316+
// But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one).
317+
} else {
318+
throw e;
319+
}
320320
}
321321
}
322322

@@ -363,6 +363,10 @@ export class BaseQuery {
363363
return this.collectedCubeNames;
364364
}
365365

366+
/**
367+
*
368+
* @returns {Array<Array<string>>}
369+
*/
366370
get allJoinHints() {
367371
if (!this.collectedJoinHints) {
368372
this.collectedJoinHints = this.collectJoinHints();
@@ -1201,7 +1205,16 @@ export class BaseQuery {
12011205

12021206
collectAllMultiStageMembers(allMemberChildren) {
12031207
const allMembers = R.uniq(R.flatten(Object.keys(allMemberChildren).map(k => [k].concat(allMemberChildren[k]))));
1204-
return R.fromPairs(allMembers.map(m => ([m, this.memberInstanceByPath(m).isMultiStage()])));
1208+
return R.fromPairs(allMembers.map(m => {
1209+
// When `m` is coming from `collectAllMemberChildren`, it can contain `granularities.customGranularityName` in path
1210+
// And it would mess up with join hints detection
1211+
const trimmedPath = this
1212+
.cubeEvaluator
1213+
.parsePathAnyType(m)
1214+
.slice(0, 2)
1215+
.join('.');
1216+
return [m, this.memberInstanceByPath(trimmedPath).isMultiStage()];
1217+
}));
12051218
}
12061219

12071220
memberInstanceByPath(m) {
@@ -1990,7 +2003,7 @@ export class BaseQuery {
19902003
);
19912004

19922005
if (shouldBuildJoinForMeasureSelect) {
1993-
const joinHints = this.collectFrom(measures, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor');
2006+
const joinHints = this.collectJoinHintsFromMembers(measures);
19942007
const measuresJoin = this.joinGraph.buildJoin(joinHints);
19952008
if (measuresJoin.multiplicationFactor[keyCubeName]) {
19962009
throw new UserError(
@@ -2044,6 +2057,11 @@ export class BaseQuery {
20442057
(!this.safeEvaluateSymbolContext().ungrouped && this.aggregateSubQueryGroupByClause() || '');
20452058
}
20462059

2060+
/**
2061+
* @param {Array<BaseMeasure>} measures
2062+
* @param {string} keyCubeName
2063+
* @returns {boolean}
2064+
*/
20472065
checkShouldBuildJoinForMeasureSelect(measures, keyCubeName) {
20482066
// When member expression references view, it would have to collect join hints from view
20492067
// Consider join A->B, as many-to-one, so B is multiplied and A is not, and member expression like SUM(AB_view.dimB)
@@ -2065,7 +2083,11 @@ export class BaseQuery {
20652083
.filter(member => member.definition().ownedByCube);
20662084

20672085
const cubes = this.collectFrom(nonViewMembers, this.collectCubeNamesFor.bind(this), 'collectCubeNamesFor');
2068-
const joinHints = this.collectFrom(nonViewMembers, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor');
2086+
// Not using `collectJoinHintsFromMembers([measure])` because it would collect too many join hints from view
2087+
const joinHints = [
2088+
measure.joinHint,
2089+
...this.collectJoinHintsFromMembers(nonViewMembers),
2090+
];
20692091
if (R.any(cubeName => keyCubeName !== cubeName, cubes)) {
20702092
const measuresJoin = this.joinGraph.buildJoin(joinHints);
20712093
if (measuresJoin.multiplicationFactor[keyCubeName]) {
@@ -2179,12 +2201,29 @@ export class BaseQuery {
21792201
);
21802202
}
21812203

2204+
/**
2205+
*
2206+
* @param {boolean} [excludeTimeDimensions=false]
2207+
* @returns {Array<Array<string>>}
2208+
*/
21822209
collectJoinHints(excludeTimeDimensions = false) {
2183-
return this.collectFromMembers(
2184-
excludeTimeDimensions,
2185-
this.collectJoinHintsFor.bind(this),
2186-
'collectJoinHintsFor'
2187-
);
2210+
const membersToCollectFrom = this.allMembersConcat(excludeTimeDimensions)
2211+
.concat(this.join ? this.join.joins.map(j => ({
2212+
getMembers: () => [{
2213+
path: () => null,
2214+
cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom),
2215+
definition: () => j.join,
2216+
}]
2217+
})) : []);
2218+
2219+
return this.collectJoinHintsFromMembers(membersToCollectFrom);
2220+
}
2221+
2222+
collectJoinHintsFromMembers(members) {
2223+
return [
2224+
...members.map(m => m.joinHint).filter(h => h?.length > 0),
2225+
...this.collectFrom(members, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFromMembers'),
2226+
];
21882227
}
21892228

21902229
collectFromMembers(excludeTimeDimensions, fn, methodName) {
@@ -2199,6 +2238,11 @@ export class BaseQuery {
21992238
return this.collectFrom(membersToCollectFrom, fn, methodName);
22002239
}
22012240

2241+
/**
2242+
*
2243+
* @param {boolean} excludeTimeDimensions
2244+
* @returns {Array<BaseMeasure | BaseDimension | BaseSegment>}
2245+
*/
22022246
allMembersConcat(excludeTimeDimensions) {
22032247
return this.measures
22042248
.concat(this.dimensions)

packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ export class BaseSegment {
99

1010
public readonly isMemberExpression: boolean = false;
1111

12+
public readonly joinHint: Array<string> = [];
13+
1214
public constructor(
1315
protected readonly query: BaseQuery,
1416
public readonly segment: string | any
@@ -19,6 +21,16 @@ export class BaseSegment {
1921
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2022
this.expressionName = segment.expressionName || `${segment.cubeName}.${segment.name}`;
2123
this.isMemberExpression = !!segment.definition;
24+
} else {
25+
// TODO move this `as` to static types
26+
const segmentPath = segment as string;
27+
const parts = segmentPath.split('.');
28+
if (parts.length > 2) {
29+
// Measure path contains join path
30+
const hint = parts.slice(0, -1);
31+
this.segment = parts.slice(-2).join('.');
32+
this.joinHint = hint;
33+
}
2234
}
2335
}
2436

packages/cubejs-schema-compiler/src/adapter/PreAggregations.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ export class PreAggregations {
166166
return false;
167167
}
168168

169+
// TODO timeDimensionsReference[*].dimension can contain full join path
169170
const timeDimensionsReference =
170171
foundPreAggregation.preAggregation.rollupLambdaTimeDimensionsReference ||
171172
foundPreAggregation.references.timeDimensions;
@@ -485,6 +486,10 @@ export class PreAggregations {
485486
* @returns {function(preagg: Object): boolean}
486487
*/
487488
static canUsePreAggregationForTransformedQueryFn(transformedQuery, refs) {
489+
// TODO this needs to check not only members list, but their join paths as well:
490+
// query can have same members as pre-agg, but different calculated join path
491+
// `refs` will come from preagg references, and would contain full join paths
492+
488493
/**
489494
* Returns an array of 2-elements arrays with the dimension and granularity
490495
* sorted by the concatenated dimension + granularity key.
@@ -1061,6 +1066,9 @@ export class PreAggregations {
10611066
}
10621067

10631068
rollupPreAggregationQuery(cube, aggregation) {
1069+
// `this.evaluateAllReferences` will retain not only members, but their join path as well, and pass join hints
1070+
// to subquery. Otherwise, members in subquery would regenerate new join tree from clean state,
1071+
// and it can be different from expected by join path in pre-aggregation declaration
10641072
const references = this.evaluateAllReferences(cube, aggregation);
10651073
const cubeQuery = this.query.newSubQueryForCube(cube, {});
10661074
return this.query.newSubQueryForCube(
@@ -1074,6 +1082,7 @@ export class PreAggregations {
10741082
preAggregationQuery: true,
10751083
useOriginalSqlPreAggregationsInPreAggregation: aggregation.useOriginalSqlPreAggregations,
10761084
ungrouped: cubeQuery.preAggregationAllowUngroupingWithPrimaryKey(cube, aggregation) &&
1085+
// TODO `d` in `this.query.cubeEvaluator.dimensionByPath(d)` can contain full join path
10771086
!!references.dimensions.find(d => this.query.cubeEvaluator.dimensionByPath(d).primaryKey)
10781087
}
10791088
);
@@ -1101,6 +1110,7 @@ export class PreAggregations {
11011110
}
11021111
return aggregation.timeDimensions.map(d => {
11031112
const toMerge = partitionTimeDimensions.find(
1113+
// TODO d.dimension can contain full join path
11041114
qd => qd.dimension === d.dimension
11051115
);
11061116
return toMerge ? { ...d, dateRange: toMerge.dateRange, boundaryDateRange: toMerge.boundaryDateRange } : d;
@@ -1119,6 +1129,13 @@ export class PreAggregations {
11191129
.toLowerCase();
11201130
}
11211131

1132+
/**
1133+
*
1134+
* @param {string} cube
1135+
* @param aggregation
1136+
* @param {string} [preAggregationName]
1137+
* @returns {PreAggregationReferences}
1138+
*/
11221139
evaluateAllReferences(cube, aggregation, preAggregationName) {
11231140
const evaluateReferences = () => {
11241141
const references = this.query.cubeEvaluator.evaluatePreAggregationReferences(cube, aggregation);

packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ type PreAggregationTimeDimensionReference = {
8181
granularity: string,
8282
};
8383

84+
/// Strings in `dimensions`, `measures` and `timeDimensions[*].dimension` can contain full join path, not just `cube.member`
8485
type PreAggregationReferences = {
8586
allowNonStrictDateRangeMatch?: boolean,
8687
dimensions: Array<string>,
@@ -742,14 +743,14 @@ export class CubeEvaluator extends CubeSymbols {
742743

743744
if (aggregation.timeDimensionReference) {
744745
timeDimensions.push({
745-
dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference),
746+
dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference, { collectJoinHints: true }),
746747
granularity: aggregation.granularity
747748
});
748749
} else if (aggregation.timeDimensionReferences) {
749750
// eslint-disable-next-line guard-for-in
750751
for (const timeDimensionReference of aggregation.timeDimensionReferences) {
751752
timeDimensions.push({
752-
dimension: this.evaluateReferences(cube, timeDimensionReference.dimension),
753+
dimension: this.evaluateReferences(cube, timeDimensionReference.dimension, { collectJoinHints: true }),
753754
granularity: timeDimensionReference.granularity
754755
});
755756
}
@@ -758,12 +759,12 @@ export class CubeEvaluator extends CubeSymbols {
758759
return {
759760
allowNonStrictDateRangeMatch: aggregation.allowNonStrictDateRangeMatch,
760761
dimensions:
761-
(aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences) || [])
762+
(aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences, { collectJoinHints: true }) || [])
762763
.concat(
763-
aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences) || []
764+
aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences, { collectJoinHints: true }) || []
764765
),
765766
measures:
766-
aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences) || [],
767+
(aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences, { collectJoinHints: true }) || []),
767768
timeDimensions,
768769
rollups:
769770
aggregation.rollupReferences && this.evaluateReferences(cube, aggregation.rollupReferences, { originalSorting: true }) || [],

0 commit comments

Comments
 (0)