Skip to content

Commit aea3a6c

Browse files
authored
fix(schema-compiler): Use join paths from pre-aggregation declaration instead of building join tree from scratch (#9431)
Without this pre-aggregatoin declaration would build a list of members, and build join tree from scratch. New join tree could be different, and pre-aggregation would contain incorrect data. Add join hints to member adapters, to build join tree with proper members. This is more of a hack than proper solution: join tree would still disallow to have same cube twice with different join paths. Just adding extra join hints on query level does not work because of FKQA and multiplication check in subqueries: we have to know join path for every member, so it should be either explicit or unambigous _for each member_. Join tree from query is still not included in pre-aggregation matching, so user would have to manually ensure that views and pre-aggregations both match each other and does not produce false matches
1 parent 14ae6a4 commit aea3a6c

File tree

9 files changed

+418
-38
lines changed

9 files changed

+418
-38
lines changed

packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export class BaseDimension {
1010

1111
public readonly isMemberExpression: boolean = false;
1212

13+
public readonly joinHint: Array<string> = [];
14+
1315
public constructor(
1416
protected readonly query: BaseQuery,
1517
public readonly dimension: any
@@ -20,6 +22,14 @@ export class BaseDimension {
2022
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2123
this.expressionName = dimension.expressionName || `${dimension.cubeName}.${dimension.name}`;
2224
this.isMemberExpression = !!dimension.definition;
25+
} else {
26+
// TODO move this `as` to static types
27+
const dimensionPath = dimension as string | null;
28+
if (dimensionPath !== null) {
29+
const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(dimensionPath);
30+
this.dimension = path;
31+
this.joinHint = joinHint;
32+
}
2333
}
2434
}
2535

packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ export class BaseMeasure {
1313

1414
protected readonly patchedMeasure: MeasureDefinition | null = null;
1515

16+
public readonly joinHint: Array<string> = [];
17+
1618
protected preparePatchedMeasure(sourceMeasure: string, newMeasureType: string | null, addFilters: Array<{sql: Function}>): MeasureDefinition {
1719
const source = this.query.cubeEvaluator.measureByPath(sourceMeasure);
1820

@@ -123,6 +125,12 @@ export class BaseMeasure {
123125
measure.expression.addFilters,
124126
);
125127
}
128+
} else {
129+
// TODO move this `as` to static types
130+
const measurePath = measure as string;
131+
const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(measurePath);
132+
this.measure = path;
133+
this.joinHint = joinHint;
126134
}
127135
}
128136

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -307,16 +307,16 @@ export class BaseQuery {
307307
}
308308

309309
prebuildJoin() {
310-
if (this.useNativeSqlPlanner) {
311-
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
312-
// But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one).
313-
try {
314-
this.join = this.joinGraph.buildJoin(this.allJoinHints);
315-
} catch (e) {
316-
// Ignore
317-
}
318-
} else {
310+
try {
311+
// TODO allJoinHints should contain join hints form pre-agg
319312
this.join = this.joinGraph.buildJoin(this.allJoinHints);
313+
} catch (e) {
314+
if (this.useNativeSqlPlanner) {
315+
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
316+
// But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one).
317+
} else {
318+
throw e;
319+
}
320320
}
321321
}
322322

@@ -363,6 +363,10 @@ export class BaseQuery {
363363
return this.collectedCubeNames;
364364
}
365365

366+
/**
367+
*
368+
* @returns {Array<Array<string>>}
369+
*/
366370
get allJoinHints() {
367371
if (!this.collectedJoinHints) {
368372
this.collectedJoinHints = this.collectJoinHints();
@@ -1203,7 +1207,16 @@ export class BaseQuery {
12031207

12041208
collectAllMultiStageMembers(allMemberChildren) {
12051209
const allMembers = R.uniq(R.flatten(Object.keys(allMemberChildren).map(k => [k].concat(allMemberChildren[k]))));
1206-
return R.fromPairs(allMembers.map(m => ([m, this.memberInstanceByPath(m).isMultiStage()])));
1210+
return R.fromPairs(allMembers.map(m => {
1211+
// When `m` is coming from `collectAllMemberChildren`, it can contain `granularities.customGranularityName` in path
1212+
// And it would mess up with join hints detection
1213+
const trimmedPath = this
1214+
.cubeEvaluator
1215+
.parsePathAnyType(m)
1216+
.slice(0, 2)
1217+
.join('.');
1218+
return [m, this.memberInstanceByPath(trimmedPath).isMultiStage()];
1219+
}));
12071220
}
12081221

12091222
memberInstanceByPath(m) {
@@ -1992,7 +2005,7 @@ export class BaseQuery {
19922005
);
19932006

19942007
if (shouldBuildJoinForMeasureSelect) {
1995-
const joinHints = this.collectFrom(measures, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor');
2008+
const joinHints = this.collectJoinHintsFromMembers(measures);
19962009
const measuresJoin = this.joinGraph.buildJoin(joinHints);
19972010
if (measuresJoin.multiplicationFactor[keyCubeName]) {
19982011
throw new UserError(
@@ -2046,6 +2059,11 @@ export class BaseQuery {
20462059
(!this.safeEvaluateSymbolContext().ungrouped && this.aggregateSubQueryGroupByClause() || '');
20472060
}
20482061

2062+
/**
2063+
* @param {Array<BaseMeasure>} measures
2064+
* @param {string} keyCubeName
2065+
* @returns {boolean}
2066+
*/
20492067
checkShouldBuildJoinForMeasureSelect(measures, keyCubeName) {
20502068
// When member expression references view, it would have to collect join hints from view
20512069
// Consider join A->B, as many-to-one, so B is multiplied and A is not, and member expression like SUM(AB_view.dimB)
@@ -2067,7 +2085,11 @@ export class BaseQuery {
20672085
.filter(member => member.definition().ownedByCube);
20682086

20692087
const cubes = this.collectFrom(nonViewMembers, this.collectCubeNamesFor.bind(this), 'collectCubeNamesFor');
2070-
const joinHints = this.collectFrom(nonViewMembers, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor');
2088+
// Not using `collectJoinHintsFromMembers([measure])` because it would collect too many join hints from view
2089+
const joinHints = [
2090+
measure.joinHint,
2091+
...this.collectJoinHintsFromMembers(nonViewMembers),
2092+
];
20712093
if (R.any(cubeName => keyCubeName !== cubeName, cubes)) {
20722094
const measuresJoin = this.joinGraph.buildJoin(joinHints);
20732095
if (measuresJoin.multiplicationFactor[keyCubeName]) {
@@ -2186,12 +2208,29 @@ export class BaseQuery {
21862208
);
21872209
}
21882210

2211+
/**
2212+
*
2213+
* @param {boolean} [excludeTimeDimensions=false]
2214+
* @returns {Array<Array<string>>}
2215+
*/
21892216
collectJoinHints(excludeTimeDimensions = false) {
2190-
return this.collectFromMembers(
2191-
excludeTimeDimensions,
2192-
this.collectJoinHintsFor.bind(this),
2193-
'collectJoinHintsFor'
2194-
);
2217+
const membersToCollectFrom = this.allMembersConcat(excludeTimeDimensions)
2218+
.concat(this.join ? this.join.joins.map(j => ({
2219+
getMembers: () => [{
2220+
path: () => null,
2221+
cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom),
2222+
definition: () => j.join,
2223+
}]
2224+
})) : []);
2225+
2226+
return this.collectJoinHintsFromMembers(membersToCollectFrom);
2227+
}
2228+
2229+
collectJoinHintsFromMembers(members) {
2230+
return [
2231+
...members.map(m => m.joinHint).filter(h => h?.length > 0),
2232+
...this.collectFrom(members, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFromMembers'),
2233+
];
21952234
}
21962235

21972236
collectFromMembers(excludeTimeDimensions, fn, methodName) {
@@ -2206,6 +2245,11 @@ export class BaseQuery {
22062245
return this.collectFrom(membersToCollectFrom, fn, methodName);
22072246
}
22082247

2248+
/**
2249+
*
2250+
* @param {boolean} excludeTimeDimensions
2251+
* @returns {Array<BaseMeasure | BaseDimension | BaseSegment>}
2252+
*/
22092253
allMembersConcat(excludeTimeDimensions) {
22102254
return this.measures
22112255
.concat(this.dimensions)

packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ export class BaseSegment {
99

1010
public readonly isMemberExpression: boolean = false;
1111

12+
public readonly joinHint: Array<string> = [];
13+
1214
public constructor(
1315
protected readonly query: BaseQuery,
1416
public readonly segment: string | any
@@ -19,6 +21,12 @@ export class BaseSegment {
1921
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2022
this.expressionName = segment.expressionName || `${segment.cubeName}.${segment.name}`;
2123
this.isMemberExpression = !!segment.definition;
24+
} else {
25+
// TODO move this `as` to static types
26+
const segmentPath = segment as string;
27+
const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(segmentPath);
28+
this.segment = path;
29+
this.joinHint = joinHint;
2230
}
2331
}
2432

packages/cubejs-schema-compiler/src/adapter/PreAggregations.js

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,18 @@ export class PreAggregations {
169169
const timeDimensionsReference =
170170
foundPreAggregation.preAggregation.rollupLambdaTimeDimensionsReference ||
171171
foundPreAggregation.references.timeDimensions;
172+
const timeDimensionReference = timeDimensionsReference[0];
172173

173-
if (td.dimension === timeDimensionsReference[0].dimension) {
174+
// timeDimensionsReference[*].dimension can contain full join path, so we should trim it
175+
// TODO check full join path match here
176+
const timeDimensionReferenceDimension = this.query.cubeEvaluator.joinHintFromPath(timeDimensionReference.dimension).path;
177+
178+
if (td.dimension === timeDimensionReferenceDimension) {
174179
return true;
175180
}
176181

177182
// Handling for views
178-
return td.dimension === allBackAliasMembers[timeDimensionsReference[0].dimension];
183+
return td.dimension === allBackAliasMembers[timeDimensionReferenceDimension];
179184
});
180185

181186
const filters = preAggregation.partitionGranularity && this.query.filters.filter(td => {
@@ -485,6 +490,10 @@ export class PreAggregations {
485490
* @returns {function(preagg: Object): boolean}
486491
*/
487492
static canUsePreAggregationForTransformedQueryFn(transformedQuery, refs) {
493+
// TODO this needs to check not only members list, but their join paths as well:
494+
// query can have same members as pre-agg, but different calculated join path
495+
// `refs` will come from preagg references, and would contain full join paths
496+
488497
/**
489498
* Returns an array of 2-elements arrays with the dimension and granularity
490499
* sorted by the concatenated dimension + granularity key.
@@ -1061,22 +1070,35 @@ export class PreAggregations {
10611070
}
10621071

10631072
rollupPreAggregationQuery(cube, aggregation) {
1073+
// `this.evaluateAllReferences` will retain not only members, but their join path as well, and pass join hints
1074+
// to subquery. Otherwise, members in subquery would regenerate new join tree from clean state,
1075+
// and it can be different from expected by join path in pre-aggregation declaration
10641076
const references = this.evaluateAllReferences(cube, aggregation);
10651077
const cubeQuery = this.query.newSubQueryForCube(cube, {});
1066-
return this.query.newSubQueryForCube(
1067-
cube,
1068-
{
1069-
rowLimit: null,
1070-
offset: null,
1071-
measures: references.measures,
1072-
dimensions: references.dimensions,
1073-
timeDimensions: this.mergePartitionTimeDimensions(references, aggregation.partitionTimeDimensions),
1074-
preAggregationQuery: true,
1075-
useOriginalSqlPreAggregationsInPreAggregation: aggregation.useOriginalSqlPreAggregations,
1076-
ungrouped: cubeQuery.preAggregationAllowUngroupingWithPrimaryKey(cube, aggregation) &&
1077-
!!references.dimensions.find(d => this.query.cubeEvaluator.dimensionByPath(d).primaryKey)
1078-
}
1079-
);
1078+
return this.query.newSubQueryForCube(cube, {
1079+
rowLimit: null,
1080+
offset: null,
1081+
measures: references.measures,
1082+
dimensions: references.dimensions,
1083+
timeDimensions: this.mergePartitionTimeDimensions(
1084+
references,
1085+
aggregation.partitionTimeDimensions
1086+
),
1087+
preAggregationQuery: true,
1088+
useOriginalSqlPreAggregationsInPreAggregation:
1089+
aggregation.useOriginalSqlPreAggregations,
1090+
ungrouped:
1091+
cubeQuery.preAggregationAllowUngroupingWithPrimaryKey(
1092+
cube,
1093+
aggregation
1094+
) &&
1095+
!!references.dimensions.find((d) => {
1096+
// `d` can contain full join path, so we should trim it
1097+
// TODO check full join path match here
1098+
const trimmedDimension = this.query.cubeEvaluator.joinHintFromPath(d).path;
1099+
return this.query.cubeEvaluator.dimensionByPath(trimmedDimension).primaryKey;
1100+
}),
1101+
});
10801102
}
10811103

10821104
autoRollupPreAggregationQuery(cube, aggregation) {
@@ -1101,6 +1123,7 @@ export class PreAggregations {
11011123
}
11021124
return aggregation.timeDimensions.map(d => {
11031125
const toMerge = partitionTimeDimensions.find(
1126+
// Both qd and d comes from PreaggregationReferences
11041127
qd => qd.dimension === d.dimension
11051128
);
11061129
return toMerge ? { ...d, dateRange: toMerge.dateRange, boundaryDateRange: toMerge.boundaryDateRange } : d;
@@ -1119,6 +1142,13 @@ export class PreAggregations {
11191142
.toLowerCase();
11201143
}
11211144

1145+
/**
1146+
*
1147+
* @param {string} cube
1148+
* @param aggregation
1149+
* @param {string} [preAggregationName]
1150+
* @returns {PreAggregationReferences}
1151+
*/
11221152
evaluateAllReferences(cube, aggregation, preAggregationName) {
11231153
const evaluateReferences = () => {
11241154
const references = this.query.cubeEvaluator.evaluatePreAggregationReferences(cube, aggregation);

packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ type PreAggregationTimeDimensionReference = {
8282
granularity: string,
8383
};
8484

85+
/// Strings in `dimensions`, `measures` and `timeDimensions[*].dimension` can contain full join path, not just `cube.member`
8586
type PreAggregationReferences = {
8687
allowNonStrictDateRangeMatch?: boolean,
8788
dimensions: Array<string>,
@@ -743,14 +744,14 @@ export class CubeEvaluator extends CubeSymbols {
743744

744745
if (aggregation.timeDimensionReference) {
745746
timeDimensions.push({
746-
dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference),
747+
dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference, { collectJoinHints: true }),
747748
granularity: aggregation.granularity
748749
});
749750
} else if (aggregation.timeDimensionReferences) {
750751
// eslint-disable-next-line guard-for-in
751752
for (const timeDimensionReference of aggregation.timeDimensionReferences) {
752753
timeDimensions.push({
753-
dimension: this.evaluateReferences(cube, timeDimensionReference.dimension),
754+
dimension: this.evaluateReferences(cube, timeDimensionReference.dimension, { collectJoinHints: true }),
754755
granularity: timeDimensionReference.granularity
755756
});
756757
}
@@ -759,12 +760,12 @@ export class CubeEvaluator extends CubeSymbols {
759760
return {
760761
allowNonStrictDateRangeMatch: aggregation.allowNonStrictDateRangeMatch,
761762
dimensions:
762-
(aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences) || [])
763+
(aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences, { collectJoinHints: true }) || [])
763764
.concat(
764-
aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences) || []
765+
aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences, { collectJoinHints: true }) || []
765766
),
766767
measures:
767-
aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences) || [],
768+
(aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences, { collectJoinHints: true }) || []),
768769
timeDimensions,
769770
rollups:
770771
aggregation.rollupReferences && this.evaluateReferences(cube, aggregation.rollupReferences, { originalSorting: true }) || [],

packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,27 @@ export class CubeSymbols {
609609
return array.join('.');
610610
}
611611

612+
/**
613+
* Split join path to member to join hint and member path: `A.B.C.D.E.dim` => `[A, B, C, D, E]` + `E.dim`
614+
* @param path
615+
*/
616+
public joinHintFromPath(path: string): { path: string, joinHint: Array<string> } {
617+
const parts = path.split('.');
618+
if (parts.length > 2) {
619+
// Path contains join path
620+
const joinHint = parts.slice(0, -1);
621+
return {
622+
path: parts.slice(-2).join('.'),
623+
joinHint,
624+
};
625+
} else {
626+
return {
627+
path,
628+
joinHint: [],
629+
};
630+
}
631+
}
632+
612633
protected resolveSymbolsCall<T>(
613634
func: (...args: Array<unknown>) => T | DynamicReference<T>,
614635
nameResolver: (id: string) => unknown,

0 commit comments

Comments
 (0)