Skip to content

Commit 85d23eb

Browse files
committed
fix(schema-compiler): Use join paths from pre-aggregation declaration instead of building join tree from scratch
Without this pre-aggregatoin declaration would build a list of members, and build join tree from scratch. New join tree could be different, and pre-aggregation would contain incorrect data. Add join hints to member adapters, to build join tree with proper members. This is more of a hack than proper solution: join tree would still disallow to have same cube twice with different join paths. Just adding extra join hints on query level does not work because of FKQA and multiplication check in subqueries: we have to know join path for every member, so it should be either explicit or unambigous _for each member_. Join tree from query is still not included in pre-aggregation matching, so user would have to manually ensure that views and pre-aggregations both match each other and does not produce false matches
1 parent 381630e commit 85d23eb

File tree

8 files changed

+416
-38
lines changed

8 files changed

+416
-38
lines changed

packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export class BaseDimension {
1010

1111
public readonly isMemberExpression: boolean = false;
1212

13+
public readonly joinHint: Array<string> = [];
14+
1315
public constructor(
1416
protected readonly query: BaseQuery,
1517
public readonly dimension: any
@@ -20,6 +22,18 @@ export class BaseDimension {
2022
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2123
this.expressionName = dimension.expressionName || `${dimension.cubeName}.${dimension.name}`;
2224
this.isMemberExpression = !!dimension.definition;
25+
} else {
26+
// TODO move this `as` to static types
27+
const dimensionPath = dimension as string | null;
28+
if (dimensionPath !== null) {
29+
const parts = dimensionPath.split('.');
30+
if (parts.length > 2) {
31+
// Measure path contains join path
32+
const hint = parts.slice(0, -1);
33+
this.dimension = parts.slice(-2).join('.');
34+
this.joinHint = hint;
35+
}
36+
}
2337
}
2438
}
2539

packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export class BaseMeasure {
1010

1111
public readonly isMemberExpression: boolean = false;
1212

13+
public readonly joinHint: Array<string> = [];
14+
1315
public constructor(
1416
protected readonly query: BaseQuery,
1517
public readonly measure: any
@@ -20,6 +22,16 @@ export class BaseMeasure {
2022
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2123
this.expressionName = measure.expressionName || `${measure.cubeName}.${measure.name}`;
2224
this.isMemberExpression = !!measure.definition;
25+
} else {
26+
// TODO move this `as` to static types
27+
const measurePath = measure as string;
28+
const parts = measurePath.split('.');
29+
if (parts.length > 2) {
30+
// Measure path contains join path
31+
const hint = parts.slice(0, -1);
32+
this.measure = parts.slice(-2).join('.');
33+
this.joinHint = hint;
34+
}
2335
}
2436
}
2537

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -307,16 +307,16 @@ export class BaseQuery {
307307
}
308308

309309
prebuildJoin() {
310-
if (this.useNativeSqlPlanner) {
311-
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
312-
// But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one).
313-
try {
314-
this.join = this.joinGraph.buildJoin(this.allJoinHints);
315-
} catch (e) {
316-
// Ignore
317-
}
318-
} else {
310+
try {
311+
// TODO allJoinHints should contain join hints form pre-agg
319312
this.join = this.joinGraph.buildJoin(this.allJoinHints);
313+
} catch (e) {
314+
if (this.useNativeSqlPlanner) {
315+
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
316+
// But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one).
317+
} else {
318+
throw e;
319+
}
320320
}
321321
}
322322

@@ -363,6 +363,10 @@ export class BaseQuery {
363363
return this.collectedCubeNames;
364364
}
365365

366+
/**
367+
*
368+
* @returns {Array<Array<string>>}
369+
*/
366370
get allJoinHints() {
367371
if (!this.collectedJoinHints) {
368372
this.collectedJoinHints = this.collectJoinHints();
@@ -1201,7 +1205,16 @@ export class BaseQuery {
12011205

12021206
collectAllMultiStageMembers(allMemberChildren) {
12031207
const allMembers = R.uniq(R.flatten(Object.keys(allMemberChildren).map(k => [k].concat(allMemberChildren[k]))));
1204-
return R.fromPairs(allMembers.map(m => ([m, this.memberInstanceByPath(m).isMultiStage()])));
1208+
return R.fromPairs(allMembers.map(m => {
1209+
// When `m` is coming from `collectAllMemberChildren`, it can contain `granularities.customGranularityName` in path
1210+
// And it would mess up with join hints detection
1211+
const trimmedPath = this
1212+
.cubeEvaluator
1213+
.parsePathAnyType(m)
1214+
.slice(0, 2)
1215+
.join('.');
1216+
return [m, this.memberInstanceByPath(trimmedPath).isMultiStage()];
1217+
}));
12051218
}
12061219

12071220
memberInstanceByPath(m) {
@@ -1990,7 +2003,7 @@ export class BaseQuery {
19902003
);
19912004

19922005
if (shouldBuildJoinForMeasureSelect) {
1993-
const joinHints = this.collectFrom(measures, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor');
2006+
const joinHints = this.collectJoinHintsFromMembers(measures);
19942007
const measuresJoin = this.joinGraph.buildJoin(joinHints);
19952008
if (measuresJoin.multiplicationFactor[keyCubeName]) {
19962009
throw new UserError(
@@ -2044,6 +2057,11 @@ export class BaseQuery {
20442057
(!this.safeEvaluateSymbolContext().ungrouped && this.aggregateSubQueryGroupByClause() || '');
20452058
}
20462059

2060+
/**
2061+
* @param {Array<BaseMeasure>} measures
2062+
* @param {string} keyCubeName
2063+
* @returns {boolean}
2064+
*/
20472065
checkShouldBuildJoinForMeasureSelect(measures, keyCubeName) {
20482066
// When member expression references view, it would have to collect join hints from view
20492067
// Consider join A->B, as many-to-one, so B is multiplied and A is not, and member expression like SUM(AB_view.dimB)
@@ -2065,7 +2083,11 @@ export class BaseQuery {
20652083
.filter(member => member.definition().ownedByCube);
20662084

20672085
const cubes = this.collectFrom(nonViewMembers, this.collectCubeNamesFor.bind(this), 'collectCubeNamesFor');
2068-
const joinHints = this.collectFrom(nonViewMembers, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor');
2086+
// Not using `collectJoinHintsFromMembers([measure])` because it would collect too many join hints from view
2087+
const joinHints = [
2088+
measure.joinHint,
2089+
...this.collectJoinHintsFromMembers(nonViewMembers),
2090+
];
20692091
if (R.any(cubeName => keyCubeName !== cubeName, cubes)) {
20702092
const measuresJoin = this.joinGraph.buildJoin(joinHints);
20712093
if (measuresJoin.multiplicationFactor[keyCubeName]) {
@@ -2179,12 +2201,29 @@ export class BaseQuery {
21792201
);
21802202
}
21812203

2204+
/**
2205+
*
2206+
* @param {boolean} [excludeTimeDimensions=false]
2207+
* @returns {Array<Array<string>>}
2208+
*/
21822209
collectJoinHints(excludeTimeDimensions = false) {
2183-
return this.collectFromMembers(
2184-
excludeTimeDimensions,
2185-
this.collectJoinHintsFor.bind(this),
2186-
'collectJoinHintsFor'
2187-
);
2210+
const membersToCollectFrom = this.allMembersConcat(excludeTimeDimensions)
2211+
.concat(this.join ? this.join.joins.map(j => ({
2212+
getMembers: () => [{
2213+
path: () => null,
2214+
cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom),
2215+
definition: () => j.join,
2216+
}]
2217+
})) : []);
2218+
2219+
return this.collectJoinHintsFromMembers(membersToCollectFrom);
2220+
}
2221+
2222+
collectJoinHintsFromMembers(members) {
2223+
return [
2224+
...members.map(m => m.joinHint).filter(h => h?.length > 0),
2225+
...this.collectFrom(members, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFromMembers'),
2226+
];
21882227
}
21892228

21902229
collectFromMembers(excludeTimeDimensions, fn, methodName) {
@@ -2199,6 +2238,11 @@ export class BaseQuery {
21992238
return this.collectFrom(membersToCollectFrom, fn, methodName);
22002239
}
22012240

2241+
/**
2242+
*
2243+
* @param {boolean} excludeTimeDimensions
2244+
* @returns {Array<BaseMeasure | BaseDimension | BaseSegment>}
2245+
*/
22022246
allMembersConcat(excludeTimeDimensions) {
22032247
return this.measures
22042248
.concat(this.dimensions)

packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ export class BaseSegment {
99

1010
public readonly isMemberExpression: boolean = false;
1111

12+
public readonly joinHint: Array<string> = [];
13+
1214
public constructor(
1315
protected readonly query: BaseQuery,
1416
public readonly segment: string | any
@@ -19,6 +21,16 @@ export class BaseSegment {
1921
// In case of SQL push down expressionName doesn't contain cube name. It's just a column name.
2022
this.expressionName = segment.expressionName || `${segment.cubeName}.${segment.name}`;
2123
this.isMemberExpression = !!segment.definition;
24+
} else {
25+
// TODO move this `as` to static types
26+
const segmentPath = segment as string;
27+
const parts = segmentPath.split('.');
28+
if (parts.length > 2) {
29+
// Measure path contains join path
30+
const hint = parts.slice(0, -1);
31+
this.segment = parts.slice(-2).join('.');
32+
this.joinHint = hint;
33+
}
2234
}
2335
}
2436

packages/cubejs-schema-compiler/src/adapter/PreAggregations.js

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,22 @@ export class PreAggregations {
169169
const timeDimensionsReference =
170170
foundPreAggregation.preAggregation.rollupLambdaTimeDimensionsReference ||
171171
foundPreAggregation.references.timeDimensions;
172+
const timeDimensionReference = timeDimensionsReference[0];
172173

173-
if (td.dimension === timeDimensionsReference[0].dimension) {
174+
// timeDimensionsReference[*].dimension can contain full join path, so we should trim it
175+
// TODO check full join path match here
176+
const timeDimensionReferenceDimension = timeDimensionReference
177+
.dimension
178+
.split(',')
179+
.slice(-2)
180+
.join('.');
181+
182+
if (td.dimension === timeDimensionReferenceDimension) {
174183
return true;
175184
}
176185

177186
// Handling for views
178-
return td.dimension === allBackAliasMembers[timeDimensionsReference[0].dimension];
187+
return td.dimension === allBackAliasMembers[timeDimensionReferenceDimension];
179188
});
180189

181190
const filters = preAggregation.partitionGranularity && this.query.filters.filter(td => {
@@ -485,6 +494,10 @@ export class PreAggregations {
485494
* @returns {function(preagg: Object): boolean}
486495
*/
487496
static canUsePreAggregationForTransformedQueryFn(transformedQuery, refs) {
497+
// TODO this needs to check not only members list, but their join paths as well:
498+
// query can have same members as pre-agg, but different calculated join path
499+
// `refs` will come from preagg references, and would contain full join paths
500+
488501
/**
489502
* Returns an array of 2-elements arrays with the dimension and granularity
490503
* sorted by the concatenated dimension + granularity key.
@@ -1061,22 +1074,38 @@ export class PreAggregations {
10611074
}
10621075

10631076
rollupPreAggregationQuery(cube, aggregation) {
1077+
// `this.evaluateAllReferences` will retain not only members, but their join path as well, and pass join hints
1078+
// to subquery. Otherwise, members in subquery would regenerate new join tree from clean state,
1079+
// and it can be different from expected by join path in pre-aggregation declaration
10641080
const references = this.evaluateAllReferences(cube, aggregation);
10651081
const cubeQuery = this.query.newSubQueryForCube(cube, {});
1066-
return this.query.newSubQueryForCube(
1067-
cube,
1068-
{
1069-
rowLimit: null,
1070-
offset: null,
1071-
measures: references.measures,
1072-
dimensions: references.dimensions,
1073-
timeDimensions: this.mergePartitionTimeDimensions(references, aggregation.partitionTimeDimensions),
1074-
preAggregationQuery: true,
1075-
useOriginalSqlPreAggregationsInPreAggregation: aggregation.useOriginalSqlPreAggregations,
1076-
ungrouped: cubeQuery.preAggregationAllowUngroupingWithPrimaryKey(cube, aggregation) &&
1077-
!!references.dimensions.find(d => this.query.cubeEvaluator.dimensionByPath(d).primaryKey)
1078-
}
1079-
);
1082+
return this.query.newSubQueryForCube(cube, {
1083+
rowLimit: null,
1084+
offset: null,
1085+
measures: references.measures,
1086+
dimensions: references.dimensions,
1087+
timeDimensions: this.mergePartitionTimeDimensions(
1088+
references,
1089+
aggregation.partitionTimeDimensions
1090+
),
1091+
preAggregationQuery: true,
1092+
useOriginalSqlPreAggregationsInPreAggregation:
1093+
aggregation.useOriginalSqlPreAggregations,
1094+
ungrouped:
1095+
cubeQuery.preAggregationAllowUngroupingWithPrimaryKey(
1096+
cube,
1097+
aggregation
1098+
) &&
1099+
!!references.dimensions.find((d) => {
1100+
// `d` can contain full join path, so we should trim it
1101+
// TODO check full join path match here
1102+
const trimmedDimension = d
1103+
.split(',')
1104+
.slice(-2)
1105+
.join('.');
1106+
return this.query.cubeEvaluator.dimensionByPath(trimmedDimension).primaryKey;
1107+
}),
1108+
});
10801109
}
10811110

10821111
autoRollupPreAggregationQuery(cube, aggregation) {
@@ -1101,6 +1130,7 @@ export class PreAggregations {
11011130
}
11021131
return aggregation.timeDimensions.map(d => {
11031132
const toMerge = partitionTimeDimensions.find(
1133+
// Both qd and d comes from PreaggregationReferences
11041134
qd => qd.dimension === d.dimension
11051135
);
11061136
return toMerge ? { ...d, dateRange: toMerge.dateRange, boundaryDateRange: toMerge.boundaryDateRange } : d;
@@ -1119,6 +1149,13 @@ export class PreAggregations {
11191149
.toLowerCase();
11201150
}
11211151

1152+
/**
1153+
*
1154+
* @param {string} cube
1155+
* @param aggregation
1156+
* @param {string} [preAggregationName]
1157+
* @returns {PreAggregationReferences}
1158+
*/
11221159
evaluateAllReferences(cube, aggregation, preAggregationName) {
11231160
const evaluateReferences = () => {
11241161
const references = this.query.cubeEvaluator.evaluatePreAggregationReferences(cube, aggregation);

packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ type PreAggregationTimeDimensionReference = {
8181
granularity: string,
8282
};
8383

84+
/// Strings in `dimensions`, `measures` and `timeDimensions[*].dimension` can contain full join path, not just `cube.member`
8485
type PreAggregationReferences = {
8586
allowNonStrictDateRangeMatch?: boolean,
8687
dimensions: Array<string>,
@@ -742,14 +743,14 @@ export class CubeEvaluator extends CubeSymbols {
742743

743744
if (aggregation.timeDimensionReference) {
744745
timeDimensions.push({
745-
dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference),
746+
dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference, { collectJoinHints: true }),
746747
granularity: aggregation.granularity
747748
});
748749
} else if (aggregation.timeDimensionReferences) {
749750
// eslint-disable-next-line guard-for-in
750751
for (const timeDimensionReference of aggregation.timeDimensionReferences) {
751752
timeDimensions.push({
752-
dimension: this.evaluateReferences(cube, timeDimensionReference.dimension),
753+
dimension: this.evaluateReferences(cube, timeDimensionReference.dimension, { collectJoinHints: true }),
753754
granularity: timeDimensionReference.granularity
754755
});
755756
}
@@ -758,12 +759,12 @@ export class CubeEvaluator extends CubeSymbols {
758759
return {
759760
allowNonStrictDateRangeMatch: aggregation.allowNonStrictDateRangeMatch,
760761
dimensions:
761-
(aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences) || [])
762+
(aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences, { collectJoinHints: true }) || [])
762763
.concat(
763-
aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences) || []
764+
aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences, { collectJoinHints: true }) || []
764765
),
765766
measures:
766-
aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences) || [],
767+
(aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences, { collectJoinHints: true }) || []),
767768
timeDimensions,
768769
rollups:
769770
aggregation.rollupReferences && this.evaluateReferences(cube, aggregation.rollupReferences, { originalSorting: true }) || [],

0 commit comments

Comments
 (0)