diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts b/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts index 5eac83d7238aa..0c045b6e1cb01 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts @@ -10,6 +10,8 @@ export class BaseDimension { public readonly isMemberExpression: boolean = false; + public readonly joinHint: Array = []; + public constructor( protected readonly query: BaseQuery, public readonly dimension: any @@ -20,6 +22,14 @@ export class BaseDimension { // In case of SQL push down expressionName doesn't contain cube name. It's just a column name. this.expressionName = dimension.expressionName || `${dimension.cubeName}.${dimension.name}`; this.isMemberExpression = !!dimension.definition; + } else { + // TODO move this `as` to static types + const dimensionPath = dimension as string | null; + if (dimensionPath !== null) { + const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(dimensionPath); + this.dimension = path; + this.joinHint = joinHint; + } } } diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts b/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts index 1e44283cdbca1..635547f0455dc 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts @@ -13,6 +13,8 @@ export class BaseMeasure { protected readonly patchedMeasure: MeasureDefinition | null = null; + public readonly joinHint: Array = []; + protected preparePatchedMeasure(sourceMeasure: string, newMeasureType: string | null, addFilters: Array<{sql: Function}>): MeasureDefinition { const source = this.query.cubeEvaluator.measureByPath(sourceMeasure); @@ -123,6 +125,12 @@ export class BaseMeasure { measure.expression.addFilters, ); } + } else { + // TODO move this `as` to static types + const measurePath = measure as string; + const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(measurePath); + this.measure = path; + this.joinHint = joinHint; } } diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 1333d9d4dd10a..a96b99460149c 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -307,16 +307,16 @@ export class BaseQuery { } prebuildJoin() { - if (this.useNativeSqlPlanner) { - // Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query - // But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one). - try { - this.join = this.joinGraph.buildJoin(this.allJoinHints); - } catch (e) { - // Ignore - } - } else { + try { + // TODO allJoinHints should contain join hints form pre-agg this.join = this.joinGraph.buildJoin(this.allJoinHints); + } catch (e) { + if (this.useNativeSqlPlanner) { + // Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query + // But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one). + } else { + throw e; + } } } @@ -363,6 +363,10 @@ export class BaseQuery { return this.collectedCubeNames; } + /** + * + * @returns {Array>} + */ get allJoinHints() { if (!this.collectedJoinHints) { this.collectedJoinHints = this.collectJoinHints(); @@ -1203,7 +1207,16 @@ export class BaseQuery { collectAllMultiStageMembers(allMemberChildren) { const allMembers = R.uniq(R.flatten(Object.keys(allMemberChildren).map(k => [k].concat(allMemberChildren[k])))); - return R.fromPairs(allMembers.map(m => ([m, this.memberInstanceByPath(m).isMultiStage()]))); + return R.fromPairs(allMembers.map(m => { + // When `m` is coming from `collectAllMemberChildren`, it can contain `granularities.customGranularityName` in path + // And it would mess up with join hints detection + const trimmedPath = this + .cubeEvaluator + .parsePathAnyType(m) + .slice(0, 2) + .join('.'); + return [m, this.memberInstanceByPath(trimmedPath).isMultiStage()]; + })); } memberInstanceByPath(m) { @@ -1992,7 +2005,7 @@ export class BaseQuery { ); if (shouldBuildJoinForMeasureSelect) { - const joinHints = this.collectFrom(measures, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor'); + const joinHints = this.collectJoinHintsFromMembers(measures); const measuresJoin = this.joinGraph.buildJoin(joinHints); if (measuresJoin.multiplicationFactor[keyCubeName]) { throw new UserError( @@ -2046,6 +2059,11 @@ export class BaseQuery { (!this.safeEvaluateSymbolContext().ungrouped && this.aggregateSubQueryGroupByClause() || ''); } + /** + * @param {Array} measures + * @param {string} keyCubeName + * @returns {boolean} + */ checkShouldBuildJoinForMeasureSelect(measures, keyCubeName) { // When member expression references view, it would have to collect join hints from view // Consider join A->B, as many-to-one, so B is multiplied and A is not, and member expression like SUM(AB_view.dimB) @@ -2067,7 +2085,11 @@ export class BaseQuery { .filter(member => member.definition().ownedByCube); const cubes = this.collectFrom(nonViewMembers, this.collectCubeNamesFor.bind(this), 'collectCubeNamesFor'); - const joinHints = this.collectFrom(nonViewMembers, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor'); + // Not using `collectJoinHintsFromMembers([measure])` because it would collect too many join hints from view + const joinHints = [ + measure.joinHint, + ...this.collectJoinHintsFromMembers(nonViewMembers), + ]; if (R.any(cubeName => keyCubeName !== cubeName, cubes)) { const measuresJoin = this.joinGraph.buildJoin(joinHints); if (measuresJoin.multiplicationFactor[keyCubeName]) { @@ -2186,12 +2208,29 @@ export class BaseQuery { ); } + /** + * + * @param {boolean} [excludeTimeDimensions=false] + * @returns {Array>} + */ collectJoinHints(excludeTimeDimensions = false) { - return this.collectFromMembers( - excludeTimeDimensions, - this.collectJoinHintsFor.bind(this), - 'collectJoinHintsFor' - ); + const membersToCollectFrom = this.allMembersConcat(excludeTimeDimensions) + .concat(this.join ? this.join.joins.map(j => ({ + getMembers: () => [{ + path: () => null, + cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom), + definition: () => j.join, + }] + })) : []); + + return this.collectJoinHintsFromMembers(membersToCollectFrom); + } + + collectJoinHintsFromMembers(members) { + return [ + ...members.map(m => m.joinHint).filter(h => h?.length > 0), + ...this.collectFrom(members, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFromMembers'), + ]; } collectFromMembers(excludeTimeDimensions, fn, methodName) { @@ -2206,6 +2245,11 @@ export class BaseQuery { return this.collectFrom(membersToCollectFrom, fn, methodName); } + /** + * + * @param {boolean} excludeTimeDimensions + * @returns {Array} + */ allMembersConcat(excludeTimeDimensions) { return this.measures .concat(this.dimensions) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts b/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts index 4e80d99b6cd26..db2033c7916f7 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts @@ -9,6 +9,8 @@ export class BaseSegment { public readonly isMemberExpression: boolean = false; + public readonly joinHint: Array = []; + public constructor( protected readonly query: BaseQuery, public readonly segment: string | any @@ -19,6 +21,12 @@ export class BaseSegment { // In case of SQL push down expressionName doesn't contain cube name. It's just a column name. this.expressionName = segment.expressionName || `${segment.cubeName}.${segment.name}`; this.isMemberExpression = !!segment.definition; + } else { + // TODO move this `as` to static types + const segmentPath = segment as string; + const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(segmentPath); + this.segment = path; + this.joinHint = joinHint; } } diff --git a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js index 66d90a4b430be..364531eb24fa5 100644 --- a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js +++ b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js @@ -169,13 +169,18 @@ export class PreAggregations { const timeDimensionsReference = foundPreAggregation.preAggregation.rollupLambdaTimeDimensionsReference || foundPreAggregation.references.timeDimensions; + const timeDimensionReference = timeDimensionsReference[0]; - if (td.dimension === timeDimensionsReference[0].dimension) { + // timeDimensionsReference[*].dimension can contain full join path, so we should trim it + // TODO check full join path match here + const timeDimensionReferenceDimension = this.query.cubeEvaluator.joinHintFromPath(timeDimensionReference.dimension).path; + + if (td.dimension === timeDimensionReferenceDimension) { return true; } // Handling for views - return td.dimension === allBackAliasMembers[timeDimensionsReference[0].dimension]; + return td.dimension === allBackAliasMembers[timeDimensionReferenceDimension]; }); const filters = preAggregation.partitionGranularity && this.query.filters.filter(td => { @@ -485,6 +490,10 @@ export class PreAggregations { * @returns {function(preagg: Object): boolean} */ static canUsePreAggregationForTransformedQueryFn(transformedQuery, refs) { + // TODO this needs to check not only members list, but their join paths as well: + // query can have same members as pre-agg, but different calculated join path + // `refs` will come from preagg references, and would contain full join paths + /** * Returns an array of 2-elements arrays with the dimension and granularity * sorted by the concatenated dimension + granularity key. @@ -1061,22 +1070,35 @@ export class PreAggregations { } rollupPreAggregationQuery(cube, aggregation) { + // `this.evaluateAllReferences` will retain not only members, but their join path as well, and pass join hints + // to subquery. Otherwise, members in subquery would regenerate new join tree from clean state, + // and it can be different from expected by join path in pre-aggregation declaration const references = this.evaluateAllReferences(cube, aggregation); const cubeQuery = this.query.newSubQueryForCube(cube, {}); - return this.query.newSubQueryForCube( - cube, - { - rowLimit: null, - offset: null, - measures: references.measures, - dimensions: references.dimensions, - timeDimensions: this.mergePartitionTimeDimensions(references, aggregation.partitionTimeDimensions), - preAggregationQuery: true, - useOriginalSqlPreAggregationsInPreAggregation: aggregation.useOriginalSqlPreAggregations, - ungrouped: cubeQuery.preAggregationAllowUngroupingWithPrimaryKey(cube, aggregation) && - !!references.dimensions.find(d => this.query.cubeEvaluator.dimensionByPath(d).primaryKey) - } - ); + return this.query.newSubQueryForCube(cube, { + rowLimit: null, + offset: null, + measures: references.measures, + dimensions: references.dimensions, + timeDimensions: this.mergePartitionTimeDimensions( + references, + aggregation.partitionTimeDimensions + ), + preAggregationQuery: true, + useOriginalSqlPreAggregationsInPreAggregation: + aggregation.useOriginalSqlPreAggregations, + ungrouped: + cubeQuery.preAggregationAllowUngroupingWithPrimaryKey( + cube, + aggregation + ) && + !!references.dimensions.find((d) => { + // `d` can contain full join path, so we should trim it + // TODO check full join path match here + const trimmedDimension = this.query.cubeEvaluator.joinHintFromPath(d).path; + return this.query.cubeEvaluator.dimensionByPath(trimmedDimension).primaryKey; + }), + }); } autoRollupPreAggregationQuery(cube, aggregation) { @@ -1101,6 +1123,7 @@ export class PreAggregations { } return aggregation.timeDimensions.map(d => { const toMerge = partitionTimeDimensions.find( + // Both qd and d comes from PreaggregationReferences qd => qd.dimension === d.dimension ); return toMerge ? { ...d, dateRange: toMerge.dateRange, boundaryDateRange: toMerge.boundaryDateRange } : d; @@ -1119,6 +1142,13 @@ export class PreAggregations { .toLowerCase(); } + /** + * + * @param {string} cube + * @param aggregation + * @param {string} [preAggregationName] + * @returns {PreAggregationReferences} + */ evaluateAllReferences(cube, aggregation, preAggregationName) { const evaluateReferences = () => { const references = this.query.cubeEvaluator.evaluatePreAggregationReferences(cube, aggregation); diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index 6b7c32f1243a0..6b78e575513a6 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -82,6 +82,7 @@ type PreAggregationTimeDimensionReference = { granularity: string, }; +/// Strings in `dimensions`, `measures` and `timeDimensions[*].dimension` can contain full join path, not just `cube.member` type PreAggregationReferences = { allowNonStrictDateRangeMatch?: boolean, dimensions: Array, @@ -743,14 +744,14 @@ export class CubeEvaluator extends CubeSymbols { if (aggregation.timeDimensionReference) { timeDimensions.push({ - dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference), + dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference, { collectJoinHints: true }), granularity: aggregation.granularity }); } else if (aggregation.timeDimensionReferences) { // eslint-disable-next-line guard-for-in for (const timeDimensionReference of aggregation.timeDimensionReferences) { timeDimensions.push({ - dimension: this.evaluateReferences(cube, timeDimensionReference.dimension), + dimension: this.evaluateReferences(cube, timeDimensionReference.dimension, { collectJoinHints: true }), granularity: timeDimensionReference.granularity }); } @@ -759,12 +760,12 @@ export class CubeEvaluator extends CubeSymbols { return { allowNonStrictDateRangeMatch: aggregation.allowNonStrictDateRangeMatch, dimensions: - (aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences) || []) + (aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences, { collectJoinHints: true }) || []) .concat( - aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences) || [] + aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences, { collectJoinHints: true }) || [] ), measures: - aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences) || [], + (aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences, { collectJoinHints: true }) || []), timeDimensions, rollups: aggregation.rollupReferences && this.evaluateReferences(cube, aggregation.rollupReferences, { originalSorting: true }) || [], diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts index dffa59b9e017c..ed258ba9f5fb4 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts @@ -609,6 +609,27 @@ export class CubeSymbols { return array.join('.'); } + /** + * Split join path to member to join hint and member path: `A.B.C.D.E.dim` => `[A, B, C, D, E]` + `E.dim` + * @param path + */ + public joinHintFromPath(path: string): { path: string, joinHint: Array } { + const parts = path.split('.'); + if (parts.length > 2) { + // Path contains join path + const joinHint = parts.slice(0, -1); + return { + path: parts.slice(-2).join('.'), + joinHint, + }; + } else { + return { + path, + joinHint: [], + }; + } + } + protected resolveSymbolsCall( func: (...args: Array) => T | DynamicReference, nameResolver: (id: string) => unknown, diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts new file mode 100644 index 0000000000000..1042a6de1f270 --- /dev/null +++ b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts @@ -0,0 +1,253 @@ +import { PreAggregationPartitionRangeLoader } from '@cubejs-backend/query-orchestrator'; +import { PostgresQuery } from '../../../src/adapter/PostgresQuery'; +import { BigqueryQuery } from '../../../src/adapter/BigqueryQuery'; +import { prepareCompiler } from '../../unit/PrepareCompiler'; +import { dbRunner } from './PostgresDBRunner'; +import { DataSchemaCompiler } from '../../../src/compiler/DataSchemaCompiler'; +import { JoinGraph } from '../../../src/compiler/JoinGraph'; +import { CubeEvaluator } from '../../../src/compiler/CubeEvaluator'; + +describe('PreAggregations join path', () => { + jest.setTimeout(200000); + + let compiler: DataSchemaCompiler; + let joinGraph: JoinGraph; + let cubeEvaluator: CubeEvaluator; + + beforeAll(async () => { + // All joins would look like this + // A-->B-->C-->X + // | ^ + // ├-->D-->E---┤ + // | | + // └-->F-------┘ + // And join declaration would use ADEX path + // It should NOT be the shortest one, nor first in join edges declaration + // All join conditions would be essentially `FALSE`, but with different syntax, to be able to test SQL generation + + // TODO in this model queries like [A.a_id, X.x_id] become ambiguous, probably we want to handle this better + + // language=JavaScript + const prepared = prepareCompiler(` + cube('A', { + sql: 'SELECT 1 AS a_id, 100 AS a_value', + + joins: { + B: { + relationship: 'many_to_one', + sql: "'A' = 'B'", + }, + D: { + relationship: 'many_to_one', + sql: "'A' = 'D'", + }, + F: { + relationship: 'many_to_one', + sql: "'A' = 'F'", + }, + }, + + dimensions: { + a_id: { + type: 'number', + sql: 'a_id', + primaryKey: true, + }, + }, + + measures: { + a_sum: { + sql: 'a_value', + type: 'sum', + }, + }, + + preAggregations: { + adex: { + type: 'rollup', + dimensionReferences: [a_id, CUBE.D.E.X.x_id], + measureReferences: [a_sum, D.E.X.x_sum], + // TODO implement and test segmentReferences + // TODO implement and test timeDimensionReference + }, + }, + }); + + cube('B', { + sql: 'SELECT 1 AS b_id, 100 AS b_value', + + joins: { + C: { + relationship: 'many_to_one', + sql: "'B' = 'C'", + }, + }, + + dimensions: { + b_id: { + type: 'number', + sql: 'b_id', + primaryKey: true, + }, + }, + + measures: { + b_sum: { + sql: 'b_value', + type: 'sum', + }, + }, + }); + + cube('C', { + sql: 'SELECT 1 AS c_id, 100 AS c_value', + + joins: { + X: { + relationship: 'many_to_one', + sql: "'C' = 'X'", + }, + }, + + dimensions: { + c_id: { + type: 'number', + sql: 'c_id', + primaryKey: true, + }, + }, + + measures: { + c_sum: { + sql: 'c_value', + type: 'sum', + }, + }, + }); + + cube('D', { + sql: 'SELECT 1 AS d_id, 100 AS d_value', + + joins: { + E: { + relationship: 'many_to_one', + sql: "'D' = 'E'", + }, + }, + + dimensions: { + d_id: { + type: 'number', + sql: 'd_id', + primaryKey: true, + }, + }, + + measures: { + d_sum: { + sql: 'd_value', + type: 'sum', + }, + }, + }); + + cube('E', { + sql: 'SELECT 1 AS e_id, 100 AS e_value', + + joins: { + X: { + relationship: 'many_to_one', + sql: "'E' = 'X'", + }, + }, + + dimensions: { + e_id: { + type: 'number', + sql: 'e_id', + primaryKey: true, + }, + }, + + measures: { + e_sum: { + sql: 'e_value', + type: 'sum', + }, + }, + }); + + cube('F', { + sql: 'SELECT 1 AS f_id, 100 AS f_value', + + joins: { + X: { + relationship: 'many_to_one', + sql: "'F' = 'X'", + }, + }, + + dimensions: { + f_id: { + type: 'number', + sql: 'f_id', + primaryKey: true, + }, + }, + + measures: { + f_sum: { + sql: 'f_value', + type: 'sum', + }, + }, + }); + + cube('X', { + sql: 'SELECT 1 AS x_id, 100 AS x_value', + + dimensions: { + x_id: { + type: 'number', + sql: 'x_id', + primaryKey: true, + }, + }, + + measures: { + x_sum: { + sql: 'x_value', + type: 'sum', + }, + }, + }); + `); + + ({ compiler, joinGraph, cubeEvaluator } = prepared); + }); + + beforeEach(async () => { + await compiler.compile(); + }); + + it('should respect join path from pre-aggregation declaration', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'A.a_id' + ], + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + const { loadSql } = preAggregationsDescription.find(p => p.preAggregationId === 'A.adex'); + + expect(loadSql[0]).toMatch(/ON 'A' = 'D'/); + expect(loadSql[0]).toMatch(/ON 'D' = 'E'/); + expect(loadSql[0]).toMatch(/ON 'E' = 'X'/); + expect(loadSql[0]).not.toMatch(/ON 'A' = 'B'/); + expect(loadSql[0]).not.toMatch(/ON 'B' = 'C'/); + expect(loadSql[0]).not.toMatch(/ON 'C' = 'X'/); + expect(loadSql[0]).not.toMatch(/ON 'A' = 'F'/); + expect(loadSql[0]).not.toMatch(/ON 'F' = 'X'/); + }); +}); diff --git a/packages/cubejs-server-core/src/core/CompilerApi.js b/packages/cubejs-server-core/src/core/CompilerApi.js index f92ae16e30727..2687d6c6f724f 100644 --- a/packages/cubejs-server-core/src/core/CompilerApi.js +++ b/packages/cubejs-server-core/src/core/CompilerApi.js @@ -425,6 +425,11 @@ export class CompilerApi { } } + /** + * + * @param {unknown} filter + * @returns {Promise>} + */ async preAggregations(filter) { const { cubeEvaluator } = await this.getCompilers(); return cubeEvaluator.preAggregations(filter);