From 021b769bbf5cee0a352e7fb2030dec474170db7a Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 8 Apr 2025 20:16:15 +0200 Subject: [PATCH 01/15] fix(schema-compiler): Use join paths from pre-aggregation declaration instead of building join tree from scratch (#9431) Without this pre-aggregatoin declaration would build a list of members, and build join tree from scratch. New join tree could be different, and pre-aggregation would contain incorrect data. Add join hints to member adapters, to build join tree with proper members. This is more of a hack than proper solution: join tree would still disallow to have same cube twice with different join paths. Just adding extra join hints on query level does not work because of FKQA and multiplication check in subqueries: we have to know join path for every member, so it should be either explicit or unambigous _for each member_. Join tree from query is still not included in pre-aggregation matching, so user would have to manually ensure that views and pre-aggregations both match each other and does not produce false matches (cherry picked from commit aea3a6c02886f495b00751e5e12dcd0ef3099441) --- .../src/adapter/BaseDimension.ts | 10 + .../src/adapter/BaseMeasure.ts | 8 + .../src/adapter/BaseQuery.js | 78 ++++-- .../src/adapter/BaseSegment.ts | 8 + .../src/adapter/PreAggregations.js | 62 +++-- .../src/compiler/CubeEvaluator.ts | 11 +- .../src/compiler/CubeSymbols.ts | 21 ++ .../pre-aggregations-join-path.test.ts | 253 ++++++++++++++++++ .../src/core/CompilerApi.js | 5 + 9 files changed, 418 insertions(+), 38 deletions(-) create mode 100644 packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts b/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts index 5eac83d7238aa..0c045b6e1cb01 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts @@ -10,6 +10,8 @@ export class BaseDimension { public readonly isMemberExpression: boolean = false; + public readonly joinHint: Array = []; + public constructor( protected readonly query: BaseQuery, public readonly dimension: any @@ -20,6 +22,14 @@ export class BaseDimension { // In case of SQL push down expressionName doesn't contain cube name. It's just a column name. this.expressionName = dimension.expressionName || `${dimension.cubeName}.${dimension.name}`; this.isMemberExpression = !!dimension.definition; + } else { + // TODO move this `as` to static types + const dimensionPath = dimension as string | null; + if (dimensionPath !== null) { + const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(dimensionPath); + this.dimension = path; + this.joinHint = joinHint; + } } } diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts b/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts index c02b6fe6e76cb..87fc252267a20 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts @@ -13,6 +13,8 @@ export class BaseMeasure { protected readonly patchedMeasure: MeasureDefinition | null = null; + public readonly joinHint: Array = []; + protected preparePatchedMeasure(sourceMeasure: string, newMeasureType: string | null, addFilters: Array<{sql: Function}>): MeasureDefinition { const source = this.query.cubeEvaluator.measureByPath(sourceMeasure); @@ -123,6 +125,12 @@ export class BaseMeasure { measure.expression.addFilters, ); } + } else { + // TODO move this `as` to static types + const measurePath = measure as string; + const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(measurePath); + this.measure = path; + this.joinHint = joinHint; } } diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 234ca924bb6f4..a011d3a95332f 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -307,16 +307,16 @@ export class BaseQuery { } prebuildJoin() { - if (this.useNativeSqlPlanner) { - // Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query - // But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one). - try { - this.join = this.joinGraph.buildJoin(this.allJoinHints); - } catch (e) { - // Ignore - } - } else { + try { + // TODO allJoinHints should contain join hints form pre-agg this.join = this.joinGraph.buildJoin(this.allJoinHints); + } catch (e) { + if (this.useNativeSqlPlanner) { + // Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query + // But we need this join for a fallback when using pre-aggregations. So we’ll try to obtain the join but ignore any errors (which may occur if the query is a multi-fact one). + } else { + throw e; + } } } @@ -363,6 +363,10 @@ export class BaseQuery { return this.collectedCubeNames; } + /** + * + * @returns {Array>} + */ get allJoinHints() { if (!this.collectedJoinHints) { this.collectedJoinHints = this.collectJoinHints(); @@ -1203,7 +1207,16 @@ export class BaseQuery { collectAllMultiStageMembers(allMemberChildren) { const allMembers = R.uniq(R.flatten(Object.keys(allMemberChildren).map(k => [k].concat(allMemberChildren[k])))); - return R.fromPairs(allMembers.map(m => ([m, this.memberInstanceByPath(m).isMultiStage()]))); + return R.fromPairs(allMembers.map(m => { + // When `m` is coming from `collectAllMemberChildren`, it can contain `granularities.customGranularityName` in path + // And it would mess up with join hints detection + const trimmedPath = this + .cubeEvaluator + .parsePathAnyType(m) + .slice(0, 2) + .join('.'); + return [m, this.memberInstanceByPath(trimmedPath).isMultiStage()]; + })); } memberInstanceByPath(m) { @@ -1992,7 +2005,7 @@ export class BaseQuery { ); if (shouldBuildJoinForMeasureSelect) { - const joinHints = this.collectFrom(measures, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor'); + const joinHints = this.collectJoinHintsFromMembers(measures); const measuresJoin = this.joinGraph.buildJoin(joinHints); if (measuresJoin.multiplicationFactor[keyCubeName]) { throw new UserError( @@ -2046,6 +2059,11 @@ export class BaseQuery { (!this.safeEvaluateSymbolContext().ungrouped && this.aggregateSubQueryGroupByClause() || ''); } + /** + * @param {Array} measures + * @param {string} keyCubeName + * @returns {boolean} + */ checkShouldBuildJoinForMeasureSelect(measures, keyCubeName) { // When member expression references view, it would have to collect join hints from view // Consider join A->B, as many-to-one, so B is multiplied and A is not, and member expression like SUM(AB_view.dimB) @@ -2067,7 +2085,11 @@ export class BaseQuery { .filter(member => member.definition().ownedByCube); const cubes = this.collectFrom(nonViewMembers, this.collectCubeNamesFor.bind(this), 'collectCubeNamesFor'); - const joinHints = this.collectFrom(nonViewMembers, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFor'); + // Not using `collectJoinHintsFromMembers([measure])` because it would collect too many join hints from view + const joinHints = [ + measure.joinHint, + ...this.collectJoinHintsFromMembers(nonViewMembers), + ]; if (R.any(cubeName => keyCubeName !== cubeName, cubes)) { const measuresJoin = this.joinGraph.buildJoin(joinHints); if (measuresJoin.multiplicationFactor[keyCubeName]) { @@ -2186,12 +2208,29 @@ export class BaseQuery { ); } + /** + * + * @param {boolean} [excludeTimeDimensions=false] + * @returns {Array>} + */ collectJoinHints(excludeTimeDimensions = false) { - return this.collectFromMembers( - excludeTimeDimensions, - this.collectJoinHintsFor.bind(this), - 'collectJoinHintsFor' - ); + const membersToCollectFrom = this.allMembersConcat(excludeTimeDimensions) + .concat(this.join ? this.join.joins.map(j => ({ + getMembers: () => [{ + path: () => null, + cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom), + definition: () => j.join, + }] + })) : []); + + return this.collectJoinHintsFromMembers(membersToCollectFrom); + } + + collectJoinHintsFromMembers(members) { + return [ + ...members.map(m => m.joinHint).filter(h => h?.length > 0), + ...this.collectFrom(members, this.collectJoinHintsFor.bind(this), 'collectJoinHintsFromMembers'), + ]; } collectFromMembers(excludeTimeDimensions, fn, methodName) { @@ -2206,6 +2245,11 @@ export class BaseQuery { return this.collectFrom(membersToCollectFrom, fn, methodName); } + /** + * + * @param {boolean} excludeTimeDimensions + * @returns {Array} + */ allMembersConcat(excludeTimeDimensions) { return this.measures .concat(this.dimensions) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts b/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts index 4e80d99b6cd26..db2033c7916f7 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts @@ -9,6 +9,8 @@ export class BaseSegment { public readonly isMemberExpression: boolean = false; + public readonly joinHint: Array = []; + public constructor( protected readonly query: BaseQuery, public readonly segment: string | any @@ -19,6 +21,12 @@ export class BaseSegment { // In case of SQL push down expressionName doesn't contain cube name. It's just a column name. this.expressionName = segment.expressionName || `${segment.cubeName}.${segment.name}`; this.isMemberExpression = !!segment.definition; + } else { + // TODO move this `as` to static types + const segmentPath = segment as string; + const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(segmentPath); + this.segment = path; + this.joinHint = joinHint; } } diff --git a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js index 66d90a4b430be..364531eb24fa5 100644 --- a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js +++ b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js @@ -169,13 +169,18 @@ export class PreAggregations { const timeDimensionsReference = foundPreAggregation.preAggregation.rollupLambdaTimeDimensionsReference || foundPreAggregation.references.timeDimensions; + const timeDimensionReference = timeDimensionsReference[0]; - if (td.dimension === timeDimensionsReference[0].dimension) { + // timeDimensionsReference[*].dimension can contain full join path, so we should trim it + // TODO check full join path match here + const timeDimensionReferenceDimension = this.query.cubeEvaluator.joinHintFromPath(timeDimensionReference.dimension).path; + + if (td.dimension === timeDimensionReferenceDimension) { return true; } // Handling for views - return td.dimension === allBackAliasMembers[timeDimensionsReference[0].dimension]; + return td.dimension === allBackAliasMembers[timeDimensionReferenceDimension]; }); const filters = preAggregation.partitionGranularity && this.query.filters.filter(td => { @@ -485,6 +490,10 @@ export class PreAggregations { * @returns {function(preagg: Object): boolean} */ static canUsePreAggregationForTransformedQueryFn(transformedQuery, refs) { + // TODO this needs to check not only members list, but their join paths as well: + // query can have same members as pre-agg, but different calculated join path + // `refs` will come from preagg references, and would contain full join paths + /** * Returns an array of 2-elements arrays with the dimension and granularity * sorted by the concatenated dimension + granularity key. @@ -1061,22 +1070,35 @@ export class PreAggregations { } rollupPreAggregationQuery(cube, aggregation) { + // `this.evaluateAllReferences` will retain not only members, but their join path as well, and pass join hints + // to subquery. Otherwise, members in subquery would regenerate new join tree from clean state, + // and it can be different from expected by join path in pre-aggregation declaration const references = this.evaluateAllReferences(cube, aggregation); const cubeQuery = this.query.newSubQueryForCube(cube, {}); - return this.query.newSubQueryForCube( - cube, - { - rowLimit: null, - offset: null, - measures: references.measures, - dimensions: references.dimensions, - timeDimensions: this.mergePartitionTimeDimensions(references, aggregation.partitionTimeDimensions), - preAggregationQuery: true, - useOriginalSqlPreAggregationsInPreAggregation: aggregation.useOriginalSqlPreAggregations, - ungrouped: cubeQuery.preAggregationAllowUngroupingWithPrimaryKey(cube, aggregation) && - !!references.dimensions.find(d => this.query.cubeEvaluator.dimensionByPath(d).primaryKey) - } - ); + return this.query.newSubQueryForCube(cube, { + rowLimit: null, + offset: null, + measures: references.measures, + dimensions: references.dimensions, + timeDimensions: this.mergePartitionTimeDimensions( + references, + aggregation.partitionTimeDimensions + ), + preAggregationQuery: true, + useOriginalSqlPreAggregationsInPreAggregation: + aggregation.useOriginalSqlPreAggregations, + ungrouped: + cubeQuery.preAggregationAllowUngroupingWithPrimaryKey( + cube, + aggregation + ) && + !!references.dimensions.find((d) => { + // `d` can contain full join path, so we should trim it + // TODO check full join path match here + const trimmedDimension = this.query.cubeEvaluator.joinHintFromPath(d).path; + return this.query.cubeEvaluator.dimensionByPath(trimmedDimension).primaryKey; + }), + }); } autoRollupPreAggregationQuery(cube, aggregation) { @@ -1101,6 +1123,7 @@ export class PreAggregations { } return aggregation.timeDimensions.map(d => { const toMerge = partitionTimeDimensions.find( + // Both qd and d comes from PreaggregationReferences qd => qd.dimension === d.dimension ); return toMerge ? { ...d, dateRange: toMerge.dateRange, boundaryDateRange: toMerge.boundaryDateRange } : d; @@ -1119,6 +1142,13 @@ export class PreAggregations { .toLowerCase(); } + /** + * + * @param {string} cube + * @param aggregation + * @param {string} [preAggregationName] + * @returns {PreAggregationReferences} + */ evaluateAllReferences(cube, aggregation, preAggregationName) { const evaluateReferences = () => { const references = this.query.cubeEvaluator.evaluatePreAggregationReferences(cube, aggregation); diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index 809e31e5c2768..031b166c93713 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -82,6 +82,7 @@ type PreAggregationTimeDimensionReference = { granularity: string, }; +/// Strings in `dimensions`, `measures` and `timeDimensions[*].dimension` can contain full join path, not just `cube.member` type PreAggregationReferences = { allowNonStrictDateRangeMatch?: boolean, dimensions: Array, @@ -735,14 +736,14 @@ export class CubeEvaluator extends CubeSymbols { if (aggregation.timeDimensionReference) { timeDimensions.push({ - dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference), + dimension: this.evaluateReferences(cube, aggregation.timeDimensionReference, { collectJoinHints: true }), granularity: aggregation.granularity }); } else if (aggregation.timeDimensionReferences) { // eslint-disable-next-line guard-for-in for (const timeDimensionReference of aggregation.timeDimensionReferences) { timeDimensions.push({ - dimension: this.evaluateReferences(cube, timeDimensionReference.dimension), + dimension: this.evaluateReferences(cube, timeDimensionReference.dimension, { collectJoinHints: true }), granularity: timeDimensionReference.granularity }); } @@ -751,12 +752,12 @@ export class CubeEvaluator extends CubeSymbols { return { allowNonStrictDateRangeMatch: aggregation.allowNonStrictDateRangeMatch, dimensions: - (aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences) || []) + (aggregation.dimensionReferences && this.evaluateReferences(cube, aggregation.dimensionReferences, { collectJoinHints: true }) || []) .concat( - aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences) || [] + aggregation.segmentReferences && this.evaluateReferences(cube, aggregation.segmentReferences, { collectJoinHints: true }) || [] ), measures: - aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences) || [], + (aggregation.measureReferences && this.evaluateReferences(cube, aggregation.measureReferences, { collectJoinHints: true }) || []), timeDimensions, rollups: aggregation.rollupReferences && this.evaluateReferences(cube, aggregation.rollupReferences, { originalSorting: true }) || [], diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts index ecf31175a1d79..0a45fa329eae5 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts @@ -653,6 +653,27 @@ export class CubeSymbols { return array.join('.'); } + /** + * Split join path to member to join hint and member path: `A.B.C.D.E.dim` => `[A, B, C, D, E]` + `E.dim` + * @param path + */ + public joinHintFromPath(path: string): { path: string, joinHint: Array } { + const parts = path.split('.'); + if (parts.length > 2) { + // Path contains join path + const joinHint = parts.slice(0, -1); + return { + path: parts.slice(-2).join('.'), + joinHint, + }; + } else { + return { + path, + joinHint: [], + }; + } + } + protected resolveSymbolsCall( func: (...args: Array) => T | DynamicReference, nameResolver: (id: string) => unknown, diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts new file mode 100644 index 0000000000000..1042a6de1f270 --- /dev/null +++ b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts @@ -0,0 +1,253 @@ +import { PreAggregationPartitionRangeLoader } from '@cubejs-backend/query-orchestrator'; +import { PostgresQuery } from '../../../src/adapter/PostgresQuery'; +import { BigqueryQuery } from '../../../src/adapter/BigqueryQuery'; +import { prepareCompiler } from '../../unit/PrepareCompiler'; +import { dbRunner } from './PostgresDBRunner'; +import { DataSchemaCompiler } from '../../../src/compiler/DataSchemaCompiler'; +import { JoinGraph } from '../../../src/compiler/JoinGraph'; +import { CubeEvaluator } from '../../../src/compiler/CubeEvaluator'; + +describe('PreAggregations join path', () => { + jest.setTimeout(200000); + + let compiler: DataSchemaCompiler; + let joinGraph: JoinGraph; + let cubeEvaluator: CubeEvaluator; + + beforeAll(async () => { + // All joins would look like this + // A-->B-->C-->X + // | ^ + // ├-->D-->E---┤ + // | | + // └-->F-------┘ + // And join declaration would use ADEX path + // It should NOT be the shortest one, nor first in join edges declaration + // All join conditions would be essentially `FALSE`, but with different syntax, to be able to test SQL generation + + // TODO in this model queries like [A.a_id, X.x_id] become ambiguous, probably we want to handle this better + + // language=JavaScript + const prepared = prepareCompiler(` + cube('A', { + sql: 'SELECT 1 AS a_id, 100 AS a_value', + + joins: { + B: { + relationship: 'many_to_one', + sql: "'A' = 'B'", + }, + D: { + relationship: 'many_to_one', + sql: "'A' = 'D'", + }, + F: { + relationship: 'many_to_one', + sql: "'A' = 'F'", + }, + }, + + dimensions: { + a_id: { + type: 'number', + sql: 'a_id', + primaryKey: true, + }, + }, + + measures: { + a_sum: { + sql: 'a_value', + type: 'sum', + }, + }, + + preAggregations: { + adex: { + type: 'rollup', + dimensionReferences: [a_id, CUBE.D.E.X.x_id], + measureReferences: [a_sum, D.E.X.x_sum], + // TODO implement and test segmentReferences + // TODO implement and test timeDimensionReference + }, + }, + }); + + cube('B', { + sql: 'SELECT 1 AS b_id, 100 AS b_value', + + joins: { + C: { + relationship: 'many_to_one', + sql: "'B' = 'C'", + }, + }, + + dimensions: { + b_id: { + type: 'number', + sql: 'b_id', + primaryKey: true, + }, + }, + + measures: { + b_sum: { + sql: 'b_value', + type: 'sum', + }, + }, + }); + + cube('C', { + sql: 'SELECT 1 AS c_id, 100 AS c_value', + + joins: { + X: { + relationship: 'many_to_one', + sql: "'C' = 'X'", + }, + }, + + dimensions: { + c_id: { + type: 'number', + sql: 'c_id', + primaryKey: true, + }, + }, + + measures: { + c_sum: { + sql: 'c_value', + type: 'sum', + }, + }, + }); + + cube('D', { + sql: 'SELECT 1 AS d_id, 100 AS d_value', + + joins: { + E: { + relationship: 'many_to_one', + sql: "'D' = 'E'", + }, + }, + + dimensions: { + d_id: { + type: 'number', + sql: 'd_id', + primaryKey: true, + }, + }, + + measures: { + d_sum: { + sql: 'd_value', + type: 'sum', + }, + }, + }); + + cube('E', { + sql: 'SELECT 1 AS e_id, 100 AS e_value', + + joins: { + X: { + relationship: 'many_to_one', + sql: "'E' = 'X'", + }, + }, + + dimensions: { + e_id: { + type: 'number', + sql: 'e_id', + primaryKey: true, + }, + }, + + measures: { + e_sum: { + sql: 'e_value', + type: 'sum', + }, + }, + }); + + cube('F', { + sql: 'SELECT 1 AS f_id, 100 AS f_value', + + joins: { + X: { + relationship: 'many_to_one', + sql: "'F' = 'X'", + }, + }, + + dimensions: { + f_id: { + type: 'number', + sql: 'f_id', + primaryKey: true, + }, + }, + + measures: { + f_sum: { + sql: 'f_value', + type: 'sum', + }, + }, + }); + + cube('X', { + sql: 'SELECT 1 AS x_id, 100 AS x_value', + + dimensions: { + x_id: { + type: 'number', + sql: 'x_id', + primaryKey: true, + }, + }, + + measures: { + x_sum: { + sql: 'x_value', + type: 'sum', + }, + }, + }); + `); + + ({ compiler, joinGraph, cubeEvaluator } = prepared); + }); + + beforeEach(async () => { + await compiler.compile(); + }); + + it('should respect join path from pre-aggregation declaration', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'A.a_id' + ], + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + const { loadSql } = preAggregationsDescription.find(p => p.preAggregationId === 'A.adex'); + + expect(loadSql[0]).toMatch(/ON 'A' = 'D'/); + expect(loadSql[0]).toMatch(/ON 'D' = 'E'/); + expect(loadSql[0]).toMatch(/ON 'E' = 'X'/); + expect(loadSql[0]).not.toMatch(/ON 'A' = 'B'/); + expect(loadSql[0]).not.toMatch(/ON 'B' = 'C'/); + expect(loadSql[0]).not.toMatch(/ON 'C' = 'X'/); + expect(loadSql[0]).not.toMatch(/ON 'A' = 'F'/); + expect(loadSql[0]).not.toMatch(/ON 'F' = 'X'/); + }); +}); diff --git a/packages/cubejs-server-core/src/core/CompilerApi.js b/packages/cubejs-server-core/src/core/CompilerApi.js index f92ae16e30727..2687d6c6f724f 100644 --- a/packages/cubejs-server-core/src/core/CompilerApi.js +++ b/packages/cubejs-server-core/src/core/CompilerApi.js @@ -425,6 +425,11 @@ export class CompilerApi { } } + /** + * + * @param {unknown} filter + * @returns {Promise>} + */ async preAggregations(filter) { const { cubeEvaluator } = await this.getCompilers(); return cubeEvaluator.preAggregations(filter); From 9549ea9c48c35c850936bbdce4e299535ab78811 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 15 Apr 2025 14:42:10 +0200 Subject: [PATCH 02/15] fix(schema-compiler): Use proper stub to prepare compiler in tests --- .../integration/postgres/pre-aggregations-join-path.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts index 1042a6de1f270..ef13d422f8b92 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts @@ -1,7 +1,7 @@ import { PreAggregationPartitionRangeLoader } from '@cubejs-backend/query-orchestrator'; import { PostgresQuery } from '../../../src/adapter/PostgresQuery'; import { BigqueryQuery } from '../../../src/adapter/BigqueryQuery'; -import { prepareCompiler } from '../../unit/PrepareCompiler'; +import { prepareJsCompiler } from '../../unit/PrepareCompiler'; import { dbRunner } from './PostgresDBRunner'; import { DataSchemaCompiler } from '../../../src/compiler/DataSchemaCompiler'; import { JoinGraph } from '../../../src/compiler/JoinGraph'; @@ -28,7 +28,7 @@ describe('PreAggregations join path', () => { // TODO in this model queries like [A.a_id, X.x_id] become ambiguous, probably we want to handle this better // language=JavaScript - const prepared = prepareCompiler(` + const prepared = prepareJsCompiler(` cube('A', { sql: 'SELECT 1 AS a_id, 100 AS a_value', From 402b45a0c7eac5300bb6e95ae466574635893874 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Thu, 10 Apr 2025 19:37:18 +0200 Subject: [PATCH 03/15] fix(schema-compiler): Proper types for byPathAnyType argument --- packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index 031b166c93713..a608a579049fd 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -632,7 +632,7 @@ export class CubeEvaluator extends CubeSymbols { return symbol !== undefined; } - public byPathAnyType(path: string[]) { + public byPathAnyType(path: string | string[]) { if (this.isInstanceOfType('measures', path)) { return this.byPath('measures', path); } @@ -645,7 +645,7 @@ export class CubeEvaluator extends CubeSymbols { return this.byPath('segments', path); } - throw new UserError(`Can't resolve member '${path.join('.')}'`); + throw new UserError(`Can't resolve member '${Array.isArray(path) ? path.join('.') : path}'`); } public byPath(type: 'measures' | 'dimensions' | 'segments', path: string | string[]) { From f460aee1c8f0b7c5d99b9793a07446ab928908bb Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 15 Apr 2025 14:33:47 +0200 Subject: [PATCH 04/15] fix(schema-compiler): Proper type for canUsePreAggregationForTransformedQueryFn --- packages/cubejs-schema-compiler/src/adapter/PreAggregations.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js index 364531eb24fa5..85c8931a7d3ec 100644 --- a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js +++ b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js @@ -486,7 +486,7 @@ export class PreAggregations { * Returns function to determine whether pre-aggregation can be used or not * for specified query, or its value for `refs` if specified. * @param {Object} transformedQuery transformed query - * @param {Object?} refs pre-aggs reference + * @param {PreAggregationReferences?} refs pre-aggs reference * @returns {function(preagg: Object): boolean} */ static canUsePreAggregationForTransformedQueryFn(transformedQuery, refs) { From 9bfa94cf104765e446f0eb0638640cd8058c8b4f Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 15 Apr 2025 14:34:39 +0200 Subject: [PATCH 05/15] refactor(schema-compiler): Export pre-aggregation types from CubeEvaluator --- packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index a608a579049fd..71655109e9926 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -83,7 +83,7 @@ type PreAggregationTimeDimensionReference = { }; /// Strings in `dimensions`, `measures` and `timeDimensions[*].dimension` can contain full join path, not just `cube.member` -type PreAggregationReferences = { +export type PreAggregationReferences = { allowNonStrictDateRangeMatch?: boolean, dimensions: Array, measures: Array, @@ -91,7 +91,7 @@ type PreAggregationReferences = { rollups: Array, }; -type PreAggregationInfo = { +export type PreAggregationInfo = { id: string, preAggregationName: string, preAggregation: unknown, From 841e943c17d36dd8bba8e6f754d3c447ab2d332c Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 15 Apr 2025 14:36:57 +0200 Subject: [PATCH 06/15] refactor(schema-compiler): Use async-await in preagg matching tests --- .../test/unit/pre-agg-by-filter-match.test.ts | 42 +++++++++---------- .../test/unit/pre-agg-time-dim-match.test.ts | 35 ++++++++-------- 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts b/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts index b5e47d238f2ee..a54aa8f8282d6 100644 --- a/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts @@ -21,7 +21,7 @@ describe('Pre Aggregation by filter match tests', () => { return prepareCube('cube', cube); } - function testPreAggregationMatch( + async function testPreAggregationMatch( expecting: boolean, cubedimensions: Array, preAggdimensions: Array, @@ -60,27 +60,27 @@ describe('Pre Aggregation by filter match tests', () => { aaa.sortedDimensions.sort(); aaa.sortedTimeDimensions = [[aaa.timeDimension, aaa.granularity]]; - return compiler.compile().then(() => { - const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { - dimensions: querydimensions.map(d => `cube.${d}`), - measures: ['cube.uniqueField'], - timeDimensions: [{ - dimension: 'cube.created', - granularity: 'day', - dateRange: { from: '2017-01-01', to: '2017-01-30' } - }], - timezone: 'America/Los_Angeles', - filters, - segments: querySegments?.map(s => `cube.${s}`), - }); + await compiler.compile(); - const usePreAggregation = PreAggregations.canUsePreAggregationForTransformedQueryFn( - PreAggregations.transformQueryToCanUseForm(query), - aaa - ); - - expect(usePreAggregation).toEqual(expecting); + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + dimensions: querydimensions.map(d => `cube.${d}`), + measures: ['cube.uniqueField'], + timeDimensions: [{ + dimension: 'cube.created', + granularity: 'day', + dateRange: { from: '2017-01-01', to: '2017-01-30' } + }], + timezone: 'America/Los_Angeles', + filters, + segments: querySegments?.map(s => `cube.${s}`), }); + + const usePreAggregation = PreAggregations.canUsePreAggregationForTransformedQueryFn( + PreAggregations.transformQueryToCanUseForm(query), + aaa + ); + + expect(usePreAggregation).toEqual(expecting); } it('1 Dimension, 1 Filter', () => testPreAggregationMatch( @@ -147,7 +147,7 @@ describe('Pre Aggregation by filter match tests', () => { }, ] )); - + it('1 Dimension, 1 Filter, gt', () => testPreAggregationMatch( false, ['type'], diff --git a/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts b/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts index 32f644fdd6a34..12fca27127f44 100644 --- a/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts @@ -33,7 +33,7 @@ describe('Pre Aggregation by filter match tests', () => { return prepareCube('cube', cube); } - function testPreAggregationMatch( + async function testPreAggregationMatch( expecting: boolean, measures: Array, preAggTimeGranularity: string, @@ -69,24 +69,23 @@ describe('Pre Aggregation by filter match tests', () => { // aaa.sortedDimensions.sort(); aaa.sortedTimeDimensions = [[aaa.timeDimension, aaa.granularity, 'day']]; - return compiler.compile().then(() => { - const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { - measures: measures.map(m => `cube.${m}`), - timeDimensions: [{ - dimension: 'cube.created', - granularity: queryAggTimeGranularity, - dateRange, - }], - timezone: queryTimeZone, - }); - - const usePreAggregation = PreAggregations.canUsePreAggregationForTransformedQueryFn( - PreAggregations.transformQueryToCanUseForm(query), - aaa - ); - - expect(usePreAggregation).toEqual(expecting); + await compiler.compile(); + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: measures.map(m => `cube.${m}`), + timeDimensions: [{ + dimension: 'cube.created', + granularity: queryAggTimeGranularity, + dateRange, + }], + timezone: queryTimeZone, }); + + const usePreAggregation = PreAggregations.canUsePreAggregationForTransformedQueryFn( + PreAggregations.transformQueryToCanUseForm(query), + aaa + ); + + expect(usePreAggregation).toEqual(expecting); } it('1 count measure, day, day', () => testPreAggregationMatch( From 247a2d624baa2a7c04bcbbb5cb1c92570cf60dbb Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 15 Apr 2025 14:45:56 +0200 Subject: [PATCH 07/15] refactor(schema-compiler): Simplify cube definition in preagg matching tests --- .../test/unit/pre-agg-by-filter-match.test.ts | 7 +------ .../test/unit/pre-agg-time-dim-match.test.ts | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts b/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts index a54aa8f8282d6..22c8753c1b22a 100644 --- a/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts @@ -44,15 +44,10 @@ describe('Pre Aggregation by filter match tests', () => { segments: { qqq: { sql: 'id > 10000' } }, - dimensions: {}, + dimensions: Object.fromEntries(cubedimensions.map(d => [d, { sql: d, type: 'string' }])), preAggregations: { aaa } }; - cubedimensions.forEach(d => { - // @ts-ignore - cube.dimensions[d] = { sql: d, type: 'string' }; - }); - const { compiler, joinGraph, cubeEvaluator } = getCube(cube); if (aaa.segments) aaa.dimensions = aaa.dimensions.concat(aaa.segments); diff --git a/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts b/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts index 12fca27127f44..2a52d99ac40cc 100644 --- a/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts @@ -54,15 +54,10 @@ describe('Pre Aggregation by filter match tests', () => { const cube: any = { dimensions: {}, - measures: {}, + measures: Object.fromEntries(measures.map(m => [m, { type: m, sql: m }])), preAggregations: { aaa } }; - measures.forEach(m => { - // @ts-ignore - cube.measures[m] = { type: m, sql: m }; - }); - const { compiler, joinGraph, cubeEvaluator } = getCube(cube); // aaa.sortedDimensions = aaa.dimensions; From 783628fcb2650b5b45b09cac8515671942dc2c3e Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 15 Apr 2025 14:50:16 +0200 Subject: [PATCH 08/15] refactor(schema-compiler): More static types in preagg matching tests --- .../test/unit/pre-agg-by-filter-match.test.ts | 22 ++++++++++++------- .../test/unit/pre-agg-time-dim-match.test.ts | 21 ++++++++++++------ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts b/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts index 22c8753c1b22a..9453b10fad02a 100644 --- a/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/pre-agg-by-filter-match.test.ts @@ -1,6 +1,7 @@ import { PostgresQuery } from '../../src/adapter/PostgresQuery'; import { prepareCube } from './PrepareCompiler'; import { PreAggregations } from '../../src/adapter/PreAggregations'; +import { PreAggregationReferences } from '../../src/compiler/CubeEvaluator'; describe('Pre Aggregation by filter match tests', () => { function getCube(cube) { @@ -30,7 +31,7 @@ describe('Pre Aggregation by filter match tests', () => { preAggSegments: Array | undefined = undefined, querySegments: Array | undefined = undefined ) { - const aaa: any = { + const testPreAgg = { type: 'rollup', measures: ['cube.uniqueField'], dimensions: preAggdimensions.map(d => `cube.${d}`), @@ -40,20 +41,25 @@ describe('Pre Aggregation by filter match tests', () => { partitionGranularity: 'month', }; - const cube: any = { + const cube = { segments: { qqq: { sql: 'id > 10000' } }, dimensions: Object.fromEntries(cubedimensions.map(d => [d, { sql: d, type: 'string' }])), - preAggregations: { aaa } + preAggregations: { testPreAgg } }; const { compiler, joinGraph, cubeEvaluator } = getCube(cube); - if (aaa.segments) aaa.dimensions = aaa.dimensions.concat(aaa.segments); - aaa.sortedDimensions = aaa.dimensions; - aaa.sortedDimensions.sort(); - aaa.sortedTimeDimensions = [[aaa.timeDimension, aaa.granularity]]; + const refs: PreAggregationReferences = { + dimensions: testPreAgg.segments ? testPreAgg.dimensions.concat(testPreAgg.segments) : testPreAgg.dimensions, + measures: testPreAgg.measures, + timeDimensions: [{ + dimension: testPreAgg.timeDimension, + granularity: testPreAgg.granularity, + }], + rollups: [], + }; await compiler.compile(); @@ -72,7 +78,7 @@ describe('Pre Aggregation by filter match tests', () => { const usePreAggregation = PreAggregations.canUsePreAggregationForTransformedQueryFn( PreAggregations.transformQueryToCanUseForm(query), - aaa + refs ); expect(usePreAggregation).toEqual(expecting); diff --git a/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts b/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts index 2a52d99ac40cc..d2c23af1dfb97 100644 --- a/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/pre-agg-time-dim-match.test.ts @@ -1,6 +1,7 @@ import { PostgresQuery } from '../../src/adapter/PostgresQuery'; import { prepareCube } from './PrepareCompiler'; import { PreAggregations } from '../../src/adapter/PreAggregations'; +import { PreAggregationReferences } from "../../src/compiler/CubeEvaluator"; describe('Pre Aggregation by filter match tests', () => { function getCube(cube) { @@ -42,7 +43,7 @@ describe('Pre Aggregation by filter match tests', () => { dateRange: [ string, string ] = ['2017-01-01', '2017-03-31'], allowNonStrictDateRangeMatch: boolean = false ) { - const aaa: any = { + const testPreAgg = { type: 'rollup', dimensions: [], measures: measures.map(m => `cube.${m}`), @@ -52,17 +53,23 @@ describe('Pre Aggregation by filter match tests', () => { allowNonStrictDateRangeMatch }; - const cube: any = { + const cube = { dimensions: {}, measures: Object.fromEntries(measures.map(m => [m, { type: m, sql: m }])), - preAggregations: { aaa } + preAggregations: { testPreAgg } }; const { compiler, joinGraph, cubeEvaluator } = getCube(cube); - // aaa.sortedDimensions = aaa.dimensions; - // aaa.sortedDimensions.sort(); - aaa.sortedTimeDimensions = [[aaa.timeDimension, aaa.granularity, 'day']]; + const refs: PreAggregationReferences = { + dimensions: testPreAgg.dimensions, + measures: testPreAgg.measures, + timeDimensions: [{ + dimension: testPreAgg.timeDimension, + granularity: testPreAgg.granularity, + }], + rollups: [], + }; await compiler.compile(); const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { @@ -77,7 +84,7 @@ describe('Pre Aggregation by filter match tests', () => { const usePreAggregation = PreAggregations.canUsePreAggregationForTransformedQueryFn( PreAggregations.transformQueryToCanUseForm(query), - aaa + refs ); expect(usePreAggregation).toEqual(expecting); From 153556e4cd73d61cac96605aaeabd8fbd31a724f Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Fri, 11 Apr 2025 16:12:50 +0200 Subject: [PATCH 09/15] refactor(query-orchestrator): Simplify expandPartitionsInPreAggregations --- .../src/orchestrator/PreAggregations.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cubejs-query-orchestrator/src/orchestrator/PreAggregations.ts b/packages/cubejs-query-orchestrator/src/orchestrator/PreAggregations.ts index bf0f480e01821..453e9546ff017 100644 --- a/packages/cubejs-query-orchestrator/src/orchestrator/PreAggregations.ts +++ b/packages/cubejs-query-orchestrator/src/orchestrator/PreAggregations.ts @@ -590,7 +590,7 @@ export class PreAggregations { return { ...queryBody, - preAggregations: expandedPreAggregations.reduce((a, b) => a.concat(b), []), + preAggregations: expandedPreAggregations.flat(), groupedPartitionPreAggregations: expandedPreAggregations }; } From 8f2475f97b84e9d6e3833a87ce9cb70ee6f35529 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Fri, 11 Apr 2025 16:13:59 +0200 Subject: [PATCH 10/15] refactor(schema-compiler): More types for BaseQuery --- .../cubejs-schema-compiler/src/adapter/BaseQuery.js | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index a011d3a95332f..54e67fa9b674b 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -4350,6 +4350,11 @@ export class BaseQuery { }); } + /** + * + * @param {boolean} excludeSegments + * @returns {Array} + */ flattenAllMembers(excludeSegments = false) { return R.flatten( this.measures @@ -4370,6 +4375,11 @@ export class BaseQuery { return this.backAliasMembers(this.flattenAllMembers()); } + /** + * + * @param {Array} members + * @returns {Record} + */ backAliasMembers(members) { const query = this; return members.map( From 208d526b6b664f5b960e753e92d3999ac98e46ce Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Fri, 11 Apr 2025 16:14:09 +0200 Subject: [PATCH 11/15] refactor(schema-compiler): Simplify backAliasMembers --- packages/cubejs-schema-compiler/src/adapter/BaseQuery.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 54e67fa9b674b..5263725c72d29 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -4382,7 +4382,7 @@ export class BaseQuery { */ backAliasMembers(members) { const query = this; - return members.map( + return Object.fromEntries(members.flatMap( member => { const collectedMembers = query.evaluateSymbolSqlWithContext( () => query.collectFrom([member], query.collectMemberNamesFor.bind(query), 'collectMemberNamesFor'), @@ -4397,10 +4397,8 @@ export class BaseQuery { } return !nonAliasSeen; }) - .map(d => ( - { [query.cubeEvaluator.byPathAnyType(d).aliasMember]: memberPath } - )).reduce((a, b) => ({ ...a, ...b }), {}); + .map(d => [query.cubeEvaluator.byPathAnyType(d).aliasMember, memberPath]); } - ).reduce((a, b) => ({ ...a, ...b }), {}); + )); } } From 1ce9a360f07b6e41760f9b873ff63eeaa5a068a9 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Thu, 10 Apr 2025 20:32:08 +0200 Subject: [PATCH 12/15] refactor(schema-compiler): Move join path in pre-agg tests to multiple join paths tests --- .../postgres/multiple-join-paths.test.ts | 33 +++ .../pre-aggregations-join-path.test.ts | 253 ------------------ 2 files changed, 33 insertions(+), 253 deletions(-) delete mode 100644 packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts index 07d59f858ac8b..1783df21872fe 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts @@ -59,6 +59,16 @@ describe('Multiple join paths', () => { type: 'sum', }, }, + + preAggregations: { + adex: { + type: 'rollup', + dimensionReferences: [a_id, CUBE.D.E.X.x_id], + measureReferences: [a_sum, D.E.X.x_sum], + // TODO implement and test segmentReferences + // TODO implement and test timeDimensionReference + }, + }, }); cube('B', { @@ -268,4 +278,27 @@ describe('Multiple join paths', () => { expect(sql).not.toMatch(/ON 'F' = 'X'/); }); }); + + describe('PreAggregations join path', () => { + it('should respect join path from pre-aggregation declaration', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'A.a_id' + ], + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + const { loadSql } = preAggregationsDescription.find(p => p.preAggregationId === 'A.adex'); + + expect(loadSql[0]).toMatch(/ON 'A' = 'D'/); + expect(loadSql[0]).toMatch(/ON 'D' = 'E'/); + expect(loadSql[0]).toMatch(/ON 'E' = 'X'/); + expect(loadSql[0]).not.toMatch(/ON 'A' = 'B'/); + expect(loadSql[0]).not.toMatch(/ON 'B' = 'C'/); + expect(loadSql[0]).not.toMatch(/ON 'C' = 'X'/); + expect(loadSql[0]).not.toMatch(/ON 'A' = 'F'/); + expect(loadSql[0]).not.toMatch(/ON 'F' = 'X'/); + }); + }); }); diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts deleted file mode 100644 index ef13d422f8b92..0000000000000 --- a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations-join-path.test.ts +++ /dev/null @@ -1,253 +0,0 @@ -import { PreAggregationPartitionRangeLoader } from '@cubejs-backend/query-orchestrator'; -import { PostgresQuery } from '../../../src/adapter/PostgresQuery'; -import { BigqueryQuery } from '../../../src/adapter/BigqueryQuery'; -import { prepareJsCompiler } from '../../unit/PrepareCompiler'; -import { dbRunner } from './PostgresDBRunner'; -import { DataSchemaCompiler } from '../../../src/compiler/DataSchemaCompiler'; -import { JoinGraph } from '../../../src/compiler/JoinGraph'; -import { CubeEvaluator } from '../../../src/compiler/CubeEvaluator'; - -describe('PreAggregations join path', () => { - jest.setTimeout(200000); - - let compiler: DataSchemaCompiler; - let joinGraph: JoinGraph; - let cubeEvaluator: CubeEvaluator; - - beforeAll(async () => { - // All joins would look like this - // A-->B-->C-->X - // | ^ - // ├-->D-->E---┤ - // | | - // └-->F-------┘ - // And join declaration would use ADEX path - // It should NOT be the shortest one, nor first in join edges declaration - // All join conditions would be essentially `FALSE`, but with different syntax, to be able to test SQL generation - - // TODO in this model queries like [A.a_id, X.x_id] become ambiguous, probably we want to handle this better - - // language=JavaScript - const prepared = prepareJsCompiler(` - cube('A', { - sql: 'SELECT 1 AS a_id, 100 AS a_value', - - joins: { - B: { - relationship: 'many_to_one', - sql: "'A' = 'B'", - }, - D: { - relationship: 'many_to_one', - sql: "'A' = 'D'", - }, - F: { - relationship: 'many_to_one', - sql: "'A' = 'F'", - }, - }, - - dimensions: { - a_id: { - type: 'number', - sql: 'a_id', - primaryKey: true, - }, - }, - - measures: { - a_sum: { - sql: 'a_value', - type: 'sum', - }, - }, - - preAggregations: { - adex: { - type: 'rollup', - dimensionReferences: [a_id, CUBE.D.E.X.x_id], - measureReferences: [a_sum, D.E.X.x_sum], - // TODO implement and test segmentReferences - // TODO implement and test timeDimensionReference - }, - }, - }); - - cube('B', { - sql: 'SELECT 1 AS b_id, 100 AS b_value', - - joins: { - C: { - relationship: 'many_to_one', - sql: "'B' = 'C'", - }, - }, - - dimensions: { - b_id: { - type: 'number', - sql: 'b_id', - primaryKey: true, - }, - }, - - measures: { - b_sum: { - sql: 'b_value', - type: 'sum', - }, - }, - }); - - cube('C', { - sql: 'SELECT 1 AS c_id, 100 AS c_value', - - joins: { - X: { - relationship: 'many_to_one', - sql: "'C' = 'X'", - }, - }, - - dimensions: { - c_id: { - type: 'number', - sql: 'c_id', - primaryKey: true, - }, - }, - - measures: { - c_sum: { - sql: 'c_value', - type: 'sum', - }, - }, - }); - - cube('D', { - sql: 'SELECT 1 AS d_id, 100 AS d_value', - - joins: { - E: { - relationship: 'many_to_one', - sql: "'D' = 'E'", - }, - }, - - dimensions: { - d_id: { - type: 'number', - sql: 'd_id', - primaryKey: true, - }, - }, - - measures: { - d_sum: { - sql: 'd_value', - type: 'sum', - }, - }, - }); - - cube('E', { - sql: 'SELECT 1 AS e_id, 100 AS e_value', - - joins: { - X: { - relationship: 'many_to_one', - sql: "'E' = 'X'", - }, - }, - - dimensions: { - e_id: { - type: 'number', - sql: 'e_id', - primaryKey: true, - }, - }, - - measures: { - e_sum: { - sql: 'e_value', - type: 'sum', - }, - }, - }); - - cube('F', { - sql: 'SELECT 1 AS f_id, 100 AS f_value', - - joins: { - X: { - relationship: 'many_to_one', - sql: "'F' = 'X'", - }, - }, - - dimensions: { - f_id: { - type: 'number', - sql: 'f_id', - primaryKey: true, - }, - }, - - measures: { - f_sum: { - sql: 'f_value', - type: 'sum', - }, - }, - }); - - cube('X', { - sql: 'SELECT 1 AS x_id, 100 AS x_value', - - dimensions: { - x_id: { - type: 'number', - sql: 'x_id', - primaryKey: true, - }, - }, - - measures: { - x_sum: { - sql: 'x_value', - type: 'sum', - }, - }, - }); - `); - - ({ compiler, joinGraph, cubeEvaluator } = prepared); - }); - - beforeEach(async () => { - await compiler.compile(); - }); - - it('should respect join path from pre-aggregation declaration', async () => { - const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { - measures: [], - dimensions: [ - 'A.a_id' - ], - }); - - const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); - const { loadSql } = preAggregationsDescription.find(p => p.preAggregationId === 'A.adex'); - - expect(loadSql[0]).toMatch(/ON 'A' = 'D'/); - expect(loadSql[0]).toMatch(/ON 'D' = 'E'/); - expect(loadSql[0]).toMatch(/ON 'E' = 'X'/); - expect(loadSql[0]).not.toMatch(/ON 'A' = 'B'/); - expect(loadSql[0]).not.toMatch(/ON 'B' = 'C'/); - expect(loadSql[0]).not.toMatch(/ON 'C' = 'X'/); - expect(loadSql[0]).not.toMatch(/ON 'A' = 'F'/); - expect(loadSql[0]).not.toMatch(/ON 'F' = 'X'/); - }); -}); From 6673b230fee23e2c213df1b32337296e67b1b770 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 15 Apr 2025 10:18:42 +0200 Subject: [PATCH 13/15] refactor(schema-compiler): Move code around to solve circular dependency between CubeSymbols and BaseQuery --- .../src/adapter/BaseQuery.js | 56 ++----------------- .../src/compiler/CubeEvaluator.ts | 2 +- .../src/compiler/CubeSymbols.ts | 50 ++++++++++++++++- .../src/compiler/CubeToMetaTransformer.js | 5 +- 4 files changed, 57 insertions(+), 56 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 5263725c72d29..c97e7f2d3ac8d 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -24,6 +24,7 @@ import { timeSeriesFromCustomInterval } from '@cubejs-backend/shared'; +import { CubeSymbols } from "../compiler/CubeSymbols"; import { UserError } from '../compiler/UserError'; import { SqlParser } from '../parser/SqlParser'; import { BaseDimension } from './BaseDimension'; @@ -1411,7 +1412,7 @@ export class BaseQuery { // TODO condition should something else instead of rank multiStageQuery: !!withQuery.measures.find(d => { const { type } = this.newMeasure(d).definition(); - return type === 'rank' || BaseQuery.isCalculatedMeasureType(type); + return type === 'rank' || CubeSymbols.isCalculatedMeasureType(type); }), disableExternalPreAggregations: true, }; @@ -2946,7 +2947,7 @@ export class BaseQuery { funDef = this.countDistinctApprox(evaluateSql); } else if (symbol.type === 'countDistinct' || symbol.type === 'count' && !symbol.sql && multiplied) { funDef = `count(distinct ${evaluateSql})`; - } else if (BaseQuery.isCalculatedMeasureType(symbol.type)) { + } else if (CubeSymbols.isCalculatedMeasureType(symbol.type)) { // TODO calculated measure type will be ungrouped // if (this.multiStageDimensions.length !== this.dimensions.length) { // throw new UserError(`Calculated measure '${measurePath}' uses group_by or reduce_by context modifiers while it isn't allowed`); @@ -2972,23 +2973,12 @@ export class BaseQuery { return this.primaryKeyCount(cubeName, true); } } - if (BaseQuery.isCalculatedMeasureType(symbol.type)) { + if (CubeSymbols.isCalculatedMeasureType(symbol.type)) { return evaluateSql; } return `${symbol.type}(${evaluateSql})`; } - static isCalculatedMeasureType(type) { - return type === 'number' || type === 'string' || type === 'time' || type === 'boolean'; - } - - /** - TODO: support type qualifiers on min and max - */ - static toMemberDataType(type) { - return this.isCalculatedMeasureType(type) ? type : 'number'; - } - aggregateOnGroupedColumn(symbol, evaluateSql, topLevelMerge, measurePath) { const cumulativeMeasureFilters = (this.safeEvaluateSymbolContext().cumulativeMeasureFilters || {})[measurePath]; if (cumulativeMeasureFilters) { @@ -4095,7 +4085,7 @@ export class BaseQuery { sqlUtils: { convertTz: (field) => field, }, - securityContext: BaseQuery.contextSymbolsProxyFrom({}, allocateParam), + securityContext: CubeSymbols.contextSymbolsProxyFrom({}, allocateParam), }; } @@ -4110,41 +4100,7 @@ export class BaseQuery { } contextSymbolsProxy(symbols) { - return BaseQuery.contextSymbolsProxyFrom(symbols, this.paramAllocator.allocateParam.bind(this.paramAllocator)); - } - - static contextSymbolsProxyFrom(symbols, allocateParam) { - return new Proxy(symbols, { - get: (target, name) => { - const propValue = target[name]; - const methods = (paramValue) => ({ - filter: (column) => { - if (paramValue) { - const value = Array.isArray(paramValue) ? - paramValue.map(allocateParam) : - allocateParam(paramValue); - if (typeof column === 'function') { - return column(value); - } else { - return `${column} = ${value}`; - } - } else { - return '1 = 1'; - } - }, - requiredFilter: (column) => { - if (!paramValue) { - throw new UserError(`Filter for ${column} is required`); - } - return methods(paramValue).filter(column); - }, - unsafeValue: () => paramValue - }); - return methods(target)[name] || - typeof propValue === 'object' && propValue !== null && BaseQuery.contextSymbolsProxyFrom(propValue, allocateParam) || - methods(propValue); - } - }); + return CubeSymbols.contextSymbolsProxyFrom(symbols, this.paramAllocator.allocateParam.bind(this.paramAllocator)); } static extractFilterMembers(filter) { diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index 71655109e9926..b95b75f92714f 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -468,7 +468,7 @@ export class CubeEvaluator extends CubeSymbols { ownedByCube = false; } // Aliases one to one some another member as in case of views - if (!ownedByCube && !member.filters && BaseQuery.isCalculatedMeasureType(member.type) && pathReferencesUsed.length === 1 && this.pathFromArray(pathReferencesUsed[0]) === evaluatedSql) { + if (!ownedByCube && !member.filters && CubeSymbols.isCalculatedMeasureType(member.type) && pathReferencesUsed.length === 1 && this.pathFromArray(pathReferencesUsed[0]) === evaluatedSql) { aliasMember = this.pathFromArray(pathReferencesUsed[0]); } const foreignCubes = cubeReferencesUsed.filter(usedCube => usedCube !== cube.name); diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts index 0a45fa329eae5..286a8e822198b 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts @@ -5,7 +5,6 @@ import { camelize } from 'inflection'; import { UserError } from './UserError'; import { DynamicReference } from './DynamicReference'; import { camelizeCube } from './utils'; -import { BaseQuery } from '../adapter'; import type { ErrorReporter } from './ErrorReporter'; @@ -536,7 +535,7 @@ export class CubeSymbols { if (type === 'measures') { memberDefinition = { sql, - type: BaseQuery.toMemberDataType(resolvedMember.type), + type: CubeSymbols.toMemberDataType(resolvedMember.type), aggType: resolvedMember.type, meta: resolvedMember.meta, title: resolvedMember.title, @@ -755,7 +754,7 @@ export class CubeSymbols { return Object.assign({ filterParams: this.filtersProxyDep(), filterGroup: this.filterGroupFunctionDep(), - securityContext: BaseQuery.contextSymbolsProxyFrom({}, (param) => param), + securityContext: CubeSymbols.contextSymbolsProxyFrom({}, (param) => param), sqlUtils: { convertTz: (f) => f @@ -999,4 +998,49 @@ export class CubeSymbols { public isCurrentCube(name) { return CURRENT_CUBE_CONSTANTS.indexOf(name) >= 0; } + + public static isCalculatedMeasureType(type: string): boolean { + return type === 'number' || type === 'string' || type === 'time' || type === 'boolean'; + } + + /** + TODO: support type qualifiers on min and max + */ + public static toMemberDataType(type: string): string { + return this.isCalculatedMeasureType(type) ? type : 'number'; + } + + public static contextSymbolsProxyFrom(symbols: object, allocateParam: (param: unknown) => unknown): object { + return new Proxy(symbols, { + get: (target, name) => { + const propValue = target[name]; + const methods = (paramValue) => ({ + filter: (column) => { + if (paramValue) { + const value = Array.isArray(paramValue) ? + paramValue.map(allocateParam) : + allocateParam(paramValue); + if (typeof column === 'function') { + return column(value); + } else { + return `${column} = ${value}`; + } + } else { + return '1 = 1'; + } + }, + requiredFilter: (column) => { + if (!paramValue) { + throw new UserError(`Filter for ${column} is required`); + } + return methods(paramValue).filter(column); + }, + unsafeValue: () => paramValue + }); + return methods(target)[name] || + typeof propValue === 'object' && propValue !== null && CubeSymbols.contextSymbolsProxyFrom(propValue, allocateParam) || + methods(propValue); + } + }); + } } diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeToMetaTransformer.js b/packages/cubejs-schema-compiler/src/compiler/CubeToMetaTransformer.js index e879b4a095d61..5f57a8b6a615b 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeToMetaTransformer.js +++ b/packages/cubejs-schema-compiler/src/compiler/CubeToMetaTransformer.js @@ -2,8 +2,9 @@ import inflection from 'inflection'; import R from 'ramda'; import camelCase from 'camelcase'; +import { CubeSymbols } from './CubeSymbols'; import { UserError } from './UserError'; -import { BaseMeasure, BaseQuery } from '../adapter'; +import { BaseMeasure } from '../adapter'; export class CubeToMetaTransformer { /** @@ -168,7 +169,7 @@ export class CubeToMetaTransformer { cubeName, drillMembers, { originalSorting: true } )) || []; - const type = BaseQuery.toMemberDataType(nameToMetric[1].type); + const type = CubeSymbols.toMemberDataType(nameToMetric[1].type); return { name, From 1a0cbdd83d248dc7c13fa174db9634aa7d8e249f Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 15 Apr 2025 10:13:18 +0200 Subject: [PATCH 14/15] refactor(schema-compiler): Make joinHintFromPath static --- packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts | 3 ++- packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts | 3 ++- packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts | 3 ++- .../cubejs-schema-compiler/src/adapter/PreAggregations.js | 5 +++-- packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts b/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts index 0c045b6e1cb01..2332bac65f253 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseDimension.ts @@ -1,5 +1,6 @@ import type { BaseQuery } from './BaseQuery'; import type { DimensionDefinition, SegmentDefinition } from '../compiler/CubeEvaluator'; +import { CubeSymbols } from "../compiler/CubeSymbols"; export class BaseDimension { public readonly expression: any; @@ -26,7 +27,7 @@ export class BaseDimension { // TODO move this `as` to static types const dimensionPath = dimension as string | null; if (dimensionPath !== null) { - const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(dimensionPath); + const { path, joinHint } = CubeSymbols.joinHintFromPath(dimensionPath); this.dimension = path; this.joinHint = joinHint; } diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts b/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts index 87fc252267a20..eb39bd3ffc52b 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseMeasure.ts @@ -1,6 +1,7 @@ import { UserError } from '../compiler/UserError'; import type { BaseQuery } from './BaseQuery'; import { MeasureDefinition } from '../compiler/CubeEvaluator'; +import { CubeSymbols } from "../compiler/CubeSymbols"; export class BaseMeasure { public readonly expression: any; @@ -128,7 +129,7 @@ export class BaseMeasure { } else { // TODO move this `as` to static types const measurePath = measure as string; - const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(measurePath); + const { path, joinHint } = CubeSymbols.joinHintFromPath(measurePath); this.measure = path; this.joinHint = joinHint; } diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts b/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts index db2033c7916f7..7a2ff39747676 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts +++ b/packages/cubejs-schema-compiler/src/adapter/BaseSegment.ts @@ -1,4 +1,5 @@ import type { BaseQuery } from './BaseQuery'; +import { CubeSymbols } from "../compiler/CubeSymbols"; export class BaseSegment { public readonly expression: any; @@ -24,7 +25,7 @@ export class BaseSegment { } else { // TODO move this `as` to static types const segmentPath = segment as string; - const { path, joinHint } = this.query.cubeEvaluator.joinHintFromPath(segmentPath); + const { path, joinHint } = CubeSymbols.joinHintFromPath(segmentPath); this.segment = path; this.joinHint = joinHint; } diff --git a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js index 85c8931a7d3ec..efe1d4e453854 100644 --- a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js +++ b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js @@ -1,6 +1,7 @@ import R from 'ramda'; import { FROM_PARTITION_RANGE, getEnv, TO_PARTITION_RANGE } from '@cubejs-backend/shared'; +import { CubeSymbols } from "../compiler/CubeSymbols"; import { UserError } from '../compiler/UserError'; export class PreAggregations { @@ -173,7 +174,7 @@ export class PreAggregations { // timeDimensionsReference[*].dimension can contain full join path, so we should trim it // TODO check full join path match here - const timeDimensionReferenceDimension = this.query.cubeEvaluator.joinHintFromPath(timeDimensionReference.dimension).path; + const timeDimensionReferenceDimension = CubeSymbols.joinHintFromPath(timeDimensionReference.dimension).path; if (td.dimension === timeDimensionReferenceDimension) { return true; @@ -1095,7 +1096,7 @@ export class PreAggregations { !!references.dimensions.find((d) => { // `d` can contain full join path, so we should trim it // TODO check full join path match here - const trimmedDimension = this.query.cubeEvaluator.joinHintFromPath(d).path; + const trimmedDimension = CubeSymbols.joinHintFromPath(d).path; return this.query.cubeEvaluator.dimensionByPath(trimmedDimension).primaryKey; }), }); diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts index 286a8e822198b..d5739e117b0dd 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts @@ -656,7 +656,7 @@ export class CubeSymbols { * Split join path to member to join hint and member path: `A.B.C.D.E.dim` => `[A, B, C, D, E]` + `E.dim` * @param path */ - public joinHintFromPath(path: string): { path: string, joinHint: Array } { + public static joinHintFromPath(path: string): { path: string, joinHint: Array } { const parts = path.split('.'); if (parts.length > 2) { // Path contains join path From 2bb6df88d84c85b150c00d298294225f6ab51f0a Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Thu, 10 Apr 2025 19:36:52 +0200 Subject: [PATCH 15/15] fix(schema-compiler): Handle pre-aggregations matching in presence of join paths --- .../src/adapter/PreAggregations.js | 81 ++++-- .../postgres/multiple-join-paths.test.ts | 272 +++++++++++++++++- 2 files changed, 319 insertions(+), 34 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js index efe1d4e453854..8c203102b38bc 100644 --- a/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js +++ b/packages/cubejs-schema-compiler/src/adapter/PreAggregations.js @@ -430,6 +430,13 @@ export class PreAggregations { }; } + /** + * + * @param query + * @param members + * @param {Map>} cubeToJoinPrefix + * @returns {Array} + */ static ownedMembers(query, members) { return R.pipe(R.uniq, R.sortBy(R.identity))( query @@ -495,6 +502,33 @@ export class PreAggregations { // query can have same members as pre-agg, but different calculated join path // `refs` will come from preagg references, and would contain full join paths + // TODO remove this in favor of matching with join path + /** + * @param {PreAggregationReferences} references + * @returns {PreAggregationReferences} + */ + function trimmedReferences(references) { + const timeDimensionsTrimmed = references + .timeDimensions + .map(td => ({ + ...td, + dimension: CubeSymbols.joinHintFromPath(td.dimension).path, + })); + const measuresTrimmed = references + .measures + .map(m => CubeSymbols.joinHintFromPath(m).path); + const dimensionsTrimmed = references + .dimensions + .map(d => CubeSymbols.joinHintFromPath(d).path); + + return { + ...references, + dimensions: dimensionsTrimmed, + measures: measuresTrimmed, + timeDimensions: timeDimensionsTrimmed, + }; + } + /** * Returns an array of 2-elements arrays with the dimension and granularity * sorted by the concatenated dimension + granularity key. @@ -534,18 +568,19 @@ export class PreAggregations { /** * Determine whether pre-aggregation can be used or not. - * @param {*} references + * @param {PreAggregationReferences} references * @returns {boolean} */ const canUsePreAggregationNotAdditive = (references) => { - const refTimeDimensions = - backAlias(references.sortedTimeDimensions || sortTimeDimensions(references.timeDimensions)); + // TODO remove this in favor of matching with join path + const referencesTrimmed = trimmedReferences(references); + + const refTimeDimensions = backAlias(sortTimeDimensions(referencesTrimmed.timeDimensions)); const qryTimeDimensions = references.allowNonStrictDateRangeMatch ? transformedQuery.timeDimensions : transformedQuery.sortedTimeDimensions; - const backAliasMeasures = backAlias(references.measures); - const backAliasSortedDimensions = backAlias(references.sortedDimensions || references.dimensions); - const backAliasDimensions = backAlias(references.dimensions); + const backAliasMeasures = backAlias(referencesTrimmed.measures); + const backAliasDimensions = backAlias(referencesTrimmed.dimensions); return (( transformedQuery.hasNoTimeDimensionsWithoutGranularity ) && ( @@ -557,10 +592,10 @@ export class PreAggregations { R.equals(transformedQuery.timeDimensions, refTimeDimensions) ) && ( filterDimensionsSingleValueEqual && - references.dimensions.length === filterDimensionsSingleValueEqual.size && + referencesTrimmed.dimensions.length === filterDimensionsSingleValueEqual.size && R.all(d => filterDimensionsSingleValueEqual.has(d), backAliasDimensions) || transformedQuery.allFiltersWithinSelectedDimensions && - R.equals(backAliasSortedDimensions, transformedQuery.sortedDimensions) + R.equals(backAliasDimensions, transformedQuery.sortedDimensions) ) && ( R.all(m => backAliasMeasures.indexOf(m) !== -1, transformedQuery.measures) || // TODO do we need backAlias here? @@ -581,16 +616,15 @@ export class PreAggregations { /** * Determine whether time dimensions match to the window granularity or not. - * @param {*} references + * @param {PreAggregationReferences} references * @returns {boolean} */ const windowGranularityMatches = (references) => { if (!transformedQuery.windowGranularity) { return true; } - const sortedTimeDimensions = - references.sortedTimeDimensions || - sortTimeDimensions(references.timeDimensions); + // Beware that sortedTimeDimensions contain full join paths + const sortedTimeDimensions = sortTimeDimensions(references.timeDimensions); return sortedTimeDimensions .map(td => expandGranularity(td[0], transformedQuery.windowGranularity)) @@ -620,7 +654,7 @@ export class PreAggregations { /** * Determine whether pre-aggregation can be used or not. * TODO: revisit cumulative leaf measure matches. - * @param {*} references + * @param {PreAggregationReferences} references * @returns {boolean} */ const canUsePreAggregationLeafMeasureAdditive = (references) => { @@ -636,11 +670,14 @@ export class PreAggregations { ? transformedQuery.ownedTimeDimensionsAsIs.map(expandTimeDimension) : transformedQuery.ownedTimeDimensionsWithRollupGranularity.map(expandTimeDimension); + // TODO remove this in favor of matching with join path + const referencesTrimmed = trimmedReferences(references); + const dimensionsMatch = (dimensions, doBackAlias) => R.all( d => ( doBackAlias ? - backAlias(references.sortedDimensions || references.dimensions) : - (references.sortedDimensions || references.dimensions) + backAlias(referencesTrimmed.dimensions) : + (referencesTrimmed.dimensions) ).indexOf(d) !== -1, dimensions ); @@ -657,13 +694,13 @@ export class PreAggregations { ) )( doBackAlias ? - backAlias(references.sortedTimeDimensions || sortTimeDimensions(references.timeDimensions)) : - (references.sortedTimeDimensions || sortTimeDimensions(references.timeDimensions)) + backAlias(sortTimeDimensions(referencesTrimmed.timeDimensions)) : + (sortTimeDimensions(referencesTrimmed.timeDimensions)) ); if (transformedQuery.ungrouped) { const allReferenceCubes = R.pipe(R.map(m => (m.dimension || m).split('.')[0]), R.uniq, R.sortBy(R.identity))( - references.measures.concat(references.dimensions).concat(references.timeDimensions) + referencesTrimmed.measures.concat(referencesTrimmed.dimensions).concat(referencesTrimmed.timeDimensions) ); if ( !R.equals(transformedQuery.sortedAllCubeNames, allReferenceCubes) || @@ -675,12 +712,12 @@ export class PreAggregations { } } - const backAliasMeasures = backAlias(references.measures); + const backAliasMeasures = backAlias(referencesTrimmed.measures); return (( windowGranularityMatches(references) ) && ( R.all( - m => references.measures.indexOf(m) !== -1, + m => referencesTrimmed.measures.indexOf(m) !== -1, transformedQuery.leafMeasures, ) || R.all( m => backAliasMeasures.indexOf(m) !== -1, @@ -1151,6 +1188,10 @@ export class PreAggregations { * @returns {PreAggregationReferences} */ evaluateAllReferences(cube, aggregation, preAggregationName) { + // TODO build a join tree for all references, so they would always include full join path + // Even for preaggregation references without join path + // It is necessary to be able to match query and preaggregation based on full join tree + const evaluateReferences = () => { const references = this.query.cubeEvaluator.evaluatePreAggregationReferences(cube, aggregation); if (aggregation.type === 'rollupLambda') { diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts index 1783df21872fe..b26868fe5d8e4 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/multiple-join-paths.test.ts @@ -28,7 +28,7 @@ describe('Multiple join paths', () => { // language=JavaScript const prepared = prepareJsCompiler(` cube('A', { - sql: 'SELECT 1 AS a_id, 100 AS a_value', + sql: "SELECT 1 AS a_id, CAST('1970-01-01' AS TIMESTAMPTZ) AS a_time, 100 AS a_value", joins: { B: { @@ -51,6 +51,11 @@ describe('Multiple join paths', () => { sql: 'a_id', primaryKey: true, }, + + a_time: { + type: 'time', + sql: 'a_time', + }, }, measures: { @@ -60,13 +65,50 @@ describe('Multiple join paths', () => { }, }, + segments: { + a_seg: { + sql: 'a_id % 2 = 0', + }, + }, + preAggregations: { - adex: { + adex_with_join_paths: { + type: 'rollup', + dimensions: [ + a_id, + A.D.d_id, + A.D.d_name_for_join_paths, + // D.d_id, + A.D.E.X.x_id, + ], + measures: [ + a_sum, + ], + segments: [ + a_seg, + A.D.d_seg, + A.D.E.X.x_seg, + ], + timeDimension: A.D.E.X.x_time, + granularity: 'day', + }, + + ad_without_join_paths: { type: 'rollup', - dimensionReferences: [a_id, CUBE.D.E.X.x_id], - measureReferences: [a_sum, D.E.X.x_sum], - // TODO implement and test segmentReferences - // TODO implement and test timeDimensionReference + dimensions: [ + CUBE.a_id, + D.d_id, + D.d_name_for_no_join_paths, + ], + measures: [ + a_sum, + ], + segments: [ + a_seg, + D.d_seg, + ], + timeDimension: D.d_time, + granularity: 'day', }, }, }); @@ -95,6 +137,12 @@ describe('Multiple join paths', () => { type: 'sum', }, }, + + segments: { + b_seg: { + sql: 'b_id % 2 = 0', + }, + }, }); cube('C', { @@ -121,10 +169,16 @@ describe('Multiple join paths', () => { type: 'sum', }, }, + + segments: { + c_seg: { + sql: 'c_id % 2 = 0', + }, + }, }); cube('D', { - sql: 'SELECT 1 AS d_id, 100 AS d_value', + sql: "SELECT 1 AS d_id, 'foo' AS d_name, CAST('1970-01-01' AS TIMESTAMPTZ) AS d_time, 100 AS d_value", joins: { E: { @@ -139,6 +193,19 @@ describe('Multiple join paths', () => { sql: 'd_id', primaryKey: true, }, + // These are to select different preaggregations from query PoV + d_name_for_join_paths: { + type: 'string', + sql: 'd_name', + }, + d_name_for_no_join_paths: { + type: 'string', + sql: 'd_name', + }, + d_time: { + type: 'time', + sql: 'd_time', + }, }, measures: { @@ -147,6 +214,12 @@ describe('Multiple join paths', () => { type: 'sum', }, }, + + segments: { + d_seg: { + sql: 'd_id % 2 = 0', + }, + }, }); cube('E', { @@ -173,6 +246,12 @@ describe('Multiple join paths', () => { type: 'sum', }, }, + + segments: { + e_seg: { + sql: 'e_id % 2 = 0', + }, + }, }); cube('F', { @@ -199,10 +278,16 @@ describe('Multiple join paths', () => { type: 'sum', }, }, + + segments: { + f_seg: { + sql: 'f_id % 2 = 0', + }, + }, }); cube('X', { - sql: 'SELECT 1 AS x_id, 100 AS x_value', + sql: "SELECT 1 AS x_id, 'foo' AS x_name, CAST('1970-01-01' AS TIMESTAMPTZ) AS x_time, 100 AS x_value", dimensions: { x_id: { @@ -210,13 +295,22 @@ describe('Multiple join paths', () => { sql: 'x_id', primaryKey: true, }, + x_name: { + type: 'string', + sql: 'x_name', + }, // This member should be: // * NOT ownedByCube // * reference only members of same cube // * included in view - x_id_ref: { - type: 'number', - sql: \`\${x_id} + 1\`, + // * NOT included in pre-aggs (as well as at least one of its references) + x_name_ref: { + type: 'string', + sql: \`\${x_name} || 'bar'\`, + }, + x_time: { + type: 'time', + sql: 'x_time', }, }, @@ -226,6 +320,12 @@ describe('Multiple join paths', () => { type: 'sum', }, }, + + segments: { + x_seg: { + sql: 'x_id % 2 = 0', + }, + }, }); view('ADEX_view', { @@ -234,6 +334,20 @@ describe('Multiple join paths', () => { join_path: A, includes: [ 'a_id', + 'a_sum', + 'a_seg', + ], + prefix: false + }, + { + join_path: A.D, + includes: [ + 'd_id', + 'd_name_for_join_paths', + 'd_name_for_no_join_paths', + 'd_time', + 'd_sum', + 'd_seg', ], prefix: false }, @@ -241,7 +355,10 @@ describe('Multiple join paths', () => { join_path: A.D.E.X, includes: [ 'x_id', - 'x_id_ref', + 'x_name_ref', + 'x_time', + 'x_sum', + 'x_seg', ], prefix: false }, @@ -262,7 +379,7 @@ describe('Multiple join paths', () => { measures: [], dimensions: [ 'ADEX_view.a_id', - 'ADEX_view.x_id_ref', + 'ADEX_view.x_name_ref', ], }); @@ -289,7 +406,7 @@ describe('Multiple join paths', () => { }); const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); - const { loadSql } = preAggregationsDescription.find(p => p.preAggregationId === 'A.adex'); + const { loadSql } = preAggregationsDescription.find(p => p.preAggregationId === 'A.adex_with_join_paths'); expect(loadSql[0]).toMatch(/ON 'A' = 'D'/); expect(loadSql[0]).toMatch(/ON 'D' = 'E'/); @@ -300,5 +417,132 @@ describe('Multiple join paths', () => { expect(loadSql[0]).not.toMatch(/ON 'A' = 'F'/); expect(loadSql[0]).not.toMatch(/ON 'F' = 'X'/); }); + + it('should match pre-aggregation with join paths for simple direct query', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'A.a_sum', + ], + dimensions: [ + 'A.a_id', + 'D.d_id', + 'D.d_name_for_join_paths', + ], + segments: [ + 'A.a_seg', + 'D.d_seg', + ], + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + const preAggregation = preAggregationsDescription.find(p => p.preAggregationId === 'A.adex_with_join_paths'); + expect(preAggregation).toBeDefined(); + }); + + it('should match pre-aggregation with join paths for query through view with same join path', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'ADEX_view.a_sum', + ], + dimensions: [ + 'ADEX_view.a_id', + 'ADEX_view.d_name_for_join_paths', + 'ADEX_view.x_id', + ], + segments: [ + 'ADEX_view.a_seg', + 'ADEX_view.d_seg', + 'ADEX_view.x_seg', + ], + timeDimensions: [{ + dimension: 'ADEX_view.x_time', + granularity: 'day', + }], + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + const preAggregation = preAggregationsDescription.find(p => p.preAggregationId === 'A.adex_with_join_paths'); + expect(preAggregation).toBeDefined(); + }); + + it('should match pre-aggregation without join paths for simple direct query', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'A.a_sum', + ], + dimensions: [ + 'A.a_id', + 'D.d_id', + 'D.d_name_for_no_join_paths', + ], + segments: [ + 'A.a_seg', + 'D.d_seg', + ], + timeDimensions: [{ + dimension: 'D.d_time', + granularity: 'day', + }], + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + const preAggregation = preAggregationsDescription.find(p => p.preAggregationId === 'A.ad_without_join_paths'); + expect(preAggregation).toBeDefined(); + }); + + it('should match pre-aggregation without join paths for query through view with same join path', async () => { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [ + 'ADEX_view.a_sum', + ], + dimensions: [ + 'ADEX_view.a_id', + 'ADEX_view.d_id', + 'ADEX_view.d_name_for_no_join_paths', + ], + segments: [ + 'ADEX_view.a_seg', + 'ADEX_view.d_seg', + ], + timeDimensions: [{ + dimension: 'ADEX_view.d_time', + granularity: 'day', + }], + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + const preAggregation = preAggregationsDescription.find(p => p.preAggregationId === 'A.ad_without_join_paths'); + expect(preAggregation).toBeDefined(); + }); + + const preAggregationIds = [ + 'A.adex_with_join_paths', + 'A.ad_without_join_paths', + ]; + for (const preAggregationId of preAggregationIds) { + // eslint-disable-next-line no-loop-func + it(`pre-aggregation ${preAggregationId} should match its own references`, async () => { + const preAggregations = cubeEvaluator.preAggregations({}); + + const preAggregation = preAggregations + .find(p => p.id === preAggregationId); + if (preAggregation === undefined) { + throw expect(preAggregation).toBeDefined(); + } + + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + ...preAggregation.references, + preAggregationId: preAggregation.id, + }); + + const preAggregationsDescription: any = query.preAggregations?.preAggregationsDescription(); + const preAggregationFromQuery = preAggregationsDescription.find(p => p.preAggregationId === preAggregation.id); + if (preAggregationFromQuery === undefined) { + throw expect(preAggregationFromQuery).toBeDefined(); + } + + expect(preAggregationFromQuery.preAggregationId).toBe(preAggregationId); + }); + } }); });