From 15af41e68293e0492437ad12f091dddf5872f121 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 9 Jun 2025 23:10:49 +0300 Subject: [PATCH 01/13] attempt to make allJoinHints work --- .../src/adapter/BaseQuery.js | 60 +++++++++++++++---- .../src/compiler/CubeSymbols.ts | 5 +- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index dadc6a3a6392d..38b1ccbc5547e 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -418,10 +418,29 @@ export class BaseQuery { */ get allJoinHints() { if (!this.collectedJoinHints) { + let joinHints = this.collectJoinHints(); + // let joinHints = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); + + // One cube may join the other cube via transitive joined cubes, + // members from which are referenced in the join `on` clauses. + // We need to collect such join hints and push them upfront of the joining one. + // It is important to use queryLevelJoinHints during the calculation if it is set. + + const prevJoins = this.join; + + let newJoin = this.joinGraph.buildJoin([...this.queryLevelJoinHints, ...joinHints]); + while (!R.equals(this.join, newJoin)) { + this.join = newJoin; + joinHints = R.uniq([joinHints[0], ...this.collectJoinHintsFromMembers(this.joinMembersFromJoin()), ...joinHints]); + newJoin = this.joinGraph.buildJoin([...this.queryLevelJoinHints, ...joinHints]); + } + this.collectedJoinHints = [ ...this.queryLevelJoinHints, - ...this.collectJoinHints(), + ...joinHints, ]; + + this.join = prevJoins; } return this.collectedJoinHints; } @@ -2417,7 +2436,20 @@ export class BaseQuery { } else if (s.patchedMeasure?.patchedFrom) { return [s.patchedMeasure.patchedFrom.cubeName].concat(this.evaluateSymbolSql(s.patchedMeasure.patchedFrom.cubeName, s.patchedMeasure.patchedFrom.name, s.definition())); } else { - return this.evaluateSql(s.cube().name, s.definition().sql); + const res = this.evaluateSql(s.cube().name, s.definition().sql); + if (s.isJoinCondition) { + // In a join between Cube A and Cube B, sql() may reference members from other cubes. + // These referenced cubes must be added as join hints before Cube B to ensure correct SQL generation. + const targetCube = s.targetCubeName(); + const { joinHints } = this.safeEvaluateSymbolContext(); + let targetIdx = joinHints.findIndex(e => e === targetCube); + while (targetIdx > -1) { + joinHints.splice(targetIdx, 1); + targetIdx = joinHints.findIndex(e => e === targetCube); + } + joinHints.push(targetCube); + } + return res; } } @@ -2454,23 +2486,27 @@ export class BaseQuery { }; }); - const joinMembers = this.join ? this.join.joins.map(j => ({ - getMembers: () => [{ - path: () => null, - cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom), - definition: () => j.join, - }] - })) : []; - const membersToCollectFrom = [ ...this.allMembersConcat(excludeTimeDimensions), - ...joinMembers, + ...this.joinMembersFromJoin(), ...customSubQueryJoinMembers, ]; return this.collectJoinHintsFromMembers(membersToCollectFrom); } + joinMembersFromJoin() { + return this.join ? this.join.joins.map(j => ({ + getMembers: () => [{ + path: () => null, + cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom), + definition: () => j.join, + isJoinCondition: true, + targetCubeName: () => j.originalTo, + }] + })) : []; + } + collectJoinHintsFromMembers(members) { return [ ...members.map(m => m.joinHint).filter(h => h?.length > 0), @@ -2786,7 +2822,7 @@ export class BaseQuery { pushJoinHints(joinHints) { if (this.safeEvaluateSymbolContext().joinHints && joinHints) { - if (joinHints.length === 1) { + if (Array.isArray(joinHints) && joinHints.length === 1) { [joinHints] = joinHints; } this.safeEvaluateSymbolContext().joinHints.push(joinHints); diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts index ba7e0ec33e049..a6449ebb2b227 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts @@ -773,6 +773,9 @@ export class CubeSymbols { protected joinHints() { const { joinHints } = this.resolveSymbolsCallContext || {}; + if (Array.isArray(joinHints)) { + return R.uniq(joinHints); + } return joinHints; } @@ -879,7 +882,7 @@ export class CubeSymbols { } else if (this.symbols[cubeName]?.[name]) { cube = this.cubeReferenceProxy( cubeName, - undefined, + collectJoinHints ? [] : undefined, name ); } From 8e41b1e97feff887227af334bff944002e4cefe8 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 10 Jun 2025 14:52:49 +0300 Subject: [PATCH 02/13] next attempt to make allJoinHints work --- .../src/adapter/BaseQuery.js | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 38b1ccbc5547e..3fe5cedfcda05 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -418,8 +418,10 @@ export class BaseQuery { */ get allJoinHints() { if (!this.collectedJoinHints) { - let joinHints = this.collectJoinHints(); - // let joinHints = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); + // let joinHints = this.collectJoinHints(); + const allMembersJoinHints = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); + const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery()); + let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin()); // One cube may join the other cube via transitive joined cubes, // members from which are referenced in the join `on` clauses. @@ -428,16 +430,28 @@ export class BaseQuery { const prevJoins = this.join; - let newJoin = this.joinGraph.buildJoin([...this.queryLevelJoinHints, ...joinHints]); + let newJoin = this.joinGraph.buildJoin([ + ...this.queryLevelJoinHints, + ...allMembersJoinHints, + ...joinMembersJoinHints, + ...customSubQueryJoinHints, + ]); while (!R.equals(this.join, newJoin)) { this.join = newJoin; - joinHints = R.uniq([joinHints[0], ...this.collectJoinHintsFromMembers(this.joinMembersFromJoin()), ...joinHints]); - newJoin = this.joinGraph.buildJoin([...this.queryLevelJoinHints, ...joinHints]); + joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin()); + newJoin = this.joinGraph.buildJoin([ + ...this.queryLevelJoinHints, + ...allMembersJoinHints, + ...joinMembersJoinHints, + ...customSubQueryJoinHints, + ]); } this.collectedJoinHints = [ ...this.queryLevelJoinHints, - ...joinHints, + ...allMembersJoinHints, + ...joinMembersJoinHints, + ...customSubQueryJoinHints, ]; this.join = prevJoins; @@ -2471,7 +2485,17 @@ export class BaseQuery { * @returns {Array>} */ collectJoinHints(excludeTimeDimensions = false) { - const customSubQueryJoinMembers = this.customSubQueryJoins.map(j => { + const membersToCollectFrom = [ + ...this.allMembersConcat(excludeTimeDimensions), + ...this.joinMembersFromJoin(), + ...this.joinMembersFromCustomSubQuery(), + ]; + + return this.collectJoinHintsFromMembers(membersToCollectFrom); + } + + joinMembersFromCustomSubQuery() { + return this.customSubQueryJoins.map(j => { const res = { path: () => null, cube: () => this.cubeEvaluator.cubeFromPath(j.on.cubeName), @@ -2485,14 +2509,6 @@ export class BaseQuery { getMembers: () => [res], }; }); - - const membersToCollectFrom = [ - ...this.allMembersConcat(excludeTimeDimensions), - ...this.joinMembersFromJoin(), - ...customSubQueryJoinMembers, - ]; - - return this.collectJoinHintsFromMembers(membersToCollectFrom); } joinMembersFromJoin() { From 12bb2e301ab5e0b82728818866050759e1c85ad8 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 10 Jun 2025 16:27:19 +0300 Subject: [PATCH 03/13] udpat traversing to change the ancestors to the original cube hinting --- .../src/adapter/BaseQuery.js | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 3fe5cedfcda05..69221a4d6b27c 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -418,7 +418,7 @@ export class BaseQuery { */ get allJoinHints() { if (!this.collectedJoinHints) { - // let joinHints = this.collectJoinHints(); + // this.collectedJoinHints = this.collectJoinHints(); const allMembersJoinHints = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery()); let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin()); @@ -436,7 +436,7 @@ export class BaseQuery { ...joinMembersJoinHints, ...customSubQueryJoinHints, ]); - while (!R.equals(this.join, newJoin)) { + while (newJoin?.joins.length > 0 && !R.equals(this.join, newJoin)) { this.join = newJoin; joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin()); newJoin = this.joinGraph.buildJoin([ @@ -2462,6 +2462,26 @@ export class BaseQuery { targetIdx = joinHints.findIndex(e => e === targetCube); } joinHints.push(targetCube); + + // Special processing is required when one cube extends another, because in this case + // cube names collected during joins evaluation might belong to ancestors which are out of scope of + // the current query and thus will lead to `Can't find join path to join cubes` error. + // To work around this we change the all ancestors cube names in collected join hints to the original one. + if (s.cube().extends) { + const cubeName = s.cube().name; + let parentCube = this.cubeEvaluator.resolveSymbolsCall(s.cube().extends, (name) => this.cubeEvaluator.cubeFromPath(name)); + while (parentCube) { + // eslint-disable-next-line no-loop-func + joinHints.forEach((item, index, array) => { + if (item === parentCube.name) { + array[index] = cubeName; + } + }); + parentCube = parentCube.extends ? + this.cubeEvaluator.resolveSymbolsCall(parentCube.extends, (name) => this.cubeEvaluator.cubeFromPath(name)) + : null; + } + } } return res; } From 0647a9529a9bb080811de08b9e313c8884f11972 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 10 Jun 2025 17:33:48 +0300 Subject: [PATCH 04/13] change how join hints are constructed --- .../src/adapter/BaseQuery.js | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 69221a4d6b27c..29475c3f2659c 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -419,40 +419,38 @@ export class BaseQuery { get allJoinHints() { if (!this.collectedJoinHints) { // this.collectedJoinHints = this.collectJoinHints(); - const allMembersJoinHints = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); + const [rootOfJoin, ...allMembersJoinHints] = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery()); let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin()); // One cube may join the other cube via transitive joined cubes, // members from which are referenced in the join `on` clauses. - // We need to collect such join hints and push them upfront of the joining one. + // We need to collect such join hints and push them upfront of the joining one + // but only if they don't exist yet. Cause in other case we might affect what + // join path will be constructed in join graph. // It is important to use queryLevelJoinHints during the calculation if it is set. + const constructJP = () => { + const filteredJoinMembersJoinHints = joinMembersJoinHints.filter(m => !allMembersJoinHints.includes(m)); + return [ + ...this.queryLevelJoinHints, + rootOfJoin, + ...filteredJoinMembersJoinHints, + ...allMembersJoinHints, + ...customSubQueryJoinHints, + ]; + }; + const prevJoins = this.join; - let newJoin = this.joinGraph.buildJoin([ - ...this.queryLevelJoinHints, - ...allMembersJoinHints, - ...joinMembersJoinHints, - ...customSubQueryJoinHints, - ]); + let newJoin = this.joinGraph.buildJoin(constructJP()); while (newJoin?.joins.length > 0 && !R.equals(this.join, newJoin)) { this.join = newJoin; joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin()); - newJoin = this.joinGraph.buildJoin([ - ...this.queryLevelJoinHints, - ...allMembersJoinHints, - ...joinMembersJoinHints, - ...customSubQueryJoinHints, - ]); + newJoin = this.joinGraph.buildJoin(constructJP()); } - this.collectedJoinHints = [ - ...this.queryLevelJoinHints, - ...allMembersJoinHints, - ...joinMembersJoinHints, - ...customSubQueryJoinHints, - ]; + this.collectedJoinHints = constructJP(); this.join = prevJoins; } From 9ce0807382fe99a1ecda3a8b457c0baa4682aae5 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 10 Jun 2025 17:34:02 +0300 Subject: [PATCH 05/13] add tests --- .../postgres/sql-generation.test.ts | 143 +++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts index 264e1f851e8b1..c4ce09d4a8ab0 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts @@ -3,7 +3,7 @@ import { UserError } from '../../../src/compiler/UserError'; import { PostgresQuery } from '../../../src/adapter/PostgresQuery'; import { BigqueryQuery } from '../../../src/adapter/BigqueryQuery'; import { PrestodbQuery } from '../../../src/adapter/PrestodbQuery'; -import { prepareJsCompiler } from '../../unit/PrepareCompiler'; +import { prepareJsCompiler, prepareYamlCompiler } from '../../unit/PrepareCompiler'; import { dbRunner } from './PostgresDBRunner'; import { createJoinedCubesSchema } from '../../unit/utils'; import { testWithPreAggregation } from './pre-aggregation-utils'; @@ -4087,4 +4087,145 @@ SELECT 1 AS revenue, cast('2024-01-01' AS timestamp) as time UNION ALL }] ); }); + + describe('Transitive join paths', () => { + // eslint-disable-next-line @typescript-eslint/no-shadow + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(` +cubes: + - name: merchant_dims + sql: | + ( + SELECT 101 AS merchant_sk, 'M1' AS merchant_id + UNION ALL + SELECT 102 AS merchant_sk, 'M2' AS merchant_id + ) + dimensions: + - name: merchant_sk + sql: merchant_sk + type: number + primary_key: true + - name: merchant_id + sql: merchant_id + type: string + + - name: product_dims + sql: | + ( + SELECT 201 AS product_sk, 'P1' AS product_id + UNION ALL + SELECT 202 AS product_sk, 'P2' AS product_id + ) + dimensions: + - name: product_sk + sql: product_sk + type: number + primary_key: true + - name: product_id + sql: product_id + type: string + + - name: merchant_and_product_dims + sql: | + ( + SELECT 'M1' AS merchant_id, 'P1' AS product_id, 'Organic' AS acquisition_channel + UNION ALL + SELECT 'M1' AS merchant_id, 'P2' AS product_id, 'Paid' AS acquisition_channel + UNION ALL + SELECT 'M2' AS merchant_id, 'P1' AS product_id, 'Referral' AS acquisition_channel + ) + dimensions: + - name: product_id + sql: product_id + type: string + primary_key: true + - name: merchant_id + sql: merchant_id + type: string + primary_key: true + - name: acquisition_channel + sql: acquisition_channel + type: string + + - name: test_facts + sql: | + ( + SELECT DATE '2023-01-01' AS reporting_date, 101 AS merchant_sk, 201 AS product_sk, 100 AS amount + UNION ALL + SELECT DATE '2023-01-01' AS reporting_date, 101 AS merchant_sk, 202 AS product_sk, 150 AS amount + UNION ALL + SELECT DATE '2023-01-02' AS reporting_date, 102 AS merchant_sk, 201 AS product_sk, 200 AS amount + ) + joins: + - name: merchant_dims + relationship: many_to_one + sql: "{CUBE}.merchant_sk = {merchant_dims.merchant_sk}" + - name: product_dims + relationship: many_to_one + sql: "{CUBE}.product_sk = {product_dims.product_sk}" + - name: merchant_and_product_dims # This join depends on merchant_dims and product_dims + relationship: many_to_one + sql: "{merchant_dims.merchant_id} = {merchant_and_product_dims.merchant_id} AND {product_dims.product_id} = {merchant_and_product_dims.product_id}" + dimensions: + - name: reporting_date + sql: reporting_date + type: time + primary_key: true + - name: merchant_sk + sql: merchant_sk + type: number + primary_key: true + - name: product_sk + sql: product_sk + type: number + primary_key: true + - name: acquisition_channel # This dimension triggers the join to merchant_and_product_dims + sql: "{merchant_and_product_dims.acquisition_channel}" + type: string + measures: + - name: amount_sum + sql: amount + type: sum + `); + + it('querying cube dimension that require transitive joins', async () => { + await compiler.compile(); + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'test_facts.reporting_date', + 'test_facts.merchant_sk', + 'test_facts.product_sk', + 'test_facts.acquisition_channel' + ], + order: [{ + id: 'test_facts.acquisition_channel' + }], + timezone: 'America/Los_Angeles' + }); + + const res = await dbRunner.testQuery(query.buildSqlAndParams()); + console.log(JSON.stringify(res)); + + expect(res).toEqual([ + { + test_facts__acquisition_channel: 'Organic', + test_facts__merchant_sk: 101, + test_facts__product_sk: 201, + test_facts__reporting_date: '2023-01-01T00:00:00.000Z', + }, + { + test_facts__acquisition_channel: 'Paid', + test_facts__merchant_sk: 101, + test_facts__product_sk: 202, + test_facts__reporting_date: '2023-01-01T00:00:00.000Z', + }, + { + test_facts__acquisition_channel: 'Referral', + test_facts__merchant_sk: 102, + test_facts__product_sk: 201, + test_facts__reporting_date: '2023-01-02T00:00:00.000Z', + }, + ]); + }); + }); }); From f5e2222788035ffdb917afae432aa20a88caa997 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 10 Jun 2025 19:10:14 +0300 Subject: [PATCH 06/13] fix allJoinHints() for cases when there are no members --- .../src/adapter/BaseQuery.js | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 29475c3f2659c..b7c4d0376c2b6 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -421,7 +421,7 @@ export class BaseQuery { // this.collectedJoinHints = this.collectJoinHints(); const [rootOfJoin, ...allMembersJoinHints] = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery()); - let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin()); + let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(this.join)); // One cube may join the other cube via transitive joined cubes, // members from which are referenced in the join `on` clauses. @@ -434,25 +434,23 @@ export class BaseQuery { const filteredJoinMembersJoinHints = joinMembersJoinHints.filter(m => !allMembersJoinHints.includes(m)); return [ ...this.queryLevelJoinHints, - rootOfJoin, + ...(rootOfJoin ? [rootOfJoin] : []), ...filteredJoinMembersJoinHints, ...allMembersJoinHints, ...customSubQueryJoinHints, ]; }; - const prevJoins = this.join; - + let prevJoins = this.join; let newJoin = this.joinGraph.buildJoin(constructJP()); - while (newJoin?.joins.length > 0 && !R.equals(this.join, newJoin)) { - this.join = newJoin; - joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin()); + + while (newJoin?.joins.length > 0 && !R.equals(prevJoins, newJoin)) { + prevJoins = newJoin; + joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin)); newJoin = this.joinGraph.buildJoin(constructJP()); } - this.collectedJoinHints = constructJP(); - - this.join = prevJoins; + this.collectedJoinHints = R.uniq(constructJP()); } return this.collectedJoinHints; } @@ -2505,7 +2503,7 @@ export class BaseQuery { collectJoinHints(excludeTimeDimensions = false) { const membersToCollectFrom = [ ...this.allMembersConcat(excludeTimeDimensions), - ...this.joinMembersFromJoin(), + ...this.joinMembersFromJoin(this.join), ...this.joinMembersFromCustomSubQuery(), ]; @@ -2529,8 +2527,8 @@ export class BaseQuery { }); } - joinMembersFromJoin() { - return this.join ? this.join.joins.map(j => ({ + joinMembersFromJoin(join) { + return join ? join.joins.map(j => ({ getMembers: () => [{ path: () => null, cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom), From aa36bde7ce370d2c360b0ff7e6097122f37814f3 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 10 Jun 2025 20:44:49 +0300 Subject: [PATCH 07/13] remove comment --- packages/cubejs-schema-compiler/src/adapter/BaseQuery.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index b7c4d0376c2b6..9e877e3bc59a8 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -418,7 +418,6 @@ export class BaseQuery { */ get allJoinHints() { if (!this.collectedJoinHints) { - // this.collectedJoinHints = this.collectJoinHints(); const [rootOfJoin, ...allMembersJoinHints] = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery()); let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(this.join)); From 209f995c5bdd750b6ef1f8304442af1c4f38cc8a Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 13 Jun 2025 13:53:00 +0300 Subject: [PATCH 08/13] code polish --- packages/cubejs-schema-compiler/src/adapter/BaseQuery.js | 8 ++++---- .../test/integration/postgres/cube-views.test.ts | 1 + .../test/integration/postgres/sql-generation.test.ts | 5 ++++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 9e877e3bc59a8..f9d3409bec9cc 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -429,7 +429,7 @@ export class BaseQuery { // join path will be constructed in join graph. // It is important to use queryLevelJoinHints during the calculation if it is set. - const constructJP = () => { + const constructJH = () => { const filteredJoinMembersJoinHints = joinMembersJoinHints.filter(m => !allMembersJoinHints.includes(m)); return [ ...this.queryLevelJoinHints, @@ -441,15 +441,15 @@ export class BaseQuery { }; let prevJoins = this.join; - let newJoin = this.joinGraph.buildJoin(constructJP()); + let newJoin = this.joinGraph.buildJoin(constructJH()); while (newJoin?.joins.length > 0 && !R.equals(prevJoins, newJoin)) { prevJoins = newJoin; joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin)); - newJoin = this.joinGraph.buildJoin(constructJP()); + newJoin = this.joinGraph.buildJoin(constructJH()); } - this.collectedJoinHints = R.uniq(constructJP()); + this.collectedJoinHints = R.uniq(constructJH()); } return this.collectedJoinHints; } diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/cube-views.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/cube-views.test.ts index eb10187737d1d..049e938ba2243 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/cube-views.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/cube-views.test.ts @@ -6,6 +6,7 @@ import { dbRunner } from './PostgresDBRunner'; describe('Cube Views', () => { jest.setTimeout(200000); + // language=JavaScript const { compiler, joinGraph, cubeEvaluator, metaTransformer } = prepareJsCompiler(` cube(\`Orders\`, { sql: \` diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts index c4ce09d4a8ab0..a065719c5f27a 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts @@ -11,6 +11,7 @@ import { testWithPreAggregation } from './pre-aggregation-utils'; describe('SQL Generation', () => { jest.setTimeout(200000); + // language=JavaScript const { compiler, joinGraph, cubeEvaluator } = prepareJsCompiler(` const perVisitorRevenueMeasure = { type: 'number', @@ -4090,7 +4091,9 @@ SELECT 1 AS revenue, cast('2024-01-01' AS timestamp) as time UNION ALL describe('Transitive join paths', () => { // eslint-disable-next-line @typescript-eslint/no-shadow - const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(` + const { compiler, joinGraph, cubeEvaluator } = + // language=yaml + prepareYamlCompiler(` cubes: - name: merchant_dims sql: | From bb08799163a199854794d9bab7011f77e289a725 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 13 Jun 2025 16:47:24 +0300 Subject: [PATCH 09/13] implement smart join tree comparator and loop detector --- .../src/adapter/BaseQuery.js | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index f9d3409bec9cc..9947d0504b936 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -441,12 +441,47 @@ export class BaseQuery { }; let prevJoins = this.join; + let prevJoinMembersJoinHints = joinMembersJoinHints; let newJoin = this.joinGraph.buildJoin(constructJH()); - while (newJoin?.joins.length > 0 && !R.equals(prevJoins, newJoin)) { + const isOrderPreserved = (base, updated) => { + const common = base.filter(value => updated.includes(value)); + const bFiltered = updated.filter(value => common.includes(value)); + + return common.every((x, i) => x === bFiltered[i]); + }; + + const isJoinTreesEqual = (a, b) => { + if (!a || !b || a.root !== b.root || a.joins.length !== b.joins.length) { + return false; + } + + // We don't care about the order of joins on the same level, so + // we can compare them as sets. + const aJoinsSet = new Set(a.joins.map(j => `${j.originalFrom}->${j.originalTo}`)); + const bJoinsSet = new Set(b.joins.map(j => `${j.originalFrom}->${j.originalTo}`)); + + if (aJoinsSet.size !== bJoinsSet.size) { + return false; + } + + for (const val of aJoinsSet) { + if (!bJoinsSet.has(val)) { + return false; + } + } + + return true; + }; + + while (newJoin?.joins.length > 0 && !isJoinTreesEqual(prevJoins, newJoin)) { prevJoins = newJoin; joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin)); + if (!isOrderPreserved(prevJoinMembersJoinHints, joinMembersJoinHints)) { + throw new UserError(`Can not construct joins for the query, potential loop detected: ${prevJoinMembersJoinHints.join('->')} vs ${joinMembersJoinHints.join('->')}`); + } newJoin = this.joinGraph.buildJoin(constructJH()); + prevJoinMembersJoinHints = joinMembersJoinHints; } this.collectedJoinHints = R.uniq(constructJH()); From 4cd96e694491aeb5616266f01bd957a0d2b8b1ba Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 13 Jun 2025 16:47:32 +0300 Subject: [PATCH 10/13] add test for loop --- .../postgres/sql-generation.test.ts | 123 +++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts index a065719c5f27a..b284269b813e4 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts @@ -4188,7 +4188,104 @@ cubes: - name: amount_sum sql: amount type: sum - `); + +# Join loop for testing transitive joins + - name: alpha_facts + sql: | + ( + SELECT DATE '2023-01-01' AS reporting_date, 1 AS a_id, 10 AS b_id, 100 AS amount + UNION ALL + SELECT DATE '2023-01-02' AS reporting_date, 2 AS a_id, 20 AS b_id, 150 AS amount + ) + joins: + - name: beta_dims + relationship: many_to_one + sql: "{CUBE}.a_id = {beta_dims.a_id}" + - name: gamma_dims + relationship: many_to_one + sql: "{CUBE}.b_id = {gamma_dims.b_id}" + - name: delta_bridge + relationship: many_to_one + sql: "{beta_dims.a_name} = {delta_bridge.a_name} AND {gamma_dims.b_name} = {delta_bridge.b_name}" + dimensions: + - name: reporting_date + sql: reporting_date + type: time + primary_key: true + - name: a_id + sql: a_id + type: number + primary_key: true + - name: b_id + sql: b_id + type: number + primary_key: true + - name: channel + sql: "{delta_bridge.channel}" + type: string + measures: + - name: amount_sum + sql: amount + type: sum + + - name: beta_dims + sql: | + ( + SELECT 1 AS a_id, 'Alpha1' AS a_name + UNION ALL + SELECT 2 AS a_id, 'Alpha2' AS a_name + ) + dimensions: + - name: a_id + sql: a_id + type: number + primary_key: true + - name: a_name + sql: a_name + type: string + + - name: gamma_dims + sql: | + ( + SELECT 10 AS b_id, 'Beta1' AS b_name + UNION ALL + SELECT 20 AS b_id, 'Beta2' AS b_name + ) + dimensions: + - name: b_id + sql: b_id + type: number + primary_key: true + - name: b_name + sql: b_name + type: string + + - name: delta_bridge + sql: | + ( + SELECT 'Alpha1' AS a_name, 'Beta1' AS b_name, 'Organic' AS channel + UNION ALL + SELECT 'Alpha1' AS a_name, 'Beta2' AS b_name, 'Paid' AS channel + UNION ALL + SELECT 'Alpha2' AS a_name, 'Beta1' AS b_name, 'Referral' AS channel + ) + joins: + - name: gamma_dims + relationship: many_to_one + sql: "{CUBE}.b_name = {gamma_dims.b_name}" + dimensions: + - name: a_name + sql: a_name + type: string + primary_key: true + - name: b_name + sql: "{gamma_dims.b_name}" + type: string + primary_key: true + - name: channel + sql: channel + type: string + `); it('querying cube dimension that require transitive joins', async () => { await compiler.compile(); @@ -4230,5 +4327,29 @@ cubes: }, ]); }); + + it('querying cube with transitive joins with loop', async () => { + await compiler.compile(); + + try { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'alpha_facts.reporting_date', + 'delta_bridge.b_name', + 'alpha_facts.channel' + ], + order: [{ + id: 'alpha_facts.reporting_date' + }], + timezone: 'America/Los_Angeles' + }); + + await dbRunner.testQuery(query.buildSqlAndParams()); + throw new Error('Should have thrown an error'); + } catch (err: any) { + expect(err.message).toContain('Can not construct joins for the query, potential loop detected'); + } + }); }); }); From e158147b35b9abd7d9fb0f17e24b9675b0b5b7fa Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 13 Jun 2025 18:03:23 +0300 Subject: [PATCH 11/13] add safeguard --- packages/cubejs-schema-compiler/src/adapter/BaseQuery.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 9947d0504b936..d8adef1ef8d21 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -474,7 +474,10 @@ export class BaseQuery { return true; }; - while (newJoin?.joins.length > 0 && !isJoinTreesEqual(prevJoins, newJoin)) { + // Safeguard against infinite loop in case of cyclic joins somehow managed to slip through + let cnt = 0; + + while (newJoin?.joins.length > 0 && !isJoinTreesEqual(prevJoins, newJoin) && cnt < 10000) { prevJoins = newJoin; joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin)); if (!isOrderPreserved(prevJoinMembersJoinHints, joinMembersJoinHints)) { @@ -482,6 +485,7 @@ export class BaseQuery { } newJoin = this.joinGraph.buildJoin(constructJH()); prevJoinMembersJoinHints = joinMembersJoinHints; + cnt++; } this.collectedJoinHints = R.uniq(constructJH()); From d0e8bc103abcb29a77e3a35e497a0511cc2a8f61 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 17 Jun 2025 16:06:13 +0300 Subject: [PATCH 12/13] some code polish + throw error for loop detected --- .../cubejs-schema-compiler/src/adapter/BaseQuery.js | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index d8adef1ef8d21..13b308eea0790 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -488,6 +488,10 @@ export class BaseQuery { cnt++; } + if (cnt >= 10000) { + throw new UserError('Can not construct joins for the query, potential loop detected'); + } + this.collectedJoinHints = R.uniq(constructJH()); } return this.collectedJoinHints; @@ -2489,13 +2493,10 @@ export class BaseQuery { // In a join between Cube A and Cube B, sql() may reference members from other cubes. // These referenced cubes must be added as join hints before Cube B to ensure correct SQL generation. const targetCube = s.targetCubeName(); - const { joinHints } = this.safeEvaluateSymbolContext(); - let targetIdx = joinHints.findIndex(e => e === targetCube); - while (targetIdx > -1) { - joinHints.splice(targetIdx, 1); - targetIdx = joinHints.findIndex(e => e === targetCube); - } + let { joinHints } = this.safeEvaluateSymbolContext(); + joinHints = joinHints.filter(e => e !== targetCube); joinHints.push(targetCube); + this.safeEvaluateSymbolContext().joinHints = joinHints; // Special processing is required when one cube extends another, because in this case // cube names collected during joins evaluation might belong to ancestors which are out of scope of From 42f36f46dc46c7b4cb71338c9d4866d9d6ded762 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Fri, 20 Jun 2025 23:02:31 +0300 Subject: [PATCH 13/13] fix tests and remove unneeded magic --- .../src/adapter/BaseQuery.js | 20 ------------------- .../postgres/pre-aggregations.test.ts | 2 +- .../postgresql/schema/PreAggregationTest.js | 2 +- 3 files changed, 2 insertions(+), 22 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 13b308eea0790..cc9edde5d38ab 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -2497,26 +2497,6 @@ export class BaseQuery { joinHints = joinHints.filter(e => e !== targetCube); joinHints.push(targetCube); this.safeEvaluateSymbolContext().joinHints = joinHints; - - // Special processing is required when one cube extends another, because in this case - // cube names collected during joins evaluation might belong to ancestors which are out of scope of - // the current query and thus will lead to `Can't find join path to join cubes` error. - // To work around this we change the all ancestors cube names in collected join hints to the original one. - if (s.cube().extends) { - const cubeName = s.cube().name; - let parentCube = this.cubeEvaluator.resolveSymbolsCall(s.cube().extends, (name) => this.cubeEvaluator.cubeFromPath(name)); - while (parentCube) { - // eslint-disable-next-line no-loop-func - joinHints.forEach((item, index, array) => { - if (item === parentCube.name) { - array[index] = cubeName; - } - }); - parentCube = parentCube.extends ? - this.cubeEvaluator.resolveSymbolsCall(parentCube.extends, (name) => this.cubeEvaluator.cubeFromPath(name)) - : null; - } - } } return res; } diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts index a5ab7c78f2022..544025ac15dd5 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts @@ -26,7 +26,7 @@ describe('PreAggregations', () => { cards: { relationship: 'hasMany', - sql: \`\${visitors.id} = \${cards.visitorId}\` + sql: \`\${CUBE.id} = \${cards.visitorId}\` } }, diff --git a/packages/cubejs-testing/birdbox-fixtures/postgresql/schema/PreAggregationTest.js b/packages/cubejs-testing/birdbox-fixtures/postgresql/schema/PreAggregationTest.js index 6d37626b7250f..faad2a5d0f655 100644 --- a/packages/cubejs-testing/birdbox-fixtures/postgresql/schema/PreAggregationTest.js +++ b/packages/cubejs-testing/birdbox-fixtures/postgresql/schema/PreAggregationTest.js @@ -12,7 +12,7 @@ cube(`visitors`, { cards: { relationship: 'hasMany', - sql: `${visitors.id} = ${cards.visitorId}` + sql: `${CUBE.id} = ${cards.visitorId}` } },