diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index dadc6a3a6392d..cc9edde5d38ab 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -418,10 +418,81 @@ export class BaseQuery { */ get allJoinHints() { if (!this.collectedJoinHints) { - this.collectedJoinHints = [ - ...this.queryLevelJoinHints, - ...this.collectJoinHints(), - ]; + const [rootOfJoin, ...allMembersJoinHints] = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); + const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery()); + let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(this.join)); + + // One cube may join the other cube via transitive joined cubes, + // members from which are referenced in the join `on` clauses. + // We need to collect such join hints and push them upfront of the joining one + // but only if they don't exist yet. Cause in other case we might affect what + // join path will be constructed in join graph. + // It is important to use queryLevelJoinHints during the calculation if it is set. + + const constructJH = () => { + const filteredJoinMembersJoinHints = joinMembersJoinHints.filter(m => !allMembersJoinHints.includes(m)); + return [ + ...this.queryLevelJoinHints, + ...(rootOfJoin ? [rootOfJoin] : []), + ...filteredJoinMembersJoinHints, + ...allMembersJoinHints, + ...customSubQueryJoinHints, + ]; + }; + + let prevJoins = this.join; + let prevJoinMembersJoinHints = joinMembersJoinHints; + let newJoin = this.joinGraph.buildJoin(constructJH()); + + const isOrderPreserved = (base, updated) => { + const common = base.filter(value => updated.includes(value)); + const bFiltered = updated.filter(value => common.includes(value)); + + return common.every((x, i) => x === bFiltered[i]); + }; + + const isJoinTreesEqual = (a, b) => { + if (!a || !b || a.root !== b.root || a.joins.length !== b.joins.length) { + return false; + } + + // We don't care about the order of joins on the same level, so + // we can compare them as sets. + const aJoinsSet = new Set(a.joins.map(j => `${j.originalFrom}->${j.originalTo}`)); + const bJoinsSet = new Set(b.joins.map(j => `${j.originalFrom}->${j.originalTo}`)); + + if (aJoinsSet.size !== bJoinsSet.size) { + return false; + } + + for (const val of aJoinsSet) { + if (!bJoinsSet.has(val)) { + return false; + } + } + + return true; + }; + + // Safeguard against infinite loop in case of cyclic joins somehow managed to slip through + let cnt = 0; + + while (newJoin?.joins.length > 0 && !isJoinTreesEqual(prevJoins, newJoin) && cnt < 10000) { + prevJoins = newJoin; + joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin)); + if (!isOrderPreserved(prevJoinMembersJoinHints, joinMembersJoinHints)) { + throw new UserError(`Can not construct joins for the query, potential loop detected: ${prevJoinMembersJoinHints.join('->')} vs ${joinMembersJoinHints.join('->')}`); + } + newJoin = this.joinGraph.buildJoin(constructJH()); + prevJoinMembersJoinHints = joinMembersJoinHints; + cnt++; + } + + if (cnt >= 10000) { + throw new UserError('Can not construct joins for the query, potential loop detected'); + } + + this.collectedJoinHints = R.uniq(constructJH()); } return this.collectedJoinHints; } @@ -2417,7 +2488,17 @@ export class BaseQuery { } else if (s.patchedMeasure?.patchedFrom) { return [s.patchedMeasure.patchedFrom.cubeName].concat(this.evaluateSymbolSql(s.patchedMeasure.patchedFrom.cubeName, s.patchedMeasure.patchedFrom.name, s.definition())); } else { - return this.evaluateSql(s.cube().name, s.definition().sql); + const res = this.evaluateSql(s.cube().name, s.definition().sql); + if (s.isJoinCondition) { + // In a join between Cube A and Cube B, sql() may reference members from other cubes. + // These referenced cubes must be added as join hints before Cube B to ensure correct SQL generation. + const targetCube = s.targetCubeName(); + let { joinHints } = this.safeEvaluateSymbolContext(); + joinHints = joinHints.filter(e => e !== targetCube); + joinHints.push(targetCube); + this.safeEvaluateSymbolContext().joinHints = joinHints; + } + return res; } } @@ -2439,7 +2520,17 @@ export class BaseQuery { * @returns {Array>} */ collectJoinHints(excludeTimeDimensions = false) { - const customSubQueryJoinMembers = this.customSubQueryJoins.map(j => { + const membersToCollectFrom = [ + ...this.allMembersConcat(excludeTimeDimensions), + ...this.joinMembersFromJoin(this.join), + ...this.joinMembersFromCustomSubQuery(), + ]; + + return this.collectJoinHintsFromMembers(membersToCollectFrom); + } + + joinMembersFromCustomSubQuery() { + return this.customSubQueryJoins.map(j => { const res = { path: () => null, cube: () => this.cubeEvaluator.cubeFromPath(j.on.cubeName), @@ -2453,22 +2544,18 @@ export class BaseQuery { getMembers: () => [res], }; }); + } - const joinMembers = this.join ? this.join.joins.map(j => ({ + joinMembersFromJoin(join) { + return join ? join.joins.map(j => ({ getMembers: () => [{ path: () => null, cube: () => this.cubeEvaluator.cubeFromPath(j.originalFrom), definition: () => j.join, + isJoinCondition: true, + targetCubeName: () => j.originalTo, }] })) : []; - - const membersToCollectFrom = [ - ...this.allMembersConcat(excludeTimeDimensions), - ...joinMembers, - ...customSubQueryJoinMembers, - ]; - - return this.collectJoinHintsFromMembers(membersToCollectFrom); } collectJoinHintsFromMembers(members) { @@ -2786,7 +2873,7 @@ export class BaseQuery { pushJoinHints(joinHints) { if (this.safeEvaluateSymbolContext().joinHints && joinHints) { - if (joinHints.length === 1) { + if (Array.isArray(joinHints) && joinHints.length === 1) { [joinHints] = joinHints; } this.safeEvaluateSymbolContext().joinHints.push(joinHints); diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts index ba7e0ec33e049..a6449ebb2b227 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts @@ -773,6 +773,9 @@ export class CubeSymbols { protected joinHints() { const { joinHints } = this.resolveSymbolsCallContext || {}; + if (Array.isArray(joinHints)) { + return R.uniq(joinHints); + } return joinHints; } @@ -879,7 +882,7 @@ export class CubeSymbols { } else if (this.symbols[cubeName]?.[name]) { cube = this.cubeReferenceProxy( cubeName, - undefined, + collectJoinHints ? [] : undefined, name ); } diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/cube-views.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/cube-views.test.ts index eb10187737d1d..049e938ba2243 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/cube-views.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/cube-views.test.ts @@ -6,6 +6,7 @@ import { dbRunner } from './PostgresDBRunner'; describe('Cube Views', () => { jest.setTimeout(200000); + // language=JavaScript const { compiler, joinGraph, cubeEvaluator, metaTransformer } = prepareJsCompiler(` cube(\`Orders\`, { sql: \` diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts index a5ab7c78f2022..544025ac15dd5 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/pre-aggregations.test.ts @@ -26,7 +26,7 @@ describe('PreAggregations', () => { cards: { relationship: 'hasMany', - sql: \`\${visitors.id} = \${cards.visitorId}\` + sql: \`\${CUBE.id} = \${cards.visitorId}\` } }, diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts index 264e1f851e8b1..b284269b813e4 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts @@ -3,7 +3,7 @@ import { UserError } from '../../../src/compiler/UserError'; import { PostgresQuery } from '../../../src/adapter/PostgresQuery'; import { BigqueryQuery } from '../../../src/adapter/BigqueryQuery'; import { PrestodbQuery } from '../../../src/adapter/PrestodbQuery'; -import { prepareJsCompiler } from '../../unit/PrepareCompiler'; +import { prepareJsCompiler, prepareYamlCompiler } from '../../unit/PrepareCompiler'; import { dbRunner } from './PostgresDBRunner'; import { createJoinedCubesSchema } from '../../unit/utils'; import { testWithPreAggregation } from './pre-aggregation-utils'; @@ -11,6 +11,7 @@ import { testWithPreAggregation } from './pre-aggregation-utils'; describe('SQL Generation', () => { jest.setTimeout(200000); + // language=JavaScript const { compiler, joinGraph, cubeEvaluator } = prepareJsCompiler(` const perVisitorRevenueMeasure = { type: 'number', @@ -4087,4 +4088,268 @@ SELECT 1 AS revenue, cast('2024-01-01' AS timestamp) as time UNION ALL }] ); }); + + describe('Transitive join paths', () => { + // eslint-disable-next-line @typescript-eslint/no-shadow + const { compiler, joinGraph, cubeEvaluator } = + // language=yaml + prepareYamlCompiler(` +cubes: + - name: merchant_dims + sql: | + ( + SELECT 101 AS merchant_sk, 'M1' AS merchant_id + UNION ALL + SELECT 102 AS merchant_sk, 'M2' AS merchant_id + ) + dimensions: + - name: merchant_sk + sql: merchant_sk + type: number + primary_key: true + - name: merchant_id + sql: merchant_id + type: string + + - name: product_dims + sql: | + ( + SELECT 201 AS product_sk, 'P1' AS product_id + UNION ALL + SELECT 202 AS product_sk, 'P2' AS product_id + ) + dimensions: + - name: product_sk + sql: product_sk + type: number + primary_key: true + - name: product_id + sql: product_id + type: string + + - name: merchant_and_product_dims + sql: | + ( + SELECT 'M1' AS merchant_id, 'P1' AS product_id, 'Organic' AS acquisition_channel + UNION ALL + SELECT 'M1' AS merchant_id, 'P2' AS product_id, 'Paid' AS acquisition_channel + UNION ALL + SELECT 'M2' AS merchant_id, 'P1' AS product_id, 'Referral' AS acquisition_channel + ) + dimensions: + - name: product_id + sql: product_id + type: string + primary_key: true + - name: merchant_id + sql: merchant_id + type: string + primary_key: true + - name: acquisition_channel + sql: acquisition_channel + type: string + + - name: test_facts + sql: | + ( + SELECT DATE '2023-01-01' AS reporting_date, 101 AS merchant_sk, 201 AS product_sk, 100 AS amount + UNION ALL + SELECT DATE '2023-01-01' AS reporting_date, 101 AS merchant_sk, 202 AS product_sk, 150 AS amount + UNION ALL + SELECT DATE '2023-01-02' AS reporting_date, 102 AS merchant_sk, 201 AS product_sk, 200 AS amount + ) + joins: + - name: merchant_dims + relationship: many_to_one + sql: "{CUBE}.merchant_sk = {merchant_dims.merchant_sk}" + - name: product_dims + relationship: many_to_one + sql: "{CUBE}.product_sk = {product_dims.product_sk}" + - name: merchant_and_product_dims # This join depends on merchant_dims and product_dims + relationship: many_to_one + sql: "{merchant_dims.merchant_id} = {merchant_and_product_dims.merchant_id} AND {product_dims.product_id} = {merchant_and_product_dims.product_id}" + dimensions: + - name: reporting_date + sql: reporting_date + type: time + primary_key: true + - name: merchant_sk + sql: merchant_sk + type: number + primary_key: true + - name: product_sk + sql: product_sk + type: number + primary_key: true + - name: acquisition_channel # This dimension triggers the join to merchant_and_product_dims + sql: "{merchant_and_product_dims.acquisition_channel}" + type: string + measures: + - name: amount_sum + sql: amount + type: sum + +# Join loop for testing transitive joins + - name: alpha_facts + sql: | + ( + SELECT DATE '2023-01-01' AS reporting_date, 1 AS a_id, 10 AS b_id, 100 AS amount + UNION ALL + SELECT DATE '2023-01-02' AS reporting_date, 2 AS a_id, 20 AS b_id, 150 AS amount + ) + joins: + - name: beta_dims + relationship: many_to_one + sql: "{CUBE}.a_id = {beta_dims.a_id}" + - name: gamma_dims + relationship: many_to_one + sql: "{CUBE}.b_id = {gamma_dims.b_id}" + - name: delta_bridge + relationship: many_to_one + sql: "{beta_dims.a_name} = {delta_bridge.a_name} AND {gamma_dims.b_name} = {delta_bridge.b_name}" + dimensions: + - name: reporting_date + sql: reporting_date + type: time + primary_key: true + - name: a_id + sql: a_id + type: number + primary_key: true + - name: b_id + sql: b_id + type: number + primary_key: true + - name: channel + sql: "{delta_bridge.channel}" + type: string + measures: + - name: amount_sum + sql: amount + type: sum + + - name: beta_dims + sql: | + ( + SELECT 1 AS a_id, 'Alpha1' AS a_name + UNION ALL + SELECT 2 AS a_id, 'Alpha2' AS a_name + ) + dimensions: + - name: a_id + sql: a_id + type: number + primary_key: true + - name: a_name + sql: a_name + type: string + + - name: gamma_dims + sql: | + ( + SELECT 10 AS b_id, 'Beta1' AS b_name + UNION ALL + SELECT 20 AS b_id, 'Beta2' AS b_name + ) + dimensions: + - name: b_id + sql: b_id + type: number + primary_key: true + - name: b_name + sql: b_name + type: string + + - name: delta_bridge + sql: | + ( + SELECT 'Alpha1' AS a_name, 'Beta1' AS b_name, 'Organic' AS channel + UNION ALL + SELECT 'Alpha1' AS a_name, 'Beta2' AS b_name, 'Paid' AS channel + UNION ALL + SELECT 'Alpha2' AS a_name, 'Beta1' AS b_name, 'Referral' AS channel + ) + joins: + - name: gamma_dims + relationship: many_to_one + sql: "{CUBE}.b_name = {gamma_dims.b_name}" + dimensions: + - name: a_name + sql: a_name + type: string + primary_key: true + - name: b_name + sql: "{gamma_dims.b_name}" + type: string + primary_key: true + - name: channel + sql: channel + type: string + `); + + it('querying cube dimension that require transitive joins', async () => { + await compiler.compile(); + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'test_facts.reporting_date', + 'test_facts.merchant_sk', + 'test_facts.product_sk', + 'test_facts.acquisition_channel' + ], + order: [{ + id: 'test_facts.acquisition_channel' + }], + timezone: 'America/Los_Angeles' + }); + + const res = await dbRunner.testQuery(query.buildSqlAndParams()); + console.log(JSON.stringify(res)); + + expect(res).toEqual([ + { + test_facts__acquisition_channel: 'Organic', + test_facts__merchant_sk: 101, + test_facts__product_sk: 201, + test_facts__reporting_date: '2023-01-01T00:00:00.000Z', + }, + { + test_facts__acquisition_channel: 'Paid', + test_facts__merchant_sk: 101, + test_facts__product_sk: 202, + test_facts__reporting_date: '2023-01-01T00:00:00.000Z', + }, + { + test_facts__acquisition_channel: 'Referral', + test_facts__merchant_sk: 102, + test_facts__product_sk: 201, + test_facts__reporting_date: '2023-01-02T00:00:00.000Z', + }, + ]); + }); + + it('querying cube with transitive joins with loop', async () => { + await compiler.compile(); + + try { + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'alpha_facts.reporting_date', + 'delta_bridge.b_name', + 'alpha_facts.channel' + ], + order: [{ + id: 'alpha_facts.reporting_date' + }], + timezone: 'America/Los_Angeles' + }); + + await dbRunner.testQuery(query.buildSqlAndParams()); + throw new Error('Should have thrown an error'); + } catch (err: any) { + expect(err.message).toContain('Can not construct joins for the query, potential loop detected'); + } + }); + }); }); diff --git a/packages/cubejs-testing/birdbox-fixtures/postgresql/schema/PreAggregationTest.js b/packages/cubejs-testing/birdbox-fixtures/postgresql/schema/PreAggregationTest.js index 6d37626b7250f..faad2a5d0f655 100644 --- a/packages/cubejs-testing/birdbox-fixtures/postgresql/schema/PreAggregationTest.js +++ b/packages/cubejs-testing/birdbox-fixtures/postgresql/schema/PreAggregationTest.js @@ -12,7 +12,7 @@ cube(`visitors`, { cards: { relationship: 'hasMany', - sql: `${visitors.id} = ${cards.visitorId}` + sql: `${CUBE.id} = ${cards.visitorId}` } },