Skip to content

Commit f556b62

Browse files
authored
feat(schema-compiler): Use views join maps and hints for query joins construction (#10039)
* add tests
* enable view join tests for tesseract
* fix test
* temp comment out tests
* correct additional hints predecessors comparator OMG! Is it really working?! everything works besides loop detection correct additional hints: everything works besides loop detection
* trying to adopt loop test fix actually incorrect tests
* linter fix
* small fix in error handling
* add test case for join maps test
* implement join maps
* update snapshot
* refactor tests
* add more tests
* return back loop for join resolution
* fix typo
* join map in tesseract
* enable view join tests for tesseract
* cargo fmt
* uncomment test
* remove println!
* remove unneeded rootOfJoin
* refactor tests
* fix linter warning
* refactor allJoinHints()
* refactor: extract inlined isJoinTreesEqual()
* remove unused
* add support for transitive joins in tesseract
* uncomment transitive joins tests for tesseract
* cargo fmt
1 parent 324ecfa commit f556b62

File tree

18 files changed

+772
-236
lines changed

18 files changed

+772
-236
lines changed

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 169 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import {
2222
localTimestampToUtc,
2323
timeSeries as timeSeriesBase,
2424
timeSeriesFromCustomInterval,
25-
parseSqlInterval,
2625
findMinGranularityDimension
2726
} from '@cubejs-backend/shared';
2827

@@ -387,12 +386,59 @@ export class BaseQuery {
387386
}
388387
}
389388

389+
/**
390+
* Used by native code.
391+
* This function follows the same logic as this.collectJoinHints().
392+
* @private
393+
* @param {Array<(Array<string> | string)>} hints
394+
* @return {import('../compiler/JoinGraph').FinishedJoinTree}
395+
*/
396+
// Builds a join tree for the given hints. The join is rebuilt in a loop: after each
// build, hints collected from the members of the resulting join are fed back into the
// hint list, until the loop condition at the bottom stops holding.
// Throws UserError when the iteration cap (10000) is reached.
joinTreeForHints(hints) {
397+
// Every element that appears inside an array-form hint, flattened into one Set.
const explicitJoinHintMembers = new Set(hints.filter(j => Array.isArray(j)).flat());
398+
const queryJoinMaps = this.queryJoinMap();
399+
// Hints gathered from join members on earlier iterations; grows while the loop runs.
const newCollectedHints = [];
400+
401+
// Rebuilds the hint list on every call (collected hints first, then the caller hints),
// passes it through enrichHintsWithJoinMap() and de-duplicates the result with R.uniq.
const constructJH = () => R.uniq(this.enrichHintsWithJoinMap([
402+
...newCollectedHints,
403+
...hints,
404+
],
405+
queryJoinMaps));
406+
407+
let prevJoin = null;
408+
let newJoin = null;
409+
410+
// Safeguard against an infinite loop in case cyclic joins somehow managed to slip through
411+
let cnt = 0;
412+
// Number of hints found in the current iteration that were not already in the hint list.
let newJoinHintsCollectedCnt;
413+
414+
do {
415+
const allJoinHints = constructJH();
416+
prevJoin = newJoin;
417+
newJoin = this.joinGraph.buildJoin(allJoinHints);
418+
const allJoinHintsFlatten = new Set(allJoinHints.flat());
419+
const joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin));
420+
421+
// NOTE(review): Set.has() matches by identity for non-primitives, so this filter only
// removes string hints. If collectJoinHintsFromMembers() can return array-form hints,
// they always count as new here - confirm against that helper.
const iterationCollectedHints = joinMembersJoinHints.filter(j => !allJoinHintsFlatten.has(j));
422+
newJoinHintsCollectedCnt = iterationCollectedHints.length;
423+
cnt++;
424+
if (newJoin) {
425+
// Remember every join-member hint that is not part of an explicit array-form hint,
// so the next constructJH() call places it ahead of the caller hints.
newCollectedHints.push(...joinMembersJoinHints.filter(j => !explicitJoinHintMembers.has(j)));
426+
}
427+
// Keep iterating only while the join has at least one join, differs from the previous
// iteration, the iteration cap is not reached, and this pass found new hints.
} while (newJoin?.joins.length > 0 && !this.isJoinTreesEqual(prevJoin, newJoin) && cnt < 10000 && newJoinHintsCollectedCnt > 0);
428+
429+
// Reaching the cap is reported as a potential join loop.
if (cnt >= 10000) {
430+
throw new UserError('Can not construct joins for the query, potential loop detected');
431+
}
432+
433+
// Result of the last buildJoin() call, returned as is (including a falsy result).
return newJoin;
434+
}
435+
390436
cacheValue(key, fn, { contextPropNames, inputProps, cache } = {}) {
391437
const currentContext = this.safeEvaluateSymbolContext();
392438
if (contextPropNames) {
393439
const contextKey = {};
394-
for (let i = 0; i < contextPropNames.length; i++) {
395-
contextKey[contextPropNames[i]] = currentContext[contextPropNames[i]];
440+
for (const element of contextPropNames) {
441+
contextKey[element] = currentContext[element];
396442
}
397443
key = key.concat([JSON.stringify(contextKey)]);
398444
}
@@ -436,83 +482,54 @@ export class BaseQuery {
436482
*/
437483
get allJoinHints() {
438484
if (!this.collectedJoinHints) {
439-
const [rootOfJoin, ...allMembersJoinHints] = this.collectJoinHintsFromMembers(this.allMembersConcat(false));
440-
const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery());
441-
let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(this.join));
442-
443-
// One cube may join the other cube via transitive joined cubes,
444-
// members from which are referenced in the join `on` clauses.
445-
// We need to collect such join hints and push them upfront of the joining one
446-
// but only if they don't exist yet. Cause in other case we might affect what
447-
// join path will be constructed in join graph.
448-
// It is important to use queryLevelJoinHints during the calculation if it is set.
449-
450-
const constructJH = () => {
451-
const filteredJoinMembersJoinHints = joinMembersJoinHints.filter(m => !allMembersJoinHints.includes(m));
452-
return [
453-
...this.queryLevelJoinHints,
454-
...(rootOfJoin ? [rootOfJoin] : []),
455-
...filteredJoinMembersJoinHints,
456-
...allMembersJoinHints,
457-
...customSubQueryJoinHints,
458-
];
459-
};
460-
461-
let prevJoins = this.join;
462-
let prevJoinMembersJoinHints = joinMembersJoinHints;
463-
let newJoin = this.joinGraph.buildJoin(constructJH());
464-
465-
const isOrderPreserved = (base, updated) => {
466-
const common = base.filter(value => updated.includes(value));
467-
const bFiltered = updated.filter(value => common.includes(value));
468-
469-
return common.every((x, i) => x === bFiltered[i]);
470-
};
471-
472-
const isJoinTreesEqual = (a, b) => {
473-
if (!a || !b || a.root !== b.root || a.joins.length !== b.joins.length) {
474-
return false;
475-
}
476-
477-
// We don't care about the order of joins on the same level, so
478-
// we can compare them as sets.
479-
const aJoinsSet = new Set(a.joins.map(j => `${j.originalFrom}->${j.originalTo}`));
480-
const bJoinsSet = new Set(b.joins.map(j => `${j.originalFrom}->${j.originalTo}`));
481-
482-
if (aJoinsSet.size !== bJoinsSet.size) {
483-
return false;
484-
}
485-
486-
for (const val of aJoinsSet) {
487-
if (!bJoinsSet.has(val)) {
488-
return false;
489-
}
490-
}
485+
this.collectedJoinHints = this.collectJoinHints();
486+
}
487+
return this.collectedJoinHints;
488+
}
491489

492-
return true;
493-
};
490+
/**
491+
* @private
492+
* @return { Record<string, string[][]>}
493+
*/
494+
queryJoinMap() {
495+
const queryMembers = this.allMembersConcat(false);
496+
const joinMaps = {};
497+
498+
for (const member of queryMembers) {
499+
const memberCube = member.cube?.();
500+
if (memberCube?.isView && !joinMaps[memberCube.name] && memberCube.joinMap) {
501+
joinMaps[memberCube.name] = memberCube.joinMap;
502+
}
503+
}
494504

495-
// Safeguard against infinite loop in case of cyclic joins somehow managed to slip through
496-
let cnt = 0;
505+
return joinMaps;
506+
}
497507

498-
while (newJoin?.joins.length > 0 && !isJoinTreesEqual(prevJoins, newJoin) && cnt < 10000) {
499-
prevJoins = newJoin;
500-
joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin));
501-
if (!isOrderPreserved(prevJoinMembersJoinHints, joinMembersJoinHints)) {
502-
throw new UserError(`Can not construct joins for the query, potential loop detected: ${prevJoinMembersJoinHints.join('->')} vs ${joinMembersJoinHints.join('->')}`);
503-
}
504-
newJoin = this.joinGraph.buildJoin(constructJH());
505-
prevJoinMembersJoinHints = joinMembersJoinHints;
506-
cnt++;
508+
/**
509+
* @private
510+
* @param { (string|string[])[] } hints
511+
* @param { Record<string, string[][]>} joinMap
512+
* @return {(string|string[])[]}
513+
*/
514+
enrichHintsWithJoinMap(hints, joinMap) {
515+
// Potentially, if joins between views ever take place, we will need to distinguish
516+
// join maps on a per-view basis.
517+
const allPaths = Object.values(joinMap).flat();
518+
519+
return hints.map(hint => {
520+
if (Array.isArray(hint)) {
521+
return hint;
507522
}
508523

509-
if (cnt >= 10000) {
510-
throw new UserError('Can not construct joins for the query, potential loop detected');
524+
for (const path of allPaths) {
525+
const hintIndex = path.indexOf(hint);
526+
if (hintIndex !== -1) {
527+
return path.slice(0, hintIndex + 1);
528+
}
511529
}
512530

513-
this.collectedJoinHints = R.uniq(constructJH());
514-
}
515-
return this.collectedJoinHints;
531+
return hint;
532+
});
516533
}
517534

518535
get dataSource() {
@@ -2613,18 +2630,89 @@ export class BaseQuery {
26132630
}
26142631

26152632
/**
2616-
*
2633+
* Just a helper to avoid copy/paste
2634+
* @private
2635+
* @param {import('../compiler/JoinGraph').FinishedJoinTree} a
2636+
* @param {import('../compiler/JoinGraph').FinishedJoinTree} b
2637+
* @return {boolean}
2638+
*/
2639+
// Returns true only when both trees are present, have the same root and contain the
// same set of originalFrom->originalTo join pairs, regardless of the order of joins.
isJoinTreesEqual(a, b) {
2640+
// A missing tree on either side (including both), a different root or a different
// number of joins means the trees are not equal.
if (!a || !b || a.root !== b.root || a.joins.length !== b.joins.length) {
2641+
return false;
2642+
}
2643+
2644+
// We don't care about the order of joins on the same level, so
2645+
// we can compare them as sets.
2646+
// Each join is keyed by its originalFrom->originalTo pair.
const aJoinsSet = new Set(a.joins.map(j => `${j.originalFrom}->${j.originalTo}`));
2647+
const bJoinsSet = new Set(b.joins.map(j => `${j.originalFrom}->${j.originalTo}`));
2648+
2649+
if (aJoinsSet.size !== bJoinsSet.size) {
2650+
return false;
2651+
}
2652+
2653+
// Sizes match at this point, so it is enough to check that every key of a is in b.
for (const val of aJoinsSet) {
2654+
if (!bJoinsSet.has(val)) {
2655+
return false;
2656+
}
2657+
}
2658+
2659+
return true;
2660+
}
2661+
2662+
/**
2663+
* @private
26172664
* @param {boolean} [excludeTimeDimensions=false]
2618-
* @returns {Array<Array<string>>}
2665+
* @returns {Array<(Array<string> | string)>}
26192666
*/
26202667
collectJoinHints(excludeTimeDimensions = false) {
2621-
const membersToCollectFrom = [
2622-
...this.allMembersConcat(excludeTimeDimensions),
2623-
...this.joinMembersFromJoin(this.join),
2624-
...this.joinMembersFromCustomSubQuery(),
2625-
];
2668+
const allMembersJoinHints = this.collectJoinHintsFromMembers(this.allMembersConcat(excludeTimeDimensions));
2669+
const explicitJoinHintMembers = new Set(allMembersJoinHints.filter(j => Array.isArray(j)).flat());
2670+
const queryJoinMaps = this.queryJoinMap();
2671+
const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery());
2672+
const newCollectedHints = [];
2673+
2674+
// One cube may join the other cube via transitive joined cubes,
2675+
// members from which are referenced in the join `on` clauses.
2676+
// We need to collect such join hints and push them in front of the joining one,
2677+
// but only if they don't exist yet, because otherwise we might affect which
2678+
// join path is constructed in the join graph.
2679+
// It is important to use queryLevelJoinHints during the calculation if it is set.
2680+
2681+
const constructJH = () => R.uniq(this.enrichHintsWithJoinMap([
2682+
...this.queryLevelJoinHints,
2683+
...newCollectedHints,
2684+
...allMembersJoinHints,
2685+
...customSubQueryJoinHints,
2686+
],
2687+
queryJoinMaps));
2688+
2689+
let prevJoin = null;
2690+
let newJoin = null;
2691+
2692+
// Safeguard against an infinite loop in case cyclic joins somehow managed to slip through
2693+
let cnt = 0;
2694+
let newJoinHintsCollectedCnt;
2695+
2696+
do {
2697+
const allJoinHints = constructJH();
2698+
prevJoin = newJoin;
2699+
newJoin = this.joinGraph.buildJoin(allJoinHints);
2700+
const allJoinHintsFlatten = new Set(allJoinHints.flat());
2701+
const joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin));
2702+
2703+
const iterationCollectedHints = joinMembersJoinHints.filter(j => !allJoinHintsFlatten.has(j));
2704+
newJoinHintsCollectedCnt = iterationCollectedHints.length;
2705+
cnt++;
2706+
if (newJoin) {
2707+
newCollectedHints.push(...joinMembersJoinHints.filter(j => !explicitJoinHintMembers.has(j)));
2708+
}
2709+
} while (newJoin?.joins.length > 0 && !this.isJoinTreesEqual(prevJoin, newJoin) && cnt < 10000 && newJoinHintsCollectedCnt > 0);
2710+
2711+
if (cnt >= 10000) {
2712+
throw new UserError('Can not construct joins for the query, potential loop detected');
2713+
}
26262714

2627-
return this.collectJoinHintsFromMembers(membersToCollectFrom);
2715+
return constructJH();
26282716
}
26292717

26302718
joinMembersFromCustomSubQuery() {

packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,12 @@ export type AccessPolicyDefinition = {
133133
};
134134
};
135135

136+
export type ViewIncludedMember = {
137+
type: string;
138+
memberPath: string;
139+
name: string;
140+
};
141+
136142
export interface CubeDefinition {
137143
name: string;
138144
extends?: (...args: Array<unknown>) => { __cubeName: string };
@@ -159,7 +165,8 @@ export interface CubeDefinition {
159165
isView?: boolean;
160166
calendar?: boolean;
161167
isSplitView?: boolean;
162-
includedMembers?: any[];
168+
includedMembers?: ViewIncludedMember[];
169+
joinMap?: string[][];
163170
fileName?: string;
164171
}
165172

@@ -562,6 +569,8 @@ export class CubeSymbols implements TranspilerSymbolResolver {
562569
// `hierarchies` must be processed first
563570
const types = ['hierarchies', 'measures', 'dimensions', 'segments'];
564571

572+
const joinMap: string[][] = [];
573+
565574
for (const type of types) {
566575
let cubeIncludes: any[] = [];
567576

@@ -573,6 +582,11 @@ export class CubeSymbols implements TranspilerSymbolResolver {
573582
const split = fullPath.split('.');
574583
const cubeRef = split[split.length - 1];
575584

585+
// No need to keep simple direct cube joins in the join map
586+
if (split.length > 1) {
587+
joinMap.push(split);
588+
}
589+
576590
if (it.includes === '*') {
577591
return it;
578592
}
@@ -614,11 +628,7 @@ export class CubeSymbols implements TranspilerSymbolResolver {
614628
existing.map(({ type: t, memberPath, name }) => `${t}|${memberPath}|${name}`)
615629
);
616630

617-
const additions: {
618-
type: string;
619-
memberPath: string;
620-
name: string;
621-
}[] = [];
631+
const additions: ViewIncludedMember[] = [];
622632

623633
for (const { member, name } of cubeIncludes) {
624634
const parts = member.split('.');
@@ -636,6 +646,8 @@ export class CubeSymbols implements TranspilerSymbolResolver {
636646
}
637647
}
638648

649+
cube.joinMap = joinMap;
650+
639651
[...memberSets.allMembers].filter(it => !memberSets.resolvedMembers.has(it)).forEach(it => {
640652
errorReporter.error(`Member '${it}' is included in '${cube.name}' but not defined in any cube`);
641653
});

packages/cubejs-schema-compiler/src/compiler/DataSchemaCompiler.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ export class DataSchemaCompiler {
736736
if (e.toString().indexOf('SyntaxError') !== -1) {
737737
const err = e as SyntaxErrorInterface;
738738
const line = file.content.split('\n')[(err.loc?.start?.line || 1) - 1];
739-
const spaces = Array(err.loc?.start.column).fill(' ').join('');
739+
const spaces = Array(err.loc?.start?.column).fill(' ').join('') || '';
740740
errorsReport.error(`Syntax error during parsing: ${err.message}:\n${line}\n${spaces}^`, file.fileName);
741741
} else {
742742
errorsReport.error(e);

0 commit comments

Comments
 (0)