Skip to content

Commit b62bf96

Browse files
committed
impl trie pattern matching instead of regex
1 parent b0cf3c4 commit b62bf96

File tree

4 files changed

+138
-71
lines changed

4 files changed

+138
-71
lines changed

packages/cubejs-schema-compiler/src/compiler/YamlCompiler.ts

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ import { JinjaEngine, NativeInstance, PythonCtx } from '@cubejs-backend/native';
88
import type { FileContent } from '@cubejs-backend/shared';
99

1010
import { getEnv } from '@cubejs-backend/shared';
11-
import { CubePropContextTranspiler, transpiledFields, transpiledFieldsPatterns } from './transpilers';
11+
import {
12+
CubePropContextTranspiler,
13+
transpiledFieldMatch,
14+
transpiledFields,
15+
} from './transpilers';
1216
import { PythonParser } from '../parser/PythonParser';
1317
import { CubeSymbols } from './CubeSymbols';
1418
import { DataSchemaCompiler } from './DataSchemaCompiler';
@@ -137,35 +141,30 @@ export class YamlCompiler {
137141
}
138142

139143
private transpileYaml(obj, propertyPath, cubeName, errorsReport: ErrorReporter) {
140-
if (transpiledFields.has(propertyPath[propertyPath.length - 1])) {
141-
for (const p of transpiledFieldsPatterns) {
142-
const fullPath = propertyPath.join('.');
143-
if (fullPath.match(p)) {
144-
if (typeof obj === 'string' && ['sql', 'sqlTable'].includes(propertyPath[propertyPath.length - 1])) {
145-
return this.parsePythonIntoArrowFunction(`f"${this.escapeDoubleQuotes(obj)}"`, cubeName, obj, errorsReport);
146-
} else if (typeof obj === 'string') {
147-
return this.parsePythonIntoArrowFunction(obj, cubeName, obj, errorsReport);
148-
} else if (Array.isArray(obj)) {
149-
const resultAst = t.program([t.expressionStatement(t.arrayExpression(obj.map(code => {
150-
let ast: t.Program | t.NullLiteral | t.BooleanLiteral | t.NumericLiteral | null = null;
151-
// Special case for accessPolicy.rowLevel.filter.values and other values-like fields
152-
if (propertyPath[propertyPath.length - 1] === 'values') {
153-
if (typeof code === 'string') {
154-
ast = this.parsePythonAndTranspileToJs(`f"${this.escapeDoubleQuotes(code)}"`, errorsReport);
155-
} else if (typeof code === 'boolean') {
156-
ast = t.booleanLiteral(code);
157-
} else if (typeof code === 'number') {
158-
ast = t.numericLiteral(code);
159-
}
160-
}
161-
if (ast === null) {
162-
ast = this.parsePythonAndTranspileToJs(code, errorsReport);
163-
}
164-
return this.extractProgramBodyIfNeeded(ast);
165-
}).filter(ast => !!ast)))]);
166-
return this.astIntoArrowFunction(resultAst, '', cubeName);
144+
if (transpiledFields.has(propertyPath[propertyPath.length - 1]) && transpiledFieldMatch(propertyPath)) {
145+
if (typeof obj === 'string' && ['sql', 'sqlTable'].includes(propertyPath[propertyPath.length - 1])) {
146+
return this.parsePythonIntoArrowFunction(`f"${this.escapeDoubleQuotes(obj)}"`, cubeName, obj, errorsReport);
147+
} else if (typeof obj === 'string') {
148+
return this.parsePythonIntoArrowFunction(obj, cubeName, obj, errorsReport);
149+
} else if (Array.isArray(obj)) {
150+
const resultAst = t.program([t.expressionStatement(t.arrayExpression(obj.map(code => {
151+
let ast: t.Program | t.NullLiteral | t.BooleanLiteral | t.NumericLiteral | null = null;
152+
// Special case for accessPolicy.rowLevel.filter.values and other values-like fields
153+
if (propertyPath[propertyPath.length - 1] === 'values') {
154+
if (typeof code === 'string') {
155+
ast = this.parsePythonAndTranspileToJs(`f"${this.escapeDoubleQuotes(code)}"`, errorsReport);
156+
} else if (typeof code === 'boolean') {
157+
ast = t.booleanLiteral(code);
158+
} else if (typeof code === 'number') {
159+
ast = t.numericLiteral(code);
160+
}
167161
}
168-
}
162+
if (ast === null) {
163+
ast = this.parsePythonAndTranspileToJs(code, errorsReport);
164+
}
165+
return this.extractProgramBodyIfNeeded(ast);
166+
}).filter(ast => !!ast)))]);
167+
return this.astIntoArrowFunction(resultAst, '', cubeName);
169168
}
170169
}
171170

packages/cubejs-schema-compiler/src/compiler/transpilers/CubePropContextTranspiler.ts

Lines changed: 8 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5,37 +5,7 @@ import type { NodePath } from '@babel/traverse';
55
import type { TranspilerInterface, TraverseObject } from './transpiler.interface';
66
import type { CubeSymbols } from '../CubeSymbols';
77
import type { CubeDictionary } from '../CubeDictionary';
8-
9-
/* this list was generated by getTransformPatterns() with additional variants for snake_case */
10-
export const transpiledFieldsPatterns: Array<RegExp> = [
11-
/\.sql$/,
12-
/sql$/,
13-
/(sqlTable|sql_table)$/,
14-
/^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(drillMemberReferences|drillMembers|drill_members)$/,
15-
/^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(orderBy|order_by)\.[0-9]+\.sql$/,
16-
/^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(timeShift|time_shift)\.[0-9]+\.(timeDimension|time_dimension)$/,
17-
/^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)$/,
18-
/^dimensions\.[_a-zA-Z][_a-zA-Z0-9]*\.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)$/,
19-
/^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.indexes\.[_a-zA-Z][_a-zA-Z0-9]*\.columns$/,
20-
/^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.(timeDimensionReference|timeDimension|time_dimension|segments|dimensions|measures|rollups|segmentReferences|dimensionReferences|measureReferences|rollupReferences)$/,
21-
/^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.(timeDimensions|time_dimensions)\.\d+\.dimension$/,
22-
/^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.(outputColumnTypes|output_column_types)\.\d+\.member$/,
23-
/^contextMembers$/,
24-
/^includes$/,
25-
/^excludes$/,
26-
/^hierarchies\.[_a-zA-Z][_a-zA-Z0-9]*\.levels$/,
27-
/^cubes\.[0-9]+\.(joinPath|join_path)$/,
28-
/^(accessPolicy|access_policy)\.[0-9]+\.(rowLevel|row_level)\.filters\.[0-9]+.*\.member$/,
29-
/^(accessPolicy|access_policy)\.[0-9]+\.(rowLevel|row_level)\.filters\.[0-9]+.*\.values$/,
30-
/^(accessPolicy|access_policy)\.[0-9]+\.conditions.[0-9]+\.if$/,
31-
];
32-
33-
export const transpiledFields: Set<String> = new Set<String>();
34-
35-
transpiledFieldsPatterns?.forEach((r) => {
36-
const fields = r.toString().replace(/.*?([_a-zA-Z|][_a-zA-Z0-9|]*)([^_a-zA-Z0-9|]*)$/, '$1').split('|');
37-
fields.forEach((f) => transpiledFields.add(f));
38-
});
8+
import { transpiledFieldMatch, transpiledFields } from './patternMatcher';
399

4010
export class CubePropContextTranspiler implements TranspilerInterface {
4111
public constructor(
@@ -94,35 +64,31 @@ export class CubePropContextTranspiler implements TranspilerInterface {
9464
ObjectProperty: (path) => {
9565
if (path.node.key.type === 'Identifier' && transpiledFields.has(path.node.key.name)) {
9666
const fullPath = this.fullPath(path);
97-
// eslint-disable-next-line no-restricted-syntax
98-
for (const p of transpiledFieldsPatterns) {
99-
if (fullPath.match(p)) {
100-
this.transformObjectProperty(path, resolveSymbol);
101-
return;
102-
}
67+
if (transpiledFieldMatch(fullPath)) {
68+
this.transformObjectProperty(path, resolveSymbol);
10369
}
10470
}
10571
}
10672
};
10773
}
10874

109-
protected fullPath(path: NodePath<t.ObjectProperty>): string {
75+
protected fullPath(path: NodePath<t.ObjectProperty>): string[] {
11076
// @ts-ignore
111-
let fp = path?.node?.key?.name || '';
77+
const fp = [path?.node?.key?.name || ''];
11278
let pp: NodePath<t.Node> | null | undefined = path?.parentPath;
11379
while (pp) {
11480
if (pp?.parentPath?.node?.type === 'ArrayExpression') {
115-
fp = `0.${fp}`;
81+
fp.push('0');
11682
pp = pp?.parentPath;
11783
// @ts-ignore
11884
} else if (pp?.parentPath?.node?.key?.type === 'Identifier') {
11985
// @ts-ignore
120-
fp = `${pp?.parentPath?.node?.key?.name || '0'}.${fp}`;
86+
fp.push(`${pp?.parentPath?.node?.key?.name || '0'}`);
12187
pp = pp?.parentPath?.parentPath;
12288
} else break;
12389
}
12490

125-
return fp;
91+
return fp.reverse();
12692
}
12793

12894
protected knownIdentifiersInjectVisitor(field: RegExp | string, resolveSymbol: (name: string) => void): TraverseObject {

packages/cubejs-schema-compiler/src/compiler/transpilers/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ export * from './CubePropContextTranspiler';
33
export * from './CubeCheckDuplicatePropTranspiler';
44
export * from './ValidationTranspiler';
55
export * from './transpiler.interface';
6+
export * from './patternMatcher';
packages/cubejs-schema-compiler/src/compiler/transpilers/patternMatcher.ts

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import { performance } from 'perf_hooks';
2+
/**
 * Prefix shared by all row-level access-policy filter patterns: it addresses
 * one entry of the `filters` array of one access policy.
 */
const ROW_LEVEL_FILTER_PREFIX = '(accessPolicy|access_policy).__idx__.(rowLevel|row_level).filters.__idx__';

/**
 * How many levels of nested filter groups are recognised below a row-level
 * filter entry. The regex-based patterns this list replaced accepted any
 * number of segments between the filter index and `member` / `values`; a trie
 * pattern has a fixed length, so nesting is expanded up to this depth.
 */
const MAX_ROW_LEVEL_FILTER_NESTING = 3;

/**
 * Builds the `member` / `values` patterns for row-level filters, both for a
 * plain filter entry and for entries nested in logical groups.
 *
 * NOTE(review): assumes groups are spelled `or` / `and` and hold an array of
 * filters (`filters.0.or.0.member`) — confirm against the access-policy schema.
 *
 * @returns Patterns ordered from the shallowest to the deepest nesting level.
 */
function rowLevelFilterPatterns(): string[] {
  const patterns: string[] = [];
  let prefix = ROW_LEVEL_FILTER_PREFIX;

  for (let depth = 0; depth <= MAX_ROW_LEVEL_FILTER_NESTING; depth += 1) {
    patterns.push(`${prefix}.member`, `${prefix}.values`);
    prefix += '.(or|and).__idx__';
  }

  return patterns;
}

/**
 * Dot-separated property paths whose values have to be transpiled.
 *
 * Segment syntax:
 *  - `name`      matches that exact property name;
 *  - `(a|b)`     matches any of the listed names (camelCase / snake_case variants);
 *  - `__id__`    matches any single segment (a user-defined member name);
 *  - `__idx__`   matches a numeric array index.
 *
 * This list was generated by getTransformPatterns() with additional variants for snake_case.
 */
export const transpiledFieldsPatterns: string[] = [
  'sql',
  '(sqlTable|sql_table)',
  'measures.__id__.(drillMemberReferences|drillMembers|drill_members)',
  // 'measures.__id__.(orderBy|order_by).__idx__.sql', // Matched by simple `sql`
  'measures.__id__.(timeShift|time_shift).__idx__.(timeDimension|time_dimension)',
  'measures.__id__.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)',
  'dimensions.__id__.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)',
  '(preAggregations|pre_aggregations).__id__.indexes.__id__.columns',
  '(preAggregations|pre_aggregations).__id__.(timeDimensionReference|timeDimension|time_dimension|segments|dimensions|measures|rollups|segmentReferences|dimensionReferences|measureReferences|rollupReferences)',
  '(preAggregations|pre_aggregations).__id__.(timeDimensions|time_dimensions).__idx__.dimension',
  '(preAggregations|pre_aggregations).__id__.(outputColumnTypes|output_column_types).__idx__.member',
  'contextMembers',
  'includes',
  'excludes',
  'hierarchies.__id__.levels',
  'cubes.__idx__.(joinPath|join_path)',
  ...rowLevelFilterPatterns(),
  '(accessPolicy|access_policy).__idx__.conditions.__idx__.if',
];

/**
 * Every property name that can terminate one of the patterns above. Callers
 * use it as a cheap pre-check before running the full path match.
 */
export const transpiledFields: Set<string> = new Set<string>();

transpiledFieldsPatterns.forEach((pattern) => {
  // Only the last segment matters here; unwrap `(a|b)` into its alternatives.
  const lastSegment = pattern.slice(pattern.lastIndexOf('.') + 1);
  const alternatives = lastSegment.replace(/^\((.*)\)$/, '$1').split('|');
  alternatives.forEach((name) => transpiledFields.add(name));
});
32+
/**
 * Node of the pattern trie. Each edge is labelled with one path segment
 * (a property name or one of the `__id__` / `__idx__` placeholders).
 */
interface Trie {
  /** Child nodes keyed by segment. */
  children: { [key: string]: Trie };
  /** True when a complete pattern ends at this node. */
  isEnd: boolean;
}

/**
 * Inserts one pattern, already split into segments, below `node`.
 *
 * A segment written as `(a|b|c)` is an alternation: the remainder of the
 * pattern is inserted under each alternative. `node` is mutated in place.
 *
 * @param node Trie node the segments are relative to.
 * @param segments Remaining pattern segments; an empty list marks `node` as a pattern end.
 */
function insertIntoTrie(node: Trie, segments: string[]) {
  if (segments.length === 0) {
    node.isEnd = true;
    return;
  }

  const [first = '', ...rest] = segments;

  // Unwrap the parentheses only when the segment is a well-formed group, so a
  // stray `(` can never silently eat the last character of a name.
  const isGroup = first.startsWith('(') && first.endsWith(')');
  const alternatives = (isGroup ? first.slice(1, -1) : first).split('|');

  for (const alternative of alternatives) {
    // Own-property check: `children` is a plain object, so a bare lookup of
    // e.g. `toString` would find the inherited Object.prototype member.
    let child = Object.prototype.hasOwnProperty.call(node.children, alternative)
      ? node.children[alternative]
      : undefined;

    if (child === undefined) {
      child = { children: {}, isEnd: false };
      node.children[alternative] = child;
    }

    insertIntoTrie(child, rest);
  }
}
60+
/**
 * Creates a trie out of dot-separated patterns.
 *
 * @param paths Patterns such as `measures.__id__.(drillMembers|drill_members)`.
 * @returns Root node of a trie containing every pattern.
 */
function buildTrie(paths: string[]): Trie {
  const trie: Trie = { isEnd: false, children: {} };

  paths.forEach((pattern) => insertIntoTrie(trie, pattern.split('.')));

  return trie;
}

/** Trie over `transpiledFieldsPatterns`, built once when the module is loaded. */
const transpiledFieldsPatternsTree: Trie = buildTrie(transpiledFieldsPatterns);
73+
/** A path segment that denotes an array index: decimal digits only. */
const ARRAY_INDEX_SEGMENT = /^\d+$/;

/**
 * Checks whether a property path is accepted by the pattern trie.
 *
 * For each segment the candidates are tried in order: the child with exactly
 * that name, then `__idx__` (only for all-digit segments), then `__id__`
 * (any segment). If one candidate's subtree rejects the rest of the path the
 * next candidate is tried, so a literal child never hides a wildcard sibling.
 *
 * @param node Trie node to match from.
 * @param segments Full property path, one entry per segment.
 * @param offset Index of the first segment still to be matched; callers normally omit it.
 * @returns True when the path from `offset` onwards ends exactly on a pattern end.
 */
function matchTree(node: Trie, segments: string[], offset = 0): boolean {
  if (offset >= segments.length) {
    return node.isEnd;
  }

  // String() keeps the lookup well-defined even if a caller passes a numeric index.
  const segment = String(segments[offset]);
  const { children } = node;

  // `children` is a plain object, so only own properties are real trie edges;
  // a bare `children[segment]` would resolve names like `constructor` to
  // inherited Object.prototype members.
  const childOf = (key: string): Trie | undefined => (
    Object.prototype.hasOwnProperty.call(children, key) ? children[key] : undefined
  );

  const candidates: Array<Trie | undefined> = [childOf(segment)];
  if (ARRAY_INDEX_SEGMENT.test(segment)) {
    candidates.push(childOf('__idx__'));
  }
  candidates.push(childOf('__id__'));

  return candidates.some(
    (child) => child !== undefined && matchTree(child, segments, offset + 1)
  );
}
91+
/**
 * Tells whether the value at the given property path has to be transpiled.
 *
 * Paths ending in `sql`, `sqlTable` or `sql_table` are accepted wherever they
 * occur; every other path has to match one of the patterns stored in
 * `transpiledFieldsPatternsTree` from its first segment to its last.
 *
 * @param fullPath Property path, one array entry per segment (array positions included).
 * @returns True when the path designates a transpiled field.
 */
export const transpiledFieldMatch = (fullPath: string[]): boolean => {
  const tail = fullPath[fullPath.length - 1];
  const endsWithSqlField = tail === 'sql' || tail === 'sqlTable' || tail === 'sql_table';

  return endsWithSqlField || matchTree(transpiledFieldsPatternsTree, fullPath);
};

0 commit comments

Comments
 (0)