Skip to content

Commit 449503c

Browse files
authored
perf(schema-compiler): YAML transpilation in worker threads (#9929)
* extracted yaml and jinja transpilation into separate functions * implement yaml transpilation in worker threads * fix to allow using threads and native in parallel * just code polish * small fix * remove speed test as it is flaky * introduce compiledYamlCache
1 parent 13eb112 commit 449503c

File tree

6 files changed

+104
-64
lines changed

6 files changed

+104
-64
lines changed

packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { DynamicReference } from './DynamicReference';
77
import { camelizeCube } from './utils';
88

99
import type { ErrorReporter } from './ErrorReporter';
10+
import { TranspilerSymbolResolver } from './transpilers';
1011

1112
export type ToString = { toString(): string };
1213

@@ -193,7 +194,7 @@ export const CONTEXT_SYMBOLS = {
193194

194195
export const CURRENT_CUBE_CONSTANTS = ['CUBE', 'TABLE'];
195196

196-
export class CubeSymbols {
197+
export class CubeSymbols implements TranspilerSymbolResolver {
197198
public symbols: Record<string | symbol, CubeSymbolsDefinition>;
198199

199200
private builtCubes: Record<string, CubeDefinitionExtended>;

packages/cubejs-schema-compiler/src/compiler/DataSchemaCompiler.ts

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ export type DataSchemaCompilerOptions = {
8888
compileContext?: any;
8989
allowNodeRequire?: boolean;
9090
compiledScriptCache: LRUCache<string, vm.Script>;
91+
compiledYamlCache: LRUCache<string, string>;
9192
};
9293

9394
export type TranspileOptions = {
@@ -163,6 +164,8 @@ export class DataSchemaCompiler {
163164

164165
private readonly compiledScriptCache: LRUCache<string, vm.Script>;
165166

167+
private readonly compiledYamlCache: LRUCache<string, string>;
168+
166169
private compileV8ContextCache: vm.Context | null = null;
167170

168171
// FIXME: Is public only because of tests, should be private
@@ -196,6 +199,7 @@ export class DataSchemaCompiler {
196199
this.workerPool = null;
197200
this.compilerId = options.compilerId || 'default';
198201
this.compiledScriptCache = options.compiledScriptCache;
202+
this.compiledYamlCache = options.compiledYamlCache;
199203
}
200204

201205
public compileObjects(compileServices: CompilerInterface[], objects, errorsReport: ErrorReporter) {
@@ -268,7 +272,7 @@ export class DataSchemaCompiler {
268272
const transpilationNativeThreadsCount = getThreadsCount();
269273
const { compilerId } = this;
270274

271-
if (!transpilationNative && transpilationWorkerThreads) {
275+
if (transpilationWorkerThreads) {
272276
const wc = getEnv('transpilationWorkerThreadsCount');
273277
this.workerPool = workerpool.pool(
274278
path.join(__dirname, 'transpilers/transpiler_worker'),
@@ -288,7 +292,7 @@ export class DataSchemaCompiler {
288292

289293
if (transpilationNative) {
290294
const nonJsFilesTasks = [...jinjaTemplatedFiles, ...yamlFiles]
291-
.map(f => this.transpileFile(f, errorsReport, { transpilerNames, compilerId }));
295+
.map(f => this.transpileFile(f, errorsReport, { cubeNames, cubeSymbols, transpilerNames, compilerId }));
292296

293297
const jsFiles = originalJsFiles;
294298
let jsFilesTasks: Promise<(FileContent | undefined)[]>[] = [];
@@ -575,14 +579,9 @@ export class DataSchemaCompiler {
575579
(file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml'))
576580
&& file.content.match(JINJA_SYNTAX)
577581
) {
578-
return this.yamlCompiler.compileYamlWithJinjaFile(
579-
file,
580-
errorsReport,
581-
this.standalone ? {} : this.cloneCompileContextWithGetterAlias(this.compileContext),
582-
this.pythonContext!
583-
);
582+
return this.transpileJinjaFile(file, errorsReport, options);
584583
} else if (file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml')) {
585-
return this.yamlCompiler.transpileYamlFile(file, errorsReport);
584+
return this.transpileYamlFile(file, errorsReport, options);
586585
} else {
587586
return file;
588587
}
@@ -668,7 +667,7 @@ export class DataSchemaCompiler {
668667
cubeSymbols,
669668
};
670669

671-
const res = await this.workerPool!.exec('transpile', [data]);
670+
const res = await this.workerPool!.exec('transpileJs', [data]);
672671
errorsReport.addErrors(res.errors);
673672
errorsReport.addWarnings(res.warnings);
674673

@@ -705,6 +704,65 @@ export class DataSchemaCompiler {
705704
return undefined;
706705
}
707706

707+
/**
 * Transpiles a plain (non-Jinja) YAML data model file, consulting `compiledYamlCache` first.
 *
 * The cache key is an MD5 digest of the file content only, so files with identical
 * content share one cache entry regardless of their file name.
 *
 * When the `transpilationWorkerThreads` env flag is set, the work is delegated to the
 * `transpileYaml` task of the worker pool; otherwise it runs in-process via `yamlCompiler`.
 *
 * Only non-empty results are cached. A cache hit returns before `errorsReport` is touched,
 * so caching an empty (failed) result would make later compilations of the same content
 * silently receive an empty file instead of running the transpiler again.
 *
 * @param file - YAML file to transpile.
 * @param errorsReport - Collector that receives errors and warnings produced by transpilation.
 * @param options - Transpile options; only `cubeNames` and `cubeSymbols` are used here
 *   (they are forwarded to the worker).
 * @returns The file with transpiled content. The in-process path returns whatever
 *   `yamlCompiler.transpileYamlFile` returns, which may be `undefined`.
 */
private async transpileYamlFile(
  file: FileContent,
  errorsReport: ErrorReporter,
  { cubeNames, cubeSymbols }: TranspileOptions
): Promise<(FileContent | undefined)> {
  const cacheKey = crypto.createHash('md5').update(JSON.stringify(file.content)).digest('hex');

  // Single lookup instead of has() + get()!: a missing entry yields undefined.
  const cachedContent = this.compiledYamlCache.get(cacheKey);
  if (cachedContent !== undefined) {
    return { ...file, content: cachedContent };
  }

  // NOTE: there is no native YAML transpilation path yet; only worker threads or in-process.
  if (getEnv('transpilationWorkerThreads')) {
    const data = {
      fileName: file.fileName,
      content: file.content,
      transpilers: [],
      cubeNames,
      cubeSymbols,
    };

    const res = await this.workerPool!.exec('transpileYaml', [data]);
    errorsReport.addErrors(res.errors);
    errorsReport.addWarnings(res.warnings);

    // The worker reports a missing result as an empty string; do not cache that.
    // NOTE(review): a non-empty result that came with errors is still cached, so those
    // errors are not re-reported on a cache hit — confirm whether that is acceptable.
    if (res.content) {
      this.compiledYamlCache.set(cacheKey, res.content);
    }

    return { ...file, content: res.content };
  }

  const transpiledFile = this.yamlCompiler.transpileYamlFile(file, errorsReport);

  // Skip caching when nothing was produced, so the next call for the same content
  // goes through the transpiler again instead of getting `{ ...file, content: '' }`.
  if (transpiledFile?.content) {
    this.compiledYamlCache.set(cacheKey, transpiledFile.content);
  }

  return transpiledFile;
}
746+
747+
/**
 * Compiles a YAML data model file that contains Jinja templating.
 *
 * The work is always done in-process by `yamlCompiler.compileYamlWithJinjaFile`.
 * In standalone mode an empty object is used as the template context; otherwise a
 * clone of `compileContext` produced by `cloneCompileContextWithGetterAlias` is passed.
 *
 * @param file - Jinja-templated YAML file to compile.
 * @param errorsReport - Collector that receives errors produced during compilation.
 * @param options - Transpile options; currently unused by this method.
 * @returns Whatever `yamlCompiler.compileYamlWithJinjaFile` yields for the file.
 */
private async transpileJinjaFile(
  file: FileContent,
  errorsReport: ErrorReporter,
  { cubeNames, cubeSymbols, contextSymbols, transpilerNames, compilerId, stage }: TranspileOptions
): Promise<(FileContent | undefined)> {
  // TODO: add dedicated branches for `transpilationNative` and
  // `transpilationWorkerThreads`; for now every mode takes the in-process path below.
  const templateContext = this.standalone
    ? {}
    : this.cloneCompileContextWithGetterAlias(this.compileContext);

  return this.yamlCompiler.compileYamlWithJinjaFile(
    file,
    errorsReport,
    templateContext,
    this.pythonContext!
  );
}
765+
708766
public withQuery(query, fn) {
709767
const oldQuery = this.currentQuery;
710768
this.currentQuery = query;

packages/cubejs-schema-compiler/src/compiler/PrepareCompiler.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ export type PrepareCompilerOptions = {
3737
headCommitId?: string;
3838
adapter?: string;
3939
compiledScriptCache?: LRUCache<string, vm.Script>;
40+
compiledYamlCache?: LRUCache<string, string>;
4041
};
4142

4243
export interface CompilerInterface {
@@ -59,6 +60,7 @@ export const prepareCompiler = (repo: SchemaFileRepository, options: PrepareComp
5960
const yamlCompiler = new YamlCompiler(cubeSymbols, cubeDictionary, nativeInstance, viewCompiler);
6061

6162
const compiledScriptCache = options.compiledScriptCache || new LRUCache<string, vm.Script>({ max: 250 });
63+
const compiledYamlCache = options.compiledYamlCache || new LRUCache<string, string>({ max: 250 });
6264

6365
const transpilers: TranspilerInterface[] = [
6466
new ValidationTranspiler(),
@@ -79,6 +81,7 @@ export const prepareCompiler = (repo: SchemaFileRepository, options: PrepareComp
7981
transpilers,
8082
viewCompilationGate,
8183
compiledScriptCache,
84+
compiledYamlCache,
8285
viewCompilers: [viewCompiler],
8386
cubeCompilers: [cubeEvaluator, joinGraph, metaTransformer],
8487
contextCompilers: [contextEvaluator],

packages/cubejs-schema-compiler/src/compiler/YamlCompiler.ts

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ import { JinjaEngine, NativeInstance, PythonCtx } from '@cubejs-backend/native';
88
import type { FileContent } from '@cubejs-backend/shared';
99

1010
import { getEnv } from '@cubejs-backend/shared';
11-
import { CubePropContextTranspiler, transpiledFields, transpiledFieldsPatterns } from './transpilers';
11+
import {
12+
CubePropContextTranspiler,
13+
transpiledFields,
14+
transpiledFieldsPatterns,
15+
TranspilerCubeResolver, TranspilerSymbolResolver
16+
} from './transpilers';
1217
import { PythonParser } from '../parser/PythonParser';
13-
import { CubeSymbols } from './CubeSymbols';
1418
import { nonStringFields } from './CubeValidator';
15-
import { CubeDictionary } from './CubeDictionary';
1619
import { ErrorReporter } from './ErrorReporter';
1720
import { camelizeCube } from './utils';
1821
import { CompileContext } from './DataSchemaCompiler';
@@ -28,10 +31,10 @@ export class YamlCompiler {
2831
protected jinjaEngine: JinjaEngine | null = null;
2932

3033
public constructor(
31-
private readonly cubeSymbols: CubeSymbols,
32-
private readonly cubeDictionary: CubeDictionary,
34+
private readonly cubeSymbols: TranspilerSymbolResolver,
35+
private readonly cubeDictionary: TranspilerCubeResolver,
3336
private readonly nativeInstance: NativeInstance,
34-
private readonly viewCompiler: CubeSymbols,
37+
private readonly viewCompiler: TranspilerSymbolResolver,
3538
) {
3639
}
3740

@@ -125,15 +128,14 @@ export class YamlCompiler {
125128
cubeObj.dimensions = this.yamlArrayToObj(cubeObj.dimensions || [], 'dimension', errorsReport);
126129
cubeObj.segments = this.yamlArrayToObj(cubeObj.segments || [], 'segment', errorsReport);
127130
cubeObj.preAggregations = this.yamlArrayToObj(cubeObj.preAggregations || [], 'preAggregation', errorsReport);
131+
cubeObj.hierarchies = this.yamlArrayToObj(cubeObj.hierarchies || [], 'hierarchies', errorsReport);
128132

129133
cubeObj.joins = cubeObj.joins || []; // For edge cases where joins are not defined/null
130134
if (!Array.isArray(cubeObj.joins)) {
131135
errorsReport.error('joins must be defined as array');
132136
cubeObj.joins = [];
133137
}
134138

135-
cubeObj.hierarchies = this.yamlArrayToObj(cubeObj.hierarchies || [], 'hierarchies', errorsReport);
136-
137139
return this.transpileYaml(cubeObj, [], cubeObj.name, errorsReport);
138140
}
139141

packages/cubejs-schema-compiler/src/compiler/transpilers/transpiler_worker.ts

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { parse } from '@babel/parser';
33
import babelGenerator from '@babel/generator';
44
import babelTraverse from '@babel/traverse';
55

6+
import { NativeInstance } from '@cubejs-backend/native';
67
import { ValidationTranspiler } from './ValidationTranspiler';
78
import { ImportExportTranspiler } from './ImportExportTranspiler';
89
import { CubeCheckDuplicatePropTranspiler } from './CubeCheckDuplicatePropTranspiler';
@@ -11,6 +12,7 @@ import { ErrorReporter } from '../ErrorReporter';
1112
import { LightweightSymbolResolver } from './LightweightSymbolResolver';
1213
import { LightweightNodeCubeDictionary } from './LightweightNodeCubeDictionary';
1314
import { IIFETranspiler } from './IIFETranspiler';
15+
import { YamlCompiler } from '../YamlCompiler';
1416

1517
type TransferContent = {
1618
fileName: string;
@@ -23,6 +25,7 @@ type TransferContent = {
2325
const cubeDictionary = new LightweightNodeCubeDictionary();
2426
const cubeSymbols = new LightweightSymbolResolver();
2527
const errorsReport = new ErrorReporter(null, []);
28+
const yamlCompiler = new YamlCompiler(cubeSymbols, cubeDictionary, new NativeInstance(), cubeSymbols);
2629

2730
const transpilers = {
2831
ValidationTranspiler: new ValidationTranspiler(),
@@ -32,7 +35,7 @@ const transpilers = {
3235
IIFETranspiler: new IIFETranspiler(),
3336
};
3437

35-
const transpile = (data: TransferContent) => {
38+
const transpileJs = (data: TransferContent) => {
3639
cubeDictionary.setCubeNames(data.cubeNames);
3740
cubeSymbols.setSymbols(data.cubeSymbols);
3841

@@ -64,6 +67,22 @@ const transpile = (data: TransferContent) => {
6467
};
6568
};
6669

70+
/**
 * Worker task: transpiles one YAML file with the module-level `yamlCompiler`.
 *
 * The shared lightweight cube dictionary and symbol resolver are refreshed from the
 * transferred payload before each run, because the compiler reads cube names and
 * symbols through them.
 *
 * @param data - Transferred file (name and content) plus the cube names and symbols.
 * @returns The transpiled content (an empty string when the compiler produced none)
 *   together with the errors and warnings held by the module-level reporter.
 */
const transpileYaml = (data: TransferContent) => {
  cubeDictionary.setCubeNames(data.cubeNames);
  cubeSymbols.setSymbols(data.cubeSymbols);

  // Scope reporter output to this file for the duration of the transpilation.
  errorsReport.inFile(data);
  const result = yamlCompiler.transpileYamlFile(data, errorsReport);
  errorsReport.exitFile();

  // Fall back to an empty string when there is no result or its content is empty.
  let content = '';
  if (result && result.content) {
    content = result.content;
  }

  return {
    content,
    errors: errorsReport.getErrors(),
    warnings: errorsReport.getWarnings()
  };
};
84+
6785
workerpool.worker({
68-
transpile,
86+
transpileJs,
87+
transpileYaml,
6988
});

packages/cubejs-schema-compiler/test/integration/postgres/dataschema-compiler.test.ts

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -158,49 +158,6 @@ describe('DataSchemaCompiler', () => {
158158
compiler.throwIfAnyErrors();
159159
});
160160
});
161-
162-
describe('Test perfomance', () => {
163-
const schema = `
164-
cube('visitors', {
165-
sql: 'select * from visitors',
166-
measures: {
167-
count: {
168-
type: 'count',
169-
sql: 'id'
170-
},
171-
duration: {
172-
type: 'avg',
173-
sql: 'duration'
174-
},
175-
},
176-
dimensions: {
177-
date: {
178-
type: 'string',
179-
sql: 'date'
180-
},
181-
browser: {
182-
type: 'string',
183-
sql: 'browser'
184-
}
185-
}
186-
})
187-
`;
188-
189-
it('Should compile 200 schemas in less than 2500ms * 10', async () => {
190-
const repeats = 200;
191-
192-
const compilerWith = prepareJsCompiler(schema, { allowJsDuplicatePropsInSchema: false });
193-
const start = new Date().getTime();
194-
for (let i = 0; i < repeats; i++) {
195-
delete compilerWith.compiler.compilePromise; // Reset compile result
196-
await compilerWith.compiler.compile();
197-
}
198-
const end = new Date().getTime();
199-
const time = end - start;
200-
201-
expect(time).toBeLessThan(2500 * 10);
202-
});
203-
});
204161
});
205162

206163
it('calculated metrics', async () => {

0 commit comments

Comments
 (0)