Skip to content

Commit 3f1ff57

Browse files
authored
perf(schema-compiler): Reduce JS compilation memory usage by 3x-5x. (#9897)
* perf(schema-compiler): Reduce JS compilation memory usage by 3x-5x.
* more types
* move context to compile() and remove unneeded params floating around
* simplify wrappedCode a bit
* remove unused import
* introduce AsyncLocalStorage for context store
* fix link
* correct resource freeing between stages
* fix for async modules
* add IIFETranspiler
* more types
* fix
* fix ImportExportTranspiler to generate expression statements instead of raw calls
* fix transpilers order
* fix tests
1 parent 346d300 commit 3f1ff57

File tree

8 files changed

+221
-113
lines changed

8 files changed

+221
-113
lines changed

packages/cubejs-schema-compiler/src/compiler/DataSchemaCompiler.ts

Lines changed: 164 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { AsyncLocalStorage } from 'async_hooks';
12
import crypto from 'crypto';
23
import vm from 'vm';
34
import fs from 'fs';
@@ -15,14 +16,16 @@ import { FileContent, getEnv, isNativeSupported, SchemaFileRepository } from '@c
1516
import { NativeInstance, PythonCtx, transpileJs } from '@cubejs-backend/native';
1617
import { UserError } from './UserError';
1718
import { ErrorReporter, ErrorReporterOptions, SyntaxErrorInterface } from './ErrorReporter';
18-
import { CONTEXT_SYMBOLS, CubeSymbols } from './CubeSymbols';
19+
import { CONTEXT_SYMBOLS, CubeDefinition, CubeSymbols } from './CubeSymbols';
1920
import { ViewCompilationGate } from './ViewCompilationGate';
2021
import { TranspilerInterface } from './transpilers';
2122
import { CompilerInterface } from './PrepareCompiler';
2223
import { YamlCompiler } from './YamlCompiler';
2324
import { CubeDictionary } from './CubeDictionary';
2425
import { CompilerCache } from './CompilerCache';
2526

27+
const ctxFileStorage = new AsyncLocalStorage<FileContent>();
28+
2629
const NATIVE_IS_SUPPORTED = isNativeSupported();
2730

2831
const moduleFileCache = {};
@@ -140,6 +143,8 @@ export class DataSchemaCompiler {
140143

141144
private readonly compiledScriptCache: LRUCache<string, vm.Script>;
142145

146+
private compileV8ContextCache: vm.Context | null = null;
147+
143148
// FIXME: Is public only because of tests, should be private
144149
public compilePromise: any;
145150

@@ -229,11 +234,11 @@ export class DataSchemaCompiler {
229234
);
230235
}
231236

232-
const transpile = async (stage: CompileStage) => {
237+
const transpile = async (stage: CompileStage): Promise<FileContent[]> => {
233238
let cubeNames: string[] = [];
234239
let cubeSymbols: Record<string, Record<string, boolean>> = {};
235240
let transpilerNames: string[] = [];
236-
let results;
241+
let results: (FileContent | undefined)[];
237242

238243
if (transpilationNative || transpilationWorkerThreads) {
239244
cubeNames = Object.keys(this.cubeDictionary.byId);
@@ -296,10 +301,113 @@ export class DataSchemaCompiler {
296301
results = await Promise.all(toCompile.map(f => this.transpileFile(f, errorsReport, {})));
297302
}
298303

299-
return results.filter(f => !!f);
304+
return results.filter(f => !!f) as FileContent[];
300305
};
301306

302-
const compilePhase = async (compilers: CompileCubeFilesCompilers, stage: 0 | 1 | 2 | 3) => this.compileCubeFiles(compilers, await transpile(stage), errorsReport);
307+
let cubes: CubeDefinition[] = [];
308+
let exports: Record<string, Record<string, any>> = {};
309+
let contexts: Record<string, any>[] = [];
310+
let compiledFiles: Record<string, boolean> = {};
311+
let asyncModules: CallableFunction[] = [];
312+
let transpiledFiles: FileContent[] = [];
313+
314+
this.compileV8ContextCache = vm.createContext({
315+
view: (name, cube) => {
316+
const file = ctxFileStorage.getStore();
317+
if (!file) {
318+
throw new Error('No file stored in context');
319+
}
320+
return !cube ?
321+
this.cubeFactory({ ...name, fileName: file.fileName, isView: true }) :
322+
cubes.push({ ...cube, name, fileName: file.fileName, isView: true });
323+
},
324+
cube: (name, cube) => {
325+
const file = ctxFileStorage.getStore();
326+
if (!file) {
327+
throw new Error('No file stored in context');
328+
}
329+
return !cube ?
330+
this.cubeFactory({ ...name, fileName: file.fileName }) :
331+
cubes.push({ ...cube, name, fileName: file.fileName });
332+
},
333+
context: (name: string, context) => {
334+
const file = ctxFileStorage.getStore();
335+
if (!file) {
336+
throw new Error('No file stored in context');
337+
}
338+
return contexts.push({ ...context, name, fileName: file.fileName });
339+
},
340+
addExport: (obj) => {
341+
const file = ctxFileStorage.getStore();
342+
if (!file) {
343+
throw new Error('No file stored in context');
344+
}
345+
exports[file.fileName] = exports[file.fileName] || {};
346+
exports[file.fileName] = Object.assign(exports[file.fileName], obj);
347+
},
348+
setExport: (obj) => {
349+
const file = ctxFileStorage.getStore();
350+
if (!file) {
351+
throw new Error('No file stored in context');
352+
}
353+
exports[file.fileName] = obj;
354+
},
355+
asyncModule: (fn) => {
356+
const file = ctxFileStorage.getStore();
357+
if (!file) {
358+
throw new Error('No file stored in context');
359+
}
360+
// We need to run async module code in the context of the original data model file
361+
// where it was defined. So we pass the same file to the async context.
362+
// @see https://nodejs.org/api/async_context.html#class-asynclocalstorage
363+
asyncModules.push(async () => ctxFileStorage.run(file, () => fn()));
364+
},
365+
require: (extensionName: string) => {
366+
const file = ctxFileStorage.getStore();
367+
if (!file) {
368+
throw new Error('No file stored in context');
369+
}
370+
371+
if (this.extensions[extensionName]) {
372+
return new (this.extensions[extensionName])(this.cubeFactory, this, cubes);
373+
} else {
374+
const foundFile = this.resolveModuleFile(file, extensionName, transpiledFiles, errorsReport);
375+
if (!foundFile && this.allowNodeRequire) {
376+
if (extensionName.indexOf('.') === 0) {
377+
extensionName = path.resolve(this.repository.localPath(), extensionName);
378+
}
379+
// eslint-disable-next-line global-require,import/no-dynamic-require
380+
const Extension = require(extensionName);
381+
if (Object.getPrototypeOf(Extension).name === 'AbstractExtension') {
382+
return new Extension(this.cubeFactory, this, cubes);
383+
}
384+
return Extension;
385+
}
386+
this.compileFile(
387+
foundFile,
388+
errorsReport,
389+
compiledFiles,
390+
[],
391+
{ doSyntaxCheck: true }
392+
);
393+
exports[foundFile.fileName] = exports[foundFile.fileName] || {};
394+
return exports[foundFile.fileName];
395+
}
396+
},
397+
COMPILE_CONTEXT: this.standalone ? this.standaloneCompileContextProxy() : this.cloneCompileContextWithGetterAlias(this.compileContext || {}),
398+
});
399+
400+
const compilePhase = async (compilers: CompileCubeFilesCompilers, stage: 0 | 1 | 2 | 3) => {
401+
// clear the objects for the next phase
402+
cubes = [];
403+
exports = {};
404+
contexts = [];
405+
compiledFiles = {};
406+
asyncModules = [];
407+
transpiledFiles = await transpile(stage);
408+
409+
return this.compileCubeFiles(cubes, contexts, compiledFiles, asyncModules, compilers, transpiledFiles, errorsReport);
410+
};
303411

304412
return compilePhase({ cubeCompilers: this.cubeNameCompilers }, 0)
305413
.then(() => compilePhase({ cubeCompilers: this.preTranspileCubeCompilers.concat([this.viewCompilationGate]) }, 1))
@@ -311,6 +419,14 @@ export class DataSchemaCompiler {
311419
contextCompilers: this.contextCompilers,
312420
}, 3))
313421
.then(() => {
422+
// Free unneeded resources
423+
cubes = [];
424+
exports = {};
425+
contexts = [];
426+
compiledFiles = {};
427+
asyncModules = [];
428+
transpiledFiles = [];
429+
314430
if (transpilationNative) {
315431
// Clean up cache
316432
const dummyFile = {
@@ -336,6 +452,7 @@ export class DataSchemaCompiler {
336452
this.throwIfAnyErrors();
337453
}
338454
// Free unneeded resources
455+
this.compileV8ContextCache = null;
339456
this.cubeDictionary.free();
340457
this.cubeSymbols.free();
341458
return res;
@@ -345,7 +462,11 @@ export class DataSchemaCompiler {
345462
return this.compilePromise;
346463
}
347464

348-
private async transpileFile(file: FileContent, errorsReport: ErrorReporter, options: TranspileOptions = {}) {
465+
private async transpileFile(
466+
file: FileContent,
467+
errorsReport: ErrorReporter,
468+
options: TranspileOptions = {}
469+
): Promise<(FileContent | undefined)> {
349470
if (file.fileName.endsWith('.jinja') ||
350471
(file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml'))
351472
// TODO do Jinja syntax check with jinja compiler
@@ -374,7 +495,11 @@ export class DataSchemaCompiler {
374495
* Right now it is used only for transpilation in native,
375496
* so no checks for transpilation type inside this method
376497
*/
377-
private async transpileJsFilesBulk(files: FileContent[], errorsReport: ErrorReporter, { cubeNames, cubeSymbols, contextSymbols, transpilerNames, compilerId, stage }: TranspileOptions) {
498+
private async transpileJsFilesBulk(
499+
files: FileContent[],
500+
errorsReport: ErrorReporter,
501+
{ cubeNames, cubeSymbols, contextSymbols, transpilerNames, compilerId, stage }: TranspileOptions
502+
): Promise<(FileContent | undefined)[]> {
378503
// for bulk processing this data may be optimized even more by passing transpilerNames, compilerId only once for a bulk
379504
// but this requires more complex logic to be implemented in the native side.
380505
// And comparing to the file content sizes, a few bytes of JSON data is not a big deal here
@@ -408,7 +533,11 @@ export class DataSchemaCompiler {
408533
});
409534
}
410535

411-
private async transpileJsFile(file: FileContent, errorsReport: ErrorReporter, { cubeNames, cubeSymbols, contextSymbols, transpilerNames, compilerId, stage }: TranspileOptions) {
536+
private async transpileJsFile(
537+
file: FileContent,
538+
errorsReport: ErrorReporter,
539+
{ cubeNames, cubeSymbols, contextSymbols, transpilerNames, compilerId, stage }: TranspileOptions
540+
): Promise<(FileContent | undefined)> {
412541
try {
413542
if (getEnv('transpilationNative')) {
414543
const reqData = {
@@ -493,22 +622,20 @@ export class DataSchemaCompiler {
493622
return this.currentQuery;
494623
}
495624

496-
private async compileCubeFiles(compilers: CompileCubeFilesCompilers, toCompile: FileContent[], errorsReport: ErrorReporter) {
497-
const cubes = [];
498-
const exports = {};
499-
const contexts = [];
500-
const compiledFiles = {};
501-
const asyncModules = [];
502-
625+
private async compileCubeFiles(
626+
cubes: CubeDefinition[],
627+
contexts: Record<string, any>[],
628+
compiledFiles: Record<string, boolean>,
629+
asyncModules: CallableFunction[],
630+
compilers: CompileCubeFilesCompilers,
631+
toCompile: FileContent[],
632+
errorsReport: ErrorReporter
633+
) {
503634
toCompile
504635
.forEach((file) => {
505636
this.compileFile(
506637
file,
507638
errorsReport,
508-
cubes,
509-
exports,
510-
contexts,
511-
toCompile,
512639
compiledFiles,
513640
asyncModules
514641
);
@@ -523,7 +650,11 @@ export class DataSchemaCompiler {
523650
}
524651

525652
private compileFile(
526-
file: FileContent, errorsReport: ErrorReporter, cubes, exports, contexts, toCompile, compiledFiles, asyncModules, { doSyntaxCheck } = { doSyntaxCheck: false }
653+
file: FileContent,
654+
errorsReport: ErrorReporter,
655+
compiledFiles: Record<string, boolean>,
656+
asyncModules: CallableFunction[],
657+
{ doSyntaxCheck } = { doSyntaxCheck: false }
527658
) {
528659
if (compiledFiles[file.fileName]) {
529660
return;
@@ -532,7 +663,7 @@ export class DataSchemaCompiler {
532663
compiledFiles[file.fileName] = true;
533664

534665
if (file.fileName.endsWith('.js')) {
535-
this.compileJsFile(file, errorsReport, cubes, contexts, exports, asyncModules, toCompile, compiledFiles, { doSyntaxCheck });
666+
this.compileJsFile(file, errorsReport, { doSyntaxCheck });
536667
} else if (file.fileName.endsWith('.yml.jinja') || file.fileName.endsWith('.yaml.jinja') ||
537668
(
538669
file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml')
@@ -542,17 +673,11 @@ export class DataSchemaCompiler {
542673
asyncModules.push(() => this.yamlCompiler.compileYamlWithJinjaFile(
543674
file,
544675
errorsReport,
545-
cubes,
546-
contexts,
547-
exports,
548-
asyncModules,
549-
toCompile,
550-
compiledFiles,
551676
this.standalone ? {} : this.cloneCompileContextWithGetterAlias(this.compileContext),
552677
this.pythonContext!
553678
));
554679
} else if (file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml')) {
555-
this.yamlCompiler.compileYamlFile(file, errorsReport, cubes, contexts, exports, asyncModules, toCompile, compiledFiles);
680+
this.yamlCompiler.compileYamlFile(file, errorsReport);
556681
}
557682
}
558683

@@ -568,7 +693,11 @@ export class DataSchemaCompiler {
568693
return script;
569694
}
570695

571-
public compileJsFile(file: FileContent, errorsReport: ErrorReporter, cubes, contexts, exports, asyncModules, toCompile, compiledFiles, { doSyntaxCheck } = { doSyntaxCheck: false }) {
696+
public compileJsFile(
697+
file: FileContent,
698+
errorsReport: ErrorReporter,
699+
{ doSyntaxCheck } = { doSyntaxCheck: false }
700+
) {
572701
if (doSyntaxCheck) {
573702
// There is no need to run syntax check for data model files
574703
// because they were checked during transpilation/transformation phase
@@ -582,62 +711,12 @@ export class DataSchemaCompiler {
582711
try {
583712
const script = this.getJsScript(file);
584713

585-
script.runInNewContext({
586-
view: (name, cube) => (
587-
!cube ?
588-
this.cubeFactory({ ...name, fileName: file.fileName, isView: true }) :
589-
cubes.push({ ...cube, name, fileName: file.fileName, isView: true })
590-
),
591-
cube:
592-
(name, cube) => (
593-
!cube ?
594-
this.cubeFactory({ ...name, fileName: file.fileName }) :
595-
cubes.push({ ...cube, name, fileName: file.fileName })
596-
),
597-
context: (name, context) => contexts.push({ ...context, name, fileName: file.fileName }),
598-
addExport: (obj) => {
599-
exports[file.fileName] = exports[file.fileName] || {};
600-
exports[file.fileName] = Object.assign(exports[file.fileName], obj);
601-
},
602-
setExport: (obj) => {
603-
exports[file.fileName] = obj;
604-
},
605-
asyncModule: (fn) => {
606-
asyncModules.push(fn);
607-
},
608-
require: (extensionName) => {
609-
if (this.extensions[extensionName]) {
610-
return new (this.extensions[extensionName])(this.cubeFactory, this, cubes);
611-
} else {
612-
const foundFile = this.resolveModuleFile(file, extensionName, toCompile, errorsReport);
613-
if (!foundFile && this.allowNodeRequire) {
614-
if (extensionName.indexOf('.') === 0) {
615-
extensionName = path.resolve(this.repository.localPath(), extensionName);
616-
}
617-
// eslint-disable-next-line global-require,import/no-dynamic-require
618-
const Extension = require(extensionName);
619-
if (Object.getPrototypeOf(Extension).name === 'AbstractExtension') {
620-
return new Extension(this.cubeFactory, this, cubes);
621-
}
622-
return Extension;
623-
}
624-
this.compileFile(
625-
foundFile,
626-
errorsReport,
627-
cubes,
628-
exports,
629-
contexts,
630-
toCompile,
631-
compiledFiles,
632-
[],
633-
{ doSyntaxCheck: true }
634-
);
635-
exports[foundFile.fileName] = exports[foundFile.fileName] || {};
636-
return exports[foundFile.fileName];
637-
}
638-
},
639-
COMPILE_CONTEXT: this.standalone ? this.standaloneCompileContextProxy() : this.cloneCompileContextWithGetterAlias(this.compileContext || {}),
640-
}, { filename: file.fileName, timeout: 15000 });
714+
// We use AsyncLocalStorage to store the current file context
715+
// so that it can be accessed in the script execution context even within async functions.
716+
// @see https://nodejs.org/api/async_context.html#class-asynclocalstorage
717+
ctxFileStorage.run(file, () => {
718+
script.runInContext(this.compileV8ContextCache!, { timeout: 15000 });
719+
});
641720
} catch (e) {
642721
errorsReport.error(e);
643722
}

packages/cubejs-schema-compiler/src/compiler/PrepareCompiler.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { DataSchemaCompiler } from './DataSchemaCompiler';
99
import {
1010
CubeCheckDuplicatePropTranspiler,
1111
CubePropContextTranspiler,
12+
IIFETranspiler,
1213
ImportExportTranspiler,
1314
TranspilerInterface,
1415
ValidationTranspiler,
@@ -63,6 +64,7 @@ export const prepareCompiler = (repo: SchemaFileRepository, options: PrepareComp
6364
new ValidationTranspiler(),
6465
new ImportExportTranspiler(),
6566
new CubePropContextTranspiler(cubeSymbols, cubeDictionary, viewCompiler),
67+
new IIFETranspiler(),
6668
];
6769

6870
if (!options.allowJsDuplicatePropsInSchema) {

0 commit comments

Comments
 (0)