Skip to content

Commit 36eb395

Browse files
committed
refactor: split first and later compile/transpile stages
1 parent 62167d6 commit 36eb395

File tree

2 files changed

+122
-78
lines changed

2 files changed

+122
-78
lines changed

packages/cubejs-backend-shared/src/FileRepository.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ export interface FileContent {
66
content: string;
77
readOnly?: boolean;
88
isModule?: boolean;
9-
convertedToJs?: boolean;
109
}
1110

1211
export interface SchemaFileRepository {

packages/cubejs-schema-compiler/src/compiler/DataSchemaCompiler.ts

Lines changed: 122 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,24 @@ const getThreadsCount = () => {
4747
return 3; // Default (like the workerpool do)
4848
};
4949

50+
/**
 * Splits `files` into contiguous, order-preserving chunks so they can be
 * processed in bulk. Concatenating the returned chunks always yields every
 * input file exactly once, in the original order.
 *
 * When there are fewer than `chunksCount * chunksCount` files, a single chunk
 * holding the original array is returned. Otherwise exactly `chunksCount`
 * chunks are produced: each holds `floor(files.length / chunksCount)` files,
 * and the last one additionally takes the remainder.
 *
 * @param files Files to distribute.
 * @param chunksCount Desired number of chunks. Values below 1 and NaN are
 *   treated as 1, and fractional values are rounded down, so no file is ever
 *   dropped because of an unexpected count.
 * @returns The list of chunks.
 */
const splitFilesToChunks = (files: FileContent[], chunksCount: number): FileContent[][] => {
  // Normalise to a positive integer: with 0, a negative number or NaN the loop below
  // would never run, and with a fractional value the "last chunk" check would never
  // match - in both cases files would be silently lost.
  const normalizedCount = chunksCount >= 1 ? Math.floor(chunksCount) : 1;

  if (files.length < normalizedCount * normalizedCount) {
    return [files];
  }

  const baseSize = Math.floor(files.length / normalizedCount);

  return Array.from({ length: normalizedCount }, (_, index) => {
    const start = index * baseSize;
    // The last chunk takes the remaining files so the extra ones are not lost.
    const end = index === normalizedCount - 1 ? files.length : start + baseSize;

    return files.slice(start, end);
  });
};
67+
5068
export type DataSchemaCompilerOptions = {
5169
compilerCache: CompilerCache;
5270
omitErrors?: boolean;
@@ -258,34 +276,62 @@ export class DataSchemaCompiler {
258276
);
259277
}
260278

261-
const transpile = async (stage: CompileStage): Promise<FileContent[]> => {
279+
const transpilePhaseFirst = async (stage: CompileStage): Promise<FileContent[]> => {
262280
let cubeNames: string[] = [];
263281
let cubeSymbols: Record<string, Record<string, boolean>> = {};
264282
let transpilerNames: string[] = [];
265283
let results: (FileContent | undefined)[];
266284

267285
if (transpilationNative || transpilationWorkerThreads) {
268-
cubeNames = Object.keys(this.cubeDictionary.byId);
269-
// We need only cubes and all its member names for transpiling.
270-
// Cubes doesn't change during transpiling, but are changed during compilation phase,
271-
// so we can prepare them once for every phase.
272-
// Communication between main and worker threads uses
273-
// The structured clone algorithm (@see https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm)
274-
// which doesn't allow passing any function objects, so we need to sanitize the symbols.
275-
// Communication with native backend also involves deserialization.
276-
cubeSymbols = Object.fromEntries(
277-
Object.entries(this.cubeSymbols.symbols as Record<string, Record<string, any>>)
278-
.map(
279-
([key, value]: [string, Record<string, any>]) => [key, Object.fromEntries(
280-
Object.keys(value).map((k) => [k, true]),
281-
)],
282-
),
283-
);
286+
({ cubeNames, cubeSymbols, transpilerNames } = this.prepareTranspileSymbols());
287+
}
288+
289+
if (transpilationNative) {
290+
const nonJsFilesTasks = toCompile.filter(file => !file.fileName.endsWith('.js'))
291+
.map(f => this.transpileFile(f, errorsReport, { transpilerNames, compilerId }));
292+
293+
const jsFiles = toCompile.filter(file => file.fileName.endsWith('.js'));
294+
let jsFilesTasks: Promise<(FileContent | undefined)[]>[] = [];
295+
296+
if (jsFiles.length > 0) {
297+
// Warming up swc compiler cache
298+
const dummyFile = {
299+
fileName: 'dummy.js',
300+
content: ';',
301+
};
302+
303+
await this.transpileJsFile(dummyFile, errorsReport, { cubeNames, cubeSymbols, transpilerNames, contextSymbols: CONTEXT_SYMBOLS, compilerId, stage });
304+
305+
const jsChunks = splitFilesToChunks(jsFiles, transpilationNativeThreadsCount);
306+
jsFilesTasks = jsChunks.map(chunk => this.transpileJsFilesBulk(chunk, errorsReport, { transpilerNames, compilerId }));
307+
}
284308

285-
// Transpilers are the same for all files within phase.
286-
transpilerNames = this.transpilers.map(t => t.constructor.name);
309+
results = (await Promise.all([...nonJsFilesTasks, ...jsFilesTasks])).flat();
310+
} else if (transpilationWorkerThreads) {
311+
results = await Promise.all(toCompile.map(f => this.transpileFile(f, errorsReport, { cubeNames, cubeSymbols, transpilerNames })));
312+
} else {
313+
results = await Promise.all(toCompile.map(f => this.transpileFile(f, errorsReport, {})));
314+
}
315+
316+
return results.filter(f => !!f) as FileContent[];
317+
};
318+
319+
const transpilePhase = async (stage: CompileStage): Promise<FileContent[]> => {
320+
let cubeNames: string[] = [];
321+
let cubeSymbols: Record<string, Record<string, boolean>> = {};
322+
let transpilerNames: string[] = [];
323+
let results: (FileContent | undefined)[];
324+
325+
if (toCompile.length === 0) {
326+
return [];
287327
}
288328

329+
if (transpilationNative || transpilationWorkerThreads) {
330+
({ cubeNames, cubeSymbols, transpilerNames } = this.prepareTranspileSymbols());
331+
}
332+
333+
// After the first phase, every file contains JS source code: either the original or the transpiled one.
334+
289335
if (transpilationNative) {
290336
// Warming up swc compiler cache
291337
const dummyFile = {
@@ -295,34 +341,14 @@ export class DataSchemaCompiler {
295341

296342
await this.transpileJsFile(dummyFile, errorsReport, { cubeNames, cubeSymbols, transpilerNames, contextSymbols: CONTEXT_SYMBOLS, compilerId, stage });
297343

298-
const nonJsFilesTasks = toCompile.filter(file => !file.fileName.endsWith('.js') && !file.convertedToJs)
299-
.map(f => this.transpileFile(f, errorsReport, { transpilerNames, compilerId }));
300-
301-
const jsFiles = toCompile.filter(file => file.fileName.endsWith('.js') || file.convertedToJs);
302-
let JsFilesTasks = [];
344+
const jsChunks = splitFilesToChunks(toCompile, transpilationNativeThreadsCount);
345+
const jsFilesTasks = jsChunks.map(chunk => this.transpileJsFilesBulk(chunk, errorsReport, { transpilerNames, compilerId }));
303346

304-
if (jsFiles.length > 0) {
305-
let jsChunks;
306-
if (jsFiles.length < transpilationNativeThreadsCount * transpilationNativeThreadsCount) {
307-
jsChunks = [jsFiles];
308-
} else {
309-
const baseSize = Math.floor(jsFiles.length / transpilationNativeThreadsCount);
310-
jsChunks = [];
311-
for (let i = 0; i < transpilationNativeThreadsCount; i++) {
312-
// For the last part, we take the remaining files so we don't lose the extra ones.
313-
const start = i * baseSize;
314-
const end = (i === transpilationNativeThreadsCount - 1) ? jsFiles.length : start + baseSize;
315-
jsChunks.push(jsFiles.slice(start, end));
316-
}
317-
}
318-
JsFilesTasks = jsChunks.map(chunk => this.transpileJsFilesBulk(chunk, errorsReport, { transpilerNames, compilerId }));
319-
}
320-
321-
results = (await Promise.all([...nonJsFilesTasks, ...JsFilesTasks])).flat();
347+
results = (await Promise.all(jsFilesTasks)).flat();
322348
} else if (transpilationWorkerThreads) {
323-
results = await Promise.all(toCompile.map(f => this.transpileFile(f, errorsReport, { cubeNames, cubeSymbols, transpilerNames })));
349+
results = await Promise.all(toCompile.map(f => this.transpileJsFile(f, errorsReport, { cubeNames, cubeSymbols, transpilerNames })));
324350
} else {
325-
results = await Promise.all(toCompile.map(f => this.transpileFile(f, errorsReport, {})));
351+
results = await Promise.all(toCompile.map(f => this.transpileJsFile(f, errorsReport, {})));
326352
}
327353

328354
return results.filter(f => !!f) as FileContent[];
@@ -335,6 +361,14 @@ export class DataSchemaCompiler {
335361
let asyncModules: CallableFunction[] = [];
336362
let transpiledFiles: FileContent[] = [];
337363

364+
const cleanup = () => {
365+
cubes = [];
366+
exports = {};
367+
contexts = [];
368+
compiledFiles = {};
369+
asyncModules = [];
370+
};
371+
338372
this.compileV8ContextCache = vm.createContext({
339373
view: (name, cube) => {
340374
const file = ctxFileStorage.getStore();
@@ -420,26 +454,28 @@ export class DataSchemaCompiler {
420454
COMPILE_CONTEXT: this.standalone ? this.standaloneCompileContextProxy() : this.cloneCompileContextWithGetterAlias(this.compileContext || {}),
421455
});
422456

423-
const compilePhase = async (compilers: CompileCubeFilesCompilers, stage: 0 | 1 | 2 | 3) => {
457+
const compilePhaseFirst = async (compilers: CompileCubeFilesCompilers, stage: 0 | 1 | 2 | 3) => {
424458
// clear the objects for the next phase
425-
cubes = [];
426-
exports = {};
427-
contexts = [];
428-
compiledFiles = {};
429-
asyncModules = [];
430-
transpiledFiles = await transpile(stage);
459+
cleanup();
460+
transpiledFiles = await transpilePhaseFirst(stage);
431461

432-
if (stage === 0) {
433-
// We render jinja and transpile yaml only once on first phase and then use resulting JS for these files
434-
// afterward avoiding costly YAML/Python parsing again. Original JS files are preserved as is for cache hits.
435-
const convertedToJsFiles = transpiledFiles.filter(f => f.convertedToJs);
436-
toCompile = [...originalJsFiles, ...convertedToJsFiles];
437-
}
462+
// We render jinja and transpile yaml only once on first phase and then use resulting JS for these files
463+
// afterward avoiding costly YAML/Python parsing again. Original JS files are preserved as is for cache hits.
464+
const convertedToJsFiles = transpiledFiles.filter(f => !f.fileName.endsWith('.js'));
465+
toCompile = [...originalJsFiles, ...convertedToJsFiles];
466+
467+
return this.compileCubeFiles(cubes, contexts, compiledFiles, asyncModules, compilers, transpiledFiles, errorsReport);
468+
};
469+
470+
const compilePhase = async (compilers: CompileCubeFilesCompilers, stage: 0 | 1 | 2 | 3) => {
471+
// clear the objects for the next phase
472+
cleanup();
473+
transpiledFiles = await transpilePhase(stage);
438474

439475
return this.compileCubeFiles(cubes, contexts, compiledFiles, asyncModules, compilers, transpiledFiles, errorsReport);
440476
};
441477

442-
return compilePhase({ cubeCompilers: this.cubeNameCompilers }, 0)
478+
return compilePhaseFirst({ cubeCompilers: this.cubeNameCompilers }, 0)
443479
.then(() => compilePhase({ cubeCompilers: this.preTranspileCubeCompilers.concat([this.viewCompilationGate]) }, 1))
444480
.then(() => (this.viewCompilationGate.shouldCompileViews() ?
445481
compilePhase({ cubeCompilers: this.viewCompilers }, 2)
@@ -450,11 +486,7 @@ export class DataSchemaCompiler {
450486
}, 3))
451487
.then(() => {
452488
// Free unneeded resources
453-
cubes = [];
454-
exports = {};
455-
contexts = [];
456-
compiledFiles = {};
457-
asyncModules = [];
489+
cleanup();
458490
transpiledFiles = [];
459491
toCompile = [];
460492

@@ -508,36 +540,49 @@ export class DataSchemaCompiler {
508540
});
509541
}
510542

543+
/**
 * Collects the data the transpilers need: cube names, a sanitized map of
 * cube symbols, and the names of the configured transpilers.
 *
 * Only cubes and their member names are needed for transpiling. Cubes don't
 * change during transpiling, but they do change during the compilation
 * phase, so this can be prepared once per phase.
 *
 * @returns An object with `cubeNames`, `cubeSymbols` and `transpilerNames`.
 */
private prepareTranspileSymbols() {
  const cubeNames: string[] = Object.keys(this.cubeDictionary.byId);

  // Main and worker threads communicate via the structured clone algorithm
  // (@see https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm),
  // which doesn't allow passing any function objects, so every member value is replaced by `true`.
  // Communication with the native backend also involves deserialization.
  const toMemberFlags = (members: Record<string, any>): Record<string, boolean> => {
    const flagPairs = Object.keys(members).map((memberName): [string, boolean] => [memberName, true]);
    return Object.fromEntries(flagPairs);
  };

  const rawSymbols = this.cubeSymbols.symbols as Record<string, Record<string, any>>;
  const symbolPairs = Object.entries(rawSymbols).map(
    ([cubeName, members]): [string, Record<string, boolean>] => [cubeName, toMemberFlags(members)],
  );
  const cubeSymbols: Record<string, Record<string, boolean>> = Object.fromEntries(symbolPairs);

  // Transpilers are the same for all files within a phase.
  const transpilerNames: string[] = this.transpilers.map(transpiler => transpiler.constructor.name);

  return { cubeNames, cubeSymbols, transpilerNames };
}
566+
511567
private async transpileFile(
512568
file: FileContent,
513569
errorsReport: ErrorReporter,
514570
options: TranspileOptions = {}
515571
): Promise<(FileContent | undefined)> {
516-
if (file.fileName.endsWith('.js') || file.convertedToJs) {
572+
if (file.fileName.endsWith('.js')) {
517573
return this.transpileJsFile(file, errorsReport, options);
518574
} else if (file.fileName.endsWith('.jinja') ||
519575
(file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml'))
520576
&& file.content.match(JINJA_SYNTAX)
521577
) {
522-
const transpiledFile = await this.yamlCompiler.compileYamlWithJinjaFile(
578+
return this.yamlCompiler.compileYamlWithJinjaFile(
523579
file,
524580
errorsReport,
525581
this.standalone ? {} : this.cloneCompileContextWithGetterAlias(this.compileContext),
526582
this.pythonContext!
527583
);
528-
if (transpiledFile) {
529-
transpiledFile.convertedToJs = true;
530-
}
531-
532-
return transpiledFile;
533584
} else if (file.fileName.endsWith('.yml') || file.fileName.endsWith('.yaml')) {
534-
const transpiledFile = this.yamlCompiler.transpileYamlFile(file, errorsReport);
535-
536-
if (transpiledFile) {
537-
transpiledFile.convertedToJs = true;
538-
}
539-
540-
return transpiledFile;
585+
return this.yamlCompiler.transpileYamlFile(file, errorsReport);
541586
} else {
542587
return file;
543588
}

0 commit comments

Comments
 (0)