Skip to content

Commit 4932c4e

Browse files
committed
feat: introduce parallelization worker for chunked generation processing
1 parent 260cc1d commit 4932c4e

File tree

20 files changed

+701
-307
lines changed

20 files changed

+701
-307
lines changed

bin/commands/generate.mjs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ import { loadAndParse } from '../utils.mjs';
1616

1717
const availableGenerators = Object.keys(publicGenerators);
1818

19+
// Half of available logical CPUs guarantees in general all physical CPUs are being used
20+
// which in most scenarios is the best way to maximize performance
21+
const optimalThreads = Math.floor(cpus().length / 2) - 1;
22+
1923
/**
2024
* @typedef {Object} Options
2125
* @property {Array<string>|string} input - Specifies the glob/path for input files.
@@ -26,6 +30,7 @@ const availableGenerators = Object.keys(publicGenerators);
2630
* @property {string} typeMap - Specifies the path to the Node.js Type Map.
2731
* @property {string} [gitRef] - Git ref/commit URL.
2832
* @property {number} [threads] - Number of threads to allow.
33+
* @property {number} [chunkSize] - Number of items to process per worker thread.
2934
*/
3035

3136
/**
@@ -61,10 +66,20 @@ export default {
6166
},
6267
threads: {
6368
flags: ['-p', '--threads <number>'],
69+
desc: 'Number of worker threads to use',
6470
prompt: {
6571
type: 'text',
6672
message: 'How many threads to allow',
67-
initialValue: String(Math.max(cpus().length, 1)),
73+
initialValue: String(Math.max(optimalThreads, 1)),
74+
},
75+
},
76+
chunkSize: {
77+
flags: ['--chunk-size <number>'],
78+
desc: 'Number of items to process per worker thread (default: auto)',
79+
prompt: {
80+
type: 'text',
81+
message: 'Items per worker thread',
82+
initialValue: '20',
6883
},
6984
},
7085
version: {
@@ -149,6 +164,7 @@ export default {
149164
releases,
150165
gitRef: opts.gitRef,
151166
threads: parseInt(opts.threads, 10),
167+
chunkSize: parseInt(opts.chunkSize, 10),
152168
index,
153169
typeMap,
154170
});

src/generators.mjs

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import { allGenerators } from './generators/index.mjs';
44
import WorkerPool from './threading/index.mjs';
5+
import createParallelWorker from './threading/parallel.mjs';
56

67
/**
78
* This method creates a system that allows you to register generators
@@ -31,14 +32,26 @@ const createGenerator = input => {
3132
*/
3233
const cachedGenerators = { ast: Promise.resolve(input) };
3334

34-
const threadPool = new WorkerPool();
35-
3635
/**
3736
* Runs the Generator engine with the provided top-level input and the given generator options
3837
*
3938
* @param {GeneratorOptions} options The options for the generator runtime
4039
*/
41-
const runGenerators = async ({ generators, threads, ...extra }) => {
40+
const runGenerators = async ({
41+
generators,
42+
threads,
43+
chunkSize,
44+
...extra
45+
}) => {
46+
// WorkerPool for running full generators in worker threads
47+
const generatorPool = new WorkerPool('./generator-worker.mjs', threads);
48+
49+
// WorkerPool for chunk-level parallelization within generators
50+
const chunkPool = new WorkerPool('./chunk-worker.mjs', threads);
51+
52+
// Options including threading config
53+
const threadingOptions = { threads, chunkSize };
54+
4255
// Note that this method is blocking, and will only execute one generator per-time
4356
// but it ensures all dependencies are resolved, and that multiple bottom-level generators
4457
// can reuse the already parsed content from the top-level/dependency generators
@@ -50,20 +63,32 @@ const createGenerator = input => {
5063
if (dependsOn && dependsOn in cachedGenerators === false) {
5164
await runGenerators({
5265
...extra,
53-
threads,
66+
...threadingOptions,
5467
generators: [dependsOn],
5568
});
5669
}
5770

5871
// Ensures that the dependency output gets resolved before we run the current
5972
// generator with its dependency output as the input
60-
const dependencyOutput = await cachedGenerators[dependsOn];
73+
const input = await cachedGenerators[dependsOn];
74+
75+
// Create a ParallelWorker for this generator to use for item-level parallelization
76+
const worker = createParallelWorker(generatorName, chunkPool, {
77+
...extra,
78+
...threadingOptions,
79+
});
80+
81+
// Generator options with worker instance
82+
const generatorOptions = { ...extra, ...threadingOptions, worker };
83+
84+
// Worker options for the worker thread
85+
const workerOptions = { ...extra, ...threadingOptions };
6186

6287
// Adds the current generator execution Promise to the cache
6388
cachedGenerators[generatorName] =
6489
threads < 2
65-
? generate(dependencyOutput, extra) // Run in main thread
66-
: threadPool.run(generatorName, dependencyOutput, threads, extra); // Offload to worker thread
90+
? generate(input, generatorOptions) // Run in main thread
91+
: generatorPool.run({ generatorName, input, options: workerOptions }); // Offload to worker thread
6792
}
6893

6994
// Returns the value of the last generator of the current pipeline

src/generators/ast-js/index.mjs

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,42 @@ export default {
2323
dependsOn: 'metadata',
2424

2525
/**
26-
* @param {Input} _
26+
* Process a chunk of JavaScript files in a worker thread.
27+
* Called by chunk-worker.mjs for parallel processing.
28+
*
29+
* @param {unknown} _ - Unused (we use options.input instead)
30+
* @param {number[]} itemIndices - Indices of source files to process
2731
* @param {Partial<GeneratorOptions>} options
2832
*/
29-
async generate(_, options) {
33+
async processChunk(_, itemIndices, { input }) {
3034
const { loadFiles } = createJsLoader();
3135

32-
// Load all of the Javascript sources into memory
33-
const sourceFiles = loadFiles(options.input ?? []);
36+
const sourceFiles = loadFiles(input ?? []);
37+
38+
const { parseJsSource } = createJsParser();
39+
40+
const results = [];
3441

35-
const { parseJsSources } = createJsParser();
42+
for (const idx of itemIndices) {
43+
results.push(await parseJsSource(sourceFiles[idx]));
44+
}
3645

37-
// Parse the Javascript sources into ASTs
38-
const parsedJsFiles = await parseJsSources(sourceFiles);
46+
return results;
47+
},
48+
49+
/**
50+
* @param {Input} _
51+
* @param {Partial<GeneratorOptions>} options
52+
*/
53+
async generate(_, { input, worker }) {
54+
const { loadFiles } = createJsLoader();
55+
56+
// Load all of the Javascript sources into memory
57+
const sourceFiles = loadFiles(input ?? []);
3958

40-
// Return the ASTs so they can be used in another generator
41-
return parsedJsFiles;
59+
// Parse the Javascript sources into ASTs in parallel using worker threads
60+
// Note: We pass sourceFiles as items but _ (empty) as fullInput since
61+
// processChunk reloads files from options.input
62+
return worker.map(sourceFiles, _, { input });
4263
},
4364
};

src/generators/jsx-ast/index.mjs

Lines changed: 37 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
import { OVERRIDDEN_POSITIONS } from './constants.mjs';
21
import { buildSideBarProps } from './utils/buildBarProps.mjs';
32
import buildContent from './utils/buildContent.mjs';
4-
import { groupNodesByModule } from '../../utils/generators.mjs';
3+
import {
4+
buildDocPages,
5+
getSortedHeadNodes,
6+
groupNodesByModule,
7+
} from '../../utils/generators.mjs';
58
import { getRemarkRecma } from '../../utils/remark.mjs';
69

710
/**
@@ -10,78 +13,53 @@ import { getRemarkRecma } from '../../utils/remark.mjs';
1013
* @typedef {Array<ApiDocMetadataEntry>} Input
1114
* @type {GeneratorMetadata<Input, string>}
1215
*/
13-
14-
/**
15-
* Sorts entries by OVERRIDDEN_POSITIONS and then heading name.
16-
* @param {Array<ApiDocMetadataEntry>} entries
17-
*/
18-
const getSortedHeadNodes = entries => {
19-
return entries
20-
.filter(node => node.heading.depth === 1)
21-
.sort((a, b) => {
22-
const ai = OVERRIDDEN_POSITIONS.indexOf(a.api);
23-
const bi = OVERRIDDEN_POSITIONS.indexOf(b.api);
24-
25-
if (ai !== -1 && bi !== -1) {
26-
return ai - bi;
27-
}
28-
29-
if (ai !== -1) {
30-
return -1;
31-
}
32-
33-
if (bi !== -1) {
34-
return 1;
35-
}
36-
37-
return a.heading.data.name.localeCompare(b.heading.data.name);
38-
});
39-
};
40-
4116
export default {
4217
name: 'jsx-ast',
4318
version: '1.0.0',
4419
description: 'Generates JSX AST from the input MDAST',
4520
dependsOn: 'metadata',
4621

4722
/**
48-
* Generates a JSX AST
49-
*
50-
* @param {Input} entries
23+
* Process a chunk of items in a worker thread.
24+
* @param {Input} fullInput
25+
* @param {number[]} itemIndices
5126
* @param {Partial<GeneratorOptions>} options
52-
* @returns {Promise<Array<string>>} Array of generated content
5327
*/
54-
async generate(entries, { index, releases, version }) {
55-
const remarkRecma = getRemarkRecma();
56-
const groupedModules = groupNodesByModule(entries);
57-
const headNodes = getSortedHeadNodes(entries);
58-
59-
// Generate table of contents
60-
const docPages = index
61-
? index.map(({ section, api }) => [section, `${api}.html`])
62-
: headNodes.map(node => [node.heading.data.name, `${node.api}.html`]);
28+
async processChunk(fullInput, itemIndices, { index, releases, version }) {
29+
const processor = getRemarkRecma();
30+
const groupedModules = groupNodesByModule(fullInput);
31+
const headNodes = getSortedHeadNodes(fullInput);
32+
const docPages = buildDocPages(headNodes, index);
6333

64-
// Process each head node and build content
65-
const results = [];
34+
return Promise.all(
35+
itemIndices.map(async idx => {
36+
const entry = headNodes[idx];
6637

67-
for (const entry of headNodes) {
68-
const sideBarProps = buildSideBarProps(
69-
entry,
70-
releases,
71-
version,
72-
docPages
73-
);
38+
const sideBarProps = buildSideBarProps(
39+
entry,
40+
releases,
41+
version,
42+
docPages
43+
);
7444

75-
results.push(
76-
await buildContent(
45+
return buildContent(
7746
groupedModules.get(entry.api),
7847
entry,
7948
sideBarProps,
80-
remarkRecma
81-
)
82-
);
83-
}
49+
processor
50+
);
51+
})
52+
);
53+
},
54+
55+
/**
56+
* Generates a JSX AST
57+
* @param {Input} entries
58+
* @param {Partial<GeneratorOptions>} options
59+
*/
60+
async generate(entries, { index, releases, version, worker }) {
61+
const headNodes = getSortedHeadNodes(entries);
8462

85-
return results;
63+
return worker.map(headNodes, entries, { index, releases, version });
8664
},
8765
};

src/generators/legacy-html-all/index.mjs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { join, resolve } from 'node:path';
55

66
import HTMLMinifier from '@minify-html/node';
77

8-
import { getRemarkRehypeWithShiki } from '../../utils/remark.mjs';
8+
import { getRemarkRehype } from '../../utils/remark.mjs';
99
import dropdowns from '../legacy-html/utils/buildDropdowns.mjs';
1010
import tableOfContents from '../legacy-html/utils/tableOfContents.mjs';
1111

@@ -49,7 +49,7 @@ export default {
4949
const inputWithoutIndex = input.filter(entry => entry.api !== 'index');
5050

5151
// Gets a Remark Processor that parses Markdown to minified HTML
52-
const remarkWithRehype = getRemarkRehypeWithShiki();
52+
const remarkWithRehype = getRemarkRehype();
5353

5454
// Current directory path relative to the `index.mjs` file
5555
// from the `legacy-html` generator, as all the assets are there

0 commit comments

Comments
 (0)