feat(perf): offload generators to worker threads

avivkeller · avivkeller · commit 3a8cae2bba95 · 2025-04-10T11:38:58.000-04:00
diff --git a/bin/cli.mjs b/bin/cli.mjs
@@ -77,6 +77,12 @@ program
       .choices(Object.keys(reporters))
       .default('console')
   )
+  .addOption(
+    new Option(
+      '--disable-parallelism',
+      'Disable the use of multiple threads'
+    ).default(false)
+  )
   .parse(process.argv);
 
 /**
@@ -108,6 +114,7 @@ const {
   lintDryRun,
   gitRef,
   reporter,
+  disableParallelism,
 } = program.opts();
 
 const linter = createLinter(lintDryRun, disableRule);
@@ -142,6 +149,8 @@ if (target) {
     // An URL containing a git ref URL pointing to the commit or ref that was used
     // to generate the API docs. This is used to link to the source code of the
     gitRef,
+    // Disable the use of parallel threads
+    disableParallelism,
   });
 }
 
diff --git a/src/generators.mjs b/src/generators.mjs
@@ -1,5 +1,8 @@
 'use strict';
 
+import { Worker, isMainThread, parentPort, workerData } from 'worker_threads';
+import os from 'os';
+
 import publicGenerators from './generators/index.mjs';
 import astJs from './generators/ast-js/index.mjs';
 import oramaDb from './generators/orama-db/index.mjs';
@@ -12,6 +15,25 @@ const availableGenerators = {
   'orama-db': oramaDb,
 };
 
+// Cap on simultaneously active worker threads: logical CPU count minus one, but never below 1
+const MAX_THREADS = Math.max(1, os.cpus().length - 1);
+
+// This module is also its own worker entry point: when loaded off the main thread, run one generator
+if (!isMainThread) {
+  const { name, dependencyOutput, extra } = workerData;
+  const generator = availableGenerators[name];
+
+  // Post the result to the parent; failures are posted as `{ error }` so the parent can reject with it
+  generator
+    .generate(dependencyOutput, extra)
+    .then(result => {
+      parentPort.postMessage(result);
+    })
+    .catch(error => {
+      parentPort.postMessage({ error });
+    });
+}
+
 /**
  * @typedef {{ ast: GeneratorMetadata<ApiDocMetadataEntry, ApiDocMetadataEntry>}} AstGenerator The AST "generator" is a facade for the AST tree and it isn't really a generator
  * @typedef {AvailableGenerators & AstGenerator} AllGenerators A complete set of the available generators, including the AST one
@@ -43,30 +65,103 @@ const createGenerator = markdownInput => {
    */
   const cachedGenerators = { ast: Promise.resolve(markdownInput) };
 
+  // Keep track of how many threads are currently running
+  let activeThreads = 0;
+  const threadQueue = [];
+
+  /**
+   * Runs a generator in a worker thread (queued while the pool is full) and settles with what the worker posts
+   * @param {string} name Generator key; the worker looks it up in `availableGenerators`
+   * @param {unknown} dependencyOutput Output of the dependency generator, handed to the worker via `workerData`
+   * @param {object} extra Remaining runtime options, handed to the worker via `workerData`
+   */
+  const runInWorker = (name, dependencyOutput, extra) => {
+    return new Promise((resolve, reject) => {
+      /**
+       * Starts the worker. NOTE(review): no 'exit' listener, so a worker exiting without posting never settles this Promise
+       */
+      const run = () => {
+        activeThreads++;
+
+        const worker = new Worker(new URL(import.meta.url), {
+          workerData: { name, dependencyOutput, extra },
+        });
+
+        worker.on('message', result => {
+          activeThreads--;
+          processQueue();
+
+          if (result && result.error) {
+            reject(result.error);
+          } else {
+            resolve(result);
+          }
+        });
+
+        worker.on('error', err => {
+          activeThreads--;
+          processQueue();
+          reject(err);
+        });
+      };
+
+      if (activeThreads >= MAX_THREADS) {
+        threadQueue.push(run);
+      } else {
+        run();
+      }
+    });
+  };
+
+  /**
+   * Starts the oldest queued worker job, if any, when a thread slot is free
+   */
+  const processQueue = () => {
+    if (threadQueue.length > 0 && activeThreads < MAX_THREADS) {
+      const next = threadQueue.shift();
+      next();
+    }
+  };
+
   /**
    * Runs the Generator engine with the provided top-level input and the given generator options
    *
    * @param {GeneratorOptions} options The options for the generator runtime
    */
-  const runGenerators = async ({ generators, ...extra }) => {
+  const runGenerators = async ({
+    generators,
+    disableParallelism = false,
+    ...extra
+  }) => {
     // Note that this method is blocking, and will only execute one generator per-time
     // but it ensures all dependencies are resolved, and that multiple bottom-level generators
     // can reuse the already parsed content from the top-level/dependency generators
     for (const generatorName of generators) {
-      const { dependsOn, generate } = availableGenerators[generatorName];
+      const {
+        dependsOn,
+        generate,
+        parallizable = true,
+      } = availableGenerators[generatorName];
 
       // If the generator dependency has not yet been resolved, we resolve
       // the dependency first before running the current generator
-      if (dependsOn && dependsOn in cachedGenerators === false) {
-        await runGenerators({ ...extra, generators: [dependsOn] });
+      if (dependsOn && !(dependsOn in cachedGenerators)) {
+        await runGenerators({
+          ...extra,
+          disableParallelism,
+          generators: [dependsOn],
+        });
       }
 
       // Ensures that the dependency output gets resolved before we run the current
       // generator with its dependency output as the input
       const dependencyOutput = await cachedGenerators[dependsOn];
 
       // Adds the current generator execution Promise to the cache
-      cachedGenerators[generatorName] = generate(dependencyOutput, extra);
+      cachedGenerators[generatorName] =
+        disableParallelism || !parallizable
+          ? generate(dependencyOutput, extra) // Run in main thread
+          : runInWorker(generatorName, dependencyOutput, extra); // Offload to worker thread
     }
 
     // Returns the value of the last generator of the current pipeline
diff --git a/src/generators/json-simple/index.mjs b/src/generators/json-simple/index.mjs
@@ -6,7 +6,6 @@ import { join } from 'node:path';
 import { remove } from 'unist-util-remove';
 
 import createQueries from '../../utils/queries/index.mjs';
-import { getRemark } from '../../utils/remark.mjs';
 
 /**
  * This generator generates a simplified JSON version of the API docs and returns it as a string
@@ -35,9 +34,6 @@ export default {
    * @param {Partial<GeneratorOptions>} options
    */
   async generate(input, options) {
-    // Gets a remark processor for stringifying the AST tree into JSON
-    const remarkProcessor = getRemark();
-
     // Iterates the input (ApiDocMetadataEntry) and performs a few changes
     const mappedInput = input.map(node => {
       // Deep clones the content nodes to avoid affecting upstream nodes
@@ -50,12 +46,6 @@ export default {
         createQueries.UNIST.isHeading,
       ]);
 
-      /**
-       * For the JSON generate we want to transform the whole content into JSON
-       * @returns {string} The stringified JSON version of the content
-       */
-      content.toJSON = () => remarkProcessor.stringify(content);
-
       return { ...node, content };
     });
 
diff --git a/src/generators/legacy-html-all/index.mjs b/src/generators/legacy-html-all/index.mjs
@@ -86,7 +86,7 @@ export default {
       .replace('__ID__', 'all')
       .replace(/__FILENAME__/g, 'all')
       .replace('__SECTION__', 'All')
-      .replace(/__VERSION__/g, `v${version.toString()}`)
+      .replace(/__VERSION__/g, `v${version.version}`)
       .replace(/__TOC__/g, tableOfContents.wrapToC(aggregatedToC))
       .replace(/__GTOC__/g, parsedSideNav)
       .replace('__CONTENT__', aggregatedContent)
diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs
@@ -84,7 +84,6 @@ export default {
      */
     const replaceTemplateValues = values => {
       const { api, added, section, version, toc, nav, content } = values;
-
       return apiTemplate
         .replace('__ID__', api)
         .replace(/__FILENAME__/g, api)
@@ -139,7 +138,7 @@ export default {
         api: head.api,
         added: head.introduced_in ?? '',
         section: head.heading.data.name || apiAsHeading,
-        version: `v${version.toString()}`,
+        version: `v${version.version}`,
         toc: String(parsedToC),
         nav: String(activeSideNav),
         content: parsedContent,
diff --git a/src/generators/legacy-html/utils/buildDropdowns.mjs b/src/generators/legacy-html/utils/buildDropdowns.mjs
@@ -60,8 +60,9 @@ const buildNavigation = navigationContents =>
 const buildVersions = (api, added, versions) => {
   // All Node.js versions that support the current API; If there's no "introduced_at" field,
   // we simply show all versions, as we cannot pinpoint the exact version
+  const coercedMajor = added ? major(coerceSemVer(added)) : null; // Only coerce when `added` is set; it may be ''
   const compatibleVersions = versions.filter(({ version }) =>
-    added ? major(version) >= major(coerceSemVer(added)) : true
+    added ? version.major >= coercedMajor : true
   );
 
   // Parses the SemVer version into something we use for URLs and to display the Node.js version
diff --git a/src/generators/legacy-json/utils/buildSection.mjs b/src/generators/legacy-json/utils/buildSection.mjs
@@ -58,7 +58,7 @@ export const createSectionBuilder = () => {
    * @param {import('../types.d.ts').HierarchizedEntry} entry - The entry providing stability information.
    */
   const parseStability = (section, nodes, { stability }) => {
-    const stabilityInfo = stability.toJSON()?.[0];
+    const stabilityInfo = stability.children[0]?.data; // First stability entry, if any
 
     if (stabilityInfo) {
       section.stability = stabilityInfo.index;
diff --git a/src/linter/tests/fixtures/entries.mjs b/src/linter/tests/fixtures/entries.mjs
@@ -1,10 +1,3 @@
-/**
- * Noop function.
- *
- * @returns {any}
- */
-const noop = () => {};
-
 /**
  * @type {ApiDocMetadataEntry}
  */
@@ -69,12 +62,10 @@ export const assertEntry = {
       slug: 'assert',
       type: 'property',
     },
-    toJSON: noop,
   },
   stability: {
     type: 'root',
     children: [],
-    toJSON: noop,
   },
   content: {
     type: 'root',
diff --git a/src/metadata.mjs b/src/metadata.mjs
@@ -140,17 +140,6 @@ const createMetadata = slugger => {
       internalMetadata.heading.data.type =
         type ?? internalMetadata.heading.data.type;
 
-      /**
-       * Defines the toJSON method for the Heading AST node to be converted as JSON
-       */
-      internalMetadata.heading.toJSON = () => internalMetadata.heading.data;
-
-      /**
-       * Maps the Stability Index AST nodes into a JSON objects from their data properties
-       */
-      internalMetadata.stability.toJSON = () =>
-        internalMetadata.stability.children.map(node => node.data);
-
       // Returns the Metadata entry for the API doc
       return {
         api: apiDoc.stem,
diff --git a/src/test/metadata.test.mjs b/src/test/metadata.test.mjs
@@ -33,7 +33,6 @@ describe('createMetadata', () => {
     };
     metadata.addStability(stability);
     const actual = metadata.create(new VFile(), {}).stability;
-    delete actual.toJSON;
     deepStrictEqual(actual, {
       children: [stability],
       type: 'root',
@@ -82,8 +81,15 @@ describe('createMetadata', () => {
       yaml_position: {},
     };
     const actual = metadata.create(apiDoc, section);
-    delete actual.stability.toJSON;
-    delete actual.heading.toJSON;
     deepStrictEqual(actual, expected);
   });
+
+  it('should be serializable', () => {
+    const { create } = createMetadata(new GitHubSlugger());
+    const actual = create(new VFile({ path: 'test.md' }), {
+      type: 'root',
+      children: [],
+    });
+    deepStrictEqual(structuredClone(actual), actual);
+  });
 });
diff --git a/src/types.d.ts b/src/types.d.ts
@@ -3,12 +3,6 @@ import type { Program } from 'acorn';
 import type { SemVer } from 'semver';
 import type { Data, Node, Parent, Position } from 'unist';
 
-// String serialization of the AST tree
-// @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify#tojson_behavior
-interface WithJSON<T extends Node, J extends any = any> extends T {
-  toJSON: () => J;
-}
-
 // Unist Node with typed Data, which allows better type inference
 interface NodeWithData<T extends Node, J extends Data> extends T {
   data: J;
@@ -88,12 +82,9 @@ declare global {
     // Any changes to the API doc Metadata
     changes: Array<ApiDocMetadataChange>;
     // The parsed Markdown content of a Navigation Entry
-    heading: WithJSON<HeadingMetadataParent, HeadingMetadataEntry>;
+    heading: HeadingMetadataParent;
     // The API doc metadata Entry Stability Index if exists
-    stability: WithJSON<
-      StabilityIndexParent,
-      Array<StabilityIndexMetadataEntry>
-    >;
+    stability: StabilityIndexParent;
     // The subtree containing all Nodes of the API doc entry
     content: Root;
     // Extra YAML section entries that are stringd and serve