Skip to content

Commit 2567fc5

Browse files
authored
Merge pull request #126 from webdoc-labs/feature/multithreading
Feature: Parallel file indexing in @webdoc/parser
2 parents f7eca6b + 9f0eb07 commit 2567fc5

File tree

19 files changed

+413
-152
lines changed

19 files changed

+413
-152
lines changed

.idea/misc.xml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

example/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
"unit-test": "",
2626
"build": "webdoc --tutorials ./tutorials --site-root example-documentation --site-domain https://webdoc-labs.github.io --verbose",
2727
"build-next": "cd .. && webdoc && cd example",
28-
"build-pixi-api": "cd ../../pixi-api && webdoc && cd ../webdoc/example",
28+
"build-pixi-api": "cd ../../pixi-api && webdoc --verbose && cd ../webdoc/example",
2929
"build-pixi-api-prod": "cd ../../pixi-api && webdoc --site-root pixi-api && cd ../webdoc/example",
3030
"build-pixi-api-gcp": "cd ../../pixi-api && webdoc --tutorials ./projects/guides --verbose && cd ../webdoc/example",
3131
"build-pixi-guides": "cd ../../guides && webdoc --tutorials docs && cd ../webdoc/example",

packages/webdoc-cli/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ generate the `sitemap.xml`. This is useful if you want to integrate with [Algoli
1919
* `--site-root <path>`: If using absolute links in a template, this will set the basepath. The basepath should be the directory in which the documentation is being stored relative to where the server is running. The site root is "/" by default - which means that you'll need to serve the documentation directory as top-level. Note that @webdoc/default-template uses absolute links.
2020
* `-c <config-path>`: This sets the path of the configuration file webdoc uses.
2121
* `-u <tutorials-directory>` - (optional) This should point to a directory containing tutorials written in Markdown (".md") or HTML (".html", ".htm"). JSON files can be used to configure the hierarchy and naming of tutorials (see the Tutorial Configuration section).
22+
* `--no-workers` - Disables usage of worker threads to parallelize parsing.
2223

2324
### Configuration
2425

packages/webdoc-cli/src/index.js

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ async function main(argv: yargs.Argv) {
6363
const config = loadConfig(argv.config);
6464
const tutorials = loadTutorials(argv.tutorials, config.template.routes.tutorials);
6565

66-
6766
if (argv.siteRoot) {
6867
config.template.siteRoot = argv.siteRoot;
6968
}
@@ -107,7 +106,9 @@ async function main(argv: yargs.Argv) {
107106
}
108107

109108
try {
110-
parse(sourceFiles, documentTree);
109+
await parse(sourceFiles, documentTree, {
110+
mainThread: !argv.workers,
111+
});
111112
} catch (e) {
112113
// Make sure we get that API structure out so the user can debug the problem!
113114
if (config.opts && config.opts.export) {
@@ -163,7 +164,9 @@ async function main(argv: yargs.Argv) {
163164
const argv = yargs.scriptName("@webdoc/cli")
164165
.usage("$0 -c <configFile> -u <tutorialDir> --verbose " +
165166
"--site-root <siteRoot> " +
166-
"--site-domain <siteDomain>")
167+
"--site-domain <siteDomain>" +
168+
"--no-workers")
169+
.default("workers", true)
167170
.default("config", path.join(process.cwd(), "webdoc.conf.json"), "webdoc config file")
168171
.alias("c", "config")
169172
.alias("u", "tutorials")

packages/webdoc-externalize/test/read-write.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ const parser = require("@webdoc/parser");
55
const expect = require("chai").expect;
66

77
describe("@webdoc/externalize (read-write test)", function() {
8-
it("should deserialize a serialized documented interface properly", function() {
9-
const inputTree = parser.parse(`
8+
it("should deserialize a serialized documented interface properly", async function() {
9+
const inputTree = await parser.parse(`
1010
/** Symbol 0 */
1111
class Symbol0 {
1212
/** Symbol 1 */
Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
11
declare module "@webdoc/parser" {
2-
import type {Doc} from "@webdoc/types";
2+
import type {RootDoc} from "@webdoc/types";
33

4-
/**
5-
* The parser will accept:
6-
* + a file's contents
7-
* + an array of file contents
8-
+ + a map mapping file-names to file contents
9-
*/
10-
declare type ParserInput = string | string[] | Map<string, string>;
11-
12-
declare function parse(input: ParserInput): Doc;
4+
declare function parse(
5+
target: string | SourceFile[],
6+
root?: RootDoc,
7+
options?: $Shape<{ mainThread: boolean }>,
8+
): Promise<RootDoc>;
139
}

packages/webdoc-parser/src/Logger.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ export function initLogger(defaultLevel: string = "INFO") {
3535
PartialParser: defaultLevel,
3636
DocParser: defaultLevel,
3737
DocumentTreeModifier: defaultLevel,
38+
Indexer: defaultLevel,
3839
},
3940
(level, tag, msg, params) => {
4041
let tagPrefix = `[${tag}]:`;
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// @flow
2+
3+
import {type PackageDoc, type SourceFile} from "@webdoc/types";
4+
import {parserLogger, tag} from "../Logger";
5+
import EventEmitter from "events";
6+
import type {LanguageConfig} from "../types/LanguageIntegration";
7+
import {type Symbol} from "../types/Symbol";
8+
// $FlowFixMe
9+
import {Worker} from "worker_threads";
10+
import os from "os";
11+
import path from "path";
12+
13+
export type JobData = {
14+
jobId: number,
15+
jobLanguageIntegrationModule: string,
16+
jobFile: SourceFile,
17+
jobConfig: LanguageConfig,
18+
};
19+
20+
export type JobResult = {
21+
jobId: number,
22+
jobResult?: Symbol,
23+
jobError?: any,
24+
};
25+
26+
class IndexerWorker extends EventEmitter {
27+
onError: (ev: "error", ...args: any[]) => void;
28+
onMessage: (...args: any[]) => void;
29+
worker: Worker;
30+
31+
constructor(worker: Worker) {
32+
super();
33+
34+
this.onError = this.emit.bind(this, "error");
35+
this.onMessage = this.onMessageImpl.bind(this);
36+
37+
this.worker = worker;
38+
this.worker.on("error", this.onError);
39+
this.worker.on("message", this.onMessage);
40+
}
41+
42+
send(
43+
jobId: number,
44+
jobLanguageIntegrationModule: string,
45+
jobFile: SourceFile,
46+
jobConfig: LanguageConfig,
47+
): number {
48+
this.worker.postMessage(({
49+
jobId,
50+
jobLanguageIntegrationModule,
51+
jobFile,
52+
jobConfig,
53+
}: JobData));
54+
55+
return jobId;
56+
}
57+
58+
onMessageImpl({jobId, jobResult, jobError}: JobResult) {
59+
this.emit(`response-${jobId}`, jobResult, jobError);
60+
}
61+
62+
destroy() {
63+
this.worker.terminate();
64+
}
65+
}
66+
67+
export class IndexerWorkerPool {
68+
workers: IndexerWorker[];
69+
ptr: number = 0;
70+
71+
constructor(limit?: number) {
72+
const workerPoolSize = Math.min(os.cpus().length, limit || 4);
73+
const workers = new Array<Worker>(workerPoolSize);
74+
const workerPath = path.resolve(__dirname, "./worker.js");
75+
76+
parserLogger.info(tag.Indexer, "Using " + workerPoolSize + " worker threads for indexing");
77+
78+
for (let i = 0; i < workerPoolSize; i++) {
79+
workers[i] = new IndexerWorker(new Worker(workerPath));
80+
}
81+
82+
this.workers = workers;
83+
}
84+
85+
repair(symbol: Symbol, packages: { [id: string]: PackageDoc }): Symbol {
86+
const file = symbol.loc.file;
87+
const pkgId = file && file.package.id;
88+
89+
if (file && pkgId && packages[pkgId]) {
90+
file.package = packages[pkgId];
91+
}
92+
for (const child of symbol.members) {
93+
this.repair(child, packages);
94+
}
95+
96+
return symbol;
97+
}
98+
99+
index(
100+
langIntegrationModule: string,
101+
file: SourceFile,
102+
config: LanguageConfig,
103+
packages: { [id: string]: PackageDoc },
104+
): Promise<Symbol> {
105+
return new Promise<Symbol>((resolve, reject) => {
106+
const jobId = this.ptr++;
107+
const worker = this.workers[jobId % this.workers.length];
108+
109+
worker.send(jobId, langIntegrationModule, file, config);
110+
worker.on(`response-${jobId}`, (jobResult: Symbol, jobError: any): void => {
111+
if (jobResult) {
112+
resolve(this.repair(jobResult, packages));
113+
} else {
114+
reject(jobError);
115+
}
116+
});
117+
});
118+
}
119+
120+
destroy() {
121+
for (const worker of this.workers) {
122+
worker.destroy();
123+
}
124+
this.workers.length = 0;
125+
}
126+
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// @flow
2+
3+
import type {LanguageConfig, LanguageIntegration} from "../types/LanguageIntegration";
4+
import {parserLogger, tag} from "../Logger";
5+
import {IndexerWorkerPool} from "./IndexerWorkerPool";
6+
import {type SourceFile} from "@webdoc/types";
7+
import {type Symbol} from "../types/Symbol";
8+
import _ from "lodash";
9+
import fs from "fs";
10+
import os from "os";
11+
import path from "path";
12+
13+
declare var globalThis: any;
14+
15+
// File-extension -> LanguageIntegration mapping
16+
const languages: { [id: string]: LanguageIntegration } = {};
17+
18+
// Register a language-integration that will be used to generate a symbol-tree for files with its
19+
// file-extensions.
20+
export function register(lang: LanguageIntegration): void {
21+
for (const ext of lang.extensions) {
22+
if (languages[ext]) {
23+
parserLogger.warn("LanguageIntegration",
24+
`.${ext} file extension has already been registered`);
25+
}
26+
27+
languages[ext] = lang;
28+
}
29+
}
30+
31+
export async function run(
32+
files: SourceFile[],
33+
config: LanguageConfig,
34+
options?: $Shape<{
35+
mainThread: boolean,
36+
}> = {},
37+
): Promise<Symbol[]> {
38+
const startTime = Date.now();
39+
const maxThreads = Math.min(os.cpus().length, 1 + Math.floor(files.length / 125));
40+
41+
parserLogger.info(tag.Indexer, "Indexing " + files.length + " files");
42+
43+
const symbolTrees: Array<Symbol> = new Array(files.length);
44+
45+
if (maxThreads > 1 && !options.mainThread) {
46+
const packages = _.keyBy(
47+
files.map((file) => file.package),
48+
(pkg) => pkg.id,
49+
);
50+
const symbolIndexingOperations: Array<Promise<void>> = new Array(files.length);
51+
const workerPool = new IndexerWorkerPool(maxThreads);
52+
53+
for (let i = 0; i < files.length; i++) {
54+
const fileName = files[i].path;
55+
const extension = fileName.substring(fileName.lastIndexOf(".") + 1, fileName.length);
56+
57+
if (!(extension in languages)) {
58+
throw new Error(`.${extension} language is not registered with the Indexer!`);
59+
}
60+
61+
const file = {
62+
...files[i],
63+
path: path.resolve(globalThis.process.cwd(), files[i].path),
64+
};
65+
const lang = languages[extension].module;
66+
67+
symbolIndexingOperations[i] = workerPool.index(lang, file, config, packages).then(
68+
(symbolTree: Symbol): void => {
69+
symbolTrees[i] = symbolTree;
70+
},
71+
);
72+
}
73+
74+
await Promise.all(symbolIndexingOperations);
75+
workerPool.destroy();
76+
} else {
77+
for (let i = 0; i < files.length; i++) {
78+
const filePath = path.resolve(globalThis.process.cwd(), files[i].path);
79+
const fileContent = files[i].content || fs.readFileSync(filePath, "utf8");
80+
81+
symbolTrees[i] = process(fileContent, files[i], config);
82+
}
83+
}
84+
85+
const endTime = Date.now();
86+
87+
parserLogger.info(tag.Indexer, "Indexing took " + (endTime - startTime) + "ms");
88+
89+
return symbolTrees;
90+
}
91+
92+
export function process(
93+
file: string,
94+
source: SourceFile,
95+
config: LanguageConfig,
96+
): Symbol {
97+
const fileName = source.path;
98+
const lang = languages[fileName.substring(fileName.lastIndexOf(".") + 1, fileName.length)];
99+
100+
if (!lang) {
101+
throw new Error(`.${lang} file language is not registered`);
102+
}
103+
104+
return lang.parse(file, source, config);
105+
}
106+
107+
export function lang(module: string): LanguageIntegration {
108+
module = module.replace("@webdoc/parser", "../../");
109+
110+
// $FlowFixMe
111+
return ((require(module): any).default: LanguageIntegration);
112+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// @flow
2+
3+
import * as Indexer from "./";
4+
import type {JobData, JobResult} from "./IndexerWorkerPool";
5+
// $FlowFixMe
6+
import {isMainThread, parentPort} from "worker_threads";
7+
import fs from "fs";
8+
9+
function onMessage(data: JobData) {
10+
function onFileRead(err: ?Error, contents: string) {
11+
if (err) {
12+
return parentPort.postMessage(({
13+
jobId: data.jobId,
14+
jobError: err,
15+
}: JobResult));
16+
}
17+
18+
const lang = Indexer.lang(data.jobLanguageIntegrationModule);
19+
const symbolTree = lang.parse(
20+
contents,
21+
data.jobFile,
22+
data.jobConfig,
23+
);
24+
25+
parentPort.postMessage(({
26+
jobId: data.jobId,
27+
jobResult: symbolTree,
28+
}: JobResult));
29+
}
30+
31+
if (!data.jobFile.content) {
32+
fs.readFile(data.jobFile.path, "utf8", onFileRead);
33+
} else {
34+
onFileRead(null, data.jobFile.content);
35+
}
36+
}
37+
38+
if (!isMainThread) {
39+
parentPort.on("message", onMessage);
40+
}

0 commit comments

Comments
 (0)