Skip to content

Commit 7e3b8de

Browse files
authored
don't read all the website DOMs at once. Closes #4350. (#4754)
* don't read all the website DOMs at once. Closes #4350. * read file content, not name 🤦 * changelog
1 parent e5ed82b commit 7e3b8de

File tree

6 files changed

+62
-20
lines changed

6 files changed

+62
-20
lines changed

news/changelog-1.3.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@
207207
- Fix issue with "No inspectable targets" with Chrome Browser ([#4653](https://github.com/quarto-dev/quarto-cli/issues/4653))
208208
- Add `title` attribute for callouts (can be used rather than heading for defining the title)
209209
- Handle more varieties of raw HTML for Docusaurus output
210+
- Read and process the DOM one file at a time in books and websites to reduce total memory usage ([#4350](https://github.com/quarto-dev/quarto-cli/issues/4350)).
210211

211212
## Pandoc filter changes
212213

src/core/deno-dom.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,20 @@ export async function parseHtml(src: string): Promise<HTMLDocument> {
2525
return result;
2626
}
2727

28+
export async function writeDomToHtmlFile(
29+
doc: HTMLDocument,
30+
path: string,
31+
doctype?: string,
32+
) {
33+
if (doc.documentElement === null) {
34+
throw new Error("Document has no root element");
35+
}
36+
const output = doctype
37+
? doctype + "\n" + doc.documentElement.outerHTML
38+
: doc.documentElement.outerHTML;
39+
await Deno.writeTextFile(path, output);
40+
}
41+
2842
// We are combining a number of scripts from
2943
// https://github.com/b-fuze/deno-dom/blob/master/deno-dom-native.ts
3044
// into this. If deno-dom fails, it's likely that this needs to be brought up to date.

src/project/types/book/book-bibliography.ts

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,13 @@ import * as ld from "../../../core/lodash.ts";
1212
import { stringify } from "encoding/yaml.ts";
1313
import { error } from "log/mod.ts";
1414

15-
import { Document, Element, parseHtml } from "../../../core/deno-dom.ts";
15+
import {
16+
Document,
17+
Element,
18+
initDenoDom,
19+
parseHtml,
20+
writeDomToHtmlFile,
21+
} from "../../../core/deno-dom.ts";
1622

1723
import { pathWithForwardSlashes, safeExistsSync } from "../../../core/path.ts";
1824
import { execProcess } from "../../../core/process.ts";
@@ -103,6 +109,8 @@ export async function bookBibliographyPostRender(
103109
incremental: boolean,
104110
outputFiles: WebsiteProjectOutputFile[],
105111
) {
112+
await initDenoDom();
113+
106114
// make sure the references file exists and compute its path
107115
const renderFiles = context.config?.project[kProjectRender] || [];
108116

@@ -130,13 +138,14 @@ export async function bookBibliographyPostRender(
130138
// a global bibliography. also hide the refs div in each document (as it's
131139
// still used by citations-hover)
132140
const citeIds: string[] = [];
133-
outputFiles.forEach((file) => {
141+
for (const file of outputFiles) {
134142
// relative path to refs html
135143
const refsRelative = pathWithForwardSlashes(
136144
relative(dirname(file.file), refsHtml!),
137145
);
138146
// check each citation
139-
forEachCite(file.doc, (cite: Element) => {
147+
const doc = await parseHtml(Deno.readTextFileSync(file.file));
148+
forEachCite(doc, (cite: Element) => {
140149
// record ids
141150
citeIds.push(...citeIdsFromCite(cite));
142151
// fix hrefs
@@ -148,11 +157,11 @@ export async function bookBibliographyPostRender(
148157
});
149158

150159
// hide the bibliography
151-
const refsDiv = file.doc.getElementById("refs");
160+
const refsDiv = doc.getElementById("refs");
152161
if (refsDiv) {
153162
refsDiv.setAttribute("style", "display: none");
154163
}
155-
});
164+
}
156165

157166
// is the refs one of our output files?
158167
const refsOutputFile = outputFiles.find((file) => file.file === refsHtml);
@@ -199,17 +208,28 @@ export async function bookBibliographyPostRender(
199208
"html",
200209
csl,
201210
);
202-
const newRefsDiv = refsOutputFile.doc.createElement("div");
211+
const doc = await parseHtml(Deno.readTextFileSync(refsOutputFile.file));
212+
const newRefsDiv = doc.createElement("div");
203213
newRefsDiv.innerHTML = biblioHtml;
204-
const refsDiv = refsOutputFile.doc.getElementById("refs") as Element;
214+
const refsDiv = doc.getElementById("refs") as Element;
215+
let changed = false;
205216
if (refsDiv) {
217+
changed = true;
206218
refsDiv.replaceWith(newRefsDiv.firstChild);
207219
} else {
208-
const mainEl = refsOutputFile.doc.querySelector("main");
220+
const mainEl = doc.querySelector("main");
209221
if (mainEl) {
222+
changed = true;
210223
mainEl.appendChild(newRefsDiv.firstChild);
211224
}
212225
}
226+
if (changed) {
227+
await writeDomToHtmlFile(
228+
doc,
229+
refsOutputFile.file,
230+
refsOutputFile.doctype,
231+
);
232+
}
213233
}
214234
}
215235
}

src/project/types/book/book-crossrefs.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@ import { warning } from "log/mod.ts";
99
import { dirname, join, relative } from "path/mod.ts";
1010
import { existsSync } from "fs/mod.ts";
1111

12-
import { Element, HTMLDocument } from "../../../core/deno-dom.ts";
12+
import {
13+
Element,
14+
HTMLDocument,
15+
parseHtml,
16+
writeDomToHtmlFile,
17+
} from "../../../core/deno-dom.ts";
1318

1419
import { pathWithForwardSlashes } from "../../../core/path.ts";
1520

@@ -55,13 +60,15 @@ export async function bookCrossrefsPostRender(
5560
const index = bookCrossrefIndexForOutputFile(fileRelative, indexes);
5661
if (index) {
5762
// resolve crossrefs
63+
const doc = await parseHtml(Deno.readTextFileSync(outputFile.file));
5864
resolveCrossrefs(
5965
context,
6066
fileRelative,
6167
outputFile.format,
62-
outputFile.doc,
68+
doc,
6369
index,
6470
);
71+
writeDomToHtmlFile(doc, outputFile.file, outputFile.doctype);
6572
}
6673
}
6774
}

src/project/types/book/book-render.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -604,13 +604,13 @@ export async function bookPostRender(
604604
await bookBibliographyPostRender(context, incremental, websiteFiles);
605605
await bookCrossrefsPostRender(context, websiteFiles);
606606

607-
// write website files
608-
websiteFiles.forEach((websiteFile) => {
609-
const doctype = websiteFile.doctype;
610-
const htmlOutput = (doctype ? doctype + "\n" : "") +
611-
websiteFile.doc.documentElement?.outerHTML!;
612-
Deno.writeTextFileSync(websiteFile.file, htmlOutput);
613-
});
607+
// website files are now already written on a per-file basis
608+
// websiteFiles.forEach((websiteFile) => {
609+
// const doctype = websiteFile.doctype;
610+
// const htmlOutput = (doctype ? doctype + "\n" : "") +
611+
// websiteFile.doc.documentElement?.outerHTML!;
612+
// Deno.writeTextFileSync(websiteFile.file, htmlOutput);
613+
// });
614614

615615
// run standard website stuff (search, etc.)
616616
await websitePostRender(context, incremental, outputFiles);

src/project/types/website/website.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ export const websiteProjectType: ProjectType = {
319319
};
320320

321321
export interface WebsiteProjectOutputFile extends ProjectOutputFile {
322-
doc: HTMLDocument;
322+
// doc: HTMLDocument;
323323
doctype?: string;
324324
}
325325

@@ -358,10 +358,10 @@ export function websiteOutputFiles(outputFiles: ProjectOutputFile[]) {
358358
.map((outputFile) => {
359359
const contents = Deno.readTextFileSync(outputFile.file);
360360
const doctypeMatch = contents.match(/^<!DOCTYPE.*?>/);
361-
const doc = new DOMParser().parseFromString(contents, "text/html")!;
361+
// const doc = new DOMParser().parseFromString(contents, "text/html")!;
362362
return {
363363
...outputFile,
364-
doc,
364+
// doc,
365365
doctype: doctypeMatch ? doctypeMatch[0] : undefined,
366366
};
367367
});

0 commit comments

Comments
 (0)