Skip to content

Commit 6f1886d

Browse files
committed
use lru-cache to avoid memory usage, fix embed icon src paths
1 parent bdcb460 commit 6f1886d

File tree

4 files changed

+64
-20
lines changed

4 files changed

+64
-20
lines changed

package.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@
6363
"@types/hast": "^3.0.4",
6464
"@types/html-escaper": "^3.0.0",
6565
"@types/json5": "^2.2.0",
66+
"@types/lru-cache": "^7.10.10",
6667
"@types/mdast": "^4.0.3",
6768
"@types/node": "^20.5.0",
6869
"@types/uuid": "^10.0.0",
@@ -96,6 +97,7 @@
9697
"junk": "^4.0.1",
9798
"lint-staged": "^15.2.7",
9899
"live-server": "^1.2.2",
100+
"lru-cache": "^11.0.2",
99101
"msw": "^2.3.5",
100102
"npm-run-all": "^4.1.5",
101103
"octokit": "^4.0.2",

pnpm-lock.yaml

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/utils/fetch-page-html.ts

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,34 @@
11
import { Element, Root } from "hast";
22
import { fromHtml } from "hast-util-from-html";
33
import { find } from "unist-util-find";
4+
import { LRUCache } from "lru-cache";
5+
6+
/**
 * Performs a `fetch` that presents itself as a desktop Chrome browser.
 *
 * A default `User-Agent` and `Accept-Language: en` header are applied unless
 * the caller supplies their own value for that header (matched
 * case-insensitively). All other `init` options are forwarded unchanged.
 *
 * @param input - The URL to request.
 * @param init - Optional fetch options; `init.headers` may be a plain object,
 *   an array of `[name, value]` tuples, or a `Headers` instance.
 * @returns The response, only when its status is in the 2xx range.
 * @throws Error when the response status is outside the 2xx range. Errors
 *   raised by `fetch` itself propagate unchanged.
 */
export async function fetchAsBrowser(input: string | URL, init?: RequestInit) {
  // Normalise every accepted header shape. Spreading `init.headers` into an
  // object literal only works for plain objects: a `Headers` instance spreads
  // to nothing and a tuple array spreads to index keys.
  const headers = new Headers(init?.headers);

  // Only fill in defaults the caller did not provide. `Headers` lookups are
  // case-insensitive, so a caller's "user-agent" replaces the default instead
  // of being combined with it.
  if (!headers.has("User-Agent")) {
    headers.set(
      "User-Agent",
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    );
  }
  if (!headers.has("Accept-Language")) {
    headers.set("Accept-Language", "en");
  }

  const response = await fetch(input, { ...init, headers });

  // `response.ok` is true exactly for statuses 200-299.
  if (!response.ok)
    throw new Error(`Request ${input} returned an error: ${response.status}`);
  return response;
}
421

5-
const pageHtmlMap = new Map<string, Promise<Root | null>>();
22+
const pageHtmlCache = new LRUCache<string, Promise<Root | null>>({
23+
max: 50,
24+
});
625

726
export function fetchPageHtml(src: string): Promise<Root | null> {
8-
if (pageHtmlMap.has(src)) return pageHtmlMap.get(src)!;
27+
if (pageHtmlCache.has(src)) return pageHtmlCache.get(src)!;
928

1029
const promise = (async () => {
11-
const srcHTML = await fetch(src, {
12-
headers: {
13-
"User-Agent":
14-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
15-
"Accept-Language": "en",
16-
},
17-
})
18-
.then((r) => (r.status === 200 ? r.text() : undefined))
30+
const srcHTML = await fetchAsBrowser(src)
31+
.then(async (r) => await r.text())
1932
.catch(() => null);
2033

2134
// if fetch fails...
@@ -26,7 +39,7 @@ export function fetchPageHtml(src: string): Promise<Root | null> {
2639
return srcHast;
2740
})();
2841

29-
pageHtmlMap.set(src, promise);
42+
pageHtmlCache.set(src, promise);
3043
return promise;
3144
}
3245

src/utils/markdown/iframes/rehype-transform.ts

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ import { Plugin } from "unified";
55
import { visit } from "unist-util-visit";
66

77
import { EMBED_MIN_HEIGHT, EMBED_SIZE } from "../constants";
8-
import { fromHtml } from "hast-util-from-html";
98
import { find } from "unist-util-find";
109
import { getLargestManifestIcon } from "../../get-largest-manifest-icon";
1110
import { IFramePlaceholder } from "./iframe-placeholder";
@@ -15,6 +14,7 @@ import * as stream from "stream";
1514
import sharp from "sharp";
1615
import * as svgo from "svgo";
1716
import { fetchPageHtml, getPageTitle } from "utils/fetch-page-html";
17+
import { LRUCache } from "lru-cache";
1818

1919
interface RehypeUnicornIFrameClickToRunProps {
2020
srcReplacements?: Array<(val: string, root: VFile) => string>;
@@ -29,22 +29,28 @@ function getIconPath(src: URL) {
2929

3030
// Cache the fetch *promises* - so that only one request per manifest/icon is processed,
3131
// and multiple fetchPageInfo() calls can await the same icon
32-
const pageIconMap = new Map<string, Promise<string>>();
32+
const pageIconCache = new LRUCache<string, Promise<string>>({
33+
max: 50,
34+
});
35+
3336
function fetchPageIcon(src: URL, srcHast: Root): Promise<string> {
34-
if (pageIconMap.has(src.hostname)) return pageIconMap.get(src.hostname)!;
37+
if (pageIconCache.has(src.hostname)) return pageIconCache.get(src.hostname)!;
3538

3639
const promise = (async () => {
3740
const iconPath = getIconPath(src);
38-
const iconDir = await fs.promises
39-
.readdir(path.dirname(iconPath))
41+
const iconDir = path.dirname("public/" + iconPath);
42+
await fs.promises.mkdir(iconDir, { recursive: true });
43+
44+
const existingIconFiles = await fs.promises
45+
.readdir(iconDir)
4046
.catch(() => []);
4147

4248
// If an icon has already been downloaded for the origin (in a previous build)
43-
const existingIconFile = iconDir.find((file) =>
49+
const existingIconFile = existingIconFiles.find((file) =>
4450
file.startsWith(path.basename(iconPath)),
4551
);
4652
if (existingIconFile) {
47-
return path.join(path.dirname(iconPath), existingIconFile);
53+
return iconDir.replace(/^public/, "") + "/" + existingIconFile;
4854
}
4955

5056
// <link rel="manifest" href="/manifest.json">
@@ -132,10 +138,13 @@ function fetchPageIcon(src: URL, srcHast: Root): Promise<string> {
132138
return "/" + iconPath + iconExt;
133139
})()
134140
// if an error is thrown, or response is null, use the default page icon
135-
.catch(() => null)
141+
.catch((e) => {
142+
console.error("[rehypeIFrameClickToRun]", e);
143+
return null;
144+
})
136145
.then((p) => p || defaultPageIcon);
137146

138-
pageIconMap.set(src.hostname, promise);
147+
pageIconCache.set(src.hostname, promise);
139148
return promise;
140149
}
141150

0 commit comments

Comments
 (0)