Skip to content

Commit fa97fff

Browse files
committed
feat: optimize file size of bundled page
1 parent a3aee12 commit fa97fff

File tree

1 file changed

+88
-22
lines changed

1 file changed

+88
-22
lines changed

src/util.ts

Lines changed: 88 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ export async function getArchivedConversations(archiveDir: string): Promise<Map<
2323
const map = new Map<string, string>()
2424

2525
for (const file of files) {
26-
const match = file.match(/^(?<id>[^-]+) - .+\.html$/)
26+
const match = file.match(/^(?<id>[^-]+) - .*\.html$/)
2727
if (match) map.set(match.groups?.["id"] as string, file)
2828
}
2929

@@ -50,36 +50,102 @@ export async function archiveConversation(browser: Browser, id: string) {
5050
await page.waitForSelector("message-content", { timeout: 20000 })
5151
await page.waitForTimeout(3000)
5252

53-
// Click all visible elements with text starting with "Show"
54-
const showButtons = await page.getByText("Show").all()
55-
for (const btn of showButtons) {
56-
if (await btn.isVisible()) await btn.click()
57-
}
58-
59-
// @ts-expect-error
60-
const title = (await page.evaluate(() => document.querySelector("h1 > strong").textContent, "")) ?? ""
53+
// Click every element matching the text "Show" (no explicit visibility filter is applied)
54+
for (const btn of await page.getByText("Show").all()) await btn.click()
55+
// Click every element matching the text "More" (Deep Research); no explicit visibility filter is applied
56+
for (const btn of await page.getByText("More").all()) await btn.click()
57+
58+
// In some shared conversations, the title does not exist
59+
// page.evaluate: TypeError: Cannot read properties of null (reading 'textContent')
60+
const title = (await page.evaluate(() => document.querySelector("h1 > strong")?.textContent, "")) ?? ""
61+
const includesKatex = await page.evaluate(() => document.getElementsByClassName("katex").length > 0)
62+
63+
// Remove unnecessary elements from the page
64+
await page.evaluate(async () => {
65+
// About Gemini
66+
document.getElementsByTagName("top-bar-actions")[0]?.remove()
67+
68+
// Sign in buttons
69+
document.getElementsByClassName("boqOnegoogleliteOgbOneGoogleBar")[0]?.remove()
70+
document.getElementsByClassName("share-landing-page_footer")[0]?.remove()
71+
72+
// Copy and flag buttons
73+
for (const matButton of document.querySelectorAll("[mat-icon-button]")) matButton.remove()
74+
75+
// Replace mat-icon with equivalent SVGs, as the icon font is heavy
76+
// e.g. expand button for reasoning steps, Deep Research steps
77+
const matIcons = document.getElementsByTagName("mat-icon")
78+
while (matIcons.length > 0) {
79+
const matIcon = matIcons[0]!
80+
const iconName = matIcon.getAttribute("fonticon")
81+
const size = getComputedStyle(matIcon).fontSize
82+
83+
const img = document.createElement("img")
84+
img.src = `https://fonts.gstatic.com/s/i/short-term/release/materialsymbolsoutlined/${iconName}/default/${size}.svg`
85+
matIcon.insertAdjacentElement("afterend", img)
86+
matIcon.remove()
87+
}
88+
89+
// Disclaimer
90+
document.getElementsByClassName("share-viewer_footer_disclaimer")[0]?.remove()
91+
// Legal links
92+
const legalLinks = document.getElementsByClassName("share-viewer_legal-links")[0] as HTMLDivElement | undefined
93+
if (legalLinks) {
94+
legalLinks.style.paddingTop = "0"
95+
while (legalLinks.children.length > 0) legalLinks.children[0]!.remove()
96+
}
97+
98+
// Script tags
99+
const scriptTags = document.getElementsByTagName("script")
100+
while (scriptTags.length > 0) scriptTags[0]!.remove()
101+
102+
// Remove inline CSS variables to make the later step of removing unused CSS variables easier
103+
// <div style="--a: 0px"> ...
104+
// <div style='--a: 0px'> ...
105+
for (const elWithStyleAttribute of document.querySelectorAll("[style]")) {
106+
if (elWithStyleAttribute.getAttribute("style")!.includes("--")) elWithStyleAttribute.removeAttribute("style")
107+
}
108+
})
61109

62-
// https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/single-file-cli-api.js#L258
63-
// https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/lib/cdp-client.js#L332
64-
// https://github.com/gildas-lormeau/single-file-core/blob/212a657/single-file.js#L125
65110
// @ts-expect-error
66-
const pageData = await page.evaluate(async options => await singlefile.getPageData(options), {
67-
zipScript: ZIP_SCRIPT
111+
const pageData: { content: string } = await page.evaluate(async options => await singlefile.getPageData(options), {
112+
// https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/single-file-cli-api.js#L258
113+
// https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/lib/cdp-client.js#L332
114+
// https://github.com/gildas-lormeau/single-file-core/blob/212a657/single-file.js#L125
115+
zipScript: ZIP_SCRIPT,
116+
117+
removeUnusedStyles: true,
118+
removeUnusedFonts: true,
119+
removeFrames: true,
120+
insertSingleFileComment: true
68121
})
69122

123+
const variablesUsedInDocument = new Set(
124+
// Variable values could contain other values, so /var\(([^\)]+)/g won't work
125+
// e.g. --a: var(--b, var(--c));
126+
Array.from(pageData.content.matchAll(/var\s*\(\s*(?<variableName>--[A-Za-z0-9\-]+)/g)).map(
127+
regExpExecArray => regExpExecArray.groups!["variableName"]!
128+
)
129+
)
130+
70131
const fileContent = pageData.content
71-
.replaceAll(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>\s*/gi, "")
132+
// Remove fonts
72133
.replaceAll(/@font-face\s*{[^}]*}/g, (fontFaceRule: string) => {
73-
const fontFamilyMatch = fontFaceRule.match(/font-family:\s*(?<quote>['"]?)(?<fontFamily>[^'"]+)\k<quote>;/)
74-
75-
if (fontFamilyMatch && fontFamilyMatch.groups?.["fontFamily"]) {
76-
const fontFamily = fontFamilyMatch.groups?.["fontFamily"].trim()
77-
if (fontFamily === "Google Symbols") return fontFaceRule
78-
if (pageData.content.includes(`class="katex"`) && fontFamily.startsWith("KaTeX")) return fontFaceRule
79-
}
134+
const fontFamilyMatch = fontFaceRule.match(/font-family:\s*(?<quote>['"]?)(?<fontFamily>[^'"]+)\k<quote>/)
135+
const fontFamily = fontFamilyMatch?.groups?.["fontFamily"]?.trim() ?? ""
80136

137+
if (includesKatex && fontFamily.startsWith("KaTeX")) return fontFaceRule
81138
return ""
82139
})
140+
// Remove unused CSS variables
141+
.replaceAll(
142+
// --a: 0px; } .class { ...
143+
/(?<variableName>--[A-Za-z0-9\-]+)\s*:\s*(?<value>[^;\n\}]+)\s*[;\n]?(?<curlyBrace>\})?/gm,
144+
(_match, variableName: string, value: string, curlyBrace: string | undefined = "") => {
145+
if (variablesUsedInDocument.has(variableName)) return `${variableName}:${value};${curlyBrace}`
146+
return curlyBrace
147+
}
148+
)
83149

84150
// Remove illegal filename chars
85151
const sanitizedTitle = title.replace(/[\\/:*?"<>|\n]/g, "").substring(0, 100)

0 commit comments

Comments
 (0)