@@ -23,7 +23,7 @@ export async function getArchivedConversations(archiveDir: string): Promise<Map<
2323 const map = new Map < string , string > ( )
2424
2525 for ( const file of files ) {
26- const match = file . match ( / ^ (?< id > [ ^ - ] + ) - .+ \. h t m l $ / )
26+ const match = file . match ( / ^ (?< id > [ ^ - ] + ) - .* \. h t m l $ / )
2727 if ( match ) map . set ( match . groups ?. [ "id" ] as string , file )
2828 }
2929
@@ -50,36 +50,102 @@ export async function archiveConversation(browser: Browser, id: string) {
5050 await page . waitForSelector ( "message-content" , { timeout : 20000 } )
5151 await page . waitForTimeout ( 3000 )
5252
53- // Click all visible elements with text starting with "Show"
54- const showButtons = await page . getByText ( "Show" ) . all ( )
55- for ( const btn of showButtons ) {
56- if ( await btn . isVisible ( ) ) await btn . click ( )
57- }
58-
59- // @ts -expect-error
60- const title = ( await page . evaluate ( ( ) => document . querySelector ( "h1 > strong" ) . textContent , "" ) ) ?? ""
53+ // Click every element whose text matches "Show"
54+ for ( const btn of await page . getByText ( "Show" ) . all ( ) ) await btn . click ( )
55+ // Click every element whose text matches "More" (Deep Research)
56+ for ( const btn of await page . getByText ( "More" ) . all ( ) ) await btn . click ( )
57+
58+ // In some shared conversations, the title does not exist
59+ // page.evaluate: TypeError: Cannot read properties of null (reading 'textContent')
60+ const title = ( await page . evaluate ( ( ) => document . querySelector ( "h1 > strong" ) ?. textContent , "" ) ) ?? ""
61+ const includesKatex = await page . evaluate ( ( ) => document . getElementsByClassName ( "katex" ) . length > 0 )
62+
63+ // Remove unnecessary elements from the page
64+ await page . evaluate ( async ( ) => {
65+ // About Gemini
66+ document . getElementsByTagName ( "top-bar-actions" ) [ 0 ] ?. remove ( )
67+
68+ // Sign in buttons
69+ document . getElementsByClassName ( "boqOnegoogleliteOgbOneGoogleBar" ) [ 0 ] ?. remove ( )
70+ document . getElementsByClassName ( "share-landing-page_footer" ) [ 0 ] ?. remove ( )
71+
72+ // Copy and flag buttons
73+ for ( const matButton of document . querySelectorAll ( "[mat-icon-button]" ) ) matButton . remove ( )
74+
75+ // Replace mat-icon with equivalent SVGs, as the icon font is heavy
76+ // e.g. expand button for reasoning steps, Deep Research steps
77+ const matIcons = document . getElementsByTagName ( "mat-icon" )
78+ while ( matIcons . length > 0 ) {
79+ const matIcon = matIcons [ 0 ] !
80+ const iconName = matIcon . getAttribute ( "fonticon" )
81+ const size = getComputedStyle ( matIcon ) . fontSize
82+
83+ const img = document . createElement ( "img" )
84+ img . src = `https://fonts.gstatic.com/s/i/short-term/release/materialsymbolsoutlined/${ iconName } /default/${ size } .svg`
85+ matIcon . insertAdjacentElement ( "afterend" , img )
86+ matIcon . remove ( )
87+ }
88+
89+ // Disclaimer
90+ document . getElementsByClassName ( "share-viewer_footer_disclaimer" ) [ 0 ] ?. remove ( )
91+ // Legal links
92+ const legalLinks = document . getElementsByClassName ( "share-viewer_legal-links" ) [ 0 ] as HTMLDivElement | undefined
93+ if ( legalLinks ) {
94+ legalLinks . style . paddingTop = "0"
95+ while ( legalLinks . children . length > 0 ) legalLinks . children [ 0 ] ! . remove ( )
96+ }
97+
98+ // Script tags
99+ const scriptTags = document . getElementsByTagName ( "script" )
100+ while ( scriptTags . length > 0 ) scriptTags [ 0 ] ! . remove ( )
101+
102+ // Remove inline CSS variables to make the later step of removing unused CSS variables easier
103+ // <div style="--a: 0px"> ...
104+ // <div style='--a: 0px'> ...
105+ for ( const elWithStyleAttribute of document . querySelectorAll ( "[style]" ) ) {
106+ if ( elWithStyleAttribute . getAttribute ( "style" ) ! . includes ( "--" ) ) elWithStyleAttribute . removeAttribute ( "style" )
107+ }
108+ } )
61109
62- // https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/single-file-cli-api.js#L258
63- // https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/lib/cdp-client.js#L332
64- // https://github.com/gildas-lormeau/single-file-core/blob/212a657/single-file.js#L125
65110 // @ts -expect-error
66- const pageData = await page . evaluate ( async options => await singlefile . getPageData ( options ) , {
67- zipScript : ZIP_SCRIPT
111+ const pageData : { content : string } = await page . evaluate ( async options => await singlefile . getPageData ( options ) , {
112+ // https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/single-file-cli-api.js#L258
113+ // https://github.com/gildas-lormeau/single-file-cli/blob/v2.0.75/lib/cdp-client.js#L332
114+ // https://github.com/gildas-lormeau/single-file-core/blob/212a657/single-file.js#L125
115+ zipScript : ZIP_SCRIPT ,
116+
117+ removeUnusedStyles : true ,
118+ removeUnusedFonts : true ,
119+ removeFrames : true ,
120+ insertSingleFileComment : true
68121 } )
69122
123+ const variablesUsedInDocument = new Set (
124+ // Variable values could contain other values, so /var\(([^\)]+)/g won't work
125+ // e.g. --a: var(--b, var(--c));
126+ Array . from ( pageData . content . matchAll ( / v a r \s * \( \s * (?< variableName > - - [ A - Z a - z 0 - 9 \- ] + ) / g) ) . map (
127+ regExpExecArray => regExpExecArray . groups ! [ "variableName" ] !
128+ )
129+ )
130+
70131 const fileContent = pageData . content
71- . replaceAll ( / < s c r i p t \b [ ^ < ] * (?: (? ! < \/ s c r i p t > ) < [ ^ < ] * ) * < \/ s c r i p t > \s * / gi , "" )
132+ // Remove fonts
72133 . replaceAll ( / @ f o n t - f a c e \s * { [ ^ } ] * } / g, ( fontFaceRule : string ) => {
73- const fontFamilyMatch = fontFaceRule . match ( / f o n t - f a m i l y : \s * (?< quote > [ ' " ] ? ) (?< fontFamily > [ ^ ' " ] + ) \k<quote > ; / )
74-
75- if ( fontFamilyMatch && fontFamilyMatch . groups ?. [ "fontFamily" ] ) {
76- const fontFamily = fontFamilyMatch . groups ?. [ "fontFamily" ] . trim ( )
77- if ( fontFamily === "Google Symbols" ) return fontFaceRule
78- if ( pageData . content . includes ( `class="katex"` ) && fontFamily . startsWith ( "KaTeX" ) ) return fontFaceRule
79- }
134+ const fontFamilyMatch = fontFaceRule . match ( / f o n t - f a m i l y : \s * (?< quote > [ ' " ] ? ) (?< fontFamily > [ ^ ' " ] + ) \k<quote > / )
135+ const fontFamily = fontFamilyMatch ?. groups ?. [ "fontFamily" ] ?. trim ( ) ?? ""
80136
137+ if ( includesKatex && fontFamily . startsWith ( "KaTeX" ) ) return fontFaceRule
81138 return ""
82139 } )
140+ // Remove unused CSS variables
141+ . replaceAll (
142+ // --a: 0px; } .class { ...
143+ / (?< variableName > - - [ A - Z a - z 0 - 9 \- ] + ) \s * : \s * (?< value > [ ^ ; \n \} ] + ) \s * [ ; \n ] ? (?< curlyBrace > \} ) ? / gm,
144+ ( _match , variableName : string , value : string , curlyBrace : string | undefined = "" ) => {
145+ if ( variablesUsedInDocument . has ( variableName ) ) return `${ variableName } :${ value } ;${ curlyBrace } `
146+ return curlyBrace
147+ }
148+ )
83149
84150 // Remove illegal filename chars
85151 const sanitizedTitle = title . replace ( / [ \\ / : * ? " < > | \n ] / g, "" ) . substring ( 0 , 100 )
0 commit comments