@@ -47,7 +47,7 @@ class PuppeteerPlugin {
4747 await blockNavigation ( page , url ) ;
4848 }
4949
50- await page . goto ( url , this . gotoOptions ) ;
50+ const puppeteerResponse = await page . goto ( url , this . gotoOptions ) ;
5151
5252 if ( this . scrollToBottom ) {
5353 await scrollToBottom ( page , this . scrollToBottom . timeout , this . scrollToBottom . viewportN ) ;
@@ -56,10 +56,12 @@ class PuppeteerPlugin {
5656 const content = await page . content ( ) ;
5757 await page . close ( ) ;
5858
59- // convert utf-8 -> binary string because website-scraper needs binary
60- return Buffer . from ( content ) . toString ( 'binary' ) ;
59+ const encoding = extractEncodingFromHeader ( puppeteerResponse . headers ( ) ) ;
60+ const body = Buffer . from ( content ) . toString ( encoding ) ;
61+
62+ return { body, encoding } ;
6163 } else {
62- return response . body ;
64+ return { body : response . body } ;
6365 }
6466 } ) ;
6567
@@ -91,4 +93,10 @@ async function blockNavigation (page, url) {
9193 await page . setRequestInterception ( true ) ;
9294}
9395
/**
 * Picks the Buffer encoding used to re-encode the rendered page content,
 * based on the response's Content-Type header.
 *
 * The header is looked up under the lower-case key `content-type`. The charset
 * comparison is case-insensitive, so `charset=UTF-8` and `charset=utf-8`
 * are treated the same way.
 *
 * @param {Object<string, string>} headers - response headers map
 * @returns {'utf8'|'binary'} `'utf8'` when the Content-Type mentions utf-8,
 *   otherwise `'binary'` (also when the header or the map is missing)
 */
function extractEncodingFromHeader (headers) {
	const contentTypeHeader = headers && headers['content-type'];

	// No usable header: fall back to the binary default.
	if (typeof contentTypeHeader !== 'string') {
		return 'binary';
	}

	// Charset names are case-insensitive, so normalize before matching.
	return contentTypeHeader.toLowerCase().includes('utf-8') ? 'utf8' : 'binary';
}
101+
94102export default PuppeteerPlugin ;
0 commit comments