diff --git a/lib/index.js b/lib/index.js index 0c2573b..be6dea7 100644 --- a/lib/index.js +++ b/lib/index.js @@ -47,7 +47,7 @@ class PuppeteerPlugin { await blockNavigation(page, url); } - await page.goto(url, this.gotoOptions); + const puppeteerResponse = await page.goto(url, this.gotoOptions); if (this.scrollToBottom) { await scrollToBottom(page, this.scrollToBottom.timeout, this.scrollToBottom.viewportN); @@ -56,10 +56,12 @@ class PuppeteerPlugin { const content = await page.content(); await page.close(); - // convert utf-8 -> binary string because website-scraper needs binary - return Buffer.from(content).toString('binary'); + const encoding = extractEncodingFromHeader(puppeteerResponse.headers()); + const body = Buffer.from(content).toString(encoding); + + return { body, encoding }; } else { - return response.body; + return { body: response.body }; } }); @@ -91,4 +93,10 @@ async function blockNavigation (page, url) { await page.setRequestInterception(true); } +function extractEncodingFromHeader (headers) { + const contentTypeHeader = headers['content-type']; + + return contentTypeHeader && contentTypeHeader.includes('utf-8') ? 'utf8' : 'binary'; +} + export default PuppeteerPlugin; diff --git a/test/mock/index.html b/test/mock/index.html index 00109f5..c67f29a 100644 --- a/test/mock/index.html +++ b/test/mock/index.html @@ -17,7 +17,7 @@ * with cheerio and website-scraper itself. * See https://github.com/cheeriojs/cheerio/pull/2280 */ - document.getElementById('special-characters-test').innerText = '7년 동안 한국에서 살았어요. Слава Україні!'; + document.getElementById('special-characters-test').innerText = '7년 동안 한국에서 살았어요. Слава Україні! 磁致伸缩位移传感器 影响大跨度桥梁施工控制的因素'; }; diff --git a/test/puppeteer-plugin.test.js b/test/puppeteer-plugin.test.js index 9cdde9e..ae56de0 100644 --- a/test/puppeteer-plugin.test.js +++ b/test/puppeteer-plugin.test.js @@ -39,7 +39,7 @@ describe('Puppeteer plugin test', () => { }); it('should render special characters correctly', async () => { - expect(content).to.contain('
7년 동안 한국에서 살았어요. Слава Україні!
'); + expect(content).to.contain('
7년 동안 한국에서 살았어요. Слава Україні! 磁致伸缩位移传感器 影响大跨度桥梁施工控制的因素
'); }); });