-
Notifications
You must be signed in to change notification settings - Fork 695
Open
Description
Hello, I found this issue when parsing some specific HTML markup; I'm not sure what the root cause is.
Reproduction
const { Readability } = require('@mozilla/readability')
const { Browser } = require('happy-dom')
const path = require('path')
const fs = require('fs')
// HTML fixture used by this reproduction. Upstream copy:
// https://github.com/microlinkhq/metascraper/blob/master/packages/metascraper-readability/test/fixtures/chowhanandsons.com.html
const fixturePath = path.join(
  __dirname,
  'packages/metascraper-readability/test/fixtures/chowhanandsons.com.html'
)
// Read synchronously and decode to a string so it can be handed to the page as-is.
const html = fs.readFileSync(fixturePath, { encoding: 'utf-8' })
/**
 * Loads an HTML string into a fresh happy-dom browser page and exposes the
 * resulting document.
 *
 * A new `Browser` is created on every call with all of the `disable*`
 * settings below switched on (judging by their names they turn off script
 * evaluation and script/CSS/iframe loading — see the happy-dom docs for the
 * exact semantics).
 *
 * @param {object} options
 * @param {string} options.url - Value assigned to the page's `url`.
 * @param {string} options.html - Markup assigned to the page's `content`.
 * @returns {{ document: object, teardown: Function }} The main frame's
 *   document, plus a callback that closes the browser and returns whatever
 *   `browser.close()` returns.
 */
const getDocument = ({ url, html }) => {
  const settings = {
    disableComputedStyleRendering: true,
    disableCSSFileLoading: true,
    disableIframePageLoading: true,
    disableJavaScriptEvaluation: true,
    disableJavaScriptFileLoading: true
  }
  const headlessBrowser = new Browser({ settings })

  const tab = headlessBrowser.newPage()
  // The URL is assigned before the content, in that order.
  tab.url = url
  tab.content = html

  const { document } = tab.mainFrame
  const teardown = () => headlessBrowser.close()
  return { document, teardown }
}
// Reproduction driver: load the fixture into a document, run Readability on
// it, and print whatever `parse()` returns.
const url = 'https://www.chowhanandsons.com/'
const { document, teardown } = getDocument({ url, html })
try {
  const reader = new Readability(document)
  console.log(reader.parse())
} finally {
  // Always release the browser, even when constructing the reader or parsing
  // throws. NOTE(review): if `browser.close()` is asynchronous this call is
  // fire-and-forget — confirm against the happy-dom docs.
  teardown()
}
Metadata
Assignees
Labels
No labels