Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@
"standard-version": "^9.5.0",
"travis-cov": "latest",
"ts-node": "^10.9.1",
"typescript": "latest"
"typescript": "latest",
"yarn": "^1.22.22"
},
"config": {
"blanket": {
Expand Down
25 changes: 25 additions & 0 deletions src/nodes/html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,9 @@ const kElementsClosedByClosing = {
th: { tr: true, table: true, TR: true, TABLE: true },
TH: { tr: true, table: true, TR: true, TABLE: true },
} as Record<string, Record<string, boolean>>;
/**
 * Exceptions to implicit closing by a container's end tag.
 *
 * Outer key: lowercase name of the currently open tag (only `p` today).
 * Inner keys: lowercase names of container tags whose closing tag must NOT
 * implicitly close that open tag. Any container not listed here is allowed to
 * close the open tag when its own end tag is encountered.
 */
const kElementsClosedByClosingExcept = {
	p: {
		a: true,
		audio: true,
		del: true,
		ins: true,
		map: true,
		noscript: true,
		video: true,
	},
} as Record<string, Record<string, boolean>>;

export interface Options {
lowerCaseTagName?: boolean;
Expand Down Expand Up @@ -1191,6 +1194,28 @@ export function base_parse(data: string, options = {} as Partial<Options>) {
continue;
}
}
// Implied end tag handling (e.g. `<main><p>hello</main>`): decide whether
// currentParent may be closed implicitly by the closing tag being processed.
// NOTE(review): this appears to run only after the closing tag failed to match
// an open element directly — confirm against the surrounding code.
//
// Lowercased raw tag name of currentParent, or '' when it has none.
const openTag =
currentParent.rawTagName ?
currentParent.rawTagName.toLowerCase() :
'';
// Only tags with an entry in the exception table (currently just `p`) are
// candidates for this implicit close.
if (kElementsClosedByClosingExcept[openTag]) {
const closingTag = tagName.toLowerCase();
// There must be an entry below the top of the stack to act as the container.
if (stack.length > 1) {
// Entry directly beneath the top of the stack; presumably the parent of
// currentParent — TODO confirm that currentParent is always the stack top.
const possibleContainer = stack[stack.length - 2];
if (
possibleContainer &&
possibleContainer.rawTagName &&
// The closing tag must name that container (case-insensitive compare)...
possibleContainer.rawTagName.toLowerCase() === closingTag &&
// ...and the container must not be listed as an exception for the open tag.
!kElementsClosedByClosingExcept[openTag][closingTag]
) {
// Close currentParent implicitly: set its range end to the end component of
// the range built from the later of lastTextPos and tagEndPos.
(<[number, number]>currentParent.range)[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
// Drop the implicitly closed element and make the new stack tail current.
stack.pop();
currentParent = arr_back(stack);
// Re-run the enclosing loop so the same closing tag is evaluated against the
// new currentParent (loop header is outside this view — TODO confirm).
continue;
}
}
}
// Use aggressive strategy to handle unmatching markups.
break;
}
Expand Down
12 changes: 12 additions & 0 deletions test/tests/issues/294.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
const { parse, valid } = require('@test/test-target');

// Regression test for issue 294: a <p> whose end tag is omitted must be
// accepted as valid and serialized with the </p> inserted before </main>.
describe('issue 294 Closing tag is missing but valid HTML still not parsable', function () {
	it('Valid HTML missing closing p tag should parse', function () {
		const html = '<body><main class=h-entry><p>hello</main></body>';
		const expectedMarkup = '<body><main class=h-entry><p>hello</p></main></body>';

		// Validation must accept the omitted end tag.
		valid(html).should.equal(true);

		// Parsing must keep <main> intact and close the <p> inside it.
		const dom = parse(html);
		dom.outerHTML.should.equal(expectedMarkup);

		// The container must still be reachable by its class selector.
		dom.querySelectorAll('.h-entry').length.should.equal(1);
	});
});
20 changes: 18 additions & 2 deletions test/tests/valid.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,25 @@ describe('parseWithValidation', function () {
result.should.eql(false);
})

it('hillcrestpartyrentals.html should return Object with valid: false. not closing <p> tag on line 476', function () {
// #294: Closing tag is missing but valid HTML is still not parseable
//
// Tag omission in text/html:
// A p element's end tag can be omitted if the p element is immediately
// followed by an address, article, aside, blockquote, details, dialog,
// div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4,
// h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, search,
// section, table, or ul element, or if there is no more content in the
// parent element and the parent element is an HTML element that is not
// an a, audio, del, ins, map, noscript, or video element, or an
// autonomous custom element.
//
// Based on this, hillcrestpartyrentals.html is in fact valid HTML. All
// the p elements missing close tags are contained within td elements
// and, therefore, should be closed when there is no more content in the
// parent td element (i.e. at the `</td>`).
it('hillcrestpartyrentals.html should return Object with valid: true. not closing <p> tag on line 476', function () {
const result = valid(fs.readFileSync(__dirname + '/../assets/html/hillcrestpartyrentals.html').toString());
result.should.eql(false);
result.should.eql(true);
})

it('google.html should return Object with valid: true', function () {
Expand Down
Loading
Loading