Skip to content

Commit ee73656

Browse files
authored
🐛 Fix script tag nesting issue caused by LIBXML_HTML_NOIMPLIED (#21)
1 parent a6db950 commit ee73656

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

src/Document.php

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@ public function __construct(string $html)
2121
$this->document = new DOMDocument(encoding: 'UTF-8');
2222

2323
$this->document->loadHTML(
24-
$html,
25-
LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_NOXMLDECL | LIBXML_NOWARNING | LIBXML_NOERROR
24+
'<html><body>'.$html.'</body></html>',
25+
LIBXML_HTML_NODEFDTD | LIBXML_NOXMLDECL | LIBXML_NOWARNING | LIBXML_NOERROR
2626
);
2727
}
2828

@@ -59,7 +59,12 @@ public function xpath(string $expression): DOMNodeList
5959
*/
6060
public function html(): string
6161
{
62-
return trim($this->document->saveHTML());
62+
$html = $this->document->saveHTML();
63+
64+
// Strip the html/body wrapper added during parsing
65+
$html = preg_replace('~^.*?<body>|</body>.*$~si', '', $html);
66+
67+
return trim($html);
6368
}
6469

6570
/**

0 commit comments

Comments
 (0)