Skip to content

Commit 85bd823

Browse files
stloyd authored and norberttech committed
Allow reading not strictly valid HTML
1 parent 3c48153 commit 85bd823

File tree

3 files changed

+44
-2
lines changed

3 files changed

+44
-2
lines changed

src/core/etl/src/Flow/ETL/Row/Entry/HTMLEntry.php

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ public function __construct(
3232
?Metadata $metadata = null,
3333
) {
3434
if (\is_string($value)) {
35-
$this->value = HTMLDocument::createFromString($value);
35+
$this->value = HTMLDocument::createFromString($value, \LIBXML_NOERROR);
3636
} else {
3737
$this->value = $value;
3838
}

src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/HTMLEntryTest.php

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,36 @@ public function test_renames_entry() : void
166166
self::assertEquals($entry->type(), $newEntry->type());
167167
}
168168

169+
/**
 * Builds an HTML entry from markup containing an unmatched closing
 * `</span>` tag and checks its string form through assertHtml():
 * it is expected to differ from the malformed input and to match the
 * well-formed variant of the same document.
 */
public function test_with_non_fully_valid_html_string() : void
{
    // Reference document: identical to the malformed one minus the stray `</span>`.
    $wellFormedMarkup = <<<'HTML'
        <!DOCTYPE html>
        <html lang="en">
        <head></head>
        <body>
        <div>foo</div>
        <div><p><span>bar</span></p></div>
        </body>
        </html>
        HTML;

    // Input under test: the second <div> closes a <span> that was never opened.
    $malformedMarkup = <<<'HTML'
        <!DOCTYPE html>
        <html lang="en">
        <head></head>
        <body>
        <div>foo</div>
        <div><p><span>bar</span></span></p></div>
        </body>
        </html>
        HTML;

    $htmlEntry = html_entry('html', $malformedMarkup);

    // The serialized entry must not echo the broken input back ...
    self::assertHtml($malformedMarkup, $htmlEntry->toString(), false);
    // ... and must instead equal the well-formed document.
    self::assertHtml($wellFormedMarkup, $htmlEntry->toString(), true);
}
198+
169199
public function test_with_value() : void
170200
{
171201
$entry = html_entry('html', '<!DOCTYPE html><html lang="en"><head></head><body><div>foobar</div></body></html>');
@@ -178,4 +208,16 @@ public function test_with_value() : void
178208
self::assertNotEquals($entry->toString(), $newEntry->toString());
179209
self::assertEquals($html, $newEntry->value());
180210
}
211+
212+
/**
 * Compares two HTML strings after removing every whitespace character
 * from both of them.
 *
 * @param string $expected markup to compare against
 * @param string $html markup under test
 * @param bool $equals when true the stripped strings must be identical, when false they must differ
 *
 * @throws \RuntimeException when the whitespace-stripping regex fails
 */
private function assertHtml(string $expected, string $html, bool $equals) : void
{
    // `\s+` rather than `\s*`: the latter also matches the empty string at
    // every offset, which does pointless work for the same result.
    // preg_replace() returns null on PCRE failure; surface that instead of
    // letting two nulls compare as equal and hide a broken comparison.
    $stripWhitespace = static fn (string $markup) : string => \preg_replace('/\s+/', '', $markup)
        ?? throw new \RuntimeException('Failed to strip whitespace from HTML: ' . \preg_last_error_msg());

    $expected = $stripWhitespace($expected);
    $html = $stripWhitespace($html);

    // Both operands are guaranteed strings here, so compare strictly.
    if ($equals) {
        self::assertSame($expected, $html);

        return;
    }

    self::assertNotSame($expected, $html);
}
181223
}

src/lib/types/src/Flow/Types/Type/Logical/HTMLType.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public function cast(mixed $value) : HTMLDocument
4040

4141
/* @phpstan-ignore-next-line */
4242
if (\is_string($value)) {
43-
return HTMLDocument::createFromString($value);
43+
return HTMLDocument::createFromString($value, \LIBXML_NOERROR);
4444
}
4545

4646
return $value;

0 commit comments

Comments
 (0)