Skip to content

Commit 9e7c5f7

Browse files
authored
Make HTMLDocument reliable (#1948)
* Make `HTMLDocument` reliable
* Make `HTMLDocument` reliable
* Make `HTMLDocument` reliable
* Make `HTMLDocument` reliable
1 parent 31e78b4 commit 9e7c5f7

File tree

5 files changed

+180
-137
lines changed

5 files changed

+180
-137
lines changed

src/lib/types/src/Flow/Types/Type/Logical/HTMLType.php

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
namespace Flow\Types\Type\Logical;
66

7-
use Flow\Types\Exception\{CastingException, InvalidTypeException};
7+
use Flow\Types\Exception\{CastingException, InvalidArgumentException, InvalidTypeException};
88
use Flow\Types\Type;
99
use Flow\Types\Value\HTMLDocument;
1010

@@ -28,19 +28,15 @@ public function cast(mixed $value) : HTMLDocument
2828
return $value;
2929
}
3030

31-
if (\is_string($value)) {
32-
return new HTMLDocument($value);
31+
if (!is_string($value) && !is_object($value)) {
32+
throw new CastingException($value, $this);
3333
}
3434

35-
if ($value instanceof \DOMDocument) {
35+
try {
3636
return new HTMLDocument($value);
37+
} catch (InvalidArgumentException $e) {
38+
throw new CastingException($value, $this, $e);
3739
}
38-
39-
if (\is_object($value) && \is_a($value, 'Dom\HTMLDocument')) {
40-
return new HTMLDocument($value);
41-
}
42-
43-
throw new CastingException($value, $this);
4440
}
4541

4642
public function isValid(mixed $value) : bool

src/lib/types/src/Flow/Types/Value/HTMLDocument.php

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,48 +6,39 @@
66

77
use Flow\Types\Exception\InvalidArgumentException;
88

9-
final class HTMLDocument implements \Stringable
9+
final readonly class HTMLDocument implements \Stringable
1010
{
11+
private const HTML_ALIKE_REGEX = <<<'REGXP'
12+
@^
13+
<!DOCTYPE\s+html[^>]*>\s* # must start with <!DOCTYPE html ...>
14+
<html[^>]*>\s* # opening <html>
15+
<head[^>]*>.*?<\/head>\s* # exactly one <head> ... </head>
16+
<body[^>]*>.*?<\/body>\s* # exactly one <body> ... </body>
17+
<\/html>\s* # closing </html>
18+
$@mix
19+
REGXP;
20+
1121
private string $value;
1222

1323
public function __construct(string|object $value)
1424
{
15-
if ('' === $value) {
16-
$this->value = $value;
17-
} elseif (\is_string($value)) {
18-
if (\class_exists('\Dom\HTMLDocument', false)) {
19-
$options = \LIBXML_HTML_NOIMPLIED;
20-
21-
if (defined('Dom\HTML_NO_DEFAULT_NS')) {
22-
$options |= constant('\Dom\HTML_NO_DEFAULT_NS');
23-
}
24-
25-
$document = \Dom\HTMLDocument::createFromString($value, $options);
26-
27-
$this->value = $document->saveHTML();
28-
} else {
29-
$document = new \DOMDocument();
30-
31-
$result = @$document->loadHTML($value, \LIBXML_HTML_NOIMPLIED | \LIBXML_HTML_NODEFDTD);
32-
33-
if ($result === false) {
34-
throw new InvalidArgumentException("Invalid value '{$value}'");
35-
}
36-
37-
$value = $document->saveHTML() ?: throw new InvalidArgumentException("Invalid value '{$value}'");
38-
39-
$this->value = trim($value);
40-
}
41-
} elseif ($value instanceof \DOMDocument) {
42-
$value = $value->saveHTML($value->documentElement) ?: throw new InvalidArgumentException('Invalid value ' . var_export($value, true));
43-
44-
$this->value = trim($value);
25+
if ($value instanceof \DOMDocument) {
26+
$value = $value->saveHTML($value) ?: '';
4527
} elseif (is_a($value, '\Dom\HTMLDocument', true)) {
4628
/* @phpstan-ignore-next-line */
47-
$this->value = $value->saveHtml();
48-
} else {
49-
throw new InvalidArgumentException('Invalid value ' . var_export($value, true));
29+
$value = $value->saveHtml();
30+
} elseif (!is_string($value)) {
31+
throw new InvalidArgumentException('Invalid HTML document type: ' . $value::class);
5032
}
33+
34+
// Cut all new lines and tabs
35+
$value = trim(str_replace(["\n", "\t"], '', $value));
36+
37+
if (!$this->isValid($value)) {
38+
throw new InvalidArgumentException('Invalid HTML document given: ' . var_export($value, true));
39+
}
40+
41+
$this->value = $value;
5142
}
5243

5344
public static function fromString(string $value) : self
@@ -69,4 +60,13 @@ public function toString() : string
6960
{
7061
return $this->value;
7162
}
63+
64+
private function isValid(string $value) : bool
65+
{
66+
if ('' === $value) {
67+
return false;
68+
}
69+
70+
return \preg_match(self::HTML_ALIKE_REGEX, $value) === 1;
71+
}
7272
}

src/lib/types/tests/Flow/Types/Tests/Unit/Type/Logical/HTMLTypeTest.php

Lines changed: 53 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,15 @@
77
use function Flow\Types\DSL\{type_from_array, type_html};
88
use Flow\Types\Exception\{CastingException, InvalidTypeException};
99
use Flow\Types\Value\HTMLDocument;
10-
use PHPUnit\Framework\Attributes\{DataProvider, RequiresPhp};
10+
use PHPUnit\Framework\Attributes\DataProvider;
1111
use PHPUnit\Framework\TestCase;
1212

1313
final class HTMLTypeTest extends TestCase
1414
{
1515
public static function assert_data_provider() : \Generator
1616
{
1717
yield 'valid HTMLDocument' => [
18-
'value' => new HTMLDocument(''),
18+
'value' => new HTMLDocument('<!DOCTYPE html><html><head></head><body></body></html>'),
1919
'exceptionClass' => null,
2020
];
2121

@@ -58,51 +58,66 @@ public static function assert_data_provider() : \Generator
5858
'value' => new \DateTimeImmutable(),
5959
'exceptionClass' => InvalidTypeException::class,
6060
];
61-
}
6261

63-
public static function cast_data_provider_php82() : \Generator
64-
{
65-
yield 'string to HTML' => [
66-
'value' => '<!DOCTYPE html><html lang="en"><body><div><span>1</span></div></body></html>',
67-
'expected' => <<<'HTML'
68-
<!DOCTYPE html>
69-
<html lang="en"><body><div><span>1</span></div></body></html>
70-
HTML,
71-
'exceptionClass' => null,
72-
];
73-
74-
yield 'incomplete string to HTML' => [
62+
yield 'incomplete HTML' => [
7563
'value' => '<div><span>1</span></div>',
76-
'expected' => <<<'HTML'
77-
<div><span>1</span></div>
78-
HTML,
79-
'exceptionClass' => null,
64+
'exceptionClass' => InvalidTypeException::class,
8065
];
8166

82-
yield 'object to HTML' => [
67+
yield 'random object' => [
8368
'value' => new \stdClass(),
84-
'expected' => null,
85-
'exceptionClass' => CastingException::class,
69+
'exceptionClass' => InvalidTypeException::class,
8670
];
8771
}
8872

89-
public static function cast_data_provider_php84() : \Generator
73+
public static function cast_data_provider() : \Generator
9074
{
91-
yield 'string to HTML' => [
92-
'value' => '<!DOCTYPE html><html lang="en"><body><div><span>1</span></div></body></html>',
93-
'expected' => '<!DOCTYPE html><html lang="en"><body><div><span>1</span></div></body></html>',
75+
yield 'valid HTMLDocument' => [
76+
'value' => new HTMLDocument($html = '<!DOCTYPE html><html lang="en"><head></head><body><div><span>1</span></div></body></html>'),
77+
'expected' => $html,
9478
'exceptionClass' => null,
9579
];
9680

97-
yield 'incomplete string to HTML' => [
98-
'value' => '<div><span>1</span></div>',
99-
'expected' => <<<'HTML'
100-
<div><span>1</span></div>
81+
yield 'valid HTML string' => [
82+
'value' => $html = '<!DOCTYPE html><html lang="en"><head></head><body><div><span>1</span></div></body></html>',
83+
'expected' => $html,
84+
'exceptionClass' => null,
85+
];
86+
87+
yield 'valid HTML with spaces' => [
88+
'value' => '<!DOCTYPE html><html> <head><title></title></head> <body><p>invalid</p> </body> </html>',
89+
'expected' => '<!DOCTYPE html><html> <head><title></title></head> <body><p>invalid</p> </body> </html>',
90+
'exceptionClass' => null,
91+
];
92+
93+
yield 'valid HTML with new lines' => [
94+
'value' => <<<'HTML'
95+
<!DOCTYPE html>
96+
<html>
97+
<head><title></title></head>
98+
<body>
99+
<p> invalid</p>
100+
</body>
101+
</html>
101102
HTML,
103+
'expected' => '<!DOCTYPE html><html> <head><title></title></head> <body> <p> invalid</p> </body></html>',
102104
'exceptionClass' => null,
103105
];
104106

105-
yield 'object to HTML' => [
107+
yield 'missing doctype' => [
108+
'value' => '<html><body><div><span>bar</span></div></body></html>',
109+
'expected' => null,
110+
'exceptionClass' => CastingException::class,
111+
];
112+
113+
yield 'missing head' => [
114+
'value' => '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
115+
<html><body><p>invalid</p></body></html>',
116+
'expected' => null,
117+
'exceptionClass' => CastingException::class,
118+
];
119+
120+
yield 'random object' => [
106121
'value' => new \stdClass(),
107122
'expected' => null,
108123
'exceptionClass' => CastingException::class,
@@ -112,22 +127,12 @@ public static function cast_data_provider_php84() : \Generator
112127
public static function is_valid_data_provider() : \Generator
113128
{
114129
yield 'valid HTMLDocument' => [
115-
'value' => new HTMLDocument(''),
130+
'value' => new HTMLDocument('<!DOCTYPE html><html lang="en"><head></head><body><div><span>1</span></div></body></html>'),
116131
'expected' => true,
117132
];
118133

119-
yield 'invalid HTML string' => [
120-
'value' => '<html></html>',
121-
'expected' => false,
122-
];
123-
124-
yield 'invalid date string' => [
125-
'value' => '2020-01-01',
126-
'expected' => false,
127-
];
128-
129-
yield 'invalid datetime string' => [
130-
'value' => '2020-01-01 00:00:00',
134+
yield 'valid HTML string' => [
135+
'value' => '<!DOCTYPE html><html lang="en"><head></head><body><div><span>1</span></div></body></html>',
131136
'expected' => false,
132137
];
133138
}
@@ -143,30 +148,15 @@ public function test_assert(mixed $value, ?string $exceptionClass = null) : void
143148
}
144149
}
145150

146-
#[RequiresPhp('< 8.4')]
147-
#[DataProvider('cast_data_provider_php82')]
148-
public function test_cast_php82(mixed $value, mixed $expected, ?string $exceptionClass) : void
151+
#[DataProvider('cast_data_provider')]
152+
public function test_cast(mixed $value, mixed $expected, ?string $exceptionClass = null) : void
149153
{
150154
if ($exceptionClass !== null) {
151155
$this->expectException($exceptionClass);
152-
type_html()->cast($value);
153-
} else {
154-
$result = type_html()->cast($value);
155-
self::assertSame($expected, $result->toString());
156156
}
157-
}
158157

159-
#[RequiresPhp('>= 8.4')]
160-
#[DataProvider('cast_data_provider_php84')]
161-
public function test_cast_php84(mixed $value, mixed $expected, ?string $exceptionClass) : void
162-
{
163-
if ($exceptionClass !== null) {
164-
$this->expectException($exceptionClass);
165-
type_html()->cast($value);
166-
} else {
167-
$result = type_html()->cast($value);
168-
self::assertSame($expected, $result->toString());
169-
}
158+
$result = type_html()->cast($value);
159+
self::assertSame($expected, $result->toString());
170160
}
171161

172162
#[DataProvider('is_valid_data_provider')]

src/lib/types/tests/Flow/Types/Tests/Unit/Type/TypeDetectorTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ public static function provide_logical_types_data() : \Generator
9999
];
100100

101101
yield 'html' => [
102-
HTMLDocument::fromString('<html><div><span>1</span></div></html>'),
102+
HTMLDocument::fromString('<!DOCTYPE html><html lang="en"><head></head><body><div><span>1</span></div></body></html>'),
103103
HTMLType::class,
104104
'html',
105105
];

0 commit comments

Comments
 (0)