Skip to content
This repository was archived by the owner on Nov 20, 2019. It is now read-only.

Commit cceba09

Browse files
authored
fix(Parser): more strict validation for domain and parts lengths (#24)
1 parent 0e2913b commit cceba09

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

src/Extract.php

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
use LayerShifter\TLDSupport\Helpers\Arr;
1616
use LayerShifter\TLDSupport\Helpers\IP;
1717
use LayerShifter\TLDSupport\Helpers\Str;
18+
use TrueBV\Exception\OutOfBoundsException;
1819

1920
/**
2021
* Extract class accurately extracts subdomain, domain and TLD components from URLs.
@@ -48,6 +49,13 @@ class Extract
4849
* @see https://tools.ietf.org/html/rfc3986#section-3.1
4950
*/
5051
const SCHEMA_PATTERN = '#^([a-zA-Z][a-zA-Z0-9+\-.]*:)?//#';
52+
/**
53+
* @const string The specification for this regex is based upon the extracts from RFC 1034 and RFC 2181 below.
54+
*
55+
* @see https://tools.ietf.org/html/rfc1034
56+
* @see https://tools.ietf.org/html/rfc2181
57+
*/
58+
// One or more 1-63 character labels (no leading/trailing hyphen), each followed by a dot, then a TLD that is
// either 2-63 letters or a punycode "xn--" label of 6-63 characters. Both TLD forms share the ^...$ anchors.
const HOSTNAME_PATTERN = '#^((?!-)[a-z0-9-]{0,62}[a-z0-9]\.)+(?:[a-z]{2,63}|xn--[a-z0-9-]{1,58}[a-z0-9])$#';
5159

5260
/**
5361
* @var int Value of extraction options.
@@ -262,6 +270,33 @@ private function extractSuffix($hostname)
262270
$hostname = $this->idn->toUTF8($hostname);
263271
}
264272

273+
// URI producers should use names that conform to the DNS syntax, even when use of DNS is not immediately
274+
// apparent, and should limit these names to no more than 255 characters in length.
275+
//
276+
// @see https://tools.ietf.org/html/rfc3986
277+
// @see http://blogs.msdn.com/b/oldnewthing/archive/2012/04/12/10292868.aspx
278+
279+
if (Str::length($hostname) > 253) {
280+
return null;
281+
}
282+
283+
// The DNS itself places only one restriction on the particular labels that can be used to identify resource
284+
// records. That one restriction relates to the length of the label and the full name. The length of any one
285+
// label is limited to between 1 and 63 octets. A full domain name is limited to 255 octets (including the
286+
// separators).
287+
//
288+
// @see http://tools.ietf.org/html/rfc2181
289+
290+
try {
291+
$asciiHostname = $this->idn->toASCII($hostname);
292+
} catch (OutOfBoundsException $e) {
293+
return null;
294+
}
295+
296+
if (0 === preg_match(self::HOSTNAME_PATTERN, $asciiHostname)) {
297+
return null;
298+
}
299+
265300
$suffix = $this->parseSuffix($hostname);
266301

267302
if (null === $suffix) {

tests/ExtractTest.php

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,41 @@ public function testParse()
209209
$this->checkPublicDomain('example.COM', 'example.com');
210210
$this->checkPublicDomain('WwW.example.COM', 'example.com');
211211

212+
// Long domains.
213+
214+
$this->checkPublicDomain(
215+
sprintf(
216+
'%s.%s.%s.%s.com',
217+
str_repeat('a', 63),
218+
str_repeat('a', 63),
219+
str_repeat('a', 63),
220+
str_repeat('a', 57)
221+
), // 253 characters
222+
str_repeat('a', 57) . '.com'
223+
);
224+
$this->checkPublicDomain(
225+
sprintf(
226+
'http://%s.%s.%s.%s.com',
227+
str_repeat('a', 63),
228+
str_repeat('a', 63),
229+
str_repeat('a', 63),
230+
str_repeat('a', 57)
231+
), // 253 characters without schema
232+
str_repeat('a', 57) . '.com'
233+
);
234+
235+
// Too long and too short parts (labels) of domains.
236+
237+
$this->checkPublicDomain('test..com', null);
238+
$this->checkPublicDomain(
239+
str_repeat('a', 64) . '.a.com',
240+
null
241+
);
242+
212243
// Leading dot.
213244

214245
$this->checkPublicDomain('.com', null);
246+
$this->checkPublicDomain('..com', null);
215247
$this->checkPublicDomain('.example', null);
216248
$this->checkPublicDomain('.example.com', null);
217249
$this->checkPublicDomain('.example.example', null);

0 commit comments

Comments
 (0)