Skip to content

Commit 95bbff5

Browse files
committed
Making the Idna implementation less dependent on ext-intl
1 parent d913107 commit 95bbff5

File tree

7 files changed

+167
-143
lines changed

7 files changed

+167
-143
lines changed

README.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ You need:
4747

4848
This library can resolve a domain against:
4949

50-
- The [Public Suffix List](http://publicsuffix.org/)
51-
- The [IANA Root Zone Database](https://data.iana.org/TLD/tlds-alpha-by-domain.txt)
50+
- The [Public Suffix List](https://publicsuffix.org/)
51+
- The [IANA Root Zone Database](https://www.iana.org/domains/root/files)
5252

5353
In both cases this is done using the `resolve` method implemented on the resource
5454
instance. The method returns a `Pdp\ResolvedDomain` object which represents the
@@ -72,7 +72,7 @@ echo $result->suffix()->toString(); //display 'okinawa.jp';
7272
$result->suffix()->isICANN(); //return true;
7373
~~~
7474

75-
For the [IANA Root Zone Database](https://data.iana.org/TLD/tlds-alpha-by-domain.txt),
75+
For the [IANA Root Zone Database](https://www.iana.org/domains/root/files),
7676
the `Pdp\TopLevelDomains` class is used instead:
7777

7878
~~~php
@@ -318,8 +318,6 @@ $newDomain = $domain
318318
->append('www')
319319
->prepend('docs.example');
320320

321-
322-
323321
echo $domain->toString(); //display 'www.example.com'
324322
echo $newDomain->toString(); //display 'docs.example.com.www'
325323
$newDomain->clear()->labels(); //return []

src/Domain.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,16 +159,16 @@ private function parseValue($domain): ?string
159159

160160
private function domainToAscii(string $domain): string
161161
{
162-
$option = self::IDNA_2003 === $this->type ? IntlIdna::IDNA2003_ASCII : IntlIdna::IDNA2008_ASCII;
162+
$option = self::IDNA_2003 === $this->type ? Idna::IDNA2003_ASCII : Idna::IDNA2008_ASCII;
163163

164-
return IntlIdna::toAscii($domain, $option)->result();
164+
return Idna::toAscii($domain, $option)->result();
165165
}
166166

167167
private function domainToUnicode(string $domain): string
168168
{
169-
$option = self::IDNA_2003 === $this->type ? IntlIdna::IDNA2003_UNICODE : IntlIdna::IDNA2008_UNICODE;
169+
$option = self::IDNA_2003 === $this->type ? Idna::IDNA2003_UNICODE : Idna::IDNA2008_UNICODE;
170170

171-
return IntlIdna::toUnicode($domain, $option)->result();
171+
return Idna::toUnicode($domain, $option)->result();
172172
}
173173

174174
/**

src/Idna.php

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Pdp;
6+
7+
use UnexpectedValueException;
8+
use function idn_to_ascii;
9+
use function idn_to_utf8;
10+
use function preg_match;
11+
use function rawurldecode;
12+
use function strpos;
13+
use function strtolower;
14+
use const INTL_IDNA_VARIANT_UTS46;
15+
16+
/**
 * IDNA conversion helper built on top of the ext-intl `idn_to_ascii()` and
 * `idn_to_utf8()` functions (UTS #46 variant).
 *
 * Inputs that visibly do not need any IDNA processing are answered without
 * touching ext-intl, so the extension is only required when a conversion
 * really has to be performed.
 *
 * @see https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uidna_8h.html
 */
final class Idna
{
    /**
     * IDNA errors.
     *
     * Bit flags; several of them can be combined in a single error value.
     */
    public const ERROR_EMPTY_LABEL = 1;
    public const ERROR_LABEL_TOO_LONG = 2;
    public const ERROR_DOMAIN_NAME_TOO_LONG = 4;
    public const ERROR_LEADING_HYPHEN = 8;
    public const ERROR_TRAILING_HYPHEN = 0x10;
    public const ERROR_HYPHEN_3_4 = 0x20;
    public const ERROR_LEADING_COMBINING_MARK = 0x40;
    public const ERROR_DISALLOWED = 0x80;
    public const ERROR_PUNYCODE = 0x100;
    public const ERROR_LABEL_HAS_DOT = 0x200;
    public const ERROR_INVALID_ACE_LABEL = 0x400;
    public const ERROR_BIDI = 0x800;
    public const ERROR_CONTEXTJ = 0x1000;
    public const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
    public const ERROR_CONTEXTO_DIGITS = 0x4000;

    /**
     * IDNA options.
     *
     * Bit flags; combine them with the bitwise OR operator.
     */
    public const IDNA_DEFAULT = 0;
    public const IDNA_ALLOW_UNASSIGNED = 1;
    public const IDNA_USE_STD3_RULES = 2;
    public const IDNA_CHECK_BIDI = 4;
    public const IDNA_CHECK_CONTEXTJ = 8;
    public const IDNA_NONTRANSITIONAL_TO_ASCII = 0x10;
    public const IDNA_NONTRANSITIONAL_TO_UNICODE = 0x20;
    public const IDNA_CHECK_CONTEXTO = 0x40;

    /**
     * Option presets used for IDNA2008 and IDNA2003 conversions.
     */
    public const IDNA2008_ASCII = self::IDNA_NONTRANSITIONAL_TO_ASCII
        | self::IDNA_CHECK_BIDI
        | self::IDNA_USE_STD3_RULES
        | self::IDNA_CHECK_CONTEXTJ;

    public const IDNA2008_UNICODE = self::IDNA_NONTRANSITIONAL_TO_UNICODE
        | self::IDNA_CHECK_BIDI
        | self::IDNA_USE_STD3_RULES
        | self::IDNA_CHECK_CONTEXTJ;

    public const IDNA2003_ASCII = self::IDNA_DEFAULT;
    public const IDNA2003_UNICODE = self::IDNA_DEFAULT;

    /**
     * Matches any byte outside the \x20-\x7f range; a string without such a
     * byte is returned by toAscii() without calling ext-intl.
     */
    private const REGEXP_IDNA_PATTERN = '/[^\x20-\x7f]/';

    /**
     * Ensures the ext-intl IDNA functions are usable.
     *
     * The outcome of the check is cached in a static local variable.
     *
     * @codeCoverageIgnore
     *
     * @throws UnexpectedValueException if idn_to_ascii() or INTL_IDNA_VARIANT_UTS46 is missing
     */
    private static function supportIdna(): void
    {
        static $idn_support = null;
        // Parenthesised on purpose: `??` binds tighter than `&&`, so without the
        // parentheses only the function_exists() result would be guarded by `??`.
        $idn_support = $idn_support ?? (function_exists('\idn_to_ascii') && defined('\INTL_IDNA_VARIANT_UTS46'));
        if (!$idn_support) {
            throw new UnexpectedValueException('IDN host can not be processed. Verify that ext/intl is installed for IDN support and that ICU is at least version 4.6.');
        }
    }

    /**
     * Converts the input to its IDNA ASCII form.
     *
     * The input is percent-decoded first. When the decoded string contains no
     * byte outside the \x20-\x7f range it is simply lowercased and returned,
     * without requiring ext-intl. Otherwise the conversion is delegated to
     * idn_to_ascii() using the UTS #46 variant.
     *
     * @param string $domain  the domain name, possibly percent-encoded
     * @param int    $options a combination of the IDNA_* option flags
     *
     * @throws UnexpectedValueException if a conversion is needed but ext-intl IDNA support is missing
     * @throws SyntaxError              if the string can not be converted to ASCII using IDN UTS46 algorithm
     */
    public static function toAscii(string $domain, int $options): IdnaInfo
    {
        $domain = rawurldecode($domain);
        if (1 !== preg_match(self::REGEXP_IDNA_PATTERN, $domain)) {
            return IdnaInfo::fromIntl([
                'result' => strtolower($domain),
                'isTransitionalDifferent' => false,
                'errors' => 0,
            ]);
        }

        self::supportIdna();

        // The return value is ignored: the outcome is read from $idnaInfo.
        idn_to_ascii($domain, $options, INTL_IDNA_VARIANT_UTS46, $idnaInfo);

        return self::createIdnaInfo($domain, $idnaInfo);
    }

    /**
     * Converts the input to its IDNA UNICODE form.
     *
     * When the input contains no ACE prefix ("xn--", matched
     * case-insensitively) it is returned unchanged, without requiring
     * ext-intl. Otherwise the conversion is delegated to idn_to_utf8() using
     * the UTS #46 variant.
     *
     * @param string $domain  the domain name in its ASCII form
     * @param int    $options a combination of the IDNA_* option flags
     *
     * @throws UnexpectedValueException if a conversion is needed but ext-intl IDNA support is missing
     * @throws SyntaxError              if the string can not be converted to UNICODE using IDN UTS46 algorithm
     */
    public static function toUnicode(string $domain, int $options): IdnaInfo
    {
        // The ACE prefix must be detected regardless of case, otherwise a host
        // such as "XN--..." would be returned undecoded.
        if (false === stripos($domain, 'xn--')) {
            return IdnaInfo::fromIntl([
                'result' => $domain,
                'isTransitionalDifferent' => false,
                'errors' => 0,
            ]);
        }

        self::supportIdna();

        // The return value is ignored: the outcome is read from $idnaInfo.
        idn_to_utf8($domain, $options, INTL_IDNA_VARIANT_UTS46, $idnaInfo);

        return self::createIdnaInfo($domain, $idnaInfo);
    }

    /**
     * Wraps the information array filled by ext-intl into an IdnaInfo instance
     * and rejects any result which carries an error flag.
     *
     * NOTE(review): if the intl call fails before populating the array, $infos
     * may not have the documented shape - confirm IdnaInfo::fromIntl() copes with that.
     *
     * @param array{result:string, isTransitionalDifferent:bool, errors:int} $infos
     *
     * @throws SyntaxError if the conversion reported at least one IDNA error
     */
    private static function createIdnaInfo(string $domain, array $infos): IdnaInfo
    {
        $result = IdnaInfo::fromIntl($infos);
        if (0 !== $result->errors()) {
            throw SyntaxError::dueToIDNAError($domain, $result);
        }

        return $result;
    }
}

src/IdnaInfo.php

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,41 +12,22 @@
1212
*/
1313
final class IdnaInfo
1414
{
15-
/**
16-
* IDNA errors.
17-
*/
18-
public const ERROR_EMPTY_LABEL = 1;
19-
public const ERROR_LABEL_TOO_LONG = 2;
20-
public const ERROR_DOMAIN_NAME_TOO_LONG = 4;
21-
public const ERROR_LEADING_HYPHEN = 8;
22-
public const ERROR_TRAILING_HYPHEN = 0x10;
23-
public const ERROR_HYPHEN_3_4 = 0x20;
24-
public const ERROR_LEADING_COMBINING_MARK = 0x40;
25-
public const ERROR_DISALLOWED = 0x80;
26-
public const ERROR_PUNYCODE = 0x100;
27-
public const ERROR_LABEL_HAS_DOT = 0x200;
28-
public const ERROR_INVALID_ACE_LABEL = 0x400;
29-
public const ERROR_BIDI = 0x800;
30-
public const ERROR_CONTEXTJ = 0x1000;
31-
public const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
32-
public const ERROR_CONTEXTO_DIGITS = 0x4000;
33-
3415
private const ERRORS = [
35-
self::ERROR_EMPTY_LABEL => 'a non-final domain name label (or the whole domain name) is empty',
36-
self::ERROR_LABEL_TOO_LONG => 'a domain name label is longer than 63 bytes',
37-
self::ERROR_DOMAIN_NAME_TOO_LONG => 'a domain name is longer than 255 bytes in its storage form',
38-
self::ERROR_LEADING_HYPHEN => 'a label starts with a hyphen-minus ("-")',
39-
self::ERROR_TRAILING_HYPHEN => 'a label ends with a hyphen-minus ("-")',
40-
self::ERROR_HYPHEN_3_4 => 'a label contains hyphen-minus ("-") in the third and fourth positions',
41-
self::ERROR_LEADING_COMBINING_MARK => 'a label starts with a combining mark',
42-
self::ERROR_DISALLOWED => 'a label or domain name contains disallowed characters',
43-
self::ERROR_PUNYCODE => 'a label starts with "xn--" but does not contain valid Punycode',
44-
self::ERROR_LABEL_HAS_DOT => 'a label contains a dot=full stop',
45-
self::ERROR_INVALID_ACE_LABEL => 'An ACE label does not contain a valid label string',
46-
self::ERROR_BIDI => 'a label does not meet the IDNA BiDi requirements (for right-to-left characters)',
47-
self::ERROR_CONTEXTJ => 'a label does not meet the IDNA CONTEXTJ requirements',
48-
self::ERROR_CONTEXTO_DIGITS => 'a label does not meet the IDNA CONTEXTO requirements for digits',
49-
self::ERROR_CONTEXTO_PUNCTUATION => 'a label does not meet the IDNA CONTEXTO requirements for punctuation characters. Some punctuation characters "Would otherwise have been DISALLOWED" but are allowed in certain contexts',
16+
Idna::ERROR_EMPTY_LABEL => 'a non-final domain name label (or the whole domain name) is empty',
17+
Idna::ERROR_LABEL_TOO_LONG => 'a domain name label is longer than 63 bytes',
18+
Idna::ERROR_DOMAIN_NAME_TOO_LONG => 'a domain name is longer than 255 bytes in its storage form',
19+
Idna::ERROR_LEADING_HYPHEN => 'a label starts with a hyphen-minus ("-")',
20+
Idna::ERROR_TRAILING_HYPHEN => 'a label ends with a hyphen-minus ("-")',
21+
Idna::ERROR_HYPHEN_3_4 => 'a label contains hyphen-minus ("-") in the third and fourth positions',
22+
Idna::ERROR_LEADING_COMBINING_MARK => 'a label starts with a combining mark',
23+
Idna::ERROR_DISALLOWED => 'a label or domain name contains disallowed characters',
24+
Idna::ERROR_PUNYCODE => 'a label starts with "xn--" but does not contain valid Punycode',
25+
Idna::ERROR_LABEL_HAS_DOT => 'a label contains a dot=full stop',
26+
Idna::ERROR_INVALID_ACE_LABEL => 'An ACE label does not contain a valid label string',
27+
Idna::ERROR_BIDI => 'a label does not meet the IDNA BiDi requirements (for right-to-left characters)',
28+
Idna::ERROR_CONTEXTJ => 'a label does not meet the IDNA CONTEXTJ requirements',
29+
Idna::ERROR_CONTEXTO_DIGITS => 'a label does not meet the IDNA CONTEXTO requirements for digits',
30+
Idna::ERROR_CONTEXTO_PUNCTUATION => 'a label does not meet the IDNA CONTEXTO requirements for punctuation characters. Some punctuation characters "Would otherwise have been DISALLOWED" but are allowed in certain contexts',
5031
];
5132

5233
private string $result;

src/IdnaInfoTest.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,19 @@ public function testItCanBeInstantiatedFromArray(): void
2626
self::assertSame('', $result->result());
2727
self::assertFalse($result->isTransitionalDifferent());
2828
self::assertSame(0, $result->errors());
29-
self::assertNull($result->error(IdnaInfo::ERROR_BIDI));
29+
self::assertNull($result->error(Idna::ERROR_BIDI));
3030
self::assertCount(0, $result->errorList());
3131
}
3232

3333
public function testInvalidSyntaxAfterIDNConversion(): void
3434
{
3535
try {
36-
IntlIdna::toAscii('%00.com', IntlIdna::IDNA2008_ASCII);
36+
Idna::toAscii('%00.com', Idna::IDNA2008_ASCII);
3737
} catch (SyntaxError $exception) {
3838
$result = $exception->fetchIdnaResult();
3939
self::assertInstanceOf(IdnaInfo::class, $result);
40-
self::assertSame(IdnaInfo::ERROR_DISALLOWED, $result->errors());
41-
self::assertIsString($result->error(IdnaInfo::ERROR_DISALLOWED));
40+
self::assertSame(Idna::ERROR_DISALLOWED, $result->errors());
41+
self::assertIsString($result->error(Idna::ERROR_DISALLOWED));
4242
self::assertCount(1, $result->errorList());
4343
}
4444
}

src/IntlIdna.php

Lines changed: 0 additions & 95 deletions
This file was deleted.

src/Rules.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ private static function addRule(array $list, array $ruleParts): array
162162
try {
163163
/** @var string $line */
164164
$line = array_pop($ruleParts);
165-
$rule = IntlIdna::toAscii($line, IntlIdna::IDNA2008_ASCII)->result();
165+
$rule = Idna::toAscii($line, Idna::IDNA2008_ASCII)->result();
166166
} catch (CannotProcessHost $exception) {
167167
throw UnableToLoadPublicSuffixList::dueToInvalidRule($line ?? null, $exception);
168168
}

0 commit comments

Comments
 (0)