Skip to content

Commit a7562fd

Browse files
committed
Improve IDNA options support
1 parent e510d8f commit a7562fd

14 files changed

+351
-154
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ All Notable changes to `PHP Domain Parser` **5.x** series will be documented in
66

77
### Added
88

9-
- None
9+
- Support for IDNA_OPTIONS settings in all the classes see [#236](https://github.com/jeremykendall/php-domain-parser/pull/236) thanks to [Insolita](https://github.com/Insolita)
1010

1111
### Fixed
1212

data/pdp-PSL_FULL_5a3cc7f81795bb2e48e848af42d287b4.cache

100644100755
Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

data/pdp-RZD_FULL_f18a70477d29d525b9220612e2115345.cache

100644100755
Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/Domain.php

Lines changed: 73 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
use function strlen;
3838
use function strpos;
3939
use function substr;
40+
use const IDNA_DEFAULT;
4041

4142
/**
4243
* Domain Value Object.
@@ -326,6 +327,42 @@ public function keys(string $label): array
326327
return array_keys($this->labels, $label, true);
327328
}
328329

330+
/**
331+
* Returns the IDNA_* options used for the idn_to_ascii function.
332+
*
333+
* @see https://www.php.net/manual/en/intl.constants.php
334+
*
335+
* @return int
336+
*/
337+
public function getAsciiIDNAOption(): int
338+
{
339+
return $this->asciiIDNAOption;
340+
}
341+
342+
/**
343+
* Returns the IDNA_* options used for the idn_to_utf8 function.
344+
*
345+
* @see https://www.php.net/manual/en/intl.constants.php
346+
*
347+
* @return int
348+
*/
349+
public function getUnicodeIDNAOption(): int
350+
{
351+
return $this->unicodeIDNAOption;
352+
}
353+
354+
/**
355+
* Returns true if the domain contains deviation characters.
356+
*
357+
* @see http://unicode.org/reports/tr46/#Transition_Considerations
358+
*
359+
* @return bool
360+
*/
361+
public function isTransitionalDifferent(): bool
362+
{
363+
return $this->isTransitionalDifferent;
364+
}
365+
329366
/**
330367
* Returns the registrable domain.
331368
*
@@ -423,7 +460,7 @@ public function toAscii()
423460
return $this;
424461
}
425462

426-
return new self($domain, $this->publicSuffix, $this->getAsciiIDNAOption(), $this->getUnicodeIDNAOption());
463+
return new self($domain, $this->publicSuffix, $this->asciiIDNAOption, $this->unicodeIDNAOption);
427464
}
428465

429466
/**
@@ -438,8 +475,8 @@ public function toUnicode()
438475
return new self(
439476
$this->idnToUnicode($this->domain, $this->unicodeIDNAOption),
440477
$this->publicSuffix,
441-
$this->getAsciiIDNAOption(),
442-
$this->getUnicodeIDNAOption()
478+
$this->asciiIDNAOption,
479+
$this->unicodeIDNAOption
443480
);
444481
}
445482

@@ -464,8 +501,8 @@ public function resolve($publicSuffix): self
464501
$publicSuffix = new PublicSuffix(
465502
$publicSuffix,
466503
'',
467-
$this->getAsciiIDNAOption(),
468-
$this->getUnicodeIDNAOption()
504+
$this->asciiIDNAOption,
505+
$this->unicodeIDNAOption
469506
);
470507
}
471508

@@ -474,7 +511,7 @@ public function resolve($publicSuffix): self
474511
return $this;
475512
}
476513

477-
return new self($this->domain, $publicSuffix, $this->getAsciiIDNAOption(), $this->getUnicodeIDNAOption());
514+
return new self($this->domain, $publicSuffix, $this->asciiIDNAOption, $this->unicodeIDNAOption);
478515
}
479516

480517
/**
@@ -496,8 +533,8 @@ public function withPublicSuffix($publicSuffix): self
496533
$publicSuffix = new PublicSuffix(
497534
$publicSuffix,
498535
'',
499-
$this->getAsciiIDNAOption(),
500-
$this->getUnicodeIDNAOption()
536+
$this->asciiIDNAOption,
537+
$this->unicodeIDNAOption
501538
);
502539
}
503540

@@ -508,14 +545,14 @@ public function withPublicSuffix($publicSuffix): self
508545

509546
$domain = implode('.', array_reverse(array_slice($this->labels, count($this->publicSuffix))));
510547
if (null === $publicSuffix->getContent()) {
511-
return new self($domain, null, $this->getAsciiIDNAOption(), $this->getUnicodeIDNAOption());
548+
return new self($domain, null, $this->asciiIDNAOption, $this->unicodeIDNAOption);
512549
}
513550

514551
return new self(
515552
$domain.'.'.$publicSuffix->getContent(),
516553
$publicSuffix,
517-
$this->getAsciiIDNAOption(),
518-
$this->getUnicodeIDNAOption()
554+
$this->asciiIDNAOption,
555+
$this->unicodeIDNAOption
519556
);
520557
}
521558

@@ -547,16 +584,16 @@ public function withSubDomain($subDomain): self
547584
return new self(
548585
$this->registrableDomain,
549586
$this->publicSuffix,
550-
$this->getAsciiIDNAOption(),
551-
$this->getUnicodeIDNAOption()
587+
$this->asciiIDNAOption,
588+
$this->unicodeIDNAOption
552589
);
553590
}
554591

555592
return new self(
556593
$subDomain.'.'.$this->registrableDomain,
557594
$this->publicSuffix,
558-
$this->getAsciiIDNAOption(),
559-
$this->getUnicodeIDNAOption()
595+
$this->asciiIDNAOption,
596+
$this->unicodeIDNAOption
560597
);
561598
}
562599

@@ -668,16 +705,16 @@ public function withLabel(int $key, $label): self
668705
return new self(
669706
implode('.', array_reverse($labels)),
670707
null,
671-
$this->getAsciiIDNAOption(),
672-
$this->getUnicodeIDNAOption()
708+
$this->asciiIDNAOption,
709+
$this->unicodeIDNAOption
673710
);
674711
}
675712

676713
return new self(
677714
implode('.', array_reverse($labels)),
678715
$this->publicSuffix,
679-
$this->getAsciiIDNAOption(),
680-
$this->getUnicodeIDNAOption()
716+
$this->asciiIDNAOption,
717+
$this->unicodeIDNAOption
681718
);
682719
}
683720

@@ -721,47 +758,35 @@ public function withoutLabel(int $key, int ...$keys): self
721758
}
722759

723760
if ([] === $labels) {
724-
return new self(null, null, $this->getAsciiIDNAOption(), $this->getUnicodeIDNAOption());
761+
return new self(null, null, $this->asciiIDNAOption, $this->unicodeIDNAOption);
725762
}
726763

727764
$domain = implode('.', array_reverse($labels));
728765
$psContent = $this->publicSuffix->getContent();
729766
if (null === $psContent || '.'.$psContent !== substr($domain, - strlen($psContent) - 1)) {
730-
return new self($domain, null, $this->getAsciiIDNAOption(), $this->getUnicodeIDNAOption());
767+
return new self($domain, null, $this->asciiIDNAOption, $this->unicodeIDNAOption);
731768
}
732769

733-
return new self($domain, $this->publicSuffix, $this->getAsciiIDNAOption(), $this->getUnicodeIDNAOption());
734-
}
735-
736-
737-
public function getAsciiIDNAOption(): int
738-
{
739-
return $this->asciiIDNAOption;
770+
return new self($domain, $this->publicSuffix, $this->asciiIDNAOption, $this->unicodeIDNAOption);
740771
}
741772

742-
public function getUnicodeIDNAOption(): int
743-
{
744-
return $this->unicodeIDNAOption;
745-
}
746773
/**
747-
* Set IDNA_* options for functions idn_to_ascii, idn_to_utf.
774+
* Set IDNA_* options for functions idn_to_ascii, idn_to_utf8.
748775
* @see https://www.php.net/manual/en/intl.constants.php
749-
* @param int $forAscii
750-
* @param int $forUnicode
751-
* @return $this
776+
*
777+
* @param int $asciiIDNAOption
778+
* @param int $unicodeIDNAOption
779+
*
780+
* @return self
752781
*/
753-
public function withIDNAOptions(int $forAscii, int $forUnicode)
754-
{
755-
return new self($this->domain, $this->publicSuffix, $forAscii, $forUnicode);
756-
}
757-
758-
/**
759-
* return true if domain contains deviation characters.
760-
* @see http://unicode.org/reports/tr46/#Transition_Considerations
761-
* @return bool
762-
**/
763-
public function isTransitionalDifferent(): bool
782+
public function withIDNAOptions(int $asciiIDNAOption, int $unicodeIDNAOption): self
764783
{
765-
return $this->isTransitionalDifferent;
784+
if ($asciiIDNAOption === $this->asciiIDNAOption
785+
&& $unicodeIDNAOption === $this->unicodeIDNAOption
786+
) {
787+
return $this;
788+
}
789+
790+
return new self($this->domain, $this->publicSuffix, $asciiIDNAOption, $unicodeIDNAOption);
766791
}
767792
}

src/IDNAConverterTrait.php

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
use function implode;
2727
use function is_scalar;
2828
use function is_string;
29-
use function iterator_to_array;
3029
use function method_exists;
3130
use function preg_match;
3231
use function rawurldecode;
@@ -35,6 +34,7 @@
3534
use function strtolower;
3635
use const FILTER_FLAG_IPV4;
3736
use const FILTER_VALIDATE_IP;
37+
use const IDNA_DEFAULT;
3838
use const IDNA_ERROR_BIDI;
3939
use const IDNA_ERROR_CONTEXTJ;
4040
use const IDNA_ERROR_DISALLOWED;
@@ -112,15 +112,30 @@ private static function getIdnErrors(int $error_bit): string
112112
*/
113113
private function idnToAscii(string $domain, int $IDNAOption = IDNA_DEFAULT): string
114114
{
115-
list($domain,) = $this->transformToAscii($domain, $IDNAOption);
115+
list($domain, ) = $this->transformToAscii($domain, $IDNAOption);
116+
116117
return $domain;
117118
}
118119

119-
private function transformToAscii(string $domain, int $option):array
120+
/**
121+
* Returns the IDNA ASCII form and its isTransitionalDifferent state.
122+
*
123+
* @param string $domain
124+
*
125+
* @param int $IDNAOption
126+
* @param int $option
127+
*
128+
* @throws InvalidDomain if the string can not be converted to ASCII using IDN UTS46 algorithm
129+
*
130+
* @return array
131+
*/
132+
private function transformToAscii(string $domain, int $option): array
120133
{
121134
$domain = rawurldecode($domain);
135+
122136
static $pattern = '/[^\x20-\x7f]/';
123-
if (!preg_match($pattern, $domain)) {
137+
138+
if (1 !== preg_match($pattern, $domain)) {
124139
return [strtolower($domain), false];
125140
}
126141

@@ -184,7 +199,7 @@ private function idnToUnicode(string $domain, int $IDNAOption = IDNA_DEFAULT): s
184199
private function setLabels($domain = null, int $asciiOption = 0, int $unicodeOption = 0): array
185200
{
186201
if ($domain instanceof DomainInterface) {
187-
return iterator_to_array($domain, false);
202+
$domain = $domain->getContent();
188203
}
189204

190205
if (null === $domain) {
@@ -214,19 +229,19 @@ private function setLabels($domain = null, int $asciiOption = 0, int $unicodeOpt
214229
(?<reg_name>(?:(?&unreserved)|(?&sub_delims)|(?&encoded)){1,63})
215230
)
216231
^(?:(?&reg_name)\.){0,126}(?&reg_name)\.?$/ix';
217-
if (preg_match($domain_name, $formatted_domain)) {
232+
if (1 === preg_match($domain_name, $formatted_domain)) {
218233
return array_reverse(explode('.', strtolower($formatted_domain)));
219234
}
220235

221236
// a domain name cannot contain URI delimiters or spaces
222237
static $gen_delims = '/[:\/?#\[\]@ ]/';
223-
if (preg_match($gen_delims, $formatted_domain)) {
238+
if (1 === preg_match($gen_delims, $formatted_domain)) {
224239
throw new InvalidDomain(sprintf('The domain `%s` is invalid: it contains invalid characters', $domain));
225240
}
226241

227242
// if the domain name does not contain non-ASCII (UTF-8) chars then it is malformed
228243
static $pattern = '/[^\x20-\x7f]/';
229-
if (!preg_match($pattern, $formatted_domain)) {
244+
if (1 !== preg_match($pattern, $formatted_domain)) {
230245
throw new InvalidDomain(sprintf('The domain `%s` is invalid: the labels are malformed', $domain));
231246
}
232247

@@ -235,12 +250,13 @@ private function setLabels($domain = null, int $asciiOption = 0, int $unicodeOpt
235250
return array_reverse(explode('.', $this->idnToUnicode($ascii_domain, $unicodeOption)));
236251
}
237252

238-
private function hasTransitionalDifference($domain):bool
253+
private function hasTransitionalDifference($domain): bool
239254
{
240-
if(!is_string($domain) || '' === $domain){
255+
if (!is_string($domain) || '' === $domain) {
241256
return false;
242257
}
243258
list(, $isTransitionalDifferent) = $this->transformToAscii($domain, IDNA_DEFAULT);
259+
244260
return $isTransitionalDifferent;
245261
}
246262
}

0 commit comments

Comments
 (0)