Skip to content

Commit d972f37

Browse files
committed
New version of PublicSuffixList
1 parent 18a67e0 commit d972f37

File tree

2 files changed

+393
-14
lines changed

2 files changed

+393
-14
lines changed

src/Pdp/PublicSuffixList.php

Lines changed: 174 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,191 @@
11
<?php
22

3+
declare(strict_types=1);
4+
35
/**
4-
* PHP Domain Parser: Public Suffix List based URL parsing.
6+
* Public Suffix List PHP: Public Suffix List based URL parsing.
57
*
6-
* @link http://github.com/jeremykendall/php-domain-parser for the canonical source repository
8+
* @see http://github.com/jeremykendall/publicsuffixlist-php for the canonical source repository
79
*
8-
* @copyright Copyright (c) 2014 Jeremy Kendall (http://about.me/jeremykendall)
9-
* @license http://github.com/jeremykendall/php-domain-parser/blob/master/LICENSE MIT License
10+
* @copyright Copyright (c) 2017 Jeremy Kendall (http://jeremykendall.net)
11+
* @license http://github.com/jeremykendall/publicsuffixlist-php/blob/master/LICENSE MIT License
1012
*/
1113
namespace Pdp;
1214

13-
/**
14-
* Public Suffix List.
15-
*/
16-
class PublicSuffixList extends \ArrayObject
15+
final class PublicSuffixList implements \Countable
1716
{
17+
use LabelsTrait;
18+
19+
/**
20+
* @var array
21+
*/
22+
private $rules;
23+
24+
/**
 * Build the list from an array of rules, a rules file, or the bundled default file.
 *
 * Accepted values for $rules:
 *  - null:   the bundled data/public-suffix-list.php file is loaded;
 *  - array:  used as the rule set as-is;
 *  - string: treated as the path of a PHP file whose return value is the rule set.
 *
 * A path is handed to `include`, which executes the file, so it must come
 * from a trusted source.
 *
 * @param mixed $rules
 *
 * @throws \InvalidArgumentException if $rules is none of the accepted values, or if
 *                                   the file is unreadable or does not return an array
 */
public function __construct($rules = null)
{
    if ($rules === null) {
        $rules = dirname(__DIR__, 2) . '/data/public-suffix-list.php';
    }

    if (is_string($rules)) {
        if (!file_exists($rules) || !is_readable($rules)) {
            throw new \InvalidArgumentException(
                sprintf('Public suffix list file "%s" does not exist or is not readable.', $rules)
            );
        }

        // Replace the path with whatever the file returns; it is validated below.
        $rules = include $rules;
    }

    if (!is_array($rules)) {
        throw new \InvalidArgumentException(
            'Public suffix list rules must be an array, or the path to a PHP file returning an array.'
        );
    }

    // Assigned exactly once, after loading, so a loaded rule set is never
    // overwritten by the raw constructor argument.
    $this->rules = $rules;
}
43+
44+
/**
 * Resolve the public suffix of a domain against the loaded rules.
 *
 * Input rejected by isMatchable() yields a NullDomain. Otherwise the domain
 * is normalized, its labels are matched against the rules, and the result is
 * a MatchedDomain when at least one label matched or an UnmatchedDomain when
 * none did (in which case handleNoMatches() supplies the suffix).
 *
 * Unless isPunycoded() reports the original input as Punycode, the suffix is
 * passed through idn_to_utf8() before being returned.
 *
 * @param string|null $domain
 *
 * @return Domain
 */
public function query(string $domain = null): Domain
{
    if (!$this->isMatchable($domain)) {
        return new NullDomain();
    }

    // Keep the caller's spelling: it is what the returned object carries.
    $original = $domain;
    $normalized = $this->normalize($original);

    $matched = $this->findMatchingLabels($this->getLabelsReverse($normalized), $this->rules);
    $hasMatch = count($matched) > 0;

    if ($hasMatch) {
        $suffix = $this->processMatches($matched);
    } else {
        $suffix = $this->handleNoMatches($normalized);
    }

    if (!$this->isPunycoded($original)) {
        $suffix = idn_to_utf8($suffix, 0, INTL_IDNA_VARIANT_UTS46);
    }

    return $hasMatch
        ? new MatchedDomain($original, $suffix, true)
        : new UnmatchedDomain($original, $suffix, false);
}
65+
66+
/**
 * Expose the rule set held by this list.
 *
 * @return array the value of the private $rules property
 */
public function getRules(): array
{
    return $this->rules;
}
70+
71+
/**
 * Number of entries at the top level of the rule set.
 *
 * Satisfies the \Countable interface declared on the class.
 *
 * @TODO: Remove. Bandaid to fix failing test.
 *
 * @return int
 */
public function count(): int
{
    $topLevelEntries = count($this->rules);

    return $topLevelEntries;
}
80+
81+
/**
 * Decide whether a domain is worth matching against the rules.
 *
 * A domain is rejected when it is null, starts with a dot, consists of a
 * single label, or is an IP address. The checks run in that order and stop
 * at the first one that rejects the input.
 *
 * @param string|null $domain
 *
 * @return bool
 */
private function isMatchable($domain): bool
{
    return $domain !== null
        && !$this->hasLeadingDot($domain)
        && !$this->isSingleLabelDomain($domain)
        && !$this->isIpAddress($domain);
}
101+
18102
/**
19-
* Public constructor.
103+
* Normalize domain.
20104
*
21-
* @param mixed $list Array representing Public Suffix List or PHP Public Suffix List file
105+
* "The domain must be canonicalized in the normal way for hostnames - lower-case, Punycode."
106+
*
107+
* @see http://www.ietf.org/rfc/rfc3492.txt
108+
*
109+
* @param string $domain
110+
*
111+
* @return string
22112
*/
23-
public function __construct($list)
113+
private function normalize(string $domain): string
{
    // Convert to the ASCII (Punycode) form first, then lower-case the result.
    // NOTE(review): idn_to_ascii() is documented to return false on failure;
    // that case is not handled here - confirm callers cannot trigger it.
    $ascii = idn_to_ascii($domain, 0, INTL_IDNA_VARIANT_UTS46);

    return strtolower($ascii);
}
117+
118+
/**
 * Walk the labels through the nested rules and collect those that match.
 *
 * Labels are visited in the order given (query() passes them reversed).
 * Each matched label is prepended to the result, so the returned array is
 * in the opposite order to the traversal.
 *
 * @param array $labels labels to look up, one rule level per label
 * @param array $rules  rule level to start matching from
 *
 * @return array the matched labels; empty when the first label does not match
 */
private function findMatchingLabels(array $labels, array $rules): array
{
    $found = [];
    $level = $rules;

    foreach ($labels as $label) {
        // An exception rule ends the walk without including this label.
        if ($this->isExceptionRule($label, $level)) {
            return $found;
        }

        // A wildcard at this level accepts the label, whatever it is, and ends the walk.
        if ($this->isWildcardRule($level)) {
            array_unshift($found, $label);

            return $found;
        }

        // Avoids improper parsing when $domain's subdomain + public suffix ===
        // a valid public suffix (e.g. domain 'us.example.com' and public suffix 'us.com')
        //
        // Added by @goodhabit in https://github.com/jeremykendall/php-domain-parser/pull/15
        // Resolves https://github.com/jeremykendall/php-domain-parser/issues/16
        if (!$this->matchExists($label, $level)) {
            return $found;
        }

        // Exact match: keep the label and descend one level for the next one.
        array_unshift($found, $label);
        $level = $level[$label];
    }

    return $found;
}
148+
149+
/**
 * Join the matched labels into a dot-separated string.
 *
 * Labels of length zero are dropped before joining.
 *
 * @param array $matches
 *
 * @return string
 */
private function processMatches(array $matches): string
{
    $nonEmptyLabels = array_filter($matches, 'strlen');

    return implode('.', $nonEmptyLabels);
}
153+
154+
/**
 * Tell whether the given string passes PHP's FILTER_VALIDATE_IP filter.
 *
 * @param string $domain
 *
 * @return bool
 */
private function isIpAddress(string $domain): bool
{
    // filter_var() yields false when validation fails.
    $validated = filter_var($domain, FILTER_VALIDATE_IP);

    return $validated !== false;
}
158+
159+
/**
 * Tell whether the label is present at this rule level and is marked with a '!' key.
 *
 * @param string $label
 * @param array  $rules rule level to inspect
 *
 * @return bool
 */
private function isExceptionRule(string $label, array $rules): bool
{
    if (!$this->matchExists($label, $rules)) {
        return false;
    }

    return array_key_exists('!', $rules[$label]);
}
164+
165+
/**
 * Tell whether the given rule level contains a '*' key.
 *
 * @param array $rules rule level to inspect
 *
 * @return bool
 */
private function isWildcardRule(array $rules): bool
{
    $hasWildcard = array_key_exists('*', $rules);

    return $hasWildcard;
}
169+
170+
/**
 * Tell whether the label is a key of the given rule level.
 *
 * @param string $label
 * @param array  $rules rule level to inspect
 *
 * @return bool
 */
private function matchExists(string $label, array $rules): bool
{
    $isKnownLabel = array_key_exists($label, $rules);

    return $isKnownLabel;
}
174+
175+
/**
 * Fallback used when no rule matched: return the last label of the domain.
 *
 * @param string $domain
 *
 * @return string the final element of getLabels($domain)
 */
private function handleNoMatches(string $domain): string
{
    $parts = $this->getLabels($domain);
    $lastLabel = array_pop($parts);

    return $lastLabel;
}
181+
182+
/**
 * Tell whether the input contains the Punycode (ACE) label prefix "xn--".
 *
 * The prefix is matched case-insensitively, so upper- or mixed-case input
 * such as "XN--..." is recognised as Punycode too.
 *
 * @param string $input
 *
 * @return bool
 */
private function isPunycoded(string $input): bool
{
    return stripos($input, 'xn--') !== false;
}
186+
187+
/**
 * Tell whether the domain starts with a dot.
 *
 * @param string $domain
 *
 * @return bool
 */
private function hasLeadingDot($domain): bool
{
    // strpos() gives the offset of the first dot, or false when there is none.
    $firstDotOffset = strpos($domain, '.');

    return $firstDotOffset === 0;
}
31191
}

0 commit comments

Comments
 (0)