Skip to content

Commit 9e3e894

Browse files
committed
adding more decoupling
1 parent c1f164c commit 9e3e894

File tree

11 files changed

+398
-225
lines changed

11 files changed

+398
-225
lines changed

README.md

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,7 @@ In any case, you should setup a cron to regularly update your local cache.
231231
### Public Suffix Resolver
232232

233233

234-
#### Rules and Domain
235-
234+
#### Public Suffix, Domain and Resolution
236235

237236
~~~php
238237
<?php
@@ -248,24 +247,38 @@ final class Rules
248247

249248
The `Rules` constructor expects an `array` representation of the Public Suffix List. This `array` representation is constructed by the `Manager` and stored using a PSR-16 compliant cache.
250249

251-
The `Rules` class resolves the submitted domain against the parsed rules from the PSL. This is done using the `Rules::resolve` method, which returns a `Pdp\Domain` object. The method expects a valid domain, and you can optionally specify which section of the rules the given domain should be validated against. By default all sections are used (i.e. `PRIVATE_DOMAIN` and `ICANN_DOMAIN`). If the submitted section is invalid or unknown, the resolver will fall back to using the entire list.
250+
The `Rules` class resolves the submitted domain against the parsed rules from the PSL. This is done using the `Rules::resolve` method, which returns a `Pdp\Domain` object. The method expects a valid domain, and you can optionally specify which section of the rules the given domain should be validated against. By default all sections are used (i.e. `PRIVATE` and `ICANN`). If the submitted section is invalid or unknown, the resolver will fall back to using the entire list.
252251

253252
~~~php
254253
<?php
255254

256-
final class Domain
255+
final class PublicSuffix
257256
{
258257

259-
const ICANN_DOMAIN = 'ICANN_DOMAIN';
260-
const PRIVATE_DOMAIN = 'PRIVATE_DOMAIN';
261-
const UNKNOWN_DOMAIN = 'UNKNOWN_DOMAIN';
258+
const ICANN = 'ICANN_DOMAIN';
259+
const PRIVATE = 'PRIVATE_DOMAIN';
260+
const ALL = 'ALL_DOMAIN';
261+
const UNKNOWN = 'UNKNOWN_DOMAIN';
262+
263+
public function __construct(?string $domain = null, string $type = self::UNKNOWN);
264+
public function getContent(): ?string
265+
public function isKnown(): bool;
266+
public function isICANN(): bool;
267+
public function isPrivate(): bool;
268+
}
269+
~~~
270+
271+
~~~php
272+
<?php
262273

263-
public function __construct(?string $domain = null, ?string $publicSuffix = null, string $type = self::UNKNOWN_DOMAIN);
274+
final class Domain
275+
{
276+
public function __construct(?string $domain = null, PublicSuffix $publicSuffix);
264277
public function getDomain(): ?string
265278
public function getPublicSuffix(): ?string
266279
public function getRegistrableDomain(): ?string
267280
public function getSubDomain(): ?string
268-
public function isValid(): bool;
281+
public function isKnown(): bool;
269282
public function isICANN(): bool;
270283
public function isPrivate(): bool;
271284
}

src/Domain.php

Lines changed: 38 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@
2222
*/
2323
final class Domain
2424
{
25-
const ICANN_DOMAIN = 'ICANN_DOMAIN';
26-
const PRIVATE_DOMAIN = 'PRIVATE_DOMAIN';
27-
const UNKNOWN_DOMAIN = 'UNKNOWN_DOMAIN';
28-
2925
/**
3026
* @var string|null
3127
*/
@@ -46,62 +42,20 @@ final class Domain
4642
*/
4743
private $subDomain;
4844

49-
/**
50-
* @var string
51-
*/
52-
private $type = self::UNKNOWN_DOMAIN;
53-
5445
/**
5546
* New instance.
5647
*
57-
* @param string|null $domain
58-
* @param string|null $publicSuffix
59-
* @param string $type
48+
* @param string|null $domain
49+
* @param PublicSuffix $publicSuffix
6050
*/
61-
public function __construct(
62-
$domain = null,
63-
$publicSuffix = null,
64-
string $type = self::UNKNOWN_DOMAIN
65-
) {
51+
public function __construct($domain = null, PublicSuffix $publicSuffix)
52+
{
6653
$this->domain = $domain;
67-
$this->setPublicSuffix($publicSuffix);
68-
$this->setType($type);
54+
$this->publicSuffix = $publicSuffix;
6955
$this->setRegistrableDomain();
7056
$this->setSubDomain();
7157
}
7258

73-
/**
74-
* Compute the public suffix part
75-
*
76-
* @param string|null $publicSuffix
77-
*/
78-
private function setPublicSuffix($publicSuffix)
79-
{
80-
if (null === $this->domain) {
81-
return;
82-
}
83-
84-
$this->publicSuffix = $publicSuffix;
85-
}
86-
87-
/**
88-
* Compute the domain validity
89-
*
90-
* @param string $type
91-
*/
92-
private function setType(string $type)
93-
{
94-
if (null === $this->publicSuffix) {
95-
return;
96-
}
97-
98-
if (!in_array($type, [self::PRIVATE_DOMAIN, self::ICANN_DOMAIN], true)) {
99-
$type = self::UNKNOWN_DOMAIN;
100-
}
101-
102-
$this->type = $type;
103-
}
104-
10559
/**
10660
* Compute the registrable domain part
10761
*/
@@ -111,7 +65,7 @@ private function setRegistrableDomain()
11165
return;
11266
}
11367

114-
$countLabelsToRemove = count(explode('.', $this->publicSuffix)) + 1;
68+
$countLabelsToRemove = count($this->publicSuffix) + 1;
11569
$domainLabels = explode('.', $this->domain);
11670
$domain = implode('.', array_slice($domainLabels, count($domainLabels) - $countLabelsToRemove));
11771
$this->registrableDomain = $this->normalize($domain);
@@ -124,9 +78,8 @@ private function setRegistrableDomain()
12478
*/
12579
private function hasRegistrableDomain(): bool
12680
{
127-
return null !== $this->publicSuffix
128-
&& strpos($this->domain, '.') > 0
129-
&& $this->publicSuffix !== $this->domain;
81+
return strpos((string) $this->domain, '.') > 0
82+
&& !in_array($this->publicSuffix->getContent(), [null, $this->domain], true);
13083
}
13184

13285
/**
@@ -162,7 +115,7 @@ private function setSubDomain()
162115

163116
$domainLabels = explode('.', $this->domain);
164117
$countLabels = count($domainLabels);
165-
$countLabelsToRemove = count(explode('.', $this->publicSuffix)) + 1;
118+
$countLabelsToRemove = count($this->publicSuffix) + 1;
166119
if ($countLabels === $countLabelsToRemove) {
167120
return;
168121
}
@@ -184,7 +137,7 @@ public function getDomain()
184137
*/
185138
public function getPublicSuffix()
186139
{
187-
return $this->publicSuffix;
140+
return $this->publicSuffix->getContent();
188141
}
189142

190143
/**
@@ -202,9 +155,9 @@ public function getPublicSuffix()
202155
*
203156
* @return bool
204157
*/
205-
public function isValid(): bool
158+
public function isKnown(): bool
206159
{
207-
return $this->type !== self::UNKNOWN_DOMAIN;
160+
return $this->publicSuffix->isKnown();
208161
}
209162

210163
/**
@@ -224,7 +177,7 @@ public function isValid(): bool
224177
*/
225178
public function isICANN(): bool
226179
{
227-
return $this->type === self::ICANN_DOMAIN;
180+
return $this->publicSuffix->isICANN();
228181
}
229182

230183
/**
@@ -244,7 +197,7 @@ public function isICANN(): bool
244197
*/
245198
public function isPrivate(): bool
246199
{
247-
return $this->type === self::PRIVATE_DOMAIN;
200+
return $this->publicSuffix->isPrivate();
248201
}
249202

250203
/**
@@ -280,4 +233,28 @@ public function getSubDomain()
280233
{
281234
return $this->subDomain;
282235
}
236+
237+
/**
238+
* {@inheritdoc}
239+
*/
240+
public function __debugInfo()
241+
{
242+
return [
243+
'domain' => $this->domain,
244+
'publicSuffix' => $this->publicSuffix->getContent(),
245+
'registrableDomain' => $this->registrableDomain,
246+
'subDomain' => $this->subDomain,
247+
'isKnown' => $this->isKnown(),
248+
'isICANN' => $this->isICANN(),
249+
'isPrivate' => $this->isPrivate(),
250+
];
251+
}
252+
253+
/**
254+
* {@inheritdoc}
255+
*/
256+
public function __set_state(array $properties)
257+
{
258+
return new self($properties['domain'], $properties['publicSuffix']);
259+
}
283260
}

src/Manager.php

Lines changed: 4 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
namespace Pdp;
1313

1414
use Psr\SimpleCache\CacheInterface;
15-
use SplTempFileObject;
1615

1716
/**
1817
* Public Suffix List Manager.
@@ -97,109 +96,14 @@ private function getCacheKey(string $str): string
9796
*/
9897
public function refreshRules(string $source_url = self::PSL_URL): bool
9998
{
99+
static $parser;
100+
$parser = $parser ?? new Parser();
100101
$content = $this->http->getContent($source_url);
101-
$rules = $this->parse($content);
102-
if (empty($rules[Domain::ICANN_DOMAIN]) || empty($rules[Domain::PRIVATE_DOMAIN])) {
102+
$rules = $parser->parse($content);
103+
if (empty($rules[PublicSuffix::ICANN]) || empty($rules[PublicSuffix::PRIVATE])) {
103104
return false;
104105
}
105106

106107
return $this->cache->set($this->getCacheKey($source_url), json_encode($rules));
107108
}
108-
109-
/**
110-
* Parses text representation of list to associative, multidimensional array.
111-
*
112-
* @param string $content the Public Suffix List as a SplFileObject
113-
*
114-
* @return array Associative, multidimensional array representation of the
115-
* public suffix list
116-
*/
117-
private function parse(string $content): array
118-
{
119-
$section = Domain::UNKNOWN_DOMAIN;
120-
$rules = [Domain::ICANN_DOMAIN => [], Domain::PRIVATE_DOMAIN => []];
121-
$file = new SplTempFileObject();
122-
$file->fwrite($content);
123-
$file->setFlags(SplTempFileObject::DROP_NEW_LINE | SplTempFileObject::READ_AHEAD | SplTempFileObject::SKIP_EMPTY);
124-
foreach ($file as $line) {
125-
$section = $this->getPslSection($section, $line);
126-
if ($section !== Domain::UNKNOWN_DOMAIN && strpos($line, '//') === false) {
127-
$rules[$section] = $this->addRule($rules[$section], explode('.', $line));
128-
}
129-
}
130-
131-
return $rules;
132-
}
133-
134-
/**
135-
* Tell whether the line can be converted for a given domain.
136-
*
137-
* @param string $section the previous status
138-
* @param string $line the current file line
139-
*
140-
* @return string
141-
*/
142-
private function getPslSection(string $section, string $line): string
143-
{
144-
if ($section == Domain::UNKNOWN_DOMAIN && strpos($line, '// ===BEGIN ICANN DOMAINS===') === 0) {
145-
return Domain::ICANN_DOMAIN;
146-
}
147-
148-
if ($section == Domain::ICANN_DOMAIN && strpos($line, '// ===END ICANN DOMAINS===') === 0) {
149-
return Domain::UNKNOWN_DOMAIN;
150-
}
151-
152-
if ($section == Domain::UNKNOWN_DOMAIN && strpos($line, '// ===BEGIN PRIVATE DOMAINS===') === 0) {
153-
return Domain::PRIVATE_DOMAIN;
154-
}
155-
156-
if ($section == Domain::PRIVATE_DOMAIN && strpos($line, '// ===END PRIVATE DOMAINS===') === 0) {
157-
return Domain::UNKNOWN_DOMAIN;
158-
}
159-
160-
return $section;
161-
}
162-
163-
/**
164-
* Recursive method to build the array representation of the Public Suffix List.
165-
*
166-
* This method is based heavily on the code found in generateEffectiveTLDs.php
167-
*
168-
* @see https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
169-
* A copy of the Apache License, Version 2.0, is provided with this
170-
* distribution
171-
*
172-
* @param array $list Initially an empty array, this eventually
173-
* becomes the array representation of the Public Suffix List
174-
* @param array $rule_parts One line (rule) from the Public Suffix List
175-
* exploded on '.', or the remaining portion of that array during recursion
176-
*
177-
* @return array
178-
*/
179-
private function addRule(array $list, array $rule_parts): array
180-
{
181-
$part = array_pop($rule_parts);
182-
183-
// Adheres to canonicalization rule from the "Formal Algorithm" section
184-
// of https://publicsuffix.org/list/
185-
// "The domain and all rules must be canonicalized in the normal way
186-
// for hostnames - lower-case, Punycode (RFC 3492)."
187-
188-
$part = idn_to_ascii($part, 0, INTL_IDNA_VARIANT_UTS46);
189-
$isDomain = true;
190-
if (strpos($part, '!') === 0) {
191-
$part = substr($part, 1);
192-
$isDomain = false;
193-
}
194-
195-
if (!isset($list[$part])) {
196-
$list[$part] = $isDomain ? [] : ['!' => ''];
197-
}
198-
199-
if ($isDomain && !empty($rule_parts)) {
200-
$list[$part] = $this->addRule($list[$part], $rule_parts);
201-
}
202-
203-
return $list;
204-
}
205109
}

0 commit comments

Comments
 (0)