Skip to content

Commit e45496a

Browse files
committed
Improve IANA Root Zone parsing and caching
1 parent 10567b5 commit e45496a

10 files changed

+161
-99
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ All Notable changes to `PHP Domain Parser` **5.x** series will be documented in
1010
- `Pdp\Manager::getTLDs` a service to return a cached version of the IANA Root zone database
1111
- `Pdp\Manager::refreshTLDs` a service to refresh the cached version of the IANA Root zone database
1212
- `Pdp\Converter::convertRootZoneDatabase` converts the IANA Root zone database into an associative array
13-
- `Pdp\Manager::__construct` added a new `$ttl` parameter to improve PSR-16 supports
13+
- added a new `$ttl` parameter to improve PSR-16 support in:
14+
- `Pdp\Manager::__construct`
15+
- `Pdp\Manager::getRules`
16+
- `Pdp\Manager::refreshRules`
1417
- `Pdp\Exception\CouldNotLoadTLDs` exception
1518

1619
### Fixed

data/pdp-RZD_FULL_f18a70477d29d525b9220612e2115345.cache

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/Converter.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ public function convertRootZoneDatabase(string $content): array
142142
foreach ($file as $line) {
143143
$line_content = trim($line);
144144
if (false === strpos($line_content, '#')) {
145-
$records[] = $line_content;
145+
$records[] = $this->idnToAscii($line_content);
146146
continue;
147147
}
148148

src/IDNAConverterTrait.php

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -97,17 +97,22 @@ private static function getIdnErrors(int $error_bit): string
9797
*/
9898
private function idnToAscii(string $domain): string
9999
{
100+
$domain = rawurldecode($domain);
100101
static $pattern = '/[^\x20-\x7f]/';
101102
if (!preg_match($pattern, $domain)) {
102103
return strtolower($domain);
103104
}
104105

105106
$output = idn_to_ascii($domain, 0, INTL_IDNA_VARIANT_UTS46, $arr);
106-
if (!$arr['errors']) {
107+
if (0 !== $arr['errors']) {
108+
throw new InvalidDomain(sprintf('The host `%s` is invalid : %s', $domain, self::getIdnErrors($arr['errors'])));
109+
}
110+
111+
if (false === strpos($output, '%')) {
107112
return $output;
108113
}
109114

110-
throw new InvalidDomain(sprintf('The host `%s` is invalid : %s', $domain, self::getIdnErrors($arr['errors'])));
115+
throw new InvalidDomain(sprintf('The host `%s` is invalid: it contains invalid characters', $domain));
111116
}
112117

113118
/**
@@ -120,7 +125,7 @@ private function idnToAscii(string $domain): string
120125
private function idnToUnicode(string $domain): string
121126
{
122127
$output = idn_to_utf8($domain, 0, INTL_IDNA_VARIANT_UTS46, $arr);
123-
if (!$arr['errors']) {
128+
if (0 === $arr['errors']) {
124129
return $output;
125130
}
126131

@@ -189,16 +194,8 @@ private function setLabels($domain = null): array
189194
throw new InvalidDomain(sprintf('The domain `%s` is invalid: the labels are malformed', $domain));
190195
}
191196

192-
//if a domain name contains UTF-8 chars it must be convertible using IDNA UTS46
193-
$ascii_domain = idn_to_ascii($formatted_domain, 0, INTL_IDNA_VARIANT_UTS46, $arr);
194-
if (0 !== $arr['errors']) {
195-
throw new InvalidDomain(sprintf('The domain `%s` is invalid : %s', $domain, self::getIdnErrors($arr['errors'])));
196-
}
197-
198-
if (false === strpos($ascii_domain, '%')) {
199-
return array_reverse(explode('.', $this->idnToUnicode($ascii_domain)));
200-
}
197+
$ascii_domain = $this->idnToAscii($domain);
201198

202-
throw new InvalidDomain(sprintf('The domain `%s` is invalid: it contains invalid characters', $domain));
199+
return array_reverse(explode('.', $this->idnToUnicode($ascii_domain)));
203200
}
204201
}

src/Installer.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ private static function getVendorPath(Event $event = null)
9999
return $event->getComposer()->getConfig()->get('vendor-dir');
100100
}
101101

102-
for ($i = 2; $i <= 5; $i++) {
102+
for ($i = 1; $i <= 5; $i++) {
103103
if (is_dir($vendor = dirname(__DIR__, $i).'/vendor')) {
104104
return $vendor;
105105
}

src/Manager.php

Lines changed: 79 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
use Pdp\Exception\CouldNotLoadTLDs;
2323
use Psr\SimpleCache\CacheInterface;
2424
use TypeError;
25+
use const DATE_ATOM;
2526
use const FILTER_VALIDATE_INT;
2627
use const JSON_ERROR_NONE;
2728
use function filter_var;
@@ -77,105 +78,82 @@ public function __construct(CacheInterface $cache, HttpClient $http, $ttl = null
7778
{
7879
$this->cache = $cache;
7980
$this->http = $http;
80-
$this->ttl = $this->setTtl($ttl);
81+
$this->ttl = $this->filterTtl($ttl);
8182
$this->converter = new Converter();
8283
}
8384

84-
/**
85-
* set the cache TTL.
86-
*
87-
* @return DateInterval|null
88-
*/
89-
private function setTtl($ttl)
90-
{
91-
if ($ttl instanceof DateInterval || null === $ttl) {
92-
return $ttl;
93-
}
94-
95-
if ($ttl instanceof DateTimeInterface) {
96-
return (new DateTimeImmutable('now', $ttl->getTimezone()))->diff($ttl);
97-
}
98-
99-
if (false !== ($res = filter_var($ttl, FILTER_VALIDATE_INT))) {
100-
return new DateInterval('PT'.$res.'S');
101-
}
102-
103-
if (is_string($ttl)) {
104-
return DateInterval::createFromDateString($ttl);
105-
}
106-
107-
throw new TypeError(sprintf(
108-
'The ttl must an integer, a string or a DateInterval object %s given',
109-
is_object($ttl) ? get_class($ttl) : gettype($ttl)
110-
));
111-
}
112-
11385
/**
11486
* Gets the Public Suffix List Rules.
11587
*
88+
* @param null|mixed $ttl
89+
*
11690
* @throws CouldNotLoadRules If the PSL rules can not be loaded
11791
*/
118-
public function getRules(string $url = self::PSL_URL): Rules
92+
public function getRules(string $url = self::PSL_URL, $ttl = null): Rules
11993
{
120-
$cacheKey = $this->getCacheKey('PSL', $url);
121-
$cacheRules = $this->cache->get($cacheKey);
94+
$key = $this->getCacheKey('PSL', $url);
95+
$data = $this->cache->get($key);
12296

123-
if (null === $cacheRules && !$this->refreshRules($url)) {
97+
if (null === $data && !$this->refreshRules($url, $ttl)) {
12498
throw new CouldNotLoadRules(sprintf('Unable to load the public suffix list rules for %s', $url));
12599
}
126100

127-
$rules = json_decode($cacheRules ?? $this->cache->get($cacheKey), true);
101+
$data = json_decode($data ?? $this->cache->get($key), true);
128102
if (JSON_ERROR_NONE === json_last_error()) {
129-
return new Rules($rules);
103+
return new Rules($data);
130104
}
131105

132106
throw new CouldNotLoadRules('The public suffix list cache is corrupted: '.json_last_error_msg(), json_last_error());
133107
}
134108

135-
/**
136-
* Returns the cache key according to the source URL.
137-
*/
138-
private function getCacheKey(string $prefix, string $str): string
139-
{
140-
return $prefix.'_FULL_'.md5(strtolower($str));
141-
}
142-
143109
/**
144110
* Downloads, converts and cache the Public Suffix.
145111
*
146112
* If a local cache already exists, it will be overwritten.
147113
*
148114
* Returns true if the refresh was successful
115+
*
116+
* @param null|mixed $ttl
149117
*/
150-
public function refreshRules(string $url = self::PSL_URL): bool
118+
public function refreshRules(string $url = self::PSL_URL, $ttl = null): bool
151119
{
152-
$body = $this->http->getContent($url);
153-
$cacheData = $this->converter->convert($body);
154-
$cacheKey = $this->getCacheKey('PSL', $url);
120+
$data = $this->converter->convert($this->http->getContent($url));
121+
$key = $this->getCacheKey('PSL', $url);
122+
$ttl = $this->filterTtl($ttl) ?? $this->ttl;
155123

156-
return $this->cache->set($cacheKey, json_encode($cacheData), $this->ttl);
124+
return $this->cache->set($key, json_encode($data), $ttl);
157125
}
158126

159127
/**
160128
* Gets the Top Level Domains from the IANA Root Zone Database.
161129
*
130+
* @param null|mixed $ttl
131+
*
162132
* @throws Exception If the Top Level Domains can not be returned
163133
*/
164-
public function getTLDs(string $url = self::RZD_URL): TopLevelDomains
134+
public function getTLDs(string $url = self::RZD_URL, $ttl = null): TopLevelDomains
165135
{
166-
$cacheKey = $this->getCacheKey('RZD', $url);
167-
$cacheList = $this->cache->get($cacheKey);
136+
$key = $this->getCacheKey('RZD', $url);
137+
$data = $this->cache->get($key);
168138

169-
if (null === $cacheList && !$this->refreshTLDs($url)) {
139+
if (null === $data && !$this->refreshTLDs($url, $ttl)) {
170140
throw new CouldNotLoadTLDs(sprintf('Unable to load the root zone database from %s', $url));
171141
}
172142

173-
$data = json_decode($cacheList ?? $this->cache->get($cacheKey), true);
174-
if (JSON_ERROR_NONE === json_last_error()) {
175-
return TopLevelDomains::createFromArray($data);
143+
$data = json_decode($data ?? $this->cache->get($key), true);
144+
if (JSON_ERROR_NONE !== json_last_error()) {
145+
throw new CouldNotLoadTLDs('The root zone database cache is corrupted: '.json_last_error_msg(), json_last_error());
146+
}
147+
148+
if (!isset($data['records'], $data['version'], $data['update'])) {
149+
throw new CouldNotLoadTLDs(sprintf('The root zone database cache content is corrupted'));
176150
}
177151

178-
throw new CouldNotLoadTLDs('The root zone database cache is corrupted: '.json_last_error_msg(), json_last_error());
152+
return new TopLevelDomains(
153+
$data['records'],
154+
$data['version'],
155+
DateTimeImmutable::createFromFormat(DATE_ATOM, $data['update'])
156+
);
179157
}
180158

181159
/**
@@ -185,14 +163,53 @@ public function getTLDs(string $url = self::RZD_URL): TopLevelDomains
185163
*
186164
* Returns true if the refresh was successful
187165
*
166+
* @param null|mixed $ttl
167+
*
188168
* @throws Exception if the source is not validated
189169
*/
190-
public function refreshTLDs(string $url = self::RZD_URL): bool
170+
public function refreshTLDs(string $url = self::RZD_URL, $ttl = null): bool
171+
{
172+
$data = $this->converter->convertRootZoneDatabase($this->http->getContent($url));
173+
$key = $this->getCacheKey('RZD', $url);
174+
$ttl = $this->filterTtl($ttl) ?? $this->ttl;
175+
176+
return $this->cache->set($key, json_encode($data), $ttl);
177+
}
178+
179+
/**
180+
* Normalizes the given TTL into a DateInterval, or returns null when no TTL is given.
181+
*
182+
* @return DateInterval|null
183+
*/
184+
private function filterTtl($ttl)
191185
{
192-
$body = $this->http->getContent($url);
193-
$cacheData = $this->converter->convertRootZoneDatabase($body);
194-
$cacheKey = $this->getCacheKey('RZD', $url);
186+
if ($ttl instanceof DateInterval || null === $ttl) {
187+
return $ttl;
188+
}
189+
190+
if ($ttl instanceof DateTimeInterface) {
191+
return (new DateTimeImmutable('now', $ttl->getTimezone()))->diff($ttl);
192+
}
193+
194+
if (false !== ($res = filter_var($ttl, FILTER_VALIDATE_INT))) {
195+
return new DateInterval('PT'.$res.'S');
196+
}
197+
198+
if (is_string($ttl)) {
199+
return DateInterval::createFromDateString($ttl);
200+
}
201+
202+
throw new TypeError(sprintf(
203+
'The ttl must an integer, a string or a DateInterval object %s given',
204+
is_object($ttl) ? get_class($ttl) : gettype($ttl)
205+
));
206+
}
195207

196-
return $this->cache->set($cacheKey, json_encode($cacheData), $this->ttl);
208+
/**
209+
* Returns the cache key according to the source URL.
210+
*/
211+
private function getCacheKey(string $prefix, string $str): string
212+
{
213+
return sprintf('%s_FULL_%s', $prefix, md5(strtolower($str)));
197214
}
198215
}

src/TopLevelDomains.php

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,7 @@ public static function createFromString(string $content): self
7878

7979
$converter = $converter ?? new Converter();
8080

81-
return self::createFromArray($converter->convertRootZoneDatabase($content));
82-
}
83-
84-
/**
85-
* Returns a new instance from a array.
86-
*/
87-
public static function createFromArray(array $data): self
88-
{
89-
if (!isset($data['records'], $data['version'], $data['update'])) {
90-
throw new Exception(sprintf('The provided array must share the same structure returned by %s::toArray method', TopLevelDomains::class));
91-
}
81+
$data = $converter->convertRootZoneDatabase($content);
9282

9383
return new self(
9484
$data['records'],

tests/DomainTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ public function provideWrongConstructor()
8787
* @dataProvider invalidDomainProvider
8888
* @covers ::__construct
8989
* @covers ::setLabels
90+
* @covers ::idnToAscii
9091
* @covers ::getIdnErrors
9192
*/
9293
public function testToAsciiThrowsException(string $domain)

tests/ManagerTest.php

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,58 @@ public function has($key)
361361
$manager->getTLDs();
362362
}
363363

364+
/**
365+
* @covers ::getTLDs
366+
*/
367+
public function testGetTLDsThrowsExceptionIfTheCacheContentIsCorrupted()
368+
{
369+
$cachePool = new class() implements CacheInterface {
370+
public function get($key, $default = null)
371+
{
372+
return '{"foo":"bar"}'; //valid json, but missing the records, version and update keys
373+
}
374+
375+
public function set($key, $value, $ttl = null)
376+
{
377+
return false;
378+
}
379+
380+
public function delete($key)
381+
{
382+
return true;
383+
}
384+
385+
public function clear()
386+
{
387+
return true;
388+
}
389+
390+
public function getMultiple($keys, $default = null)
391+
{
392+
return [];
393+
}
394+
395+
public function setMultiple($values, $ttl = null)
396+
{
397+
return true;
398+
}
399+
public function deleteMultiple($keys)
400+
{
401+
return true;
402+
}
403+
404+
public function has($key)
405+
{
406+
return true;
407+
}
408+
};
409+
410+
self::expectException(CouldNotLoadTLDs::class);
411+
$manager = new Manager($cachePool, new CurlHttpClient());
412+
$manager->getTLDs();
413+
}
414+
415+
364416
/**
365417
* @covers \Pdp\Converter::convert
366418
* @covers \Pdp\Converter::getSection

0 commit comments

Comments
 (0)