Skip to content

Commit 6a85787

Browse files
committed
Decouple TLD conversion from PSL conversion
1 parent e45496a commit 6a85787

11 files changed

+199
-138
lines changed

.php_cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ return PhpCsFixer\Config::create()
4343
'phpdoc_scalar' => true,
4444
'phpdoc_to_comment' => true,
4545
'phpdoc_summary' => true,
46-
'psr0' => true,
4746
'psr4' => true,
4847
'return_type_declaration' => ['space_before' => 'none'],
4948
'single_blank_line_before_namespace' => true,

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ All Notable changes to `PHP Domain Parser` **5.x** series will be documented in
77
### Added
88

99
- `Pdp\TopLevelDomains` to allow resolving domains against the IANA Root Zone Database
10+
- `Pdp\TLDConverter` converts the IANA Root Zones database into an associative array
1011
- `Pdp\Manager::getTLDs` a service to return a cached version of the IANA Root Zone Database
1112
- `Pdp\Manager::refreshTLDs` a service to refresh the cached version of the IANA Root Zone Database
12-
- `Pdp\Converter::convertRootZoneDatabase` converts the IANA Root zone database into an associative array
1313
- added a new `$ttl` parameter to improve PSR-16 support in
1414
- `Pdp\Manager::__construct`
1515
- `Pdp\Manager::getRules`

data/pdp-PSL_FULL_5a3cc7f81795bb2e48e848af42d287b4.cache

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

data/pdp-RZD_FULL_f18a70477d29d525b9220612e2115345.cache

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/Converter.php

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,12 @@
1515

1616
namespace Pdp;
1717

18-
use DateTimeImmutable;
1918
use SplTempFileObject;
20-
use const DATE_ATOM;
2119
use function array_pop;
2220
use function explode;
2321
use function preg_match;
24-
use function sprintf;
2522
use function strpos;
2623
use function substr;
27-
use function trim;
2824

2925
/**
3026
* Public Suffix List Parser.
@@ -128,56 +124,4 @@ private function addRule(array $list, array $rule_parts): array
128124

129125
return $list;
130126
}
131-
/**
132-
* Converts the IANA Root Zone Database into a TopLevelDomains collection object.
133-
*/
134-
public function convertRootZoneDatabase(string $content): array
135-
{
136-
$header = [];
137-
$records = [];
138-
139-
$file = new SplTempFileObject();
140-
$file->fwrite($content);
141-
$file->setFlags(SplTempFileObject::DROP_NEW_LINE | SplTempFileObject::READ_AHEAD | SplTempFileObject::SKIP_EMPTY);
142-
foreach ($file as $line) {
143-
$line_content = trim($line);
144-
if (false === strpos($line_content, '#')) {
145-
$records[] = $this->idnToAscii($line_content);
146-
continue;
147-
}
148-
149-
if ([] === $header) {
150-
$header = $this->getHeaderInfo($line_content);
151-
continue;
152-
}
153-
154-
throw new Exception(sprintf('Invalid Version line: %s', $line_content));
155-
}
156-
157-
if ([] === $records || [] === $header) {
158-
throw new Exception(sprintf('No TLD or Version header found'));
159-
}
160-
161-
$header['records'] = $records;
162-
163-
return $header;
164-
}
165-
166-
/**
167-
* Extract IANA Root Zone Database header info.
168-
*/
169-
private function getHeaderInfo(string $content): array
170-
{
171-
if (preg_match('/^\# Version (?<version>\d+), Last Updated (?<update>.*?)$/', $content, $matches)) {
172-
$date = DateTimeImmutable::createFromFormat('D M d H:i:s Y e', $matches['update']);
173-
$matches['update'] = $date->format(DATE_ATOM);
174-
175-
return [
176-
'version' => $matches['version'],
177-
'update' => $matches['update'],
178-
];
179-
}
180-
181-
throw new Exception(sprintf('Invalid Version line: %s', $content));
182-
}
183127
}

src/Manager.php

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,6 @@ final class Manager
6464
*/
6565
private $ttl;
6666

67-
/**
68-
* @var Converter;
69-
*/
70-
private $converter;
71-
7267
/**
7368
* new instance.
7469
*
@@ -79,7 +74,6 @@ public function __construct(CacheInterface $cache, HttpClient $http, $ttl = null
7974
$this->cache = $cache;
8075
$this->http = $http;
8176
$this->ttl = $this->filterTtl($ttl);
82-
$this->converter = new Converter();
8377
}
8478

8579
/**
@@ -117,11 +111,15 @@ public function getRules(string $url = self::PSL_URL, $ttl = null): Rules
117111
*/
118112
public function refreshRules(string $url = self::PSL_URL, $ttl = null): bool
119113
{
120-
$data = $this->converter->convert($this->http->getContent($url));
121-
$key = $this->getCacheKey('PSL', $url);
122-
$ttl = $this->filterTtl($ttl) ?? $this->ttl;
114+
static $converter;
115+
116+
$converter = $converter ?? new Converter();
123117

124-
return $this->cache->set($key, json_encode($data), $ttl);
118+
return $this->cache->set(
119+
$this->getCacheKey('PSL', $url),
120+
json_encode($converter->convert($this->http->getContent($url))),
121+
$this->filterTtl($ttl) ?? $this->ttl
122+
);
125123
}
126124

127125
/**
@@ -169,11 +167,15 @@ public function getTLDs(string $url = self::RZD_URL, $ttl = null): TopLevelDomai
169167
*/
170168
public function refreshTLDs(string $url = self::RZD_URL, $ttl = null): bool
171169
{
172-
$data = $this->converter->convertRootZoneDatabase($this->http->getContent($url));
173-
$key = $this->getCacheKey('RZD', $url);
174-
$ttl = $this->filterTtl($ttl) ?? $this->ttl;
170+
static $converter;
175171

176-
return $this->cache->set($key, json_encode($data), $ttl);
172+
$converter = $converter ?? new TLDConverter();
173+
174+
return $this->cache->set(
175+
$this->getCacheKey('RZD', $url),
176+
json_encode($converter->convert($this->http->getContent($url))),
177+
$this->filterTtl($ttl) ?? $this->ttl
178+
);
177179
}
178180

179181
/**

src/TLDConverter.php

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
<?php
2+
3+
/**
4+
* PHP Domain Parser: Public Suffix List based URL parsing.
5+
*
6+
* @see http://github.com/jeremykendall/php-domain-parser for the canonical source repository
7+
*
8+
* @copyright Copyright (c) 2017 Jeremy Kendall (http://jeremykendall.net)
9+
*
10+
* For the full copyright and license information, please view the LICENSE
11+
* file that was distributed with this source code.
12+
*/
13+
14+
declare(strict_types=1);
15+
16+
namespace Pdp;
17+
18+
use DateTimeImmutable;
19+
use SplTempFileObject;
20+
use const DATE_ATOM;
21+
use function preg_match;
22+
use function sprintf;
23+
use function strpos;
24+
use function trim;
25+
26+
/**
27+
* IANA Root Zone Database Parser.
28+
*
29+
* This class converts the IANA Root Zone Database into an associative, multidimensional array
30+
*
31+
* @author Ignace Nyamagana Butera <[email protected]>
32+
*/
33+
final class TLDConverter implements PublicSuffixListSection
{
    use IDNAConverterTrait;

    /**
     * Date format used by the "Last Updated" part of the IANA header line,
     * e.g. "Wed Aug 22 07:07:01 2018 UTC".
     *
     * @internal
     */
    const IANA_DATE_FORMAT = 'D M d H:i:s Y e';

    /**
     * Converts the IANA Root Zone Database into a TopLevelDomains associative array.
     *
     * The first non-blank line must be the version header; every following
     * non-blank line must be a TLD record (a line without any '#' character).
     *
     * @param string $content raw text of the IANA Root Zone Database
     *
     * @throws Exception if the header is missing or malformed, if a comment line
     *                   appears after the header, or if no record is found
     *
     * @return array associative array with the keys 'version', 'update' and 'records'
     */
    public function convert(string $content): array
    {
        $data = [];
        $file = new SplTempFileObject();
        $file->fwrite($content);
        $file->setFlags(SplTempFileObject::DROP_NEW_LINE | SplTempFileObject::READ_AHEAD | SplTempFileObject::SKIP_EMPTY);
        foreach ($file as $line) {
            $line = trim($line);
            // SKIP_EMPTY only drops truly empty lines; a whitespace-only line
            // becomes empty after trim() and must not be stored as a record.
            if ('' === $line) {
                continue;
            }

            // Nothing parsed yet: this line has to be the version header.
            if ([] === $data) {
                $data = $this->getHeaderInfo($line);
                continue;
            }

            // Record lines never contain '#'; the conversion itself is delegated
            // to idnToAscii() (expected to be provided by IDNAConverterTrait).
            if (false === strpos($line, '#')) {
                $data['records'][] = $this->idnToAscii($line);
                continue;
            }

            // Any further comment/header line after the first one is rejected.
            throw new Exception(sprintf('Invalid line content: %s', $line));
        }

        // 'records' is only set once at least one TLD line has been read.
        if (isset($data['version'], $data['update'], $data['records'])) {
            return $data;
        }

        throw new Exception('Invalid content: TLD conversion failed');
    }

    /**
     * Extract IANA Root Zone Database header info.
     *
     * @param string $content the trimmed header line
     *
     * @throws Exception if the line does not match the expected header pattern
     *                   or if its date cannot be parsed with IANA_DATE_FORMAT
     *
     * @return array the 'version' string and the 'update' date formatted as DATE_ATOM
     */
    private function getHeaderInfo(string $content): array
    {
        if (!preg_match('/^\# Version (?<version>\d+), Last Updated (?<update>.*?)$/', $content, $matches)) {
            throw new Exception(sprintf('Invalid Version line: %s', $content));
        }

        // createFromFormat() returns false on an unparsable date; guard against
        // it so callers get the library exception instead of a fatal error.
        $date = DateTimeImmutable::createFromFormat(self::IANA_DATE_FORMAT, $matches['update']);
        if (false === $date) {
            throw new Exception(sprintf('Invalid Version line: %s', $content));
        }

        return [
            'version' => $matches['version'],
            'update' => $date->format(DATE_ATOM),
        ];
    }
}

src/TopLevelDomains.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ public static function createFromString(string $content): self
7676
{
7777
static $converter;
7878

79-
$converter = $converter ?? new Converter();
79+
$converter = $converter ?? new TLDConverter();
8080

81-
$data = $converter->convertRootZoneDatabase($content);
81+
$data = $converter->convert($content);
8282

8383
return new self(
8484
$data['records'],

tests/ManagerTest.php

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
use Pdp\Cache;
2222
use Pdp\Converter;
2323
use Pdp\CurlHttpClient;
24-
use Pdp\Exception;
2524
use Pdp\Exception\CouldNotLoadRules;
2625
use Pdp\Exception\CouldNotLoadTLDs;
2726
use Pdp\Exception\InvalidDomain;
@@ -412,7 +411,6 @@ public function has($key)
412411
$manager->getTLDs();
413412
}
414413

415-
416414
/**
417415
* @covers \Pdp\Converter::convert
418416
* @covers \Pdp\Converter::getSection
@@ -426,17 +424,6 @@ public function testConvertThrowsExceptionWithInvalidContent()
426424
(new Converter())->convert($content);
427425
}
428426

429-
/**
430-
* @covers \Pdp\Converter::convertRootZoneDatabase
431-
* @covers \Pdp\Converter::getHeaderInfo
432-
*/
433-
public function testConvertRootZoneDatabaseThrowsExceptionWithInvalidContent()
434-
{
435-
self::expectException(Exception::class);
436-
$content = file_get_contents(__DIR__.'/data/invalid_suffix_list_content.dat');
437-
(new Converter())->convertRootZoneDatabase($content);
438-
}
439-
440427
/**
441428
* @dataProvider validTtlProvider
442429
*/

tests/TLDConverterTest.php

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
<?php
2+
3+
/**
4+
* PHP Domain Parser: Public Suffix List based URL parsing.
5+
*
6+
* @see http://github.com/jeremykendall/php-domain-parser for the canonical source repository
7+
*
8+
* @copyright Copyright (c) 2017 Jeremy Kendall (http://jeremykendall.net)
9+
*
10+
* For the full copyright and license information, please view the LICENSE
11+
* file that was distributed with this source code.
12+
*/
13+
14+
declare(strict_types=1);
15+
16+
namespace Pdp\Tests;
17+
18+
use Pdp\Exception;
19+
use Pdp\TLDConverter;
20+
use PHPUnit\Framework\TestCase;
21+
22+
/**
 * Tests the conversion of the IANA Root Zone Database by TLDConverter.
 *
 * @coversDefaultClass Pdp\TLDConverter
 */
class TLDConverterTest extends TestCase
{
    /**
     * A valid database file is converted into an array exposing the
     * 'version', 'update' and 'records' keys.
     *
     * @covers ::convert
     * @covers ::getHeaderInfo
     */
    public function testConverter()
    {
        $string = file_get_contents(__DIR__.'/data/root_zones.dat');
        $res = (new TLDConverter())->convert($string);
        self::assertInternalType('array', $res);
        self::assertArrayHasKey('version', $res);
        self::assertArrayHasKey('update', $res);
        self::assertArrayHasKey('records', $res);
    }

    /**
     * Every malformed content supplied by the provider must be rejected.
     *
     * @covers ::convert
     * @covers ::getHeaderInfo
     *
     * @dataProvider invalidContentProvider
     */
    public function testConverterThrowsException(string $content)
    {
        self::expectException(Exception::class);
        (new TLDConverter())->convert($content);
    }

    /**
     * Supplies contents that TLDConverter::convert is expected to reject.
     *
     * @return array one single-argument data set per named case
     */
    public function invalidContentProvider(): array
    {
        // A second header line after records have started.
        $double_header = <<<EOF
# Version 2018082200, Last Updated Wed Aug 22 07:07:01 2018 UTC
FOO
BAR
# Version 2018082200, Last Updated Wed Aug 22 07:07:01 2018 UTC
ABARTH
ABB
ABBOTT
ABBVIE
EOF;

        // Header without the "Last Updated" part.
        $invalid_header = <<<EOF
# Version 2018082200
FOO
BAR
EOF;

        // Records appear before the header line.
        $header_no_first_line = <<<EOF
FOO
BAR
# Version 2018082200, Last Updated Wed Aug 22 07:07:01 2018 UTC
ABARTH
ABB
ABBOTT
ABBVIE
EOF;

        return [
            'double header' => [$double_header],
            'invalid header' => [$invalid_header],
            'empty content' => [''],
            'header not on the first line' => [$header_no_first_line],
        ];
    }
}

0 commit comments

Comments
 (0)