Skip to content

Commit 058dda2

Browse files
committed
Improve the Pull Request
- Changed the constants' suffix from `_SECTION` to `_DOMAINS` - Added the `ALL_DOMAINS` constant to refer to the full PSL - The default value of the optional `PublicSuffixListManager::getList` parameter is now `PublicSuffixListManager::ALL_DOMAINS` - Removed the `PublicSuffixListManager::$list` protected property - Marked `PublicSuffixListManager::parseListToArray` as deprecated - Refactored PSL parsing to generate the 3 cache files in a single pass - Reverted the `PublicSuffixListManager::write` changes because they are out of scope for this PR.
1 parent 7e9a987 commit 058dda2

File tree

2 files changed

+133
-87
lines changed

2 files changed

+133
-87
lines changed

src/Pdp/PublicSuffixListManager.php

Lines changed: 114 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
*/
1111
namespace Pdp;
1212

13+
use SplFileObject;
14+
1315
/**
1416
* Public Suffix List Manager.
1517
*
@@ -18,13 +20,14 @@
1820
*/
1921
class PublicSuffixListManager
2022
{
23+
const ALL_DOMAINS = 'ALL';
2124
const PDP_PSL_TEXT_FILE = 'public-suffix-list.txt';
2225
const PDP_PSL_PHP_FILE = 'public-suffix-list.php';
2326

24-
const ICANN_SECTION = 'ICANN';
27+
const ICANN_DOMAINS = 'ICANN';
2528
const ICANN_PSL_PHP_FILE = 'icann-public-suffix-list.php';
2629

27-
const PRIVATE_SECTION = 'PRIVATE';
30+
const PRIVATE_DOMAINS = 'PRIVATE';
2831
const PRIVATE_PSL_PHP_FILE = 'private-public-suffix-list.php';
2932

3033
/**
@@ -40,7 +43,11 @@ class PublicSuffixListManager
4043
/**
4144
* @var array Map of domain list type (ALL, ICANN, PRIVATE) to its PHP cache file basename
4245
*/
43-
protected $list;
46+
protected static $domainList = array(
47+
self::ALL_DOMAINS => self::PDP_PSL_PHP_FILE,
48+
self::ICANN_DOMAINS => self::ICANN_PSL_PHP_FILE,
49+
self::PRIVATE_DOMAINS => self::PRIVATE_PSL_PHP_FILE,
50+
);
4451

4552
/**
4653
* @var \Pdp\HttpAdapter\HttpAdapterInterface Http adapter
@@ -71,21 +78,10 @@ public function refreshPublicSuffixList()
7178
{
7279
$this->fetchListFromSource();
7380
$cacheFile = $this->cacheDir . '/' . self::PDP_PSL_TEXT_FILE;
74-
75-
$this->varExportToFile(
76-
self::PDP_PSL_PHP_FILE,
77-
$this->parseListToArray($cacheFile)
78-
);
79-
80-
$this->varExportToFile(
81-
self::ICANN_PSL_PHP_FILE,
82-
$this->parseSectionToArray(self::ICANN_SECTION, $cacheFile)
83-
);
84-
85-
$this->varExportToFile(
86-
self::PRIVATE_PSL_PHP_FILE,
87-
$this->parseSectionToArray(self::PRIVATE_SECTION, $cacheFile)
88-
);
81+
$publicSuffixListArray = $this->convertListToArray($cacheFile);
82+
foreach ($publicSuffixListArray as $domain => $data) {
83+
$this->varExportToFile(self::$domainList[$domain], $data);
84+
}
8985
}
9086

9187
/**
@@ -104,42 +100,122 @@ public function fetchListFromSource()
104100
/**
105101
* Parses text representation of list to associative, multidimensional array.
106102
*
107-
* This method is based heavily on the code found in generateEffectiveTLDs.php
103+
* @param string $textFile Public Suffix List text filename
108104
*
109-
* @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
110-
* A copy of the Apache License, Version 2.0, is provided with this
111-
* distribution
105+
* @return array Associative, multidimensional array representation of the
106+
* public suffix list
107+
*/
108+
protected function convertListToArray($textFile)
109+
{
110+
$addDomain = array(
111+
self::ICANN_DOMAINS => false,
112+
self::PRIVATE_DOMAINS => false,
113+
);
114+
115+
$publicSuffixListArray = array(
116+
self::ALL_DOMAINS => array(),
117+
self::ICANN_DOMAINS => array(),
118+
self::PRIVATE_DOMAINS => array(),
119+
);
120+
121+
$data = new SplFileObject($textFile);
122+
$data->setFlags(SplFileObject::DROP_NEW_LINE | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
123+
foreach ($data as $line) {
124+
$addDomain = $this->validateDomainAddition($line, $addDomain);
125+
if (strstr($line, '//') !== false) {
126+
continue;
127+
}
128+
$publicSuffixListArray = $this->convertLineToArray($line, $publicSuffixListArray, $addDomain);
129+
}
130+
131+
return $publicSuffixListArray;
132+
}
133+
134+
/**
135+
* Update the addition status for a given line against the domain list (ICANN and PRIVATE).
112136
*
113-
* @param string $textFile Public Suffix List text filename
137+
* @param string $line the current file line
138+
* @param array $addDomain the domain addition status
139+
*/
140+
protected function validateDomainAddition($line, array $addDomain)
141+
{
142+
foreach ($addDomain as $section => $status) {
143+
$addDomain[$section] = $this->isValidSection($status, $line, $section);
144+
}
145+
146+
return $addDomain;
147+
}
148+
149+
/**
150+
* Tell whether the line can be converted for a given domain.
151+
*
152+
* @param bool $previousStatus the previous status
153+
* @param string $line the current file line
154+
* @param string $section the section to be considered
155+
*
156+
* @return bool
157+
*/
158+
protected function isValidSection($previousStatus, $line, $section)
159+
{
160+
if (!$previousStatus && 0 === strpos($line, '// ===BEGIN ' . $section . ' DOMAINS===')) {
161+
return true;
162+
}
163+
164+
if ($previousStatus && 0 === strpos($line, '// ===END ' . $section . ' DOMAINS===')) {
165+
return false;
166+
}
167+
168+
return $previousStatus;
169+
}
170+
171+
/**
172+
* Convert a line from the Public Suffix list.
173+
*
174+
* @param string $textLine Public Suffix List text line
175+
* @param array $publicSuffixListArray Associative, multidimensional array representation of the
176+
* public suffix list
177+
* @param array $addDomain Tell which section should be converted
114178
*
115179
* @return array Associative, multidimensional array representation of the
116180
* public suffix list
117181
*/
118-
public function parseListToArray($textFile)
182+
protected function convertLineToArray($textLine, array $publicSuffixListArray, array $addDomain)
119183
{
120-
return $this->parseSectionToArray('', $textFile);
184+
$ruleParts = explode('.', $textLine);
185+
$this->buildArray($publicSuffixListArray[self::ALL_DOMAINS], $ruleParts);
186+
$domainNames = array_keys(array_filter($addDomain));
187+
foreach ($domainNames as $domainName) {
188+
$this->buildArray($publicSuffixListArray[$domainName], $ruleParts);
189+
}
190+
191+
return $publicSuffixListArray;
121192
}
122193

123194
/**
124-
* Parses text representation of the Public suffix list to associative, multidimensional array.
195+
* Parses text representation of list to associative, multidimensional array.
125196
*
126197
* This method is based heavily on the code found in generateEffectiveTLDs.php
127198
*
199+
* DEPRECATION WARNING! This method will be removed in the next major point release
200+
*
201+
* @deprecated deprecated since version 3.1.0
128202
* @link https://github.com/usrflo/registered-domain-libs/blob/master/generateEffectiveTLDs.php
129203
* A copy of the Apache License, Version 2.0, is provided with this
130204
* distribution
131205
*
132-
* @param string $section Public Suffix List section name
133206
* @param string $textFile Public Suffix List text filename
134207
*
135208
* @return array Associative, multidimensional array representation of the
136209
* public suffix list
137210
*/
138-
protected function parseSectionToArray($section, $textFile)
211+
public function parseListToArray($textFile)
139212
{
140-
$publicSuffixListArray = array();
141213
$data = file($textFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
142-
$filter = $this->getLineFilter($section);
214+
$filter = function ($line) {
215+
return strstr($line, '//') === false;
216+
};
217+
218+
$publicSuffixListArray = array();
143219
foreach (array_filter($data, $filter) as $line) {
144220
$ruleParts = explode('.', $line);
145221
$this->buildArray($publicSuffixListArray, $ruleParts);
@@ -148,34 +224,6 @@ protected function parseSectionToArray($section, $textFile)
148224
return $publicSuffixListArray;
149225
}
150226

151-
/**
152-
* Return the PSL line filter.
153-
*
154-
* @param string $section Public Suffix List section name
155-
*
156-
* @return Closure
157-
*/
158-
protected function getLineFilter($section)
159-
{
160-
$section = trim($section);
161-
$add = empty($section);
162-
if ($add) {
163-
return function ($line) {
164-
return strstr($line, '//') === false;
165-
};
166-
}
167-
168-
return function ($line) use (&$add, $section) {
169-
if (!$add && 0 === strpos($line, '// ===BEGIN ' . $section . ' DOMAINS===')) {
170-
$add = true;
171-
} elseif ($add && 0 === strpos($line, '// ===END ' . $section . ' DOMAINS===')) {
172-
$add = false;
173-
}
174-
175-
return $add && strstr($line, '//') === false;
176-
};
177-
}
178-
179227
/**
180228
* Recursive method to build the array representation of the Public Suffix List.
181229
*
@@ -250,26 +298,19 @@ protected function varExportToFile($basename, array $input)
250298
/**
251299
* Gets Public Suffix List.
252300
*
253-
* @param string|null $section the Public Suffix List type
301+
* @param string $list the Public Suffix List type
254302
*
255303
* @return PublicSuffixList Instance of Public Suffix List
256304
*/
257-
public function getList($section = null)
305+
public function getList($list = self::ALL_DOMAINS)
258306
{
259-
$sectionList = array(
260-
self::ICANN_SECTION => self::ICANN_PSL_PHP_FILE,
261-
self::PRIVATE_SECTION => self::PRIVATE_PSL_PHP_FILE,
262-
);
263-
264-
$cacheBasename = isset($sectionList[$section]) ? $sectionList[$section] : self::PDP_PSL_PHP_FILE;
265-
$psl_php_file = $this->cacheDir . '/' . $cacheBasename;
266-
if (!file_exists($psl_php_file)) {
307+
$cacheBasename = isset(self::$domainList[$list]) ? self::$domainList[$list] : self::PDP_PSL_PHP_FILE;
308+
$cacheFile = $this->cacheDir . '/' . $cacheBasename;
309+
if (!file_exists($cacheFile)) {
267310
$this->refreshPublicSuffixList();
268311
}
269312

270-
$this->list = new PublicSuffixList($psl_php_file);
271-
272-
return $this->list;
313+
return new PublicSuffixList($cacheFile);
273314
}
274315

275316
/**
@@ -285,15 +326,12 @@ public function getList($section = null)
285326
protected function write($filename, $data)
286327
{
287328
$path = $this->cacheDir . '/' . $filename;
288-
$level = error_reporting(0);
289-
$result = file_put_contents($path, $data);
290-
error_reporting($level);
329+
$result = @file_put_contents($path, $data);
291330
if ($result !== false) {
292331
return $result;
293332
}
294-
$error = error_get_last();
295333

296-
throw new \Exception(sprintf("Cannot write '%s' : %s", $path, $error['message']));
334+
throw new \Exception(sprintf("Cannot write '%s'", $path));
297335
}
298336

299337
/**

tests/src/Pdp/PublicSuffixListManagerTest.php

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -218,16 +218,24 @@ public function testGetProvidedListFromDefaultCacheDir()
218218
public function testGetDifferentPublicList()
219219
{
220220
$listManager = new PublicSuffixListManager();
221-
$publicSuffixList = $listManager->getList();
222-
$icannSuffixList = $listManager->getList(PublicSuffixListManager::ICANN_SECTION);
223-
$privateSuffixList = $listManager->getList(PublicSuffixListManager::PRIVATE_SECTION);
224-
$invalidSuffixList = $listManager->getList('invalid type');
225-
$this->assertInstanceOf('\Pdp\PublicSuffixList', $icannSuffixList);
226-
$this->assertInstanceOf('\Pdp\PublicSuffixList', $privateSuffixList);
227-
$this->assertInstanceOf('\Pdp\PublicSuffixList', $invalidSuffixList);
228-
$this->assertEquals($invalidSuffixList, $publicSuffixList);
229-
$this->assertNotEquals($privateSuffixList, $icannSuffixList);
230-
$this->assertNotEquals($publicSuffixList, $icannSuffixList);
231-
$this->assertNotEquals($publicSuffixList, $privateSuffixList);
221+
$publicList = $listManager->getList();
222+
$invalidList = $listManager->getList('invalid type');
223+
$this->assertEquals($publicList, $invalidList);
224+
}
225+
226+
public function testParserWithDifferentPublicList()
227+
{
228+
$listManager = new PublicSuffixListManager();
229+
$icannList = $listManager->getList(PublicSuffixListManager::ICANN_DOMAINS);
230+
$privateList = $listManager->getList(PublicSuffixListManager::PRIVATE_DOMAINS);
231+
$host = 'thephpleague.github.io';
232+
233+
$icannParser = new Parser($icannList);
234+
$icannSubdomain = $icannParser->parseHost($host)->getSubdomain();
235+
$this->assertSame('thephpleague', $icannSubdomain);
236+
237+
$privateParser = new Parser($privateList);
238+
$privateSubdomain = $privateParser->parseHost($host)->getSubdomain();
239+
$this->assertSame(null, $privateSubdomain);
232240
}
233241
}

0 commit comments

Comments
 (0)