Skip to content

Commit bafb754

Browse files
committed
Merge pull request #31 from jeremykendall/feature/idn-support
Adds IDNA support
2 parents a84c708 + 551aa8a commit bafb754

File tree

7 files changed

+124
-29
lines changed

7 files changed

+124
-29
lines changed

.travis.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,5 @@ php:
99
script: phpunit
1010

1111
before_script:
12-
- composer self-update
13-
- composer update
12+
- composer install
1413
- ./bin/pdp-psl

data/public-suffix-list.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20287,4 +20287,4 @@
2028720287
'cern' =>
2028820288
array (
2028920289
),
20290-
);
20290+
);

library/Pdp/Parser.php

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,11 @@ public function parseUrl($url)
5757
'fragment' => null,
5858
);
5959

60-
if (preg_match(self::SCHEME_PATTERN, $url, $schemeMatches) === 0) {
60+
if (preg_match(self::SCHEME_PATTERN, $url) === 0) {
6161
$url = 'http://' . preg_replace('#^//#', '', $url, 1);
6262
}
6363

64-
$parts = parse_url($url);
65-
66-
if ($parts === false) {
67-
throw new \InvalidArgumentException(sprintf('Invalid url %s', $url));
68-
}
64+
$parts = $this->mbParseUrl($url);
6965

7066
$elem = (array) $parts + $elem;
7167

@@ -91,6 +87,8 @@ public function parseUrl($url)
9187
*/
9288
public function parseHost($host)
9389
{
90+
$host = mb_strtolower($host, 'UTF-8');
91+
9492
$subdomain = null;
9593
$registerableDomain = null;
9694
$publicSuffix = null;
@@ -130,7 +128,6 @@ public function getPublicSuffix($host)
130128
return null;
131129
}
132130

133-
$host = strtolower($host);
134131
$parts = array_reverse(explode('.', $host));
135132
$publicSuffix = array();
136133
$publicSuffixList = $this->publicSuffixList;
@@ -185,7 +182,13 @@ public function getRegisterableDomain($host)
185182
return null;
186183
}
187184

188-
$host = strtolower($host);
185+
$punycoded = (strpos($host, 'xn--') !== false);
186+
187+
if ($punycoded) {
188+
$host = idn_to_utf8($host);
189+
}
190+
191+
$host = mb_strtolower($host, 'UTF-8');
189192
$publicSuffix = $this->getPublicSuffix($host);
190193

191194
if ($publicSuffix === null || $host == $publicSuffix) {
@@ -196,7 +199,13 @@ public function getRegisterableDomain($host)
196199
$hostParts = array_reverse(explode('.', $host));
197200
$registerableDomainParts = array_slice($hostParts, 0, count($publicSuffixParts) + 1);
198201

199-
return implode('.', array_reverse($registerableDomainParts));
202+
$registerableDomain = implode('.', array_reverse($registerableDomainParts));
203+
204+
if ($punycoded) {
205+
$registerableDomain = idn_to_ascii($registerableDomain);
206+
}
207+
208+
return $registerableDomain;
200209
}
201210

202211
/**
@@ -207,7 +216,6 @@ public function getRegisterableDomain($host)
207216
*/
208217
public function getSubdomain($host)
209218
{
210-
$host = strtolower($host);
211219
$registerableDomain = $this->getRegisterableDomain($host);
212220

213221
if ($registerableDomain === null || $host == $registerableDomain) {
@@ -221,4 +229,43 @@ public function getSubdomain($host)
221229
return implode('.', array_reverse($subdomainParts));
222230
}
223231

232+
/**
 * UTF-8 aware parse_url() replacement.
 *
 * Taken from php.net manual comments {@link http://php.net/manual/en/function.parse-url.php#114817}
 *
 * Every run of characters other than the URL delimiters `:/@?&=#` is
 * URL-encoded before the string is handed to parse_url(); the string
 * components of the result are URL-decoded again before being returned.
 * Non-string results (the integer port, or null for an absent component)
 * are returned untouched.
 *
 * @param  string  $url       The URL to parse
 * @param  integer $component Specify one of PHP_URL_SCHEME, PHP_URL_HOST,
 * PHP_URL_PORT, PHP_URL_USER, PHP_URL_PASS, PHP_URL_PATH, PHP_URL_QUERY or
 * PHP_URL_FRAGMENT to retrieve just a specific URL component as a string
 * (except when PHP_URL_PORT is given, in which case the return value will
 * be an integer).
 * @return mixed   See parse_url documentation {@link http://us1.php.net/parse_url}
 * @throws \InvalidArgumentException If the URL cannot be encoded (e.g. it is
 * not valid UTF-8) or parse_url() rejects it
 */
public function mbParseUrl($url, $component = -1)
{
    $encodedUrl = preg_replace_callback(
        '%[^:/@?&=#]+%usD',
        function ($matches) {
            return urlencode($matches[0]);
        },
        $url
    );

    // preg_replace_callback() returns null on failure (for instance malformed
    // UTF-8 with the /u modifier); do not let parse_url() silently parse null.
    if ($encodedUrl === null) {
        throw new \InvalidArgumentException(sprintf('Invalid url %s', $url));
    }

    $parts = parse_url($encodedUrl, $component);

    if ($parts === false) {
        throw new \InvalidArgumentException(sprintf('Invalid url %s', $url));
    }

    if (is_array($parts)) {
        foreach ($parts as $name => $value) {
            // Only string components were encoded; the port is an integer
            // and must keep its type.
            if (is_string($value)) {
                $parts[$name] = urldecode($value);
            }
        }
    } elseif (is_string($parts)) {
        // A single requested component: decode strings, pass through the
        // integer port and null (component not present) unchanged.
        $parts = urldecode($parts);
    }

    return $parts;
}
224271
}

library/Pdp/Uri/Url.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ public function __toString()
134134
$host = $this->host->__toString();
135135

136136
if ($host) {
137-
$url .= urlencode($host);
137+
$url .= $host;
138138
}
139139

140140
if ($this->port) {

tests/library/Pdp/CheckPublicSuffixTest.php

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
namespace Pdp;
44

55
/**
6-
* This test case is based on the test data linked at
6+
* This test case is based on the test data linked at
77
* http://publicsuffix.org/list/ and provided by Rob Stradling of Comodo.
8-
* @link
8+
* @link
99
* http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt?raw=1
1010
*/
1111
class CheckPublicSuffixTest extends \PHPUnit_Framework_TestCase
@@ -27,6 +27,8 @@ public function testPublicSuffixSpec()
2727
{
2828
// Test data from Rob Stradling at Comodo
2929
// http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt?raw=1
30+
// Any copyright is dedicated to the Public Domain.
31+
// http://creativecommons.org/publicdomain/zero/1.0/
3032

3133
// null input.
3234
$this->checkPublicSuffix(null, null);
@@ -99,19 +101,39 @@ public function testPublicSuffixSpec()
99101
$this->checkPublicSuffix('k12.ak.us', null);
100102
$this->checkPublicSuffix('test.k12.ak.us', 'test.k12.ak.us');
101103
$this->checkPublicSuffix('www.test.k12.ak.us', 'test.k12.ak.us');
104+
// IDN labels.
105+
$this->checkPublicSuffix('食狮.com.cn', '食狮.com.cn');
106+
$this->checkPublicSuffix('食狮.公司.cn', '食狮.公司.cn');
107+
$this->checkPublicSuffix('www.食狮.公司.cn', '食狮.公司.cn');
108+
$this->checkPublicSuffix('shishi.公司.cn', 'shishi.公司.cn');
109+
$this->checkPublicSuffix('公司.cn', null);
110+
$this->checkPublicSuffix('食狮.中国', '食狮.中国');
111+
$this->checkPublicSuffix('www.食狮.中国', '食狮.中国');
112+
$this->checkPublicSuffix('shishi.中国', 'shishi.中国');
113+
$this->checkPublicSuffix('中国', null);
114+
// Same as above, but punycoded.
115+
$this->checkPublicSuffix('xn--85x722f.com.cn', 'xn--85x722f.com.cn');
116+
$this->checkPublicSuffix('xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
117+
$this->checkPublicSuffix('www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn');
118+
$this->checkPublicSuffix('shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn');
119+
$this->checkPublicSuffix('xn--55qx5d.cn', null);
120+
$this->checkPublicSuffix('xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
121+
$this->checkPublicSuffix('www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s');
122+
$this->checkPublicSuffix('shishi.xn--fiqs8s', 'shishi.xn--fiqs8s');
123+
$this->checkPublicSuffix('xn--fiqs8s', null);
102124
}
103125

104126
/**
105-
* This is my version of the checkPublicSuffix function referred to in the
127+
* This is my version of the checkPublicSuffix function referred to in the
106128
* test instructions at the Public Suffix List project.
107129
*
108-
* "You will need to define a checkPublicSuffix() function which takes as a
109-
* parameter a domain name and the public suffix, runs your implementation
130+
* "You will need to define a checkPublicSuffix() function which takes as a
131+
* parameter a domain name and the public suffix, runs your implementation
110132
* on the domain name and checks the result is the public suffix expected."
111133
*
112134
* @link http://publicsuffix.org/list/
113135
*
114-
* @param string $input Domain and public suffix
136+
* @param string $input Domain and public suffix
115137
* @param string $expected Expected result
116138
*/
117139
public function checkPublicSuffix($input, $expected)

tests/library/Pdp/ParserTest.php

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@ protected function tearDown()
2323

2424
/**
2525
* @covers Pdp\Parser::parseUrl()
26+
* @covers Pdp\Parser::mbParseUrl()
2627
*/
2728
public function testParseBadUrlThrowsInvalidArgumentException()
2829
{
2930
$this->setExpectedException(
30-
'\InvalidArgumentException',
31+
'\InvalidArgumentException',
3132
'Invalid url http:///example.com'
3233
);
3334

@@ -94,21 +95,24 @@ public function testGetSubdomain($url, $publicSuffix, $registerableDomain, $subd
9495
$this->assertSame($subdomain, $pdpUrl->host->subdomain);
9596
$this->assertSame($subdomain, $this->parser->getSubdomain($hostPart));
9697
}
97-
98-
/**
98+
99+
/**
99100
* @dataProvider parseDataProvider
100-
*/
101-
public function testPHPparse_urlCanReturnCorrectHost($url, $publicSuffix, $registerableDomain, $subdomain, $hostPart)
102-
{
103-
$this->assertEquals($hostPart, parse_url('http://' . $hostPart, PHP_URL_HOST));
104-
}
101+
*/
102+
public function testMbParseUrlCanReturnCorrectHost($url, $publicSuffix, $registerableDomain, $subdomain, $hostPart)
{
    // Prefix the expected host with a scheme and ask only for the host component back.
    $parsedHost = $this->parser->mbParseUrl('http://' . $hostPart, PHP_URL_HOST);

    $this->assertEquals($hostPart, $parsedHost);
}
105109

106110
public function parseDataProvider()
107111
{
108112
// url, public suffix, registerable domain, subdomain, host part
109113
return array(
110114
array('http://www.waxaudio.com.au/audio/albums/the_mashening', 'com.au', 'waxaudio.com.au', 'www', 'www.waxaudio.com.au'),
111-
array('example.com', 'com', 'example.com', null, 'example.com'),
115+
array('example.COM', 'com', 'example.com', null, 'example.com'),
112116
array('giant.yyyy', 'yyyy', 'giant.yyyy', null, 'giant.yyyy'),
113117
array('cea-law.co.il', 'co.il', 'cea-law.co.il', null, 'cea-law.co.il'),
114118
array('http://edition.cnn.com/WORLD/', 'com', 'cnn.com', 'edition', 'edition.cnn.com'),
@@ -139,6 +143,15 @@ public function parseDataProvider()
139143
array('test.museum', 'museum', 'test.museum', null, 'test.museum'),
140144
array('bob.smith.name', 'name', 'smith.name', 'bob', 'bob.smith.name'),
141145
array('tons.of.info', 'info', 'of.info', 'tons', 'tons.of.info'),
146+
// Test IDN parsing
147+
// BEGIN https://github.com/jeremykendall/php-domain-parser/issues/29
148+
array('http://Яндекс.РФ', 'рф', 'яндекс.рф', null, 'яндекс.рф'),
149+
// END https://github.com/jeremykendall/php-domain-parser/issues/29
150+
array('www.食狮.中国', '中国', '食狮.中国', 'www', 'www.食狮.中国'),
151+
array('食狮.com.cn', 'com.cn', '食狮.com.cn', null, '食狮.com.cn'),
152+
// Test punycode URLs
153+
array('www.xn--85x722f.xn--fiqs8s', 'xn--fiqs8s', 'xn--85x722f.xn--fiqs8s', 'www', 'www.xn--85x722f.xn--fiqs8s'),
154+
array('xn--85x722f.com.cn', 'com.cn', 'xn--85x722f.com.cn', null, 'xn--85x722f.com.cn'),
142155
);
143156
}
144157
}

tests/library/Pdp/Uri/UrlTest.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,4 +126,18 @@ public function testFtpUrlToString()
126126
$url = $this->parser->parseUrl($ftpUrl);
127127
$this->assertEquals($ftpUrl, $url->__toString());
128128
}
129+
130+
/**
 * Parsing a scheme-less, mixed-case IDN host and casting the result back to
 * a string is expected to give the lower-cased host behind an http scheme.
 *
 * @group issue29
 * @see https://github.com/jeremykendall/php-domain-parser/issues/29
 */
public function testIdnToAscii()
{
    $this->assertEquals(
        'http://яндекс.рф',
        $this->parser->parseUrl('Яндекс.РФ')->__toString()
    );
}
129143
}

0 commit comments

Comments
 (0)