Skip to content

Commit ace53d3

Browse files
committed
Support optional charset parameter for full UTF-8 support (utf8mb4)
1 parent 39973a8 commit ace53d3

File tree

7 files changed

+182
-13
lines changed

7 files changed

+182
-13
lines changed

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,16 @@ authentication. You can explicitly pass a custom timeout value in seconds
168168
$factory->createConnection('localhost?timeout=0.5');
169169
```
170170

171+
By default, the connection uses the `utf8` charset encoding. Note that
172+
MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now
173+
known as UTF-8 and for historical reasons doesn't support emojis and
174+
other characters that need four bytes in UTF-8. If you want full UTF-8 support, you can pass the
175+
charset encoding like this:
176+
177+
```php
178+
$factory->createConnection('localhost?charset=utf8mb4');
179+
```
180+
171181
#### createLazyConnection()
172182

173183
Creates a new connection.
@@ -274,6 +284,16 @@ timeout) like this:
274284
$factory->createLazyConnection('localhost?idle=0.1');
275285
```
276286

287+
By default, the connection uses the `utf8` charset encoding. Note that
288+
MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now
289+
known as UTF-8 and for historical reasons doesn't support emojis and
290+
other characters that need four bytes in UTF-8. If you want full UTF-8 support, you can pass the
291+
charset encoding like this:
292+
293+
```php
294+
$factory->createLazyConnection('localhost?charset=utf8mb4');
295+
```
296+
277297
### ConnectionInterface
278298

279299
The `ConnectionInterface` represents a connection that is responsible for

src/Commands/AuthenticateCommand.php

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
/**
99
* @internal
10+
* @link https://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::HandshakeResponse
1011
*/
1112
class AuthenticateCommand extends AbstractCommand
1213
{
@@ -15,13 +16,51 @@ class AuthenticateCommand extends AbstractCommand
1516
private $dbname;
1617

1718
private $maxPacketSize = 0x1000000;
18-
private $charsetNumber = 0x21;
1919

20-
public function __construct($user, $passwd, $dbname)
20+
/**
 * Numeric charset ID selected for this command.
 *
 * @var int
 * @link https://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet
 */
private $charsetNumber;

/**
 * Lookup table translating a charset name into its internal charset ID
 *
 * Only ASCII-compatible charset encodings are listed here, because the
 * quoting rules defined in the `Query` class rely on that property.
 *
 * @var array<string,int>
 * @see self::$charsetNumber
 * @see \React\MySQL\Io\Query::$escapeChars
 */
private static $charsetMap = [
    'latin1'  => 8,
    'latin2'  => 9,
    'ascii'   => 11,
    'latin5'  => 30,
    'utf8'    => 33,
    'latin7'  => 41,
    'utf8mb4' => 45,
    'binary'  => 63,
];
46+
47+
/**
 * Creates the authentication command for the given credentials and charset.
 *
 * @param string $user
 * @param string $passwd
 * @param string $dbname
 * @param string $charset charset name, must be a key of self::$charsetMap
 * @throws \InvalidArgumentException for invalid/unknown charset name
 */
public function __construct($user, $passwd, $dbname, $charset)
{
    // Reject non-string values up front: an array (e.g. parsed from a
    // `?charset[]=x` query string) is not a valid array offset, so the
    // isset() lookup would raise a warning or, on PHP 8+, a TypeError
    // instead of the documented InvalidArgumentException.
    if (!is_string($charset) || !isset(self::$charsetMap[$charset])) {
        throw new \InvalidArgumentException('Unsupported charset selected');
    }

    $this->user = $user;
    $this->passwd = $passwd;
    $this->dbname = $dbname;
    $this->charsetNumber = self::$charsetMap[$charset];
}
2665

2766
public function getId()

src/Factory.php

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,16 @@ public function __construct(LoopInterface $loop = null, ConnectorInterface $conn
143143
* $factory->createConnection('localhost?timeout=0.5');
144144
* ```
145145
*
146+
* By default, the connection uses the `utf8` charset encoding. Note that
147+
* MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now
148+
* known as UTF-8 and for historical reasons doesn't support emojis and
149+
* other characters that need four bytes in UTF-8. If you want full UTF-8 support, you can pass the
150+
* charset encoding like this:
151+
*
152+
* ```php
153+
* $factory->createConnection('localhost?charset=utf8mb4');
154+
* ```
155+
*
146156
* @param string $uri
147157
* @return PromiseInterface Promise<ConnectionInterface, Exception>
148158
*/
@@ -153,6 +163,22 @@ public function createConnection($uri)
153163
return \React\Promise\reject(new \InvalidArgumentException('Invalid connect uri given'));
154164
}
155165

166+
$args = [];
167+
if (isset($parts['query'])) {
168+
parse_str($parts['query'], $args);
169+
}
170+
171+
try {
172+
$authCommand = new AuthenticateCommand(
173+
isset($parts['user']) ? rawurldecode($parts['user']) : 'root',
174+
isset($parts['pass']) ? rawurldecode($parts['pass']) : '',
175+
isset($parts['path']) ? rawurldecode(ltrim($parts['path'], '/')) : '',
176+
isset($args['charset']) ? $args['charset'] : 'utf8'
177+
);
178+
} catch (\InvalidArgumentException $e) {
179+
return \React\Promise\reject($e);
180+
}
181+
156182
$connecting = $this->connector->connect(
157183
$parts['host'] . ':' . (isset($parts['port']) ? $parts['port'] : 3306)
158184
);
@@ -168,16 +194,12 @@ public function createConnection($uri)
168194
$connecting->cancel();
169195
});
170196

171-
$connecting->then(function (SocketConnectionInterface $stream) use ($parts, $deferred) {
197+
$connecting->then(function (SocketConnectionInterface $stream) use ($authCommand, $deferred) {
172198
$executor = new Executor();
173199
$parser = new Parser($stream, $executor);
174200

175201
$connection = new Connection($stream, $executor);
176-
$command = $executor->enqueue(new AuthenticateCommand(
177-
isset($parts['user']) ? rawurldecode($parts['user']) : 'root',
178-
isset($parts['pass']) ? rawurldecode($parts['pass']) : '',
179-
isset($parts['path']) ? rawurldecode(ltrim($parts['path'], '/')) : ''
180-
));
202+
$command = $executor->enqueue($authCommand);
181203
$parser->start();
182204

183205
$command->on('success', function () use ($deferred, $connection) {
@@ -191,11 +213,6 @@ public function createConnection($uri)
191213
$deferred->reject(new \RuntimeException('Unable to connect to database server', 0, $error));
192214
});
193215

194-
$args = [];
195-
if (isset($parts['query'])) {
196-
parse_str($parts['query'], $args);
197-
}
198-
199216
// use timeout from explicit ?timeout=x parameter or default to PHP's default_socket_timeout (60)
200217
$timeout = (float) isset($args['timeout']) ? $args['timeout'] : ini_get("default_socket_timeout");
201218
if ($timeout < 0) {
@@ -317,6 +334,16 @@ public function createConnection($uri)
317334
* $factory->createLazyConnection('localhost?idle=0.1');
318335
* ```
319336
*
337+
* By default, the connection uses the `utf8` charset encoding. Note that
338+
* MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now
339+
* known as UTF-8 and for historical reasons doesn't support emojis and
340+
* other characters that need four bytes in UTF-8. If you want full UTF-8 support, you can pass the
341+
* charset encoding like this:
342+
*
343+
* ```php
344+
* $factory->createLazyConnection('localhost?charset=utf8mb4');
345+
* ```
346+
*
320347
* @param string $uri
321348
* @return ConnectionInterface
322349
*/

src/Io/Query.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ class Query
1313

1414
private $params = [];
1515

16+
/**
17+
* Mapping from byte/character to escaped character string
18+
*
19+
* Note that this mapping assumes an ASCII-compatible charset encoding such
20+
* as UTF-8, ISO 8859 and others.
21+
*
22+
* @var array<string,string>
23+
* @see \React\MySQL\Commands\AuthenticateCommand::$charsetMap
24+
*/
1625
private $escapeChars = array(
1726
"\x00" => "\\0",
1827
"\r" => "\\r",

tests/Commands/AuthenticateCommandTest.php

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<?php
2+
3+
namespace React\Tests\MySQL\Commands;
4+
5+
use PHPUnit\Framework\TestCase;
6+
use React\MySQL\Commands\AuthenticateCommand;
7+
8+
class AuthenticateCommandTest extends TestCase
{
    /**
     * Constructing the command with a supported charset name must not throw.
     *
     * @doesNotPerformAssertions
     */
    public function testCtorWithKnownCharset()
    {
        new AuthenticateCommand('Alice', 'secret', '', 'utf8');
    }

    /**
     * Constructing the command with an unsupported charset name must throw.
     */
    public function testCtorWithUnknownCharsetThrows()
    {
        // legacy PHPUnit < 5.2 only offers setExpectedException()
        $expect = method_exists($this, 'expectException') ? 'expectException' : 'setExpectedException';
        $this->$expect('InvalidArgumentException');

        new AuthenticateCommand('Alice', 'secret', '', 'utf16');
    }
}

tests/FactoryTest.php

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,19 @@ public function testConnectWithInvalidUriWillRejectWithoutConnecting()
8989
$promise->then(null, $this->expectCallableOnce());
9090
}
9191

92+
public function testConnectWithInvalidCharsetWillRejectWithoutConnecting()
{
    // the connector must never be asked to connect when the charset is rejected
    $connector = $this->getMockBuilder('React\Socket\ConnectorInterface')->getMock();
    $connector->expects($this->never())->method('connect');
    $loop = $this->getMockBuilder('React\EventLoop\LoopInterface')->getMock();

    $factory = new Factory($loop, $connector);
    $result = $factory->createConnection('localhost?charset=unknown');

    // a promise is still returned, and it is rejected exactly once
    $this->assertInstanceof('React\Promise\PromiseInterface', $result);
    $result->then(null, $this->expectCallableOnce());
}
104+
92105
public function testConnectWithInvalidHostRejectsWithConnectionError()
93106
{
94107
$loop = \React\EventLoop\Factory::create();

tests/ResultQueryTest.php

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,39 @@ public function testSelectStaticTextTwoColumnsWithSameNameOverwritesValue()
340340
$loop->run();
341341
}
342342

343+
public function testSelectCharsetDefaultsToUtf8()
{
    $loop = \React\EventLoop\Factory::create();
    $connection = $this->createConnection($loop);

    // without an explicit ?charset=x parameter the client charset should be utf8
    $promise = $connection->query('SELECT @@character_set_client');
    $promise->then(function (QueryResult $result) {
        $rows = $result->resultRows;
        $this->assertCount(1, $rows);

        $firstRow = $rows[0];
        $this->assertCount(1, $firstRow);
        $this->assertSame('utf8', reset($firstRow));
    });

    $connection->quit();
    $loop->run();
}
357+
358+
public function testSelectWithExplcitCharsetReturnsCharset()
{
    $loop = \React\EventLoop\Factory::create();

    // request latin1 explicitly through the ?charset=x URI parameter
    $factory = new Factory($loop);
    $connection = $factory->createLazyConnection($this->getConnectionString() . '?charset=latin1');

    $promise = $connection->query('SELECT @@character_set_client');
    $promise->then(function (QueryResult $result) {
        $rows = $result->resultRows;
        $this->assertCount(1, $rows);

        $firstRow = $rows[0];
        $this->assertCount(1, $firstRow);
        $this->assertSame('latin1', reset($firstRow));
    });

    $connection->quit();
    $loop->run();
}
375+
343376
public function testSimpleSelect()
344377
{
345378
$loop = \React\EventLoop\Factory::create();

0 commit comments

Comments
 (0)