Skip to content

Commit 5120911

Browse files
committed
Properly handle UTF-8 multi-byte sequences as a single character
1 parent 758d62f commit 5120911

File tree

2 files changed

+176
-29
lines changed

2 files changed

+176
-29
lines changed

src/Readline.php

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,35 @@ public function __construct($output)
4848
$this->sequencer->addSequence(self::ESC_SEQUENCE . self::ESC_DEL, array($this, 'onKeyDelete'));
4949
$this->sequencer->addSequence(self::ESC_SEQUENCE . self::ESC_INS, array($this, 'onKeyInsert'));
5050

51-
$this->sequencer->addFallback('', array($this, 'onFallback'));
51+
$expect = 0;
52+
$char = '';
53+
$that = $this;
54+
$this->sequencer->addFallback('', function ($byte) use (&$expect, &$char, $that) {
55+
if ($expect === 0) {
56+
$code = ord($byte);
57+
// count number of bytes expected for this UTF-8 multi-byte character
58+
$expect = 1;
59+
if ($code & 128 && $code & 64) {
60+
++$expect;
61+
if ($code & 32) {
62+
++$expect;
63+
if ($code & 16) {
64+
++$expect;
65+
}
66+
}
67+
}
68+
}
69+
$char .= $byte;
70+
--$expect;
71+
72+
// forward buffered bytes as a single multi byte character once last byte has been read
73+
if ($expect === 0) {
74+
$save = $char;
75+
$char = '';
76+
$that->onFallback($save);
77+
}
78+
});
79+
5280
$this->sequencer->addFallback(self::ESC_SEQUENCE, function ($bytes) {
5381
echo 'unknown sequence: ' . ord($bytes) . PHP_EOL;
5482
});
@@ -401,16 +429,12 @@ public function onKeyDown()
401429
*/
402430
public function onFallback($chars)
403431
{
404-
$pre = $this->substr($this->linebuffer, 0, $this->linepos); // read everything up until before backspace
432+
// read everything up until before current position
433+
$pre = $this->substr($this->linebuffer, 0, $this->linepos);
405434
$post = $this->substr($this->linebuffer, $this->linepos);
406435

407436
$this->linebuffer = $pre . $chars . $post;
408-
409-
// TODO: fix lineposition for partial multibyte characters
410437
++$this->linepos;
411-
if ($this->linepos >= $this->strlen($this->linebuffer)) {
412-
$this->linepos = $this->strlen($this->linebuffer);
413-
}
414438

415439
$this->redraw();
416440
}
@@ -429,16 +453,17 @@ public function onFallback($chars)
429453
public function deleteChar($n)
430454
{
431455
$len = $this->strlen($this->linebuffer);
432-
if ($n < 0 || $n > $len) {
456+
if ($n < 0 || $n >= $len) {
433457
return;
434458
}
435459

436-
// TODO: multibyte-characters
437-
438-
$pre = $this->substr($this->linebuffer, 0, $n); // read everything up until before current position
460+
// read everything up until before current position
461+
$pre = $this->substr($this->linebuffer, 0, $n);
439462
$post = $this->substr($this->linebuffer, $n + 1);
463+
440464
$this->linebuffer = $pre . $post;
441465

466+
// move cursor one cell to the left if we're deleting in front of the cursor
442467
if ($n < $this->linepos) {
443468
--$this->linepos;
444469
}
@@ -465,24 +490,6 @@ protected function processLine()
465490
$this->redraw();
466491
}
467492

468-
protected function readEscape($char)
469-
{
470-
$this->inEscape = false;
471-
472-
if($char === self::ESC_LEFT && $this->move) {
473-
$this->moveCursorBy(-1);
474-
} else if($char === self::ESC_RIGHT && $this->move) {
475-
$this->moveCursorBy(1);
476-
} else if ($char === self::ESC_UP && $this->history !== null) {
477-
$this->history->moveUp();
478-
} else if ($char === self::ESC_DOWN && $this->history !== null) {
479-
$this->history->moveDown();
480-
} else {
481-
$this->write('invalid char');
482-
// ignore unknown escape code
483-
}
484-
}
485-
486493
protected function strlen($str)
487494
{
488495
return mb_strlen($str, $this->encoding);

tests/ReadlineTest.php

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,4 +153,144 @@ public function testMovingCursorWithoutEchoDoesNotNeedToRedraw()
153153
$this->assertSame($this->readline, $this->readline->moveCursorTo(0));
154154
$this->assertSame($this->readline, $this->readline->moveCursorBy(2));
155155
}
156+
157+
public function testWriteSimpleCharWritesOnce()
158+
{
159+
$this->output->expects($this->once())->method('write')->with($this->equalTo("\r\033[K" . "k"));
160+
161+
$this->pushInputBytes($this->readline, 'k');
162+
}
163+
164+
public function testWriteMultiByteCharWritesOnce()
165+
{
166+
$this->output->expects($this->once())->method('write')->with($this->equalTo("\r\033[K" . "\xF0\x9D\x84\x9E"));
167+
168+
// "𝄞" – U+1D11E MUSICAL SYMBOL G CLEF
169+
$this->pushInputBytes($this->readline, "\xF0\x9D\x84\x9E");
170+
}
171+
172+
public function testKeysSimpleChars()
173+
{
174+
$this->pushInputBytes($this->readline, 'hi!');
175+
176+
$this->assertEquals('hi!', $this->readline->getInput());
177+
$this->assertEquals(3, $this->readline->getCursorPosition());
178+
179+
return $this->readline;
180+
}
181+
182+
/**
183+
* @depends testKeysSimpleChars
184+
* @param Readline $readline
185+
*/
186+
public function testKeysBackspaceDeletesLastCharacter(Readline $readline)
187+
{
188+
$readline->onKeyBackspace();
189+
190+
$this->assertEquals('hi', $readline->getInput());
191+
$this->assertEquals(2, $readline->getCursorPosition());
192+
}
193+
194+
public function testKeysMultiByteInput()
195+
{
196+
$this->pushInputBytes($this->readline, 'hü');
197+
198+
$this->assertEquals('hü', $this->readline->getInput());
199+
$this->assertEquals(2, $this->readline->getCursorPosition());
200+
201+
return $this->readline;
202+
}
203+
204+
/**
205+
* @depends testKeysMultiByteInput
206+
* @param Readline $readline
207+
*/
208+
public function testKeysBackspaceDeletesWholeMultibyteCharacter(Readline $readline)
209+
{
210+
$readline->onKeyBackspace();
211+
212+
$this->assertEquals('h', $readline->getInput());
213+
}
214+
215+
public function testKeysBackspaceMiddle()
216+
{
217+
$this->readline->setInput('test');
218+
$this->readline->moveCursorTo(2);
219+
220+
$this->readline->onKeyBackspace();
221+
222+
$this->assertEquals('tst', $this->readline->getInput());
223+
$this->assertEquals(1, $this->readline->getCursorPosition());
224+
}
225+
226+
public function testKeysBackspaceFrontDoesNothing()
227+
{
228+
$this->readline->setInput('test');
229+
$this->readline->moveCursorTo(0);
230+
231+
$this->readline->onKeyBackspace();
232+
233+
$this->assertEquals('test', $this->readline->getInput());
234+
$this->assertEquals(0, $this->readline->getCursorPosition());
235+
}
236+
237+
public function testKeysDeleteMiddle()
238+
{
239+
$this->readline->setInput('test');
240+
$this->readline->moveCursorTo(2);
241+
242+
$this->readline->onKeyDelete();
243+
244+
$this->assertEquals('tet', $this->readline->getInput());
245+
$this->assertEquals(2, $this->readline->getCursorPosition());
246+
}
247+
248+
public function testKeysDeleteEndDoesNothing()
249+
{
250+
$this->readline->setInput('test');
251+
252+
$this->readline->onKeyDelete();
253+
254+
$this->assertEquals('test', $this->readline->getInput());
255+
$this->assertEquals(4, $this->readline->getCursorPosition());
256+
}
257+
258+
public function testKeysPrependCharacterInFrontOfMultiByte()
259+
{
260+
$this->readline->setInput('ü');
261+
$this->readline->moveCursorTo(0);
262+
263+
$this->pushInputBytes($this->readline, 'h');
264+
265+
$this->assertEquals('hü', $this->readline->getInput());
266+
$this->assertEquals(1, $this->readline->getCursorPosition());
267+
}
268+
269+
public function testKeysWriteMultiByteAfterMultiByte()
270+
{
271+
$this->readline->setInput('ü');
272+
273+
$this->pushInputBytes($this->readline, 'ä');
274+
275+
$this->assertEquals('üä', $this->readline->getInput());
276+
$this->assertEquals(2, $this->readline->getCursorPosition());
277+
}
278+
279+
public function testKeysPrependMultiByteInFrontOfMultiByte()
280+
{
281+
$this->readline->setInput('ü');
282+
$this->readline->moveCursorTo(0);
283+
284+
$this->pushInputBytes($this->readline, 'ä');
285+
286+
$this->assertEquals('äü', $this->readline->getInput());
287+
$this->assertEquals(1, $this->readline->getCursorPosition());
288+
}
289+
290+
private function pushInputBytes(Readline $readline, $bytes)
291+
{
292+
foreach (str_split($bytes, 1) as $byte) {
293+
$readline->onChar($byte);
294+
}
295+
}
156296
}

0 commit comments

Comments
 (0)