Skip to content

Commit 7fdc350

Browse files
committed
Merge pull request #20 from clue-labs/multibyte
Support multi-byte UTF-8 characters and account for cell width
2 parents 758d62f + 975dd78 commit 7fdc350

File tree

3 files changed

+355
-49
lines changed

3 files changed

+355
-49
lines changed

README.md

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# clue/stdio-react [![Build Status](https://travis-ci.org/clue/php-stdio-react.svg?branch=master)](https://travis-ci.org/clue/php-stdio-react)
22

3-
Async standard console input & output (STDIN, STDOUT) for React PHP
3+
Async, event-driven and UTF-8 aware standard console input & output (STDIN, STDOUT) for React PHP
44

55
> Note: This project is in early beta stage! Feel free to report any issues you encounter.
66
@@ -181,14 +181,28 @@ simply pass a boolean `true` like this:
181181
$readline->setMove(true);
182182
```
183183

184-
The `getCursorPosition()` method can be used to access the current cursor position.
184+
The `getCursorPosition()` method can be used to access the current cursor position,
185+
measured in number of characters.
185186
This can be useful if you want to get a substring of the current *user input buffer*.
186187
Simply invoke it like this:
187188

188189
```php
189190
$position = $readline->getCursorPosition();
190191
```
191192

193+
The `getCursorCell()` method can be used to get the current cursor position,
194+
measured in number of monospace cells.
195+
Most *normal* characters (plain ASCII and most multi-byte UTF-8 sequences) take a single monospace cell.
196+
However, there are a number of characters that have no visual representation
197+
(and do not take a cell at all) or characters that do not fit within a single
198+
cell (like some Asian glyphs).
199+
This method is mostly useful for calculating the visual cursor position on screen,
200+
but you may also invoke it like this:
201+
202+
```php
203+
$cell = $readline->getCursorCell();
204+
```
205+
192206
The `moveCursorTo($position)` method can be used to set the current cursor position to the given absolute character position.
193207
For example, to move the cursor to the beginning of the *user input buffer*, simply call:
194208

src/Readline.php

Lines changed: 103 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,35 @@ public function __construct($output)
4848
$this->sequencer->addSequence(self::ESC_SEQUENCE . self::ESC_DEL, array($this, 'onKeyDelete'));
4949
$this->sequencer->addSequence(self::ESC_SEQUENCE . self::ESC_INS, array($this, 'onKeyInsert'));
5050

51-
$this->sequencer->addFallback('', array($this, 'onFallback'));
51+
$expect = 0;
52+
$char = '';
53+
$that = $this;
54+
$this->sequencer->addFallback('', function ($byte) use (&$expect, &$char, $that) {
55+
if ($expect === 0) {
56+
$code = ord($byte);
57+
// count number of bytes expected for this UTF-8 multi-byte character
58+
$expect = 1;
59+
if ($code & 128 && $code & 64) {
60+
++$expect;
61+
if ($code & 32) {
62+
++$expect;
63+
if ($code & 16) {
64+
++$expect;
65+
}
66+
}
67+
}
68+
}
69+
$char .= $byte;
70+
--$expect;
71+
72+
// forward buffered bytes as a single multi byte character once last byte has been read
73+
if ($expect === 0) {
74+
$save = $char;
75+
$char = '';
76+
$that->onFallback($save);
77+
}
78+
});
79+
5280
$this->sequencer->addFallback(self::ESC_SEQUENCE, function ($bytes) {
5381
echo 'unknown sequence: ' . ord($bytes) . PHP_EOL;
5482
});
@@ -134,12 +162,15 @@ public function setMove($move)
134162
}
135163

136164
/**
137-
* get current cursor position
165+
* Gets current cursor position measured in number of text characters.
138166
*
139-
* cursor position is measured in number of text characters
167+
* Note that the number of text characters doesn't necessarily reflect the
168+
* number of monospace cells occupied by the text characters. If you want
169+
* to know the latter, use `self::getCursorCell()` instead.
140170
*
141171
* @return int
142-
* @see self::moveCursorTo() to move the cursor to a given position
172+
* @see self::getCursorCell() to get the position measured in monospace cells
173+
* @see self::moveCursorTo() to move the cursor to a given character position
143174
* @see self::moveCursorBy() to move the cursor by given number of characters
144175
* @see self::setMove() to toggle whether the user can move the cursor position
145176
*/
@@ -149,23 +180,68 @@ public function getCursorPosition()
149180
}
150181

151182
/**
152-
* move cursor to right by $n chars (or left if $n is negative)
183+
* Gets current cursor position measured in monospace cells.
184+
*
185+
* Note that the cell position doesn't necessarily reflect the number of
186+
* text characters. If you want to know the latter, use
187+
* `self::getCursorPosition()` instead.
188+
*
189+
* Most "normal" characters occupy a single monospace cell, i.e. the ASCII
190+
* sequence for "A" requires a single cell, as do most UTF-8 sequences
191+
* like "Ä".
192+
*
193+
* However, there are a number of code points that do not require a cell
194+
* (i.e. invisible surrogates) or require two cells (e.g. some Asian glyphs).
195+
*
196+
* Also note that this takes the echo mode into account, i.e. the cursor is
197+
* always at position zero if echo is off. If using a custom echo character
198+
* (like asterisk), it will take its width into account instead of the actual
199+
* input characters.
200+
*
201+
* @return int
202+
* @see self::getCursorPosition() to get current cursor position measured in characters
203+
* @see self::moveCursorTo() to move the cursor to a given character position
204+
* @see self::moveCursorBy() to move the cursor by given number of characters
205+
* @see self::setMove() to toggle whether the user can move the cursor position
206+
* @see self::setEcho()
207+
*/
208+
public function getCursorCell()
209+
{
210+
if ($this->echo === false) {
211+
return 0;
212+
}
213+
if ($this->echo !== true) {
214+
return $this->strwidth($this->echo) * $this->linepos;
215+
}
216+
return $this->strwidth($this->substr($this->linebuffer, 0, $this->linepos));
217+
}
218+
219+
/**
220+
* Moves cursor to right by $n chars (or left if $n is negative).
153221
*
154-
* zero or out of range moves are simply ignored
222+
* Zero value or values out of range (exceeding current input buffer) are
223+
* simply ignored.
224+
*
225+
* Will redraw() the readline only if the visible cell position changes,
226+
* see `self::getCursorCell()` for more details.
155227
*
156228
* @param int $n
157229
* @return self
158230
* @uses self::moveCursorTo()
231+
* @uses self::redraw()
159232
*/
160233
public function moveCursorBy($n)
161234
{
162235
return $this->moveCursorTo($this->linepos + $n);
163236
}
164237

165238
/**
166-
* move cursor to given position in current line buffer
239+
* Moves cursor to given position in current line buffer.
240+
*
241+
* Values out of range (exceeding current input buffer) are simply ignored.
167242
*
168-
* out of range (exceeding current input buffer) are simply ignored
243+
* Will redraw() the readline only if the visible cell position changes,
244+
* see `self::getCursorCell()` for more details.
169245
*
170246
* @param int $n
171247
* @return self
@@ -177,10 +253,11 @@ public function moveCursorTo($n)
177253
return $this;
178254
}
179255

256+
$old = $this->getCursorCell();
180257
$this->linepos = $n;
181258

182-
// only redraw if cursor is actually visible
183-
if ($this->echo) {
259+
// only redraw if the visible cell position changes (implies cursor is actually visible)
260+
if ($this->getCursorCell() !== $old) {
184261
$this->redraw();
185262
}
186263

@@ -280,18 +357,13 @@ public function redraw()
280357
$output = "\r\033[K" . $this->prompt;
281358
if ($this->echo !== false) {
282359
if ($this->echo === true) {
283-
$output .= $this->linebuffer;
360+
$buffer = $this->linebuffer;
284361
} else {
285-
$output .= str_repeat($this->echo, $this->strlen($this->linebuffer));
362+
$buffer = str_repeat($this->echo, $this->strlen($this->linebuffer));
286363
}
287364

288-
$len = $this->strlen($this->linebuffer);
289-
if ($this->linepos !== $len) {
290-
$reverse = $len - $this->linepos;
291-
292-
// move back $reverse chars (by sending backspace)
293-
$output .= str_repeat("\x08", $reverse);
294-
}
365+
// write output, then move the cursor back to its cell position (by sending one backspace per cell)
366+
$output .= $buffer . str_repeat("\x08", $this->strwidth($buffer) - $this->getCursorCell());
295367
}
296368
$this->write($output);
297369

@@ -401,16 +473,12 @@ public function onKeyDown()
401473
*/
402474
public function onFallback($chars)
403475
{
404-
$pre = $this->substr($this->linebuffer, 0, $this->linepos); // read everything up until before backspace
476+
// read everything up to (but not including) the current cursor position
477+
$pre = $this->substr($this->linebuffer, 0, $this->linepos);
405478
$post = $this->substr($this->linebuffer, $this->linepos);
406479

407480
$this->linebuffer = $pre . $chars . $post;
408-
409-
// TODO: fix lineposition for partial multibyte characters
410481
++$this->linepos;
411-
if ($this->linepos >= $this->strlen($this->linebuffer)) {
412-
$this->linepos = $this->strlen($this->linebuffer);
413-
}
414482

415483
$this->redraw();
416484
}
@@ -429,16 +497,17 @@ public function onFallback($chars)
429497
public function deleteChar($n)
430498
{
431499
$len = $this->strlen($this->linebuffer);
432-
if ($n < 0 || $n > $len) {
500+
if ($n < 0 || $n >= $len) {
433501
return;
434502
}
435503

436-
// TODO: multibyte-characters
437-
438-
$pre = $this->substr($this->linebuffer, 0, $n); // read everything up until before current position
504+
// read everything up to (but not including) the character at position $n
505+
$pre = $this->substr($this->linebuffer, 0, $n);
439506
$post = $this->substr($this->linebuffer, $n + 1);
507+
440508
$this->linebuffer = $pre . $post;
441509

510+
// move cursor one character to the left if we're deleting before the cursor position
442511
if ($n < $this->linepos) {
443512
--$this->linepos;
444513
}
@@ -465,24 +534,6 @@ protected function processLine()
465534
$this->redraw();
466535
}
467536

468-
protected function readEscape($char)
469-
{
470-
$this->inEscape = false;
471-
472-
if($char === self::ESC_LEFT && $this->move) {
473-
$this->moveCursorBy(-1);
474-
} else if($char === self::ESC_RIGHT && $this->move) {
475-
$this->moveCursorBy(1);
476-
} else if ($char === self::ESC_UP && $this->history !== null) {
477-
$this->history->moveUp();
478-
} else if ($char === self::ESC_DOWN && $this->history !== null) {
479-
$this->history->moveDown();
480-
} else {
481-
$this->write('invalid char');
482-
// ignore unknown escape code
483-
}
484-
}
485-
486537
protected function strlen($str)
487538
{
488539
return mb_strlen($str, $this->encoding);
@@ -496,6 +547,11 @@ protected function substr($str, $start = 0, $len = null)
496547
return (string)mb_substr($str, $start, $len, $this->encoding);
497548
}
498549

550+
private function strwidth($str)
551+
{
552+
return mb_strwidth($str, $this->encoding);
553+
}
554+
499555
protected function write($data)
500556
{
501557
$this->output->write($data);

0 commit comments

Comments
 (0)