Skip to content

Commit 37cf6e1

Browse files
authored
Merge pull request #114 from wp-cli/double_width_optimize
Optimize double-width safe_substr when all double-width.
2 parents c5fb346 + 2c0896b commit 37cf6e1

File tree

4 files changed

+71
-15
lines changed

4 files changed

+71
-15
lines changed

lib/cli/cli.php

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -203,15 +203,27 @@ function safe_substr( $str, $start, $length = false, $is_width = false, $encodin
203203
$eaw_regex = get_unicode_regexs( 'eaw' );
204204
// If there's any East Asian double-width chars...
205205
if ( preg_match( $eaw_regex, $substr ) ) {
206-
// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
207-
$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $substr, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
208-
$cnt = min( count( $chars ), $length );
209-
$width = $length;
206+
// Note that if the length ends in the middle of a double-width char, the char is excluded, not included.
210207

211-
for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
212-
$width -= preg_match( $eaw_regex, $chars[ $length ] ) ? 2 : 1;
208+
// See if it's all EAW - the most likely case.
209+
if ( preg_match_all( $eaw_regex, $substr, $dummy /*needed for PHP 5.3*/ ) === $length ) {
210+
// Just halve the length (rounded down, to a minimum of 1).
211+
$substr = mb_substr( $substr, 0, max( (int) ( $length / 2 ), 1 ), $encoding );
212+
} else {
213+
// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
214+
$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $substr, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
215+
$cnt = min( count( $chars ), $length );
216+
$width = $length;
217+
218+
for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
219+
$width -= preg_match( $eaw_regex, $chars[ $length ] ) ? 2 : 1;
220+
}
221+
// Round down to a minimum of 1.
222+
if ( $width < 0 && $length > 1 ) {
223+
$length--;
224+
}
225+
return join( '', array_slice( $chars, 0, $length ) );
213226
}
214-
return join( '', array_slice( $chars, 0, $length ) );
215227
}
216228
}
217229
} else {
@@ -279,7 +291,7 @@ function strwidth( $string, $encoding = false ) {
279291
return $width;
280292
}
281293
}
282-
return safe_strlen( $string );
294+
return safe_strlen( $string, $encoding );
283295
}
284296

285297
/**

lib/cli/table/Ascii.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ public function row( array $row ) {
136136
$col_width = $this->_widths[ $col ];
137137
$encoding = function_exists( 'mb_detect_encoding' ) ? mb_detect_encoding( $value, null, true /*strict*/ ) : false;
138138
$original_val_width = Colors::width( $value, self::isPreColorized( $col ), $encoding );
139-
if ( $original_val_width > $col_width ) {
139+
if ( $col_width && $original_val_width > $col_width ) {
140140
$row[ $col ] = \cli\safe_substr( $value, 0, $col_width, true /*is_width*/, $encoding );
141141
$value = \cli\safe_substr( $value, \cli\safe_strlen( $row[ $col ], $encoding ), null /*length*/, false /*is_width*/, $encoding );
142142
$i = 0;

tests/test-cli.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ function test_encoded_substr_is_width() {
9595
$this->assertSame( 'he', \cli\safe_substr( Colors::pad( 'hello', 6 ), 0, 2, true /*is_width*/ ) );
9696
$this->assertSame( 'ór', \cli\safe_substr( Colors::pad( 'óra', 6 ), 0, 2, true /*is_width*/ ) );
9797
$this->assertSame( '', \cli\safe_substr( Colors::pad( '日本語', 8 ), 0, 2, true /*is_width*/ ) );
98+
$this->assertSame( '', \cli\safe_substr( Colors::pad( '日本語', 8 ), 0, 3, true /*is_width*/ ) );
9899
$this->assertSame( '日本', \cli\safe_substr( Colors::pad( '日本語', 8 ), 0, 4, true /*is_width*/ ) );
99-
$this->assertSame( '日本', \cli\safe_substr( Colors::pad( '日本語', 8 ), 0, 3, true /*is_width*/ ) );
100100
$this->assertSame( '日本語', \cli\safe_substr( Colors::pad( '日本語', 8 ), 0, 6, true /*is_width*/ ) );
101101
$this->assertSame( '日本語 ', \cli\safe_substr( Colors::pad( '日本語', 8 ), 0, 7, true /*is_width*/ ) );
102102

@@ -107,12 +107,12 @@ function test_encoded_substr_is_width() {
107107

108108
$this->assertSame( '', \cli\safe_substr( '1日4本語90', 0, 0, true /*is_width*/ ) );
109109
$this->assertSame( '1', \cli\safe_substr( '1日4本語90', 0, 1, true /*is_width*/ ) );
110-
$this->assertSame( '1日', \cli\safe_substr( '1日4本語90', 0, 2, true /*is_width*/ ) );
110+
$this->assertSame( '1', \cli\safe_substr( '1日4本語90', 0, 2, true /*is_width*/ ) );
111111
$this->assertSame( '1日', \cli\safe_substr( '1日4本語90', 0, 3, true /*is_width*/ ) );
112112
$this->assertSame( '1日4', \cli\safe_substr( '1日4本語90', 0, 4, true /*is_width*/ ) );
113-
$this->assertSame( '1日4本', \cli\safe_substr( '1日4本語90', 0, 5, true /*is_width*/ ) );
113+
$this->assertSame( '1日4', \cli\safe_substr( '1日4本語90', 0, 5, true /*is_width*/ ) );
114114
$this->assertSame( '1日4本', \cli\safe_substr( '1日4本語90', 0, 6, true /*is_width*/ ) );
115-
$this->assertSame( '1日4本語', \cli\safe_substr( '1日4本語90', 0, 7, true /*is_width*/ ) );
115+
$this->assertSame( '1日4本', \cli\safe_substr( '1日4本語90', 0, 7, true /*is_width*/ ) );
116116
$this->assertSame( '1日4本語', \cli\safe_substr( '1日4本語90', 0, 8, true /*is_width*/ ) );
117117
$this->assertSame( '1日4本語9', \cli\safe_substr( '1日4本語90', 0, 9, true /*is_width*/ ) );
118118
$this->assertSame( '1日4本語90', \cli\safe_substr( '1日4本語90', 0, 10, true /*is_width*/ ) );
@@ -124,7 +124,7 @@ function test_encoded_substr_is_width() {
124124

125125
$this->assertSame( '', \cli\safe_substr( '1日4本語90', 3, 1, true /*is_width*/ ) );
126126
$this->assertSame( '', \cli\safe_substr( '1日4本語90', 3, 2, true /*is_width*/ ) );
127-
$this->assertSame( '本語', \cli\safe_substr( '1日4本語90', 3, 3, true /*is_width*/ ) );
127+
$this->assertSame( '', \cli\safe_substr( '1日4本語90', 3, 3, true /*is_width*/ ) );
128128
$this->assertSame( '本語', \cli\safe_substr( '1日4本語90', 3, 4, true /*is_width*/ ) );
129129
$this->assertSame( '本語9', \cli\safe_substr( '1日4本語90', 3, 5, true /*is_width*/ ) );
130130

tests/test-table.php

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public function test_column_value_too_long_with_multibytes() {
4646
$renderer->setConstraintWidth( $constraint_width );
4747
$table->setRenderer( $renderer );
4848
$table->setHeaders( array( 'Field', 'Value' ) );
49-
$table->addRow( array( 'この文章はダミーです。文字の大きさ、量、字間、行間等を確認するために入れています。この文章はダミーです。文字の大きさ、量、字間、行間等を確認するために入れています。この文章はダミーです。文字の大きさ、', 'こんにちは' ) );
49+
$table->addRow( array( '1この文章はダミーです。文字の大きさ、量、字間、行間等を確認するために入れています。2この文章はダミーです。文字の大きさ、量、字間、行間等を確認するために入れています。', 'こんにちは' ) );
5050
$table->addRow( array( 'Lorem Ipsum is simply dummy text of the printing and typesetting industry.', 'Hello' ) );
5151

5252
$out = $table->getDisplayLines();
@@ -55,6 +55,50 @@ public function test_column_value_too_long_with_multibytes() {
5555
}
5656
}
5757

58+
public function test_column_odd_single_width_with_double_width() {
59+
60+
$dummy = new cli\Table;
61+
$renderer = new cli\Table\Ascii;
62+
63+
$strip_borders = function ( $a ) {
64+
return array_map( function ( $v ) {
65+
return substr( $v, 2, -2 );
66+
}, $a );
67+
};
68+
69+
$renderer->setWidths( array( 10 ) );
70+
71+
// 1 single-width, 6 double-width, 1 single-width, 2 double-width, 1 half-width, 2 double-width.
72+
$out = $renderer->row( array( '1あいうえおか2きくカけこ' ) );
73+
$result = $strip_borders( explode( "\n", $out ) );
74+
75+
$this->assertSame( 3, count( $result ) );
76+
$this->assertSame( '1あいうえ ', $result[0] ); // 1 single-width, 4 double-width, 1 space = 10.
77+
$this->assertSame( 'おか2きくカ', $result[1] ); // 2 double-width, 1 single-width, 2 double-width, 1 half-width = 10.
78+
$this->assertSame( 'けこ ', $result[2] ); // 2 double-width (4 columns), 6 spaces = 10.
79+
80+
// Minimum width 1.
81+
82+
$renderer->setWidths( array( 1 ) );
83+
84+
$out = $renderer->row( array( '1あいうえおか2きくカけこ' ) );
85+
$result = $strip_borders( explode( "\n", $out ) );
86+
87+
$this->assertSame( 13, count( $result ) );
88+
// Uneven rows.
89+
$this->assertSame( '1', $result[0] );
90+
$this->assertSame( '', $result[1] );
91+
92+
// Zero width does no wrapping.
93+
94+
$renderer->setWidths( array( 0 ) );
95+
96+
$out = $renderer->row( array( '1あいうえおか2きくカけこ' ) );
97+
$result = $strip_borders( explode( "\n", $out ) );
98+
99+
$this->assertSame( 1, count( $result ) );
100+
}
101+
58102
public function test_ascii_pre_colorized_widths() {
59103

60104
Colors::enable( true );

0 commit comments

Comments
 (0)