Skip to content

Commit 7466f68

Browse files
committed
Copy core _mb_substr. Add encoding arg. Add keep arg to decolorize.
1 parent 62fdd75 commit 7466f68

File tree

5 files changed

+226
-103
lines changed

5 files changed

+226
-103
lines changed

lib/cli/Colors.php

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -115,26 +115,25 @@ static public function color($color) {
115115
static public function colorize($string, $colored = null) {
116116
$passed = $string;
117117

118-
if (isset(self::$_string_cache[md5($passed)]['colorized'])) {
119-
return self::$_string_cache[md5($passed)]['colorized'];
120-
}
121-
122118
if (!self::shouldColorize($colored)) {
123-
$colors = self::getColors();
124-
$search = array_keys( $colors );
125-
$return = str_replace( $search, '', $string );
126-
self::cacheString($passed, $return, $colored);
119+
$return = self::decolorize( $passed, 2 /*keep_encodings*/ );
120+
self::cacheString($passed, $return);
127121
return $return;
128122
}
129123

124+
$md5 = md5($passed);
125+
if (isset(self::$_string_cache[$md5]['colorized'])) {
126+
return self::$_string_cache[$md5]['colorized'];
127+
}
128+
130129
$string = str_replace('%%', '', $string);
131130

132131
foreach (self::getColors() as $key => $value) {
133132
$string = str_replace($key, self::color($value), $string);
134133
}
135134

136135
$string = str_replace('', '%', $string);
137-
self::cacheString($passed, $string, $colored);
136+
self::cacheString($passed, $string);
138137

139138
return $string;
140139
}
@@ -143,20 +142,22 @@ static public function colorize($string, $colored = null) {
143142
* Remove color information from a string.
144143
*
145144
* @param string $string A string with color information.
146-
* @param bool $keep_tokens Optional. If set, color tokens (eg "%n") won't be stripped. Default false.
145+
* @param int $keep Optional. If the 1 bit is set, color tokens (eg "%n") won't be stripped. If the 2 bit is set, color encodings (ANSI escapes) won't be stripped. Default 0.
147146
* @return string A string with color information removed.
148147
*/
149-
static public function decolorize( $string, $keep_tokens = false ) {
150-
if ( ! $keep_tokens ) {
148+
static public function decolorize( $string, $keep = 0 ) {
149+
if ( ! ( $keep & 1 ) ) {
151150
// Get rid of color tokens if they exist
152151
$string = str_replace('%%', '', $string);
153152
$string = str_replace(array_keys(self::getColors()), '', $string);
154153
$string = str_replace('', '%', $string);
155154
}
156155

157-
// Remove color encoding if it exists
158-
foreach (self::getColors() as $key => $value) {
159-
$string = str_replace(self::color($value), '', $string);
156+
if ( ! ( $keep & 2 ) ) {
157+
// Remove color encoding if it exists
158+
foreach (self::getColors() as $key => $value) {
159+
$string = str_replace(self::color($value), '', $string);
160+
}
160161
}
161162

162163
return $string;
@@ -167,13 +168,13 @@ static public function decolorize( $string, $keep_tokens = false ) {
167168
*
168169
* @param string $passed The original string before colorization.
169170
* @param string $colorized The string after running through self::colorize.
170-
* @param string $colored The string without any color information.
171+
* @param string $deprecated Optional. Not used. Default null.
171172
*/
172-
static public function cacheString($passed, $colorized, $colored) {
173+
static public function cacheString( $passed, $colorized, $deprecated = null ) {
173174
self::$_string_cache[md5($passed)] = array(
174175
'passed' => $passed,
175176
'colorized' => $colorized,
176-
'decolorized' => self::decolorize($passed)
177+
'decolorized' => self::decolorize($passed), // Not very useful but keep for BC.
177178
);
178179
}
179180

@@ -190,24 +191,26 @@ static public function length($string) {
190191
/**
191192
* Return the width (length in characters) of the string without color codes if enabled.
192193
*
193-
* @param string $string The string to measure.
194-
* @param bool $pre_colorized Optional. Set if the string is pre-colorized. Default false.
194+
* @param string $string The string to measure.
195+
* @param bool $pre_colorized Optional. Set if the string is pre-colorized. Default false.
196+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
195197
* @return int
196198
*/
197-
static public function width( $string, $pre_colorized = false ) {
198-
return strwidth( $pre_colorized || self::shouldColorize() ? self::decolorize( $string, $pre_colorized /*keep_tokens*/ ) : $string );
199+
static public function width( $string, $pre_colorized = false, $encoding = false ) {
200+
return strwidth( $pre_colorized || self::shouldColorize() ? self::decolorize( $string, $pre_colorized ? 1 /*keep_tokens*/ : 0 ) : $string, $encoding );
199201
}
200202

201203
/**
202204
* Pad the string to a certain display length.
203205
*
204-
* @param string $string The string to pad.
205-
* @param int $length The display length.
206-
* @param bool $pre_colorized Optional. Set if the string is pre-colorized. Default false.
206+
* @param string $string The string to pad.
207+
* @param int $length The display length.
208+
* @param bool $pre_colorized Optional. Set if the string is pre-colorized. Default false.
209+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
207210
* @return string
208211
*/
209-
static public function pad( $string, $length, $pre_colorized = false ) {
210-
$real_length = self::width( $string, $pre_colorized );
212+
static public function pad( $string, $length, $pre_colorized = false, $encoding = false ) {
213+
$real_length = self::width( $string, $pre_colorized, $encoding );
211214
$diff = strlen( $string ) - $real_length;
212215
$length += $diff;
213216

lib/cli/cli.php

Lines changed: 48 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -156,15 +156,19 @@ function menu( $items, $default = null, $title = 'Choose an item' ) {
156156
* Attempts an encoding-safe way of getting string length. If the mbstring extension isn't
157157
* installed, falls back to basic strlen if no encoding is present
158158
*
159-
* @param string The string to check
160-
* @return int Numeric value that represents the string's length
159+
* @param string $str The string to check.
160+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
161+
* @return int Numeric value that represents the string's length
161162
*/
162-
function safe_strlen( $str ) {
163-
if ( function_exists( 'mb_strlen' ) && function_exists( 'mb_detect_encoding' ) ) {
164-
$length = mb_strlen( $str, mb_detect_encoding( $str ) );
163+
function safe_strlen( $str, $encoding = false ) {
164+
if ( function_exists( 'mb_strlen' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
165+
if ( ! $encoding ) {
166+
$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
167+
}
168+
$length = mb_strlen( $str, $encoding );
165169
} else {
166170
// iconv will raise a PHP notice if non-ASCII characters are present in the input string
167-
$str = iconv( 'ASCII' , 'ASCII', $str );
171+
$str = iconv( $encoding ? $encoding : 'ASCII', 'ASCII', $str );
168172

169173
$length = strlen( $str );
170174
}
@@ -176,36 +180,43 @@ function safe_strlen( $str ) {
176180
* Attempts an encoding-safe way of getting a substring. If mb_string extensions aren't
177181
* installed, falls back to ascii substring if no encoding is present
178182
*
179-
* @param string $str The input string.
180-
* @param int $start The starting position of the substring.
181-
* @param int|boolean $length Optional. Maximum length of the substring. Default false but should set to null for `substr()` compat behavior.
182-
* @param boolean $width Optional. If set and encoding is UTF-8, $length is interpreted as spacing width. Default false.
183-
* @return string Substring of string specified by start and length parameters
183+
* @param string $str The input string.
184+
* @param int $start The starting position of the substring.
185+
* @param int|bool|null $length Optional. Maximum length of the substring. Default false.
186+
* @param int|bool $is_width Optional. If set and encoding is UTF-8, $length is interpreted as spacing width. Default false.
187+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
188+
* @return string Substring of string specified by start and length parameters
184189
*/
185-
function safe_substr( $str, $start, $length = false, $width = false ) {
186-
// PHP 5.3 substr takes false as full length, PHP > 5.3 takes null - for compat. do strlen.
190+
function safe_substr( $str, $start, $length = false, $is_width = false, $encoding = false ) {
191+
// PHP 5.3 substr takes false as full length, PHP > 5.3 takes null - for compat. do `safe_strlen()`.
187192
if ( null === $length || false === $length ) {
188-
$length = safe_strlen( $str );
193+
$length = safe_strlen( $str, $encoding );
189194
}
190-
if ( function_exists( 'mb_substr' ) && function_exists( 'mb_detect_encoding' ) ) {
191-
$encoding = mb_detect_encoding( $str );
192-
if ( false !== $width && 'UTF-8' === $encoding ) {
195+
if ( function_exists( 'mb_substr' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
196+
if ( ! $encoding ) {
197+
$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
198+
}
199+
$substr = mb_substr( $str, $start, $length, $encoding );
200+
201+
if ( $is_width && 'UTF-8' === $encoding ) {
193202
// Set the East Asian Width regex.
194203
$eaw_regex = get_unicode_regexs( 'eaw' );
195-
if ( preg_match( $eaw_regex, $str ) ) {
196-
$cnt = preg_match_all( '/[\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*/', $str, $matches );
197-
$chrs = $matches[0];
204+
// If there's any East Asian double-width chars...
205+
if ( preg_match( $eaw_regex, $substr ) ) {
206+
// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
207+
$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $substr, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
208+
$cnt = min( count( $chars ), $length );
198209
$width = $length;
199210

200211
for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
201-
$width -= preg_match( $eaw_regex, $chrs[ $length ] ) ? 2 : 1;
212+
$width -= preg_match( $eaw_regex, $chars[ $length ] ) ? 2 : 1;
202213
}
214+
return join( '', array_slice( $chars, 0, $length ) );
203215
}
204216
}
205-
$substr = mb_substr( $str, $start, $length, $encoding );
206217
} else {
207218
// iconv will raise a PHP notice if non-ASCII characters are present in the input string
208-
$str = iconv( 'ASCII' , 'ASCII', $str );
219+
$str = iconv( $encoding ? $encoding : 'ASCII', 'ASCII', $str );
209220

210221
$substr = substr( $str, $start, $length );
211222
}
@@ -216,12 +227,13 @@ function safe_substr( $str, $start, $length = false, $width = false ) {
216227
/**
217228
* An encoding-safe way of padding string length for display
218229
*
219-
* @param string $string The string to pad
220-
* @param int $length The length to pad it to
230+
* @param string $string The string to pad.
231+
* @param int $length The length to pad it to.
232+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
221233
* @return string
222234
*/
223-
function safe_str_pad( $string, $length ) {
224-
$real_length = strwidth( $string );
235+
function safe_str_pad( $string, $length, $encoding = false ) {
236+
$real_length = strwidth( $string, $encoding );
225237
$diff = strlen( $string ) - $real_length;
226238
$length += $diff;
227239

@@ -231,10 +243,11 @@ function safe_str_pad( $string, $length ) {
231243
/**
232244
* Get the width of a string, i.e. its length in characters, taking into account multi-byte and mark characters for UTF-8, and multi-byte characters for non-UTF-8.
233245
*
234-
* @param string The string to check
235-
* @return int The string's width.
246+
* @param string $string The string to check.
247+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
248+
* @return int The string's width.
236249
*/
237-
function strwidth( $string ) {
250+
function strwidth( $string, $encoding = false ) {
238251
// Set the East Asian Width and Mark regexs.
239252
list( $eaw_regex, $m_regex ) = get_unicode_regexs();
240253

@@ -253,8 +266,10 @@ function strwidth( $string ) {
253266
return $width + preg_match_all( $eaw_regex, $string, $dummy /*needed for PHP 5.3*/ );
254267
}
255268
}
256-
if ( function_exists( 'mb_strwidth' ) && function_exists( 'mb_detect_encoding' ) ) {
257-
$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
269+
if ( function_exists( 'mb_strwidth' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
270+
if ( ! $encoding ) {
271+
$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
272+
}
258273
$width = mb_strwidth( $string, $encoding );
259274
if ( 'UTF-8' === $encoding ) {
260275
// Subtract combining characters.
@@ -271,7 +286,7 @@ function strwidth( $string ) {
271286
* Get the regexs generated from Unicode data.
272287
*
273288
* @param string $idx Optional. Return a specific regex only. Default null.
274-
* @return array|string Returns keyed array if not given $idx or $idx doesn't exist, otherwise the specific regex string.
289+
* @return array|string Returns keyed array if not given $idx or $idx doesn't exist, otherwise the specific regex string.
275290
*/
276291
function get_unicode_regexs( $idx = null ) {
277292
static $eaw_regex; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html

lib/cli/table/Ascii.php

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,17 +134,18 @@ public function row( array $row ) {
134134
$value = str_replace( PHP_EOL, ' ', $value );
135135

136136
$col_width = $this->_widths[ $col ];
137-
$original_val_width = Colors::width( $value, self::isPreColorized( $col ) );
137+
$encoding = function_exists( 'mb_detect_encoding' ) ? mb_detect_encoding( $value, null, true /*strict*/ ) : false;
138+
$original_val_width = Colors::width( $value, self::isPreColorized( $col ), $encoding );
138139
if ( $original_val_width > $col_width ) {
139-
$row[ $col ] = \cli\safe_substr( $value, 0, $col_width, true /*width*/ );
140-
$value = \cli\safe_substr( $value, \cli\safe_strlen( $row[ $col ] ), null );
140+
$row[ $col ] = \cli\safe_substr( $value, 0, $col_width, true /*is_width*/, $encoding );
141+
$value = \cli\safe_substr( $value, \cli\safe_strlen( $row[ $col ], $encoding ), null /*length*/, false /*is_width*/, $encoding );
141142
$i = 0;
142143
do {
143-
$extra_value = \cli\safe_substr( $value, 0, $col_width, true /*width*/ );
144-
$val_width = Colors::width( $extra_value, self::isPreColorized( $col ) );
144+
$extra_value = \cli\safe_substr( $value, 0, $col_width, true /*is_width*/, $encoding );
145+
$val_width = Colors::width( $extra_value, self::isPreColorized( $col ), $encoding );
145146
if ( $val_width ) {
146147
$extra_rows[ $col ][] = $extra_value;
147-
$value = \cli\safe_substr( $value, \cli\safe_strlen( $extra_value ), null );
148+
$value = \cli\safe_substr( $value, \cli\safe_strlen( $extra_value, $encoding ), null /*length*/, false /*is_width*/, $encoding );
148149
$i++;
149150
if ( $i > $extra_row_count ) {
150151
$extra_row_count = $i;

0 commit comments

Comments
 (0)