Skip to content

Commit 62fdd75

Browse files
committed
Refactor the unicode regexs into a function.
1 parent 79a75c9 commit 62fdd75

File tree

1 file changed

+32
-12
lines changed

1 file changed

+32
-12
lines changed

lib/cli/cli.php

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -190,17 +190,15 @@ function safe_substr( $str, $start, $length = false, $width = false ) {
190190
if ( function_exists( 'mb_substr' ) && function_exists( 'mb_detect_encoding' ) ) {
191191
$encoding = mb_detect_encoding( $str );
192192
if ( false !== $width && 'UTF-8' === $encoding ) {
193-
static $eaw_regex; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html
194-
if ( null === $eaw_regex ) {
195-
// Load both regexs generated from Unicode data.
196-
require __DIR__ . '/unicode/regex.php';
197-
}
193+
// Set the East Asian Width regex.
194+
$eaw_regex = get_unicode_regexs( 'eaw' );
198195
if ( preg_match( $eaw_regex, $str ) ) {
199196
$cnt = preg_match_all( '/[\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*/', $str, $matches );
197+
$chrs = $matches[0];
200198
$width = $length;
201199

202200
for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
203-
$width -= preg_match( $eaw_regex, $matches[0][ $length ] ) ? 2 : 1;
201+
$width -= preg_match( $eaw_regex, $chrs[ $length ] ) ? 2 : 1;
204202
}
205203
}
206204
}
@@ -237,12 +235,8 @@ function safe_str_pad( $string, $length ) {
237235
* @return int The string's width.
238236
*/
239237
function strwidth( $string ) {
240-
static $eaw_regex; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html
241-
static $m_regex; // Mark characters regex (Unicode property "M") - mark combining "Mc", mark enclosing "Me" and mark non-spacing "Mn" chars that should be ignored for spacing purposes.
242-
if ( null === $eaw_regex ) {
243-
// Load both regexs generated from Unicode data.
244-
require __DIR__ . '/unicode/regex.php';
245-
}
238+
// Set the East Asian Width and Mark regexs.
239+
list( $eaw_regex, $m_regex ) = get_unicode_regexs();
246240

247241
// Allow for selective testings - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strwidth(), "other" safe_strlen().
248242
$test_strwidth = getenv( 'PHP_CLI_TOOLS_TEST_STRWIDTH' );
@@ -272,3 +266,29 @@ function strwidth( $string ) {
272266
}
273267
return safe_strlen( $string );
274268
}
269+
270+
/**
 * Fetch the regular expressions that were generated from Unicode data.
 *
 * The expressions are loaded from `unicode/regex.php` on the first call only and
 * are then kept in static variables for subsequent calls.
 *
 * @param string $idx Optional. Which regex to return: 'eaw' (East Asian Width) or 'm' (Mark). Default null.
 * @return array|string The requested regex string when $idx is 'eaw' or 'm'. Otherwise (no $idx, or an
 *                      unrecognised one) a positional array of both: array( $eaw_regex, $m_regex ).
 */
function get_unicode_regexs( $idx = null ) {
	// East Asian Width regex: matches "wide"/"fullwidth" characters, which count as 2 columns.
	// See http://www.unicode.org/reports/tr11/tr11-19.html
	static $eaw_regex;
	// Mark regex (Unicode property "M"): combining "Mc", enclosing "Me" and non-spacing "Mn" marks,
	// which should be ignored for spacing purposes.
	static $m_regex;

	// Lazy load. The included file runs in this function's scope and is presumably what assigns
	// $eaw_regex and $m_regex (nothing else here does), so the static names must not be changed.
	if ( null === $eaw_regex ) {
		require __DIR__ . '/unicode/regex.php';
	}

	// Strict comparisons: a null (or any non-matching) $idx falls through to the array return.
	if ( 'eaw' === $idx ) {
		return $eaw_regex;
	} elseif ( 'm' === $idx ) {
		return $m_regex;
	}

	return array( $eaw_regex, $m_regex );
}

0 commit comments

Comments
 (0)