@@ -156,15 +156,19 @@ function menu( $items, $default = null, $title = 'Choose an item' ) {
156156 * Attempts an encoding-safe way of getting string length. If mb_string extensions aren't
157157 * installed, falls back to basic strlen if no encoding is present
158158 *
159- * @param string The string to check
160- * @return int Numeric value that represents the string's length
159+ * @param string $str The string to check.
160+ * @param string|bool $encoding Optional. The encoding of the string. Default false.
161+ * @return int Numeric value that represents the string's length
161162 */
162- function safe_strlen ( $ str ) {
163- if ( function_exists ( 'mb_strlen ' ) && function_exists ( 'mb_detect_encoding ' ) ) {
164- $ length = mb_strlen ( $ str , mb_detect_encoding ( $ str ) );
163+ function safe_strlen ( $ str , $ encoding = false ) {
164+ if ( function_exists ( 'mb_strlen ' ) && ( $ encoding || function_exists ( 'mb_detect_encoding ' ) ) ) {
165+ if ( ! $ encoding ) {
166+ $ encoding = mb_detect_encoding ( $ str , null , true /*strict*/ );
167+ }
168+ $ length = mb_strlen ( $ str , $ encoding );
165169 } else {
166170 // iconv will return PHP notice if non-ascii characters are present in input string
167- $ str = iconv ( 'ASCII ' , 'ASCII ' , $ str );
171+ $ str = iconv ( $ encoding ? $ encoding : 'ASCII ' , 'ASCII ' , $ str );
168172
169173 $ length = strlen ( $ str );
170174 }
@@ -176,36 +180,43 @@ function safe_strlen( $str ) {
176180 * Attempts an encoding-safe way of getting a substring. If mb_string extensions aren't
177181 * installed, falls back to ascii substring if no encoding is present
178182 *
179- * @param string $str The input string.
180- * @param int $start The starting position of the substring.
181- * @param int|boolean $length Optional. Maximum length of the substring. Default false but should set to null for `substr()` compat behavior.
182- * @param boolean $width Optional. If set and encoding is UTF-8, $length is interpreted as spacing width. Default false.
183- * @return string Substring of string specified by start and length parameters
183+ * @param string $str The input string.
184+ * @param int $start The starting position of the substring.
185+ * @param int|bool|null $length Optional. Maximum length of the substring. Default false.
186+ * @param int|bool $is_width Optional. If set and encoding is UTF-8, $length is interpreted as spacing width. Default false.
187+ * @param string|bool $encoding Optional. The encoding of the string. Default false.
188+ * @return string Substring of string specified by start and length parameters
184189 */
185- function safe_substr ( $ str , $ start , $ length = false , $ width = false ) {
186- // PHP 5.3 substr takes false as full length, PHP > 5.3 takes null - for compat. do strlen .
190+ function safe_substr ( $ str , $ start , $ length = false , $ is_width = false , $ encoding = false ) {
191+ // PHP 5.3 substr takes false as full length, PHP > 5.3 takes null - for compat. do `safe_strlen()` .
187192 if ( null === $ length || false === $ length ) {
188- $ length = safe_strlen ( $ str );
193+ $ length = safe_strlen ( $ str, $ encoding );
189194 }
190- if ( function_exists ( 'mb_substr ' ) && function_exists ( 'mb_detect_encoding ' ) ) {
191- $ encoding = mb_detect_encoding ( $ str );
192- if ( false !== $ width && 'UTF-8 ' === $ encoding ) {
195+ if ( function_exists ( 'mb_substr ' ) && ( $ encoding || function_exists ( 'mb_detect_encoding ' ) ) ) {
196+ if ( ! $ encoding ) {
197+ $ encoding = mb_detect_encoding ( $ str , null , true /*strict*/ );
198+ }
199+ $ substr = mb_substr ( $ str , $ start , $ length , $ encoding );
200+
201+ if ( $ is_width && 'UTF-8 ' === $ encoding ) {
193202 // Set the East Asian Width regex.
194203 $ eaw_regex = get_unicode_regexs ( 'eaw ' );
195- if ( preg_match ( $ eaw_regex , $ str ) ) {
196- $ cnt = preg_match_all ( '/[\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*/ ' , $ str , $ matches );
197- $ chrs = $ matches [0 ];
204+ // If there's any East Asian double-width chars...
205+ if ( preg_match ( $ eaw_regex , $ substr ) ) {
206+ // Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
207+ $ chars = preg_split ( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/ ' , $ substr , $ length + 1 , PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
208+ $ cnt = min ( count ( $ chars ), $ length );
198209 $ width = $ length ;
199210
200211 for ( $ length = 0 ; $ length < $ cnt && $ width > 0 ; $ length ++ ) {
201- $ width -= preg_match ( $ eaw_regex , $ chrs [ $ length ] ) ? 2 : 1 ;
212+ $ width -= preg_match ( $ eaw_regex , $ chars [ $ length ] ) ? 2 : 1 ;
202213 }
214+ return join ( '' , array_slice ( $ chars , 0 , $ length ) );
203215 }
204216 }
205- $ substr = mb_substr ( $ str , $ start , $ length , $ encoding );
206217 } else {
207218 // iconv will return PHP notice if non-ascii characters are present in input string
208- $ str = iconv ( 'ASCII ' , 'ASCII ' , $ str );
219+ $ str = iconv ( $ encoding ? $ encoding : 'ASCII ' , 'ASCII ' , $ str );
209220
210221 $ substr = substr ( $ str , $ start , $ length );
211222 }
@@ -216,12 +227,13 @@ function safe_substr( $str, $start, $length = false, $width = false ) {
216227/**
217228 * An encoding-safe way of padding string length for display
218229 *
219- * @param string $string The string to pad
220- * @param int $length The length to pad it to
230+ * @param string $string The string to pad.
231+ * @param int $length The length to pad it to.
232+ * @param string|bool $encoding Optional. The encoding of the string. Default false.
221233 * @return string
222234 */
223- function safe_str_pad ( $ string , $ length ) {
224- $ real_length = strwidth ( $ string );
235+ function safe_str_pad ( $ string , $ length, $ encoding = false ) {
236+ $ real_length = strwidth ( $ string, $ encoding );
225237 $ diff = strlen ( $ string ) - $ real_length ;
226238 $ length += $ diff ;
227239
@@ -231,10 +243,11 @@ function safe_str_pad( $string, $length ) {
231243/**
232244 * Get width of string, ie length in characters, taking into account multi-byte and mark characters for UTF-8, and multi-byte for non-UTF-8.
233245 *
234- * @param string The string to check
235- * @return int The string's width.
246+ * @param string $string The string to check.
247+ * @param string|bool $encoding Optional. The encoding of the string. Default false.
248+ * @return int The string's width.
236249 */
237- function strwidth ( $ string ) {
250+ function strwidth ( $ string, $ encoding = false ) {
238251 // Set the East Asian Width and Mark regexs.
239252 list ( $ eaw_regex , $ m_regex ) = get_unicode_regexs ();
240253
@@ -253,8 +266,10 @@ function strwidth( $string ) {
253266 return $ width + preg_match_all ( $ eaw_regex , $ string , $ dummy /*needed for PHP 5.3*/ );
254267 }
255268 }
256- if ( function_exists ( 'mb_strwidth ' ) && function_exists ( 'mb_detect_encoding ' ) ) {
257- $ encoding = mb_detect_encoding ( $ string , null , true /*strict*/ );
269+ if ( function_exists ( 'mb_strwidth ' ) && ( $ encoding || function_exists ( 'mb_detect_encoding ' ) ) ) {
270+ if ( ! $ encoding ) {
271+ $ encoding = mb_detect_encoding ( $ string , null , true /*strict*/ );
272+ }
258273 $ width = mb_strwidth ( $ string , $ encoding );
259274 if ( 'UTF-8 ' === $ encoding ) {
260275 // Subtract combining characters.
@@ -271,7 +286,7 @@ function strwidth( $string ) {
271286 * Get the regexs generated from Unicode data.
272287 *
273288 * @param string $idx Optional. Return a specific regex only. Default null.
274- * @return array|string Returns keyed array if not given $idx or $idx doesn't exist, otherwise the specific regex string.
289+ * @return array|string Returns keyed array if not given $idx or $idx doesn't exist, otherwise the specific regex string.
275290 */
276291function get_unicode_regexs ( $ idx = null ) {
277292 static $ eaw_regex ; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html
0 commit comments