Skip to content

Commit 2450987

Browse files
committed
General: Drop too-verbose protection against homograph usernames.
Props dmsnell
1 parent f9d9982 commit 2450987

File tree

3 files changed

+0
-150
lines changed

3 files changed

+0
-150
lines changed

src/wp-includes/formatting.php

Lines changed: 0 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -2128,134 +2128,6 @@ function sanitize_file_name( $filename ) {
21282128
return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
21292129
}
21302130

2131-
/**
2132-
* Validates that a string contains only characters from a single unicode script.
2133-
*
2134-
* The function only considers alphabetic characters. It returns true if a string
2135-
* contains no more than one unicode script, and false if it contains two or more.
2136-
* An empty string is considered to contain no scripts, and thus returns true.
2137-
*
2138-
* IntlChar does not support returning the script property defined by
2139-
* https://www.unicode.org/reports/tr24/, so this implementation uses a workaround.
2140-
* It maps the known extension blocks ("latin extended a" etc) to the first block
2141-
* for that script, and then checks that the string uses only a single block.
2142-
*
2143-
* This works for the scripts currently in Unicode, and should continue to work for
2144-
* future scripts as long as each new script needs a single code block. While older
2145-
* scripts may have multiple blocks, the Unicode committee has grown better at
2146-
* estimating sizes high enough so that only one block is needed.
2147-
*
2148-
* @since 6.9.0
2149-
*
2150-
* @param string $input A string to check.
2151-
* @return bool True if all letters in the string belong to the same unicode
2152-
* script or if the string is empty.
2153-
* False if letters from two or more scripts are included.
2154-
*/
2155-
function uses_single_unicode_script( string $input ): bool {
2156-
if ( '' === $input ) {
2157-
return true;
2158-
}
2159-
2160-
if ( version_compare( PHP_VERSION, '7.4.0', '<' ) ) {
2161-
// Since mb_str_split is not available in PHP < 7.4 we can only check ASCII characters.
2162-
return (bool) preg_match( '/^[a-zA-Z0-9 _.\-@]+$/i', $input );
2163-
}
2164-
2165-
$block = 0;
2166-
// phpcs:ignore PHPCompatibility.FunctionUse.NewFunctions.mb_str_splitFound -- old versions of PHP are handled above
2167-
foreach ( mb_str_split( $input ) as $cp ) {
2168-
if ( IntlChar::isalpha( $cp ) ) {
2169-
$b = IntlChar::getBlockCode( $cp );
2170-
switch ( $b ) {
2171-
case IntlChar::BLOCK_CODE_LATIN_1_SUPPLEMENT:
2172-
// fall through
2173-
case IntlChar::BLOCK_CODE_LATIN_EXTENDED_A:
2174-
// fall through
2175-
case IntlChar::BLOCK_CODE_LATIN_EXTENDED_B:
2176-
case IntlChar::BLOCK_CODE_LATIN_EXTENDED_C:
2177-
case IntlChar::BLOCK_CODE_LATIN_EXTENDED_D:
2178-
case IntlChar::BLOCK_CODE_IPA_EXTENSIONS: // used in Ghana etc
2179-
case IntlChar::BLOCK_CODE_LATIN_EXTENDED_ADDITIONAL:
2180-
$b = IntlChar::BLOCK_CODE_BASIC_LATIN;
2181-
break;
2182-
case IntlChar::BLOCK_CODE_GREEK_EXTENDED:
2183-
case IntlChar::BLOCK_CODE_COPTIC:
2184-
case IntlChar::BLOCK_CODE_COPTIC_EPACT_NUMBERS:
2185-
// Greek and coptic overlap. Coptic
2186-
// looks like Greek upper case, so
2187-
// readers of Greek can read Coptic,
2188-
// but readers of Coptic can't
2189-
// necessarily read Greek. This led to
2190-
// an unfortunate situation in
2191-
// Unicode, where the two can't be
2192-
// properly distinguished by
2193-
// block. However, because of the
2194-
// overlap, this isn't really a
2195-
// problem.
2196-
$b = IntlChar::BLOCK_CODE_GREEK;
2197-
break;
2198-
case IntlChar::BLOCK_CODE_ETHIOPIC_EXTENDED:
2199-
case IntlChar::BLOCK_CODE_ETHIOPIC_EXTENDED_A:
2200-
case IntlChar::BLOCK_CODE_ETHIOPIC_SUPPLEMENT:
2201-
$b = IntlChar::BLOCK_CODE_ETHIOPIC;
2202-
break;
2203-
case IntlChar::BLOCK_CODE_ARABIC_EXTENDED_A:
2204-
case IntlChar::BLOCK_CODE_ARABIC_SUPPLEMENT:
2205-
case IntlChar::BLOCK_CODE_ARABIC_PRESENTATION_FORMS_A:
2206-
case IntlChar::BLOCK_CODE_ARABIC_PRESENTATION_FORMS_B:
2207-
case IntlChar::BLOCK_CODE_ARABIC_SUPPLEMENT:
2208-
$b = IntlChar::BLOCK_CODE_ARABIC;
2209-
break;
2210-
case IntlChar::BLOCK_CODE_CYRILLIC_EXTENDED_A:
2211-
case IntlChar::BLOCK_CODE_CYRILLIC_EXTENDED_B:
2212-
$b = IntlChar::BLOCK_CODE_CYRILLIC;
2213-
break;
2214-
case IntlChar::BLOCK_CODE_BOPOMOFO_EXTENDED:
2215-
$b = IntlChar::BLOCK_CODE_BOPOMOFO;
2216-
break;
2217-
case IntlChar::BLOCK_CODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED:
2218-
$b = IntlChar::BLOCK_CODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS;
2219-
break;
2220-
case IntlChar::BLOCK_CODE_DEVANAGARI_EXTENDED:
2221-
$b = IntlChar::BLOCK_CODE_DEVANAGARI;
2222-
break;
2223-
case IntlChar::BLOCK_CODE_HANGUL_COMPATIBILITY_JAMO:
2224-
case IntlChar::BLOCK_CODE_HANGUL_JAMO_EXTENDED_A:
2225-
case IntlChar::BLOCK_CODE_HANGUL_JAMO_EXTENDED_B:
2226-
case IntlChar::BLOCK_CODE_HANGUL_SYLLABLES:
2227-
$b = IntlChar::BLOCK_CODE_HANGUL_JAMO;
2228-
break;
2229-
case IntlChar::BLOCK_CODE_MYANMAR_EXTENDED_A:
2230-
case IntlChar::BLOCK_CODE_MYANMAR_EXTENDED_B:
2231-
$b = IntlChar::BLOCK_CODE_MYANMAR;
2232-
break;
2233-
case IntlChar::BLOCK_CODE_CJK_STROKES:
2234-
case IntlChar::BLOCK_CODE_CJK_UNIFIED_IDEOGRAPHS:
2235-
case IntlChar::BLOCK_CODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
2236-
case IntlChar::BLOCK_CODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B:
2237-
case IntlChar::BLOCK_CODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C:
2238-
case IntlChar::BLOCK_CODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D:
2239-
case IntlChar::BLOCK_CODE_CJK_COMPATIBILITY_IDEOGRAPHS:
2240-
case IntlChar::BLOCK_CODE_CJK_RADICALS_SUPPLEMENT:
2241-
case IntlChar::BLOCK_CODE_ENCLOSED_CJK_LETTERS_AND_MONTHS:
2242-
case IntlChar::BLOCK_CODE_CJK_COMPATIBILITY_FORMS:
2243-
case IntlChar::BLOCK_CODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT:
2244-
$b = IntlChar::BLOCK_CODE_CJK_UNIFIED_IDEOGRAPHS;
2245-
break;
2246-
}
2247-
if ( 0 === $block ) {
2248-
$block = $b;
2249-
}
2250-
if ( $block !== $b ) {
2251-
return false;
2252-
}
2253-
}
2254-
}
2255-
2256-
return true;
2257-
}
2258-
22592131
/**
22602132
* Sanitizes a username, stripping out unsafe characters.
22612133
*
@@ -2279,11 +2151,6 @@ function sanitize_user( $username, $strict = false ) {
22792151
// Remove HTML entities.
22802152
$username = preg_replace( '/&.+?;/', '', $username );
22812153

2282-
// If mixing different scripts, remove all but ASCII.
2283-
if ( ! uses_single_unicode_script( $username ) ) {
2284-
$username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username );
2285-
}
2286-
22872154
// If strict, reduce to letters and numbers.
22882155
if ( $strict ) {
22892156
$username = preg_replace( '|[^a-z0-9 _.\-@\p{L}\p{N}]|iu', '', $username );

tests/phpunit/tests/formatting/sanitizeUser.php

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,4 @@ public function test_accepts_west_african_latin() {
8989
$this->assertSame( $expected, sanitize_user( $expected ) );
9090
$this->assertSame( $expected, sanitize_user( $encoded ) );
9191
}
92-
93-
/*
94-
* Some people are worried about using letters that look alike
95-
* from different alphabets, for example the Cyrillic V looks
96-
* exactly like the Latin B. If any user names use confusable
97-
* letters like that pair, people are sure to have trouble
98-
* logging in, so we try to prevent people from painting
99-
* themselves into that corner.
100-
*
101-
* @ticket 31992
102-
*/
103-
104-
public function test_blocks_latin_cyrillic_mixed_name() {
105-
$this->assertSame( 'arn', sanitize_user( 'arn%D1%82' ) );
106-
}
10792
}

tests/phpunit/tests/user.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,8 +1118,6 @@ public function test_validate_utf8_usernames() {
11181118
}
11191119
/* WordPress approves of drab grey (grå) Norwegian weather */
11201120
$this->assertTrue( validate_username( 'grå' ) );
1121-
/* Latin I, Cyrillic V (which looks like Latin B), Latin M */
1122-
$this->assertFalse( validate_username( 'IВM' ) );
11231121
/* Three Cyrillic letters */
11241122
$this->assertTrue( validate_username( 'ІВМ' ) );
11251123
/* A metal umlaut fails because validate_username is

0 commit comments

Comments
 (0)