Skip to content

Commit 998c886

Browse files
committed
General: Restrict UTF8 username/email support to when the database supports UTF8
Props dmsnell.
1 parent b20658d commit 998c886

File tree

3 files changed

+38
-0
lines changed

3 files changed

+38
-0
lines changed

src/wp-includes/default-filters.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,18 @@
8787
add_filter( $filter, 'wp_filter_kses' );
8888
}
8989

90+
// Email addresses: Allow non-ASCII characters so long as the database can store them. This
91+
// affects all addresses, including those entered into contact forms.
92+
if ( 'utf8mb4' !== $wpdb->charset ) {
93+
add_filter( 'sanitize_email', 'wp_ascii_without_controls' );
94+
}
95+
96+
// Usernames: Allow non-ASCII characters if the database can store them. This might be a
97+
// setting instead, so that a site can restrict its own users to ASCII.
98+
if ( 'utf8mb4' !== $wpdb->charset ) {
99+
add_filter( 'sanitize_user', 'wp_ascii_without_controls' );
100+
}
101+
90102
// Display URL.
91103
foreach ( array( 'user_url', 'link_url', 'link_image', 'link_rss', 'comment_url', 'post_guid' ) as $filter ) {
92104
if ( is_admin() ) {

src/wp-includes/formatting.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2172,6 +2172,20 @@ function sanitize_user( $username, $strict = false ) {
21722172
return apply_filters( 'sanitize_user', $username, $raw_username, $strict );
21732173
}
21742174

2175+
2176+
/**
 * Returns a string with all ASCII control characters and all non-ASCII bytes removed.
 *
 * The replacement is byte-oriented (the pattern has no `u` modifier): every
 * byte of a multi-byte UTF-8 sequence lies in 0x80-0xFF, so such characters
 * are dropped entirely rather than transliterated. Only printable ASCII
 * bytes (0x20-0x7E) survive.
 *
 * @since 7.0.0
 *
 * @param string $input The string to be sanitized.
 * @return string The modified string.
 */
function wp_ascii_without_controls( $input ) {
	// C0 controls span 0x00-0x1F (ending the range at 0x19 would let SUB, ESC,
	// FS, GS, RS and US through); 0x7F is DEL; 0x80-0xFF are non-ASCII bytes.
	return preg_replace( '/[\x00-\x1F\x7F-\xFF]/', '', $input );
}
2187+
2188+
21752189
/**
21762190
* Sanitizes a string key.
21772191
*

tests/phpunit/tests/formatting/sanitizeUser.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,16 @@ public function test_accepts_west_african_latin() {
9494
$this->assertSame( $expected, sanitize_user( $expected ) );
9595
$this->assertSame( $expected, sanitize_user( $encoded ) );
9696
}
97+
98+
/**
99+
* If the database doesn't use UTF8, WP will add a filter to
100+
* sanitize_user to prevent creation/use of non-ASCII user
101+
* names. This test tests that that filter works.
102+
*
103+
* @ticket 31992
104+
*/
105+
public function test_reduction_to_ascii_for_non_utf8_databases() {
106+
add_filter( 'sanitize_user', 'wp_ascii_without_controls' );
107+
$this->assertSame( 'tnatn', sanitize_user( 'tɔnatɔn' ) );
108+
}
97109
}

0 commit comments

Comments
 (0)