Skip to content

Commit 57995a2

Browse files
committed
Database: Split the logic of wpdb::init_charset() into a separate method.
The logic for determining the appropriate character set and collation to use is becoming more complex, particularly with the recent additions of [37522] and [37523]. As `init_charset()` has side effects, and makes use of constants instead of parameters, it's not possible to unit test this logic.

This commit splits the logic part of `init_charset()` out into a new method, `wpdb::determine_charset()`, along with appropriate unit tests.

See #32105, #37522.
Fixes #36917.

git-svn-id: https://develop.svn.wordpress.org/trunk@37601 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 67f34fb commit 57995a2

File tree

2 files changed

+109
-14
lines changed

2 files changed

+109
-14
lines changed

src/wp-includes/wp-db.php

Lines changed: 35 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -735,41 +735,62 @@ public function __unset( $name ) {
735735
*/
736736
public function init_charset() {
737737
if ( function_exists('is_multisite') && is_multisite() ) {
738-
$this->charset = 'utf8';
738+
$charset = 'utf8';
739739
if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
740-
$this->collate = DB_COLLATE;
740+
$collate = DB_COLLATE;
741741
} else {
742-
$this->collate = 'utf8_general_ci';
742+
$collate = 'utf8_general_ci';
743743
}
744744
} elseif ( defined( 'DB_COLLATE' ) ) {
745-
$this->collate = DB_COLLATE;
745+
$collate = DB_COLLATE;
746746
}
747747

748748
if ( defined( 'DB_CHARSET' ) ) {
749-
$this->charset = DB_CHARSET;
749+
$charset = DB_CHARSET;
750750
}
751751

752+
$charset_collate = $this->determine_charset( $charset, $collate );
753+
754+
$this->charset = $charset_collate['charset'];
755+
$this->collate = $charset_collate['collate'];
756+
}
757+
758+
/**
759+
* Given a charset and collation, determine the best charset and collation to use.
760+
*
761+
* For example, when able, utf8mb4 should be used instead of utf8.
762+
*
763+
* @since 4.6.0
764+
*
765+
* @param string $charset The character set to check.
766+
* @param string $collate The collation to check.
767+
*
768+
* @return array The most appropriate character set and collation to use.
769+
*/
770+
public function determine_charset( $charset, $collate ) {
752771
if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
753-
return;
772+
return compact( 'charset', 'collate' );
754773
}
755774

756-
if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
757-
$this->charset = 'utf8mb4';
775+
if ( 'utf8' === $charset && $this->has_cap( 'utf8mb4' ) ) {
776+
$charset = 'utf8mb4';
758777
}
759778

760-
if ( 'utf8mb4' === $this->charset ) {
779+
if ( 'utf8mb4' === $charset ) {
761780
// _general_ is outdated, so we can upgrade it to _unicode_, instead.
762-
if ( ! $this->collate || 'utf8_general_ci' === $this->collate ) {
763-
$this->collate = 'utf8mb4_unicode_ci';
781+
if ( ! $collate || 'utf8_general_ci' === $collate ) {
782+
$collate = 'utf8mb4_unicode_ci';
764783
} else {
765-
$this->collate = str_replace( 'utf8_', 'utf8mb4_', $this->collate );
784+
$collate = str_replace( 'utf8_', 'utf8mb4_', $collate );
766785
}
767786
}
768787

769788
// _unicode_520_ is a better collation, we should use that when it's available.
770-
if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $this->collate ) {
771-
$this->collate = 'utf8mb4_unicode_520_ci';
789+
if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $collate ) {
790+
$collate = 'utf8mb4_unicode_520_ci';
772791
}
792+
793+
return compact( 'charset', 'collate' );
773794
}
774795

775796
/**

tests/phpunit/tests/db.php

Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -955,4 +955,78 @@ function test_close() {
955955

956956
$wpdb->check_connection();
957957
}
958+
959+
/**
960+
* @ticket 36917
961+
*/
962+
function test_charset_not_determined_when_disconnected() {
963+
global $wpdb;
964+
965+
$charset = 'utf8';
966+
$collate = 'this_isnt_a_collation';
967+
968+
$wpdb->close();
969+
970+
$result = $wpdb->determine_charset( $charset, $collate );
971+
972+
$this->assertSame( compact( 'charset', 'collate' ), $result );
973+
974+
$wpdb->check_connection();
975+
}
976+
977+
/**
978+
* @ticket 36917
979+
*/
980+
function test_charset_switched_to_utf8mb4() {
981+
global $wpdb;
982+
983+
if ( ! $wpdb->has_cap( 'utf8mb4' ) ) {
984+
$this->markTestSkipped( 'This test requires utf8mb4 support.' );
985+
}
986+
987+
$charset = 'utf8';
988+
$collate = 'utf8_general_ci';
989+
990+
$result = $wpdb->determine_charset( $charset, $collate );
991+
992+
$this->assertSame( 'utf8mb4', $result['charset'] );
993+
}
994+
995+
/**
996+
* @ticket 32105
997+
* @ticket 36917
998+
*/
999+
function test_collate_switched_to_utf8mb4_520() {
1000+
global $wpdb;
1001+
1002+
if ( ! $wpdb->has_cap( 'utf8mb4_520' ) ) {
1003+
$this->markTestSkipped( 'This test requires utf8mb4_520 support.' );
1004+
}
1005+
1006+
$charset = 'utf8';
1007+
$collate = 'utf8_general_ci';
1008+
1009+
$result = $wpdb->determine_charset( $charset, $collate );
1010+
1011+
$this->assertSame( 'utf8mb4_unicode_520_ci', $result['collate'] );
1012+
}
1013+
1014+
/**
1015+
* @ticket 36917
1016+
* @ticket 37522
1017+
*/
1018+
function test_non_unicode_collations() {
1019+
global $wpdb;
1020+
1021+
if ( ! $wpdb->has_cap( 'utf8mb4' ) ) {
1022+
$this->markTestSkipped( 'This test requires utf8mb4 support.' );
1023+
}
1024+
1025+
$charset = 'utf8';
1026+
$collate = 'utf8_swedish_ci';
1027+
1028+
$result = $wpdb->determine_charset( $charset, $collate );
1029+
1030+
$this->assertSame( 'utf8mb4_swedish_ci', $result['collate'] );
1031+
}
9581032
}

0 commit comments

Comments
 (0)