Skip to content

Commit 0711d62

Browse files
emontnemery and bdraco authored
Change collation to utf8mb4_bin for MySQL and MariaDB databases (home-assistant#156297)
Co-authored-by: J. Nick Koston <[email protected]>
1 parent f70aeaf commit 0711d62

File tree

7 files changed

+53
-24
lines changed

7 files changed

+53
-24
lines changed

homeassistant/components/recorder/auto_repairs/schema.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from sqlalchemy.orm.attributes import InstrumentedAttribute
1313

1414
from ..const import SupportedDialect
15-
from ..db_schema import DOUBLE_PRECISION_TYPE_SQL, DOUBLE_TYPE
15+
from ..db_schema import DOUBLE_PRECISION_TYPE_SQL, DOUBLE_TYPE, MYSQL_COLLATE
1616
from ..util import session_scope
1717

1818
if TYPE_CHECKING:
@@ -105,12 +105,13 @@ def _validate_table_schema_has_correct_collation(
105105
or dialect_kwargs.get("mariadb_collate")
106106
or connection.dialect._fetch_setting(connection, "collation_server") # type: ignore[attr-defined] # noqa: SLF001
107107
)
108-
if collate and collate != "utf8mb4_unicode_ci":
108+
if collate and collate != MYSQL_COLLATE:
109109
_LOGGER.debug(
110-
"Database %s collation is not utf8mb4_unicode_ci",
110+
"Database %s collation is not %s",
111111
table,
112+
MYSQL_COLLATE,
112113
)
113-
schema_errors.add(f"{table}.utf8mb4_unicode_ci")
114+
schema_errors.add(f"{table}.{MYSQL_COLLATE}")
114115
return schema_errors
115116

116117

@@ -240,7 +241,7 @@ def correct_db_schema_utf8(
240241
table_name = table_object.__tablename__
241242
if (
242243
f"{table_name}.4-byte UTF-8" in schema_errors
243-
or f"{table_name}.utf8mb4_unicode_ci" in schema_errors
244+
or f"{table_name}.{MYSQL_COLLATE}" in schema_errors
244245
):
245246
from ..migration import ( # noqa: PLC0415
246247
_correct_table_character_set_and_collation,

homeassistant/components/recorder/db_schema.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class LegacyBase(DeclarativeBase):
7171
"""Base class for tables, used for schema migration."""
7272

7373

74-
SCHEMA_VERSION = 52
74+
SCHEMA_VERSION = 53
7575

7676
_LOGGER = logging.getLogger(__name__)
7777

@@ -128,7 +128,7 @@ class LegacyBase(DeclarativeBase):
128128
LEGACY_MAX_LENGTH_EVENT_CONTEXT_ID: Final = 36
129129
CONTEXT_ID_BIN_MAX_LENGTH = 16
130130

131-
MYSQL_COLLATE = "utf8mb4_unicode_ci"
131+
MYSQL_COLLATE = "utf8mb4_bin"
132132
MYSQL_DEFAULT_CHARSET = "utf8mb4"
133133
MYSQL_ENGINE = "InnoDB"
134134

homeassistant/components/recorder/migration.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,7 @@ def _apply_update(self) -> None:
13611361
class _SchemaVersion21Migrator(_SchemaVersionMigrator, target_version=21):
13621362
def _apply_update(self) -> None:
13631363
"""Version specific update method."""
1364-
# Try to change the character set of the statistic_meta table
1364+
# Try to change the character set of events, states and statistics_meta tables
13651365
if self.engine.dialect.name == SupportedDialect.MYSQL:
13661366
for table in ("events", "states", "statistics_meta"):
13671367
_correct_table_character_set_and_collation(table, self.session_maker)
@@ -2125,6 +2125,23 @@ def _apply_update_postgresql_sqlite(self) -> None:
21252125
)
21262126

21272127

2128+
class _SchemaVersion53Migrator(_SchemaVersionMigrator, target_version=53):
2129+
def _apply_update(self) -> None:
2130+
"""Version specific update method."""
2131+
# Try to change the character set and collation of the tables listed below
2132+
if self.engine.dialect.name == SupportedDialect.MYSQL:
2133+
for table in (
2134+
"events",
2135+
"event_data",
2136+
"states",
2137+
"state_attributes",
2138+
"statistics",
2139+
"statistics_meta",
2140+
"statistics_short_term",
2141+
):
2142+
_correct_table_character_set_and_collation(table, self.session_maker)
2143+
2144+
21282145
def _migrate_statistics_columns_to_timestamp_removing_duplicates(
21292146
hass: HomeAssistant,
21302147
instance: Recorder,
@@ -2167,8 +2184,10 @@ def _correct_table_character_set_and_collation(
21672184
"""Correct issues detected by validate_db_schema."""
21682185
# Attempt to convert the table to utf8mb4
21692186
_LOGGER.warning(
2170-
"Updating character set and collation of table %s to utf8mb4. %s",
2187+
"Updating table %s to character set %s and collation %s. %s",
21712188
table,
2189+
MYSQL_DEFAULT_CHARSET,
2190+
MYSQL_COLLATE,
21722191
MIGRATION_NOTE_MINUTES,
21732192
)
21742193
with (

tests/components/recorder/auto_repairs/events/test_schema.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ async def test_validate_db_schema_fix_utf8_issue_event_data(
8282
in caplog.text
8383
)
8484
assert (
85-
"Updating character set and collation of table event_data to utf8mb4"
85+
"Updating table event_data to character set utf8mb4 and collation utf8mb4_bin"
8686
in caplog.text
8787
)
8888

@@ -103,17 +103,18 @@ async def test_validate_db_schema_fix_collation_issue(
103103
with (
104104
patch(
105105
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
106-
return_value={"events.utf8mb4_unicode_ci"},
106+
return_value={"events.utf8mb4_bin"},
107107
),
108108
):
109109
async with async_test_recorder(hass):
110110
await async_wait_recording_done(hass)
111111

112112
assert "Schema validation failed" not in caplog.text
113113
assert (
114-
"Database is about to correct DB schema errors: events.utf8mb4_unicode_ci"
114+
"Database is about to correct DB schema errors: events.utf8mb4_bin"
115115
in caplog.text
116116
)
117117
assert (
118-
"Updating character set and collation of table events to utf8mb4" in caplog.text
118+
"Updating table events to character set utf8mb4 and collation utf8mb4_bin"
119+
in caplog.text
119120
)

tests/components/recorder/auto_repairs/states/test_schema.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ async def test_validate_db_schema_fix_utf8_issue_states(
8484
in caplog.text
8585
)
8686
assert (
87-
"Updating character set and collation of table states to utf8mb4" in caplog.text
87+
"Updating table states to character set utf8mb4 and collation utf8mb4_bin"
88+
in caplog.text
8889
)
8990

9091

@@ -116,7 +117,7 @@ async def test_validate_db_schema_fix_utf8_issue_state_attributes(
116117
in caplog.text
117118
)
118119
assert (
119-
"Updating character set and collation of table state_attributes to utf8mb4"
120+
"Updating table state_attributes to character set utf8mb4 and collation utf8mb4_bin"
120121
in caplog.text
121122
)
122123

@@ -137,17 +138,18 @@ async def test_validate_db_schema_fix_collation_issue(
137138
with (
138139
patch(
139140
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
140-
return_value={"states.utf8mb4_unicode_ci"},
141+
return_value={"states.utf8mb4_bin"},
141142
),
142143
):
143144
async with async_test_recorder(hass):
144145
await async_wait_recording_done(hass)
145146

146147
assert "Schema validation failed" not in caplog.text
147148
assert (
148-
"Database is about to correct DB schema errors: states.utf8mb4_unicode_ci"
149+
"Database is about to correct DB schema errors: states.utf8mb4_bin"
149150
in caplog.text
150151
)
151152
assert (
152-
"Updating character set and collation of table states to utf8mb4" in caplog.text
153+
"Updating table states to character set utf8mb4 and collation utf8mb4_bin"
154+
in caplog.text
153155
)

tests/components/recorder/auto_repairs/statistics/test_schema.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ async def test_validate_db_schema_fix_utf8_issue(
4646
in caplog.text
4747
)
4848
assert (
49-
"Updating character set and collation of table statistics_meta to utf8mb4"
49+
"Updating table statistics_meta to character set utf8mb4 and collation utf8mb4_bin"
5050
in caplog.text
5151
)
5252

@@ -113,18 +113,18 @@ async def test_validate_db_schema_fix_collation_issue(
113113
with (
114114
patch(
115115
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
116-
return_value={"statistics.utf8mb4_unicode_ci"},
116+
return_value={"statistics.utf8mb4_bin"},
117117
),
118118
):
119119
async with async_test_recorder(hass):
120120
await async_wait_recording_done(hass)
121121

122122
assert "Schema validation failed" not in caplog.text
123123
assert (
124-
"Database is about to correct DB schema errors: statistics.utf8mb4_unicode_ci"
124+
"Database is about to correct DB schema errors: statistics.utf8mb4_bin"
125125
in caplog.text
126126
)
127127
assert (
128-
"Updating character set and collation of table statistics to utf8mb4"
128+
"Updating table statistics to character set utf8mb4 and collation utf8mb4_bin"
129129
in caplog.text
130130
)

tests/components/recorder/auto_repairs/test_schema.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,16 @@ def _break_states_schema():
103103

104104
@pytest.mark.skip_on_db_engine(["postgresql", "sqlite"])
105105
@pytest.mark.usefixtures("skip_by_db_engine")
106+
@pytest.mark.parametrize(
107+
("charset", "collation"),
108+
[("utf8mb3", "utf8_general_ci"), ("utf8mb4", "utf8mb4_unicode_ci")],
109+
)
106110
async def test_validate_db_schema_fix_incorrect_collation(
107111
hass: HomeAssistant,
108112
recorder_mock: Recorder,
109113
caplog: pytest.LogCaptureFixture,
114+
charset: str,
115+
collation: str,
110116
) -> None:
111117
"""Test validating DB schema with MySQL when the collation is incorrect."""
112118
await async_wait_recording_done(hass)
@@ -116,7 +122,7 @@ def _break_states_schema():
116122
with session_scope(session=session_maker()) as session:
117123
session.execute(
118124
text(
119-
"ALTER TABLE states CHARACTER SET utf8mb3 COLLATE utf8_general_ci, "
125+
f"ALTER TABLE states CHARACTER SET {charset} COLLATE {collation}, "
120126
"LOCK=EXCLUSIVE;"
121127
)
122128
)
@@ -125,7 +131,7 @@ def _break_states_schema():
125131
schema_errors = await recorder_mock.async_add_executor_job(
126132
validate_table_schema_has_correct_collation, recorder_mock, States
127133
)
128-
assert schema_errors == {"states.utf8mb4_unicode_ci"}
134+
assert schema_errors == {"states.utf8mb4_bin"}
129135

130136
# Now repair the schema
131137
await recorder_mock.async_add_executor_job(

0 commit comments

Comments
 (0)