Skip to content

Commit ad070af

Browse files
elianddb and claude committed
Fix PR review comments - correct table charset specification and add multibyte test
1. Fix the table charset specification to use a table-level charset instead of column-level charsets.
2. Add a test case for UTF-8 multibyte sequences to validate the fix for the b > asciiMax bug:
   - Tests é (0xC3 0xA9), € (0xE2 0x82 0xAC), and 🍕 (0xF0 0x9F 0x8D 0x95) - all valid UTF-8 with bytes > 127.
   - Ensures the formatInvalidByteForError fix correctly handles multibyte sequences.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 7f46e8e commit ad070af

File tree

1 file changed

+34
-1
lines changed

1 file changed

+34
-1
lines changed

enginetest/queries/script_queries.go

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7167,7 +7167,7 @@ where
71677167
{
71687168
Name: "charset validation strict vs non-strict mode",
71697169
SetUpScript: []string{
7170-
"create table charset_test (c char(10) character set utf8mb4, v varchar(10) character set utf8mb4, txt text character set utf8mb4);",
7170+
"create table charset_test (c char(10), v varchar(10), txt text) character set utf8mb4;",
71717171
},
71727172
Assertions: []ScriptTestAssertion{
71737173
{
@@ -7228,6 +7228,39 @@ where
72287228
},
72297229
},
72307230
},
7231+
{
7232+
Name: "charset validation UTF-8 multibyte sequence validation",
7233+
SetUpScript: []string{
7234+
"create table utf8_validation_test (c char(10), v varchar(20), t text) character set utf8mb4;",
7235+
},
7236+
Assertions: []ScriptTestAssertion{
7237+
{
7238+
Query: "set sql_mode = 'STRICT_TRANS_TABLES';",
7239+
Expected: []sql.Row{{types.OkResult{RowsAffected: 0}}},
7240+
},
7241+
// Test that valid multibyte sequences containing bytes > 127 are accepted (regression test for the b > asciiMax bug)
7242+
{
7243+
Query: "insert into utf8_validation_test(c, v, t) values (UNHEX('C3A9'), UNHEX('E282AC'), UNHEX('F09F8D95'));", // é, €, 🍕 - valid UTF-8 sequences with bytes > 127
7244+
Expected: []sql.Row{{types.OkResult{RowsAffected: 1}}},
7245+
},
7246+
// Test that genuinely invalid UTF-8 sequences are still rejected
7247+
{
7248+
Query: "insert into utf8_validation_test(c) values (UNHEX('C0C1'));", // Invalid overlong sequence
7249+
ExpectedErrStr: "Incorrect string value: '\\xC0\\xC1' for column 'c' at row 1",
7250+
},
7251+
{
7252+
Query: "insert into utf8_validation_test(v) values (UNHEX('FE'));", // Invalid UTF-8 byte
7253+
ExpectedErrStr: "Incorrect string value: '\\xFE' for column 'v' at row 1",
7254+
},
7255+
// Verify the valid multibyte data was stored correctly
7256+
{
7257+
Query: "select HEX(c), HEX(v), HEX(t) from utf8_validation_test where c is not null and v is not null and t is not null;",
7258+
Expected: []sql.Row{
7259+
{"C3A9", "E282AC", "F09F8D95"}, // é, €, 🍕
7260+
},
7261+
},
7262+
},
7263+
},
72317264
{
72327265
Name: "charset validation issue #8893 - customer scenario",
72337266
SetUpScript: []string{

0 commit comments

Comments
 (0)