@@ -7140,15 +7140,15 @@ where
7140
7140
Assertions : []ScriptTestAssertion {
7141
7141
{
7142
7142
Query : "insert into t(c) values (X'9876543210');" ,
7143
- ExpectedErrStr : "invalid string for charset utf8mb4 : '[152 118 84 50 16]' " ,
7143
+ ExpectedErrStr : "Incorrect string value : '\\ x98vT2 \\ x10' for column 'c' at row 1 " ,
7144
7144
},
7145
7145
{
7146
7146
Query : "insert into t(v) values (X'9876543210');" ,
7147
- ExpectedErrStr : "invalid string for charset utf8mb4 : '[152 118 84 50 16]' " ,
7147
+ ExpectedErrStr : "Incorrect string value : '\\ x98vT2 \\ x10' for column 'v' at row 1 " ,
7148
7148
},
7149
7149
{
7150
7150
Query : "insert into t(txt) values (X'9876543210');" ,
7151
- ExpectedErrStr : "invalid string for charset utf8mb4 : '[152 118 84 50 16]' " ,
7151
+ ExpectedErrStr : "Incorrect string value : '\\ x98vT2 \\ x10' for column 'txt' at row 1 " ,
7152
7152
},
7153
7153
{
7154
7154
Query : "insert into t(b) values (X'9876543210');" ,
@@ -7164,6 +7164,358 @@ where
7164
7164
},
7165
7165
},
7166
7166
},
7167
+ {
7168
+ Name : "charset validation strict vs non-strict mode" ,
7169
+ Dialect : "mysql" ,
7170
+ SetUpScript : []string {
7171
+ "create table charset_test (c char(10), v varchar(10), txt text) character set utf8mb4;" ,
7172
+ },
7173
+ Assertions : []ScriptTestAssertion {
7174
+ {
7175
+ Query : "set sql_mode = 'STRICT_TRANS_TABLES';" ,
7176
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 0 }}},
7177
+ },
7178
+ {
7179
+ Query : "insert into charset_test(c) values (UNHEX('446F6C744C6162AE'));" ,
7180
+ ExpectedErrStr : "Incorrect string value: '\\ xAE' for column 'c' at row 1" ,
7181
+ },
7182
+ {
7183
+ Query : "insert into charset_test(v) values (UNHEX('446F6C744C6162AE'));" ,
7184
+ ExpectedErrStr : "Incorrect string value: '\\ xAE' for column 'v' at row 1" ,
7185
+ },
7186
+ {
7187
+ Query : "insert into charset_test(txt) values (UNHEX('446F6C744C6162AE'));" ,
7188
+ ExpectedErrStr : "Incorrect string value: '\\ xAE' for column 'txt' at row 1" ,
7189
+ },
7190
+ {
7191
+ Query : "set sql_mode = '';" ,
7192
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 0 }}},
7193
+ },
7194
+ {
7195
+ Query : "insert into charset_test(c) values (UNHEX('446F6C744C6162AE'));" ,
7196
+ Expected : []sql.Row {
7197
+ {types.OkResult {RowsAffected : 1 }},
7198
+ },
7199
+ },
7200
+ {
7201
+ Query : "insert into charset_test(v) values (UNHEX('446F6C744C6162AE'));" ,
7202
+ Expected : []sql.Row {
7203
+ {types.OkResult {RowsAffected : 1 }},
7204
+ },
7205
+ },
7206
+ {
7207
+ Query : "insert into charset_test(txt) values (UNHEX('446F6C744C6162AE'));" ,
7208
+ Expected : []sql.Row {
7209
+ {types.OkResult {RowsAffected : 1 }},
7210
+ },
7211
+ },
7212
+ {
7213
+ Query : "select HEX(c), LENGTH(c) from charset_test where c is not null;" ,
7214
+ Expected : []sql.Row {
7215
+ {"446F6C744C6162" , 7 },
7216
+ },
7217
+ },
7218
+ {
7219
+ Query : "select HEX(v), LENGTH(v) from charset_test where v is not null;" ,
7220
+ Expected : []sql.Row {
7221
+ {"446F6C744C6162" , 7 },
7222
+ },
7223
+ },
7224
+ {
7225
+ Query : "select HEX(txt), LENGTH(txt) from charset_test where txt is not null;" ,
7226
+ Expected : []sql.Row {
7227
+ {"446F6C744C6162" , 7 },
7228
+ },
7229
+ },
7230
+ },
7231
+ },
7232
+ {
7233
+ Name : "charset validation issue #8893 - customer scenario" ,
7234
+ Dialect : "mysql" ,
7235
+ SetUpScript : []string {
7236
+ "create table products (id int primary key, name text character set utf8mb4);" ,
7237
+ },
7238
+ Assertions : []ScriptTestAssertion {
7239
+ // Test charset validation with invalid UTF-8 data
7240
+ {
7241
+ Query : "insert into products values (1, UNHEX('446F6C744C6162AE'));" , // "DoltLab" + invalid byte 0xAE
7242
+ ExpectedErrStr : "Incorrect string value: '\\ xAE' for column 'name' at row 1" ,
7243
+ },
7244
+ // Test non-strict mode truncation behavior
7245
+ {
7246
+ Query : "set sql_mode = '';" ,
7247
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 0 }}},
7248
+ },
7249
+ {
7250
+ Query : "insert into products values (1, UNHEX('446F6C744C6162AE'));" , // Now succeeds with truncation
7251
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7252
+ },
7253
+ // Verify data was truncated at invalid byte (MySQL behavior)
7254
+ {
7255
+ Query : "select id, name, HEX(name) from products;" ,
7256
+ Expected : []sql.Row {
7257
+ {1 , "DoltLab" , "446F6C744C6162" }, // Invalid byte 0xAE was truncated
7258
+ },
7259
+ },
7260
+ // Customer can now query and work with the data
7261
+ {
7262
+ Query : "select id, name from products where name like '%Lab%';" ,
7263
+ Expected : []sql.Row {
7264
+ {1 , "DoltLab" },
7265
+ },
7266
+ },
7267
+ },
7268
+ },
7269
+ {
7270
+ Name : "charset validation edge cases - formatInvalidByteForError testing" ,
7271
+ Dialect : "mysql" ,
7272
+ SetUpScript : []string {
7273
+ "create table charset_edge_test (c char(10), v varchar(20), t text);" ,
7274
+ },
7275
+ Assertions : []ScriptTestAssertion {
7276
+ // STRICT MODE TESTS
7277
+ {
7278
+ Query : "set sql_mode = 'STRICT_TRANS_TABLES';" ,
7279
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 0 }}},
7280
+ },
7281
+ // Single invalid byte (0xAE)
7282
+ {
7283
+ Query : "insert into charset_edge_test(c) values (UNHEX('AE'));" ,
7284
+ ExpectedErrStr : "Incorrect string value: '\\ xAE' for column 'c' at row 1" ,
7285
+ },
7286
+ {
7287
+ Query : "insert into charset_edge_test(v) values (UNHEX('AE'));" ,
7288
+ ExpectedErrStr : "Incorrect string value: '\\ xAE' for column 'v' at row 1" ,
7289
+ },
7290
+ {
7291
+ Query : "insert into charset_edge_test(t) values (UNHEX('AE'));" ,
7292
+ ExpectedErrStr : "Incorrect string value: '\\ xAE' for column 't' at row 1" ,
7293
+ },
7294
+ // Multiple invalid bytes
7295
+ {
7296
+ Query : "insert into charset_edge_test(c) values (UNHEX('AEAEAE'));" ,
7297
+ ExpectedErrStr : "Incorrect string value: '\\ xAE\\ xAE\\ xAE' for column 'c' at row 1" ,
7298
+ },
7299
+ // Overlong sequences
7300
+ {
7301
+ Query : "insert into charset_edge_test(c) values (UNHEX('C0C1'));" ,
7302
+ ExpectedErrStr : "Incorrect string value: '\\ xC0\\ xC1' for column 'c' at row 1" ,
7303
+ },
7304
+ // Invalid bytes 0xFE, 0xFF
7305
+ {
7306
+ Query : "insert into charset_edge_test(c) values (UNHEX('FE'));" ,
7307
+ ExpectedErrStr : "Incorrect string value: '\\ xFE' for column 'c' at row 1" ,
7308
+ },
7309
+ {
7310
+ Query : "insert into charset_edge_test(c) values (UNHEX('FF'));" ,
7311
+ ExpectedErrStr : "Incorrect string value: '\\ xFF' for column 'c' at row 1" ,
7312
+ },
7313
+ // Surrogate pairs
7314
+ {
7315
+ Query : "insert into charset_edge_test(c) values (UNHEX('EDA080'));" ,
7316
+ ExpectedErrStr : "Incorrect string value: '\\ xED\\ xA0\\ x80' for column 'c' at row 1" ,
7317
+ },
7318
+ {
7319
+ Query : "insert into charset_edge_test(c) values (UNHEX('EDBFBF'));" ,
7320
+ ExpectedErrStr : "Incorrect string value: '\\ xED\\ xBF\\ xBF' for column 'c' at row 1" ,
7321
+ },
7322
+ // More overlong sequences
7323
+ {
7324
+ Query : "insert into charset_edge_test(c) values (UNHEX('C080'));" ,
7325
+ ExpectedErrStr : "Incorrect string value: '\\ xC0\\ x80' for column 'c' at row 1" ,
7326
+ },
7327
+ {
7328
+ Query : "insert into charset_edge_test(c) values (UNHEX('E08080'));" ,
7329
+ ExpectedErrStr : "Incorrect string value: '\\ xE0\\ x80\\ x80' for column 'c' at row 1" ,
7330
+ },
7331
+ {
7332
+ Query : "insert into charset_edge_test(c) values (UNHEX('F0808080'));" ,
7333
+ ExpectedErrStr : "Incorrect string value: '\\ xF0\\ x80\\ x80\\ x80' for column 'c' at row 1" ,
7334
+ },
7335
+ // Out of range (beyond U+10FFFF)
7336
+ {
7337
+ Query : "insert into charset_edge_test(c) values (UNHEX('F4908080'));" ,
7338
+ ExpectedErrStr : "Incorrect string value: '\\ xF4\\ x90\\ x80\\ x80' for column 'c' at row 1" ,
7339
+ },
7340
+ // Continuation bytes without start byte
7341
+ {
7342
+ Query : "insert into charset_edge_test(c) values (UNHEX('80'));" ,
7343
+ ExpectedErrStr : "Incorrect string value: '\\ x80' for column 'c' at row 1" ,
7344
+ },
7345
+ {
7346
+ Query : "insert into charset_edge_test(c) values (UNHEX('BF'));" ,
7347
+ ExpectedErrStr : "Incorrect string value: '\\ xBF' for column 'c' at row 1" ,
7348
+ },
7349
+ // Incomplete sequences
7350
+ {
7351
+ Query : "insert into charset_edge_test(c) values (UNHEX('C2'));" ,
7352
+ ExpectedErrStr : "Incorrect string value: '\\ xC2' for column 'c' at row 1" ,
7353
+ },
7354
+ {
7355
+ Query : "insert into charset_edge_test(c) values (UNHEX('E0A0'));" ,
7356
+ ExpectedErrStr : "Incorrect string value: '\\ xE0\\ xA0' for column 'c' at row 1" ,
7357
+ },
7358
+ {
7359
+ Query : "insert into charset_edge_test(c) values (UNHEX('F09080'));" ,
7360
+ ExpectedErrStr : "Incorrect string value: '\\ xF0\\ x90\\ x80' for column 'c' at row 1" ,
7361
+ },
7362
+ // Long sequence (tests truncation with ...)
7363
+ {
7364
+ Query : "insert into charset_edge_test(c) values (UNHEX('999897969594939291'));" ,
7365
+ ExpectedErrStr : "Incorrect string value: '\\ x99\\ x98\\ x97\\ x96\\ x95\\ x94...' for column 'c' at row 1" ,
7366
+ },
7367
+ // Valid UTF-8 with invalid bytes
7368
+ {
7369
+ Query : "insert into charset_edge_test(c) values (UNHEX('446F6C744C6162AE'));" ,
7370
+ ExpectedErrStr : "Incorrect string value: '\\ xAE' for column 'c' at row 1" ,
7371
+ },
7372
+
7373
+ // NON-STRICT MODE TESTS (should truncate)
7374
+ {
7375
+ Query : "set sql_mode = '';" ,
7376
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 0 }}},
7377
+ },
7378
+ {
7379
+ Query : "insert into charset_edge_test(c) values (UNHEX('446F6C744C6162AE'));" ,
7380
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7381
+ },
7382
+ {
7383
+ Query : "insert into charset_edge_test(v) values (UNHEX('48656C6C6FC0'));" ,
7384
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7385
+ },
7386
+ {
7387
+ Query : "insert into charset_edge_test(t) values (UNHEX('54657374FF'));" ,
7388
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7389
+ },
7390
+ // Verify truncated data
7391
+ {
7392
+ Query : "select HEX(c), LENGTH(c) from charset_edge_test where c is not null;" ,
7393
+ Expected : []sql.Row {
7394
+ {"446F6C744C6162" , 7 },
7395
+ },
7396
+ },
7397
+ {
7398
+ Query : "select HEX(v), LENGTH(v) from charset_edge_test where v is not null;" ,
7399
+ Expected : []sql.Row {
7400
+ {"48656C6C6F" , 5 },
7401
+ },
7402
+ },
7403
+ {
7404
+ Query : "select HEX(t), LENGTH(t) from charset_edge_test where t is not null;" ,
7405
+ Expected : []sql.Row {
7406
+ {"54657374" , 4 },
7407
+ },
7408
+ },
7409
+ },
7410
+ },
7411
+ {
7412
+ Name : "charset validation ASCII range tests" ,
7413
+ Dialect : "mysql" ,
7414
+ SetUpScript : []string {
7415
+ "create table ascii_test (c char(10), v varchar(20), t text);" ,
7416
+ },
7417
+ Assertions : []ScriptTestAssertion {
7418
+ {
7419
+ Query : "set sql_mode = 'STRICT_TRANS_TABLES';" ,
7420
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 0 }}},
7421
+ },
7422
+ // ASCII range 0x00-0x7F
7423
+ {
7424
+ Query : "insert into ascii_test(c) values (UNHEX('00'));" ,
7425
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7426
+ },
7427
+ {
7428
+ Query : "insert into ascii_test(c) values (UNHEX('20'));" ,
7429
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7430
+ },
7431
+ {
7432
+ Query : "insert into ascii_test(c) values (UNHEX('41'));" ,
7433
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7434
+ },
7435
+ {
7436
+ Query : "insert into ascii_test(c) values (UNHEX('7F'));" ,
7437
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7438
+ },
7439
+ {
7440
+ Query : "insert into ascii_test(v) values (UNHEX('48656C6C6F'));" , // "Hello"
7441
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7442
+ },
7443
+ {
7444
+ Query : "insert into ascii_test(t) values (UNHEX('00207F41'));" ,
7445
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7446
+ },
7447
+ // Verify ASCII data
7448
+ {
7449
+ Query : "select HEX(c), LENGTH(c) from ascii_test where c is not null order by c;" ,
7450
+ Expected : []sql.Row {
7451
+ {"00" , 1 },
7452
+ {"20" , 1 },
7453
+ {"41" , 1 },
7454
+ {"7F" , 1 },
7455
+ },
7456
+ },
7457
+ {
7458
+ Query : "select HEX(v), LENGTH(v) from ascii_test where v is not null;" ,
7459
+ Expected : []sql.Row {
7460
+ {"48656C6C6F" , 5 }, // "Hello"
7461
+ },
7462
+ },
7463
+ {
7464
+ Query : "select HEX(t), LENGTH(t) from ascii_test where t is not null;" ,
7465
+ Expected : []sql.Row {
7466
+ {"00207F41" , 4 }, // NULL + SPACE + DEL + A
7467
+ },
7468
+ },
7469
+ // Boundary cases
7470
+ {
7471
+ Query : "insert into ascii_test(c) values (UNHEX('7E'));" , // 0x7E is valid ASCII
7472
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7473
+ },
7474
+ {
7475
+ Query : "insert into ascii_test(c) values (UNHEX('81'));" , // 0x81 is invalid
7476
+ ExpectedErrStr : "Incorrect string value: '\\ x81' for column 'c' at row 1" ,
7477
+ },
7478
+ // Mixed ASCII and invalid (non-strict mode)
7479
+ {
7480
+ Query : "set sql_mode = '';" , // Non-strict mode
7481
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 0 }}},
7482
+ },
7483
+ {
7484
+ Query : "insert into ascii_test(c) values (UNHEX('41424380'));" , // ABC + 0x80 (invalid)
7485
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7486
+ },
7487
+ // Verify truncation
7488
+ {
7489
+ Query : "select HEX(c), LENGTH(c) from ascii_test where HEX(c) = '414243';" ,
7490
+ Expected : []sql.Row {
7491
+ {"414243" , 3 }, // "ABC" - truncated at invalid byte
7492
+ },
7493
+ },
7494
+ // Valid UTF-8 sequences
7495
+ {
7496
+ Query : "set sql_mode = 'STRICT_TRANS_TABLES';" , // Back to strict mode
7497
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 0 }}},
7498
+ },
7499
+ {
7500
+ Query : "insert into ascii_test(c) values (UNHEX('C3A9'));" , // é (2-byte UTF-8)
7501
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7502
+ },
7503
+ {
7504
+ Query : "insert into ascii_test(c) values (UNHEX('E282AC'));" , // € (3-byte UTF-8)
7505
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7506
+ },
7507
+ {
7508
+ Query : "insert into ascii_test(c) values (UNHEX('F09D849E'));" , // 𝄞 (4-byte UTF-8)
7509
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7510
+ },
7511
+ // Function boundary constants (asciiMin=32, asciiMax=127)
7512
+ {
7513
+ Query : "insert into ascii_test(c) values (UNHEX('1F'));" , // ASCII 31 (below asciiMin=32) - valid ASCII but non-printable
7514
+ Expected : []sql.Row {{types.OkResult {RowsAffected : 1 }}},
7515
+ },
7516
+ // Note: UNHEX('80') test is covered in edge cases test above
7517
+ },
7518
+ },
7167
7519
{
7168
7520
Name : "unix_timestamp script tests" ,
7169
7521
Dialect : "mysql" ,
0 commit comments