Skip to content

Commit dd8defd

Browse files
authored
Merge pull request #2758 from dolthub/daylon/latin1-issue
Fix for latin1 issue
2 parents 7b6a8c1 + 1f11398 commit dd8defd

File tree

2 files changed

+42
-6
lines changed

2 files changed

+42
-6
lines changed

enginetest/enginetests.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4305,23 +4305,42 @@ func TestPreparedInsert(t *testing.T, harness Harness) {
43054305
{
43064306
Name: "inserts should trigger string conversion errors",
43074307
SetUpScript: []string{
4308-
"create table test (v varchar(10))",
4308+
"CREATE TABLE test (v1 VARCHAR(10));",
4309+
"CREATE TABLE test2 (v1 VARCHAR(10) CHARACTER SET latin1);",
43094310
},
43104311
Assertions: []queries.ScriptTestAssertion{
43114312
{
4312-
Query: "insert into test values (?)",
4313+
Query: "INSERT INTO test VALUES (?);",
43134314
Bindings: map[string]sqlparser.Expr{
43144315
"v1": mustBuildBindVariable([]byte{0x99, 0x98, 0x97}),
43154316
},
43164317
ExpectedErrStr: "incorrect string value: '[153 152 151]'",
43174318
},
43184319
{
4319-
Query: "insert into test values (?)",
4320+
Query: "INSERT INTO test VALUES (?);",
43204321
Bindings: map[string]sqlparser.Expr{
43214322
"v1": mustBuildBindVariable(string([]byte{0x99, 0x98, 0x97})),
43224323
},
43234324
ExpectedErrStr: "incorrect string value: '[153 152 151]'",
43244325
},
4326+
{
4327+
Query: "INSERT INTO test2 VALUES (?);",
4328+
Bindings: map[string]sqlparser.Expr{
4329+
"v1": mustBuildBindVariable([]byte{0x99, 0x98, 0x97}),
4330+
},
4331+
Expected: []sql.Row{
4332+
{types.OkResult{RowsAffected: 1}},
4333+
},
4334+
},
4335+
{
4336+
Query: "INSERT INTO test2 VALUES (?);",
4337+
Bindings: map[string]sqlparser.Expr{
4338+
"v1": mustBuildBindVariable(string([]byte{0x99, 0x98, 0x97})),
4339+
},
4340+
Expected: []sql.Row{
4341+
{types.OkResult{RowsAffected: 1}},
4342+
},
4343+
},
43254344
},
43264345
},
43274346
}

sql/types/strings.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -415,9 +415,26 @@ func ConvertToString(v interface{}, t sql.StringType) (string, error) {
415415
}
416416
}
417417

418-
// TODO: add exceptions for certain collations?
419-
if !IsBinaryType(t) && !utf8.Valid([]byte(val)) {
420-
return "", ErrIncorrectStringValue.New([]byte(val))
418+
// TODO: Completely unsure how this should actually be handled.
419+
// We need to handle the conversion to the correct character set, but we only need to do it once. At this point, we
420+
// don't know if we've done a conversion from an introducer (https://dev.mysql.com/doc/refman/8.4/en/charset-introducer.html).
421+
// Additionally, it's unknown if there are valid UTF8 strings that aren't valid for a conversion.
422+
// It seems like MySQL handles some of this using repertoires, but it seems like a massive refactoring to really get
423+
// it implemented (https://dev.mysql.com/doc/refman/8.4/en/charset-repertoire.html).
424+
// On top of that, we internally only work with UTF8MB4 strings, so we'll make a hard assumption that all UTF8
425+
// strings are valid for all character sets, and that all invalid UTF8 strings have not yet been converted.
426+
// This isn't correct, but it's a better approximation than the old logic.
427+
bytesVal := encodings.StringToBytes(val)
428+
if !IsBinaryType(t) && !utf8.Valid(bytesVal) {
429+
charset := t.CharacterSet()
430+
if charset == sql.CharacterSet_utf8mb4 {
431+
return "", ErrIncorrectStringValue.New(bytesVal)
432+
} else {
433+
var ok bool
434+
if bytesVal, ok = t.CharacterSet().Encoder().Decode(bytesVal); !ok {
435+
return "", ErrIncorrectStringValue.New(bytesVal)
436+
}
437+
}
421438
}
422439

423440
return val, nil

0 commit comments

Comments
 (0)