Skip to content

Commit 1a473a4

Browse files
authored
Merge pull request #1158 from wangzihuacool/fix-charset
Fix: Convert column value in binlog events to bytes instead of utf8 encoded unicode
2 parents ca8aef5 + 9c2c7ba commit 1a473a4

File tree

2 files changed: +21 additions, -4 deletions

go/logic/inspect.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ func (this *Inspector) inspectOriginalAndGhostTables() (err error) {
192192
this.migrationContext.MappedSharedColumns.SetEnumToTextConversion(column.Name)
193193
this.migrationContext.MappedSharedColumns.SetEnumValues(column.Name, column.EnumValues)
194194
}
195+
if column.Name == mappedColumn.Name && column.Charset != mappedColumn.Charset {
196+
this.migrationContext.SharedColumns.SetCharsetConversion(column.Name, column.Charset, mappedColumn.Charset)
197+
}
195198
}
196199

197200
for _, column := range this.migrationContext.UniqueKey.Columns.Columns() {

go/sql/types.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ type TimezoneConversion struct {
3232
ToTimezone string
3333
}
3434

35+
// CharacterSetConversion names the pair of character sets involved when a
// column's charset differs between the original column and its mapped column.
// A Column holds one via its charsetConversion field; convertArg checks whether
// that field is nil to decide how a string value is handled.
type CharacterSetConversion struct {
36+
// ToCharset is the character set name being converted to.
ToCharset string
37+
// FromCharset is the character set name being converted from.
FromCharset string
38+
}
39+
3540
type Column struct {
3641
Name string
3742
IsUnsigned bool
@@ -43,17 +48,22 @@ type Column struct {
4348
// add Octet length for binary type, fix bytes with suffix "00" get clipped in mysql binlog.
4449
// https://github.com/github/gh-ost/issues/909
4550
BinaryOctetLength uint
51+
charsetConversion *CharacterSetConversion
4652
}
4753

4854
func (this *Column) convertArg(arg interface{}, isUniqueKeyColumn bool) interface{} {
4955
if s, ok := arg.(string); ok {
50-
// string, charset conversion
51-
if encoding, ok := charsetEncodingMap[this.Charset]; ok {
52-
arg, _ = encoding.NewDecoder().String(s)
56+
arg2Bytes := []byte(s)
57+
// convert to bytes if character string without charsetConversion.
58+
if this.Charset != "" && this.charsetConversion == nil {
59+
arg = arg2Bytes
60+
} else {
61+
if encoding, ok := charsetEncodingMap[this.Charset]; ok {
62+
arg, _ = encoding.NewDecoder().String(s)
63+
}
5364
}
5465

5566
if this.Type == BinaryColumnType && isUniqueKeyColumn {
56-
arg2Bytes := []byte(arg.(string))
5767
size := len(arg2Bytes)
5868
if uint(size) < this.BinaryOctetLength {
5969
buf := bytes.NewBuffer(arg2Bytes)
@@ -238,6 +248,10 @@ func (this *ColumnList) Len() int {
238248
return len(this.columns)
239249
}
240250

251+
// SetCharsetConversion marks the column named columnName as requiring a
// character set conversion, by storing a new CharacterSetConversion built from
// fromCharset and toCharset on that column.
// NOTE(review): the result of GetColumn is dereferenced without a check; if
// GetColumn can return nil for an unknown column name this will panic — confirm
// that callers only pass names present in this list.
func (this *ColumnList) SetCharsetConversion(columnName string, fromCharset string, toCharset string) {
252+
this.GetColumn(columnName).charsetConversion = &CharacterSetConversion{FromCharset: fromCharset, ToCharset: toCharset}
253+
}
254+
241255
// UniqueKey is the combination of a key's name and columns
242256
type UniqueKey struct {
243257
Name string

0 commit comments

Comments
 (0)