Skip to content

Commit 792da42

Browse files
authored
sink(cdc): avoid temporary memory allocations for avro (#11637)
close #11590
1 parent dd2d54a commit 792da42

File tree

2 files changed

+69
-76
lines changed

2 files changed

+69
-76
lines changed

cdc/model/sink.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -556,8 +556,8 @@ func (r *RowChangedEvent) GetHandleKeyColumnValues() []string {
556556
}
557557

558558
// HandleKeyColInfos returns the column(s) and colInfo(s) corresponding to the handle key(s)
559-
func (r *RowChangedEvent) HandleKeyColInfos() ([]*Column, []rowcodec.ColInfo) {
560-
pkeyCols := make([]*Column, 0)
559+
func (r *RowChangedEvent) HandleKeyColInfos() ([]*ColumnData, []rowcodec.ColInfo) {
560+
pkeyCols := make([]*ColumnData, 0)
561561
pkeyColInfos := make([]rowcodec.ColInfo, 0)
562562

563563
var cols []*ColumnData
@@ -571,7 +571,7 @@ func (r *RowChangedEvent) HandleKeyColInfos() ([]*Column, []rowcodec.ColInfo) {
571571
colInfos := tableInfo.GetColInfosForRowChangedEvent()
572572
for i, col := range cols {
573573
if col != nil && tableInfo.ForceGetColumnFlagType(col.ColumnID).IsHandleKey() {
574-
pkeyCols = append(pkeyCols, columnData2Column(col, tableInfo))
574+
pkeyCols = append(pkeyCols, col)
575575
pkeyColInfos = append(pkeyColInfos, colInfos[i])
576576
}
577577
}

pkg/sink/codec/avro/avro.go

Lines changed: 66 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ type BatchEncoder struct {
4949
}
5050

5151
type avroEncodeInput struct {
52-
columns []*model.Column
52+
*model.TableInfo
53+
columns []*model.ColumnData
5354
colInfos []rowcodec.ColInfo
5455
}
5556

@@ -82,11 +83,12 @@ func (a *BatchEncoder) encodeKey(ctx context.Context, topic string, e *model.Row
8283
return nil, nil
8384
}
8485

85-
keyColumns := &avroEncodeInput{
86-
columns: cols,
87-
colInfos: colInfos,
86+
keyColumns := avroEncodeInput{
87+
TableInfo: e.TableInfo,
88+
columns: cols,
89+
colInfos: colInfos,
8890
}
89-
avroCodec, header, err := a.getKeySchemaCodec(ctx, topic, &e.TableInfo.TableName, e.TableInfo.Version, keyColumns)
91+
avroCodec, header, err := a.getKeySchemaCodec(ctx, topic, e.TableInfo.TableName, e.TableInfo.Version, keyColumns)
9092
if err != nil {
9193
return nil, errors.Trace(err)
9294
}
@@ -119,7 +121,7 @@ func topicName2SchemaSubjects(topicName, subjectSuffix string) string {
119121
}
120122

121123
func (a *BatchEncoder) getValueSchemaCodec(
122-
ctx context.Context, topic string, tableName *model.TableName, tableVersion uint64, input *avroEncodeInput,
124+
ctx context.Context, topic string, tableName model.TableName, tableVersion uint64, input avroEncodeInput,
123125
) (*goavro.Codec, []byte, error) {
124126
schemaGen := func() (string, error) {
125127
schema, err := a.value2AvroSchema(tableName, input)
@@ -139,7 +141,7 @@ func (a *BatchEncoder) getValueSchemaCodec(
139141
}
140142

141143
func (a *BatchEncoder) getKeySchemaCodec(
142-
ctx context.Context, topic string, tableName *model.TableName, tableVersion uint64, keyColumns *avroEncodeInput,
144+
ctx context.Context, topic string, tableName model.TableName, tableVersion uint64, keyColumns avroEncodeInput,
143145
) (*goavro.Codec, []byte, error) {
144146
schemaGen := func() (string, error) {
145147
schema, err := a.key2AvroSchema(tableName, keyColumns)
@@ -163,15 +165,16 @@ func (a *BatchEncoder) encodeValue(ctx context.Context, topic string, e *model.R
163165
return nil, nil
164166
}
165167

166-
input := &avroEncodeInput{
167-
columns: e.GetColumns(),
168-
colInfos: e.TableInfo.GetColInfosForRowChangedEvent(),
168+
input := avroEncodeInput{
169+
TableInfo: e.TableInfo,
170+
columns: e.Columns,
171+
colInfos: e.TableInfo.GetColInfosForRowChangedEvent(),
169172
}
170173
if len(input.columns) == 0 {
171174
return nil, nil
172175
}
173176

174-
avroCodec, header, err := a.getValueSchemaCodec(ctx, topic, &e.TableInfo.TableName, e.TableInfo.Version, input)
177+
avroCodec, header, err := a.getValueSchemaCodec(ctx, topic, e.TableInfo.TableName, e.TableInfo.Version, input)
175178
if err != nil {
176179
return nil, errors.Trace(err)
177180
}
@@ -386,12 +389,12 @@ var type2TiDBType = map[byte]string{
386389
mysql.TypeTiDBVectorFloat32: "TiDBVECTORFloat32",
387390
}
388391

389-
func getTiDBTypeFromColumn(col *model.Column) string {
390-
tt := type2TiDBType[col.Type]
391-
if col.Flag.IsUnsigned() && (tt == "INT" || tt == "BIGINT") {
392+
func getTiDBTypeFromColumn(col model.ColumnDataX) string {
393+
tt := type2TiDBType[col.GetType()]
394+
if col.GetFlag().IsUnsigned() && (tt == "INT" || tt == "BIGINT") {
392395
return tt + " UNSIGNED"
393396
}
394-
if col.Flag.IsBinary() && tt == "TEXT" {
397+
if col.GetFlag().IsBinary() && tt == "TEXT" {
395398
return "BLOB"
396399
}
397400
return tt
@@ -519,45 +522,44 @@ func (a *BatchEncoder) schemaWithExtension(
519522
return top
520523
}
521524

522-
func (a *BatchEncoder) columns2AvroSchema(
523-
tableName *model.TableName,
524-
input *avroEncodeInput,
525-
) (*avroSchemaTop, error) {
525+
func (a *BatchEncoder) columns2AvroSchema(tableName model.TableName, input avroEncodeInput) (*avroSchemaTop, error) {
526526
top := &avroSchemaTop{
527527
Tp: "record",
528528
Name: common.SanitizeName(tableName.Table),
529529
Namespace: getAvroNamespace(a.namespace, tableName.Schema),
530530
Fields: nil,
531531
}
532-
for i, col := range input.columns {
533-
if col == nil {
532+
for _, col := range input.columns {
533+
colx := model.GetColumnDataX(col, input.TableInfo)
534+
if colx.ColumnData == nil {
534535
continue
535536
}
536-
avroType, err := a.columnToAvroSchema(col, input.colInfos[i].Ft)
537+
538+
avroType, err := a.columnToAvroSchema(colx)
537539
if err != nil {
538540
return nil, err
539541
}
540542
field := make(map[string]interface{})
541-
field["name"] = common.SanitizeName(col.Name)
543+
field["name"] = common.SanitizeName(colx.GetName())
542544

543-
copied := *col
544-
copied.Value = copied.Default
545-
defaultValue, _, err := a.columnToAvroData(&copied, input.colInfos[i].Ft)
545+
copied := colx
546+
copied.ColumnData = &model.ColumnData{ColumnID: colx.ColumnID, Value: colx.GetDefaultValue()}
547+
defaultValue, _, err := a.columnToAvroData(copied)
546548
if err != nil {
547549
log.Error("fail to get default value for avro schema")
548550
return nil, errors.Trace(err)
549551
}
550552
// goavro doesn't support set default value for logical type
551553
// https://github.com/linkedin/goavro/issues/202
552554
if _, ok := avroType.(avroLogicalTypeSchema); ok {
553-
if col.Flag.IsNullable() {
555+
if colx.GetFlag().IsNullable() {
554556
field["type"] = []interface{}{"null", avroType}
555557
field["default"] = nil
556558
} else {
557559
field["type"] = avroType
558560
}
559561
} else {
560-
if col.Flag.IsNullable() {
562+
if colx.GetFlag().IsNullable() {
561563
// https://stackoverflow.com/questions/22938124/avro-field-default-values
562564
if defaultValue == nil {
563565
field["type"] = []interface{}{"null", avroType}
@@ -577,12 +579,9 @@ func (a *BatchEncoder) columns2AvroSchema(
577579
return top, nil
578580
}
579581

580-
func (a *BatchEncoder) value2AvroSchema(
581-
tableName *model.TableName,
582-
input *avroEncodeInput,
583-
) (string, error) {
582+
func (a *BatchEncoder) value2AvroSchema(tableName model.TableName, input avroEncodeInput) (string, error) {
584583
if a.config.EnableRowChecksum {
585-
sort.Sort(input)
584+
sort.Sort(&input)
586585
}
587586

588587
top, err := a.columns2AvroSchema(tableName, input)
@@ -605,10 +604,7 @@ func (a *BatchEncoder) value2AvroSchema(
605604
return string(str), nil
606605
}
607606

608-
func (a *BatchEncoder) key2AvroSchema(
609-
tableName *model.TableName,
610-
keyColumns *avroEncodeInput,
611-
) (string, error) {
607+
func (a *BatchEncoder) key2AvroSchema(tableName model.TableName, keyColumns avroEncodeInput) (string, error) {
612608
top, err := a.columns2AvroSchema(tableName, keyColumns)
613609
if err != nil {
614610
return "", err
@@ -622,45 +618,43 @@ func (a *BatchEncoder) key2AvroSchema(
622618
return string(str), nil
623619
}
624620

625-
func (a *BatchEncoder) columns2AvroData(
626-
input *avroEncodeInput,
627-
) (map[string]interface{}, error) {
621+
func (a *BatchEncoder) columns2AvroData(input avroEncodeInput) (map[string]interface{}, error) {
628622
ret := make(map[string]interface{}, len(input.columns))
629-
for i, col := range input.columns {
630-
if col == nil {
623+
for _, col := range input.columns {
624+
colx := model.GetColumnDataX(col, input.TableInfo)
625+
if colx.ColumnData == nil {
631626
continue
632627
}
633-
data, str, err := a.columnToAvroData(col, input.colInfos[i].Ft)
628+
629+
data, str, err := a.columnToAvroData(colx)
634630
if err != nil {
635631
return nil, err
636632
}
637633

638634
// https: //pkg.go.dev/github.com/linkedin/goavro/v2#Union
639-
if col.Flag.IsNullable() {
640-
ret[common.SanitizeName(col.Name)] = goavro.Union(str, data)
635+
if colx.GetFlag().IsNullable() {
636+
ret[common.SanitizeName(colx.GetName())] = goavro.Union(str, data)
641637
} else {
642-
ret[common.SanitizeName(col.Name)] = data
638+
ret[common.SanitizeName(colx.GetName())] = data
643639
}
644640
}
645641

646642
log.Debug("rowToAvroData", zap.Any("data", ret))
647643
return ret, nil
648644
}
649645

650-
func (a *BatchEncoder) columnToAvroSchema(
651-
col *model.Column,
652-
ft *types.FieldType,
653-
) (interface{}, error) {
646+
func (a *BatchEncoder) columnToAvroSchema(col model.ColumnDataX) (interface{}, error) {
654647
tt := getTiDBTypeFromColumn(col)
655-
switch col.Type {
648+
649+
switch col.GetType() {
656650
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24:
657651
// BOOL/TINYINT/SMALLINT/MEDIUMINT
658652
return avroSchema{
659653
Type: "int",
660654
Parameters: map[string]string{tidbType: tt},
661655
}, nil
662656
case mysql.TypeLong: // INT
663-
if col.Flag.IsUnsigned() {
657+
if col.GetFlag().IsUnsigned() {
664658
return avroSchema{
665659
Type: "long",
666660
Parameters: map[string]string{tidbType: tt},
@@ -672,7 +666,7 @@ func (a *BatchEncoder) columnToAvroSchema(
672666
}, nil
673667
case mysql.TypeLonglong: // BIGINT
674668
t := "long"
675-
if col.Flag.IsUnsigned() &&
669+
if col.GetFlag().IsUnsigned() &&
676670
a.config.AvroBigintUnsignedHandlingMode == common.BigintUnsignedHandlingModeString {
677671
t = "string"
678672
}
@@ -691,9 +685,9 @@ func (a *BatchEncoder) columnToAvroSchema(
691685
Parameters: map[string]string{tidbType: tt},
692686
}, nil
693687
case mysql.TypeBit:
694-
displayFlen := ft.GetFlen()
688+
displayFlen := col.GetColumnInfo().FieldType.GetFlen()
695689
if displayFlen == -1 {
696-
displayFlen, _ = mysql.GetDefaultFieldLengthAndDecimal(col.Type)
690+
displayFlen, _ = mysql.GetDefaultFieldLengthAndDecimal(col.GetType())
697691
}
698692
return avroSchema{
699693
Type: "bytes",
@@ -704,6 +698,7 @@ func (a *BatchEncoder) columnToAvroSchema(
704698
}, nil
705699
case mysql.TypeNewDecimal:
706700
if a.config.AvroDecimalHandlingMode == common.DecimalHandlingModePrecise {
701+
ft := col.GetColumnInfo().FieldType
707702
defaultFlen, defaultDecimal := mysql.GetDefaultFieldLengthAndDecimal(ft.GetType())
708703
displayFlen, displayDecimal := ft.GetFlen(), ft.GetDecimal()
709704
// length not specified, set it to system type default
@@ -738,16 +733,17 @@ func (a *BatchEncoder) columnToAvroSchema(
738733
mysql.TypeLongBlob,
739734
mysql.TypeBlob:
740735
t := "string"
741-
if col.Flag.IsBinary() {
736+
if col.GetFlag().IsBinary() {
742737
t = "bytes"
743738
}
744739
return avroSchema{
745740
Type: t,
746741
Parameters: map[string]string{tidbType: tt},
747742
}, nil
748743
case mysql.TypeEnum, mysql.TypeSet:
749-
es := make([]string, 0, len(ft.GetElems()))
750-
for _, e := range ft.GetElems() {
744+
elems := col.GetColumnInfo().FieldType.GetElems()
745+
es := make([]string, 0, len(elems))
746+
for _, e := range elems {
751747
e = common.EscapeEnumAndSetOptions(e)
752748
es = append(es, e)
753749
}
@@ -779,20 +775,17 @@ func (a *BatchEncoder) columnToAvroSchema(
779775
Parameters: map[string]string{tidbType: tt},
780776
}, nil
781777
default:
782-
log.Error("unknown mysql type", zap.Any("mysqlType", col.Type))
778+
log.Error("unknown mysql type", zap.Any("mysqlType", col.GetType()))
783779
return nil, cerror.ErrAvroEncodeFailed.GenWithStack("unknown mysql type")
784780
}
785781
}
786782

787-
func (a *BatchEncoder) columnToAvroData(
788-
col *model.Column,
789-
ft *types.FieldType,
790-
) (interface{}, string, error) {
783+
func (a *BatchEncoder) columnToAvroData(col model.ColumnDataX) (interface{}, string, error) {
791784
if col.Value == nil {
792785
return nil, "null", nil
793786
}
794787

795-
switch col.Type {
788+
switch col.GetType() {
796789
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24:
797790
if v, ok := col.Value.(string); ok {
798791
n, err := strconv.ParseInt(v, 10, 32)
@@ -801,7 +794,7 @@ func (a *BatchEncoder) columnToAvroData(
801794
}
802795
return int32(n), "int", nil
803796
}
804-
if col.Flag.IsUnsigned() {
797+
if col.GetFlag().IsUnsigned() {
805798
return int32(col.Value.(uint64)), "int", nil
806799
}
807800
return int32(col.Value.(int64)), "int", nil
@@ -811,18 +804,18 @@ func (a *BatchEncoder) columnToAvroData(
811804
if err != nil {
812805
return nil, "", cerror.WrapError(cerror.ErrAvroEncodeFailed, err)
813806
}
814-
if col.Flag.IsUnsigned() {
807+
if col.GetFlag().IsUnsigned() {
815808
return n, "long", nil
816809
}
817810
return int32(n), "int", nil
818811
}
819-
if col.Flag.IsUnsigned() {
812+
if col.GetFlag().IsUnsigned() {
820813
return int64(col.Value.(uint64)), "long", nil
821814
}
822815
return int32(col.Value.(int64)), "int", nil
823816
case mysql.TypeLonglong:
824817
if v, ok := col.Value.(string); ok {
825-
if col.Flag.IsUnsigned() {
818+
if col.GetFlag().IsUnsigned() {
826819
if a.config.AvroBigintUnsignedHandlingMode == common.BigintUnsignedHandlingModeString {
827820
return v, "string", nil
828821
}
@@ -838,7 +831,7 @@ func (a *BatchEncoder) columnToAvroData(
838831
}
839832
return n, "long", nil
840833
}
841-
if col.Flag.IsUnsigned() {
834+
if col.GetFlag().IsUnsigned() {
842835
if a.config.AvroBigintUnsignedHandlingMode == common.BigintUnsignedHandlingModeLong {
843836
return int64(col.Value.(uint64)), "long", nil
844837
}
@@ -888,7 +881,7 @@ func (a *BatchEncoder) columnToAvroData(
888881
mysql.TypeBlob,
889882
mysql.TypeMediumBlob,
890883
mysql.TypeLongBlob:
891-
if col.Flag.IsBinary() {
884+
if col.GetFlag().IsBinary() {
892885
if v, ok := col.Value.(string); ok {
893886
return []byte(v), "bytes", nil
894887
}
@@ -902,7 +895,7 @@ func (a *BatchEncoder) columnToAvroData(
902895
if v, ok := col.Value.(string); ok {
903896
return v, "string", nil
904897
}
905-
elements := ft.GetElems()
898+
elements := col.GetColumnInfo().FieldType.GetElems()
906899
number := col.Value.(uint64)
907900
enumVar, err := types.ParseEnumValue(elements, number)
908901
if err != nil {
@@ -914,7 +907,7 @@ func (a *BatchEncoder) columnToAvroData(
914907
if v, ok := col.Value.(string); ok {
915908
return v, "string", nil
916909
}
917-
elements := ft.GetElems()
910+
elements := col.GetColumnInfo().FieldType.GetElems()
918911
number := col.Value.(uint64)
919912
setVar, err := types.ParseSetValue(elements, number)
920913
if err != nil {
@@ -943,7 +936,7 @@ func (a *BatchEncoder) columnToAvroData(
943936
}
944937
return nil, "", cerror.ErrAvroEncodeFailed
945938
default:
946-
log.Error("unknown mysql type", zap.Any("value", col.Value), zap.Any("mysqlType", col.Type))
939+
log.Error("unknown mysql type", zap.Any("value", col.Value), zap.Any("mysqlType", col.GetType()))
947940
return nil, "", cerror.ErrAvroEncodeFailed.GenWithStack("unknown mysql type")
948941
}
949942
}

0 commit comments

Comments
 (0)