diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml
index cd09cde5354..e5f330545c5 100644
--- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/pom.xml
@@ -151,11 +151,30 @@ limitations under the License.
             <scope>test</scope>
         </dependency>
+
         <dependency>
-            <groupId>org.testcontainers</groupId>
-            <artifactId>junit-jupiter</artifactId>
-            <version>${testcontainers.version}</version>
-            <scope>test</scope>
+            <groupId>com.esri.geometry</groupId>
+            <artifactId>esri-geometry-api</artifactId>
+            <version>${geometry.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-core</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>io.debezium</groupId>
+            <artifactId>debezium-connector-mysql</artifactId>
+            <version>1.9.8.Final</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-cdc-base</artifactId>
+            <version>${project.version}</version>
+            <scope>compile</scope>
         </dependency>
         <dependency>
             <groupId>org.apache.flink</groupId>
@@ -163,6 +182,7 @@ limitations under the License.
             <version>${project.version}</version>
             <scope>test</scope>
         </dependency>
+
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java
new file mode 100644
index 00000000000..531370e6c42
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/Listeners/TiDBAntlrDdlParserListener.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.debezium.connector.tidb.Listeners;
+
+import io.debezium.antlr.AntlrDdlParserListener;
+import io.debezium.antlr.ProxyParseTreeListenerUtil;
+import io.debezium.connector.mysql.antlr.listener.AlterTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.AlterViewParserListener;
+import io.debezium.connector.mysql.antlr.listener.CreateAndAlterDatabaseParserListener;
+import io.debezium.connector.mysql.antlr.listener.CreateTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.CreateUniqueIndexParserListener;
+import io.debezium.connector.mysql.antlr.listener.CreateViewParserListener;
+import io.debezium.connector.mysql.antlr.listener.DropDatabaseParserListener;
+import io.debezium.connector.mysql.antlr.listener.DropTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.DropViewParserListener;
+import io.debezium.connector.mysql.antlr.listener.RenameTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.SetStatementParserListener;
+import io.debezium.connector.mysql.antlr.listener.TruncateTableParserListener;
+import io.debezium.connector.mysql.antlr.listener.UseStatementParserListener;
+import io.debezium.connector.tidb.TiDBAntlrDdlParser;
+import io.debezium.ddl.parser.mysql.generated.MySqlParser;
+import io.debezium.ddl.parser.mysql.generated.MySqlParserBaseListener;
+import io.debezium.text.ParsingException;
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.tree.ErrorNode;
+import org.antlr.v4.runtime.tree.ParseTreeListener;
+import org.antlr.v4.runtime.tree.TerminalNode;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+
+public class TiDBAntlrDdlParserListener extends MySqlParserBaseListener
+        implements AntlrDdlParserListener {
+    private final List<ParseTreeListener> listeners = new CopyOnWriteArrayList<>();
+
+    /** Flag for skipping phase. */
+    private boolean skipNodes;
+
+    /**
+     * Count of skipped nodes. Each enter event during the skipping phase increases the counter
+     * and each exit event decreases it. When the counter drops back to 0, the skipping phase
+     * ends.
+     */
+    private int skippedNodesCount = 0;
+
+    /** Collection of caught exceptions. */
+    private final Collection<ParsingException> errors = new ArrayList<>();
+
+    public TiDBAntlrDdlParserListener(TiDBAntlrDdlParser parser) {
+        // initialize listeners
+        listeners.add(new CreateAndAlterDatabaseParserListener(parser));
+        listeners.add(new DropDatabaseParserListener(parser));
+        listeners.add(new CreateTableParserListener(parser, listeners));
+        listeners.add(new AlterTableParserListener(parser, listeners));
+        listeners.add(new DropTableParserListener(parser));
+        listeners.add(new RenameTableParserListener(parser));
+        listeners.add(new TruncateTableParserListener(parser));
+        listeners.add(new CreateViewParserListener(parser, listeners));
+        listeners.add(new AlterViewParserListener(parser, listeners));
+        listeners.add(new DropViewParserListener(parser));
+        listeners.add(new CreateUniqueIndexParserListener(parser));
+        listeners.add(new SetStatementParserListener(parser));
+        listeners.add(new UseStatementParserListener(parser));
+    }
+
+    /**
+     * Returns all caught errors during tree walk.
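+     * Errors raised by the delegated listeners are accumulated here rather than rethrown
+     * immediately, so that walking the DDL parse tree can continue past a failing statement.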
+     *
+     * @return collection of parsing exceptions
+     */
+    @Override
+    public Collection<ParsingException> getErrors() {
+        return errors;
+    }
+
+    @Override
+    public void enterEveryRule(ParserRuleContext ctx) {
+        if (skipNodes) {
+            skippedNodesCount++;
+        } else {
+            ProxyParseTreeListenerUtil.delegateEnterRule(ctx, listeners, errors);
+        }
+    }
+
+    @Override
+    public void exitEveryRule(ParserRuleContext ctx) {
+        if (skipNodes) {
+            if (skippedNodesCount == 0) {
+                // back in the node where skipping started
+                skipNodes = false;
+            } else {
+                // moving back up the tree, so decrease the number of skipped nodes
+                skippedNodesCount--;
+            }
+        } else {
+            ProxyParseTreeListenerUtil.delegateExitRule(ctx, listeners, errors);
+        }
+    }
+
+    @Override
+    public void visitErrorNode(ErrorNode node) {
+        ProxyParseTreeListenerUtil.visitErrorNode(node, listeners, errors);
+    }
+
+    @Override
+    public void visitTerminal(TerminalNode node) {
+        ProxyParseTreeListenerUtil.visitTerminal(node, listeners, errors);
+    }
+
+    @Override
+    public void enterRoutineBody(MySqlParser.RoutineBodyContext ctx) {
+        // this is the grammar rule for the BEGIN ... END part of statements; skip it
+        skipNodes = true;
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java
new file mode 100644
index 00000000000..6352666b8d2
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBAntlrDdlParser.java
@@ -0,0 +1,330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package io.debezium.connector.tidb; + +import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters; + +import io.debezium.antlr.AntlrDdlParserListener; +import io.debezium.antlr.DataTypeResolver; +import io.debezium.connector.mysql.MySqlSystemVariables; +import io.debezium.connector.mysql.antlr.MySqlAntlrDdlParser; +import io.debezium.connector.tidb.Listeners.TiDBAntlrDdlParserListener; +import io.debezium.ddl.parser.mysql.generated.MySqlLexer; +import io.debezium.ddl.parser.mysql.generated.MySqlParser; +import io.debezium.relational.SystemVariables; +import io.debezium.relational.Tables; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.tree.ParseTree; + +import java.sql.Types; +import java.util.Arrays; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +public class TiDBAntlrDdlParser extends MySqlAntlrDdlParser { + private final ConcurrentMap charsetNameForDatabase = new ConcurrentHashMap<>(); + private final TiDBValueConverters converters; + private final Tables.TableFilter tableFilter; + + public TiDBAntlrDdlParser() { + this(null, Tables.TableFilter.includeAll()); + } + + public TiDBAntlrDdlParser(TiDBValueConverters converters) { + this(converters, Tables.TableFilter.includeAll()); + } + + public TiDBAntlrDdlParser(TiDBValueConverters converters, Tables.TableFilter tableFilter) { + this(true, false, false, converters, tableFilter); + } + + public TiDBAntlrDdlParser( + boolean throwErrorsFromTreeWalk, + boolean includeViews, + boolean includeComments, + TiDBValueConverters converters, + Tables.TableFilter tableFilter) { + // super(throwErrorsFromTreeWalk, includeViews, includeComments); + systemVariables = new MySqlSystemVariables(); + this.converters = converters; + this.tableFilter = tableFilter; + } + + @Override + protected ParseTree parseTree(MySqlParser parser) { + return parser.root(); + } + + @Override + protected AntlrDdlParserListener createParseTreeWalkerListener() { + return new TiDBAntlrDdlParserListener(this); + } + + @Override + protected MySqlLexer createNewLexerInstance(CharStream charStreams) { + return new MySqlLexer(charStreams); + } + + @Override + protected MySqlParser createNewParserInstance(CommonTokenStream commonTokenStream) { + return new MySqlParser(commonTokenStream); + } + + @Override + protected SystemVariables createNewSystemVariablesInstance() { + return new MySqlSystemVariables(); + } + + @Override + protected boolean isGrammarInUpperCase() { + return true; + } + + @Override + protected DataTypeResolver initializeDataTypeResolver() { + DataTypeResolver.Builder dataTypeResolverBuilder = new DataTypeResolver.Builder(); + + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.StringDataTypeContext.class.getCanonicalName(), + Arrays.asList( + new DataTypeResolver.DataTypeEntry(Types.CHAR, MySqlParser.CHAR), + new DataTypeResolver.DataTypeEntry( + Types.VARCHAR, MySqlParser.CHAR, MySqlParser.VARYING), + new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.VARCHAR), + new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.TINYTEXT), + new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.TEXT), + new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.MEDIUMTEXT), + new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.LONGTEXT), + new DataTypeResolver.DataTypeEntry(Types.NCHAR, MySqlParser.NCHAR), + new DataTypeResolver.DataTypeEntry( + Types.NVARCHAR, MySqlParser.NCHAR, 
MySqlParser.VARYING), + new DataTypeResolver.DataTypeEntry(Types.NVARCHAR, MySqlParser.NVARCHAR), + new DataTypeResolver.DataTypeEntry( + Types.CHAR, MySqlParser.CHAR, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.VARCHAR, MySqlParser.VARCHAR, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.VARCHAR, MySqlParser.TINYTEXT, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.VARCHAR, MySqlParser.TEXT, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.VARCHAR, MySqlParser.MEDIUMTEXT, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.VARCHAR, MySqlParser.LONGTEXT, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.NCHAR, MySqlParser.NCHAR, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.NVARCHAR, MySqlParser.NVARCHAR, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry(Types.CHAR, MySqlParser.CHARACTER), + new DataTypeResolver.DataTypeEntry( + Types.VARCHAR, MySqlParser.CHARACTER, MySqlParser.VARYING))); + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.NationalStringDataTypeContext.class.getCanonicalName(), + Arrays.asList( + new DataTypeResolver.DataTypeEntry( + Types.NVARCHAR, MySqlParser.NATIONAL, MySqlParser.VARCHAR) + .setSuffixTokens(MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.NCHAR, MySqlParser.NATIONAL, MySqlParser.CHARACTER) + .setSuffixTokens(MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry( + Types.NVARCHAR, MySqlParser.NCHAR, MySqlParser.VARCHAR) + .setSuffixTokens(MySqlParser.BINARY))); + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.NationalVaryingStringDataTypeContext.class.getCanonicalName(), + Arrays.asList( + new DataTypeResolver.DataTypeEntry( + Types.NVARCHAR, + MySqlParser.NATIONAL, + MySqlParser.CHAR, + MySqlParser.VARYING), + new DataTypeResolver.DataTypeEntry( + Types.NVARCHAR, + MySqlParser.NATIONAL, + MySqlParser.CHARACTER, + MySqlParser.VARYING))); + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.DimensionDataTypeContext.class.getCanonicalName(), + Arrays.asList( + new DataTypeResolver.DataTypeEntry(Types.SMALLINT, MySqlParser.TINYINT) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.SMALLINT, MySqlParser.INT1) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.SMALLINT, MySqlParser.SMALLINT) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.SMALLINT, MySqlParser.INT2) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.MEDIUMINT) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.INT3) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.MIDDLEINT) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.INT) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.INTEGER) + .setSuffixTokens( + MySqlParser.SIGNED, + 
MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.INT4) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.BIGINT, MySqlParser.BIGINT) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.BIGINT, MySqlParser.INT8) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.REAL, MySqlParser.REAL) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.DOUBLE, MySqlParser.DOUBLE) + .setSuffixTokens( + MySqlParser.PRECISION, + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.DOUBLE, MySqlParser.FLOAT8) + .setSuffixTokens( + MySqlParser.PRECISION, + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.FLOAT, MySqlParser.FLOAT) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.FLOAT, MySqlParser.FLOAT4) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL), + new DataTypeResolver.DataTypeEntry(Types.DECIMAL, MySqlParser.DECIMAL) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL) + .setDefaultLengthScaleDimension(10, 0), + new DataTypeResolver.DataTypeEntry(Types.DECIMAL, MySqlParser.DEC) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL) + .setDefaultLengthScaleDimension(10, 0), + new DataTypeResolver.DataTypeEntry(Types.DECIMAL, MySqlParser.FIXED) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL) + .setDefaultLengthScaleDimension(10, 0), + new DataTypeResolver.DataTypeEntry(Types.NUMERIC, MySqlParser.NUMERIC) + .setSuffixTokens( + MySqlParser.SIGNED, + MySqlParser.UNSIGNED, + MySqlParser.ZEROFILL) + .setDefaultLengthScaleDimension(10, 0), + new DataTypeResolver.DataTypeEntry(Types.BIT, MySqlParser.BIT), + new DataTypeResolver.DataTypeEntry(Types.TIME, MySqlParser.TIME), + new DataTypeResolver.DataTypeEntry( + Types.TIMESTAMP_WITH_TIMEZONE, MySqlParser.TIMESTAMP), + new DataTypeResolver.DataTypeEntry(Types.TIMESTAMP, MySqlParser.DATETIME), + new DataTypeResolver.DataTypeEntry(Types.BINARY, MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry(Types.VARBINARY, MySqlParser.VARBINARY), + new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.BLOB), + new DataTypeResolver.DataTypeEntry(Types.INTEGER, MySqlParser.YEAR))); + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.SimpleDataTypeContext.class.getCanonicalName(), + Arrays.asList( + new DataTypeResolver.DataTypeEntry(Types.DATE, MySqlParser.DATE), + new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.TINYBLOB), + new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.MEDIUMBLOB), + new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.LONGBLOB), + new DataTypeResolver.DataTypeEntry(Types.BOOLEAN, MySqlParser.BOOL), + new DataTypeResolver.DataTypeEntry(Types.BOOLEAN, MySqlParser.BOOLEAN), + new DataTypeResolver.DataTypeEntry(Types.BIGINT, MySqlParser.SERIAL))); + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.CollectionDataTypeContext.class.getCanonicalName(), + Arrays.asList( + 
new DataTypeResolver.DataTypeEntry(Types.CHAR, MySqlParser.ENUM) + .setSuffixTokens(MySqlParser.BINARY), + new DataTypeResolver.DataTypeEntry(Types.CHAR, MySqlParser.SET) + .setSuffixTokens(MySqlParser.BINARY))); + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.SpatialDataTypeContext.class.getCanonicalName(), + Arrays.asList( + new DataTypeResolver.DataTypeEntry( + Types.OTHER, MySqlParser.GEOMETRYCOLLECTION), + new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.GEOMCOLLECTION), + new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.LINESTRING), + new DataTypeResolver.DataTypeEntry( + Types.OTHER, MySqlParser.MULTILINESTRING), + new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.MULTIPOINT), + new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.MULTIPOLYGON), + new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.POINT), + new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.POLYGON), + new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.JSON), + new DataTypeResolver.DataTypeEntry(Types.OTHER, MySqlParser.GEOMETRY))); + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.LongVarbinaryDataTypeContext.class.getCanonicalName(), + Arrays.asList( + new DataTypeResolver.DataTypeEntry(Types.BLOB, MySqlParser.LONG) + .setSuffixTokens(MySqlParser.VARBINARY))); + dataTypeResolverBuilder.registerDataTypes( + MySqlParser.LongVarcharDataTypeContext.class.getCanonicalName(), + Arrays.asList( + new DataTypeResolver.DataTypeEntry(Types.VARCHAR, MySqlParser.LONG) + .setSuffixTokens(MySqlParser.VARCHAR))); + + return dataTypeResolverBuilder.build(); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java new file mode 100644 index 00000000000..e1ca467d9fb --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBEventMetadataProvider.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package io.debezium.connector.tidb;
+
+import io.debezium.connector.AbstractSourceInfo;
+import io.debezium.connector.mysql.MySqlOffsetContext;
+import io.debezium.data.Envelope;
+import io.debezium.pipeline.source.spi.EventMetadataProvider;
+import io.debezium.pipeline.spi.OffsetContext;
+import io.debezium.schema.DataCollectionId;
+import io.debezium.util.Collect;
+import org.apache.kafka.connect.data.Struct;
+
+import java.time.Instant;
+import java.util.Map;
+
+import static org.apache.flink.cdc.connectors.tidb.source.offset.TiDBSourceInfo.COMMIT_VERSION_KEY;
+
+public class TiDBEventMetadataProvider implements EventMetadataProvider {
+    @Override
+    public Instant getEventTimestamp(
+            DataCollectionId source, OffsetContext offset, Object key, Struct value) {
+        if (value == null) {
+            return null;
+        }
+        final Struct sourceInfo = value.getStruct(Envelope.FieldName.SOURCE);
+        if (sourceInfo == null) {
+            return null;
+        }
+        final Long timestamp = sourceInfo.getInt64(AbstractSourceInfo.TIMESTAMP_KEY);
+        return timestamp == null ? null : Instant.ofEpochMilli(timestamp);
+    }
+
+    @Override
+    public Map<String, String> getEventSourcePosition(
+            DataCollectionId source, OffsetContext offset, Object key, Struct value) {
+        if (value == null) {
+            return null;
+        }
+        final Struct sourceInfo = value.getStruct(Envelope.FieldName.SOURCE);
+        // guard on the extracted source struct, not on the collection id parameter
+        if (sourceInfo == null) {
+            return null;
+        }
+        return Collect.hashMapOf(COMMIT_VERSION_KEY, sourceInfo.getString(COMMIT_VERSION_KEY));
+    }
+
+    @Override
+    public String getTransactionId(
+            DataCollectionId source, OffsetContext offset, Object key, Struct value) {
+        return ((MySqlOffsetContext) offset).getTransactionId();
+    }
+}
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java
new file mode 100644
index 00000000000..f14e595ac65
--- /dev/null
+++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBPartition.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package io.debezium.connector.tidb; + +import io.debezium.pipeline.spi.Partition; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; + +public class TiDBPartition implements Partition { + private final String serverName; + + public TiDBPartition(String serverName) { + this.serverName = serverName; + } + + @Override + public Map getSourcePartition() { + return Collections.emptyMap(); + } + + @Override + public Map getLoggingContext() { + return Partition.super.getLoggingContext(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + final io.debezium.connector.tidb.TiDBPartition other = + (io.debezium.connector.tidb.TiDBPartition) obj; + return Objects.equals(serverName, other.serverName); + } + + @Override + public String toString() { + return super.toString(); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java new file mode 100644 index 00000000000..70a61a0eebe --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TiDBTaskContext.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.debezium.connector.tidb; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; + +import io.debezium.connector.common.CdcSourceTaskContext; +import io.debezium.relational.TableId; +import io.debezium.schema.TopicSelector; + +public class TiDBTaskContext extends CdcSourceTaskContext { + private final TiDBDatabaseSchema schema; + private final TopicSelector topicSelector; + + public TiDBTaskContext(TiDBConnectorConfig config, TiDBDatabaseSchema schema) { + super(config.getContextName(), config.getLogicalName(), schema::tableIds); + this.schema = schema; + topicSelector = TidbTopicSelector.defaultSelector(config); + } + + public TiDBDatabaseSchema getSchema() { + return schema; + } + + public TopicSelector getTopicSelector() { + return topicSelector; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java new file mode 100644 index 00000000000..5663029c5de --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/io/debezium/connector/tidb/TidbTopicSelector.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.debezium.connector.tidb; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; + +import io.debezium.annotation.ThreadSafe; +import io.debezium.relational.TableId; +import io.debezium.schema.TopicSelector; + +@ThreadSafe +public class TidbTopicSelector { + + /** + * Get the default topic selector logic, which uses a '.' delimiter character when needed. 
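+     * Topic names produced by this selector take the form {@code prefix.catalogName.tableName}.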
+ * + * @param prefix the name of the prefix to be used for all topics; may not be null and must not + * terminate in the {@code delimiter} + * @param heartbeatPrefix the name of the prefix to be used for all heartbeat topics; may not be + * null and must not terminate in the {@code delimiter} + * @return the topic selector; never null + */ + @Deprecated + public static TopicSelector defaultSelector(String prefix, String heartbeatPrefix) { + return TopicSelector.defaultSelector( + prefix, + heartbeatPrefix, + ".", + (t, pref, delimiter) -> String.join(delimiter, pref, t.catalog(), t.table())); + } + + public static TopicSelector defaultSelector(TiDBConnectorConfig connectorConfig) { + return TopicSelector.defaultSelector( + connectorConfig, + (tableId, prefix, delimiter) -> + String.join(delimiter, prefix, tableId.catalog(), tableId.table())); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TDBSourceOptions.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TDBSourceOptions.java deleted file mode 100644 index fc468e68512..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TDBSourceOptions.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.cdc.connectors.tidb.table.utils.UriHostMapping; -import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.ConfigOptions; -import org.apache.flink.configuration.Configuration; - -import org.tikv.common.ConfigUtils; -import org.tikv.common.TiConfiguration; - -import java.util.Map; -import java.util.Optional; - -/** Configurations for {@link TiDBSource}. 
*/ -public class TDBSourceOptions { - - private TDBSourceOptions() {} - - public static final ConfigOption DATABASE_NAME = - ConfigOptions.key("database-name") - .stringType() - .noDefaultValue() - .withDescription("Database name of the TiDB server to monitor."); - - public static final ConfigOption TABLE_NAME = - ConfigOptions.key("table-name") - .stringType() - .noDefaultValue() - .withDescription("Table name of the TiDB database to monitor."); - - public static final ConfigOption SCAN_STARTUP_MODE = - ConfigOptions.key("scan.startup.mode") - .stringType() - .defaultValue("initial") - .withDescription( - "Optional startup mode for TiDB CDC consumer, valid enumerations are " - + "\"initial\", \"latest-offset\""); - - public static final ConfigOption PD_ADDRESSES = - ConfigOptions.key("pd-addresses") - .stringType() - .noDefaultValue() - .withDescription("TiKV cluster's PD address"); - - public static final ConfigOption HOST_MAPPING = - ConfigOptions.key("host-mapping") - .stringType() - .noDefaultValue() - .withDescription( - "TiKV cluster's host-mapping used to configure public IP and intranet IP mapping. When the TiKV cluster is running on the intranet, you can map a set of intranet IPs to public IPs for an outside Flink cluster to access. The format is {Intranet IP1}:{Public IP1};{Intranet IP2}:{Public IP2}, e.g. 192.168.0.2:8.8.8.8;192.168.0.3:9.9.9.9."); - public static final ConfigOption TIKV_GRPC_TIMEOUT = - ConfigOptions.key(ConfigUtils.TIKV_GRPC_TIMEOUT) - .longType() - .noDefaultValue() - .withDescription("TiKV GRPC timeout in ms"); - - public static final ConfigOption TIKV_GRPC_SCAN_TIMEOUT = - ConfigOptions.key(ConfigUtils.TIKV_GRPC_SCAN_TIMEOUT) - .longType() - .noDefaultValue() - .withDescription("TiKV GRPC scan timeout in ms"); - - public static final ConfigOption TIKV_BATCH_GET_CONCURRENCY = - ConfigOptions.key(ConfigUtils.TIKV_BATCH_GET_CONCURRENCY) - .intType() - .noDefaultValue() - .withDescription("TiKV GRPC batch get concurrency"); - - public static final ConfigOption TIKV_BATCH_SCAN_CONCURRENCY = - ConfigOptions.key(ConfigUtils.TIKV_BATCH_SCAN_CONCURRENCY) - .intType() - .noDefaultValue() - .withDescription("TiKV GRPC batch scan concurrency"); - - public static TiConfiguration getTiConfiguration( - final String pdAddrsStr, final String hostMapping, final Map options) { - final Configuration configuration = Configuration.fromMap(options); - - final TiConfiguration tiConf = TiConfiguration.createDefault(pdAddrsStr); - Optional.of(new UriHostMapping(hostMapping)).ifPresent(tiConf::setHostMapping); - configuration.getOptional(TIKV_GRPC_TIMEOUT).ifPresent(tiConf::setTimeout); - configuration.getOptional(TIKV_GRPC_SCAN_TIMEOUT).ifPresent(tiConf::setScanTimeout); - configuration - .getOptional(TIKV_BATCH_GET_CONCURRENCY) - .ifPresent(tiConf::setBatchGetConcurrency); - - configuration - .getOptional(TIKV_BATCH_SCAN_CONCURRENCY) - .ifPresent(tiConf::setBatchScanConcurrency); - return tiConf; - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiDBSource.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiDBSource.java deleted file mode 100644 index fa74f69ba88..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiDBSource.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one 
or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.cdc.connectors.tidb.table.StartupOptions; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; - -import org.tikv.common.TiConfiguration; - -/** A builder to build a SourceFunction which can read snapshot and continue to read CDC events. */ -public class TiDBSource { - - public static Builder builder() { - return new Builder<>(); - } - - /** Builder class of {@link TiDBSource}. */ - public static class Builder { - private String database; - private String tableName; - private StartupOptions startupOptions = StartupOptions.initial(); - private TiConfiguration tiConf; - - private TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema; - private TiKVChangeEventDeserializationSchema changeEventDeserializationSchema; - - /** Database name to be monitored. */ - public Builder database(String database) { - this.database = database; - return this; - } - - /** TableName name to be monitored. */ - public Builder tableName(String tableName) { - this.tableName = tableName; - return this; - } - - /** The deserializer used to convert from consumed snapshot event from TiKV. */ - public Builder snapshotEventDeserializer( - TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema) { - this.snapshotEventDeserializationSchema = snapshotEventDeserializationSchema; - return this; - } - - /** The deserializer used to convert from consumed change event from TiKV. */ - public Builder changeEventDeserializer( - TiKVChangeEventDeserializationSchema changeEventDeserializationSchema) { - this.changeEventDeserializationSchema = changeEventDeserializationSchema; - return this; - } - - /** Specifies the startup options. */ - public Builder startupOptions(StartupOptions startupOptions) { - this.startupOptions = startupOptions; - return this; - } - - /** TIDB config. 
*/ - public Builder tiConf(TiConfiguration tiConf) { - this.tiConf = tiConf; - return this; - } - - public RichParallelSourceFunction build() { - - return new TiKVRichParallelSourceFunction<>( - snapshotEventDeserializationSchema, - changeEventDeserializationSchema, - tiConf, - startupOptions.startupMode, - database, - tableName); - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVChangeEventDeserializationSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVChangeEventDeserializationSchema.java deleted file mode 100644 index bf652624fdf..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVChangeEventDeserializationSchema.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.cdc.common.annotation.PublicEvolving; -import org.apache.flink.util.Collector; - -import org.tikv.kvproto.Cdcpb.Event.Row; - -import java.io.Serializable; - -/** - * The deserialization schema describes how to turn the TiKV Change Event into data types - * (Java/Scala objects) that are processed by Flink. - * - * @param The type created by the deserialization schema. - */ -@PublicEvolving -public interface TiKVChangeEventDeserializationSchema - extends Serializable, ResultTypeQueryable { - - /** Deserialize the TiDB record. */ - void deserialize(Row record, Collector out) throws Exception; -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java deleted file mode 100644 index 9570f40ed23..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVRichParallelSourceFunction.java +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.api.common.state.CheckpointListener; -import org.apache.flink.api.common.state.ListState; -import org.apache.flink.api.common.state.ListStateDescriptor; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeutils.base.LongSerializer; -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.cdc.connectors.tidb.metrics.TiDBSourceMetrics; -import org.apache.flink.cdc.connectors.tidb.table.StartupMode; -import org.apache.flink.cdc.connectors.tidb.table.utils.TableKeyRangeUtils; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.runtime.state.FunctionInitializationContext; -import org.apache.flink.runtime.state.FunctionSnapshotContext; -import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; -import org.apache.flink.util.Collector; -import org.apache.flink.util.Preconditions; - -import org.apache.flink.shaded.guava31.com.google.common.util.concurrent.ThreadFactoryBuilder; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.tikv.cdc.CDCClient; -import org.tikv.common.TiConfiguration; -import org.tikv.common.TiSession; -import org.tikv.common.key.RowKey; -import org.tikv.common.meta.TiTableInfo; -import org.tikv.common.meta.TiTimestamp; -import org.tikv.kvproto.Cdcpb; -import org.tikv.kvproto.Coprocessor; -import org.tikv.kvproto.Kvrpcpb; -import org.tikv.shade.com.google.protobuf.ByteString; -import org.tikv.txn.KVClient; - -import java.util.List; -import java.util.Objects; -import java.util.TreeMap; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; - -/** - * The source implementation for TiKV that read snapshot events first and then read the change - * event. - */ -public class TiKVRichParallelSourceFunction extends RichParallelSourceFunction - implements CheckpointListener, CheckpointedFunction, ResultTypeQueryable { - - private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(TiKVRichParallelSourceFunction.class); - private static final long SNAPSHOT_VERSION_EPOCH = -1L; - private static final long STREAMING_VERSION_START_EPOCH = 0L; - - private final TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema; - private final TiKVChangeEventDeserializationSchema changeEventDeserializationSchema; - private final TiConfiguration tiConf; - private final StartupMode startupMode; - private final String database; - private final String tableName; - - /** Task local variables. 
*/ - private transient TiSession session = null; - - private transient Coprocessor.KeyRange keyRange = null; - private transient CDCClient cdcClient = null; - private transient SourceContext sourceContext = null; - private transient volatile long resolvedTs = -1L; - private transient TreeMap prewrites = null; - private transient TreeMap commits = null; - private transient BlockingQueue committedEvents = null; - private transient OutputCollector outputCollector; - - private transient boolean running = true; - private transient ExecutorService executorService; - private transient TiDBSourceMetrics sourceMetrics; - - /** offset state. */ - private transient ListState offsetState; - - private static final long CLOSE_TIMEOUT = 30L; - - public TiKVRichParallelSourceFunction( - TiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema, - TiKVChangeEventDeserializationSchema changeEventDeserializationSchema, - TiConfiguration tiConf, - StartupMode startupMode, - String database, - String tableName) { - this.snapshotEventDeserializationSchema = snapshotEventDeserializationSchema; - this.changeEventDeserializationSchema = changeEventDeserializationSchema; - this.tiConf = tiConf; - this.startupMode = startupMode; - this.database = database; - this.tableName = tableName; - } - - @Override - public void open(final Configuration config) throws Exception { - super.open(config); - session = TiSession.create(tiConf); - TiTableInfo tableInfo = session.getCatalog().getTable(database, tableName); - if (tableInfo == null) { - throw new RuntimeException( - String.format("Table %s.%s does not exist.", database, tableName)); - } - long tableId = tableInfo.getId(); - keyRange = - TableKeyRangeUtils.getTableKeyRange( - tableId, - getRuntimeContext().getNumberOfParallelSubtasks(), - getRuntimeContext().getIndexOfThisSubtask()); - cdcClient = new CDCClient(session, keyRange); - prewrites = new TreeMap<>(); - commits = new TreeMap<>(); - // cdc event will lose if pull cdc event block when region split - // use queue to separate read and write to ensure pull event unblock. - // since sink jdbc is slow, 5000W queue size may be safe size. - committedEvents = new LinkedBlockingQueue<>(); - outputCollector = new OutputCollector<>(); - resolvedTs = - startupMode == StartupMode.INITIAL - ? 
SNAPSHOT_VERSION_EPOCH - : STREAMING_VERSION_START_EPOCH; - ThreadFactory threadFactory = - new ThreadFactoryBuilder() - .setNameFormat( - "tidb-source-function-" - + getRuntimeContext().getIndexOfThisSubtask()) - .build(); - executorService = Executors.newSingleThreadExecutor(threadFactory); - final MetricGroup metricGroup = getRuntimeContext().getMetricGroup(); - sourceMetrics = new TiDBSourceMetrics(metricGroup); - sourceMetrics.registerMetrics(); - } - - @Override - public void run(final SourceContext ctx) throws Exception { - sourceContext = ctx; - outputCollector.context = sourceContext; - - if (startupMode == StartupMode.INITIAL) { - synchronized (sourceContext.getCheckpointLock()) { - readSnapshotEvents(); - } - } else { - LOG.info("Skip snapshot read"); - resolvedTs = session.getTimestamp().getVersion(); - } - - LOG.info("start read change events"); - cdcClient.start(resolvedTs); - running = true; - readChangeEvents(); - } - - private void handleRow(final Cdcpb.Event.Row row) { - if (!TableKeyRangeUtils.isRecordKey(row.getKey().toByteArray())) { - // Don't handle index key for now - return; - } - LOG.debug("binlog record, type: {}, data: {}", row.getType(), row); - switch (row.getType()) { - case COMMITTED: - prewrites.put(RowKeyWithTs.ofStart(row), row); - commits.put(RowKeyWithTs.ofCommit(row), row); - break; - case COMMIT: - commits.put(RowKeyWithTs.ofCommit(row), row); - break; - case PREWRITE: - prewrites.put(RowKeyWithTs.ofStart(row), row); - break; - case ROLLBACK: - prewrites.remove(RowKeyWithTs.ofStart(row)); - break; - default: - LOG.warn("Unsupported row type:" + row.getType()); - } - } - - protected void readSnapshotEvents() throws Exception { - LOG.info("read snapshot events"); - try (KVClient scanClient = session.createKVClient()) { - long startTs = session.getTimestamp().getVersion(); - ByteString start = keyRange.getStart(); - while (true) { - final List segment = - scanClient.scan(start, keyRange.getEnd(), startTs); - - if (segment.isEmpty()) { - resolvedTs = startTs; - break; - } - - for (final Kvrpcpb.KvPair pair : segment) { - if (TableKeyRangeUtils.isRecordKey(pair.getKey().toByteArray())) { - snapshotEventDeserializationSchema.deserialize(pair, outputCollector); - reportMetrics(0L, startTs); - } - } - - start = - RowKey.toRawKey(segment.get(segment.size() - 1).getKey()) - .next() - .toByteString(); - } - } - } - - protected void readChangeEvents() throws Exception { - LOG.info("read change event from resolvedTs:{}", resolvedTs); - // child thread to sink committed rows. 
- executorService.execute( - () -> { - while (running) { - try { - Cdcpb.Event.Row committedRow = committedEvents.take(); - changeEventDeserializationSchema.deserialize( - committedRow, outputCollector); - // use startTs of row as messageTs, use commitTs of row as fetchTs - reportMetrics(committedRow.getStartTs(), committedRow.getCommitTs()); - } catch (Exception e) { - e.printStackTrace(); - } - } - }); - while (resolvedTs >= STREAMING_VERSION_START_EPOCH) { - for (int i = 0; i < 1000; i++) { - final Cdcpb.Event.Row row = cdcClient.get(); - if (row == null) { - break; - } - handleRow(row); - } - resolvedTs = cdcClient.getMaxResolvedTs(); - if (commits.size() > 0) { - flushRows(resolvedTs); - } - } - } - - protected void flushRows(final long timestamp) throws Exception { - Preconditions.checkState(sourceContext != null, "sourceContext shouldn't be null"); - synchronized (sourceContext) { - while (!commits.isEmpty() && commits.firstKey().timestamp <= timestamp) { - final Cdcpb.Event.Row commitRow = commits.pollFirstEntry().getValue(); - final Cdcpb.Event.Row prewriteRow = - prewrites.remove(RowKeyWithTs.ofStart(commitRow)); - // if pull cdc event block when region split, cdc event will lose. - committedEvents.offer(prewriteRow); - } - } - } - - @Override - public void cancel() { - try { - running = false; - if (cdcClient != null) { - cdcClient.close(); - } - if (executorService != null) { - executorService.shutdown(); - if (!executorService.awaitTermination(CLOSE_TIMEOUT, TimeUnit.SECONDS)) { - LOG.warn( - "Failed to close the tidb source function in {} seconds.", - CLOSE_TIMEOUT); - } - } - } catch (final Exception e) { - LOG.error("Unable to close cdcClient", e); - } - } - - @Override - public void snapshotState(final FunctionSnapshotContext context) throws Exception { - LOG.info( - "snapshotState checkpoint: {} at resolvedTs: {}", - context.getCheckpointId(), - resolvedTs); - flushRows(resolvedTs); - offsetState.clear(); - offsetState.add(resolvedTs); - } - - @Override - public void initializeState(final FunctionInitializationContext context) throws Exception { - LOG.info("initialize checkpoint"); - offsetState = - context.getOperatorStateStore() - .getListState( - new ListStateDescriptor<>( - "resolvedTsState", LongSerializer.INSTANCE)); - if (context.isRestored()) { - for (final Long offset : offsetState.get()) { - resolvedTs = offset; - LOG.info("Restore State from resolvedTs: {}", resolvedTs); - return; - } - } else { - resolvedTs = 0; - LOG.info("Initialize State from resolvedTs: {}", resolvedTs); - } - } - - @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception { - // do nothing - } - - @Override - public TypeInformation getProducedType() { - return snapshotEventDeserializationSchema.getProducedType(); - } - - // --------------------------------------- - // static Utils classes - // --------------------------------------- - private static class RowKeyWithTs implements Comparable { - private final long timestamp; - private final RowKey rowKey; - - private RowKeyWithTs(final long timestamp, final RowKey rowKey) { - this.timestamp = timestamp; - this.rowKey = rowKey; - } - - private RowKeyWithTs(final long timestamp, final byte[] key) { - this(timestamp, RowKey.decode(key)); - } - - @Override - public int compareTo(final RowKeyWithTs that) { - int res = Long.compare(this.timestamp, that.timestamp); - if (res == 0) { - res = Long.compare(this.rowKey.getTableId(), that.rowKey.getTableId()); - } - if (res == 0) { - res = 
Long.compare(this.rowKey.getHandle(), that.rowKey.getHandle()); - } - return res; - } - - @Override - public int hashCode() { - return Objects.hash(this.timestamp, this.rowKey.getTableId(), this.rowKey.getHandle()); - } - - @Override - public boolean equals(final Object thatObj) { - if (thatObj instanceof RowKeyWithTs) { - final RowKeyWithTs that = (RowKeyWithTs) thatObj; - return this.timestamp == that.timestamp && this.rowKey.equals(that.rowKey); - } - return false; - } - - static RowKeyWithTs ofStart(final Cdcpb.Event.Row row) { - return new RowKeyWithTs(row.getStartTs(), row.getKey().toByteArray()); - } - - static RowKeyWithTs ofCommit(final Cdcpb.Event.Row row) { - return new RowKeyWithTs(row.getCommitTs(), row.getKey().toByteArray()); - } - } - - private static class OutputCollector implements Collector { - - private SourceContext context; - - @Override - public void collect(T record) { - context.collect(record); - } - - @Override - public void close() { - // do nothing - } - } - - private void reportMetrics(long messageTs, long fetchTs) { - long now = System.currentTimeMillis(); - // record the latest process time - sourceMetrics.recordProcessTime(now); - long messageTimestamp = TiTimestamp.extractPhysical(messageTs); - long fetchTimestamp = TiTimestamp.extractPhysical(fetchTs); - if (messageTimestamp > 0L) { - // report fetch delay - if (fetchTimestamp >= messageTimestamp) { - sourceMetrics.recordFetchDelay(fetchTimestamp - messageTimestamp); - } - // report emit delay - sourceMetrics.recordEmitDelay(now - messageTimestamp); - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVSnapshotEventDeserializationSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVSnapshotEventDeserializationSchema.java deleted file mode 100644 index a0a43658181..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/TiKVSnapshotEventDeserializationSchema.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb; - -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.cdc.common.annotation.PublicEvolving; -import org.apache.flink.util.Collector; - -import org.tikv.kvproto.Kvrpcpb.KvPair; - -import java.io.Serializable; - -/** - * The deserialization schema describes how to turn the TiKV snapshot event into data types - * (Java/Scala objects) that are processed by Flink. - * - * @param The type created by the deserialization schema. 
- */ -@PublicEvolving -public interface TiKVSnapshotEventDeserializationSchema - extends Serializable, ResultTypeQueryable { - - /** Deserialize the TiDB record. */ - void deserialize(KvPair record, Collector out) throws Exception; -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java index 1f32c0f3411..2fe98827b5f 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/metrics/TiDBSourceMetrics.java @@ -17,7 +17,6 @@ package org.apache.flink.cdc.connectors.tidb.metrics; -import org.apache.flink.cdc.connectors.tidb.TiKVRichParallelSourceFunction; import org.apache.flink.metrics.Gauge; import org.apache.flink.metrics.MetricGroup; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java new file mode 100644 index 00000000000..d1598fcc0cb --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialect.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.dialect.JdbcDataSourceDialect; +import org.apache.flink.cdc.connectors.base.relational.connection.JdbcConnectionFactory; +import org.apache.flink.cdc.connectors.base.relational.connection.JdbcConnectionPoolFactory; +import org.apache.flink.cdc.connectors.base.source.assigner.splitter.ChunkSplitter; +import org.apache.flink.cdc.connectors.base.source.assigner.state.ChunkSplitterState; +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.reader.external.FetchTask; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnectionPoolFactory; +import org.apache.flink.cdc.connectors.tidb.source.fetch.TiDBScanFetchTask; +import org.apache.flink.cdc.connectors.tidb.source.fetch.TiDBSourceFetchTaskContext; +import org.apache.flink.cdc.connectors.tidb.source.fetch.TiDBStreamFetchTask; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBSchema; +import org.apache.flink.cdc.connectors.tidb.source.splitter.TiDBChunkSplitter; +import org.apache.flink.cdc.connectors.tidb.utils.TableDiscoveryUtils; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBConnectionUtils; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.util.FlinkRuntimeException; + +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.TableId; +import io.debezium.relational.history.TableChanges; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** TiDB data source dialect. 
*/ +public class TiDBDialect implements JdbcDataSourceDialect { + private static final Logger LOG = LoggerFactory.getLogger(TiDBDialect.class); + + private static final String QUOTED_CHARACTER = "`"; + private static final long serialVersionUID = 1L; + + private final TiDBSourceConfig sourceConfig; + private transient TiDBSchema tiDBSchema; + @Nullable private TiDBStreamFetchTask streamFetchTask; + + public TiDBDialect(TiDBSourceConfig sourceConfig) { + this.sourceConfig = sourceConfig; + } + + @Override + public String getName() { + return "TiDB"; + } + + @Override + public Offset displayCurrentOffset(JdbcSourceConfig sourceConfig) { + try (JdbcConnection jdbcConnection = openJdbcConnection(sourceConfig)) { + return TiDBUtils.currentBinlogOffset(jdbcConnection); + } catch (Exception e) { + throw new FlinkRuntimeException("Failed to read the current binlog offset", e); + } + } + + @Override + public boolean isDataCollectionIdCaseSensitive(JdbcSourceConfig sourceConfig) { + try (JdbcConnection jdbcConnection = openJdbcConnection(sourceConfig)) { + return TiDBConnectionUtils.isTableIdCaseInsensitive(jdbcConnection); + } catch (SQLException e) { + throw new FlinkRuntimeException("Error reading TiDB variables: " + e.getMessage(), e); + } + } + + @Override + public ChunkSplitter createChunkSplitter(JdbcSourceConfig sourceConfig) { + return new TiDBChunkSplitter( + sourceConfig, this, ChunkSplitterState.NO_SPLITTING_TABLE_STATE); + } + + @Override + public ChunkSplitter createChunkSplitter( + JdbcSourceConfig sourceConfig, ChunkSplitterState chunkSplitterState) { + return new TiDBChunkSplitter(this.sourceConfig, this, chunkSplitterState); + } + + @Override + public FetchTask.Context createFetchTaskContext(JdbcSourceConfig sourceConfig) { + return new TiDBSourceFetchTaskContext(sourceConfig, this, openJdbcConnection()); + } + + @Override + public void notifyCheckpointComplete(long checkpointId, Offset offset) throws Exception { + if (streamFetchTask != null) { + streamFetchTask.commitCurrentOffset(offset); + } + } + + @Override + public boolean isIncludeDataCollection(JdbcSourceConfig sourceConfig, TableId tableId) { + // TODO: respect the configured table filters; all data collections are included for now. + return true; + } + + @Override + public List discoverDataCollections(JdbcSourceConfig sourceConfig) { + try (JdbcConnection jdbc = openJdbcConnection(sourceConfig)) { + List tableIds = + TableDiscoveryUtils.listTables( + sourceConfig.getDatabaseList().get(0), + jdbc, + sourceConfig.getTableFilters()); + if (tableIds.isEmpty()) { + throw new FlinkRuntimeException( + "No tables discovered for the given tables: " + sourceConfig.getTableList()); + } + return tableIds; + } catch (SQLException e) { + throw new FlinkRuntimeException("Failed to discover tables: " + e.getMessage(), e); + } + } + + @Override + public Map discoverDataCollectionSchemas( + JdbcSourceConfig sourceConfig) { + final List capturedTableIds = discoverDataCollections(sourceConfig); + + try (JdbcConnection jdbc = openJdbcConnection(sourceConfig)) { + // fetch table schemas + Map tableSchemas = new HashMap<>(); + for (TableId tableId : capturedTableIds) { + TableChanges.TableChange tableSchema = queryTableSchema(jdbc, tableId); + tableSchemas.put(tableId, tableSchema); + } + return tableSchemas; + } catch (Exception e) { + throw new FlinkRuntimeException( + "Failed to discover table schemas: " + e.getMessage(), e); + } + } + + @Override + public JdbcConnection openJdbcConnection(JdbcSourceConfig sourceConfig) { + TiDBSourceConfig tiDBSourceConfig = (TiDBSourceConfig) sourceConfig; + TiDBConnectorConfig 
dbzConfig = tiDBSourceConfig.getDbzConnectorConfig(); + + JdbcConnection jdbc = + new TiDBConnection( + dbzConfig.getJdbcConfig(), + new JdbcConnectionFactory(sourceConfig, getPooledDataSourceFactory()), + QUOTED_CHARACTER, + QUOTED_CHARACTER); + try { + jdbc.connect(); + } catch (Exception e) { + LOG.error("Failed to open TiDB connection", e); + throw new FlinkRuntimeException(e); + } + return jdbc; + } + + public TiDBConnection openJdbcConnection() { + return (TiDBConnection) openJdbcConnection(sourceConfig); + } + + @Override + public JdbcConnectionPoolFactory getPooledDataSourceFactory() { + return new TiDBConnectionPoolFactory(); + } + + @Override + public TableChanges.TableChange queryTableSchema(JdbcConnection jdbc, TableId tableId) { + if (tiDBSchema == null) { + tiDBSchema = + new TiDBSchema(sourceConfig, isDataCollectionIdCaseSensitive(sourceConfig)); + } + return tiDBSchema.getTableSchema(jdbc, tableId); + } + + @Override + public FetchTask createFetchTask(SourceSplitBase sourceSplitBase) { + if (sourceSplitBase.isSnapshotSplit()) { + return new TiDBScanFetchTask(sourceSplitBase.asSnapshotSplit()); + } else { + this.streamFetchTask = new TiDBStreamFetchTask(sourceSplitBase.asStreamSplit()); + return this.streamFetchTask; + } + } + + @Override + public void close() throws IOException { + JdbcDataSourceDialect.super.close(); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java new file mode 100644 index 00000000000..93f4ff34b60 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceBuilder.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfigFactory; +import org.apache.flink.cdc.connectors.base.options.StartupOptions; +import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetFactory; +import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema; +import org.apache.flink.table.catalog.ObjectPath; + +import org.tikv.common.TiConfiguration; + +import java.time.Duration; +import java.util.Map; +import java.util.Properties; + +import static org.apache.flink.cdc.common.utils.Preconditions.checkNotNull; + +/** Builder for {@link JdbcIncrementalSource}. */ +public class TiDBSourceBuilder { + private final TiDBSourceConfigFactory configFactory = new TiDBSourceConfigFactory(); + private EventOffsetFactory offsetFactory; + private DebeziumDeserializationSchema deserializer; + private TiDBDialect dialect; + + private TiDBSourceBuilder() {} + + public TiDBSourceBuilder startupOptions(StartupOptions startupOptions) { + this.configFactory.startupOptions(startupOptions); + return this; + } + + public TiDBSourceBuilder hostname(String hostname) { + this.configFactory.hostname(hostname); + return this; + } + + public TiDBSourceBuilder port(int port) { + this.configFactory.port(port); + return this; + } + + public TiDBSourceBuilder driverClassName(String driverClassName) { + this.configFactory.driverClassName(driverClassName); + return this; + } + + public TiDBSourceBuilder databaseList(String... databaseList) { + this.configFactory.databaseList(databaseList); + return this; + } + + public TiDBSourceBuilder tableList(String... 
tableList) { + this.configFactory.tableList(tableList); + return this; + } + + public TiDBSourceBuilder username(String username) { + this.configFactory.username(username); + return this; + } + + public TiDBSourceBuilder password(String password) { + this.configFactory.password(password); + return this; + } + + public TiDBSourceBuilder jdbcProperties(Properties properties) { + this.configFactory.jdbcProperties(properties); + return this; + } + + public TiDBSourceBuilder tikvProperties(Properties properties) { + this.configFactory.tikvProperties(properties); + return this; + } + + public TiDBSourceBuilder serverTimeZone(String timeZone) { + this.configFactory.serverTimeZone(timeZone); + return this; + } + + public TiDBSourceBuilder connectTimeout(Duration connectTimeout) { + this.configFactory.connectTimeout(connectTimeout); + return this; + } + + public TiDBSourceBuilder connectionPoolSize(int connectionPoolSize) { + this.configFactory.connectionPoolSize(connectionPoolSize); + return this; + } + + public TiDBSourceBuilder connectMaxRetries(int connectMaxRetries) { + this.configFactory.connectMaxRetries(connectMaxRetries); + return this; + } + + public TiDBSourceBuilder chunkKeyColumn(String chunkKeyColumn) { + this.configFactory.chunkKeyColumn(chunkKeyColumn); + return this; + } + + public TiDBSourceBuilder chunkKeyColumns(Map chunkKeyColumns) { + this.configFactory.chunkKeyColumns(chunkKeyColumns); + return this; + } + + public TiDBSourceBuilder pdAddresses(String pdAddresses) { + this.configFactory.pdAddresses(pdAddresses); + return this; + } + + public TiDBSourceBuilder hostMapping(String hostMapping) { + this.configFactory.hostMapping(hostMapping); + return this; + } + + /** + * The split size (number of rows) of a table snapshot; captured tables are split into multiple + * splits when reading the table snapshot. + */ + public TiDBSourceBuilder splitSize(int splitSize) { + this.configFactory.splitSize(splitSize); + return this; + } + + /** The maximum fetch size per poll when reading the table snapshot. */ + public TiDBSourceBuilder fetchSize(int fetchSize) { + this.configFactory.fetchSize(fetchSize); + return this; + } + + public TiDBSourceBuilder splitMetaGroupSize(int splitMetaGroupSize) { + this.configFactory.splitMetaGroupSize(splitMetaGroupSize); + return this; + } + + public TiDBSourceBuilder distributionFactorUpper(double distributionFactorUpper) { + this.configFactory.distributionFactorUpper(distributionFactorUpper); + return this; + } + + /** + * The lower bound of the split key distribution factor. The factor is used to determine + * whether the table is evenly distributed or not. 
+ */ + public TiDBSourceBuilder distributionFactorLower(double distributionFactorLower) { + this.configFactory.distributionFactorLower(distributionFactorLower); + return this; + } + + public TiDBSourceBuilder scanNewlyAddedTableEnabled(boolean scanNewlyAddedTableEnabled) { + this.configFactory.scanNewlyAddedTableEnabled(scanNewlyAddedTableEnabled); + return this; + } + + public TiDBSourceBuilder deserializer(DebeziumDeserializationSchema deserializer) { + this.deserializer = deserializer; + return this; + } + + public TiDBSourceBuilder tiConfiguration(TiConfiguration tiConfiguration) { + this.configFactory.tiConfiguration(tiConfiguration); + return this; + } + + public TiDBIncrementalSource build() { + this.offsetFactory = new EventOffsetFactory(); + this.dialect = new TiDBDialect(configFactory.create(0)); + return new TiDBIncrementalSource<>( + configFactory, checkNotNull(deserializer), offsetFactory, dialect); + } + + /** TiDB incremental source. */ + public static class TiDBIncrementalSource extends JdbcIncrementalSource { + public TiDBIncrementalSource( + JdbcSourceConfigFactory configFactory, + DebeziumDeserializationSchema deserializationSchema, + EventOffsetFactory offsetFactory, + TiDBDialect dataSourceDialect) { + super(configFactory, deserializationSchema, offsetFactory, dataSourceDialect); + } + + public static TiDBSourceBuilder builder() { + return new TiDBSourceBuilder<>(); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java new file mode 100644 index 00000000000..3bd6e481a68 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBConnectorConfig.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.config; + +import org.apache.flink.cdc.connectors.tidb.source.offset.TiDBSourceInfoStructMaker; + +import io.debezium.config.CommonConnectorConfig; +import io.debezium.config.Configuration; +import io.debezium.config.EnumeratedValue; +import io.debezium.config.Field; +import io.debezium.connector.SourceInfoStructMaker; +import io.debezium.connector.mysql.MySqlConnectorConfig; +import io.debezium.jdbc.JdbcValueConverters; +import io.debezium.relational.ColumnFilterMode; +import io.debezium.relational.RelationalDatabaseConnectorConfig; +import io.debezium.relational.TableId; +import io.debezium.relational.Tables; +import org.apache.kafka.common.config.ConfigDef; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** TiDB connector configuration. */ +public class TiDBConnectorConfig extends RelationalDatabaseConnectorConfig { + private static final Logger LOGGER = LoggerFactory.getLogger(TiDBConnectorConfig.class); + + protected static final String LOGICAL_NAME = "tidb_cdc_connector"; + protected static final int DEFAULT_SNAPSHOT_FETCH_SIZE = Integer.MIN_VALUE; + private final boolean readOnlyConnection = true; + protected static final List BUILT_IN_DB_NAMES = + Collections.unmodifiableList( + Arrays.asList("information_schema", "mysql", "tidb", "LBACSYS", "ORAAUDITOR")); + private final TiDBSourceConfig sourceConfig; + + public static final Field READ_ONLY_CONNECTION = + Field.create("read.only") + .withDisplayName("Read only connection") + .withType(ConfigDef.Type.BOOLEAN) + .withDefault(false) + .withWidth(ConfigDef.Width.SHORT) + .withImportance(ConfigDef.Importance.LOW) + .withDescription( + "Switched connector to use alternative methods to deliver signals to Debezium instead of writing to signaling table"); + + public static final Field BIGINT_UNSIGNED_HANDLING_MODE = + Field.create("bigint.unsigned.handling.mode") + .withDisplayName("BIGINT UNSIGNED Handling") + .withEnum(BigIntUnsignedHandlingMode.class, BigIntUnsignedHandlingMode.LONG) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTOR, 27)) + .withWidth(ConfigDef.Width.SHORT) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "Specify how BIGINT UNSIGNED columns should be represented in change events, including:" + + "'precise' uses java.math.BigDecimal to represent values, which are encoded in the change events using a binary representation and Kafka Connect's 'org.apache.kafka.connect.data.Decimal' type; " + + "'long' (the default) represents values using Java's 'long', which may not offer the precision but will be far easier to use in consumers."); + + public static final Field ENABLE_TIME_ADJUSTER = + Field.create("enable.time.adjuster") + .withDisplayName("Enable Time Adjuster") + .withType(ConfigDef.Type.BOOLEAN) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTOR, 22)) + .withDefault(true) + .withWidth(ConfigDef.Width.SHORT) + .withImportance(ConfigDef.Importance.LOW) + .withDescription( + "MySQL allows user to insert year value as either 2-digit or 4-digit. In case of two digit the value is automatically mapped into 1970 - 2069." + + "false - delegates the implicit conversion to the database" + + "true - (the default) Debezium makes the conversion"); + + /** The set of predefined options for the handling mode configuration property. 
*/ + public enum BigIntUnsignedHandlingMode implements EnumeratedValue { + /** + * Represent {@code BIGINT UNSIGNED} values as precise {@link BigDecimal} values, which are + * represented in change events in a binary form. This is precise but difficult to use. + */ + PRECISE("precise"), + + /** + * Represent {@code BIGINT UNSIGNED} values as precise {@code long} values. This may be less + * precise but is far easier to use. + */ + LONG("long"); + + private final String value; + + private BigIntUnsignedHandlingMode(String value) { + this.value = value; + } + + @Override + public String getValue() { + return value; + } + + public JdbcValueConverters.BigIntUnsignedMode asBigIntUnsignedMode() { + switch (this) { + case LONG: + return JdbcValueConverters.BigIntUnsignedMode.LONG; + case PRECISE: + default: + return JdbcValueConverters.BigIntUnsignedMode.PRECISE; + } + } + + /** + * Determine if the supplied value is one of the predefined options. + * + * @param value the configuration property value; may not be null + * @return the matching option, or null if no match is found + */ + public static BigIntUnsignedHandlingMode parse(String value) { + if (value == null) { + return null; + } + value = value.trim(); + for (BigIntUnsignedHandlingMode option : BigIntUnsignedHandlingMode.values()) { + if (option.getValue().equalsIgnoreCase(value)) { + return option; + } + } + return null; + } + + /** + * Determine if the supplied value is one of the predefined options. + * + * @param value the configuration property value; may not be null + * @param defaultValue the default value; may be null + * @return the matching option, or null if no match is found and the non-null default is + * invalid + */ + public static BigIntUnsignedHandlingMode parse(String value, String defaultValue) { + BigIntUnsignedHandlingMode mode = parse(value); + if (mode == null && defaultValue != null) { + mode = parse(defaultValue); + } + return mode; + } + } + + @Override + public String getContextName() { + return "TiDB"; + } + + @Override + public String getConnectorName() { + return "TiDB"; + } + + public String databaseName() { + return getConfig().getString(DATABASE_NAME); + } + + public TiDBConnectorConfig(TiDBSourceConfig sourceConfig) { + super( + Configuration.from(sourceConfig.getDbzProperties()), + LOGICAL_NAME, + Tables.TableFilter.fromPredicate( + tableId -> + "mysql".equalsIgnoreCase(sourceConfig.getCompatibleMode()) + ? !BUILT_IN_DB_NAMES.contains(tableId.catalog()) + : !BUILT_IN_DB_NAMES.contains(tableId.schema())), + TableId::identifier, + DEFAULT_SNAPSHOT_FETCH_SIZE, + "mysql".equalsIgnoreCase(sourceConfig.getCompatibleMode()) + ? ColumnFilterMode.CATALOG + : ColumnFilterMode.SCHEMA); + this.sourceConfig = sourceConfig; + } + + public TiDBSourceConfig getSourceConfig() { + return sourceConfig; + } + + @Override + protected SourceInfoStructMaker getSourceInfoStructMaker(Version version) { + return new TiDBSourceInfoStructMaker(); + } + + public static final Field SERVER_NAME = + RelationalDatabaseConnectorConfig.SERVER_NAME.withValidation( + CommonConnectorConfig::validateServerNameIsDifferentFromHistoryTopicName); + + public boolean isReadOnlyConnection() { + return readOnlyConnection; + } + + /** Whether to use SSL/TLS to connect to the database. */ + public enum SecureConnectionMode implements EnumeratedValue { + /** Establish an unencrypted connection. */ + DISABLED("disabled"), + + /** + * Establish a secure (encrypted) connection if the server supports secure connections. 
Fall + * back to an unencrypted connection otherwise. + */ + PREFERRED("preferred"), + /** + * Establish a secure connection if the server supports secure connections. The connection + * attempt fails if a secure connection cannot be established. + */ + REQUIRED("required"), + /** + * Like REQUIRED, but additionally verify the server TLS certificate against the configured + * Certificate Authority (CA) certificates. The connection attempt fails if no valid + * matching CA certificates are found. + */ + VERIFY_CA("verify_ca"), + /** + * Like VERIFY_CA, but additionally verify that the server certificate matches the host to + * which the connection is attempted. + */ + VERIFY_IDENTITY("verify_identity"); + + private final String value; + + private SecureConnectionMode(String value) { + this.value = value; + } + + @Override + public String getValue() { + return value; + } + + /** + * Determine if the supplied value is one of the predefined options. + * + * @param value the configuration property value; may not be null + * @return the matching option, or null if no match is found + */ + public static SecureConnectionMode parse(String value) { + if (value == null) { + return null; + } + value = value.trim(); + for (SecureConnectionMode option : SecureConnectionMode.values()) { + if (option.getValue().equalsIgnoreCase(value)) { + return option; + } + } + return null; + } + + /** + * Determine if the supplied value is one of the predefined options. + * + * @param value the configuration property value; may not be null + * @param defaultValue the default value; may be null + * @return the matching option, or null if no match is found and the non-null default is + * invalid + */ + public static SecureConnectionMode parse(String value, String defaultValue) { + SecureConnectionMode mode = parse(value); + if (mode == null && defaultValue != null) { + mode = parse(defaultValue); + } + return mode; + } + } + + public static final Field SSL_MODE = + Field.create("database.ssl.mode") + .withDisplayName("SSL mode") + .withEnum( + MySqlConnectorConfig.SecureConnectionMode.class, + MySqlConnectorConfig.SecureConnectionMode.DISABLED) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 0)) + .withWidth(ConfigDef.Width.MEDIUM) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "Whether to use an encrypted connection to MySQL. Options include" + + "'disabled' (the default) to use an unencrypted connection; " + + "'preferred' to establish a secure (encrypted) connection if the server supports secure connections, " + + "but fall back to an unencrypted connection otherwise; " + + "'required' to use a secure (encrypted) connection, and fail if one cannot be established; " + + "'verify_ca' like 'required' but additionally verify the server TLS certificate against the configured Certificate Authority " + + "(CA) certificates, or fail if no valid matching CA certificates are found; or" + + "'verify_identity' like 'verify_ca' but additionally verify that the server certificate matches the host to which the connection is attempted."); + + public static final Field SSL_KEYSTORE = + Field.create("database.ssl.keystore") + .withDisplayName("SSL Keystore") + .withType(ConfigDef.Type.STRING) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 1)) + .withWidth(ConfigDef.Width.LONG) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "The location of the key store file. 
" + + "This is optional and can be used for two-way authentication between the client and the MySQL Server."); + + public static final Field SSL_KEYSTORE_PASSWORD = + Field.create("database.ssl.keystore.password") + .withDisplayName("SSL Keystore Password") + .withType(ConfigDef.Type.PASSWORD) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 2)) + .withWidth(ConfigDef.Width.MEDIUM) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "The password for the key store file. " + + "This is optional and only needed if 'database.ssl.keystore' is configured."); + + public static final Field SSL_TRUSTSTORE = + Field.create("database.ssl.truststore") + .withDisplayName("SSL Truststore") + .withType(ConfigDef.Type.STRING) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 3)) + .withWidth(ConfigDef.Width.LONG) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "The location of the trust store file for the server certificate verification."); + + public static final Field SSL_TRUSTSTORE_PASSWORD = + Field.create("database.ssl.truststore.password") + .withDisplayName("SSL Truststore Password") + .withType(ConfigDef.Type.PASSWORD) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED_SSL, 4)) + .withWidth(ConfigDef.Width.MEDIUM) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "The password for the trust store file. " + + "Used to check the integrity of the truststore, and unlock the truststore."); + + public static final Field CONNECTION_TIMEOUT_MS = + Field.create("connect.timeout.ms") + .withDisplayName("Connection Timeout (ms)") + .withType(ConfigDef.Type.INT) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTION_ADVANCED, 1)) + .withWidth(ConfigDef.Width.SHORT) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "Maximum time to wait after trying to connect to the database before timing out, given in milliseconds. Defaults to 30 seconds (30,000 ms).") + .withDefault(30 * 1000) + .withValidation(Field::isPositiveInteger); + + public static final Field EVENT_DESERIALIZATION_FAILURE_HANDLING_MODE = + Field.create("event.deserialization.failure.handling.mode") + .withDisplayName("Event deserialization failure handling") + .withEnum( + EventProcessingFailureHandlingMode.class, + EventProcessingFailureHandlingMode.FAIL) + .withGroup(Field.createGroupEntry(Field.Group.CONNECTOR, 21)) + .withValidation( + TiDBConnectorConfig + ::validateEventDeserializationFailureHandlingModeNotSet) + .withWidth(ConfigDef.Width.SHORT) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "Specify how failures during deserialization of binlog events (i.e. 
when encountering a corrupted event) should be handled, including:" + + "'fail' (the default) an exception indicating the problematic event and its binlog position is raised, causing the connector to be stopped; " + + "'warn' the problematic event and its binlog position will be logged and the event will be skipped;" + + "'ignore' the problematic event will be skipped."); + + public static final Field INCONSISTENT_SCHEMA_HANDLING_MODE = + Field.create("inconsistent.schema.handling.mode") + .withDisplayName("Inconsistent schema failure handling") + .withEnum( + EventProcessingFailureHandlingMode.class, + EventProcessingFailureHandlingMode.FAIL) + .withGroup(Field.createGroupEntry(Field.Group.ADVANCED, 2)) + .withValidation( + TiDBConnectorConfig::validateInconsistentSchemaHandlingModeNotIgnore) + .withWidth(ConfigDef.Width.SHORT) + .withImportance(ConfigDef.Importance.MEDIUM) + .withDescription( + "Specify how binlog events that belong to a table missing from internal schema representation (i.e. internal representation is not consistent with database) should be handled, including:" + + "'fail' (the default) an exception indicating the problematic event and its binlog position is raised, causing the connector to be stopped; " + + "'warn' the problematic event and its binlog position will be logged and the event will be skipped;" + + "'skip' the problematic event will be skipped."); + + private static int validateEventDeserializationFailureHandlingModeNotSet( + Configuration config, Field field, Field.ValidationOutput problems) { + final String modeName = + config.asMap().get(EVENT_DESERIALIZATION_FAILURE_HANDLING_MODE.name()); + if (modeName != null) { + LOGGER.warn( + "Configuration option '{}' is renamed to '{}'", + EVENT_DESERIALIZATION_FAILURE_HANDLING_MODE.name(), + EVENT_PROCESSING_FAILURE_HANDLING_MODE.name()); + if (EventProcessingFailureHandlingMode.OBSOLETE_NAME_FOR_SKIP_FAILURE_HANDLING.equals( + modeName)) { + LOGGER.warn( + "Value '{}' of configuration option '{}' is deprecated and should be replaced with '{}'", + EventProcessingFailureHandlingMode.OBSOLETE_NAME_FOR_SKIP_FAILURE_HANDLING, + EVENT_DESERIALIZATION_FAILURE_HANDLING_MODE.name(), + EventProcessingFailureHandlingMode.SKIP.getValue()); + } + } + return 0; + } + + private static int validateInconsistentSchemaHandlingModeNotIgnore( + Configuration config, Field field, Field.ValidationOutput problems) { + final String modeName = config.getString(INCONSISTENT_SCHEMA_HANDLING_MODE); + if (EventProcessingFailureHandlingMode.OBSOLETE_NAME_FOR_SKIP_FAILURE_HANDLING.equals( + modeName)) { + LOGGER.warn( + "Value '{}' of configuration option '{}' is deprecated and should be replaced with '{}'", + EventProcessingFailureHandlingMode.OBSOLETE_NAME_FOR_SKIP_FAILURE_HANDLING, + INCONSISTENT_SCHEMA_HANDLING_MODE.name(), + EventProcessingFailureHandlingMode.SKIP.getValue()); + } + return 0; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java new file mode 100644 index 00000000000..7268a1d9f19 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfig.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or 
more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.config; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.options.StartupOptions; +import org.apache.flink.table.catalog.ObjectPath; + +import io.debezium.config.Configuration; +import org.tikv.common.TiConfiguration; + +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +/** The configuration for TiDB source. */ +public class TiDBSourceConfig extends JdbcSourceConfig { + private static final long serialVersionUID = 1L; + private final String compatibleMode; + private final String pdAddresses; + + private final String hostMapping; + private TiConfiguration tiConfiguration; + private final Properties jdbcProperties; + private Map chunkKeyColumns; + + public TiDBSourceConfig( + String compatibleMode, + StartupOptions startupOptions, + List databaseList, + List tableList, + String pdAddresses, + String hostMapping, + int splitSize, + int splitMetaGroupSize, + TiConfiguration tiConfiguration, + double distributionFactorUpper, + double distributionFactorLower, + boolean includeSchemaChanges, + boolean closeIdleReaders, + Properties jdbcProperties, + Configuration dbzConfiguration, + String driverClassName, + String hostname, + int port, + String username, + String password, + int fetchSize, + String serverTimeZone, + Duration connectTimeout, + int connectMaxRetries, + int connectionPoolSize, + String chunkKeyColumn, + Map chunkKeyColumns, + boolean skipSnapshotBackfill, + boolean isScanNewlyAddedTableEnabled, + boolean assignUnboundedChunkFirst) { + super( + startupOptions, + databaseList, + null, + tableList, + splitSize, + splitMetaGroupSize, + distributionFactorUpper, + distributionFactorLower, + includeSchemaChanges, + closeIdleReaders, + jdbcProperties, + dbzConfiguration, + driverClassName, + hostname, + port, + username, + password, + fetchSize, + serverTimeZone, + connectTimeout, + connectMaxRetries, + connectionPoolSize, + chunkKeyColumn, + skipSnapshotBackfill, + isScanNewlyAddedTableEnabled, + assignUnboundedChunkFirst); + this.compatibleMode = compatibleMode; + this.pdAddresses = pdAddresses; + this.hostMapping = hostMapping; + this.jdbcProperties = jdbcProperties; + this.tiConfiguration = tiConfiguration; + this.chunkKeyColumns = chunkKeyColumns; + } + + public String getCompatibleMode() { + return compatibleMode; + } + + public String getPdAddresses() { + return pdAddresses; + } + + public String getHostMapping() { + return hostMapping; + } + + public Properties getJdbcProperties() { + return this.jdbcProperties; + } + + public TiConfiguration getTiConfiguration() { + return this.tiConfiguration; + } + + public Map getChunkKeyColumns() { + return this.chunkKeyColumns; + } + + 
@Override + public TiDBConnectorConfig getDbzConnectorConfig() { + return new TiDBConnectorConfig(this); + } + + public StartupOptions getStartupOptions() { + return startupOptions; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java new file mode 100644 index 00000000000..2e6afbf9496 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceConfigFactory.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.config; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfigFactory; +import org.apache.flink.table.catalog.ObjectPath; + +import io.debezium.config.Configuration; +import org.tikv.common.TiConfiguration; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import static org.apache.flink.cdc.common.utils.Preconditions.checkNotNull; +import static org.apache.flink.cdc.connectors.base.utils.EnvironmentUtils.checkSupportCheckpointsAfterTasksFinished; + +/** A factory to initialize {@link TiDBSourceConfig}. 
*/ +@SuppressWarnings("UnusedReturnValue") +public class TiDBSourceConfigFactory extends JdbcSourceConfigFactory { + private static final long serialVersionUID = 1L; + private String compatibleMode; + private String driverClassName = "com.mysql.cj.jdbc.Driver"; + private String pdAddresses; + + private String hostMapping; + private TiConfiguration tiConfiguration; + private Properties tikvProperties; + private Properties jdbcProperties; + private Map chunkKeyColumns = new HashMap<>(); + + public TiDBSourceConfigFactory compatibleMode(String compatibleMode) { + this.compatibleMode = compatibleMode; + return this; + } + + public TiDBSourceConfigFactory chunkKeyColumn(ObjectPath objectPath, String chunkKeyColumn) { + this.chunkKeyColumns.put(objectPath, chunkKeyColumn); + return this; + } + + public TiDBSourceConfigFactory chunkKeyColumns(Map chunkKeyColumns) { + this.chunkKeyColumns.putAll(chunkKeyColumns); + return this; + } + + public TiDBSourceConfigFactory driverClassName(String driverClassName) { + this.driverClassName = driverClassName; + return this; + } + + public TiDBSourceConfigFactory pdAddresses(String pdAddresses) { + this.pdAddresses = pdAddresses; + return this; + } + + public TiDBSourceConfigFactory hostMapping(String hostMapping) { + this.hostMapping = hostMapping; + return this; + } + + public TiDBSourceConfigFactory tikvProperties(Properties tikvProperties) { + this.tikvProperties = tikvProperties; + return this; + } + + public TiDBSourceConfigFactory jdbcProperties(Properties jdbcProperties) { + this.jdbcProperties = jdbcProperties; + return this; + } + + public TiDBSourceConfigFactory tiConfiguration(TiConfiguration tiConfiguration) { + this.tiConfiguration = tiConfiguration; + return this; + } + + @Override + public TiDBSourceConfig create(int subtask) { + checkSupportCheckpointsAfterTasksFinished(closeIdleReaders); + Properties props = new Properties(); + props.setProperty("database.server.name", "tidb_cdc"); + props.setProperty("database.hostname", checkNotNull(hostname)); + props.setProperty("database.port", String.valueOf(port)); + props.setProperty("database.user", checkNotNull(username)); + props.setProperty("database.password", checkNotNull(password)); + props.setProperty("database.dbname", checkNotNull(databaseList.get(0))); + props.setProperty("database.connect.timeout.ms", String.valueOf(connectTimeout.toMillis())); + + // table filter + // props.put("database.include.list", String.join(",", databaseList)); + if (tableList != null) { + props.put("table.include.list", String.join(",", tableList)); + } + // value converter + props.put("decimal.handling.mode", "precise"); + props.put("time.precision.mode", "adaptive_time_microseconds"); + props.put("binary.handling.mode", "bytes"); + + if (jdbcProperties != null) { + props.putAll(jdbcProperties); + } + + if (tikvProperties != null) { + props.putAll(tikvProperties); + } + + Configuration dbzConfiguration = Configuration.from(props); + return new TiDBSourceConfig( + compatibleMode, + startupOptions, + databaseList, + tableList, + pdAddresses, + hostMapping, + splitSize, + splitMetaGroupSize, + tiConfiguration, + distributionFactorUpper, + distributionFactorLower, + includeSchemaChanges, + closeIdleReaders, + props, + dbzConfiguration, + driverClassName, + hostname, + port, + username, + password, + fetchSize, + serverTimeZone, + connectTimeout, + connectMaxRetries, + connectionPoolSize, + chunkKeyColumn, + chunkKeyColumns, + skipSnapshotBackfill, + scanNewlyAddedTableEnabled, + assignUnboundedChunkFirst); + } +} 
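For context, a minimal usage sketch of how the new TiDBSourceBuilder and this config factory are expected to be wired into a Flink job. The connection settings, database/table names, and the JSON deserializer choice are placeholders for illustration only (not part of this patch), and the generic signature of builder() is assumed to follow the other incremental sources in this repository:

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.cdc.connectors.base.options.StartupOptions;
import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource;
import org.apache.flink.cdc.connectors.tidb.source.TiDBSourceBuilder;
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TiDBIncrementalSourceExample {
    public static void main(String[] args) throws Exception {
        // Build the incremental source; all connection values below are illustrative placeholders.
        JdbcIncrementalSource<String> tidbSource =
                TiDBSourceBuilder.TiDBIncrementalSource.<String>builder()
                        .hostname("localhost")
                        .port(4000)
                        .pdAddresses("localhost:2379")
                        .databaseList("test_db")
                        .tableList("test_db.products")
                        .username("root")
                        .password("")
                        .startupOptions(StartupOptions.initial())
                        .deserializer(new JsonDebeziumDeserializationSchema())
                        .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Checkpointing drives offset commits (see notifyCheckpointComplete in TiDBDialect).
        env.enableCheckpointing(3000);
        env.fromSource(tidbSource, WatermarkStrategy.noWatermarks(), "TiDB Incremental Source")
                .print();
        env.execute("TiDB CDC usage sketch");
    }
}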
diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java new file mode 100644 index 00000000000..fb8a026e1b6 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/config/TiDBSourceOptions.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.config; + +import org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions; +import org.apache.flink.cdc.connectors.tidb.utils.UriHostMapping; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; + +import org.tikv.common.TiConfiguration; + +import java.time.Duration; +import java.util.Map; +import java.util.Optional; + +/** Options for {@link org.apache.flink.cdc.connectors.tidb.table.TiDBTableSource}. */ +public class TiDBSourceOptions extends JdbcSourceOptions { + + public static final ConfigOption TIDB_PORT = + ConfigOptions.key("port") + .intType() + .defaultValue(4000) + .withDescription("Integer port number of the TiDB database server."); + + public static final ConfigOption PD_ADDRESSES = + ConfigOptions.key("pd-addresses") + .stringType() + .noDefaultValue() + .withDescription("TiDB pd-server addresses"); + + public static final ConfigOption HEARTBEAT_INTERVAL = + ConfigOptions.key("heartbeat.interval.ms") + .durationType() + .defaultValue(Duration.ofSeconds(30)) + .withDescription( + "Optional interval of sending heartbeat event for tracing the latest available replication slot offsets"); + + public static final ConfigOption TABLE_LIST = + ConfigOptions.key("table-list") + .stringType() + .noDefaultValue() + .withDescription( + "List of full names of tables, separated by commas, e.g. \"db1.table1, db2.table2\"."); + + public static final ConfigOption HOST_MAPPING = + ConfigOptions.key("host-mapping") + .stringType() + .noDefaultValue() + .withDescription( + "TiKV cluster's host-mapping used to configure public IP and intranet IP mapping. When the TiKV cluster is running on the intranet, you can map a set of intranet IPs to public IPs for an outside Flink cluster to access. The format is {Intranet IP1}:{Public IP1};{Intranet IP2}:{Public IP2}, e.g. 
192.168.0.2:8.8.8.8;192.168.0.3:9.9.9.9."); + + public static final ConfigOption JDBC_DRIVER = + ConfigOptions.key("jdbc.driver") + .stringType() + .defaultValue("com.mysql.cj.jdbc.Driver") + .withDescription( + "JDBC driver class name, use 'com.mysql.cj.jdbc.Driver' by default."); + + public static TiConfiguration getTiConfiguration( + final String pdAddrsStr, final String hostMapping, final Map options) { + final Configuration configuration = Configuration.fromMap(options); + + final TiConfiguration tiConf = TiConfiguration.createDefault(pdAddrsStr); + Optional.of(new UriHostMapping(hostMapping)).ifPresent(tiConf::setHostMapping); + // todo add more config to tidb + return tiConf; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java new file mode 100644 index 00000000000..48e4cd0d29c --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnection.java @@ -0,0 +1,527 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.connection; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.util.FlinkRuntimeException; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.jdbc.JdbcConfiguration; +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.Column; +import io.debezium.relational.TableId; +import io.debezium.relational.Tables; +import io.debezium.relational.history.TableChanges; +import io.debezium.schema.SchemaChangeEvent; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.sql.Types; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import java.util.Set; +import java.util.function.Predicate; +import java.util.function.Supplier; +import java.util.regex.Pattern; + +/** TiDB JDBC connection. 
*/ +public class TiDBConnection extends JdbcConnection { + private static final Logger LOG = LoggerFactory.getLogger(TiDBConnection.class); + + private static final Properties DEFAULT_JDBC_PROPERTIES = initializeDefaultJdbcProperties(); + private static final String MYSQL_URL_PATTERN = + "jdbc:mysql://${hostname}:${port}/?connectTimeout=${connectTimeout}"; + private static final String SHOW_CREATE_TABLE = "SHOW CREATE TABLE "; + private static final int TYPE_BINARY_FLOAT = 100; + private static final int TYPE_BINARY_DOUBLE = 101; + private static final int TYPE_TIMESTAMP_WITH_TIME_ZONE = -101; + private static final int TYPE_TIMESTAMP_WITH_LOCAL_TIME_ZONE = -102; + private static final int TYPE_INTERVAL_YEAR_TO_MONTH = -103; + private static final int TYPE_INTERVAL_DAY_TO_SECOND = -104; + private static final char quote = '`'; + private static final String QUOTED_CHARACTER = "`"; + + public TiDBConnection( + String hostname, + Integer port, + String user, + String password, + Duration timeout, + String jdbcDriver, + Properties jdbcProperties, + ClassLoader classLoader) { + super( + config(hostname, port, user, password, timeout), + JdbcConnection.patternBasedFactory( + formatJdbcUrl(jdbcDriver, jdbcProperties), jdbcDriver, classLoader), + quote + "", + quote + ""); + } + + public TiDBConnection( + JdbcConfiguration config, + ConnectionFactory connectionFactory, + String openingQuoteCharacter, + String closingQuoteCharacter) { + super(config, connectionFactory, openingQuoteCharacter, closingQuoteCharacter); + } + + public TiDBConnection( + JdbcConfiguration config, + ConnectionFactory connectionFactory, + Supplier classLoaderSupplier, + String openingQuoteCharacter, + String closingQuoteCharacter) { + super( + config, + connectionFactory, + classLoaderSupplier, + openingQuoteCharacter, + closingQuoteCharacter); + } + + protected TiDBConnection( + JdbcConfiguration config, + ConnectionFactory connectionFactory, + Operations initialOperations, + Supplier classLoaderSupplier, + String openingQuotingChar, + String closingQuotingChar) { + super( + config, + connectionFactory, + initialOperations, + classLoaderSupplier, + openingQuotingChar, + closingQuotingChar); + } + + private static JdbcConfiguration config( + String hostname, Integer port, String user, String password, Duration timeout) { + return JdbcConfiguration.create() + .with("hostname", hostname) + .with("port", port) + .with("user", user) + .with("password", password) + .with("connectTimeout", timeout == null ? 
30000 : timeout.toMillis()) + .build(); + } + + private static String formatJdbcUrl(String jdbcDriver, Properties jdbcProperties) { + Properties combinedProperties = new Properties(); + combinedProperties.putAll(DEFAULT_JDBC_PROPERTIES); + if (jdbcProperties != null) { + combinedProperties.putAll(jdbcProperties); + } + StringBuilder jdbcUrlStringBuilder = new StringBuilder(MYSQL_URL_PATTERN); + combinedProperties.forEach( + (key, value) -> { + jdbcUrlStringBuilder.append("&").append(key).append("=").append(value); + }); + return jdbcUrlStringBuilder.toString(); + } + + private static Properties initializeDefaultJdbcProperties() { + Properties defaultJdbcProperties = new Properties(); + defaultJdbcProperties.setProperty("useInformationSchema", "true"); + defaultJdbcProperties.setProperty("nullCatalogMeansCurrent", "false"); + defaultJdbcProperties.setProperty("useUnicode", "true"); + defaultJdbcProperties.setProperty("zeroDateTimeBehavior", "convertToNull"); + defaultJdbcProperties.setProperty("characterEncoding", "UTF-8"); + defaultJdbcProperties.setProperty("characterSetResults", "UTF-8"); + return defaultJdbcProperties; + } + + public long getCurrentTimestampS() throws SQLException { + try { + long globalTimestamp = getGlobalTimestamp(); + LOG.info("Global timestamp: {}", globalTimestamp); + return Long.parseLong(String.valueOf(globalTimestamp).substring(0, 10)); + } catch (Exception e) { + LOG.warn("Failed to get global timestamp, use local timestamp instead"); + } + return getCurrentTimestamp() + .orElseThrow(IllegalStateException::new) + .toInstant() + .getEpochSecond(); + } + + private long getGlobalTimestamp() throws SQLException { + return querySingleValue( + connection(), "SELECT CURRENT_TIMESTAMP FROM DUAL", ps -> {}, rs -> rs.getLong(1)); + } + + @Override + public Optional getCurrentTimestamp() throws SQLException { + return queryAndMap( + "SELECT LOCALTIMESTAMP FROM DUAL", + rs -> rs.next() ? Optional.of(rs.getTimestamp(1)) : Optional.empty()); + } + + @Override + protected String[] supportedTableTypes() { + return new String[] {"TABLE"}; + } + + @Override + public String quotedTableIdString(TableId tableId) { + return tableId.toQuotedString(quote); + } + + public void readSchemaForCapturedTables( + Tables tables, + String databaseCatalog, + String schemaNamePattern, + Tables.ColumnNameFilter columnFilter, + boolean removeTablesNotFoundInJdbc, + Set capturedTables) + throws SQLException { + + Set tableIdsBefore = new HashSet<>(tables.tableIds()); + + DatabaseMetaData metadata = connection().getMetaData(); + Map> columnsByTable = new HashMap<>(); + + for (TableId tableId : capturedTables) { + try (ResultSet columnMetadata = + metadata.getColumns( + databaseCatalog, schemaNamePattern, tableId.table(), null)) { + while (columnMetadata.next()) { + // add all whitelisted columns + readTableColumn(columnMetadata, tableId, columnFilter) + .ifPresent( + column -> { + columnsByTable + .computeIfAbsent(tableId, t -> new ArrayList<>()) + .add(column.create()); + }); + } + } + } + + // Read the metadata for the primary keys ... + for (Map.Entry> tableEntry : columnsByTable.entrySet()) { + // First get the primary key information, which must be done for *each* table ... + List pkColumnNames = readPrimaryKeyNames(metadata, tableEntry.getKey()); + + // Then define the table ... 
+ List<Column> columns = tableEntry.getValue(); + Collections.sort(columns); + tables.overwriteTable(tableEntry.getKey(), columns, pkColumnNames, null); + } + + if (removeTablesNotFoundInJdbc) { + // Remove any definitions for tables that were not found in the database metadata ... + tableIdsBefore.removeAll(columnsByTable.keySet()); + tableIdsBefore.forEach(tables::removeTable); + } + } + + @Override + protected int resolveNativeType(String typeName) { + String upperCaseTypeName = typeName.toUpperCase(); + if (upperCaseTypeName.startsWith("JSON")) { + return Types.VARCHAR; + } + if (upperCaseTypeName.startsWith("NCHAR")) { + return Types.NCHAR; + } + if (upperCaseTypeName.startsWith("NVARCHAR2")) { + return Types.NVARCHAR; + } + if (upperCaseTypeName.startsWith("TIMESTAMP")) { + if (upperCaseTypeName.contains("WITH TIME ZONE")) { + return TYPE_TIMESTAMP_WITH_TIME_ZONE; + } + if (upperCaseTypeName.contains("WITH LOCAL TIME ZONE")) { + return TYPE_TIMESTAMP_WITH_LOCAL_TIME_ZONE; + } + return Types.TIMESTAMP; + } + if (upperCaseTypeName.startsWith("INTERVAL")) { + if (upperCaseTypeName.contains("TO MONTH")) { + return TYPE_INTERVAL_YEAR_TO_MONTH; + } + if (upperCaseTypeName.contains("TO SECOND")) { + return TYPE_INTERVAL_DAY_TO_SECOND; + } + } + return Column.UNSET_INT_VALUE; + } + + public String readSystemVariable(String variable) throws SQLException { + return querySingleValue( + connection(), + "SHOW VARIABLES LIKE ?", + ps -> ps.setString(1, variable), + rs -> rs.getString("VALUE")); + } + + @Override + protected int resolveJdbcType(int metadataJdbcType, int nativeType) { + switch (metadataJdbcType) { + case TYPE_BINARY_FLOAT: + return Types.REAL; + case TYPE_BINARY_DOUBLE: + return Types.DOUBLE; + case TYPE_TIMESTAMP_WITH_TIME_ZONE: + case TYPE_TIMESTAMP_WITH_LOCAL_TIME_ZONE: + case TYPE_INTERVAL_YEAR_TO_MONTH: + case TYPE_INTERVAL_DAY_TO_SECOND: + return Types.OTHER; + default: + return nativeType == Column.UNSET_INT_VALUE ? metadataJdbcType : nativeType; + } + } + + public List<TableId> getTables(String dbPattern, String tbPattern) throws SQLException { + return listTables( + db -> Pattern.matches(dbPattern, db), + tableId -> Pattern.matches(tbPattern, tableId.table())); + } + + private List<TableId> listTables( + Predicate<String> databaseFilter, Tables.TableFilter tableFilter) throws SQLException { + List<TableId> tableIds = new ArrayList<>(); + DatabaseMetaData metaData = connection().getMetaData(); + ResultSet rs = metaData.getCatalogs(); + List<String> dbList = new ArrayList<>(); + while (rs.next()) { + String db = rs.getString("TABLE_CAT"); + if (databaseFilter.test(db)) { + dbList.add(db); + } + } + for (String db : dbList) { + + rs = metaData.getTables(db, null, null, supportedTableTypes()); + while (rs.next()) { + TableId tableId = new TableId(db, null, rs.getString("TABLE_NAME")); + if (tableFilter.isIncluded(tableId)) { + tableIds.add(tableId); + } + } + } + return tableIds; + } + + // New readSchema implementation + public void readTiDBSchema( + TiDBConnectorConfig config, + TiDBDatabaseSchema databaseSchema, + Tables tables, + String databaseCatalog, + String schemaNamePattern, + Tables.TableFilter tableFilter, + Tables.ColumnNameFilter columnFilter, + boolean removeTablesNotFoundInJdbc) + throws SQLException { + // Before we make any changes, get the copy of the set of table IDs ... + Set<TableId> tableIdsBefore = new HashSet<>(tables.tableIds()); + + // Read the metadata for the table columns ...
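The getTables/listTables pair above treats dbPattern and tbPattern as full-match regular expressions via Pattern.matches, so only databases and tables whose names match the pattern completely are returned. A tiny sketch of that filtering behaviour; the patterns and class name below are made up for illustration.

import java.util.function.Predicate;
import java.util.regex.Pattern;

public class TableFilterSketch {
    public static void main(String[] args) {
        // Full-match semantics: the pattern must cover the entire name.
        Predicate<String> dbFilter = db -> Pattern.matches("app_db_\\d+", db);
        Predicate<String> tableFilter = tb -> Pattern.matches("orders.*", tb);

        System.out.println(dbFilter.test("app_db_01"));      // true
        System.out.println(dbFilter.test("other_db"));       // false
        System.out.println(tableFilter.test("orders_2024")); // true
    }
}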
+ DatabaseMetaData metadata = connection().getMetaData(); + + // Find regular and materialized views as they cannot be snapshotted + final Set viewIds = new HashSet<>(); + final Set tableIds = new HashSet<>(); + + int totalTables = 0; + try (final ResultSet rs = + metadata.getTables( + databaseCatalog, schemaNamePattern, null, supportedTableTypes())) { + while (rs.next()) { + final String catalogName = resolveCatalogName(rs.getString(1)); + final String schemaName = rs.getString(2); + final String tableName = rs.getString(3); + final String tableType = rs.getString(4); + if (isTableType(tableType)) { + totalTables++; + TableId tableId = new TableId(catalogName, schemaName, tableName); + if (tableFilter == null || tableFilter.isIncluded(tableId)) { + tableIds.add(tableId); + } + } else { + TableId tableId = new TableId(catalogName, schemaName, tableName); + viewIds.add(tableId); + } + } + } + + Map> columnsByTable = new HashMap<>(); + if (totalTables == tableIds.size()) { + columnsByTable = + getColumnsDetailsWithTableChange( + config, + databaseSchema, + databaseCatalog, + schemaNamePattern, + null, + tableFilter, + columnFilter, + metadata, + viewIds); + // LOGGER.info("connection readSchema:", columnsByTable); + } else { + for (TableId includeTable : tableIds) { + Map> cols = + getColumnsDetailsWithTableChange( + config, + databaseSchema, + databaseCatalog, + schemaNamePattern, + null, + tableFilter, + columnFilter, + metadata, + viewIds); + columnsByTable.putAll(cols); + } + } + + // Read the metadata for the primary keys ... + for (Map.Entry> tableEntry : columnsByTable.entrySet()) { + // First get the primary key information, which must be done for *each* table ... + List pkColumnNames = + readPrimaryKeyOrUniqueIndexNames(metadata, tableEntry.getKey()); + + // Then define the table ... + List columns = tableEntry.getValue(); + Collections.sort(columns); + String defaultCharsetName = null; // JDBC does not expose character sets + tables.overwriteTable(tableEntry.getKey(), columns, pkColumnNames, defaultCharsetName); + } + + if (removeTablesNotFoundInJdbc) { + // Remove any definitions for tables that were not found in the database metadata ... 
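The readTiDBSchema method above reads DatabaseMetaData.getTables positionally: result columns 1 to 4 carry catalog, schema, table name and table type, and rows whose type is not a table type are collected as views so they can be excluded from the snapshot. Below is a self-contained JDBC sketch of the same enumeration; the URL, credentials and local port 4000 are placeholders, and a MySQL-compatible driver and reachable endpoint are required for it to actually run.

import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;

public class ListTablesSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder endpoint and credentials; adjust before running.
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:4000/?useSSL=false", "root", "")) {
            DatabaseMetaData meta = conn.getMetaData();
            // Restrict to base tables, like supportedTableTypes() above.
            try (ResultSet rs = meta.getTables(null, null, null, new String[] {"TABLE"})) {
                while (rs.next()) {
                    // Columns 1-4: TABLE_CAT, TABLE_SCHEM, TABLE_NAME, TABLE_TYPE.
                    System.out.printf(
                            "%s.%s (%s)%n",
                            rs.getString(1), rs.getString(3), rs.getString(4));
                }
            }
        }
    }
}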
+ tableIdsBefore.removeAll(columnsByTable.keySet()); + tableIdsBefore.forEach(tables::removeTable); + } + } + + protected Map> getColumnsDetailsWithTableChange( + TiDBConnectorConfig config, + TiDBDatabaseSchema databaseSchema, + String databaseCatalog, + String schemaNamePattern, + String tableName, + Tables.TableFilter tableFilter, + Tables.ColumnNameFilter columnFilter, + DatabaseMetaData metadata, + final Set viewIds) + throws SQLException { + Map> columnsByTable = new HashMap<>(); + try (ResultSet columnMetadata = + metadata.getColumns(databaseCatalog, schemaNamePattern, tableName, null)) { + while (columnMetadata.next()) { + String catalogName = resolveCatalogName(columnMetadata.getString(1)); + String schemaName = columnMetadata.getString(2); + String metaTableName = columnMetadata.getString(3); + TableId tableId = new TableId(catalogName, schemaName, metaTableName); + + // exclude views and non-captured tables + if (viewIds.contains(tableId) + || (tableFilter != null && !tableFilter.isIncluded(tableId))) { + continue; + } + TableChanges.TableChange tableChange = + readTableSchema(config, databaseSchema, tableId); + if (tableChange != null) { + ArrayList columns = new ArrayList<>(tableChange.getTable().columns()); + columnsByTable.put(tableId, columns); + } + } + } + return columnsByTable; + } + + private TableChanges.TableChange readTableSchema( + TiDBConnectorConfig connectorConfig, + TiDBDatabaseSchema databaseSchema, + TableId tableId) { + final Map tableChangeMap = new HashMap<>(); + String showCreateTable = SHOW_CREATE_TABLE + TiDBUtils.quote(tableId); + final TiDBPartition partition = new TiDBPartition(connectorConfig.getLogicalName()); + buildSchemaByShowCreateTable( + connectorConfig, databaseSchema, partition, this, tableId, tableChangeMap); + return tableChangeMap.get(tableId); + } + + private void buildSchemaByShowCreateTable( + TiDBConnectorConfig config, + TiDBDatabaseSchema databaseSchema, + TiDBPartition partition, + JdbcConnection jdbc, + TableId tableId, + Map tableChangeMap) { + final String sql = SHOW_CREATE_TABLE + TiDBUtils.quote(tableId); + try { + jdbc.query( + sql, + rs -> { + if (rs.next()) { + final String ddl = rs.getString(2); + parseSchemaByDdl( + config, + databaseSchema, + partition, + ddl, + tableId, + tableChangeMap); + } + }); + } catch (SQLException e) { + throw new FlinkRuntimeException( + String.format("Failed to read schema for table %s by running %s", tableId, sql), + e); + } + } + + private void parseSchemaByDdl( + TiDBConnectorConfig config, + TiDBDatabaseSchema databaseSchema, + TiDBPartition partition, + String ddl, + TableId tableId, + Map tableChangeMap) { + final EventOffsetContext offsetContext = EventOffsetContext.initial(config); + List schemaChangeEvents = + databaseSchema.parseSnapshotDdl( + partition, ddl, tableId.catalog(), offsetContext, Instant.now()); + for (SchemaChangeEvent schemaChangeEvent : schemaChangeEvents) { + for (TableChanges.TableChange tableChange : schemaChangeEvent.getTableChanges()) { + tableChangeMap.put(tableId, tableChange); + } + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java new file mode 100644 index 00000000000..5253ca99650 --- /dev/null +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/connection/TiDBConnectionPoolFactory.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.connection; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.relational.connection.JdbcConnectionPoolFactory; + +/** The factory to create {@link TiDBConnectionPool}. */ +public class TiDBConnectionPoolFactory extends JdbcConnectionPoolFactory { + private static final String MYSQL_URL_PATTERN = + "jdbc:mysql://%s:%s/?useUnicode=true&useSSL=false&useInformationSchema=true&nullCatalogMeansCurrent=false&zeroDateTimeBehavior=convertToNull&characterEncoding=UTF-8&characterSetResults=UTF-8"; + + @Override + public String getJdbcUrl(JdbcSourceConfig sourceConfig) { + String hostName = sourceConfig.getHostname(); + int port = sourceConfig.getPort(); + return String.format(MYSQL_URL_PATTERN, hostName, port); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java new file mode 100644 index 00000000000..f94f7db63a1 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBDefaultValueConverter.java @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.converter; + +import io.debezium.annotation.Immutable; +import io.debezium.connector.mysql.MySqlDefaultValueConverter; +import io.debezium.connector.mysql.MySqlValueConverters; +import io.debezium.relational.Column; +import io.debezium.relational.DefaultValueConverter; +import io.debezium.relational.ValueConverter; +import io.debezium.util.Collect; +import org.apache.kafka.connect.data.Field; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.sql.Timestamp; +import java.sql.Types; +import java.time.Duration; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; +import java.util.Optional; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** TiDBDefaultValueConverter. */ +public class TiDBDefaultValueConverter implements DefaultValueConverter { + + private static final Logger LOGGER = LoggerFactory.getLogger(MySqlDefaultValueConverter.class); + + private static final Pattern EPOCH_EQUIVALENT_TIMESTAMP = + Pattern.compile( + "(\\d{4}-\\d{2}-00|\\d{4}-00-\\d{2}|0000-\\d{2}-\\d{2}) (00:00:00(\\.\\d{1,6})?)"); + + private static final Pattern EPOCH_EQUIVALENT_DATE = + Pattern.compile("\\d{4}-\\d{2}-00|\\d{4}-00-\\d{2}|0000-\\d{2}-\\d{2}"); + + private static final String EPOCH_TIMESTAMP = "1970-01-01 00:00:00"; + + private static final String EPOCH_DATE = "1970-01-01"; + + private static final Pattern TIMESTAMP_PATTERN = + Pattern.compile("([0-9]*-[0-9]*-[0-9]*) ([0-9]*:[0-9]*:[0-9]*(\\.([0-9]*))?)"); + + private static final Pattern CHARSET_INTRODUCER_PATTERN = + Pattern.compile("^_[A-Za-z0-9]+'(.*)'$"); + + @Immutable + private static final Set TRIM_DATA_TYPES = + Collect.unmodifiableSet( + Types.TINYINT, + Types.INTEGER, + Types.DATE, + Types.TIMESTAMP, + Types.TIMESTAMP_WITH_TIMEZONE, + Types.TIME, + Types.BOOLEAN, + Types.BIT, + Types.NUMERIC, + Types.DECIMAL, + Types.FLOAT, + Types.DOUBLE, + Types.REAL); + + @Immutable + private static final Set NUMBER_DATA_TYPES = + Collect.unmodifiableSet( + Types.BIT, + Types.TINYINT, + Types.SMALLINT, + Types.INTEGER, + Types.BIGINT, + Types.FLOAT, + Types.REAL, + Types.DOUBLE, + Types.NUMERIC, + Types.DECIMAL); + + private static final DateTimeFormatter ISO_LOCAL_DATE_WITH_OPTIONAL_TIME = + new DateTimeFormatterBuilder() + .append(DateTimeFormatter.ISO_LOCAL_DATE) + .optionalStart() + .appendLiteral(" ") + .append(DateTimeFormatter.ISO_LOCAL_TIME) + .optionalEnd() + .toFormatter(); + + private final TiDBValueConverters converters; + + public TiDBDefaultValueConverter(TiDBValueConverters converters) { + this.converters = converters; + } + + @Override + public Optional parseDefaultValue(Column column, String defaultValueExpression) { + Object logicalDefaultValue = convert(column, defaultValueExpression); + if (logicalDefaultValue == null) { + return Optional.empty(); + } + + final SchemaBuilder schemaBuilder = converters.schemaBuilder(column); + if (schemaBuilder == null) { + return Optional.of(logicalDefaultValue); + } + final Schema schema = schemaBuilder.build(); + + // In order to get the valueConverter for this column, we have to create a field; + // The index value -1 in 
the field will never used when converting default value; + // So we can set any number here; + final Field field = new Field(column.name(), -1, schema); + final ValueConverter valueConverter = converters.converter(column, field); + + return Optional.ofNullable(valueConverter.convert(logicalDefaultValue)); + } + + public Object convert(Column column, String value) { + if (value == null) { + return value; + } + + // trim non varchar data types before converting + if (TRIM_DATA_TYPES.contains(column.jdbcType())) { + value = value.trim(); + } + + // strip character set introducer on default value expressions + value = stripCharacterSetIntroducer(value); + + // boolean is also INT(1) or TINYINT(1) + if (NUMBER_DATA_TYPES.contains(column.jdbcType()) + && ("true".equalsIgnoreCase(value) || "false".equalsIgnoreCase(value))) { + if (Types.DECIMAL == column.jdbcType() || Types.NUMERIC == column.jdbcType()) { + return convertToDecimal(column, value.equalsIgnoreCase("true") ? "1" : "0"); + } + return value.equalsIgnoreCase("true") ? 1 : 0; + } + switch (column.jdbcType()) { + case Types.DATE: + return convertToLocalDate(column, value); + case Types.TIMESTAMP: + return convertToLocalDateTime(column, value); + case Types.TIMESTAMP_WITH_TIMEZONE: + return convertToTimestamp(column, value); + case Types.TIME: + return convertToDuration(column, value); + case Types.BOOLEAN: + return convertToBoolean(value); + case Types.BIT: + return convertToBits(column, value); + + case Types.NUMERIC: + case Types.DECIMAL: + return convertToDecimal(column, value); + + case Types.FLOAT: + case Types.DOUBLE: + case Types.REAL: + return convertToDouble(value); + } + return value; + } + + private Object convertToLocalDate(Column column, String value) { + final boolean zero = + EPOCH_EQUIVALENT_DATE.matcher(value).matches() + || EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches() + || "0".equals(value); + + if (zero && column.isOptional()) { + return null; + } + if (zero) { + value = EPOCH_DATE; + } + + try { + return LocalDate.from(ISO_LOCAL_DATE_WITH_OPTIONAL_TIME.parse(value)); + } catch (Exception e) { + LOGGER.warn( + "Invalid default value '{}' for date column '{}'; {}", + value, + column.name(), + e.getMessage()); + if (column.isOptional()) { + return null; + } else { + return LocalDate.from(ISO_LOCAL_DATE_WITH_OPTIONAL_TIME.parse(EPOCH_DATE)); + } + } + } + + /** + * Converts a string object for an object type of {@link LocalDateTime}. 
If the column + * definition allows null and default value is 0000-00-00 00:00:00, we need return null, else + * 0000-00-00 00:00:00 will be replaced with 1970-01-01 00:00:00; + * + * @param column the column definition describing the {@code data} value; never null + * @param value the string object to be converted into a {@link LocalDateTime} type; + * @return the converted value; + */ + private Object convertToLocalDateTime(Column column, String value) { + final boolean matches = + EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches() || "0".equals(value); + if (matches) { + if (column.isOptional()) { + return null; + } + + value = EPOCH_TIMESTAMP; + } + + try { + return LocalDateTime.from(timestampFormat(column.length()).parse(value)); + } catch (Exception e) { + LOGGER.warn( + "Invalid default value '{}' for datetime column '{}'; {}", + value, + column.name(), + e.getMessage()); + if (column.isOptional()) { + return null; + } else { + return LocalDateTime.from(timestampFormat(column.length()).parse(EPOCH_TIMESTAMP)); + } + } + } + + /** + * Converts a string object for an object type of {@link Timestamp}. If the column definition + * allows null and default value is 0000-00-00 00:00:00, we need return null, else 0000-00-00 + * 00:00:00 will be replaced with 1970-01-01 00:00:00; + * + * @param column the column definition describing the {@code data} value; never null + * @param value the string object to be converted into a {@link Timestamp} type; + * @return the converted value; + */ + private Object convertToTimestamp(Column column, String value) { + final boolean matches = + EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches() + || "0".equals(value) + || EPOCH_TIMESTAMP.equals(value); + if (matches) { + if (column.isOptional()) { + return null; + } + + return Timestamp.from(Instant.EPOCH); + } + value = cleanTimestamp(value); + return Timestamp.valueOf(value).toInstant().atZone(ZoneId.systemDefault()); + } + + /** + * Converts a string object for an object type of {@link Duration}. + * + * @param column the column definition describing the {@code data} value; never null + * @param value the string object to be converted into a {@link Duration} type; + * @return the converted value; + */ + private Object convertToDuration(Column column, String value) { + Matcher matcher = TIMESTAMP_PATTERN.matcher(value); + if (matcher.matches()) { + value = matcher.group(2); + } + return MySqlValueConverters.stringToDuration(value); + } + + /** + * Converts a string object for an expected JDBC type of {@link Types#DOUBLE}. + * + * @param value the string object to be converted into a {@link Types#DOUBLE} type; + * @return the converted value; + */ + private Object convertToDouble(String value) { + return Double.parseDouble(value); + } + + /** + * Converts a string object for an expected JDBC type of {@link Types#DECIMAL}. + * + * @param column the column definition describing the {@code data} value; never null + * @param value the string object to be converted into a {@link Types#DECIMAL} type; + * @return the converted value; + */ + private Object convertToDecimal(Column column, String value) { + return column.scale().isPresent() + ? new BigDecimal(value).setScale(column.scale().get(), RoundingMode.HALF_UP) + : new BigDecimal(value); + } + + /** + * Converts a string object for an expected JDBC type of {@link Types#BIT}. 
+ * + * @param column the column definition describing the {@code data} value; never null + * @param value the string object to be converted into a {@link Types#BIT} type; + * @return the converted value; + */ + private Object convertToBits(Column column, String value) { + if (column.length() > 1) { + return convertToBits(value); + } + return convertToBit(value); + } + + private Object convertToBit(String value) { + try { + return Short.parseShort(value) != 0; + } catch (NumberFormatException ignore) { + return Boolean.parseBoolean(value); + } + } + + private Object convertToBits(String value) { + int nums = value.length() / Byte.SIZE + (value.length() % Byte.SIZE == 0 ? 0 : 1); + byte[] bytes = new byte[nums]; + for (int i = 0; i < nums; i++) { + int s = value.length() - Byte.SIZE < 0 ? 0 : value.length() - Byte.SIZE; + int e = value.length(); + bytes[nums - i - 1] = (byte) Integer.parseInt(value.substring(s, e), 2); + value = value.substring(0, s); + } + return bytes; + } + + /** + * Converts a string object for an expected JDBC type of {@link Types#BOOLEAN}. + * + * @param value the string object to be converted into a {@link Types#BOOLEAN} type; + * @return the converted value; + */ + private Object convertToBoolean(String value) { + try { + return Integer.parseInt(value) != 0; + } catch (NumberFormatException ignore) { + return Boolean.parseBoolean(value); + } + } + + private DateTimeFormatter timestampFormat(int length) { + final DateTimeFormatterBuilder dtf = + new DateTimeFormatterBuilder() + .appendPattern("yyyy-MM-dd") + .optionalStart() + .appendLiteral(" ") + .append(DateTimeFormatter.ISO_LOCAL_TIME) + .optionalEnd() + .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) + .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) + .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0); + if (length > 0) { + dtf.appendFraction(ChronoField.MICRO_OF_SECOND, 0, length, true); + } + return dtf.toFormatter(); + } + + /** + * Clean input timestamp to yyyy-mm-dd hh:mm:ss[.fffffffff] format. 
+ * + * @param s input timestamp + * @return cleaned timestamp + */ + private String cleanTimestamp(String s) { + if (s == null) { + throw new IllegalArgumentException("null string"); + } + + s = s.trim(); + + // clean first dash + s = replaceFirstNonNumericSubstring(s, 0, '-'); + // clean second dash + s = replaceFirstNonNumericSubstring(s, s.indexOf('-') + 1, '-'); + // clean dividing space + s = replaceFirstNonNumericSubstring(s, s.indexOf('-', s.indexOf('-') + 1) + 1, ' '); + if (s.indexOf(' ') != -1) { + // clean first colon + s = replaceFirstNonNumericSubstring(s, s.indexOf(' ') + 1, ':'); + if (s.indexOf(':') != -1) { + // clean second colon + s = replaceFirstNonNumericSubstring(s, s.indexOf(':') + 1, ':'); + } + } + + final int maxMonth = 12; + final int maxDay = 31; + + // Parse the date + int firstDash = s.indexOf('-'); + int secondDash = s.indexOf('-', firstDash + 1); + int dividingSpace = s.indexOf(' '); + + // Parse the time + int firstColon = s.indexOf(':', dividingSpace + 1); + int secondColon = s.indexOf(':', firstColon + 1); + int period = s.indexOf('.', secondColon + 1); + + int year = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute = 0; + int second = 0; + + // Get the date + int len = s.length(); + boolean parsedDate = false; + if (firstDash > 0 && secondDash > firstDash) { + year = Integer.parseInt(s.substring(0, firstDash)); + month = Integer.parseInt(s.substring(firstDash + 1, secondDash)); + if (dividingSpace != -1) { + day = Integer.parseInt(s.substring(secondDash + 1, dividingSpace)); + } else { + day = Integer.parseInt(s.substring(secondDash + 1, len)); + } + + if ((month >= 1 && month <= maxMonth) && (day >= 1 && day <= maxDay)) { + parsedDate = true; + } + } + if (!parsedDate) { + throw new IllegalArgumentException("Cannot parse the date from " + s); + } + + // Get the time. Hour, minute, second and colons are all optional + if (dividingSpace != -1 && dividingSpace < len - 1) { + if (firstColon == -1) { + hour = Integer.parseInt(s.substring(dividingSpace + 1, len)); + } else { + hour = Integer.parseInt(s.substring(dividingSpace + 1, firstColon)); + if (firstColon < len - 1) { + if (secondColon == -1) { + minute = Integer.parseInt(s.substring(firstColon + 1, len)); + } else { + minute = Integer.parseInt(s.substring(firstColon + 1, secondColon)); + if (secondColon < len - 1) { + if (period == -1) { + second = Integer.parseInt(s.substring(secondColon + 1, len)); + } else { + second = Integer.parseInt(s.substring(secondColon + 1, period)); + } + } + } + } + } + } + + StringBuilder cleanedTimestamp = new StringBuilder(); + cleanedTimestamp = + cleanedTimestamp.append( + String.format( + "%04d-%02d-%02d %02d:%02d:%02d", + year, month, day, hour, minute, second)); + + if (period != -1 && period < len - 1) { + cleanedTimestamp = cleanedTimestamp.append(".").append(s.substring(period + 1)); + } + + return cleanedTimestamp.toString(); + } + + /** + * Replace the first non-numeric substring. 
+ * + * @param s the original string + * @param startIndex the beginning index, inclusive + * @param c the new character + * @return + */ + private String replaceFirstNonNumericSubstring(String s, int startIndex, char c) { + StringBuilder sb = new StringBuilder(); + sb.append(s.substring(0, startIndex)); + + String rest = s.substring(startIndex); + sb.append(rest.replaceFirst("[^\\d]+", Character.toString(c))); + return sb.toString(); + } + + private String stripCharacterSetIntroducer(String value) { + final Matcher matcher = CHARSET_INTRODUCER_PATTERN.matcher(value); + return !matcher.matches() ? value : matcher.group(1); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java new file mode 100644 index 00000000000..fb2d3d4c353 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/converter/TiDBValueConverters.java @@ -0,0 +1,726 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.converter; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.util.FlinkRuntimeException; + +import com.github.shyiko.mysql.binlog.event.deserialization.json.JsonBinary; +import io.debezium.DebeziumException; +import io.debezium.config.CommonConnectorConfig; +import io.debezium.connector.mysql.MySqlConnection; +import io.debezium.connector.mysql.MySqlGeometry; +import io.debezium.connector.mysql.MySqlUnsignedIntegerConverter; +import io.debezium.connector.mysql.antlr.MySqlAntlrDdlParser; +import io.debezium.data.Json; +import io.debezium.data.geometry.Geometry; +import io.debezium.data.geometry.Point; +import io.debezium.jdbc.JdbcValueConverters; +import io.debezium.jdbc.TemporalPrecisionMode; +import io.debezium.relational.Column; +import io.debezium.relational.ValueConverter; +import io.debezium.time.Year; +import io.debezium.util.Strings; +import org.apache.kafka.connect.data.Decimal; +import org.apache.kafka.connect.data.Field; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.errors.ConnectException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteOrder; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.StandardCharsets; +import java.sql.Date; +import java.sql.Timestamp; +import java.sql.Types; +import java.time.Duration; +import java.time.ZoneOffset; +import java.time.temporal.ChronoField; +import java.time.temporal.ChronoUnit; +import java.time.temporal.Temporal; +import java.time.temporal.TemporalAdjuster; +import java.util.List; +import java.util.regex.Pattern; + +/** JdbcValueConverters for tiDB. */ +public class TiDBValueConverters extends JdbcValueConverters { + + /** Handler for parsing errors. */ + @FunctionalInterface + public interface ParsingErrorHandler { + void error(String message, Exception exception); + } + + private static final Logger LOGGER = LoggerFactory.getLogger(TiDBValueConverters.class); + /** Used to parse values of TIME columns. Format: 000:00:00.000000. */ + private static final Pattern TIME_FIELD_PATTERN = + Pattern.compile("(\\-?[0-9]*):([0-9]*):([0-9]*)(\\.([0-9]*))?"); + + /** Used to parse values of DATE columns. Format: 000-00-00. */ + private static final Pattern DATE_FIELD_PATTERN = Pattern.compile("([0-9]*)-([0-9]*)-([0-9]*)"); + + /** Used to parse values of TIMESTAMP columns. Format: 000-00-00 00:00:00.000. 
*/ + private static final Pattern TIMESTAMP_FIELD_PATTERN = + Pattern.compile("([0-9]*)-([0-9]*)-([0-9]*) .*"); + + public static Temporal adjustTemporal(Temporal temporal) { + if (temporal.isSupported(ChronoField.YEAR)) { + int year = temporal.get(ChronoField.YEAR); + if (0 <= year && year <= 69) { + temporal = temporal.plus(2000, ChronoUnit.YEARS); + } else if (70 <= year && year <= 99) { + temporal = temporal.plus(1900, ChronoUnit.YEARS); + } + } + return temporal; + } + + // todo + public TiDBValueConverters(TiDBConnectorConfig connectorConfig) { + super( + connectorConfig.getDecimalMode(), + connectorConfig.getTemporalPrecisionMode(), + ZoneOffset.UTC, + x -> x, + BigIntUnsignedMode.PRECISE, + connectorConfig.binaryHandlingMode()); + } + + public TiDBValueConverters( + DecimalMode decimalMode, + TemporalPrecisionMode temporalPrecisionMode, + BigIntUnsignedMode bigIntUnsignedMode, + CommonConnectorConfig.BinaryHandlingMode binaryMode) { + this( + decimalMode, + temporalPrecisionMode, + bigIntUnsignedMode, + binaryMode, + x -> x, + TiDBValueConverters::defaultParsingErrorHandler); + } + + public TiDBValueConverters( + DecimalMode decimalMode, + TemporalPrecisionMode temporalPrecisionMode, + BigIntUnsignedMode bigIntUnsignedMode, + CommonConnectorConfig.BinaryHandlingMode binaryMode, + TemporalAdjuster adjuster, + ParsingErrorHandler parsingErrorHandler) { + super( + decimalMode, + temporalPrecisionMode, + ZoneOffset.UTC, + adjuster, + bigIntUnsignedMode, + binaryMode); + // this.parsingErrorHandler = parsingErrorHandler; + } + + @Override + protected ByteOrder byteOrderOfBitType() { + return ByteOrder.BIG_ENDIAN; + } + + @Override + public SchemaBuilder schemaBuilder(Column column) { + // Handle a few MySQL-specific types based upon how they are handled by the MySQL binlog + // client ... 
+ String typeName = column.typeName().toUpperCase(); + if (matches(typeName, "JSON")) { + return Json.builder(); + } + if (matches(typeName, "POINT")) { + return Point.builder(); + } + if (matches(typeName, "GEOMETRY") + || matches(typeName, "LINESTRING") + || matches(typeName, "POLYGON") + || matches(typeName, "MULTIPOINT") + || matches(typeName, "MULTILINESTRING") + || matches(typeName, "MULTIPOLYGON") + || isGeometryCollection(typeName)) { + return Geometry.builder(); + } + if (matches(typeName, "YEAR")) { + return Year.builder(); + } + if (matches(typeName, "ENUM")) { + String commaSeparatedOptions = extractEnumAndSetOptionsAsString(column); + return io.debezium.data.Enum.builder(commaSeparatedOptions); + } + if (matches(typeName, "SET")) { + String commaSeparatedOptions = extractEnumAndSetOptionsAsString(column); + return io.debezium.data.EnumSet.builder(commaSeparatedOptions); + } + if (matches(typeName, "SMALLINT UNSIGNED") + || matches(typeName, "SMALLINT UNSIGNED ZEROFILL") + || matches(typeName, "INT2 UNSIGNED") + || matches(typeName, "INT2 UNSIGNED ZEROFILL")) { + // In order to capture unsigned SMALLINT 16-bit data source, INT32 will be required to + // safely capture all valid values + // Source: + // https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html + return SchemaBuilder.int32(); + } + if (matches(typeName, "INT UNSIGNED") + || matches(typeName, "INT UNSIGNED ZEROFILL") + || matches(typeName, "INT4 UNSIGNED") + || matches(typeName, "INT4 UNSIGNED ZEROFILL")) { + // In order to capture unsigned INT 32-bit data source, INT64 will be required to safely + // capture all valid values + // Source: + // https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html + return SchemaBuilder.int64(); + } + if (matches(typeName, "BIGINT UNSIGNED") + || matches(typeName, "BIGINT UNSIGNED ZEROFILL") + || matches(typeName, "INT8 UNSIGNED") + || matches(typeName, "INT8 UNSIGNED ZEROFILL")) { + switch (super.bigIntUnsignedMode) { + case LONG: + return SchemaBuilder.int64(); + case PRECISE: + // In order to capture unsigned INT 64-bit data source, + // org.apache.kafka.connect.data.Decimal:Byte will be required to safely capture + // all valid values with scale of 0 + // Source: + // https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.Type.html + return Decimal.builder(0); + } + } + + // Otherwise, let the base class handle it ... + return super.schemaBuilder(column); + } + + @Override + public ValueConverter converter(Column column, Field fieldDefn) { + // Handle a few MySQL-specific types based upon how they are handled by the MySQL binlog + // client ... + String typeName = column.typeName().toUpperCase(); + if (matches(typeName, "JSON")) { + return (data) -> convertJson(column, fieldDefn, data); + } + if (matches(typeName, "GEOMETRY") + || matches(typeName, "LINESTRING") + || matches(typeName, "POLYGON") + || matches(typeName, "MULTIPOINT") + || matches(typeName, "MULTILINESTRING") + || matches(typeName, "MULTIPOLYGON") + || isGeometryCollection(typeName)) { + return (data -> convertGeometry(column, fieldDefn, data)); + } + if (matches(typeName, "POINT")) { + // backwards compatibility + return (data -> convertPoint(column, fieldDefn, data)); + } + if (matches(typeName, "YEAR")) { + return (data) -> convertYearToInt(column, fieldDefn, data); + } + if (matches(typeName, "ENUM")) { + // Build up the character array based upon the column's type ... 
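The schemaBuilder overrides above widen unsigned integer columns by one step (SMALLINT UNSIGNED to INT32, INT UNSIGNED to INT64, and BIGINT UNSIGNED to a scale-0 Decimal in PRECISE mode) because the unsigned range does not fit the corresponding signed Connect type. A trivial, purely illustrative sketch of the range argument:

public class UnsignedWideningSketch {
    public static void main(String[] args) {
        // SMALLINT UNSIGNED can reach 65535, which overflows a signed 16-bit value.
        int maxUnsignedSmallint = 65535;
        System.out.println(maxUnsignedSmallint > Short.MAX_VALUE); // true, so INT32 is needed

        // INT UNSIGNED can reach 4294967295, which overflows a signed 32-bit value.
        long maxUnsignedInt = 4294967295L;
        System.out.println(maxUnsignedInt > Integer.MAX_VALUE);    // true, so INT64 is needed
    }
}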
+ List options = extractEnumAndSetOptions(column); + return (data) -> convertEnumToString(options, column, fieldDefn, data); + } + if (matches(typeName, "SET")) { + // Build up the character array based upon the column's type ... + List options = extractEnumAndSetOptions(column); + return (data) -> convertSetToString(options, column, fieldDefn, data); + } + if (matches(typeName, "TINYINT UNSIGNED") + || matches(typeName, "TINYINT UNSIGNED ZEROFILL") + || matches(typeName, "INT1 UNSIGNED") + || matches(typeName, "INT1 UNSIGNED ZEROFILL")) { + // Convert TINYINT UNSIGNED internally from SIGNED to UNSIGNED based on the boundary + // settings + return (data) -> convertUnsignedTinyint(column, fieldDefn, data); + } + if (matches(typeName, "SMALLINT UNSIGNED") + || matches(typeName, "SMALLINT UNSIGNED ZEROFILL") + || matches(typeName, "INT2 UNSIGNED") + || matches(typeName, "INT2 UNSIGNED ZEROFILL")) { + // Convert SMALLINT UNSIGNED internally from SIGNED to UNSIGNED based on the boundary + // settings + return (data) -> convertUnsignedSmallint(column, fieldDefn, data); + } + if (matches(typeName, "MEDIUMINT UNSIGNED") + || matches(typeName, "MEDIUMINT UNSIGNED ZEROFILL") + || matches(typeName, "INT3 UNSIGNED") + || matches(typeName, "INT3 UNSIGNED ZEROFILL") + || matches(typeName, "MIDDLEINT UNSIGNED") + || matches(typeName, "MIDDLEINT UNSIGNED ZEROFILL")) { + // Convert MEDIUMINT UNSIGNED internally from SIGNED to UNSIGNED based on the boundary + // settings + return (data) -> convertUnsignedMediumint(column, fieldDefn, data); + } + if (matches(typeName, "INT UNSIGNED") + || matches(typeName, "INT UNSIGNED ZEROFILL") + || matches(typeName, "INT4 UNSIGNED") + || matches(typeName, "INT4 UNSIGNED ZEROFILL")) { + // Convert INT UNSIGNED internally from SIGNED to UNSIGNED based on the boundary + // settings + return (data) -> convertUnsignedInt(column, fieldDefn, data); + } + if (matches(typeName, "BIGINT UNSIGNED") + || matches(typeName, "BIGINT UNSIGNED ZEROFILL") + || matches(typeName, "INT8 UNSIGNED") + || matches(typeName, "INT8 UNSIGNED ZEROFILL")) { + switch (super.bigIntUnsignedMode) { + case LONG: + return (data) -> convertBigInt(column, fieldDefn, data); + case PRECISE: + // Convert BIGINT UNSIGNED internally from SIGNED to UNSIGNED based on the + // boundary settings + return (data) -> convertUnsignedBigint(column, fieldDefn, data); + } + } + + // We have to convert bytes encoded in the column's character set ... 
+ switch (column.jdbcType()) { + case Types.CHAR: // variable-length + case Types.VARCHAR: // variable-length + case Types.LONGVARCHAR: // variable-length + case Types.CLOB: // variable-length + case Types.NCHAR: // fixed-length + case Types.NVARCHAR: // fixed-length + case Types.LONGNVARCHAR: // fixed-length + case Types.NCLOB: // fixed-length + case Types.DATALINK: + case Types.SQLXML: + Charset charset = charsetFor(column); + if (charset != null) { + logger.debug("Using {} charset by default for column: {}", charset, column); + return (data) -> convertString(column, fieldDefn, charset, data); + } + logger.warn( + "Using UTF-8 charset by default for column without charset: {}", column); + return (data) -> convertString(column, fieldDefn, StandardCharsets.UTF_8, data); + case Types.TIME: + if (adaptiveTimeMicrosecondsPrecisionMode) { + return (data) -> convertTime(column, fieldDefn, data); + } + break; + case Types.TIMESTAMP: + return ((ValueConverter) + (data -> convertTimestampToLocalDateTime(column, fieldDefn, data))) + .and(super.converter(column, fieldDefn)); + default: + break; + } + + // Otherwise, let the base class handle it ... + return super.converter(column, fieldDefn); + } + + protected Object convertJson(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + "{}", + (r) -> { + if (data instanceof byte[]) { + if (((byte[]) data).length == 0) { + r.deliver(column.isOptional() ? null : "{}"); + } else { + try { + r.deliver(JsonBinary.parseAsString((byte[]) data)); + } catch (IOException var5) { + throw new FlinkRuntimeException("tidbvalueConverters error"); + // this.parsingErrorHandler.error( + // "Failed to parse and read + // a JSON value on '" + // + column + // + "' value " + // + + // Arrays.toString((byte[]) data), + // var5); + // r.deliver(column.isOptional() ? 
+ // null : "{}"); + } + } + } else if (data instanceof String) { + r.deliver(data); + } + }); + } + + protected Object convertPoint(Column column, Field fieldDefn, Object data) { + MySqlGeometry empty = MySqlGeometry.createEmpty(); + return this.convertValue( + column, + fieldDefn, + data, + Geometry.createValue(fieldDefn.schema(), empty.getWkb(), empty.getSrid()), + (r) -> { + if (data instanceof byte[]) { + MySqlGeometry mySqlGeometry = MySqlGeometry.fromBytes((byte[]) data); + if (!mySqlGeometry.isPoint()) { + throw new ConnectException( + "Failed to parse and read a value of type POINT on " + column); + } + + r.deliver( + Point.createValue( + fieldDefn.schema(), + mySqlGeometry.getWkb(), + mySqlGeometry.getSrid())); + } + }); + } + + protected Object convertYearToInt(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0, + (r) -> { + Object mutData = data; + if (data instanceof java.time.Year) { + r.deliver( + adjustTemporal( + java.time.Year.of( + ((java.time.Year) data).getValue())) + .get(ChronoField.YEAR)); + } else if (data instanceof Date) { + r.deliver(((Date) data).getYear() + 1900); + } else if (data instanceof String) { + mutData = Integer.valueOf((String) data); + } + + if (mutData instanceof Number) { + r.deliver( + adjustTemporal(java.time.Year.of(((Number) mutData).intValue())) + .get(ChronoField.YEAR)); + } + }); + } + + protected Object convertEnumToString( + List options, Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + "", + (r) -> { + if (data instanceof String) { + r.deliver(data); + } else if (data instanceof Integer) { + if (options != null) { + int value = (Integer) data; + if (value == 0) { + r.deliver(""); + } + + int index = value - 1; + if (index < options.size() && index >= 0) { + r.deliver(options.get(index)); + } + } else { + r.deliver((Object) null); + } + } + }); + } + + protected Object convertSetToString( + List options, Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + "", + (r) -> { + if (data instanceof String) { + r.deliver(data); + } else if (data instanceof Long) { + long indexes = (Long) data; + r.deliver(this.convertSetValue(column, indexes, options)); + } + }); + } + + protected String convertSetValue(Column column, long indexes, List options) { + StringBuilder sb = new StringBuilder(); + int index = 0; + boolean first = true; + + for (int optionLen = options.size(); indexes != 0L; indexes >>>= 1) { + if (indexes % 2L != 0L) { + if (first) { + first = false; + } else { + sb.append(','); + } + + if (index < optionLen) { + sb.append((String) options.get(index)); + } else { + this.logger.warn("Found unexpected index '{}' on column {}", index, column); + } + } + + ++index; + } + + return sb.toString(); + } + + protected Object convertUnsignedBigint(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0L, + (r) -> { + if (data instanceof BigDecimal) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedBigint( + (BigDecimal) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedBigint( + new BigDecimal(((Number) data).toString()))); + } else if (data instanceof String) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedBigint( + new BigDecimal((String) data))); + } else { + r.deliver(this.convertNumeric(column, fieldDefn, data)); + } + }); + } + + 
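The convertSetValue method above decodes MySQL/TiDB SET values from a bitmask: the lowest bit selects the first declared option, each higher bit the next option, and out-of-range bits are merely logged. A standalone sketch of the same decoding with made-up option names; it mirrors the bit-walking logic but is not part of the patch.

import java.util.Arrays;
import java.util.List;

public class SetDecodeSketch {
    // Decode a SET bitmask into a comma-separated list of selected options.
    static String decode(long indexes, List<String> options) {
        StringBuilder sb = new StringBuilder();
        for (int index = 0; indexes != 0L; indexes >>>= 1, index++) {
            if ((indexes & 1L) != 0L && index < options.size()) {
                if (sb.length() > 0) {
                    sb.append(',');
                }
                sb.append(options.get(index));
            }
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        List<String> options = Arrays.asList("red", "green", "blue");
        // Bits 0 and 2 are set, so the first and third options are selected.
        System.out.println(decode(0b101L, options)); // prints: red,blue
    }
}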
protected Charset charsetFor(Column column) { + String mySqlCharsetName = column.charsetName(); + if (mySqlCharsetName == null) { + logger.warn("Column is missing a character set: {}", column); + return null; + } + String encoding = MySqlConnection.getJavaEncodingForMysqlCharSet(mySqlCharsetName); + if (encoding == null) { + logger.debug( + "Column uses MySQL character set '{}', which has no mapping to a Java character set, will try it in lowercase", + mySqlCharsetName); + encoding = + MySqlConnection.getJavaEncodingForMysqlCharSet(mySqlCharsetName.toLowerCase()); + } + if (encoding == null) { + logger.warn( + "Column uses MySQL character set '{}', which has no mapping to a Java character set", + mySqlCharsetName); + } else { + try { + return Charset.forName(encoding); + } catch (IllegalCharsetNameException e) { + logger.error( + "Unable to load Java charset '{}' for column with MySQL character set '{}'", + encoding, + mySqlCharsetName); + } + } + return null; + } + + protected boolean matches(String upperCaseTypeName, String upperCaseMatch) { + if (upperCaseTypeName == null) { + return false; + } else { + return upperCaseMatch.equals(upperCaseTypeName) + || upperCaseTypeName.startsWith(upperCaseMatch + "("); + } + } + + protected Object convertDurationToMicroseconds(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0L, + (r) -> { + try { + if (data instanceof Duration) { + r.deliver(((Duration) data).toNanos() / 1000L); + } + } catch (IllegalArgumentException var3) { + } + }); + } + + protected Object convertUnsignedInt(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0L, + (r) -> { + if (data instanceof Long) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedInteger((Long) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedInteger( + ((Number) data).longValue())); + } else { + r.deliver(this.convertBigInt(column, fieldDefn, data)); + } + }); + } + + protected Object convertUnsignedMediumint(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0, + (r) -> { + if (data instanceof Integer) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedMediumint( + (Integer) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedMediumint( + ((Number) data).intValue())); + } else { + r.deliver(this.convertInteger(column, fieldDefn, data)); + } + }); + } + + protected Object convertUnsignedSmallint(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + 0, + (r) -> { + if (data instanceof Integer) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedSmallint( + (Integer) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedSmallint( + ((Number) data).intValue())); + } else { + r.deliver(this.convertInteger(column, fieldDefn, data)); + } + }); + } + + protected Object convertUnsignedTinyint(Column column, Field fieldDefn, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + Short.valueOf((short) 0), + (r) -> { + if (data instanceof Short) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedTinyint((Short) data)); + } else if (data instanceof Number) { + r.deliver( + MySqlUnsignedIntegerConverter.convertUnsignedTinyint( + ((Number) data).shortValue())); + } else { 
+ r.deliver(this.convertSmallInt(column, fieldDefn, data)); + } + }); + } + + protected Object convertGeometry(Column column, Field fieldDefn, Object data) { + MySqlGeometry empty = MySqlGeometry.createEmpty(); + return this.convertValue( + column, + fieldDefn, + data, + Geometry.createValue(fieldDefn.schema(), empty.getWkb(), empty.getSrid()), + (r) -> { + if (data instanceof byte[] && data instanceof byte[]) { + MySqlGeometry mySqlGeometry = MySqlGeometry.fromBytes((byte[]) data); + r.deliver( + Geometry.createValue( + fieldDefn.schema(), + mySqlGeometry.getWkb(), + mySqlGeometry.getSrid())); + } + }); + } + + protected boolean isGeometryCollection(String upperCaseTypeName) { + if (upperCaseTypeName == null) { + return false; + } else { + return upperCaseTypeName.equals("GEOMETRYCOLLECTION") + || upperCaseTypeName.equals("GEOMCOLLECTION") + || upperCaseTypeName.endsWith(".GEOMCOLLECTION"); + } + } + + protected String extractEnumAndSetOptionsAsString(Column column) { + return Strings.join(",", this.extractEnumAndSetOptions(column)); + } + + protected List extractEnumAndSetOptions(Column column) { + return MySqlAntlrDdlParser.extractEnumAndSetOptions(column.enumValues()); + } + + public static void defaultParsingErrorHandler(String message, Exception exception) { + throw new DebeziumException(message, exception); + } + + protected Object convertString( + Column column, Field fieldDefn, Charset columnCharset, Object data) { + return this.convertValue( + column, + fieldDefn, + data, + "", + (r) -> { + if (data instanceof byte[]) { + r.deliver(new String((byte[]) data, columnCharset)); + } else if (data instanceof String) { + r.deliver(data); + } + }); + } + + protected Object convertTimestampToLocalDateTime(Column column, Field fieldDefn, Object data) { + if (data == null && !fieldDefn.schema().isOptional()) { + return null; + } else { + return !(data instanceof Timestamp) ? data : ((Timestamp) data).toLocalDateTime(); + } + } + + @Override + protected Object convertTime(Column column, Field fieldDefn, Object data) { + if (data instanceof String) { + data = Strings.asDuration((String) data); + } + return super.convertTime(column, fieldDefn, data); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java new file mode 100644 index 00000000000..607b2d6a226 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventEmitter.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.data.Envelope; +import io.debezium.pipeline.spi.OffsetContext; +import io.debezium.relational.RelationalChangeRecordEmitter; +import io.debezium.util.Clock; + +import java.io.Serializable; + +/** TiDB event emitter. */ +public class EventEmitter extends RelationalChangeRecordEmitter { + private final Envelope.Operation operation; + private final Object[] before; + private final Object[] after; + + public EventEmitter( + TiDBPartition partition, + OffsetContext offsetContext, + Clock clock, + Envelope.Operation operation, + Serializable[] before, + Serializable[] after) { + super(partition, offsetContext, clock); + this.operation = operation; + this.before = before; + this.after = after; + } + + @Override + protected Object[] getOldColumnValues() { + return before; + } + + @Override + protected Object[] getNewColumnValues() { + return after; + } + + @Override + public Envelope.Operation getOperation() { + return operation; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java new file mode 100644 index 00000000000..ce9d7403124 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/EventSourceReader.java @@ -0,0 +1,454 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.relational.JdbcSourceEventDispatcher; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.meta.wartermark.WatermarkKind; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.utils.TableKeyRangeUtils; +import org.apache.flink.util.Preconditions; + +import org.apache.flink.shaded.guava31.com.google.common.util.concurrent.ThreadFactoryBuilder; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.data.Envelope; +import io.debezium.pipeline.ErrorHandler; +import io.debezium.pipeline.source.spi.StreamingChangeEventSource; +import io.debezium.relational.TableId; +import io.debezium.relational.TableSchema; +import io.debezium.util.Clock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.tikv.cdc.CDCClient; +import org.tikv.common.TiConfiguration; +import org.tikv.common.TiSession; +import org.tikv.common.key.RowKey; +import org.tikv.common.meta.TiColumnInfo; +import org.tikv.common.meta.TiTableInfo; +import org.tikv.kvproto.Cdcpb; +import org.tikv.kvproto.Coprocessor; +import org.tikv.shade.com.google.protobuf.ByteString; + +import java.io.Serializable; +import java.time.Instant; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadFactory; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.tikv.common.codec.TableCodec.decodeObjects; + +/** TiDB streaming change event source reader. */ +public class EventSourceReader + implements StreamingChangeEventSource { + private static final Logger LOG = LoggerFactory.getLogger(EventSourceReader.class); + private final StreamSplit split; + private final TiDBConnectorConfig connectorConfig; + private final TiConfiguration ticonf; + private final JdbcSourceEventDispatcher eventDispatcher; + private final ErrorHandler errorHandler; + private final TiDBSourceFetchTaskContext taskContext; + private final Map> fieldIndexMap = new HashMap<>(); + public ChangeEventSourceContext context; + + private static final long STREAMING_VERSION_START_EPOCH = 0L; + + /** Task local variables. 
*/ + private transient TiSession session = null; + + private transient Coprocessor.KeyRange keyRange = null; + private transient CDCClient cdcClient = null; + private transient volatile long resolvedTs = -1L; + private transient TreeMap prewrites = null; + private transient TreeMap commits = null; + private transient BlockingQueue committedEvents = null; + private transient TableId tableId; + private transient TiTableInfo tableInfo; + + private transient boolean running = true; + private transient ExecutorService executorService; + + public EventSourceReader( + TiDBConnectorConfig connectorConfig, + JdbcSourceEventDispatcher eventDispatcher, + ErrorHandler errorHandler, + TiDBSourceFetchTaskContext taskContext, + StreamSplit split) { + this.connectorConfig = connectorConfig; + this.ticonf = connectorConfig.getSourceConfig().getTiConfiguration(); + this.eventDispatcher = eventDispatcher; + this.errorHandler = errorHandler; + this.taskContext = taskContext; + this.split = split; + } + + @Override + public void init() throws InterruptedException { + StreamingChangeEventSource.super.init(); + session = TiSession.create(ticonf); + Set tableIds = this.split.getTableSchemas().keySet(); + if (tableIds.isEmpty() && tableIds.size() != 1) { + LOG.error("Currently only single table ingest is supported."); + return; + } + this.tableId = tableIds.stream().findFirst().get(); + this.tableInfo = session.getCatalog().getTable(tableId.catalog(), tableId.table()); + if (tableInfo == null) { + throw new RuntimeException( + String.format( + "Table %s.%s does not exist.", tableId.catalog(), tableId.table())); + } + keyRange = TableKeyRangeUtils.getTableKeyRange(tableInfo.getId(), 1, 0); + cdcClient = new CDCClient(session, keyRange); + prewrites = new TreeMap<>(); + commits = new TreeMap<>(); + // cdc event will lose if pull cdc event block when region split + // use queue to separate read and write to ensure pull event unblock. + // since sink jdbc is slow, 5000W queue size may be safe size. + committedEvents = new LinkedBlockingQueue<>(); + resolvedTs = EventOffset.getStartTs(this.split.getStartingOffset()); + ThreadFactory threadFactory = + new ThreadFactoryBuilder().setNameFormat("tidb-source-function-0").build(); + executorService = Executors.newSingleThreadExecutor(threadFactory); + } + + @Override + public void execute( + ChangeEventSourceContext context, + TiDBPartition partition, + EventOffsetContext offsetContext) + throws InterruptedException { + this.context = context; + if (connectorConfig.getSourceConfig().getStartupOptions().isSnapshotOnly()) { + LOG.info("Streaming is not enabled in current configuration"); + return; + } + this.taskContext.getDatabaseSchema().assureNonEmptySchema(); + cdcClient.start(resolvedTs); + running = true; + EventOffsetContext effectiveOffsetContext = + offsetContext != null + ? offsetContext + : EventOffsetContext.initial(this.connectorConfig); + try { + EventOffset currentOffset = new EventOffset(effectiveOffsetContext.getOffset()); + if (currentOffset.isBefore(split.getStartingOffset())) { + return; + } + readChangeEvents(partition, effectiveOffsetContext); + } catch (Exception e) { + this.errorHandler.setProducerThrowable(e); + } + } + + protected void readChangeEvents(TiDBPartition partition, EventOffsetContext offsetContext) + throws Exception { + LOG.info("read change event from resolvedTs:{}", resolvedTs); + // child thread to sink committed rows. 
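+                    // Consumer side: a single-threaded executor drains committedEvents and emits each
+                    // committed row as a change event, so slow downstream emission does not block the
+                    // TiKV CDC client pull loop below.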
+ executorService.execute( + () -> { + while (running) { + try { + Cdcpb.Event.Row committedRow = committedEvents.take(); + EventOffset currentOffset = new EventOffset(offsetContext.getOffset()); + if (currentOffset.isBefore(split.getStartingOffset())) { + return; + } + if (!EventOffset.NO_STOPPING_OFFSET.equals(split.getEndingOffset()) + && currentOffset.isAtOrAfter(split.getEndingOffset())) { + // send watermark event; + try { + eventDispatcher.dispatchWatermarkEvent( + partition.getSourcePartition(), + split, + currentOffset, + WatermarkKind.END); + } catch (InterruptedException e) { + LOG.error("Send signal event error.", e); + errorHandler.setProducerThrowable( + new RuntimeException( + "Error processing log signal event", e)); + } + ((StoppableChangeEventSourceContext) context) + .stopChangeEventSource(); + return; + } + + final EventOffsetContext localOffsetContext = + new EventOffsetContext.Loader(this.connectorConfig) + .load(currentOffset.getOffset()); + emitChangeEvent(partition, localOffsetContext, committedRow); + // use startTs of row as messageTs, use commitTs of row as fetchTs + } catch (Exception e) { + LOG.error("Read change events error.", e); + } + } + }); + while (resolvedTs >= STREAMING_VERSION_START_EPOCH) { + for (int i = 0; i < 1000; i++) { + final Cdcpb.Event.Row row = cdcClient.get(); + if (row == null) { + break; + } + handleRow(row); + } + resolvedTs = cdcClient.getMaxResolvedTs(); + if (commits.size() > 0) { + flushRows(resolvedTs); + } + } + } + + protected void emitChangeEvent( + TiDBPartition partition, EventOffsetContext offsetContext, final Cdcpb.Event.Row row) + throws Exception { + if (!context.isRunning()) { + LOG.info("sourceContext is not running, skip emit change event."); + return; + } + if (tableId == null) { + LOG.warn("No valid tableId found, skipping log message: {}", row); + return; + } + TableSchema tableSchema = taskContext.getDatabaseSchema().schemaFor(tableId); + if (tableSchema == null) { + LOG.warn("No table schema found, skipping log message: {}", row); + return; + } + offsetContext.event(tableSchema.id(), Instant.ofEpochMilli(row.getCommitTs())); + Set fieldIndex = fieldIndexConverter(tableInfo.getColumns(), tableSchema); + + Serializable[] before = null; + Serializable[] after = null; + final RowKey rowKey = RowKey.decode(row.getKey().toByteArray()); + final long handle = rowKey.getHandle(); + Envelope.Operation operation = getOperation(row); + switch (operation) { + case CREATE: + after = + (Serializable[]) + getSerializableObject( + handle, row.getValue(), tableInfo, fieldIndex); + break; + case UPDATE: + before = + (Serializable[]) + getSerializableObject( + handle, row.getOldValue(), tableInfo, fieldIndex); + after = + (Serializable[]) + getSerializableObject( + handle, row.getValue(), tableInfo, fieldIndex); + break; + case DELETE: + before = + (Serializable[]) + getSerializableObject( + handle, row.getOldValue(), tableInfo, fieldIndex); + + break; + default: + LOG.error("Row data opType is not supported,row:{}.", row); + } + eventDispatcher.dispatchDataChangeEvent( + partition, + tableSchema.id(), + new EventEmitter(partition, offsetContext, Clock.SYSTEM, operation, before, after)); + } + + private Object[] getSerializableObject( + long handle, final ByteString value, TiTableInfo tableInfo, Set fieldIndex) { + Object[] serializableObject = new Serializable[fieldIndex.size()]; + try { + if (value == null) { + return null; + } + + Object[] tiKVValueAfter; + if (value != null && !value.isEmpty()) { + tiKVValueAfter = 
decodeObjects(value.toByteArray(), handle, tableInfo); + } else { + return null; + } + for (int index : fieldIndex) { + serializableObject[index] = tiKVValueAfter[index]; + } + return serializableObject; + } catch (Exception e) { + LOG.error("decode object error", e); + return null; + } + } + + private Set fieldIndexConverter( + List tiColumnInfos, TableSchema tableSchema) { + Map fieldIndex = + fieldIndexMap.computeIfAbsent( + tableSchema, + schema -> + IntStream.range(0, schema.valueSchema().fields().size()) + .boxed() + .collect( + Collectors.toMap( + i -> + schema.valueSchema() + .fields() + .get(i) + .name(), + i -> i))); + Set fieldIndexSet = new HashSet<>(); + for (TiColumnInfo tiColumnInfo : tiColumnInfos) { + if (fieldIndex.keySet().stream() + .anyMatch(key -> key.equalsIgnoreCase(tiColumnInfo.getName()))) { + fieldIndexSet.add(tiColumnInfo.getOffset()); + } + } + return fieldIndexSet; + } + + private Envelope.Operation getOperation(final Cdcpb.Event.Row row) { + if (row.getOpType() == Cdcpb.Event.Row.OpType.PUT) { // create ,update + if (row.getValue() != null && row.getOldValue() != null) { + return Envelope.Operation.UPDATE; + } else { + return Envelope.Operation.CREATE; + } + } else if (row.getOpType() == Cdcpb.Event.Row.OpType.DELETE) { // delete + return Envelope.Operation.DELETE; + } else { + LOG.error("Row data opType is not supported,row:{}.", row); + return null; + } + } + + protected void flushRows(final long timestamp) throws Exception { + Preconditions.checkState(context != null, "sourceContext shouldn't be null"); + synchronized (context) { + while (!commits.isEmpty() && commits.firstKey().timestamp <= timestamp) { + final Cdcpb.Event.Row commitRow = commits.pollFirstEntry().getValue(); + final Cdcpb.Event.Row prewriteRow = + prewrites.remove(RowKeyWithTs.ofStart(commitRow)); + // if pull cdc event block when region split, cdc event will lose. 
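+                // committedEvents is an unbounded LinkedBlockingQueue, so offering here never blocks
+                // the flush path; the consumer thread started in readChangeEvents emits the row later.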
+ committedEvents.offer(prewriteRow); + } + } + } + + private void handleRow(final Cdcpb.Event.Row row) { + if (!TableKeyRangeUtils.isRecordKey(row.getKey().toByteArray())) { + // Don't handle index key for now + return; + } + LOG.debug("binlog record, type: {}, data: {}", row.getType(), row); + switch (row.getType()) { + case COMMITTED: + prewrites.put(RowKeyWithTs.ofStart(row), row); + commits.put(RowKeyWithTs.ofCommit(row), row); + break; + case COMMIT: + commits.put(RowKeyWithTs.ofCommit(row), row); + break; + case PREWRITE: + prewrites.put(RowKeyWithTs.ofStart(row), row); + break; + case ROLLBACK: + prewrites.remove(RowKeyWithTs.ofStart(row)); + break; + default: + LOG.warn("Unsupported row type:" + row.getType()); + } + } + + @Override + public boolean executeIteration( + ChangeEventSourceContext context, + TiDBPartition partition, + EventOffsetContext offsetContext) + throws InterruptedException { + return StreamingChangeEventSource.super.executeIteration(context, partition, offsetContext); + } + + @Override + public void commitOffset(Map offset) { + StreamingChangeEventSource.super.commitOffset(offset); + } + + // --------------------------------------- + // static Utils classes + // --------------------------------------- + private static class RowKeyWithTs implements Comparable { + private final long timestamp; + private final RowKey rowKey; + + private RowKeyWithTs(final long timestamp, final RowKey rowKey) { + this.timestamp = timestamp; + this.rowKey = rowKey; + } + + private RowKeyWithTs(final long timestamp, final byte[] key) { + this(timestamp, RowKey.decode(key)); + } + + @Override + public int compareTo(final RowKeyWithTs that) { + int res = Long.compare(this.timestamp, that.timestamp); + if (res == 0) { + res = Long.compare(this.rowKey.getTableId(), that.rowKey.getTableId()); + } + if (res == 0) { + res = Long.compare(this.rowKey.getHandle(), that.rowKey.getHandle()); + } + return res; + } + + @Override + public int hashCode() { + return Objects.hash(this.timestamp, this.rowKey.getTableId(), this.rowKey.getHandle()); + } + + @Override + public boolean equals(final Object thatObj) { + if (thatObj instanceof RowKeyWithTs) { + final RowKeyWithTs that = (RowKeyWithTs) thatObj; + return this.timestamp == that.timestamp && this.rowKey.equals(that.rowKey); + } + return false; + } + + static RowKeyWithTs ofStart(final Cdcpb.Event.Row row) { + return new RowKeyWithTs(row.getStartTs(), row.getKey().toByteArray()); + } + + static RowKeyWithTs ofCommit(final Cdcpb.Event.Row row) { + return new RowKeyWithTs(row.getCommitTs(), row.getKey().toByteArray()); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupMode.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java similarity index 62% rename from flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupMode.java rename to flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java index 07a4284212b..1058b114f41 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupMode.java +++ 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/StoppableChangeEventSourceContext.java @@ -15,14 +15,22 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.tidb.table; +package org.apache.flink.cdc.connectors.tidb.source.fetch; -/** - * Startup modes for the TiDB CDC Consumer. - * - * @see StartupOptions - */ -public enum StartupMode { - INITIAL, - LATEST_OFFSET, +import io.debezium.pipeline.source.spi.ChangeEventSource; + +/** A change event source context that can be stopped. */ +public class StoppableChangeEventSourceContext + implements ChangeEventSource.ChangeEventSourceContext { + + private volatile boolean isRunning = true; + + public void stopChangeEventSource() { + isRunning = false; + } + + @Override + public boolean isRunning() { + return isRunning; + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java new file mode 100644 index 00000000000..8b302e9305c --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTask.java @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.relational.JdbcSourceEventDispatcher; +import org.apache.flink.cdc.connectors.base.source.meta.split.SnapshotSplit; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.meta.wartermark.WatermarkKind; +import org.apache.flink.cdc.connectors.base.source.reader.external.AbstractScanFetchTask; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.pipeline.EventDispatcher; +import io.debezium.pipeline.source.AbstractSnapshotChangeEventSource; +import io.debezium.pipeline.source.spi.ChangeEventSource; +import io.debezium.pipeline.source.spi.SnapshotProgressListener; +import io.debezium.pipeline.spi.ChangeRecordEmitter; +import io.debezium.pipeline.spi.SnapshotResult; +import io.debezium.relational.RelationalSnapshotChangeEventSource; +import io.debezium.relational.SnapshotChangeRecordEmitter; +import io.debezium.relational.Table; +import io.debezium.relational.TableId; +import io.debezium.util.Clock; +import io.debezium.util.ColumnUtils; +import io.debezium.util.Strings; +import io.debezium.util.Threads; +import org.apache.kafka.connect.errors.ConnectException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.time.Duration; + +/** A wrapped task to fetch snapshot split of table. */ +public class TiDBScanFetchTask extends AbstractScanFetchTask { + private static final Logger LOG = LoggerFactory.getLogger(TiDBScanFetchTask.class); + + public TiDBScanFetchTask(SnapshotSplit split) { + super(split); + } + + @Override + protected void executeBackfillTask(Context context, StreamSplit backfillStreamSplit) + throws Exception { + + // just for test + TiDBSourceFetchTaskContext ctx = (TiDBSourceFetchTaskContext) context; + final EventOffset currentOffset = + EventOffset.of( + ((TiDBSourceFetchTaskContext) context).getOffsetContext().getOffset()); + JdbcSourceEventDispatcher dispatcher = ctx.getEventDispatcher(); + dispatcher.dispatchWatermarkEvent( + ctx.getPartition().getSourcePartition(), + backfillStreamSplit, + currentOffset, + WatermarkKind.END); + } + + /** + * Execute data snapshot task. 
+ * + * @param context the task context + * @throws Exception exception + */ + @Override + protected void executeDataSnapshot(Context context) throws Exception { + TiDBSourceFetchTaskContext sourceFetchContext = (TiDBSourceFetchTaskContext) context; + TiDBSnapshotSplitReadTask tiDBSnapshotSplitReadTask = + new TiDBSnapshotSplitReadTask( + sourceFetchContext.getDbzConnectorConfig(), + sourceFetchContext.getOffsetContext(), + sourceFetchContext.getSnapshotChangeEventSourceMetrics(), + sourceFetchContext.getConnection(), + sourceFetchContext.getDatabaseSchema(), + sourceFetchContext.getEventDispatcher(), + snapshotSplit); + TiDBSnapshotSplitChangeEventSourceContext tiDBSnapshotSplitChangeEventSourceContext = + new TiDBSnapshotSplitChangeEventSourceContext(); + SnapshotResult snapshotResult = + tiDBSnapshotSplitReadTask.execute( + tiDBSnapshotSplitChangeEventSourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); + + if (!snapshotResult.isCompletedOrSkipped()) { + taskRunning = false; + throw new IllegalStateException( + String.format("Read snapshot for tidb split %s fail", snapshotResult)); + } + } + + /** A wrapped task to fetch snapshot split of table. 负责从TiDB读取快照分片 */ + public static class TiDBSnapshotSplitReadTask + extends AbstractSnapshotChangeEventSource { + + private static final Logger LOG = LoggerFactory.getLogger(TiDBSnapshotSplitReadTask.class); + private static final Duration LOG_INTERVAL = Duration.ofMillis(10_000); + private final TiDBConnectorConfig connectorConfig; + private final TiDBDatabaseSchema databaseSchema; + + private final TiDBConnection jdbcConnection; + + private final JdbcSourceEventDispatcher dispatcher; + private final Clock clock; + + private final SnapshotSplit snapshotSplit; + + private final EventOffsetContext offsetContext; + private final SnapshotProgressListener snapshotProgressListener; + + public TiDBSnapshotSplitReadTask( + TiDBConnectorConfig connectorConfig, + EventOffsetContext previousOffset, + SnapshotProgressListener snapshotProgressListener, + TiDBConnection jdbcConnection, + TiDBDatabaseSchema databaseSchema, + JdbcSourceEventDispatcher dispatcher, + SnapshotSplit snapshotSplit) { + super(connectorConfig, snapshotProgressListener); + this.connectorConfig = connectorConfig; + this.databaseSchema = databaseSchema; + this.jdbcConnection = jdbcConnection; + this.dispatcher = dispatcher; + this.snapshotSplit = snapshotSplit; + this.offsetContext = previousOffset; + this.snapshotProgressListener = snapshotProgressListener; + this.clock = Clock.SYSTEM; + } + + @Override + public SnapshotResult execute( + ChangeEventSource.ChangeEventSourceContext context, + TiDBPartition partition, + EventOffsetContext previousOffset) + throws InterruptedException { + // todo 返回为null + AbstractSnapshotChangeEventSource.SnapshottingTask snapshottingTask = + getSnapshottingTask(partition, previousOffset); + final TiDBSnapshotContext ctx; + try { + ctx = prepare(partition); + } catch (Exception e) { + LOG.error("Failed to initialize snapshot context.", e); + throw new RuntimeException(e); + } + try { + return doExecute(context, previousOffset, ctx, snapshottingTask); + } catch (InterruptedException e) { + LOG.warn("Snapshot was interrupted before completion"); + throw e; + } catch (Exception e) { + LOG.warn("Snapshot was interrupted before completion"); + throw new RuntimeException(e); + } + } + + private static class TiDBSnapshotContext + extends RelationalSnapshotChangeEventSource.RelationalSnapshotContext< + TiDBPartition, 
EventOffsetContext> { + + public TiDBSnapshotContext(TiDBPartition partition) throws SQLException { + super(partition, ""); + } + } + + @Override + protected SnapshotResult doExecute( + ChangeEventSourceContext context, + EventOffsetContext previousOffset, + SnapshotContext snapshotContext, + SnapshottingTask snapshottingTask) // 没有调用这个参数 + throws Exception { + final TiDBSnapshotContext ctx = (TiDBSnapshotContext) snapshotContext; + ctx.offset = offsetContext; + createDataEvents(ctx, snapshotSplit.getTableId()); + + return SnapshotResult.completed(ctx.offset); + } + + private void createDataEvents(TiDBSnapshotContext snapshotContext, TableId tableId) + throws Exception { + EventDispatcher.SnapshotReceiver snapshotReceiver = + dispatcher.getSnapshotChangeEventReceiver(); + LOG.debug("Snapshotting table {}", tableId); + createDataEventsForTable( + snapshotContext, snapshotReceiver, databaseSchema.tableFor(tableId)); + snapshotReceiver.completeSnapshot(); + } + + private void createDataEventsForTable( + TiDBSnapshotContext snapshotContext, + EventDispatcher.SnapshotReceiver snapshotReceiver, + Table table) + throws InterruptedException { + + long exportStart = clock.currentTimeInMillis(); + LOG.info( + "Exporting data from split '{}' of table {}", + snapshotSplit.splitId(), + table.id()); + + final String selectSql = + TiDBUtils.buildSplitScanQuery( + snapshotSplit.getTableId(), + snapshotSplit.getSplitKeyType(), + snapshotSplit.getSplitStart() == null, + snapshotSplit.getSplitEnd() == null); + LOG.info( + "For split '{}' of table {} using select statement: '{}'", + snapshotSplit.splitId(), + table.id(), + selectSql); + + try (PreparedStatement selectStatement = + TiDBUtils.readTableSplitDataStatement( + jdbcConnection, + selectSql, + snapshotSplit.getSplitStart() == null, + snapshotSplit.getSplitEnd() == null, + snapshotSplit.getSplitStart(), + snapshotSplit.getSplitEnd(), + snapshotSplit.getSplitKeyType().getFieldCount(), + connectorConfig.getQueryFetchSize()); + ResultSet rs = selectStatement.executeQuery()) { + ColumnUtils.ColumnArray columnArray = ColumnUtils.toArray(rs, table); + long rows = 0; + Threads.Timer logTimer = getTableScanLogTimer(); + + while (rs.next()) { + rows++; + final Object[] row = + jdbcConnection.rowToArray(table, databaseSchema, rs, columnArray); + if (logTimer.expired()) { + long stop = clock.currentTimeInMillis(); + LOG.info( + "Exported {} records for split '{}' after {}", + rows, + snapshotSplit.splitId(), + Strings.duration(stop - exportStart)); + snapshotProgressListener.rowsScanned( + snapshotContext.partition, table.id(), rows); + logTimer = getTableScanLogTimer(); + } + dispatcher.dispatchSnapshotEvent( + snapshotContext.partition, + table.id(), + getChangeRecordEmitter(snapshotContext, table.id(), row), + snapshotReceiver); + } + LOG.info( + "Finished exporting {} records for split '{}', total duration '{}'", + rows, + snapshotSplit.splitId(), + Strings.duration(clock.currentTimeInMillis() - exportStart)); + } catch (SQLException e) { + throw new ConnectException("Snapshotting of table " + table.id() + " failed", e); + } + } + + protected ChangeRecordEmitter getChangeRecordEmitter( + TiDBSnapshotContext snapshotContext, TableId tableId, Object[] row) { + snapshotContext.offset.event(tableId, clock.currentTime()); + return new SnapshotChangeRecordEmitter<>( + snapshotContext.partition, snapshotContext.offset, row, clock); + } + + private Threads.Timer getTableScanLogTimer() { + return Threads.timer(clock, LOG_INTERVAL); + } + + @Override + protected 
SnapshottingTask getSnapshottingTask( + TiDBPartition partition, EventOffsetContext previousOffset) { + return new SnapshottingTask(false, true); + } + + @Override + protected TiDBSnapshotContext prepare(TiDBPartition partition) throws Exception { + return new TiDBSnapshotContext(partition); + } + } + + /** Context for snapshotting. */ + public class TiDBSnapshotSplitChangeEventSourceContext + implements ChangeEventSource.ChangeEventSourceContext { + + public void finished() { + taskRunning = false; + } + + @Override + public boolean isRunning() { + return taskRunning; + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java new file mode 100644 index 00000000000..ed77366d5e1 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBSourceFetchTaskContext.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.WatermarkDispatcher; +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.dialect.JdbcDataSourceDialect; +import org.apache.flink.cdc.connectors.base.relational.JdbcSourceEventDispatcher; +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.reader.external.JdbcSourceFetchTaskContext; +import org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.handler.TiDBErrorHandler; +import org.apache.flink.cdc.connectors.tidb.source.handler.TiDBSchemaChangeEventHandler; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetFactory; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetUtils; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.table.types.logical.RowType; + +import io.debezium.connector.base.ChangeEventQueue; +import io.debezium.connector.tidb.TiDBEventMetadataProvider; +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.connector.tidb.TiDBTaskContext; +import io.debezium.pipeline.DataChangeEvent; +import io.debezium.pipeline.ErrorHandler; +import io.debezium.pipeline.metrics.DefaultChangeEventSourceMetricsFactory; +import io.debezium.pipeline.metrics.SnapshotChangeEventSourceMetrics; +import io.debezium.pipeline.metrics.spi.ChangeEventSourceMetricsFactory; +import io.debezium.pipeline.source.spi.EventMetadataProvider; +import io.debezium.relational.Table; +import io.debezium.relational.TableId; +import io.debezium.relational.Tables; +import io.debezium.schema.TopicSelector; +import org.apache.kafka.connect.source.SourceRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** TiDB source fetch task context. 
*/ +public class TiDBSourceFetchTaskContext extends JdbcSourceFetchTaskContext { + + private static final Logger LOG = LoggerFactory.getLogger(TiDBSourceFetchTaskContext.class); + + private TiDBTaskContext tidbTaskContext; + + private final TiDBConnection connection; + private TiDBDatabaseSchema tiDBDatabaseSchema; + private EventOffsetContext offsetContext; + private SnapshotChangeEventSourceMetrics snapshotChangeEventSourceMetrics; + private TopicSelector topicSelector; + private JdbcSourceEventDispatcher dispatcher; + private TiDBPartition tiDBPartition; + private ChangeEventQueue queue; + private ErrorHandler errorHandler; + private EventMetadataProvider metadataProvider; + + public TiDBSourceFetchTaskContext( + JdbcSourceConfig sourceConfig, + JdbcDataSourceDialect dataSourceDialect, + TiDBConnection connection) { + super(sourceConfig, dataSourceDialect); + this.connection = connection; + this.metadataProvider = new TiDBEventMetadataProvider(); + } + + @Override + public void configure(SourceSplitBase sourceSplitBase) { + final TiDBConnectorConfig connectorConfig = getDbzConnectorConfig(); + final boolean tableIdCaseInsensitive = + dataSourceDialect.isDataCollectionIdCaseSensitive(sourceConfig); + TopicSelector topicSelector = + TopicSelector.defaultSelector( + connectorConfig, + (tableId, prefix, delimiter) -> + String.join(delimiter, prefix, tableId.identifier())); + try { + this.tiDBDatabaseSchema = + TiDBUtils.newSchema( + connection, connectorConfig, topicSelector, tableIdCaseInsensitive); + } catch (Exception e) { + throw new RuntimeException("Failed to initialize TiDBSchema", e); + } + + this.tiDBPartition = new TiDBPartition(connectorConfig.getLogicalName()); + this.tidbTaskContext = new TiDBTaskContext(connectorConfig, tiDBDatabaseSchema); + this.offsetContext = + loadStartingOffsetState( + new EventOffsetContext.Loader(connectorConfig), sourceSplitBase); + this.queue = + new ChangeEventQueue.Builder() + .pollInterval(connectorConfig.getPollInterval()) + .maxBatchSize(connectorConfig.getMaxBatchSize()) + .maxQueueSize(connectorConfig.getMaxQueueSize()) + .maxQueueSizeInBytes(connectorConfig.getMaxQueueSizeInBytes()) + .loggingContextSupplier( + () -> + tidbTaskContext.configureLoggingContext( + "tidb-cdc-connector-task")) + // do not buffer any element, we use signal event + // .buffering() + .build(); + this.errorHandler = + new TiDBErrorHandler( + (TiDBConnectorConfig) sourceConfig.getDbzConnectorConfig(), queue); + this.dispatcher = + new JdbcSourceEventDispatcher<>( + connectorConfig, + topicSelector, + tiDBDatabaseSchema, + queue, + connectorConfig.getTableFilters().dataCollectionFilter(), + DataChangeEvent::new, + metadataProvider, + schemaNameAdjuster, + new TiDBSchemaChangeEventHandler()); + + ChangeEventSourceMetricsFactory metricsFactory = + new DefaultChangeEventSourceMetricsFactory<>(); + this.snapshotChangeEventSourceMetrics = + metricsFactory.getSnapshotMetrics(tidbTaskContext, queue, metadataProvider); + } + + public TiDBConnection getConnection() { + return connection; + } + + @Override + public ChangeEventQueue getQueue() { + return queue; + } + + @Override + public Tables.TableFilter getTableFilter() { + return this.sourceConfig.getTableFilters().dataCollectionFilter(); + } + + @Override + public Offset getStreamOffset(SourceRecord record) { + return new EventOffset(record.sourceOffset()); + } + + @Override + public void close() throws Exception { + this.connection.close(); + } + + @Override + public TiDBDatabaseSchema getDatabaseSchema() { + return 
tiDBDatabaseSchema; + } + + @Override + public boolean isRecordBetween(SourceRecord record, Object[] splitStart, Object[] splitEnd) { + if (this.offsetContext.isSnapshotRunning()) { + RowType splitKeyType = + getSplitType(getDatabaseSchema().tableFor(this.getTableId(record))); + Object[] key = + SourceRecordUtils.getSplitKey(splitKeyType, record, getSchemaNameAdjuster()); + return SourceRecordUtils.splitKeyRangeContains(key, splitStart, splitEnd); + } else { + EventOffset newOffset = new EventOffset(record.sourceOffset()); + return SourceRecordUtils.splitKeyRangeContains( + new EventOffset[] {newOffset}, splitStart, splitEnd); + } + } + + @Override + public RowType getSplitType(Table table) { + return TiDBUtils.getSplitType(table); + } + + @Override + public ErrorHandler getErrorHandler() { + return errorHandler; + } + + @Override + public JdbcSourceEventDispatcher getEventDispatcher() { + return dispatcher; + } + + @Override + public WatermarkDispatcher getWaterMarkDispatcher() { + return dispatcher; + } + + @Override + public EventOffsetContext getOffsetContext() { + return offsetContext; + } + + @Override + public TiDBPartition getPartition() { + return tiDBPartition; + } + + @Override + public TiDBConnectorConfig getDbzConnectorConfig() { + return (TiDBConnectorConfig) super.getDbzConnectorConfig(); + } + + public SnapshotChangeEventSourceMetrics getSnapshotChangeEventSourceMetrics() { + return snapshotChangeEventSourceMetrics; + } + + private EventOffsetContext loadStartingOffsetState( + EventOffsetContext.Loader loader, SourceSplitBase sourceSplitBase) { + Offset offset = + sourceSplitBase.isSnapshotSplit() + ? new EventOffsetFactory() + .createInitialOffset() // get an offset for starting snapshot + : sourceSplitBase.asStreamSplit().getStartingOffset(); + + return EventOffsetUtils.getEventOffsetContext(loader, offset); + } + + public TiDBSourceFetchTaskContext getTaskContext() { + return this; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java new file mode 100644 index 00000000000..4c7169c4619 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBStreamFetchTask.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.reader.external.FetchTask; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +/** TiDBStreamFetchTask. */ +public class TiDBStreamFetchTask implements FetchTask { + private static final Logger LOG = LoggerFactory.getLogger(TiDBStreamFetchTask.class); + private final StreamSplit split; + private volatile boolean taskRunning = false; + private volatile boolean stopped = false; + EventSourceReader eventSourceReader; + + public TiDBStreamFetchTask(StreamSplit split) { + this.split = split; + } + + @Override + public void execute(Context context) throws Exception { + if (stopped) { + LOG.debug( + "StreamFetchTask for split: {} is already stopped and can not be executed", + split); + return; + } else { + LOG.debug("execute StreamFetchTask for split: {}", split); + } + taskRunning = true; + TiDBSourceFetchTaskContext sourceFetchContext = (TiDBSourceFetchTaskContext) context; + sourceFetchContext.getOffsetContext().preSnapshotCompletion(); + + eventSourceReader = + new EventSourceReader( + sourceFetchContext.getDbzConnectorConfig(), + sourceFetchContext.getEventDispatcher(), + sourceFetchContext.getErrorHandler(), + sourceFetchContext.getTaskContext(), + split); + eventSourceReader.init(); + StoppableChangeEventSourceContext changeEventSourceContext = + new StoppableChangeEventSourceContext(); + eventSourceReader.execute( + changeEventSourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); + } + + public void commitCurrentOffset(@Nullable Offset offsetToCommit) { + // todo + } + + @Override + public boolean isRunning() { + return taskRunning; + } + + @Override + public SourceSplitBase getSplit() { + return split; + } + + @Override + public void close() { + LOG.debug("stopping StreamFetchTask for split: {}", split); + if (eventSourceReader != null) { + ((StoppableChangeEventSourceContext) (eventSourceReader.context)) + .stopChangeEventSource(); + } + stopped = false; + taskRunning = false; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java new file mode 100644 index 00000000000..18840340e1e --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBErrorHandler.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.handler; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; + +import com.github.shyiko.mysql.binlog.network.ServerException; +import io.debezium.DebeziumException; +import io.debezium.connector.base.ChangeEventQueue; +import io.debezium.connector.mysql.MySqlConnector; +import io.debezium.pipeline.ErrorHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.EOFException; +import java.sql.SQLException; + +/** TiDBErrorHandler. */ +public class TiDBErrorHandler extends ErrorHandler { + private static final Logger LOG = LoggerFactory.getLogger(TiDBErrorHandler.class); + private static final String SQL_CODE_TOO_MANY_CONNECTIONS = "08004"; + + public TiDBErrorHandler(TiDBConnectorConfig connectorConfig, ChangeEventQueue queue) { + super(MySqlConnector.class, connectorConfig, queue); + } + + protected boolean isRetriable(Throwable throwable) { + LOG.info("start tidb errorHandler : {}", throwable.getClass()); + if (throwable instanceof SQLException) { + final SQLException sql = (SQLException) throwable; + return SQL_CODE_TOO_MANY_CONNECTIONS.equals(sql.getSQLState()); + } else if (throwable instanceof ServerException) { + final ServerException sql = (ServerException) throwable; + return SQL_CODE_TOO_MANY_CONNECTIONS.equals(sql.getSqlState()); + } else if (throwable instanceof EOFException) { + // Retry with reading binlog error + return throwable.getMessage().contains("Failed to read next byte from position"); + } else if (throwable instanceof DebeziumException && throwable.getCause() != null) { + return isRetriable(throwable.getCause()); + } + return false; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVDeserializationRuntimeConverter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java similarity index 62% rename from flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVDeserializationRuntimeConverter.java rename to flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java index b688c824854..d3f11577dcd 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVDeserializationRuntimeConverter.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/handler/TiDBSchemaChangeEventHandler.java @@ -15,18 +15,20 @@ * limitations under the License. 
*/ -package org.apache.flink.cdc.connectors.tidb.table; +package org.apache.flink.cdc.connectors.tidb.source.handler; -import org.tikv.common.meta.TiTableInfo; -import org.tikv.common.types.DataType; +import org.apache.flink.cdc.connectors.base.relational.handler.SchemaChangeEventHandler; -import java.io.Serializable; +import io.debezium.schema.SchemaChangeEvent; -/** - * Runtime converter that converts objects of TiKV into objects of Flink Table & SQL internal data - * structures. - */ -@FunctionalInterface -public interface TiKVDeserializationRuntimeConverter extends Serializable { - Object convert(Object object, TiTableInfo tableInfo, DataType dataType) throws Exception; +import java.util.HashMap; +import java.util.Map; + +/** TiDB schema change event handler. */ +public class TiDBSchemaChangeEventHandler implements SchemaChangeEventHandler { + + @Override + public Map parseSource(SchemaChangeEvent event) { + return new HashMap<>(); + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java new file mode 100644 index 00000000000..2181d09a565 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffset.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; + +import org.tikv.common.meta.TiTimestamp; + +import javax.annotation.Nonnull; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** The offset of TiDB binlog. */ +public class EventOffset extends Offset { + public static final String TIMESTAMP_KEY = "timestamp"; + // TimeStamp Oracle from pd + public static final String COMMIT_VERSION_KEY = "commit_version"; + + public static final EventOffset INITIAL_OFFSET = + new EventOffset(Collections.singletonMap(TIMESTAMP_KEY, "0")); + public static final EventOffset NO_STOPPING_OFFSET = new EventOffset(Long.MAX_VALUE); + + public EventOffset(Map offset) { + Map offsetMap = new HashMap<>(); + for (Map.Entry entry : offset.entrySet()) { + offsetMap.put( + entry.getKey(), entry.getValue() == null ? 
null : entry.getValue().toString()); + } + this.offset = offsetMap; + } + + public EventOffset(@Nonnull String timestamp, String commitVersion) { + Map offsetMap = new HashMap<>(); + offsetMap.put(TIMESTAMP_KEY, timestamp); + if (commitVersion != null) { + offsetMap.put(COMMIT_VERSION_KEY, commitVersion); + } + this.offset = offsetMap; + } + + public EventOffset(long binlogEpochMill) { + Map offsetMap = new HashMap<>(); + offsetMap.put(TIMESTAMP_KEY, String.valueOf(binlogEpochMill)); + offsetMap.put( + COMMIT_VERSION_KEY, + String.valueOf(new TiTimestamp(binlogEpochMill, 0).getVersion())); + this.offset = offsetMap; + } + + public String getTimestamp() { + return offset.get(TIMESTAMP_KEY); + } + + public String getCommitVersion() { + if (offset.get(COMMIT_VERSION_KEY) == null) { + String timestamp = getTimestamp(); + // timestamp to commit version. + return String.valueOf(new TiTimestamp(Long.parseLong(timestamp), 0).getVersion()); + } + return offset.get(COMMIT_VERSION_KEY); + } + + @Override + public int compareTo(@Nonnull Offset o) { + EventOffset that = (EventOffset) o; + + int flag; + flag = compareLong(getTimestamp(), that.getTimestamp()); + if (flag != 0) { + return flag; + } + return compareLong(getCommitVersion(), that.getCommitVersion()); + } + + private int compareLong(String a, String b) { + if (a == null && b == null) { + return 0; + } + if (a == null) { + return -1; + } + if (b == null) { + return 1; + } + return Long.compare(Long.parseLong(a), Long.parseLong(b)); + } + + public static EventOffset of(Map offsetMap) { + Map offsetStrMap = new HashMap<>(); + for (Map.Entry entry : offsetMap.entrySet()) { + offsetStrMap.put( + entry.getKey(), entry.getValue() == null ? null : entry.getValue().toString()); + } + return new EventOffset(offsetStrMap); + } + + public static long getStartTs(Offset offset) { + if (offset.getOffset().get(COMMIT_VERSION_KEY) != null) { + return Long.parseLong(offset.getOffset().get(COMMIT_VERSION_KEY)); + } else { + return new TiTimestamp(Long.parseLong(offset.getOffset().get(TIMESTAMP_KEY)), 0) + .getVersion(); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java new file mode 100644 index 00000000000..1338bc9fcc1 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetContext.java @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; + +import io.debezium.connector.AbstractSourceInfo; +import io.debezium.connector.SnapshotRecord; +import io.debezium.connector.mysql.MySqlReadOnlyIncrementalSnapshotContext; +import io.debezium.pipeline.source.snapshot.incremental.IncrementalSnapshotContext; +import io.debezium.pipeline.source.snapshot.incremental.SignalBasedIncrementalSnapshotContext; +import io.debezium.pipeline.spi.OffsetContext; +import io.debezium.pipeline.txmetadata.TransactionContext; +import io.debezium.relational.TableId; +import io.debezium.schema.DataCollectionId; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.Struct; +import org.tikv.common.meta.TiTimestamp; + +import java.time.Instant; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.COMMIT_VERSION_KEY; +import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.TIMESTAMP_KEY; + +/** The offset context for TiDB connector. */ +public class EventOffsetContext implements OffsetContext { + private static final String SNAPSHOT_COMPLETED_KEY = "snapshot_completed"; + + private final Schema sourceInfoSchema; + private final TiDBSourceInfo sourceInfo; + private final TransactionContext transactionContext; + private final IncrementalSnapshotContext incrementalSnapshotContext; + private boolean snapshotCompleted; + private String commitVersion; + private String timestamp; + + public EventOffsetContext( + boolean snapshot, + boolean snapshotCompleted, + TransactionContext transactionContext, + IncrementalSnapshotContext incrementalSnapshotContext, + TiDBSourceInfo sourceInfo) { + this.sourceInfo = sourceInfo; + this.sourceInfoSchema = sourceInfo.schema(); + this.snapshotCompleted = snapshotCompleted; + + this.transactionContext = transactionContext; + this.incrementalSnapshotContext = incrementalSnapshotContext; + + if (this.snapshotCompleted) { + postSnapshotCompletion(); + } else { + sourceInfo.setSnapshot(snapshot ? 
SnapshotRecord.TRUE : SnapshotRecord.FALSE); + } + } + + public static EventOffsetContext initial(TiDBConnectorConfig config) { + return new EventOffsetContext( + false, + false, + new TransactionContext(), + new SignalBasedIncrementalSnapshotContext<>(), + new TiDBSourceInfo(config)); + } + + @Override + public Map getOffset() { + HashMap offset = new HashMap<>(); + if (timestamp != null) { + offset.put(TIMESTAMP_KEY, timestamp); + } + + if (commitVersion != null) { + offset.put(COMMIT_VERSION_KEY, commitVersion); + } + if (sourceInfo.isSnapshot()) { + if (!snapshotCompleted) { + offset.put(AbstractSourceInfo.SNAPSHOT_KEY, true); + } + return offset; + } else { + return incrementalSnapshotContext.store(transactionContext.store(offset)); + } + } + + public void databaseEvent(String database, Instant timestamp) { + sourceInfo.setSourceTime(timestamp); + sourceInfo.databaseEvent(database); + sourceInfo.tableEvent((TableId) null); + } + + public void tableEvent(String database, Set tableIds, Instant timestamp) { + sourceInfo.setSourceTime(timestamp); + sourceInfo.databaseEvent(database); + sourceInfo.tableEvent(tableIds); + } + + @Override + public Schema getSourceInfoSchema() { + return sourceInfoSchema.schema(); + } + + @Override + public Struct getSourceInfo() { + return sourceInfo.struct(); + } + + @Override + public boolean isSnapshotRunning() { + return sourceInfo.isSnapshot() && !snapshotCompleted; + } + + @Override + public void markLastSnapshotRecord() { + sourceInfo.setSnapshot(SnapshotRecord.LAST); + } + + @Override + public void preSnapshotStart() { + sourceInfo.setSnapshot(SnapshotRecord.TRUE); + snapshotCompleted = false; + } + + @Override + public void preSnapshotCompletion() { + snapshotCompleted = true; + } + + @Override + public void postSnapshotCompletion() { + snapshotCompleted = true; + } + + @Override + public void event(DataCollectionId collectionId, Instant timestamp) { + sourceInfo.setSourceTime(timestamp); + sourceInfo.tableEvent((TableId) collectionId); + } + + @Override + public TransactionContext getTransactionContext() { + return transactionContext; + } + + public void setCheckpoint(Instant timestamp, String commitVersion) { + this.timestamp = String.valueOf(timestamp.toEpochMilli()); + if (commitVersion == null) { + commitVersion = + String.valueOf(new TiTimestamp(timestamp.toEpochMilli(), 0).getVersion()); + } + this.commitVersion = commitVersion; + } + + /** The loader for TiDB offset context. 
*/ + public static class Loader implements OffsetContext.Loader { + + private final TiDBConnectorConfig connectorConfig; + + public Loader(TiDBConnectorConfig connectorConfig) { + this.connectorConfig = connectorConfig; + } + + @SuppressWarnings("unchecked") + @Override + public EventOffsetContext load(Map offset) { + boolean snapshot = + Boolean.TRUE.equals(offset.get(TiDBSourceInfo.SNAPSHOT_KEY)) + || "true".equals(offset.get(TiDBSourceInfo.SNAPSHOT_KEY)); + boolean snapshotCompleted = + Boolean.TRUE.equals(offset.get(SNAPSHOT_COMPLETED_KEY)) + || "true".equals(offset.get(SNAPSHOT_COMPLETED_KEY)); + IncrementalSnapshotContext incrementalSnapshotContext; + if (connectorConfig.isReadOnlyConnection()) { + incrementalSnapshotContext = MySqlReadOnlyIncrementalSnapshotContext.load(offset); + } else { + incrementalSnapshotContext = SignalBasedIncrementalSnapshotContext.load(offset); + } + final EventOffsetContext offsetContext = + new EventOffsetContext( + snapshot, + snapshotCompleted, + TransactionContext.load(offset), + incrementalSnapshotContext, + new TiDBSourceInfo(connectorConfig)); + String timestamp = (String) offset.get(TIMESTAMP_KEY); + offsetContext.setCheckpoint( + timestamp == null + ? Instant.now() + : Instant.ofEpochMilli(Long.parseLong(timestamp)), + (String) offset.get(COMMIT_VERSION_KEY)); + return offsetContext; + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java new file mode 100644 index 00000000000..6d36cb174f4 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetFactory.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; +import org.apache.flink.cdc.connectors.base.source.meta.offset.OffsetFactory; + +import java.util.Map; + +import static org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset.NO_STOPPING_OFFSET; + +/** The factory class for {@link EventOffset}. 
*/ +public class EventOffsetFactory extends OffsetFactory { + + @Override + public Offset newOffset(Map offset) { + return new EventOffset(offset); + } + + @Override + public Offset newOffset(String filename, Long position) { + throw new UnsupportedOperationException(); + } + + @Override + public Offset newOffset(Long position) { + return new EventOffset(position); + } + + @Override + public Offset createTimestampOffset(long timestampMillis) { + return new EventOffset(timestampMillis); + } + + @Override + public Offset createInitialOffset() { + return EventOffset.INITIAL_OFFSET; + } + + @Override + public Offset createNoStoppingOffset() { + return NO_STOPPING_OFFSET; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java new file mode 100644 index 00000000000..9f46a6845b3 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/EventOffsetUtils.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.base.source.meta.offset.Offset; + +import io.debezium.pipeline.spi.OffsetContext; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +/** Utils for EventOffset. 
*/ +public class EventOffsetUtils { + public static EventOffsetContext getEventOffsetContext( + OffsetContext.Loader loader, Offset offset) { + Map<String, String> offsetStrMap = + Objects.requireNonNull(offset, "offset is null for the sourceSplitBase") + .getOffset(); + // the TiDB loader reads the offset values as strings, so just drop null entries here + Map<String, Object> offsetMap = new HashMap<>(); + for (String key : offsetStrMap.keySet()) { + String value = offsetStrMap.get(key); + if (value != null) { + offsetMap.put(key, value); + } + } + return (EventOffsetContext) loader.load(offsetMap); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java new file mode 100644 index 00000000000..7d5a9ba31db --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfo.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; + +import io.debezium.connector.common.BaseSourceInfo; +import io.debezium.relational.TableId; + +import java.time.Instant; +import java.util.Collections; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +/** The source info of TiDB. 
*/ +public class TiDBSourceInfo extends BaseSourceInfo { + public static final String COMMIT_VERSION_KEY = "commitVersion"; + private Long commitVersion = -1L; + private Instant sourceTime; + private Set tableIds; + private String databaseName; + + public TiDBSourceInfo(TiDBConnectorConfig config) { + super(config); + this.tableIds = new HashSet<>(); + } + + @Override + protected Instant timestamp() { + return sourceTime; + } + + public void setSourceTime(Instant sourceTime) { + this.sourceTime = sourceTime; + } + + public void databaseEvent(String databaseName) { + this.databaseName = databaseName; + } + + public void tableEvent(Set tableIds) { + this.tableIds = new HashSet<>(tableIds); + } + + public void tableEvent(TableId tableId) { + this.tableIds = Collections.singleton(tableId); + } + + @Override + protected String database() { + if (tableIds == null || tableIds.isEmpty()) { + return databaseName; + } + final TableId tableId = tableIds.iterator().next(); + if (tableId == null) { + return databaseName; + } + return tableId.catalog(); + } + + public Long getCommitVersion() { + return commitVersion; + } + + public void setCommitVersion(long commitVersion) { + this.commitVersion = commitVersion; + } + + public String table() { + return (tableIds == null || tableIds.isEmpty()) + ? null + : tableIds.stream() + .filter(Objects::nonNull) + .map(TableId::table) + .collect(Collectors.joining(",")); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java new file mode 100644 index 00000000000..7a8df65ab5e --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/offset/TiDBSourceInfoStructMaker.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.offset; + +import io.debezium.connector.SourceInfoStructMaker; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.data.Struct; + +import java.time.Instant; + +/** TiDBSourceInfoStructMaker. 
*/ +public class TiDBSourceInfoStructMaker implements SourceInfoStructMaker { + private final Schema schema; + + public TiDBSourceInfoStructMaker() { + this.schema = + SchemaBuilder.struct() + .field(TiDBSourceInfo.TABLE_NAME_KEY, Schema.STRING_SCHEMA) + .field(TiDBSourceInfo.TIMESTAMP_KEY, Schema.INT64_SCHEMA) + .field(TiDBSourceInfo.DATABASE_NAME_KEY, Schema.OPTIONAL_STRING_SCHEMA) + .field(TiDBSourceInfo.SCHEMA_NAME_KEY, Schema.OPTIONAL_STRING_SCHEMA) + .field(TiDBSourceInfo.COMMIT_VERSION_KEY, Schema.INT64_SCHEMA) + .build(); + } + + @Override + public Schema schema() { + return schema; + } + + @Override + public Struct struct(TiDBSourceInfo sourceInfo) { + Struct source = new Struct(schema); + source.put(TiDBSourceInfo.TABLE_NAME_KEY, sourceInfo.table()); + Instant timestamp = sourceInfo.timestamp(); + long commitVersion = sourceInfo.getCommitVersion(); + source.put(TiDBSourceInfo.TIMESTAMP_KEY, timestamp != null ? timestamp.toEpochMilli() : 0); + // todo timestamp to commit version. + source.put(TiDBSourceInfo.COMMIT_VERSION_KEY, commitVersion); + if (sourceInfo.database() != null) { + source.put(TiDBSourceInfo.DATABASE_NAME_KEY, sourceInfo.database()); + } + return source; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBDatabaseSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBDatabaseSchema.java new file mode 100644 index 00000000000..c3157801b1a --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBDatabaseSchema.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.schema; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBDefaultValueConverter; +import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; + +import io.debezium.connector.tidb.TiDBAntlrDdlParser; +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.relational.RelationalDatabaseSchema; +import io.debezium.relational.RelationalTableFilters; +import io.debezium.relational.Table; +import io.debezium.relational.TableId; +import io.debezium.relational.TableSchemaBuilder; +import io.debezium.relational.ddl.DdlChanges; +import io.debezium.relational.ddl.DdlParser; +import io.debezium.relational.ddl.DdlParserListener; +import io.debezium.schema.SchemaChangeEvent; +import io.debezium.schema.TopicSelector; +import io.debezium.text.MultipleParsingExceptions; +import io.debezium.text.ParsingException; +import io.debezium.util.Collect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.time.Instant; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** TiDB database schema. */ +public class TiDBDatabaseSchema extends RelationalDatabaseSchema { + + private static final Logger LOGGER = LoggerFactory.getLogger(TiDBDatabaseSchema.class); + private final Set<String> ignoredQueryStatements = + Collect.unmodifiableSet("BEGIN", "END", "FLUSH PRIVILEGES"); + private final RelationalTableFilters filters; + private final DdlParser ddlParser; + private final DdlChanges ddlChanges; + + public TiDBDatabaseSchema( + TiDBConnectorConfig config, + TiDBValueConverters tiDBValueConverters, + TopicSelector<TableId> topicSelector, + boolean tableIdCaseInsensitive) { + super( + config, + topicSelector, + config.getTableFilters().dataCollectionFilter(), + config.getColumnFilter(), + new TableSchemaBuilder( + tiDBValueConverters, + new TiDBDefaultValueConverter(tiDBValueConverters), + config.schemaNameAdjustmentMode().createAdjuster(), + config.customConverterRegistry(), + config.getSourceInfoStructMaker().schema(), + config.getSanitizeFieldNames(), + false), + tableIdCaseInsensitive, + config.getKeyMapper()); + + // todo change + this.ddlParser = + new TiDBAntlrDdlParser( + true, + false, + config.isSchemaCommentsHistoryEnabled(), + tiDBValueConverters, + getTableFilter()); + filters = config.getTableFilters(); + this.ddlChanges = this.ddlParser.getDdlChanges(); + } + + public TiDBDatabaseSchema refresh( + TiDBConnection connection, TiDBConnectorConfig config, boolean printReplicaIdentityInfo) + throws SQLException { + // read all the information from the DB + // connection.readSchema(tables(), null, null, getTableFilter(), null, true); + // LOGGER.info("TiDBDatabaseSchema refresh **********"); + connection.readTiDBSchema(config, this, tables(), null, null, getTableFilter(), null, true); + + // if (printReplicaIdentityInfo) { + // // print out all the replica identity info + // tableIds().forEach(tableId -> printReplicaIdentityInfo(connection, tableId)); + // } + // and then refresh the schemas + refreshSchemas(); + // if (readToastableColumns) { + // tableIds().forEach(tableId -> refreshToastableColumnsMap(connection, tableId)); + // } + return this; + } + + protected void 
refreshSchemas() { + clearSchemas(); + // Create TableSchema instances for any existing table ... + tableIds().forEach(this::refreshSchema); + } + + @Override + protected void refreshSchema(TableId id) { + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("refreshing DB schema for table '{}'", id); + } + Table table = tableFor(id); + buildAndRegisterSchema(table); + } + + public List parseSnapshotDdl( + TiDBPartition partition, + String ddlStatements, + String databaseName, + EventOffsetContext offset, + Instant sourceTime) { + LOGGER.debug("Processing snapshot DDL '{}' for database '{}'", ddlStatements, databaseName); + return parseDdl(partition, ddlStatements, databaseName, offset, sourceTime, true); + } + + private List parseDdl( + TiDBPartition partition, + String ddlStatements, + String databaseName, + EventOffsetContext offset, + Instant sourceTime, + boolean snapshot) { + final List schemaChangeEvents = new ArrayList<>(3); + + if (ignoredQueryStatements.contains(ddlStatements)) { + return schemaChangeEvents; + } + + try { + this.ddlChanges.reset(); + this.ddlParser.setCurrentSchema(databaseName); + this.ddlParser.parse(ddlStatements, tables()); + } catch (ParsingException | MultipleParsingExceptions e) { + throw e; + } + if (!ddlChanges.isEmpty()) { + ddlChanges.getEventsByDatabase( + (String dbName, List events) -> { + final String sanitizedDbName = (dbName == null) ? "" : dbName; + if (acceptableDatabase(dbName)) { + final Set tableIds = new HashSet<>(); + events.forEach( + event -> { + final TableId tableId = getTableId(event); + if (tableId != null) { + tableIds.add(tableId); + } + }); + events.forEach( + event -> { + final TableId tableId = getTableId(event); + offset.tableEvent(dbName, tableIds, sourceTime); + // For SET with multiple parameters + if (event instanceof DdlParserListener.TableCreatedEvent) { + emitChangeEvent( + partition, + offset, + schemaChangeEvents, + sanitizedDbName, + event, + tableId, + SchemaChangeEvent.SchemaChangeEventType.CREATE, + snapshot); + } else if (event + instanceof + DdlParserListener.TableAlteredEvent + || event + instanceof + DdlParserListener.TableIndexCreatedEvent + || event + instanceof + DdlParserListener.TableIndexDroppedEvent) { + emitChangeEvent( + partition, + offset, + schemaChangeEvents, + sanitizedDbName, + event, + tableId, + SchemaChangeEvent.SchemaChangeEventType.ALTER, + snapshot); + } else if (event + instanceof DdlParserListener.TableDroppedEvent) { + emitChangeEvent( + partition, + offset, + schemaChangeEvents, + sanitizedDbName, + event, + tableId, + SchemaChangeEvent.SchemaChangeEventType.DROP, + snapshot); + } else if (event + instanceof DdlParserListener.SetVariableEvent) { + // SET statement with multiple variable emits event for + // each variable. 
We want to emit only + // one change event + final DdlParserListener.SetVariableEvent varEvent = + (DdlParserListener.SetVariableEvent) event; + if (varEvent.order() == 0) { + emitChangeEvent( + partition, + offset, + schemaChangeEvents, + sanitizedDbName, + event, + tableId, + SchemaChangeEvent.SchemaChangeEventType + .DATABASE, + snapshot); + } + } else { + emitChangeEvent( + partition, + offset, + schemaChangeEvents, + sanitizedDbName, + event, + tableId, + SchemaChangeEvent.SchemaChangeEventType + .DATABASE, + snapshot); + } + }); + } + }); + } else { + offset.databaseEvent(databaseName, sourceTime); + schemaChangeEvents.add( + SchemaChangeEvent.ofDatabase( + partition, offset, databaseName, ddlStatements, snapshot)); + } + return schemaChangeEvents; + } + + private boolean acceptableDatabase(final String databaseName) { + return filters.databaseFilter().test(databaseName) + || databaseName == null + || databaseName.isEmpty(); + } + + private TableId getTableId(DdlParserListener.Event event) { + if (event instanceof DdlParserListener.TableEvent) { + return ((DdlParserListener.TableEvent) event).tableId(); + } else if (event instanceof DdlParserListener.TableIndexEvent) { + return ((DdlParserListener.TableIndexEvent) event).tableId(); + } + return null; + } + + private void emitChangeEvent( + TiDBPartition partition, + EventOffsetContext offset, + List schemaChangeEvents, + final String sanitizedDbName, + DdlParserListener.Event event, + TableId tableId, + SchemaChangeEvent.SchemaChangeEventType type, + boolean snapshot) { + SchemaChangeEvent schemaChangeEvent; + if (type.equals(SchemaChangeEvent.SchemaChangeEventType.ALTER) + && event instanceof DdlParserListener.TableAlteredEvent + && ((DdlParserListener.TableAlteredEvent) event).previousTableId() != null) { + schemaChangeEvent = + SchemaChangeEvent.ofRename( + partition, + offset, + sanitizedDbName, + null, + event.statement(), + tableId != null ? tableFor(tableId) : null, + ((DdlParserListener.TableAlteredEvent) event).previousTableId()); + } else { + schemaChangeEvent = + SchemaChangeEvent.of( + type, + partition, + offset, + sanitizedDbName, + null, + event.statement(), + tableId != null ? tableFor(tableId) : null, + snapshot); + } + schemaChangeEvents.add(schemaChangeEvent); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java new file mode 100644 index 00000000000..257d3dba99c --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBFieldDefinition.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.schema; + +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; + +import org.apache.commons.lang3.StringUtils; + +/** TiDB field definition. */ +public class TiDBFieldDefinition { + private String columnName; + private String columnType; + private boolean nullable; + private boolean key; + private String defaultValue; + private String extra; + private boolean unique; + + public String getColumnName() { + return columnName; + } + + public void setColumnName(String columnName) { + this.columnName = columnName; + } + + public String getColumnType() { + return columnType; + } + + public void setColumnType(String columnType) { + this.columnType = columnType; + } + + public void setNullable(boolean nullable) { + this.nullable = nullable; + } + + public String getDefaultValue() { + return StringUtils.isEmpty(defaultValue) ? "" : "DEFAULT " + defaultValue; + } + + public void setDefaultValue(String defaultValue) { + this.defaultValue = defaultValue; + } + + public boolean isUnsigned() { + return StringUtils.containsIgnoreCase(columnType, "unsigned"); + } + + public boolean isNullable() { + return nullable; + } + + public boolean isKey() { + return key; + } + + public void setKey(boolean key) { + this.key = key; + } + + public String getExtra() { + return extra; + } + + public void setExtra(String extra) { + this.extra = extra; + } + + public boolean isUnique() { + return unique; + } + + public void setUnique(boolean unique) { + this.unique = unique; + } + + public String toDdl() { + return TiDBUtils.quote(columnName) + " " + columnType + " " + (nullable ? "" : "NOT NULL"); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java new file mode 100644 index 00000000000..f93581dbd32 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBSchema.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.schema; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; +import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetContext; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.util.FlinkRuntimeException; + +import io.debezium.connector.tidb.TiDBPartition; +import io.debezium.connector.tidb.TidbTopicSelector; +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.TableId; +import io.debezium.relational.history.TableChanges.TableChange; +import io.debezium.schema.SchemaChangeEvent; +import io.debezium.schema.TopicSelector; +import org.apache.commons.lang3.StringUtils; + +import java.sql.SQLException; +import java.time.Instant; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +import static org.apache.flink.cdc.connectors.tidb.utils.TiDBConnectionUtils.getValueConverters; + +/** TiDB schema. */ +public class TiDBSchema { + private static final String SHOW_CREATE_TABLE = "SHOW CREATE TABLE "; + private static final String DESC_TABLE = "DESC "; + + private final TiDBConnectorConfig connectorConfig; + private final TiDBDatabaseSchema databaseSchema; + private final SchemasByTableId schemasByTableId; + + public TiDBSchema(TiDBSourceConfig sourceConfig, boolean isTableIdCaseInSensitive) { + this.connectorConfig = sourceConfig.getDbzConnectorConfig(); + this.databaseSchema = createTiDBDatabaseSchema(connectorConfig, isTableIdCaseInSensitive); + this.schemasByTableId = new SchemasByTableId(isTableIdCaseInSensitive); + } + + public TableChange getTableSchema(JdbcConnection jdbc, TableId tableId) { + // read schema from cache first + TableChange schema = schemasByTableId.get(tableId); + if (schema == null) { + schema = readTableSchema(jdbc, tableId); + schemasByTableId.put(tableId, schema); + } + return schema; + } + + public static TiDBDatabaseSchema createTiDBDatabaseSchema( + TiDBConnectorConfig dbzTiDBConfig, boolean isTableIdCaseSensitive) { + TopicSelector topicSelector = TidbTopicSelector.defaultSelector(dbzTiDBConfig); + TiDBValueConverters valueConverters = getValueConverters(dbzTiDBConfig); + return new TiDBDatabaseSchema( + dbzTiDBConfig, valueConverters, topicSelector, isTableIdCaseSensitive); + } + + private TableChange readTableSchema(JdbcConnection jdbc, TableId tableId) { + final Map tableChangeMap = new HashMap<>(); + String showCreateTable = SHOW_CREATE_TABLE + TiDBUtils.quote(tableId); + final TiDBPartition partition = new TiDBPartition(connectorConfig.getLogicalName()); + buildSchemaByShowCreateTable(partition, jdbc, tableId, tableChangeMap); + if (!tableChangeMap.containsKey(tableId)) { + // fallback to desc table + String descTable = DESC_TABLE + TiDBUtils.quote(tableId); + buildSchemaByDescTable(partition, jdbc, descTable, tableId, tableChangeMap); + if (!tableChangeMap.containsKey(tableId)) { + throw new FlinkRuntimeException( + String.format( + "Can't obtain schema for table %s by running %s and %s ", + tableId, showCreateTable, descTable)); + } + } + return tableChangeMap.get(tableId); + } + + private void buildSchemaByShowCreateTable( + TiDBPartition partition, + JdbcConnection jdbc, + TableId tableId, + Map 
tableChangeMap) { + final String sql = SHOW_CREATE_TABLE + TiDBUtils.quote(tableId); + try { + jdbc.query( + sql, + rs -> { + if (rs.next()) { + final String ddl = rs.getString(2); + parseSchemaByDdl(partition, ddl, tableId, tableChangeMap); + } + }); + } catch (SQLException e) { + throw new FlinkRuntimeException( + String.format("Failed to read schema for table %s by running %s", tableId, sql), + e); + } + } + + private void buildSchemaByDescTable( + TiDBPartition partition, + JdbcConnection jdbc, + String descTable, + TableId tableId, + Map tableChangeMap) { + List fieldMetas = new ArrayList<>(); + List primaryKeys = new ArrayList<>(); + try { + jdbc.query( + descTable, + rs -> { + while (rs.next()) { + TiDBFieldDefinition meta = new TiDBFieldDefinition(); + meta.setColumnName(rs.getString("Field")); + meta.setColumnType(rs.getString("Type")); + meta.setNullable( + StringUtils.equalsIgnoreCase(rs.getString("Null"), "YES")); + meta.setKey("PRI".equalsIgnoreCase(rs.getString("Key"))); + meta.setUnique("UNI".equalsIgnoreCase(rs.getString("Key"))); + meta.setDefaultValue(rs.getString("Default")); + meta.setExtra(rs.getString("Extra")); + if (meta.isKey()) { + primaryKeys.add(meta.getColumnName()); + } + fieldMetas.add(meta); + } + }); + parseSchemaByDdl( + partition, + new TiDBTableDefinition(tableId, fieldMetas, primaryKeys).toDdl(), + tableId, + tableChangeMap); + } catch (SQLException e) { + throw new FlinkRuntimeException( + String.format( + "Failed to read schema for table %s by running %s", tableId, descTable), + e); + } + } + + private void parseSchemaByDdl( + TiDBPartition partition, + String ddl, + TableId tableId, + Map tableChangeMap) { + final EventOffsetContext offsetContext = EventOffsetContext.initial(connectorConfig); + List schemaChangeEvents = + databaseSchema.parseSnapshotDdl( + partition, ddl, tableId.catalog(), offsetContext, Instant.now()); + for (SchemaChangeEvent schemaChangeEvent : schemaChangeEvents) { + for (TableChange tableChange : schemaChangeEvent.getTableChanges()) { + tableChangeMap.put(tableId, tableChange); + } + } + } + + private static class SchemasByTableId { + + private final boolean tableIdCaseInsensitive; + private final ConcurrentMap values; + + public SchemasByTableId(boolean tableIdCaseInsensitive) { + this.tableIdCaseInsensitive = tableIdCaseInsensitive; + this.values = new ConcurrentHashMap<>(); + } + + public void clear() { + values.clear(); + } + + public TableChange remove(TableId tableId) { + return values.remove(toLowerCaseIfNeeded(tableId)); + } + + public TableChange get(TableId tableId) { + return values.get(toLowerCaseIfNeeded(tableId)); + } + + public TableChange put(TableId tableId, TableChange updated) { + return values.put(toLowerCaseIfNeeded(tableId), updated); + } + + private TableId toLowerCaseIfNeeded(TableId tableId) { + return tableIdCaseInsensitive ? 
tableId.toLowercase() : tableId; + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java new file mode 100644 index 00000000000..83a163166eb --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/schema/TiDBTableDefinition.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.schema; + +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.util.CollectionUtil; + +import io.debezium.relational.TableId; + +import java.util.List; +import java.util.stream.Collectors; + +/** TiDB table definition. */ +public class TiDBTableDefinition { + TableId tableId; + List fieldDefinitions; + List primaryKeys; + + public TiDBTableDefinition( + TableId tableId, List fieldDefinitions, List primaryKeys) { + this.tableId = tableId; + this.fieldDefinitions = fieldDefinitions; + this.primaryKeys = primaryKeys; + } + + public String toDdl() { + return String.format( + "CREATE TABLE %s (\n\t %s %s );", + TiDBUtils.quote(tableId), fieldDefinitions(), pkDefinition()); + } + + private String fieldDefinitions() { + return fieldDefinitions.stream() + .map(TiDBFieldDefinition::toDdl) + .collect(Collectors.joining(", \n\t")); + } + + private String pkDefinition() { + StringBuilder pkDefinition = new StringBuilder(); + if (!CollectionUtil.isNullOrEmpty(primaryKeys)) { + pkDefinition.append(","); + pkDefinition.append( + String.format( + "PRIMARY KEY ( %s )", + primaryKeys.stream() + .map(TiDBUtils::quote) + .collect(Collectors.joining(",")))); + } + return pkDefinition.toString(); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java new file mode 100644 index 00000000000..cde0819875c --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/source/splitter/TiDBChunkSplitter.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.splitter; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.dialect.JdbcDataSourceDialect; +import org.apache.flink.cdc.connectors.base.source.assigner.splitter.JdbcSourceChunkSplitter; +import org.apache.flink.cdc.connectors.base.source.assigner.state.ChunkSplitterState; +import org.apache.flink.cdc.connectors.tidb.utils.TiDBUtils; +import org.apache.flink.table.types.DataType; + +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.Column; +import io.debezium.relational.TableId; + +import java.sql.SQLException; + +/** TiDB chunk splitter. */ +public class TiDBChunkSplitter extends JdbcSourceChunkSplitter { + + public TiDBChunkSplitter( + JdbcSourceConfig sourceConfig, + JdbcDataSourceDialect dialect, + ChunkSplitterState chunkSplitterState) { + super(sourceConfig, dialect, chunkSplitterState); + } + + @Override + protected Object queryNextChunkMax( + JdbcConnection jdbc, + TableId tableId, + Column splitColumn, + int chunkSize, + Object includedLowerBound) + throws SQLException { + return TiDBUtils.queryNextChunkMax( + jdbc, tableId, splitColumn.name(), chunkSize, includedLowerBound); + } + + @Override + protected Long queryApproximateRowCnt(JdbcConnection jdbc, TableId tableId) + throws SQLException { + return TiDBUtils.queryApproximateRowCnt(jdbc, tableId); + } + + @Override + protected DataType fromDbzColumn(Column splitColumn) { + return TiDBUtils.fromDbzColumn(splitColumn); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVChangeEventDeserializationSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVChangeEventDeserializationSchema.java deleted file mode 100644 index 05f96e1693c..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVChangeEventDeserializationSchema.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.cdc.connectors.tidb.TiKVChangeEventDeserializationSchema; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.types.logical.RowType; -import org.apache.flink.types.RowKind; -import org.apache.flink.util.Collector; -import org.apache.flink.util.FlinkRuntimeException; - -import org.tikv.common.TiConfiguration; -import org.tikv.common.key.RowKey; -import org.tikv.kvproto.Cdcpb.Event.Row; - -import static org.apache.flink.util.Preconditions.checkNotNull; -import static org.tikv.common.codec.TableCodec.decodeObjects; - -/** - * Deserialization schema from TiKV Change Event to Flink Table/SQL internal data structure {@link - * RowData}. - */ -public class RowDataTiKVChangeEventDeserializationSchema - extends RowDataTiKVEventDeserializationSchemaBase - implements TiKVChangeEventDeserializationSchema { - - private static final long serialVersionUID = 1L; - - /** TypeInformation of the produced {@link RowData}. * */ - private final TypeInformation resultTypeInfo; - - public RowDataTiKVChangeEventDeserializationSchema( - TiConfiguration tiConf, - String database, - String tableName, - TypeInformation resultTypeInfo, - TiKVMetadataConverter[] metadataConverters, - RowType physicalDataType) { - super(tiConf, database, tableName, metadataConverters, physicalDataType); - this.resultTypeInfo = checkNotNull(resultTypeInfo); - } - - @Override - public void deserialize(Row row, Collector out) throws Exception { - if (tableInfo == null) { - tableInfo = fetchTableInfo(); - } - final RowKey rowKey = RowKey.decode(row.getKey().toByteArray()); - final long handle = rowKey.getHandle(); - Object[] tikvValues; - - switch (row.getOpType()) { - case DELETE: - tikvValues = decodeObjects(row.getOldValue().toByteArray(), handle, tableInfo); - RowData rowDataDelete = - (RowData) physicalConverter.convert(tikvValues, tableInfo, null); - rowDataDelete.setRowKind(RowKind.DELETE); - emit(new TiKVMetadataConverter.TiKVRowValue(row), rowDataDelete, out); - break; - case PUT: - try { - tikvValues = - decodeObjects( - row.getValue().toByteArray(), - RowKey.decode(row.getKey().toByteArray()).getHandle(), - tableInfo); - if (row.getOldValue() == null || row.getOldValue().isEmpty()) { - RowData rowDataUpdateBefore = - (RowData) physicalConverter.convert(tikvValues, tableInfo, null); - rowDataUpdateBefore.setRowKind(RowKind.INSERT); - emit(new TiKVMetadataConverter.TiKVRowValue(row), rowDataUpdateBefore, out); - } else { - RowData rowDataUpdateAfter = - (RowData) physicalConverter.convert(tikvValues, tableInfo, null); - rowDataUpdateAfter.setRowKind(RowKind.UPDATE_AFTER); - emit(new TiKVMetadataConverter.TiKVRowValue(row), rowDataUpdateAfter, out); - } - break; - } catch (final RuntimeException e) { - throw new FlinkRuntimeException( - String.format( - "Fail to deserialize row: %s, table: %s", - row, tableInfo.getId()), - e); - } - default: - throw new IllegalArgumentException("Unknown Row Op Type: " + row.getOpType()); - } - } - - @Override - public TypeInformation getProducedType() { - return resultTypeInfo; - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVEventDeserializationSchemaBase.java 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVEventDeserializationSchemaBase.java deleted file mode 100644 index 6e9c7dcda61..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVEventDeserializationSchemaBase.java +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.cdc.debezium.utils.TemporalConversions; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.GenericArrayData; -import org.apache.flink.table.data.GenericRowData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.types.logical.DecimalType; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.flink.table.types.logical.RowType; -import org.apache.flink.util.Collector; -import org.apache.flink.util.FlinkRuntimeException; - -import org.tikv.common.TiConfiguration; -import org.tikv.common.TiSession; -import org.tikv.common.meta.TiColumnInfo; -import org.tikv.common.meta.TiTableInfo; -import org.tikv.kvproto.Kvrpcpb; - -import java.io.Serializable; -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.sql.Timestamp; - -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * Base class of deserialization schema from TiKV RowValue (Snapshot or Change Event) to Flink - * Table/SQL internal data structure {@link RowData}. - */ -public class RowDataTiKVEventDeserializationSchemaBase implements Serializable { - private static final long serialVersionUID = 1L; - - /** Whether the deserializer needs to handle metadata columns. */ - private final boolean hasMetadata; - - /** Information of the TiKV table. * */ - protected TiTableInfo tableInfo; - - private final TiConfiguration tiConf; - private final String database; - private final String tableName; - - /** - * A wrapped output collector which is used to append metadata columns after physical columns. - */ - private final TiKVAppendMetadataCollector appendMetadataCollector; - - /** - * Runtime converter that converts Tikv {@link Kvrpcpb.KvPair}s into {@link RowData} consisted - * of physical column values. 
- */ - protected final TiKVDeserializationRuntimeConverter physicalConverter; - - public RowDataTiKVEventDeserializationSchemaBase( - TiConfiguration tiConf, - String database, - String tableName, - TiKVMetadataConverter[] metadataConverters, - RowType physicalDataType) { - this.tiConf = checkNotNull(tiConf); - this.database = checkNotNull(database); - this.tableName = checkNotNull(tableName); - this.hasMetadata = checkNotNull(metadataConverters).length > 0; - this.appendMetadataCollector = new TiKVAppendMetadataCollector(metadataConverters); - this.physicalConverter = createConverter(checkNotNull(physicalDataType)); - } - - protected TiTableInfo fetchTableInfo() { - try (final TiSession session = TiSession.create(tiConf)) { - return session.getCatalog().getTable(database, tableName); - } catch (final Exception e) { - throw new FlinkRuntimeException(e); - } - } - - public void emit( - TiKVMetadataConverter.TiKVRowValue inRecord, - RowData physicalRow, - Collector collector) { - if (!hasMetadata) { - collector.collect(physicalRow); - return; - } - - appendMetadataCollector.row = inRecord; - appendMetadataCollector.outputCollector = collector; - appendMetadataCollector.collect(physicalRow); - } - - // ------------------------------------------------------------------------------------- - // Runtime Converters - // ------------------------------------------------------------------------------------- - - /** Creates a runtime converter which is null safe. */ - protected static TiKVDeserializationRuntimeConverter createConverter(LogicalType type) { - return wrapIntoNullableConverter(createNotNullConverter(type)); - } - - // -------------------------------------------------------------------------------- - // IMPORTANT! We use anonymous classes instead of lambdas for a reason here. It is - // necessary because the maven shade plugin cannot relocate classes in - // SerializedLambdas (MSHADE-260). - // -------------------------------------------------------------------------------- - - /** Creates a runtime converter which assuming input object is not null. 
*/ - public static TiKVDeserializationRuntimeConverter createNotNullConverter(LogicalType type) { - - // if no matched user defined converter, fallback to the default converter - switch (type.getTypeRoot()) { - case NULL: - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, - TiTableInfo schema, - org.tikv.common.types.DataType dataType) { - return null; - } - }; - case BOOLEAN: - return convertToBoolean(); - case TINYINT: - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, - TiTableInfo schema, - org.tikv.common.types.DataType dataType) { - - return Byte.parseByte(object.toString()); - } - }; - case SMALLINT: - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, - TiTableInfo schema, - org.tikv.common.types.DataType dataType) { - return Short.parseShort(object.toString()); - } - }; - case INTEGER: - case INTERVAL_YEAR_MONTH: - return convertToInt(); - case BIGINT: - case INTERVAL_DAY_TIME: - return convertToLong(); - case DATE: - return convertToDate(); - case TIME_WITHOUT_TIME_ZONE: - return convertToTime(); - case TIMESTAMP_WITHOUT_TIME_ZONE: - return convertToTimestamp(); - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return convertToLocalTimeZoneTimestamp(); - case FLOAT: - return convertToFloat(); - case DOUBLE: - return convertToDouble(); - case CHAR: - case VARCHAR: - return convertToString(); - case BINARY: - case VARBINARY: - return convertToBinary(); - case DECIMAL: - return createDecimalConverter((DecimalType) type); - case ROW: - return createRowConverter((RowType) type); - case ARRAY: - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, - TiTableInfo tableInfo, - org.tikv.common.types.DataType dataType) - throws Exception { - String[] strArray = ((String) object).split(","); - StringData[] stringDataArray = new StringData[strArray.length]; - for (int i = 0; i < strArray.length; i++) { - stringDataArray[i] = StringData.fromString(strArray[i]); - } - return new GenericArrayData(stringDataArray); - } - }; - case MAP: - case MULTISET: - case RAW: - default: - throw new UnsupportedOperationException("Unsupported type: " + type); - } - } - - private static TiKVDeserializationRuntimeConverter convertToBoolean() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Boolean) { - return object; - } else if (object instanceof Long) { - return (Long) object == 1; - } else if (object instanceof Byte) { - return (byte) object == 1; - } else if (object instanceof Short) { - return (short) object == 1; - } else { - return Boolean.parseBoolean(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToInt() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Integer) { - return object; - } else if (object instanceof Long) { - return 
dataType.isUnsigned() - ? Integer.valueOf(Short.toUnsignedInt(((Long) object).shortValue())) - : ((Long) object).intValue(); - } else { - return Integer.parseInt(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToLong() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Integer) { - return ((Integer) object).longValue(); - } else if (object instanceof Long) { - return object; - } else { - return Long.parseLong(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToDouble() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Float) { - return ((Float) object).doubleValue(); - } else if (object instanceof Double) { - return object; - } else { - return Double.parseDouble(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToFloat() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Float) { - return object; - } else if (object instanceof Double) { - return ((Double) object).floatValue(); - } else { - return Float.parseFloat(object.toString()); - } - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToDate() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - return (int) TemporalConversions.toLocalDate(object).toEpochDay(); - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToTime() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Long) { - return (int) ((Long) object / 1000_000); - } - return TemporalConversions.toLocalTime(object).toSecondOfDay() * 1000; - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToTimestamp() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - - switch (dataType.getType()) { - case TypeTimestamp: - if (object instanceof Timestamp) { - return TimestampData.fromInstant(((Timestamp) object).toInstant()); - } - break; - case TypeDatetime: - if (object instanceof Timestamp) { - return TimestampData.fromLocalDateTime( - ((Timestamp) object).toLocalDateTime()); - } - break; - default: - throw new IllegalArgumentException( - "Unable to convert to TimestampData from unexpected value '" - + object - + "' of type " - + object.getClass().getName()); - } - return object; - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToLocalTimeZoneTimestamp() { - return new TiKVDeserializationRuntimeConverter() { - - 
private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof Timestamp) { - return TimestampData.fromInstant(((Timestamp) object).toInstant()); - } - throw new IllegalArgumentException( - "Unable to convert to TimestampData from unexpected value '" - + object - + "' of type " - + object.getClass().getName()); - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToString() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof byte[]) { - return StringData.fromBytes((byte[]) object); - } - return StringData.fromString(object.toString()); - } - }; - } - - private static TiKVDeserializationRuntimeConverter convertToBinary() { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - if (object instanceof byte[]) { - return object; - } else if (object instanceof String) { - return ((String) object).getBytes(); - } else if (object instanceof ByteBuffer) { - ByteBuffer byteBuffer = (ByteBuffer) object; - byte[] bytes = new byte[byteBuffer.remaining()]; - byteBuffer.get(bytes); - return bytes; - } else { - throw new UnsupportedOperationException( - "Unsupported BYTES value type: " + object.getClass().getSimpleName()); - } - } - }; - } - - /** Deal with unsigned column's value. */ - public static Object dealUnsignedColumnValue( - org.tikv.common.types.DataType dataType, Object object) { - // For more information about numeric columns with unsigned, please refer link - // https://docs.pingcap.com/tidb/stable/data-type-numeric. 
- switch (dataType.getType()) { - case TypeTiny: - return (short) Byte.toUnsignedInt(((Long) object).byteValue()); - case TypeShort: - return Short.toUnsignedInt(((Long) object).shortValue()); - case TypeInt24: - return (((Long) object).intValue()) & 0xffffff; - case TypeLong: - return Integer.toUnsignedLong(((Long) object).intValue()); - case TypeLonglong: - return new BigDecimal(Long.toUnsignedString(((Long) object))); - default: - return object; - } - } - - private static TiKVDeserializationRuntimeConverter createDecimalConverter( - DecimalType decimalType) { - final int precision = decimalType.getPrecision(); - final int scale = decimalType.getScale(); - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) { - BigDecimal bigDecimal; - if (object instanceof String) { - bigDecimal = new BigDecimal((String) object); - } else if (object instanceof Long) { - bigDecimal = new BigDecimal((String) object); - } else if (object instanceof Double) { - bigDecimal = BigDecimal.valueOf((Double) object); - } else if (object instanceof BigDecimal) { - bigDecimal = (BigDecimal) object; - } else { - throw new IllegalArgumentException( - "Unable to convert to decimal from unexpected value '" - + object - + "' of type " - + object.getClass()); - } - return DecimalData.fromBigDecimal(bigDecimal, precision, scale); - } - }; - } - - private static TiKVDeserializationRuntimeConverter createRowConverter(RowType rowType) { - final TiKVDeserializationRuntimeConverter[] fieldConverters = - rowType.getFields().stream() - .map(RowType.RowField::getType) - .map(logicType -> createConverter(logicType)) - .toArray(TiKVDeserializationRuntimeConverter[]::new); - final String[] fieldNames = rowType.getFieldNames().toArray(new String[0]); - - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo tableInfo, org.tikv.common.types.DataType dataType) - throws Exception { - int arity = fieldNames.length; - GenericRowData row = new GenericRowData(arity); - for (int i = 0; i < arity; i++) { - String fieldName = fieldNames[i]; - - TiColumnInfo columnInfo = tableInfo.getColumn(fieldName); - if (columnInfo == null) { - row.setField(i, null); - } else { - int offset = columnInfo.getOffset(); - org.tikv.common.types.DataType type = columnInfo.getType(); - Object convertedField = - convertField( - fieldConverters[i], - tableInfo, - type, - ((Object[]) object)[offset]); - row.setField(i, convertedField); - } - } - return row; - } - }; - } - - private static Object convertField( - TiKVDeserializationRuntimeConverter fieldConverter, - TiTableInfo tableInfo, - org.tikv.common.types.DataType dataType, - Object fieldValue) - throws Exception { - if (fieldValue == null) { - return null; - } else { - if (dataType.isUnsigned()) { - fieldValue = dealUnsignedColumnValue(dataType, fieldValue); - } - return fieldConverter.convert(fieldValue, tableInfo, dataType); - } - } - - private static TiKVDeserializationRuntimeConverter wrapIntoNullableConverter( - TiKVDeserializationRuntimeConverter converter) { - return new TiKVDeserializationRuntimeConverter() { - - private static final long serialVersionUID = 1L; - - @Override - public Object convert( - Object object, TiTableInfo schema, org.tikv.common.types.DataType dataType) - throws Exception { - if (object == null) { - 
return null; - } - return converter.convert(object, schema, dataType); - } - }; - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVSnapshotEventDeserializationSchema.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVSnapshotEventDeserializationSchema.java deleted file mode 100644 index be66a72494b..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/RowDataTiKVSnapshotEventDeserializationSchema.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.cdc.connectors.tidb.TiKVSnapshotEventDeserializationSchema; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.types.logical.RowType; -import org.apache.flink.util.Collector; - -import org.tikv.common.TiConfiguration; -import org.tikv.common.key.RowKey; -import org.tikv.kvproto.Kvrpcpb.KvPair; - -import static org.apache.flink.util.Preconditions.checkNotNull; -import static org.tikv.common.codec.TableCodec.decodeObjects; - -/** - * Deserialization schema from TiKV Snapshot Event to Flink Table/SQL internal data structure {@link - * RowData}. - */ -public class RowDataTiKVSnapshotEventDeserializationSchema - extends RowDataTiKVEventDeserializationSchemaBase - implements TiKVSnapshotEventDeserializationSchema { - - private static final long serialVersionUID = 1L; - - /** TypeInformation of the produced {@link RowData}. 
* */ - private final TypeInformation resultTypeInfo; - - public RowDataTiKVSnapshotEventDeserializationSchema( - TiConfiguration tiConf, - String database, - String tableName, - TypeInformation resultTypeInfo, - TiKVMetadataConverter[] metadataConverters, - RowType physicalDataType) { - super(tiConf, database, tableName, metadataConverters, physicalDataType); - this.resultTypeInfo = checkNotNull(resultTypeInfo); - } - - @Override - public TypeInformation getProducedType() { - return resultTypeInfo; - } - - @Override - public void deserialize(KvPair record, Collector out) throws Exception { - if (tableInfo == null) { - tableInfo = fetchTableInfo(); - } - Object[] tikvValues = - decodeObjects( - record.getValue().toByteArray(), - RowKey.decode(record.getKey().toByteArray()).getHandle(), - tableInfo); - - emit( - new TiKVMetadataConverter.TiKVRowValue(record), - (RowData) physicalConverter.convert(tikvValues, tableInfo, null), - out); - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupOptions.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupOptions.java deleted file mode 100644 index e1dcfa6345c..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/StartupOptions.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import java.util.Objects; - -/** TiDB CDC Source startup options. */ -public final class StartupOptions { - - public final StartupMode startupMode; - - /** - * Performs an initial snapshot on the monitored database tables upon first startup, and - * continue to read the latest CDC events. - */ - public static StartupOptions initial() { - return new StartupOptions(StartupMode.INITIAL); - } - - /** - * Never to perform snapshot on the monitored database tables upon first startup, just read from - * the latest CDC events which means only have the changes since the connector was started. 
- */ - public static StartupOptions latest() { - return new StartupOptions(StartupMode.LATEST_OFFSET); - } - - private StartupOptions(StartupMode startupMode) { - this.startupMode = startupMode; - - switch (startupMode) { - case INITIAL: - - case LATEST_OFFSET: - break; - - default: - throw new UnsupportedOperationException(startupMode + " mode is not supported."); - } - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - StartupOptions that = (StartupOptions) o; - return startupMode == that.startupMode; - } - - @Override - public int hashCode() { - return Objects.hash(startupMode); - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java new file mode 100644 index 00000000000..fcbcb3b086b --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBDeserializationConverterFactory.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.table; + +import org.apache.flink.cdc.debezium.table.DeserializationRuntimeConverter; +import org.apache.flink.cdc.debezium.table.DeserializationRuntimeConverterFactory; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeFamily; + +import com.esri.core.geometry.ogc.OGCGeometry; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; +import io.debezium.data.EnumSet; +import io.debezium.data.geometry.Geometry; +import io.debezium.data.geometry.Point; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.Struct; + +import java.nio.ByteBuffer; +import java.time.ZoneId; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +/** Factory for creating {@link DeserializationRuntimeConverter} for TiDB. 
*/ +public class TiDBDeserializationConverterFactory { + public static DeserializationRuntimeConverterFactory instance() { + return new DeserializationRuntimeConverterFactory() { + + private static final long serialVersionUID = 1L; + + @Override + public Optional createUserDefinedConverter( + LogicalType logicalType, ZoneId serverTimeZone) { + switch (logicalType.getTypeRoot()) { + case TINYINT: + return createTinyIntConverter(); + case CHAR: + case VARCHAR: + return createStringConverter(); + case ARRAY: + return createArrayConverter((ArrayType) logicalType); + default: + // fallback to default converter + return Optional.empty(); + } + } + }; + } + + private static Optional createStringConverter() { + final ObjectMapper objectMapper = new ObjectMapper(); + final ObjectWriter objectWriter = objectMapper.writer(); + return Optional.of( + new DeserializationRuntimeConverter() { + + private static final long serialVersionUID = 1L; + + @Override + public Object convert(Object dbzObj, Schema schema) throws Exception { + // the Geometry datatype in MySQL will be converted to + // a String with Json format + if (Point.LOGICAL_NAME.equals(schema.name()) + || Geometry.LOGICAL_NAME.equals(schema.name())) { + try { + Struct geometryStruct = (Struct) dbzObj; + byte[] wkb = geometryStruct.getBytes("wkb"); + String geoJson = + OGCGeometry.fromBinary(ByteBuffer.wrap(wkb)).asGeoJson(); + JsonNode originGeoNode = objectMapper.readTree(geoJson); + Optional srid = + Optional.ofNullable(geometryStruct.getInt32("srid")); + Map geometryInfo = new HashMap<>(); + String geometryType = originGeoNode.get("type").asText(); + geometryInfo.put("type", geometryType); + if (geometryType.equals("GeometryCollection")) { + geometryInfo.put("geometries", originGeoNode.get("geometries")); + } else { + geometryInfo.put( + "coordinates", originGeoNode.get("coordinates")); + } + geometryInfo.put("srid", srid.orElse(0)); + return StringData.fromString( + objectWriter.writeValueAsString(geometryInfo)); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format( + "Failed to convert %s to geometry JSON.", dbzObj), + e); + } + } else { + return StringData.fromString(dbzObj.toString()); + } + } + }); + } + + private static Optional createArrayConverter( + ArrayType arrayType) { + if (hasFamily(arrayType.getElementType(), LogicalTypeFamily.CHARACTER_STRING)) { + // only map MySQL SET type to Flink ARRAY type + return Optional.of( + new DeserializationRuntimeConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object convert(Object dbzObj, Schema schema) throws Exception { + if (EnumSet.LOGICAL_NAME.equals(schema.name()) + && dbzObj instanceof String) { + // for SET datatype in mysql, debezium will always + // return a string split by comma like "a,b,c" + String[] enums = ((String) dbzObj).split(","); + StringData[] elements = new StringData[enums.length]; + for (int i = 0; i < enums.length; i++) { + elements[i] = StringData.fromString(enums[i]); + } + return new GenericArrayData(elements); + } else { + throw new IllegalArgumentException( + String.format( + "Unable convert to Flink ARRAY type from unexpected value '%s', " + + "only SET type could be converted to ARRAY type for MySQL", + dbzObj)); + } + } + }); + } else { + // otherwise, fallback to default converter + return Optional.empty(); + } + } + + private static Optional createTinyIntConverter() { + + return Optional.of( + new DeserializationRuntimeConverter() { + private static final long serialVersionUID = 1L; + + 
@Override + public Object convert(Object dbzObj, Schema schema) throws Exception { + if (dbzObj instanceof Boolean) { + return dbzObj == Boolean.TRUE ? (byte) 1 : (byte) 0; + } else { + return Byte.parseByte(dbzObj.toString()); + } + } + }); + } + + private static boolean hasFamily(LogicalType logicalType, LogicalTypeFamily family) { + return logicalType.getTypeRoot().getFamilies().contains(family); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBReadableMetadata.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBReadableMetadata.java new file mode 100644 index 00000000000..1569ea6b155 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBReadableMetadata.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.table; + +import org.apache.flink.cdc.debezium.table.MetadataConverter; +import org.apache.flink.cdc.debezium.table.RowDataMetadataConverter; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.types.DataType; + +import io.debezium.connector.AbstractSourceInfo; +import io.debezium.data.Envelope; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.source.SourceRecord; + +/** Defines the supported metadata columns for {@link TiDBTableSource}. */ +public enum TiDBReadableMetadata { + + /** Name of the table that contain the row. */ + TABLE_NAME( + "table_name", + DataTypes.STRING().notNull(), + new MetadataConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object read(SourceRecord record) { + Struct messageStruct = (Struct) record.value(); + Struct sourceStruct = messageStruct.getStruct(Envelope.FieldName.SOURCE); + return StringData.fromString( + sourceStruct.getString(AbstractSourceInfo.TABLE_NAME_KEY)); + } + }), + + /** Name of the database that contain the row. 
*/ + DATABASE_NAME( + "database_name", + DataTypes.STRING().notNull(), + new MetadataConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object read(SourceRecord record) { + Struct messageStruct = (Struct) record.value(); + Struct sourceStruct = messageStruct.getStruct(Envelope.FieldName.SOURCE); + return StringData.fromString( + sourceStruct.getString(AbstractSourceInfo.DATABASE_NAME_KEY)); + } + }), + + /** + * It indicates the time that the change was made in the database. If the record is read from + * snapshot of the table instead of the binlog, the value is always 0. + */ + OP_TS( + "op_ts", + DataTypes.TIMESTAMP_LTZ(3).notNull(), + new MetadataConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object read(SourceRecord record) { + Struct messageStruct = (Struct) record.value(); + Struct sourceStruct = messageStruct.getStruct(Envelope.FieldName.SOURCE); + return TimestampData.fromEpochMillis( + (Long) sourceStruct.get(AbstractSourceInfo.TIMESTAMP_KEY)); + } + }), + + /** + * It indicates the row kind of the changelog. '+I' means INSERT message, '-D' means DELETE + * message, '-U' means UPDATE_BEFORE message and '+U' means UPDATE_AFTER message + */ + ROW_KIND( + "row_kind", + DataTypes.STRING().notNull(), + new RowDataMetadataConverter() { + private static final long serialVersionUID = 1L; + + @Override + public Object read(RowData rowData) { + return StringData.fromString(rowData.getRowKind().shortString()); + } + + @Override + public Object read(SourceRecord record) { + throw new UnsupportedOperationException( + "Please call read(RowData rowData) method instead."); + } + }); + + private final String key; + + private final DataType dataType; + + private final MetadataConverter converter; + + TiDBReadableMetadata(String key, DataType dataType, MetadataConverter converter) { + this.key = key; + this.dataType = dataType; + this.converter = converter; + } + + public String getKey() { + return key; + } + + public DataType getDataType() { + return dataType; + } + + public MetadataConverter getConverter() { + return converter; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java new file mode 100644 index 00000000000..9ec64476f7e --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableFactory.java @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.table; + +import org.apache.flink.cdc.connectors.base.options.StartupOptions; +import org.apache.flink.cdc.connectors.base.utils.OptionUtils; +import org.apache.flink.cdc.debezium.table.DebeziumOptions; +import org.apache.flink.cdc.debezium.utils.JdbcUrlUtils; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.factories.DynamicTableSourceFactory; +import org.apache.flink.table.factories.FactoryUtil; + +import java.time.Duration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECTION_POOL_SIZE; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECT_MAX_RETRIES; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECT_TIMEOUT; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.DATABASE_NAME; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.HOSTNAME; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.PASSWORD; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.SERVER_TIME_ZONE; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.TABLE_NAME; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.USERNAME; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.CHUNK_META_GROUP_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_INCREMENTAL_SNAPSHOT_ENABLED; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_SNAPSHOT_FETCH_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_STARTUP_MODE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_STARTUP_TIMESTAMP_MILLIS; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.HEARTBEAT_INTERVAL; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.HOST_MAPPING; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.JDBC_DRIVER; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.PD_ADDRESSES; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.TABLE_LIST; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.TIDB_PORT; +import static org.apache.flink.cdc.debezium.table.DebeziumOptions.getDebeziumProperties; +import static org.apache.flink.cdc.debezium.utils.ResolvedSchemaUtils.getPhysicalSchema; + +/** Factory for creating configured instances of {@link 
TiDBTableSource}. */ +public class TiDBTableFactory implements DynamicTableSourceFactory { + private static final String IDENTIFIER = "tidb-cdc"; + + @Override + public String factoryIdentifier() { + return IDENTIFIER; + } + + @Override + public Set> requiredOptions() { + Set> options = new HashSet<>(); + options.add(HOSTNAME); + options.add(USERNAME); + options.add(PASSWORD); + options.add(PD_ADDRESSES); + options.add(TIDB_PORT); + + return options; + } + + @Override + public Set> optionalOptions() { + Set> options = new HashSet<>(); + options.add(SCAN_STARTUP_MODE); + options.add(SCAN_STARTUP_TIMESTAMP_MILLIS); + + options.add(DATABASE_NAME); + options.add(TABLE_NAME); + options.add(TABLE_LIST); + options.add(CONNECT_TIMEOUT); + options.add(SERVER_TIME_ZONE); + options.add(HOST_MAPPING); + options.add(JDBC_DRIVER); + options.add(HEARTBEAT_INTERVAL); + + // increment snapshot options + options.add(SCAN_INCREMENTAL_SNAPSHOT_ENABLED); + options.add(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE); + options.add(CHUNK_META_GROUP_SIZE); + options.add(CONNECTION_POOL_SIZE); + options.add(CONNECT_MAX_RETRIES); + options.add(SCAN_SNAPSHOT_FETCH_SIZE); + options.add(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN); + options.add(SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND); + options.add(SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND); + return options; + } + + private static final String SCAN_STARTUP_MODE_VALUE_INITIAL = "initial"; + private static final String SCAN_STARTUP_MODE_VALUE_LATEST_OFFSET = "latest-offset"; + private static final String SCAN_STARTUP_MODE_VALUE_SNAPSHOT = "snapshot"; + private static final String SCAN_STARTUP_MODE_VALUE_TIMESTAMP = "timestamp"; + + private static StartupOptions getStartupOptions(ReadableConfig config) { + String modeString = config.get(SCAN_STARTUP_MODE); + Long startupTimestamp = config.get(SCAN_STARTUP_TIMESTAMP_MILLIS); + switch (modeString.toLowerCase()) { + case SCAN_STARTUP_MODE_VALUE_INITIAL: + return StartupOptions.initial(); + case SCAN_STARTUP_MODE_VALUE_SNAPSHOT: + return StartupOptions.snapshot(); + case SCAN_STARTUP_MODE_VALUE_LATEST_OFFSET: + return StartupOptions.latest(); + case SCAN_STARTUP_MODE_VALUE_TIMESTAMP: + return StartupOptions.timestamp(startupTimestamp); + default: + throw new ValidationException( + String.format( + "Invalid value for option '%s'. 
Supported values are [%s, %s, %s, %s], but was: %s", + SCAN_STARTUP_MODE.key(), + SCAN_STARTUP_MODE_VALUE_INITIAL, + SCAN_STARTUP_MODE_VALUE_SNAPSHOT, + SCAN_STARTUP_MODE_VALUE_LATEST_OFFSET, + SCAN_STARTUP_MODE_VALUE_TIMESTAMP, + modeString)); + } + } + + @Override + public DynamicTableSource createDynamicTableSource(Context context) { + final FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper(this, context); + + // Validate all options except pass-through properties with the given prefixes. + helper.validateExcept( + JdbcUrlUtils.PROPERTIES_PREFIX, + DebeziumOptions.DEBEZIUM_OPTIONS_PREFIX, + TiKVOptions.TIKV_OPTIONS_PREFIX); + + final ReadableConfig config = helper.getOptions(); + + String hostname = config.get(HOSTNAME); + String username = config.get(USERNAME); + String password = config.get(PASSWORD); + String databaseName = config.get(DATABASE_NAME); + String tableName = config.get(TABLE_NAME); + String tableList = config.get(TABLE_LIST); + + int port = config.get(TIDB_PORT); + String serverTimeZone = config.get(SERVER_TIME_ZONE); + Duration connectTimeout = config.get(CONNECT_TIMEOUT); + String pdAddresses = config.get(PD_ADDRESSES); + String hostMapping = config.get(HOST_MAPPING); + String jdbcDriver = config.get(JDBC_DRIVER); + + // incremental snapshot options + boolean enableParallelRead = config.get(SCAN_INCREMENTAL_SNAPSHOT_ENABLED); + int splitSize = config.get(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE); + int splitMetaGroupSize = config.get(CHUNK_META_GROUP_SIZE); + int fetchSize = config.get(SCAN_SNAPSHOT_FETCH_SIZE); + int connectionPoolSize = config.get(CONNECTION_POOL_SIZE); + int connectMaxRetries = config.get(CONNECT_MAX_RETRIES); + String chunkKeyColumn = + config.getOptional(SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN).orElse(null); + Map<ObjectPath, String> chunkKeyColumns = new HashMap<>(); + if (chunkKeyColumn != null) { + chunkKeyColumns.put(new ObjectPath(databaseName, tableName), chunkKeyColumn); + } + + double distributionFactorUpper = config.get(SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND); + double distributionFactorLower = config.get(SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND); + + ResolvedSchema physicalSchema = + getPhysicalSchema(context.getCatalogTable().getResolvedSchema()); + + StartupOptions startupOptions = getStartupOptions(config); + + Duration heartbeatInterval = config.get(HEARTBEAT_INTERVAL); + + OptionUtils.printOptions(IDENTIFIER, config.toMap()); + + return new TiDBTableSource( + physicalSchema, + port, + hostname, + databaseName, + tableName, + tableList, + username, + password, + serverTimeZone, + getDebeziumProperties(context.getCatalogTable().getOptions()), + enableParallelRead, + heartbeatInterval, + pdAddresses, + hostMapping, + connectTimeout, + TiKVOptions.getTiKVOptions(context.getCatalogTable().getOptions()), + splitSize, + splitMetaGroupSize, + fetchSize, + connectMaxRetries, + connectionPoolSize, + distributionFactorUpper, + distributionFactorLower, + chunkKeyColumn, + chunkKeyColumns, + jdbcDriver, + startupOptions); + } + + static class TiKVOptions { + private static final String TIKV_OPTIONS_PREFIX = "tikv."; + + public static Map<String, String> getTiKVOptions(Map<String, String> properties) { + Map<String, String> tikvOptions = new HashMap<>(); + + if (hasTiKVOptions(properties)) { + properties.keySet().stream() + .filter(key -> key.startsWith(TIKV_OPTIONS_PREFIX)) + .forEach( + key -> { + final String value = properties.get(key); + tikvOptions.put(key, value); + }); + } + return tikvOptions; + } + + /** + * Decides if the table options contain TiKV client properties that start with the prefix + * 'tikv'. 
+ */ + private static boolean hasTiKVOptions(Map options) { + return options.keySet().stream().anyMatch(k -> k.startsWith(TIKV_OPTIONS_PREFIX)); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java index f9310462548..98c5a38f570 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSource.java @@ -18,13 +18,19 @@ package org.apache.flink.cdc.connectors.tidb.table; import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions; -import org.apache.flink.cdc.connectors.tidb.TiDBSource; +import org.apache.flink.cdc.connectors.base.options.StartupOptions; +import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource; +import org.apache.flink.cdc.connectors.tidb.source.TiDBSourceBuilder; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions; +import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema; +import org.apache.flink.cdc.debezium.table.MetadataConverter; +import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema; +import org.apache.flink.table.catalog.ObjectPath; import org.apache.flink.table.catalog.ResolvedSchema; import org.apache.flink.table.connector.ChangelogMode; import org.apache.flink.table.connector.source.DynamicTableSource; import org.apache.flink.table.connector.source.ScanTableSource; -import org.apache.flink.table.connector.source.SourceFunctionProvider; +import org.apache.flink.table.connector.source.SourceProvider; import org.apache.flink.table.connector.source.abilities.SupportsReadingMetadata; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.DataType; @@ -35,32 +41,53 @@ import javax.annotation.Nullable; +import java.time.Duration; +import java.time.ZoneId; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Properties; import java.util.stream.Collectors; import java.util.stream.Stream; import static org.apache.flink.util.Preconditions.checkNotNull; -/** - * A {@link DynamicTableSource} that describes how to create a TiDB change event stream from a - * logical description. - */ +/** TiDB table source. 
*/ public class TiDBTableSource implements ScanTableSource, SupportsReadingMetadata { - private final ResolvedSchema physicalSchema; - private final String database; + + private final StartupOptions startupOptions; + private final String tableList; private final String tableName; + private final Duration connectTimeout; + private final String jdbcDriver; + private final String serverTimeZone; + private final String pdAddresses; - @Nullable private final String hostMapping; - private final StartupOptions startupOptions; - private final Map options; + private final String hostMapping; - // -------------------------------------------------------------------------------------------- - // Mutable attributes - // -------------------------------------------------------------------------------------------- + private final int port; + private final String hostName; + private final String database; + private final String username; + private final String password; + private final Duration heartbeatInterval; + + // incremental snapshot options + private final int splitSize; + private final int splitMetaGroupSize; + private final int fetchSize; + private final int connectionPoolSize; + private final int connectMaxRetries; + private final double distributionFactorUpper; + private final double distributionFactorLower; + private final String chunkKeyColumn; + private final Map chunkKeyColumns; + + private final Properties jdbcProperties; + private final Map options; + private final boolean enableParallelRead; /** Data type that describes the final output of the source. */ protected DataType producedDataType; @@ -70,20 +97,62 @@ public class TiDBTableSource implements ScanTableSource, SupportsReadingMetadata public TiDBTableSource( ResolvedSchema physicalSchema, + int port, + String hostName, String database, String tableName, + String tableList, + String username, + String password, + String serverTimeZone, + Properties jdbcProperties, + boolean enableParallelRead, + Duration heartbeatInterval, String pdAddresses, String hostMapping, - StartupOptions startupOptions, - Map options) { + Duration connectTimeout, + Map options, + int splitSize, + int splitMetaGroupSize, + int fetchSize, + int connectMaxRetries, + int connectionPoolSize, + double distributionFactorUpper, + double distributionFactorLower, + @Nullable String chunkKeyColumn, + @Nullable Map chunkKeyColumns, + String jdbcDriver, + StartupOptions startupOptions) { this.physicalSchema = physicalSchema; this.database = checkNotNull(database); this.tableName = checkNotNull(tableName); this.pdAddresses = checkNotNull(pdAddresses); + this.port = port; + this.username = username; + this.password = password; + this.serverTimeZone = serverTimeZone; + this.jdbcProperties = jdbcProperties; + this.hostName = hostName; + this.options = options; + + // incremental snapshot options + this.enableParallelRead = enableParallelRead; + this.splitSize = splitSize; + this.splitMetaGroupSize = splitMetaGroupSize; + this.fetchSize = fetchSize; + this.connectMaxRetries = connectMaxRetries; + this.connectionPoolSize = connectionPoolSize; + this.distributionFactorUpper = distributionFactorUpper; + this.distributionFactorLower = distributionFactorLower; + this.chunkKeyColumn = chunkKeyColumn; + this.chunkKeyColumns = chunkKeyColumns; + this.heartbeatInterval = heartbeatInterval; + this.jdbcDriver = jdbcDriver; + this.connectTimeout = connectTimeout; + this.tableList = tableList; this.hostMapping = hostMapping; this.startupOptions = startupOptions; this.producedDataType = 
physicalSchema.toPhysicalRowDataType(); - this.options = options; this.metadataKeys = Collections.emptyList(); } @@ -98,40 +167,59 @@ public ChangelogMode getChangelogMode() { @Override public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) { + // TIDB source builder final TiConfiguration tiConf = - TDBSourceOptions.getTiConfiguration(pdAddresses, hostMapping, options); + TiDBSourceOptions.getTiConfiguration(pdAddresses, hostMapping, options); + RowType physicalDataType = (RowType) physicalSchema.toPhysicalRowDataType().getLogicalType(); + TypeInformation typeInfo = scanContext.createTypeInformation(producedDataType); - TiKVMetadataConverter[] metadataConverters = getMetadataConverters(); + MetadataConverter[] metadataConverters = getMetadataConverters(); - RowDataTiKVSnapshotEventDeserializationSchema snapshotEventDeserializationSchema = - new RowDataTiKVSnapshotEventDeserializationSchema( - tiConf, - database, - tableName, - typeInfo, - metadataConverters, - physicalDataType); + DebeziumDeserializationSchema deserializer = + RowDataDebeziumDeserializeSchema.newBuilder() + .setPhysicalRowType(physicalDataType) + .setMetadataConverters(metadataConverters) + .setResultTypeInfo(typeInfo) + .setServerTimeZone( + serverTimeZone == null + ? ZoneId.systemDefault() + : ZoneId.of(serverTimeZone)) + .setUserDefinedConverterFactory( + TiDBDeserializationConverterFactory.instance()) + .build(); - RowDataTiKVChangeEventDeserializationSchema changeEventDeserializationSchema = - new RowDataTiKVChangeEventDeserializationSchema( - tiConf, - database, - tableName, - typeInfo, - metadataConverters, - physicalDataType); - - TiDBSource.Builder builder = - TiDBSource.builder() - .database(database) - .tableName(tableName) + JdbcIncrementalSource parallelSource = + TiDBSourceBuilder.TiDBIncrementalSource.builder() + .hostname(hostName) + .port(port) + .tiConfiguration(tiConf) + .databaseList(database) + .tableList(database + "\\." 
+ tableName) + .username(username) + .password(password) + .serverTimeZone(serverTimeZone.toString()) + .splitSize(splitSize) + .splitMetaGroupSize(splitMetaGroupSize) + .distributionFactorUpper(distributionFactorUpper) + .distributionFactorLower(distributionFactorLower) + .fetchSize(fetchSize) + .connectTimeout(connectTimeout) + .connectionPoolSize(connectionPoolSize) + .chunkKeyColumn(chunkKeyColumn) + .chunkKeyColumns(chunkKeyColumns) + .driverClassName(jdbcDriver) + .connectMaxRetries(connectMaxRetries) + .jdbcProperties(jdbcProperties) .startupOptions(startupOptions) - .tiConf(tiConf) - .snapshotEventDeserializer(snapshotEventDeserializationSchema) - .changeEventDeserializer(changeEventDeserializationSchema); - return SourceFunctionProvider.of(builder.build(), false); + .pdAddresses(pdAddresses) + .hostMapping(hostMapping) + .deserializer(deserializer) + .build(); + // todo JdbcIncrementalSource parallelSource = + // TiDBSourceBuilder.TiDBIncrementalSource.builder() + return SourceProvider.of(parallelSource); } @Override @@ -139,33 +227,72 @@ public DynamicTableSource copy() { TiDBTableSource source = new TiDBTableSource( physicalSchema, + port, + hostName, database, tableName, + tableList, + username, + password, + serverTimeZone, + jdbcProperties, + enableParallelRead, + heartbeatInterval, pdAddresses, hostMapping, - startupOptions, - options); + connectTimeout, + options, + splitSize, + splitMetaGroupSize, + fetchSize, + connectMaxRetries, + connectionPoolSize, + distributionFactorUpper, + distributionFactorLower, + chunkKeyColumn, + chunkKeyColumns, + jdbcDriver, + startupOptions); source.producedDataType = producedDataType; source.metadataKeys = metadataKeys; + return source; } - private TiKVMetadataConverter[] getMetadataConverters() { + @Override + public String asSummaryString() { + return "TiDB-CDC"; + } + + @Override + public Map listReadableMetadata() { + return Stream.of(TiDBReadableMetadata.values()) + .collect( + Collectors.toMap( + TiDBReadableMetadata::getKey, TiDBReadableMetadata::getDataType)); + } + + @Override + public void applyReadableMetadata(List metadataKeys, DataType producedDataType) { + this.metadataKeys = metadataKeys; + this.producedDataType = producedDataType; + } + + // TiDBMetadataConverter to MetadataConverter + private MetadataConverter[] getMetadataConverters() { if (metadataKeys.isEmpty()) { - return new TiKVMetadataConverter[0]; + return new MetadataConverter[0]; } return metadataKeys.stream() .map( key -> - Stream.of( - TiKVReadableMetadata.createTiKVReadableMetadata( - database, tableName)) + Stream.of(TiDBReadableMetadata.values()) .filter(m -> m.getKey().equals(key)) .findFirst() .orElseThrow(IllegalStateException::new)) - .map(TiKVReadableMetadata::getConverter) - .toArray(TiKVMetadataConverter[]::new); + .map(TiDBReadableMetadata::getConverter) + .toArray(MetadataConverter[]::new); } @Override @@ -177,11 +304,32 @@ public boolean equals(Object o) { return false; } TiDBTableSource that = (TiDBTableSource) o; - return Objects.equals(physicalSchema, that.physicalSchema) - && Objects.equals(database, that.database) + return port == that.port + && enableParallelRead == that.enableParallelRead + && splitSize == that.splitSize + && splitMetaGroupSize == that.splitMetaGroupSize + && fetchSize == that.fetchSize + && connectionPoolSize == that.connectionPoolSize + && connectMaxRetries == that.connectMaxRetries + && Double.compare(that.distributionFactorUpper, distributionFactorUpper) == 0 + && Double.compare(that.distributionFactorLower, 
distributionFactorLower) == 0 + && physicalSchema.equals(that.physicalSchema) + && startupOptions.equals(that.startupOptions) + && Objects.equals(tableList, that.tableList) && Objects.equals(tableName, that.tableName) + && Objects.equals(connectTimeout, that.connectTimeout) + && Objects.equals(jdbcDriver, that.jdbcDriver) + && Objects.equals(serverTimeZone, that.serverTimeZone) && Objects.equals(pdAddresses, that.pdAddresses) - && Objects.equals(startupOptions, that.startupOptions) + && Objects.equals(hostMapping, that.hostMapping) + && Objects.equals(hostName, that.hostName) + && Objects.equals(database, that.database) + && Objects.equals(username, that.username) + && Objects.equals(password, that.password) + && Objects.equals(heartbeatInterval, that.heartbeatInterval) + && Objects.equals(chunkKeyColumn, that.chunkKeyColumn) + && Objects.equals(chunkKeyColumns, that.chunkKeyColumns) + && Objects.equals(jdbcProperties, that.jdbcProperties) && Objects.equals(options, that.options) && Objects.equals(producedDataType, that.producedDataType) && Objects.equals(metadataKeys, that.metadataKeys); @@ -191,31 +339,33 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash( physicalSchema, - database, + startupOptions, + tableList, tableName, + connectTimeout, + jdbcDriver, + serverTimeZone, pdAddresses, - startupOptions, + hostMapping, + port, + hostName, + database, + username, + password, + heartbeatInterval, + splitSize, + splitMetaGroupSize, + fetchSize, + connectMaxRetries, + connectionPoolSize, + distributionFactorUpper, + distributionFactorLower, + chunkKeyColumn, + chunkKeyColumns, + jdbcProperties, options, + enableParallelRead, producedDataType, metadataKeys); } - - @Override - public String asSummaryString() { - return "TiDB-CDC"; - } - - @Override - public Map listReadableMetadata() { - return Stream.of(TiKVReadableMetadata.createTiKVReadableMetadata(database, tableName)) - .collect( - Collectors.toMap( - TiKVReadableMetadata::getKey, TiKVReadableMetadata::getDataType)); - } - - @Override - public void applyReadableMetadata(List metadataKeys, DataType producedDataType) { - this.metadataKeys = metadataKeys; - this.producedDataType = producedDataType; - } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactory.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactory.java deleted file mode 100644 index b38175b71b5..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactory.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.cdc.connectors.tidb.table.utils.OptionUtils; -import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.ReadableConfig; -import org.apache.flink.table.api.ValidationException; -import org.apache.flink.table.catalog.ResolvedSchema; -import org.apache.flink.table.connector.source.DynamicTableSource; -import org.apache.flink.table.factories.DynamicTableSourceFactory; -import org.apache.flink.table.factories.FactoryUtil; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.DATABASE_NAME; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.HOST_MAPPING; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.PD_ADDRESSES; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.SCAN_STARTUP_MODE; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TABLE_NAME; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TIKV_BATCH_GET_CONCURRENCY; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TIKV_BATCH_SCAN_CONCURRENCY; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TIKV_GRPC_SCAN_TIMEOUT; -import static org.apache.flink.cdc.connectors.tidb.TDBSourceOptions.TIKV_GRPC_TIMEOUT; -import static org.apache.flink.cdc.debezium.utils.ResolvedSchemaUtils.getPhysicalSchema; - -/** Factory for creating configured instance of {@link TiDBTableSource}. */ -public class TiDBTableSourceFactory implements DynamicTableSourceFactory { - - private static final String IDENTIFIER = "tidb-cdc"; - - @Override - public DynamicTableSource createDynamicTableSource(Context context) { - final FactoryUtil.TableFactoryHelper helper = - FactoryUtil.createTableFactoryHelper(this, context); - - final ReadableConfig config = helper.getOptions(); - String databaseName = config.get(DATABASE_NAME); - String tableName = config.get(TABLE_NAME); - String pdAddresses = config.get(PD_ADDRESSES); - String hostMapping = config.get(HOST_MAPPING); - StartupOptions startupOptions = getStartupOptions(config); - ResolvedSchema physicalSchema = - getPhysicalSchema(context.getCatalogTable().getResolvedSchema()); - - OptionUtils.printOptions(IDENTIFIER, ((Configuration) config).toMap()); - - return new TiDBTableSource( - physicalSchema, - databaseName, - tableName, - pdAddresses, - hostMapping, - startupOptions, - TiKVOptions.getTiKVOptions(context.getCatalogTable().getOptions())); - } - - @Override - public String factoryIdentifier() { - return IDENTIFIER; - } - - @Override - public Set> requiredOptions() { - Set> options = new HashSet<>(); - options.add(DATABASE_NAME); - options.add(TABLE_NAME); - options.add(PD_ADDRESSES); - return options; - } - - @Override - public Set> optionalOptions() { - Set> options = new HashSet<>(); - options.add(SCAN_STARTUP_MODE); - options.add(HOST_MAPPING); - options.add(TIKV_GRPC_TIMEOUT); - options.add(TIKV_GRPC_SCAN_TIMEOUT); - options.add(TIKV_BATCH_GET_CONCURRENCY); - options.add(TIKV_BATCH_SCAN_CONCURRENCY); - return options; - } - - private static final String SCAN_STARTUP_MODE_VALUE_INITIAL = "initial"; - private static final String SCAN_STARTUP_MODE_VALUE_LATEST = "latest-offset"; - - private static 
StartupOptions getStartupOptions(ReadableConfig config) { - String modeString = config.get(SCAN_STARTUP_MODE); - - switch (modeString.toLowerCase()) { - case SCAN_STARTUP_MODE_VALUE_INITIAL: - return StartupOptions.initial(); - - case SCAN_STARTUP_MODE_VALUE_LATEST: - return StartupOptions.latest(); - - default: - throw new ValidationException( - String.format( - "Invalid value for option '%s'. Supported values are [%s, %s], but was: %s", - SCAN_STARTUP_MODE.key(), - SCAN_STARTUP_MODE_VALUE_INITIAL, - SCAN_STARTUP_MODE_VALUE_LATEST, - modeString)); - } - } - - static class TiKVOptions { - private static final String TIKV_OPTIONS_PREFIX = "tikv."; - - public static Map getTiKVOptions(Map properties) { - Map tikvOptions = new HashMap<>(); - - if (hasTiKVOptions(properties)) { - properties.keySet().stream() - .filter(key -> key.startsWith(TIKV_OPTIONS_PREFIX)) - .forEach( - key -> { - final String value = properties.get(key); - tikvOptions.put(key, value); - }); - } - return tikvOptions; - } - - /** - * Decides if the table options contains Debezium client properties that start with prefix - * 'debezium'. - */ - private static boolean hasTiKVOptions(Map options) { - return options.keySet().stream().anyMatch(k -> k.startsWith(TIKV_OPTIONS_PREFIX)); - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVAppendMetadataCollector.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVAppendMetadataCollector.java deleted file mode 100644 index 43904f3aaaa..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVAppendMetadataCollector.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.table.data.GenericRowData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.utils.JoinedRowData; -import org.apache.flink.util.Collector; - -import java.io.Serializable; - -/** Emits a row with physical fields and metadata fields. 
*/ -public class TiKVAppendMetadataCollector implements Collector, Serializable { - - private static final long serialVersionUID = 1L; - - private final TiKVMetadataConverter[] metadataConverters; - - public transient TiKVMetadataConverter.TiKVRowValue row; - public transient Collector outputCollector; - - public TiKVAppendMetadataCollector(TiKVMetadataConverter[] metadataConverters) { - this.metadataConverters = metadataConverters; - } - - @Override - public void collect(RowData physicalRow) { - GenericRowData metaRow = new GenericRowData(metadataConverters.length); - for (int i = 0; i < metadataConverters.length; i++) { - Object meta = metadataConverters[i].read(row); - metaRow.setField(i, meta); - } - RowData outRow = new JoinedRowData(physicalRow.getRowKind(), physicalRow, metaRow); - outputCollector.collect(outRow); - } - - @Override - public void close() { - // nothing to do - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVMetadataConverter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVMetadataConverter.java deleted file mode 100644 index 891fffb2943..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVMetadataConverter.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.cdc.common.annotation.Internal; - -import org.tikv.kvproto.Cdcpb; -import org.tikv.kvproto.Kvrpcpb; - -import java.io.Serializable; - -/** A converter converts TiKV Row metadata into Flink internal data structures. */ -@FunctionalInterface -@Internal -public interface TiKVMetadataConverter extends Serializable { - - Object read(TiKVRowValue row); - - /** TiKV Row Value. 
*/ - class TiKVRowValue { - public boolean isSnapshotRecord; - public Kvrpcpb.KvPair kvPair; - public Cdcpb.Event.Row row; - - public TiKVRowValue(Kvrpcpb.KvPair kvPair) { - this.isSnapshotRecord = true; - this.kvPair = kvPair; - } - - public TiKVRowValue(Cdcpb.Event.Row row) { - this.isSnapshotRecord = false; - this.row = row; - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVReadableMetadata.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVReadableMetadata.java deleted file mode 100644 index a617347e283..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/TiKVReadableMetadata.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table; - -import org.apache.flink.table.api.DataTypes; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.types.DataType; - -import java.util.ArrayList; -import java.util.List; - -/** Defines the supported metadata columns for {@link TiDBTableSource}. */ -public class TiKVReadableMetadata { - - private final String key; - - private final DataType dataType; - - private final TiKVMetadataConverter converter; - - TiKVReadableMetadata(String key, DataType dataType, TiKVMetadataConverter converter) { - this.key = key; - this.dataType = dataType; - this.converter = converter; - } - - public String getKey() { - return key; - } - - public DataType getDataType() { - return dataType; - } - - public TiKVMetadataConverter getConverter() { - return converter; - } - - /** Name of the table that contain the row. */ - public static TiKVReadableMetadata createTableNameMetadata(String tableName) { - return new TiKVReadableMetadata( - "table_name", - DataTypes.STRING().notNull(), - new TiKVMetadataConverter() { - private static final long serialVersionUID = 1L; - - @Override - public Object read(TiKVRowValue row) { - return StringData.fromString(tableName); - } - }); - } - - /** Name of the database that contain the row. */ - public static TiKVReadableMetadata createDatabaseNameMetadata(String database) { - return new TiKVReadableMetadata( - "database_name", - DataTypes.STRING().notNull(), - new TiKVMetadataConverter() { - private static final long serialVersionUID = 1L; - - @Override - public Object read(TiKVRowValue row) { - return StringData.fromString(database); - } - }); - } - - /** - * It indicates the time that the change was made in the database. 
If the record is read from - * snapshot of the table instead of the change stream, the value is always 0. - */ - public static TiKVReadableMetadata createOpTsMetadata() { - return new TiKVReadableMetadata( - "op_ts", - DataTypes.TIMESTAMP_LTZ(3).notNull(), - new TiKVMetadataConverter() { - private static final long serialVersionUID = 1L; - - @Override - public Object read(TiKVRowValue row) { - if (row.isSnapshotRecord) { - // Uses OL as the operation time of snapshot records. - return TimestampData.fromEpochMillis(0L); - } else { - return TimestampData.fromEpochMillis(row.row.getStartTs()); - } - } - }); - } - - public static TiKVReadableMetadata[] createTiKVReadableMetadata( - String database, String tableName) { - List list = new ArrayList<>(); - list.add(createDatabaseNameMetadata(database)); - list.add(createTableNameMetadata(tableName)); - list.add(createOpTsMetadata()); - return list.toArray(new TiKVReadableMetadata[0]); - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/OptionUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/OptionUtils.java deleted file mode 100644 index 1b2be59b1f8..00000000000 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/OptionUtils.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.cdc.connectors.tidb.table.utils; - -import org.apache.flink.configuration.ConfigurationUtils; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Map; - -/** A utility class to print configuration of connectors. */ -public class OptionUtils { - - private static final Logger LOG = LoggerFactory.getLogger(OptionUtils.class); - - /** Utility class can not be instantiated. 
*/ - private OptionUtils() {} - - public static void printOptions(String identifier, Map config) { - Map hideMap = ConfigurationUtils.hideSensitiveValues(config); - LOG.info("Print {} connector configuration:", identifier); - for (String key : hideMap.keySet()) { - LOG.info("{} = {}", key, hideMap.get(key)); - } - } -} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java new file mode 100644 index 00000000000..0c74c81a8d4 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableDiscoveryUtils.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.utils; + +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.RelationalTableFilters; +import io.debezium.relational.TableId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** Utils for table discovery. 
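+ * <p>{@code listTables} reads the {@code TABLE}-type names of a single database over JDBC and keeps
+ * only those accepted by the connector's {@link RelationalTableFilters}.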
*/ +public class TableDiscoveryUtils { + private static final Logger LOG = LoggerFactory.getLogger(TableDiscoveryUtils.class); + + public static List listTables( + String database, JdbcConnection jdbc, RelationalTableFilters tableFilters) + throws SQLException { + + Set allTableIds = + jdbc.readTableNames(database, null, null, new String[] {"TABLE"}); + + Set capturedTables = + allTableIds.stream() + .filter(t -> tableFilters.dataCollectionFilter().isIncluded(t)) + .collect(Collectors.toSet()); + LOG.info("listTables include parameters:database:{}", database); + LOG.info( + "TiDB captured tables : {} .", + capturedTables.stream().map(TableId::toString).collect(Collectors.joining(","))); + + return new ArrayList<>(capturedTables); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/TableKeyRangeUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableKeyRangeUtils.java similarity index 98% rename from flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/TableKeyRangeUtils.java rename to flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableKeyRangeUtils.java index a76b787b985..d67ccb81259 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/TableKeyRangeUtils.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TableKeyRangeUtils.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.tidb.table.utils; +package org.apache.flink.cdc.connectors.tidb.utils; import org.apache.flink.util.Preconditions; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java new file mode 100644 index 00000000000..d3f8a5838af --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBConnectionUtils.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.utils; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters; +import org.apache.flink.util.FlinkRuntimeException; + +import io.debezium.connector.mysql.MySqlSystemVariables; +import io.debezium.jdbc.JdbcConnection; +import io.debezium.jdbc.JdbcValueConverters; +import io.debezium.jdbc.TemporalPrecisionMode; + +import java.sql.SQLException; +import java.util.HashMap; +import java.util.Map; + +/** Utils to obtain the connection of TiDB. */ +public class TiDBConnectionUtils { + + public static boolean isTableIdCaseInsensitive(JdbcConnection connection) { + return !"0" + .equals( + readMySqlSystemVariables(connection) + .get(MySqlSystemVariables.LOWER_CASE_TABLE_NAMES)); + } + + public static Map readMySqlSystemVariables(JdbcConnection connection) { + // Read the system variables from the MySQL instance and get the current database name ... + return querySystemVariables(connection, "SHOW VARIABLES"); + } + + private static Map querySystemVariables( + JdbcConnection connection, String statement) { + final Map variables = new HashMap<>(); + try { + connection.query( + statement, + rs -> { + while (rs.next()) { + String varName = rs.getString(1); + String value = rs.getString(2); + if (varName != null && value != null) { + variables.put(varName, value); + } + } + }); + } catch (SQLException e) { + throw new FlinkRuntimeException("Error reading TiDB variables: " + e.getMessage(), e); + } + + return variables; + } + + // MysqlValueConverters + public static TiDBValueConverters getValueConverters(TiDBConnectorConfig dbzTiDBConfig) { + TemporalPrecisionMode timePrecisionMode = dbzTiDBConfig.getTemporalPrecisionMode(); + JdbcValueConverters.DecimalMode decimalMode = dbzTiDBConfig.getDecimalMode(); + String bigIntUnsignedHandlingModeStr = + dbzTiDBConfig.getConfig().getString(dbzTiDBConfig.BIGINT_UNSIGNED_HANDLING_MODE); + TiDBConnectorConfig.BigIntUnsignedHandlingMode bigIntUnsignedHandlingMode = + TiDBConnectorConfig.BigIntUnsignedHandlingMode.parse(bigIntUnsignedHandlingModeStr); + JdbcValueConverters.BigIntUnsignedMode bigIntUnsignedMode = + bigIntUnsignedHandlingMode.asBigIntUnsignedMode(); + + boolean timeAdjusterEnabled = + dbzTiDBConfig.getConfig().getBoolean(dbzTiDBConfig.ENABLE_TIME_ADJUSTER); + + return new TiDBValueConverters( + decimalMode, + timePrecisionMode, + bigIntUnsignedMode, + dbzTiDBConfig.binaryHandlingMode(), + timeAdjusterEnabled ? TiDBValueConverters::adjustTemporal : x -> x, + TiDBValueConverters::defaultParsingErrorHandler); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java new file mode 100644 index 00000000000..c796284a5d8 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/TiDBUtils.java @@ -0,0 +1,470 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.utils; + +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBConnectorConfig; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.converter.TiDBValueConverters; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; +import org.apache.flink.cdc.connectors.tidb.source.schema.TiDBDatabaseSchema; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.util.FlinkRuntimeException; + +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.Column; +import io.debezium.relational.Table; +import io.debezium.relational.TableId; +import io.debezium.schema.TopicSelector; +import org.tikv.common.meta.TiTimestamp; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static org.apache.flink.cdc.connectors.tidb.utils.TiDBConnectionUtils.getValueConverters; +import static org.apache.flink.table.api.DataTypes.FIELD; +import static org.apache.flink.table.api.DataTypes.ROW; + +/** Utils for TiDB. 
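+ * <p>JDBC helpers for the incremental snapshot phase: mapping TiDB/MySQL column types to Flink
+ * {@link DataType}s, building split-scan queries over primary-key ranges, reading approximate row
+ * counts, and obtaining the current change-event offset and database schema.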
*/ +public class TiDBUtils { + private static final String BIT = "BIT"; + private static final String TINYINT = "TINYINT"; + private static final String TINYINT_UNSIGNED = "TINYINT UNSIGNED"; + private static final String SMALLINT = "SMALLINT"; + private static final String SMALLINT_UNSIGNED = "SMALLINT UNSIGNED"; + private static final String MEDIUMINT = "MEDIUMINT"; + private static final String MEDIUMINT_UNSIGNED = "MEDIUMINT UNSIGNED"; + private static final String INT = "INT"; + private static final String INT_UNSIGNED = "INT UNSIGNED"; + private static final String BIGINT = "BIGINT"; + private static final String BIGINT_UNSIGNED = "BIGINT UNSIGNED"; + private static final String FLOAT = "FLOAT"; + private static final String FLOAT_UNSIGNED = "FLOAT UNSIGNED"; + private static final String DOUBLE = "DOUBLE"; + private static final String DOUBLE_UNSIGNED = "DOUBLE UNSIGNED"; + private static final String DECIMAL = "DECIMAL"; + private static final String DECIMAL_UNSIGNED = "DECIMAL UNSIGNED"; + private static final String CHAR = "CHAR"; + private static final String VARCHAR = "VARCHAR"; + private static final String TINYTEXT = "TINYTEXT"; + private static final String MEDIUMTEXT = "MEDIUMTEXT"; + private static final String TEXT = "TEXT"; + private static final String LONGTEXT = "LONGTEXT"; + private static final String DATE = "DATE"; + private static final String TIME = "TIME"; + private static final String DATETIME = "DATETIME"; + private static final String TIMESTAMP = "TIMESTAMP"; + private static final String YEAR = "YEAR"; + private static final String BINARY = "BINARY"; + private static final String VARBINARY = "VARBINARY"; + private static final String TINYBLOB = "TINYBLOB"; + private static final String MEDIUMBLOB = "MEDIUMBLOB"; + private static final String BLOB = "BLOB"; + private static final String LONGBLOB = "LONGBLOB"; + private static final String JSON = "JSON"; + private static final String SET = "SET"; + private static final String ENUM = "ENUM"; + private static final String GEOMETRY = "GEOMETRY"; + private static final String UNKNOWN = "UNKNOWN"; + + public static Object queryNextChunkMax( + JdbcConnection jdbc, + TableId tableId, + String splitColumnName, + int chunkSize, + Object includedLowerBound) + throws SQLException { + String quotedColumn = jdbc.quotedColumnIdString(splitColumnName); + String query = + String.format( + "SELECT MAX(%s) FROM (" + + "SELECT %s FROM %s WHERE %s >= ? ORDER BY %s ASC LIMIT %s" + + ") AS T", + quotedColumn, + quotedColumn, + jdbc.quotedTableIdString(tableId), + quotedColumn, + quotedColumn, + chunkSize); + return jdbc.prepareQueryAndMap( + query, + ps -> ps.setObject(1, includedLowerBound), + rs -> { + if (!rs.next()) { + // this should never happen + throw new SQLException( + String.format( + "No result returned after running query [%s]", query)); + } + return rs.getObject(1); + }); + } + + public static long queryApproximateRowCnt(JdbcConnection jdbc, TableId tableId) + throws SQLException { + // The statement used to get approximate row count which is less + // accurate than COUNT(*), but is more efficient for large table. 
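+ // Illustrative only (hypothetical database/table names), the two statements issued below
+ // look like:
+ //   USE `inventory`;
+ //   SHOW TABLE STATUS LIKE 'products';
+ // The 5th result column ("Rows") is read back as the approximate row count.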
+ final String useDatabaseStatement = String.format("USE %s;", quote(tableId.catalog())); + final String rowCountQuery = String.format("SHOW TABLE STATUS LIKE '%s';", tableId.table()); + jdbc.execute(useDatabaseStatement); + return jdbc.queryAndMap( + rowCountQuery, + rs -> { + if (!rs.next() || rs.getMetaData().getColumnCount() < 5) { + throw new SQLException( + String.format( + "No result returned after running query [%s]", + rowCountQuery)); + } + return rs.getLong(5); + }); + } + + public static DataType fromDbzColumn(Column column) { + DataType dataType = convertFromColumn(column); + if (column.isOptional()) { + return dataType; + } else { + return dataType.notNull(); + } + } + + private static DataType convertFromColumn(Column column) { + String typeName = column.typeName(); + switch (typeName) { + case TINYINT: + return column.length() == 1 ? DataTypes.BOOLEAN() : DataTypes.TINYINT(); + case TINYINT_UNSIGNED: + case SMALLINT: + return DataTypes.SMALLINT(); + case SMALLINT_UNSIGNED: + case INT: + case MEDIUMINT: + return DataTypes.INT(); + case INT_UNSIGNED: + case MEDIUMINT_UNSIGNED: + case BIGINT: + return DataTypes.BIGINT(); + case BIGINT_UNSIGNED: + return DataTypes.DECIMAL(20, 0); + case FLOAT: + return DataTypes.FLOAT(); + case DOUBLE: + return DataTypes.DOUBLE(); + case DECIMAL: + return DataTypes.DECIMAL(column.length(), column.scale().orElse(0)); + case TIME: + return column.length() >= 0 ? DataTypes.TIME(column.length()) : DataTypes.TIME(); + case DATE: + return DataTypes.DATE(); + case DATETIME: + case TIMESTAMP: + return column.length() >= 0 + ? DataTypes.TIMESTAMP(column.length()) + : DataTypes.TIMESTAMP(); + case CHAR: + return DataTypes.CHAR(column.length()); + case VARCHAR: + return DataTypes.VARCHAR(column.length()); + case TEXT: + return DataTypes.STRING(); + case BINARY: + return DataTypes.BINARY(column.length()); + case VARBINARY: + return DataTypes.VARBINARY(column.length()); + case BLOB: + return DataTypes.BYTES(); + default: + throw new UnsupportedOperationException( + String.format("Don't support MySQL type '%s' yet.", typeName)); + } + } + + public static String quote(String dbOrTableName) { + return "`" + dbOrTableName + "`"; + } + + public static String quote(TableId tableId) { + return tableId.toQuotedString('`'); + } + + public static PreparedStatement readTableSplitDataStatement( + JdbcConnection jdbc, + String sql, + boolean isFirstSplit, + boolean isLastSplit, + Object[] splitStart, + Object[] splitEnd, + int primaryKeyNum, + int fetchSize) { + try { + final PreparedStatement statement = initStatement(jdbc, sql, fetchSize); + if (isFirstSplit && isLastSplit) { + return statement; + } + if (isFirstSplit) { + for (int i = 0; i < primaryKeyNum; i++) { + statement.setObject(i + 1, splitEnd[i]); + statement.setObject(i + 1 + primaryKeyNum, splitEnd[i]); + } + } else if (isLastSplit) { + for (int i = 0; i < primaryKeyNum; i++) { + statement.setObject(i + 1, splitStart[i]); + } + } else { + for (int i = 0; i < primaryKeyNum; i++) { + statement.setObject(i + 1, splitStart[i]); + statement.setObject(i + 1 + primaryKeyNum, splitEnd[i]); + statement.setObject(i + 1 + 2 * primaryKeyNum, splitEnd[i]); + } + } + return statement; + } catch (Exception e) { + throw new RuntimeException("Failed to build the split data read statement.", e); + } + } + + private static PreparedStatement initStatement(JdbcConnection jdbc, String sql, int fetchSize) + throws SQLException { + final Connection connection = jdbc.connection(); + connection.setAutoCommit(false); + final 
PreparedStatement statement = connection.prepareStatement(sql); + statement.setFetchSize(fetchSize); + return statement; + } + + public static String buildSplitScanQuery( + TableId tableId, RowType pkRowType, boolean isFirstSplit, boolean isLastSplit) { + return buildSplitQuery(tableId, pkRowType, isFirstSplit, isLastSplit, -1, true); + } + + private static String buildSplitQuery( + TableId tableId, + RowType pkRowType, + boolean isFirstSplit, + boolean isLastSplit, + int limitSize, + boolean isScanningData) { + final String condition; + + if (isFirstSplit && isLastSplit) { + condition = null; + } else if (isFirstSplit) { + final StringBuilder sql = new StringBuilder(); + addPrimaryKeyColumnsToCondition(pkRowType, sql, " <= ?"); + if (isScanningData) { + sql.append(" AND NOT ("); + addPrimaryKeyColumnsToCondition(pkRowType, sql, " = ?"); + sql.append(")"); + } + condition = sql.toString(); + } else if (isLastSplit) { + final StringBuilder sql = new StringBuilder(); + addPrimaryKeyColumnsToCondition(pkRowType, sql, " >= ?"); + condition = sql.toString(); + } else { + final StringBuilder sql = new StringBuilder(); + addPrimaryKeyColumnsToCondition(pkRowType, sql, " >= ?"); + if (isScanningData) { + sql.append(" AND NOT ("); + addPrimaryKeyColumnsToCondition(pkRowType, sql, " = ?"); + sql.append(")"); + } + sql.append(" AND "); + addPrimaryKeyColumnsToCondition(pkRowType, sql, " <= ?"); + condition = sql.toString(); + } + + if (isScanningData) { + return buildSelectWithRowLimits( + tableId, limitSize, "*", Optional.ofNullable(condition), Optional.empty()); + } else { + final String orderBy = + pkRowType.getFieldNames().stream().collect(Collectors.joining(", ")); + return buildSelectWithBoundaryRowLimits( + tableId, + limitSize, + getPrimaryKeyColumnsProjection(pkRowType), + getMaxPrimaryKeyColumnsProjection(pkRowType), + Optional.ofNullable(condition), + orderBy); + } + } + + private static void addPrimaryKeyColumnsToCondition( + RowType pkRowType, StringBuilder sql, String predicate) { + for (Iterator fieldNamesIt = pkRowType.getFieldNames().iterator(); + fieldNamesIt.hasNext(); ) { + sql.append(fieldNamesIt.next()).append(predicate); + if (fieldNamesIt.hasNext()) { + sql.append(" AND "); + } + } + } + + private static String buildSelectWithBoundaryRowLimits( + TableId tableId, + int limit, + String projection, + String maxColumnProjection, + Optional condition, + String orderBy) { + final StringBuilder sql = new StringBuilder("SELECT "); + sql.append(maxColumnProjection); + sql.append(" FROM ("); + sql.append("SELECT "); + sql.append(projection); + sql.append(" FROM "); + sql.append(quotedTableIdString(tableId)); + if (condition.isPresent()) { + sql.append(" WHERE ").append(condition.get()); + } + sql.append(" ORDER BY ").append(orderBy).append(" LIMIT ").append(limit); + sql.append(") T"); + return sql.toString(); + } + + private static String quotedTableIdString(TableId tableId) { + return tableId.toQuotedString('`'); + } + + private static String buildSelectWithRowLimits( + TableId tableId, + int limit, + String projection, + Optional condition, + Optional orderBy) { + final StringBuilder sql = new StringBuilder("SELECT "); + sql.append(projection).append(" FROM "); + sql.append(quotedTableIdString(tableId)); + if (condition.isPresent()) { + sql.append(" WHERE ").append(condition.get()); + } + if (orderBy.isPresent()) { + sql.append(" ORDER BY ").append(orderBy.get()); + } + if (limit > 0) { + sql.append(" LIMIT ").append(limit); + } + return sql.toString(); + } + + private static String 
getPrimaryKeyColumnsProjection(RowType pkRowType) { + StringBuilder sql = new StringBuilder(); + for (Iterator fieldNamesIt = pkRowType.getFieldNames().iterator(); + fieldNamesIt.hasNext(); ) { + sql.append(fieldNamesIt.next()); + if (fieldNamesIt.hasNext()) { + sql.append(" , "); + } + } + return sql.toString(); + } + + private static String getMaxPrimaryKeyColumnsProjection(RowType pkRowType) { + StringBuilder sql = new StringBuilder(); + for (Iterator fieldNamesIt = pkRowType.getFieldNames().iterator(); + fieldNamesIt.hasNext(); ) { + sql.append("MAX(" + fieldNamesIt.next() + ")"); + if (fieldNamesIt.hasNext()) { + sql.append(" , "); + } + } + return sql.toString(); + } + + public static EventOffset currentBinlogOffset(JdbcConnection jdbc) { + final String showMasterStmt = "SHOW MASTER STATUS"; + try { + return jdbc.queryAndMap( + showMasterStmt, + rs -> { + if (rs.next()) { + final long eventPosition = rs.getLong(2); + return new EventOffset( + String.valueOf(TiTimestamp.extractPhysical(eventPosition)), + String.valueOf(eventPosition)); + } else { + throw new FlinkRuntimeException( + "Cannot read the binlog filename and position via '" + + showMasterStmt + + "'. Make sure your server is correctly configured"); + } + }); + } catch (SQLException e) { + throw new FlinkRuntimeException( + "Cannot read the binlog filename and position via '" + + showMasterStmt + + "'. Make sure your server is correctly configured", + e); + } + } + + public static TiDBDatabaseSchema newSchema( + TiDBConnection connection, + TiDBConnectorConfig config, + TopicSelector topicSelector, + boolean isTableIdCaseSensitive) + throws SQLException { + // Key.KeyMapper customKeysMapper = new CustomeKeyMapper(); + TiDBValueConverters valueConverters = getValueConverters(config); + TiDBDatabaseSchema schema = + new TiDBDatabaseSchema( + config, valueConverters, topicSelector, isTableIdCaseSensitive); + schema.refresh(connection, config, false); + return schema; + } + + public static TiDBDatabaseSchema createTiDBDatabaseSchema( + TiDBConnectorConfig dbzTiDBConfig, + TopicSelector topicSelector, + boolean isTableIdCaseSensitive) { + TiDBValueConverters valueConverters = getValueConverters(dbzTiDBConfig); + TiDBDatabaseSchema tiDBDatabaseSchema = + new TiDBDatabaseSchema( + dbzTiDBConfig, valueConverters, topicSelector, isTableIdCaseSensitive); + return tiDBDatabaseSchema; + } + + public static RowType getSplitType(Table table) { + List primaryKeys = table.primaryKeyColumns(); + if (primaryKeys.isEmpty()) { + throw new ValidationException( + String.format( + "Incremental snapshot for tables requires primary key," + + " but table %s doesn't have primary key.", + table.id())); + } + + // use first field in primary key as the split key + return getSplitType(primaryKeys.get(0)); + } + + public static RowType getSplitType(Column splitColumn) { + return (RowType) + ROW(FIELD(splitColumn.name(), TiDBUtils.fromDbzColumn(splitColumn))) + .getLogicalType(); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMapping.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/UriHostMapping.java similarity index 98% rename from flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMapping.java rename to 
flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/UriHostMapping.java index c1bf04ae8e2..42ccbb087d5 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMapping.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/java/org/apache/flink/cdc/connectors/tidb/utils/UriHostMapping.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.flink.cdc.connectors.tidb.table.utils; +package org.apache.flink.cdc.connectors.tidb.utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory index 606391e6953..a54a19fce70 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -org.apache.flink.cdc.connectors.tidb.table.TiDBTableSourceFactory \ No newline at end of file +org.apache.flink.cdc.connectors.tidb.table.TiDBTableFactory \ No newline at end of file diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/TiDBTestBase.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/TiDBTestBase.java index 525fd2a28e4..14b8f6b6b94 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/TiDBTestBase.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/TiDBTestBase.java @@ -17,6 +17,7 @@ package org.apache.flink.cdc.connectors.tidb; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; import org.apache.flink.test.util.AbstractTestBase; import com.alibaba.dcm.DnsCacheManipulator; @@ -32,7 +33,6 @@ import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.Network; import org.testcontainers.containers.output.Slf4jLogConsumer; -import org.testcontainers.junit.jupiter.Container; import org.testcontainers.lifecycle.Startables; import java.net.URL; @@ -70,7 +70,6 @@ public class TiDBTestBase extends AbstractTestBase { public static final Network NETWORK = Network.newNetwork(); - @Container public static final GenericContainer PD = new FixedHostPortGenericContainer<>("pingcap/pd:v6.1.0") .withFileSystemBind("src/test/resources/config/pd.toml", "/pd.toml") @@ -90,7 +89,6 @@ public class TiDBTestBase extends AbstractTestBase { .withStartupTimeout(Duration.ofSeconds(120)) .withLogConsumer(new Slf4jLogConsumer(LOG)); - @Container public static final GenericContainer TIKV = new FixedHostPortGenericContainer<>("pingcap/tikv:v6.1.0") .withFixedExposedPort(TIKV_PORT_ORIGIN, TIKV_PORT_ORIGIN) @@ -108,7 +106,6 @@ public class TiDBTestBase 
extends AbstractTestBase { .withStartupTimeout(Duration.ofSeconds(120)) .withLogConsumer(new Slf4jLogConsumer(LOG)); - @Container public static final GenericContainer TIDB = new GenericContainer<>("pingcap/tidb:v6.1.0") .withExposedPorts(TIDB_PORT) @@ -212,4 +209,28 @@ protected void initializeTidbTable(String sqlFile) { throw new RuntimeException(e); } } + + protected TiDBSourceConfigFactory getMockTiDBSourceConfigFactory( + String database, String schemaName, String tableName, int splitSize) { + return getMockTiDBSourceConfigFactory(database, schemaName, tableName, splitSize, false); + } + + protected TiDBSourceConfigFactory getMockTiDBSourceConfigFactory( + String database, + String schemaName, + String tableName, + int splitSize, + boolean skipSnapshotBackfill) { + + TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); + tiDBSourceConfigFactory.hostname(TIDB.getContainerIpAddress()); + tiDBSourceConfigFactory.port(TIDB.getMappedPort(TIDB_PORT)); + tiDBSourceConfigFactory.username(TIDB_USER); + tiDBSourceConfigFactory.password(TIDB_PASSWORD); + tiDBSourceConfigFactory.databaseList(database); + tiDBSourceConfigFactory.tableList(database + "." + tableName); + tiDBSourceConfigFactory.splitSize(splitSize); + tiDBSourceConfigFactory.skipSnapshotBackfill(skipSnapshotBackfill); + return tiDBSourceConfigFactory; + } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialectTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialectTest.java new file mode 100644 index 00000000000..d5574a0f276 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBDialectTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source; + +import org.apache.flink.cdc.connectors.tidb.TiDBTestBase; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; + +import io.debezium.relational.TableId; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; + +/** Test for {@link TiDBDialect}. 
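+ * <p>Runs against the Docker-based TiDB cluster started by {@link TiDBTestBase} and checks that
+ * table discovery resolves {@code customer.customers}.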
*/ +public class TiDBDialectTest extends TiDBTestBase { + private static final String databaseName = "customer"; + private static final String tableName = "customers"; + + @Test + public void testDiscoverDataCollectionsInMultiDatabases() { + initializeTidbTable("customer"); + TiDBSourceConfigFactory configFactoryOfCustomDatabase = + getMockTiDBSourceConfigFactory(databaseName, null, tableName, 10); + + TiDBDialect dialectOfcustomDatabase = + new TiDBDialect(configFactoryOfCustomDatabase.create(0)); + List tableIdsOfcustomDatabase = + dialectOfcustomDatabase.discoverDataCollections( + configFactoryOfCustomDatabase.create(0)); + Assertions.assertThat(tableIdsOfcustomDatabase.get(0).toString()) + .isEqualTo("customer.customers"); + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceExampleTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceExampleTest.java new file mode 100644 index 00000000000..d1e4161d96f --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/TiDBSourceExampleTest.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source; + +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource; +import org.apache.flink.cdc.connectors.tidb.TiDBTestBase; +import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema; +import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.conversion.RowRowConverter; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.util.CloseableIterator; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** Tests for TiDB Source based on incremental snapshot framework . 
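+ * <p>Builds a {@code TiDBIncrementalSource} for {@code inventory.products}, runs it as a DataStream
+ * source with parallelism 2, and verifies the records produced for the initial snapshot.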
*/ +public class TiDBSourceExampleTest extends TiDBTestBase { + + private static final String databaseName = "inventory"; + private static final String tableName = "products"; + + @Test + public void testConsumingScanEvents() throws Exception { + final DataType dataType = + DataTypes.ROW( + DataTypes.FIELD("id", DataTypes.BIGINT()), + DataTypes.FIELD("name", DataTypes.STRING()), + DataTypes.FIELD("description", DataTypes.STRING()), + DataTypes.FIELD("weight", DataTypes.FLOAT())); + + initializeTidbTable("inventory"); + + JdbcIncrementalSource tiDBIncrementalSource = + TiDBSourceBuilder.TiDBIncrementalSource.builder() + .hostname(TIDB.getHost()) + .port(TIDB.getMappedPort(TIDB_PORT)) + .username(TiDBTestBase.TIDB_USER) + .password(TiDBTestBase.TIDB_PASSWORD) + .databaseList(databaseName) + .tableList(this.databaseName + "." + this.tableName) + .splitSize(10) + .deserializer(buildRowDataDebeziumDeserializeSchema(dataType)) + .build(); + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + CloseableIterator iterator = + env.fromSource( + tiDBIncrementalSource, + WatermarkStrategy.noWatermarks(), + "TiDBParallelSource") + .setParallelism(2) + .executeAndCollect(); // collect record + + String[] snapshotExpectedRecords = + new String[] { + "+I[101, scooter, Small 2-wheel scooter, 3.14]", + "+I[102, car battery, 12V car battery, 8.1]", + "+I[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]", + "+I[104, hammer, 12oz carpenter's hammer, 0.75]", + "+I[105, hammer, 14oz carpenter's hammer, 0.875]", + "+I[106, hammer, 16oz carpenter's hammer, 1.0]", + "+I[107, rocks, box of assorted rocks, 5.3]", + "+I[108, jacket, water resistent black wind breaker, 0.1]", + "+I[109, spare tire, 24 inch spare tire, 22.2]" + }; + + // step-1: consume snapshot data + List snapshotRowDataList = new ArrayList<>(); + for (int i = 0; i < snapshotExpectedRecords.length && iterator.hasNext(); i++) { + snapshotRowDataList.add(iterator.next()); + } + + List snapshotActualRecords = formatResult(snapshotRowDataList, dataType); + assertEqualsInAnyOrder(Arrays.asList(snapshotExpectedRecords), snapshotActualRecords); + } + + private DebeziumDeserializationSchema buildRowDataDebeziumDeserializeSchema( + DataType dataType) { + LogicalType logicalType = TypeConversions.fromDataToLogicalType(dataType); + InternalTypeInfo typeInfo = InternalTypeInfo.of(logicalType); + return RowDataDebeziumDeserializeSchema.newBuilder() + .setPhysicalRowType((RowType) dataType.getLogicalType()) + .setResultTypeInfo(typeInfo) + .build(); + } + + private List formatResult(List records, DataType dataType) { + RowRowConverter rowRowConverter = RowRowConverter.create(dataType); + rowRowConverter.open(Thread.currentThread().getContextClassLoader()); + return records.stream() + .map(rowRowConverter::toExternal) + .map(Object::toString) + .collect(Collectors.toList()); + } + + public static void assertEqualsInAnyOrder(List expected, List actual) { + Assertions.assertThat(expected != null && actual != null).isTrue(); + assertEqualsInOrder( + expected.stream().sorted().collect(Collectors.toList()), + actual.stream().sorted().collect(Collectors.toList())); + } + + public static void assertEqualsInOrder(List expected, List actual) { + Assertions.assertThat(expected != null && actual != null).isTrue(); + Assertions.assertThat(expected.size()).isEqualTo(actual.size()); + Assertions.assertThat(expected.toArray(new String[0])) + .isEqualTo(actual.toArray(new String[0])); + } +} diff --git 
a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTaskTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTaskTest.java new file mode 100644 index 00000000000..c88d4e4f65b --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/fetch/TiDBScanFetchTaskTest.java @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.cdc.connectors.tidb.source.fetch; + +import org.apache.flink.cdc.connectors.base.dialect.JdbcDataSourceDialect; +import org.apache.flink.cdc.connectors.base.source.assigner.splitter.ChunkSplitter; +import org.apache.flink.cdc.connectors.base.source.meta.split.SnapshotSplit; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceRecords; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase; +import org.apache.flink.cdc.connectors.base.source.reader.external.AbstractScanFetchTask; +import org.apache.flink.cdc.connectors.base.source.reader.external.FetchTask; +import org.apache.flink.cdc.connectors.base.source.reader.external.IncrementalSourceScanFetcher; +import org.apache.flink.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHook; +import org.apache.flink.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHooks; +import org.apache.flink.cdc.connectors.tidb.TiDBTestBase; +import org.apache.flink.cdc.connectors.tidb.source.TiDBDialect; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.testutils.RecordsFormatter; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.types.DataType; + +import io.debezium.relational.TableId; +import org.apache.kafka.connect.source.SourceRecord; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; + +/** Tests for {@link TiDBScanFetchTask}. 
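+ * <p>Each case applies DML to {@code customer.customers} from a snapshot-phase hook while a snapshot
+ * split is read, and checks whether the emitted records reflect those changes, including the
+ * skip-backfill variants.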
*/ +public class TiDBScanFetchTaskTest extends TiDBTestBase { + private static final String databaseName = "customer"; + private static final String tableName = "customers"; + + private static final int USE_POST_LOWWATERMARK_HOOK = 1; + private static final int USE_PRE_HIGHWATERMARK_HOOK = 2; + + @Test + public void testChangingDataInSnapshotScan() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." + tableName; + String[] changingDataSql = + new String[] { + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 103", + "UPDATE " + tableId + " SET address = 'Shanghai' where id = 103", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 110", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 111", + }; + String[] expected = + new String[] { + "+I[101, user_1, Shanghai, 123567891234]", + "+I[102, user_2, Shanghai, 123567891234]", + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Hangzhou, 123567891234]", + "+I[111, user_6, Hangzhou, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + List actual = + getDataInSnapshotScan(changingDataSql, USE_POST_LOWWATERMARK_HOOK, false); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + @Test + public void testInsertDataInSnapshotScan() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." + tableName; + String[] insertDataSql = + new String[] { + "INSERT INTO " + tableId + " VALUES(112, 'user_12','Shanghai','123567891234')", + "INSERT INTO " + tableId + " VALUES(113, 'user_13','Shanghai','123567891234')", + }; + + String[] expected = + new String[] { + "+I[101, user_1, Shanghai, 123567891234]", + "+I[102, user_2, Shanghai, 123567891234]", + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Shanghai, 123567891234]", + "+I[111, user_6, Shanghai, 123567891234]", + "+I[112, user_12, Shanghai, 123567891234]", + "+I[113, user_13, Shanghai, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + + List actual = + getDataInSnapshotScan(insertDataSql, USE_POST_LOWWATERMARK_HOOK, false); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + @Test + public void testDeleteDataInSnapshotScan() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." + tableName; + String[] deleteDataSql = + new String[] { + "DELETE FROM " + tableId + " where id = 101", + "DELETE FROM " + tableId + " where id = 102", + }; + String[] expected = + new String[] { + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Shanghai, 123567891234]", + "+I[111, user_6, Shanghai, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + List actual = + getDataInSnapshotScan(deleteDataSql, USE_POST_LOWWATERMARK_HOOK, false); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + @Test + public void testSnapshotScanSkipBackfillWithPostLowWatermark() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." 
+ tableName; + + String[] changingDataSql = + new String[] { + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 103", + "DELETE FROM " + tableId + " where id = 102", + "INSERT INTO " + tableId + " VALUES(102, 'user_2','hangzhou','123567891234')", + "UPDATE " + tableId + " SET address = 'Shanghai' where id = 103", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 110", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 111", + }; + + String[] expected = + new String[] { + "+I[101, user_1, Shanghai, 123567891234]", + "+I[102, user_2, hangzhou, 123567891234]", + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Hangzhou, 123567891234]", + "+I[111, user_6, Hangzhou, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + + // Change data during [low_watermark, snapshot) will not be captured by snapshotting + List actual = + getDataInSnapshotScan(changingDataSql, USE_POST_LOWWATERMARK_HOOK, true); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + @Test + public void testSnapshotScanSkipBackfillWithPreHighWatermark() throws Exception { + initializeTidbTable("customer"); + String tableId = databaseName + "." + tableName; + + String[] changingDataSql = + new String[] { + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 103", + // "DELETE FROM " + tableId + " where id = 102", + // "INSERT INTO " + tableId + " VALUES(102, + // 'user_2',Hangzhou','123567891234')", + "UPDATE " + tableId + " SET address = 'Shanghai' where id = 103", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 110", + "UPDATE " + tableId + " SET address = 'Hangzhou' where id = 111", + }; + + String[] expected = + new String[] { + "+I[101, user_1, Shanghai, 123567891234]", + "+I[102, user_2, Shanghai, 123567891234]", + "+I[103, user_3, Shanghai, 123567891234]", + "+I[109, user_4, Shanghai, 123567891234]", + "+I[110, user_5, Hangzhou, 123567891234]", + "+I[111, user_6, Hangzhou, 123567891234]", + "+I[118, user_7, Shanghai, 123567891234]", + "+I[121, user_8, Shanghai, 123567891234]", + "+I[123, user_9, Shanghai, 123567891234]", + }; + + // Change data during [snapshot, high_watermark) will not be captured by snapshotting + List actual = + getDataInSnapshotScan(changingDataSql, USE_POST_LOWWATERMARK_HOOK, true); + assertEqualsInAnyOrder(Arrays.asList(expected), actual); + } + + private List getDataInSnapshotScan( + String[] changingDataSql, int hookType, boolean skipSnapshotBackfill) throws Exception { + TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); + tiDBSourceConfigFactory.hostname(TIDB.getHost()); + tiDBSourceConfigFactory.port(TIDB.getMappedPort(TIDB_PORT)); + tiDBSourceConfigFactory.username(TiDBTestBase.TIDB_USER); + tiDBSourceConfigFactory.password(TiDBTestBase.TIDB_PASSWORD); + tiDBSourceConfigFactory.databaseList(this.databaseName); + tiDBSourceConfigFactory.tableList(this.databaseName + "." 
+ this.tableName); + tiDBSourceConfigFactory.splitSize(10); + tiDBSourceConfigFactory.skipSnapshotBackfill(skipSnapshotBackfill); + TiDBSourceConfig tiDBSourceConfig = tiDBSourceConfigFactory.create(0); + TiDBDialect tiDBDialect = new TiDBDialect(tiDBSourceConfigFactory.create(0)); + SnapshotPhaseHooks hooks = new SnapshotPhaseHooks(); + + try (TiDBConnection tiDBConnection = tiDBDialect.openJdbcConnection()) { + SnapshotPhaseHook snapshotPhaseHook = + (tidbSourceConfig, split) -> { + tiDBConnection.execute(changingDataSql); + tiDBConnection.commit(); + try { + Thread.sleep(500L); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }; + if (hookType == USE_POST_LOWWATERMARK_HOOK) { + hooks.setPostLowWatermarkAction(snapshotPhaseHook); + } else if (hookType == USE_PRE_HIGHWATERMARK_HOOK) { + hooks.setPreHighWatermarkAction(snapshotPhaseHook); + } + final DataType dataType = + DataTypes.ROW( + DataTypes.FIELD("id", DataTypes.BIGINT()), + DataTypes.FIELD("name", DataTypes.STRING()), + DataTypes.FIELD("address", DataTypes.STRING()), + DataTypes.FIELD("phone_number", DataTypes.STRING())); + List snapshotSplits = getSnapshotSplits(tiDBSourceConfig, tiDBDialect); + + TiDBSourceFetchTaskContext tidbsourceFetchTaskContext = + new TiDBSourceFetchTaskContext(tiDBSourceConfig, tiDBDialect, tiDBConnection); + + return readTableSnapshotSplits( + snapshotSplits, tidbsourceFetchTaskContext, 1, dataType, hooks); + } + } + + private List readTableSnapshotSplits( + List snapshotSplits, + TiDBSourceFetchTaskContext taskContext, + int scanSplitsNum, + DataType dataType, + SnapshotPhaseHooks snapshotPhaseHooks) + throws Exception { + IncrementalSourceScanFetcher sourceScanFetcher = + new IncrementalSourceScanFetcher(taskContext, 0); + + ArrayList result = new ArrayList<>(); + for (int i = 0; i < scanSplitsNum; i++) { + SnapshotSplit sqlSplit = snapshotSplits.get(i); + if (sourceScanFetcher.isFinished()) { + FetchTask fetchTask = + taskContext.getDataSourceDialect().createFetchTask(sqlSplit); + ((AbstractScanFetchTask) fetchTask).setSnapshotPhaseHooks(snapshotPhaseHooks); + sourceScanFetcher.submitTask(fetchTask); + } + Iterator res; + while ((res = sourceScanFetcher.pollSplitRecords()) != null) { + while (res.hasNext()) { + SourceRecords sourceRecords = res.next(); + result.addAll(sourceRecords.getSourceRecordList()); + } + } + } + sourceScanFetcher.close(); + + Assertions.assertThat(sourceScanFetcher.getExecutorService()).isNotNull(); + Assertions.assertThat(sourceScanFetcher.getExecutorService().isTerminated()).isTrue(); + + return formatResult(result, dataType); + } + + private List formatResult(List records, DataType dataType) { + final RecordsFormatter formatter = new RecordsFormatter(dataType); + return formatter.format(records); + } + + /** Get snapshot splits. 
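+ * <p>Uses the dialect's {@code ChunkSplitter} to split every discovered table into snapshot splits
+ * bounded by primary-key ranges, sized by the configured {@code splitSize}.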
*/ + private List getSnapshotSplits( + TiDBSourceConfig sourceConfig, JdbcDataSourceDialect sourceDialect) throws Exception { + List discoverTables = sourceDialect.discoverDataCollections(sourceConfig); + final ChunkSplitter chunkSplitter = sourceDialect.createChunkSplitter(sourceConfig); + chunkSplitter.open(); + + List snapshotSplitList = new ArrayList<>(); + for (TableId table : discoverTables) { + List snapshotSplits = + (List) chunkSplitter.generateSplits(table); + snapshotSplitList.addAll(snapshotSplits); + } + return snapshotSplitList; + } + + public static void assertEqualsInAnyOrder(List expected, List actual) { + Assertions.assertThat(expected != null && actual != null).isTrue(); + assertEqualsInOrder( + expected.stream().sorted().collect(Collectors.toList()), + actual.stream().sorted().collect(Collectors.toList())); + } + + public static void assertEqualsInOrder(List expected, List actual) { + Assertions.assertThat(expected != null && actual != null).isTrue(); + Assertions.assertThat(expected.size()).isEqualTo(actual.size()); + Assertions.assertThat(expected.toArray(new String[0])) + .isEqualTo(actual.toArray(new String[0])); + } + + protected TiDBSourceConfigFactory getMockTiDBSourceConfigFactory( + String hostName, + int port, + String userName, + String password, + String databaseName, + String schemaName, + String tableName, + int splitSize, + boolean skipSnapshotBackfill) { + + TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); + tiDBSourceConfigFactory.hostname(hostName); + tiDBSourceConfigFactory.port(port); + tiDBSourceConfigFactory.username(userName); + tiDBSourceConfigFactory.password(password); + tiDBSourceConfigFactory.databaseList(databaseName); + tiDBSourceConfigFactory.tableList(schemaName + "." + tableName); + tiDBSourceConfigFactory.splitSize(splitSize); + tiDBSourceConfigFactory.skipSnapshotBackfill(skipSnapshotBackfill); + return tiDBSourceConfigFactory; + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java new file mode 100644 index 00000000000..724386141fe --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/source/reader/TiDBStreamSplitReaderTest.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.source.reader; + +import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig; +import org.apache.flink.cdc.connectors.base.source.meta.split.ChangeEventRecords; +import org.apache.flink.cdc.connectors.base.source.meta.split.FinishedSnapshotSplitInfo; +import org.apache.flink.cdc.connectors.base.source.meta.split.SourceRecords; +import org.apache.flink.cdc.connectors.base.source.meta.split.StreamSplit; +import org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceReaderContext; +import org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceSplitReader; +import org.apache.flink.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHooks; +import org.apache.flink.cdc.connectors.tidb.TiDBTestBase; +import org.apache.flink.cdc.connectors.tidb.source.TiDBDialect; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfig; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceConfigFactory; +import org.apache.flink.cdc.connectors.tidb.source.connection.TiDBConnection; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffset; +import org.apache.flink.cdc.connectors.tidb.source.offset.EventOffsetFactory; +import org.apache.flink.connector.base.source.reader.splitreader.SplitsAddition; +import org.apache.flink.connector.testutils.source.reader.TestingReaderContext; + +import io.debezium.relational.TableId; +import io.debezium.relational.history.TableChanges; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.source.SourceRecord; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Instant; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import static java.util.Collections.singletonList; + +/** Test for {@link TiDBTestBase}. */ +public class TiDBStreamSplitReaderTest extends TiDBTestBase { + private static final Logger LOG = LoggerFactory.getLogger(TiDBStreamSplitReaderTest.class); + private static final String databaseName = "customer"; + private static final String tableName = "customers"; + private static final String STREAM_SPLIT_ID = "stream-split"; + + private static final int USE_POST_LOWWATERMARK_HOOK = 1; + private static final int USE_PRE_HIGHWATERMARK_HOOK = 2; + private static final int MAX_RETRY_TIMES = 100; + + private TiDBSourceConfig sourceConfig; + private TiDBDialect tiDBDialect; + private EventOffsetFactory cdcEventOffsetFactory; + + @BeforeEach + public void before() { + initializeTidbTable("customer"); + TiDBSourceConfigFactory tiDBSourceConfigFactory = new TiDBSourceConfigFactory(); + tiDBSourceConfigFactory.pdAddresses( + PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN)); + tiDBSourceConfigFactory.hostname(TIDB.getHost()); + tiDBSourceConfigFactory.port(TIDB.getMappedPort(TIDB_PORT)); + tiDBSourceConfigFactory.username(TiDBTestBase.TIDB_USER); + tiDBSourceConfigFactory.password(TiDBTestBase.TIDB_PASSWORD); + tiDBSourceConfigFactory.databaseList(this.databaseName); + tiDBSourceConfigFactory.tableList(this.databaseName + "." 
+ this.tableName); + tiDBSourceConfigFactory.splitSize(10); + tiDBSourceConfigFactory.skipSnapshotBackfill(true); + tiDBSourceConfigFactory.scanNewlyAddedTableEnabled(true); + this.sourceConfig = tiDBSourceConfigFactory.create(0); + this.tiDBDialect = new TiDBDialect(tiDBSourceConfigFactory.create(0)); + this.cdcEventOffsetFactory = new EventOffsetFactory(); + } + + @Test + public void testStreamSplitReader() throws Exception { + String tableId = databaseName + "." + tableName; + IncrementalSourceReaderContext incrementalSourceReaderContext = + new IncrementalSourceReaderContext(new TestingReaderContext()); + IncrementalSourceSplitReader streamSplitReader = + new IncrementalSourceSplitReader<>( + 0, + tiDBDialect, + sourceConfig, + incrementalSourceReaderContext, + SnapshotPhaseHooks.empty()); + try { + EventOffset startOffset = new EventOffset(Instant.now().toEpochMilli()); + String[] insertDataSql = + new String[] { + "INSERT INTO " + + tableId + + " VALUES(112, 'user_12','Shanghai','123567891234')", + "INSERT INTO " + + tableId + + " VALUES(113, 'user_13','Shanghai','123567891234')", + }; + try (TiDBConnection tiDBConnection = tiDBDialect.openJdbcConnection()) { + tiDBConnection.execute(insertDataSql); + tiDBConnection.commit(); + } + TableId tableIds = new TableId(databaseName, null, tableName); + Map tableSchemas = new HashMap<>(); + tableSchemas.put(tableIds, null); + FinishedSnapshotSplitInfo finishedSnapshotSplitInfo = + new FinishedSnapshotSplitInfo( + tableIds, + STREAM_SPLIT_ID, + new Object[] {startOffset}, + new Object[] {EventOffset.NO_STOPPING_OFFSET}, + startOffset, + cdcEventOffsetFactory); + StreamSplit streamSplit = + new StreamSplit( + STREAM_SPLIT_ID, + startOffset, + cdcEventOffsetFactory.createNoStoppingOffset(), + Collections.singletonList(finishedSnapshotSplitInfo), + tableSchemas, + 0); + Assertions.assertThat(streamSplitReader.canAssignNextSplit()).isTrue(); + streamSplitReader.handleSplitsChanges(new SplitsAddition<>(singletonList(streamSplit))); + int retry = 0; + int count = 0; + while (retry < MAX_RETRY_TIMES) { + ChangeEventRecords records = (ChangeEventRecords) streamSplitReader.fetch(); + if (records.nextSplit() != null) { + SourceRecords sourceRecords; + while ((sourceRecords = records.nextRecordFromSplit()) != null) { + Iterator iterator = sourceRecords.iterator(); + while (iterator.hasNext()) { + Struct value = (Struct) iterator.next().value(); + String opType = value.getString("op"); + Assertions.assertThat(opType).isEqualTo("c"); + Struct after = (Struct) value.get("after"); + String name = after.getString("name"); + + Assertions.assertThat(name.contains("user")).isTrue(); + if (++count >= insertDataSql.length) { + return; + } + } + } + } else { + break; + } + } + } catch (Exception e) { + LOG.error("Stream split read error.", e); + } finally { + streamSplitReader.close(); + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java index 73848a46c8a..dbab19ae8ac 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorITCase.java @@ 
-72,12 +72,19 @@ void testConsumingAllEvents() throws Exception { + " PRIMARY KEY (`id`) NOT ENFORCED" + ") WITH (" + " 'connector' = 'tidb-cdc'," - + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "inventory", "products"); @@ -177,12 +184,19 @@ void testDeleteColumn() throws Exception { + " PRIMARY KEY (`id`) NOT ENFORCED" + ") WITH (" + " 'connector' = 'tidb-cdc'," - + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "inventory", "products"); @@ -256,12 +270,19 @@ void testAddColumn() throws Exception { + " PRIMARY KEY (`id`) NOT ENFORCED" + ") WITH (" + " 'connector' = 'tidb-cdc'," - + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "inventory", "products"); @@ -344,12 +365,19 @@ void testMetadataColumns() throws Exception { + " PRIMARY KEY (`id`) NOT ENFORCED" + ") WITH (" + " 'connector' = 'tidb-cdc'," - + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "inventory", "products"); @@ -456,10 +484,18 @@ void testAllDataTypes() throws Throwable { + " 'connector' = 'tidb-cdc'," + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "column_type_test", "full_types"); @@ -569,10 +605,18 @@ void testTiDBServerTimezone(String timezone) throws Exception { + " 'connector' = 'tidb-cdc'," + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "column_type_test", "full_types"); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorRegionITCase.java 
b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorRegionITCase.java index 1951f40463d..bc0f1fb58d7 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorRegionITCase.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBConnectorRegionITCase.java @@ -69,10 +69,18 @@ void testRegionChange() throws Exception { + " 'connector' = 'tidb-cdc'," + " 'tikv.grpc.timeout_in_ms' = '20000'," + " 'pd-addresses' = '%s'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'password' = '%s'," + + " 'username' = '%s'," + " 'database-name' = '%s'," + " 'table-name' = '%s'" + ")", PD.getContainerIpAddress() + ":" + PD.getMappedPort(PD_PORT_ORIGIN), + TIDB.getHost(), + TIDB.getMappedPort(TIDB_PORT), + TIDB_PASSWORD, + TIDB_USER, "region_switch_test", "t1"); diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java index 122ab1db4d1..e840bee0790 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/TiDBTableSourceFactoryTest.java @@ -17,6 +17,7 @@ package org.apache.flink.cdc.connectors.tidb.table; +import org.apache.flink.cdc.connectors.base.options.StartupOptions; import org.apache.flink.configuration.Configuration; import org.apache.flink.table.api.DataTypes; import org.apache.flink.table.api.Schema; @@ -32,14 +33,28 @@ import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; +import java.time.Duration; +import java.time.ZoneId; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.Properties; + +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECTION_POOL_SIZE; +import static org.apache.flink.cdc.connectors.base.options.JdbcSourceOptions.CONNECT_MAX_RETRIES; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.CHUNK_META_GROUP_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SCAN_SNAPSHOT_FETCH_SIZE; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND; +import static org.apache.flink.cdc.connectors.base.options.SourceOptions.SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.CONNECT_TIMEOUT; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.HEARTBEAT_INTERVAL; +import static org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions.JDBC_DRIVER; /** Unit tests for TiDB table source factory. 
*/ -class TiDBTableSourceFactoryTest { +public class TiDBTableSourceFactoryTest { private static final ResolvedSchema SCHEMA = new ResolvedSchema( @@ -66,71 +81,126 @@ class TiDBTableSourceFactoryTest { Collections.emptyList(), UniqueConstraint.primaryKey("pk", Collections.singletonList("id"))); - private static final String MY_HOSTNAME = "tidb0:4000"; + private static final String MY_HOSTNAME = "tidb0"; private static final String MY_DATABASE = "inventory"; + private static final String MY_PORT = "4000"; private static final String MY_TABLE = "products"; + private static final String MY_USERNAME = "root"; + private static final String MY_PASSWORD = ""; private static final String PD_ADDRESS = "pd0:2379"; private static final String HOST_MAPPING = "host1:1;host2:2;host3:3"; private static final Map OPTIONS = new HashMap<>(); + private static final Properties PROPERTIES = new Properties(); @Test - void testCommonProperties() { + public void testCommonProperties() { Map properties = getAllOptions(); // validation for source DynamicTableSource actualSource = createTableSource(properties); + System.out.println(actualSource.asSummaryString()); TiDBTableSource expectedSource = new TiDBTableSource( SCHEMA, + Integer.parseInt(MY_PORT), + MY_HOSTNAME, MY_DATABASE, MY_TABLE, + null, + MY_USERNAME, + MY_PASSWORD, + ZoneId.of("UTC").toString(), + PROPERTIES, + false, + HEARTBEAT_INTERVAL.defaultValue(), PD_ADDRESS, HOST_MAPPING, - StartupOptions.latest(), - OPTIONS); - Assertions.assertThat(actualSource).isEqualTo(expectedSource); + CONNECT_TIMEOUT.defaultValue(), + OPTIONS, + SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE.defaultValue(), + CHUNK_META_GROUP_SIZE.defaultValue(), + SCAN_SNAPSHOT_FETCH_SIZE.defaultValue(), + CONNECT_MAX_RETRIES.defaultValue(), + CONNECTION_POOL_SIZE.defaultValue(), + SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND.defaultValue(), + SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND.defaultValue(), + null, + new HashMap<>(), + JDBC_DRIVER.defaultValue(), + StartupOptions.initial()); + Assertions.assertThat(expectedSource).isEqualTo(actualSource); } @Test - void testOptionalProperties() { + public void testOptionalProperties() { Map properties = getAllOptions(); - properties.put("host-mapping", "host1:1;host2:2;host3:3"); - properties.put("tikv.grpc.timeout_in_ms", "20000"); - properties.put("tikv.grpc.scan_timeout_in_ms", "20000"); - properties.put("tikv.batch_get_concurrency", "4"); - properties.put("tikv.batch_put_concurrency", "4"); - properties.put("tikv.batch_scan_concurrency", "4"); - properties.put("tikv.batch_delete_concurrency", "4"); + properties.put("port", MY_PORT); + properties.put("scan.startup.mode", "initial"); + properties.put("heartbeat.interval.ms", "15213ms"); + properties.put("debezium.tombstones.on.delete", "true"); + properties.put("debezium.snapshot.mode", "never"); + properties.put("debezium.offset.flush.interval.ms", "3000"); + properties.put("debezium.test", "test"); + // properties.put("server-time-zone", "Asia/Shanghai"); + + Properties dbzProperties = new Properties(); + dbzProperties.put("snapshot.mode", "never"); + dbzProperties.put("offset.flush.interval.ms", "3000"); + dbzProperties.put("tombstones.on.delete", "true"); + dbzProperties.put("test", "test"); - // validation for source DynamicTableSource actualSource = createTableSource(properties); Map options = new HashMap<>(); - options.put("tikv.grpc.timeout_in_ms", "20000"); - options.put("tikv.grpc.scan_timeout_in_ms", "20000"); - options.put("tikv.batch_get_concurrency", "4"); - 
options.put("tikv.batch_put_concurrency", "4"); - options.put("tikv.batch_scan_concurrency", "4"); - options.put("tikv.batch_delete_concurrency", "4"); + // options.put("tikv.grpc.timeout_in_ms", "20000"); + // options.put("tikv.grpc.scan_timeout_in_ms", "20000"); + // options.put("tikv.batch_get_concurrency", "4"); + // options.put("tikv.batch_put_concurrency", "4"); + // options.put("tikv.batch_scan_concurrency", "4"); + // options.put("tikv.batch_delete_concurrency", "4"); TiDBTableSource expectedSource = new TiDBTableSource( SCHEMA, + 4000, + MY_HOSTNAME, MY_DATABASE, MY_TABLE, + null, + MY_USERNAME, + MY_PASSWORD, + ZoneId.of("UTC").toString(), + dbzProperties, + false, + Duration.ofMillis(15213), PD_ADDRESS, HOST_MAPPING, - StartupOptions.latest(), - options); - Assertions.assertThat(actualSource).isEqualTo(expectedSource); + CONNECT_TIMEOUT.defaultValue(), + OPTIONS, + SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE.defaultValue(), + CHUNK_META_GROUP_SIZE.defaultValue(), + SCAN_SNAPSHOT_FETCH_SIZE.defaultValue(), + CONNECT_MAX_RETRIES.defaultValue(), + CONNECTION_POOL_SIZE.defaultValue(), + SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND.defaultValue(), + SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND.defaultValue(), + null, + new HashMap<>(), + JDBC_DRIVER.defaultValue(), + StartupOptions.initial()); + Assertions.assertThat(expectedSource).isEqualTo(actualSource); } private Map getAllOptions() { Map options = new HashMap<>(); options.put("connector", "tidb-cdc"); options.put("hostname", MY_HOSTNAME); + options.put("port", MY_PORT); options.put("database-name", MY_DATABASE); options.put("table-name", MY_TABLE); options.put("pd-addresses", PD_ADDRESS); - options.put("scan.startup.mode", "latest-offset"); + options.put("username", MY_USERNAME); + options.put("password", MY_PASSWORD); + options.put("host-mapping", HOST_MAPPING); + options.put("scan.incremental.snapshot.enabled", String.valueOf(false)); return options; } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java index 304f3fba8e2..8051034edf0 100644 --- a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/table/utils/UriHostMappingTest.java @@ -17,7 +17,8 @@ package org.apache.flink.cdc.connectors.tidb.table.utils; -import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions; +import org.apache.flink.cdc.connectors.tidb.source.config.TiDBSourceOptions; +import org.apache.flink.cdc.connectors.tidb.utils.UriHostMapping; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; @@ -26,36 +27,35 @@ import java.util.HashMap; /** Unit test for {@link UriHostMapping}. 
* */ -class UriHostMappingTest { +public class UriHostMappingTest { @Test - void uriHostMappingTest() { + public void uriHostMappingTest() { final TiConfiguration tiConf = - TDBSourceOptions.getTiConfiguration( + TiDBSourceOptions.getTiConfiguration( "http://0.0.0.0:2347", "host1:1;host2:2;host3:3", new HashMap<>()); UriHostMapping uriHostMapping = (UriHostMapping) tiConf.getHostMapping(); - Assertions.assertThat(uriHostMapping.getHostMapping()) - .hasSize(3) - .containsEntry("host1", "1"); + Assertions.assertThat(uriHostMapping.getHostMapping().size()).isEqualTo(3); + Assertions.assertThat(uriHostMapping.getHostMapping().get("host1")).isEqualTo("1"); } @Test - void uriHostMappingEmpty() { + public void uriHostMappingEmpty() { final TiConfiguration tiConf = - TDBSourceOptions.getTiConfiguration("http://0.0.0.0:2347", "", new HashMap<>()); + TiDBSourceOptions.getTiConfiguration("http://0.0.0.0:2347", "", new HashMap<>()); UriHostMapping uriHostMapping = (UriHostMapping) tiConf.getHostMapping(); - Assertions.assertThat(uriHostMapping.getHostMapping()).isNull(); + Assertions.assertThat(uriHostMapping.getHostMapping()).isEqualTo(null); } @Test - void uriHostMappingError() { - Assertions.assertThatThrownBy( - () -> - TDBSourceOptions.getTiConfiguration( - "http://0.0.0.0:2347", - "host1=1;host2=2;host3=3", - new HashMap<>())) - .isExactlyInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid host mapping string: host1=1;host2=2;host3=3"); + public void uriHostMappingError() { + try { + final TiConfiguration tiConf = + TiDBSourceOptions.getTiConfiguration( + "http://0.0.0.0:2347", "host1=1;host2=2;host3=3", new HashMap<>()); + } catch (IllegalArgumentException e) { + Assertions.assertThat(e.getMessage()) + .isEqualTo("Invalid host mapping string: host1=1;host2=2;host3=3"); + } } } diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/testutils/RecordsFormatter.java b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/testutils/RecordsFormatter.java new file mode 100644 index 00000000000..8dee3688670 --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/java/org/apache/flink/cdc/connectors/tidb/testutils/RecordsFormatter.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.cdc.connectors.tidb.testutils; + +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils; +import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema; +import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.conversion.RowRowConverter; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import org.apache.flink.util.Collector; + +import org.apache.kafka.connect.source.SourceRecord; + +import java.time.ZoneId; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** Formatter that formats the {@link SourceRecord} to String. */ +public class RecordsFormatter { + + private final DataType dataType; + private final ZoneId zoneId; + + private TypeInformation typeInfo; + private DebeziumDeserializationSchema deserializationSchema; + private SimpleCollector collector; + private RowRowConverter rowRowConverter; + + public RecordsFormatter(DataType dataType) { + this(dataType, ZoneId.of("UTC")); + } + + public RecordsFormatter(DataType dataType, ZoneId zoneId) { + this.dataType = dataType; + this.zoneId = zoneId; + this.typeInfo = + (TypeInformation) TypeConversions.fromDataTypeToLegacyInfo(dataType); + this.deserializationSchema = + RowDataDebeziumDeserializeSchema.newBuilder() + .setPhysicalRowType((RowType) dataType.getLogicalType()) + .setResultTypeInfo(typeInfo) + .build(); + this.collector = new SimpleCollector(); + this.rowRowConverter = RowRowConverter.create(dataType); + rowRowConverter.open(Thread.currentThread().getContextClassLoader()); + } + + public List format(List records) { + records.stream() + // Keep DataChangeEvent only + .filter(SourceRecordUtils::isDataChangeRecord) + .forEach( + r -> { + try { + deserializationSchema.deserialize(r, collector); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + return collector.list.stream() + .map(rowRowConverter::toExternal) + .map(Row::toString) + .collect(Collectors.toList()); + } + + private static class SimpleCollector implements Collector { + + private List list = new ArrayList<>(); + + @Override + public void collect(RowData record) { + list.add(record); + } + + @Override + public void close() { + // do nothing + } + } +} diff --git a/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/resources/ddl/customer.sql b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/resources/ddl/customer.sql new file mode 100644 index 00000000000..c772e28e0ff --- /dev/null +++ b/flink-cdc-connect/flink-cdc-source-connectors/flink-connector-tidb-cdc/src/test/resources/ddl/customer.sql @@ -0,0 +1,51 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: customer +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE customer; + +USE customer; + +-- Create and populate our customers using a single insert with many rows +CREATE TABLE customers ( + id INTEGER NOT NULL PRIMARY KEY, + name VARCHAR(255) NOT NULL DEFAULT 'flink', + address VARCHAR(1024), + phone_number VARCHAR(512) +); +INSERT INTO customers +VALUES (101,'user_1','Shanghai','123567891234'), + (102,'user_2','Shanghai','123567891234'), + (103,'user_3','Shanghai','123567891234'), + (109,'user_4','Shanghai','123567891234'), + (110,'user_5','Shanghai','123567891234'), + (111,'user_6','Shanghai','123567891234'), + (118,'user_7','Shanghai','123567891234'), + (121,'user_8','Shanghai','123567891234'), + (123,'user_9','Shanghai','123567891234'), + (1009,'user_10','Shanghai','123567891234'), + (1010,'user_11','Shanghai','123567891234'), + (1011,'user_12','Shanghai','123567891234'), + (1012,'user_13','Shanghai','123567891234'), + (1013,'user_14','Shanghai','123567891234'), + (1014,'user_15','Shanghai','123567891234'), + (1015,'user_16','Shanghai','123567891234'), + (1016,'user_17','Shanghai','123567891234'), + (1017,'user_18','Shanghai','123567891234'), + (1018,'user_19','Shanghai','123567891234'), + (1019,'user_20','Shanghai','123567891234'), + (2000,'user_21','Shanghai','123567891234'); \ No newline at end of file
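
The snapshot-split tests above compare expected and actual split identifiers with a pair of helpers, assertEqualsInAnyOrder and assertEqualsInOrder. A minimal, self-contained sketch of that pattern follows; the split-ID strings in the demo test are illustrative only and are not taken from the connector.

import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/** Minimal sketch of the order-insensitive assertion pattern used by the split tests above. */
class AnyOrderAssertionSketchTest {

    static void assertEqualsInAnyOrder(List<String> expected, List<String> actual) {
        Assertions.assertThat(expected).isNotNull();
        Assertions.assertThat(actual).isNotNull();
        // Sorting both sides reduces the any-order check to a plain in-order comparison.
        assertEqualsInOrder(
                expected.stream().sorted().collect(Collectors.toList()),
                actual.stream().sorted().collect(Collectors.toList()));
    }

    static void assertEqualsInOrder(List<String> expected, List<String> actual) {
        Assertions.assertThat(actual).containsExactlyElementsOf(expected);
    }

    @Test
    void snapshotSplitsMayArriveInAnyOrder() {
        // A chunk splitter does not guarantee enumeration order, so compare as multisets.
        assertEqualsInAnyOrder(
                Arrays.asList("customers:0", "customers:1", "customers:2"),
                Arrays.asList("customers:2", "customers:0", "customers:1"));
    }
}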
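
The stream-split reader test asserts that each fetched change record carries op = "c" and reads the inserted row back from the "after" struct. The following short sketch shows that Debezium envelope navigation with the Kafka Connect Struct API; it is a standalone helper for reference, not part of the test or connector code.

import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

/** Sketch: reading the Debezium envelope fields the stream-split test asserts on. */
final class ChangeEventEnvelopeSketch {

    private ChangeEventEnvelopeSketch() {}

    /** Returns the operation type, e.g. "c" for an insert captured from the change stream. */
    static String opTypeOf(SourceRecord record) {
        Struct value = (Struct) record.value();
        return value.getString("op");
    }

    /** Returns one column from the "after" image of the row, e.g. the customer name. */
    static String afterField(SourceRecord record, String fieldName) {
        Struct value = (Struct) record.value();
        Struct after = (Struct) value.get("after");
        return after.getString(fieldName);
    }
}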
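
The ITCase hunks above extend the WITH clause with hostname, port, username and password alongside the existing pd-addresses option. A condensed sketch of how those tests assemble the DDL is given below; the column list is trimmed and the endpoint values in main are placeholders, while the real tests read them from the TiDB and PD test containers.

/** Condensed sketch of the connector options the updated ITCases pass in the WITH clause. */
final class TidbCdcDdlSketch {

    private TidbCdcDdlSketch() {}

    static String sourceDdl(
            String pdAddresses,
            String host,
            int port,
            String username,
            String password,
            String database,
            String table) {
        return String.format(
                "CREATE TABLE products ("
                        + " `id` INT NOT NULL,"
                        + " name STRING,"
                        + " PRIMARY KEY (`id`) NOT ENFORCED"
                        + ") WITH ("
                        + " 'connector' = 'tidb-cdc',"
                        + " 'pd-addresses' = '%s',"
                        + " 'hostname' = '%s',"
                        + " 'port' = '%s',"
                        + " 'username' = '%s',"
                        + " 'password' = '%s',"
                        + " 'database-name' = '%s',"
                        + " 'table-name' = '%s'"
                        + ")",
                pdAddresses, host, port, username, password, database, table);
    }

    public static void main(String[] args) {
        // Placeholder endpoints; the ITCases use the mapped container host/ports instead.
        System.out.println(
                sourceDdl("pd0:2379", "tidb0", 4000, "root", "", "inventory", "products"));
    }
}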
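
TiDBTableSourceFactoryTest expects table options such as debezium.snapshot.mode to reach the source as snapshot.mode inside a Properties object. The sketch below shows that prefix-stripping convention under the assumption that the factory forwards every "debezium."-prefixed table option this way; it mirrors the expected values in the test and is not the factory's actual implementation.

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/** Sketch of the "debezium." option prefix convention the factory test relies on. */
final class DebeziumPrefixSketch {

    private static final String DEBEZIUM_OPTIONS_PREFIX = "debezium.";

    static Properties extractDebeziumProperties(Map<String, String> tableOptions) {
        Properties dbzProperties = new Properties();
        tableOptions.forEach(
                (key, value) -> {
                    if (key.startsWith(DEBEZIUM_OPTIONS_PREFIX)) {
                        // Strip the prefix before handing the option to Debezium.
                        dbzProperties.put(key.substring(DEBEZIUM_OPTIONS_PREFIX.length()), value);
                    }
                });
        return dbzProperties;
    }

    public static void main(String[] args) {
        Map<String, String> options = new HashMap<>();
        options.put("debezium.snapshot.mode", "never");
        options.put("debezium.offset.flush.interval.ms", "3000");
        options.put("scan.startup.mode", "initial"); // not forwarded
        // Only the two "debezium."-prefixed entries are forwarded, with the prefix stripped.
        System.out.println(extractDebeziumProperties(options));
    }
}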
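
UriHostMappingTest pins down the host-mapping format: "internal:external" pairs separated by semicolons, an empty string meaning no mapping, and an IllegalArgumentException for anything else. The following is a hypothetical re-implementation of that contract for reference; the connector's real parsing lives in UriHostMapping.

import java.util.LinkedHashMap;
import java.util.Map;

/** Hypothetical sketch of the host-mapping contract exercised by UriHostMappingTest. */
final class HostMappingParseSketch {

    static Map<String, String> parse(String hostMapping) {
        if (hostMapping == null || hostMapping.isEmpty()) {
            return null; // matches the "empty mapping" expectation in the test
        }
        Map<String, String> mapping = new LinkedHashMap<>();
        for (String pair : hostMapping.split(";")) {
            String[] parts = pair.split(":");
            if (parts.length != 2) {
                throw new IllegalArgumentException("Invalid host mapping string: " + hostMapping);
            }
            mapping.put(parts[0], parts[1]);
        }
        return mapping;
    }

    public static void main(String[] args) {
        System.out.println(parse("host1:1;host2:2;host3:3")); // {host1=1, host2=2, host3=3}
        System.out.println(parse(""));                        // null
        try {
            parse("host1=1;host2=2;host3=3");
        } catch (IllegalArgumentException e) {
            System.out.println(e.getMessage()); // Invalid host mapping string: host1=1;host2=2;host3=3
        }
    }
}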
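
RecordsFormatter, added above, deserializes data-change SourceRecords with RowDataDebeziumDeserializeSchema and renders them as Row strings for assertions. A usage sketch against the customers schema from customer.sql follows, assuming the generic signature the diff implies (a List of SourceRecord in, a List of String out); the records themselves would come from a split reader fetch.

import org.apache.flink.cdc.connectors.tidb.testutils.RecordsFormatter;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;

import org.apache.kafka.connect.source.SourceRecord;

import java.util.List;

/** Usage sketch for the RecordsFormatter test utility, using the customers table schema. */
final class RecordsFormatterUsageSketch {

    static List<String> formatCustomers(List<SourceRecord> records) {
        DataType customersType =
                DataTypes.ROW(
                        DataTypes.FIELD("id", DataTypes.INT()),
                        DataTypes.FIELD("name", DataTypes.STRING()),
                        DataTypes.FIELD("address", DataTypes.STRING()),
                        DataTypes.FIELD("phone_number", DataTypes.STRING()));
        // Data-change records are deserialized to RowData and rendered as Row strings,
        // e.g. "+I[101, user_1, Shanghai, 123567891234]" for an insert.
        return new RecordsFormatter(customersType).format(records);
    }
}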