Skip to content

Commit a36ea28

Browse files
authored
Merge pull request #20 from databendcloud/feat/schema-evolution
feat: support auto schema evolution
2 parents 47d1fe9 + e3a5609 commit a36ea28

File tree

10 files changed

+165
-69
lines changed

10 files changed

+165
-69
lines changed

debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DatabendChangeEvent.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public class DatabendChangeEvent {
2828
protected final String destination;
2929
protected final JsonNode value;
3030
protected final JsonNode key;
31-
final Schema schema;
31+
public final Schema schema;
3232

3333
public DatabendChangeEvent(String destination, JsonNode value, JsonNode key, JsonNode valueSchema, JsonNode keySchema) {
3434
this.destination = destination;

debezium-server-databend-sink/src/main/java/io/debezium/server/databend/DatabendUtil.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,6 @@ public static void addParametersToStatement(PreparedStatement statement, Databen
212212
Map<String, Object> values = event.valueAsMap();
213213
//DatabendChangeEvent.Schema k = event.schema();
214214
Map<String, String> decimalFields = DatabendUtil.findDecimalFields(event.schema());
215-
System.out.println("valueSchema: " + event.schema.valueSchema());
216-
System.out.println("keySchema: " + event.schema.keySchema());
217-
System.out.println("valueAsMap" + event.valueAsMap());
218-
System.out.println("keyAsMap" + event.keyAsMap());
219215
int index = 1;
220216
for (String key : values.keySet()) {
221217
if (decimalFields.containsKey(key)) {
@@ -259,14 +255,24 @@ public static Map<String, String> findDecimalFields(DatabendChangeEvent.Schema s
259255
return decimalFields;
260256
}
261257

258+
public static boolean isSchemaChanged(DatabendChangeEvent.Schema schema) {
259+
if (schema == null || schema.keySchema() == null || schema.keySchema().get("name") == null) {
260+
return false;
261+
}
262+
String schemaNameStr = schema.keySchema().get("name").textValue();
263+
if (schemaNameStr.toLowerCase().contains("schemachangekey")) {
264+
return true;
265+
}
266+
return false;
267+
}
268+
262269
private static String createTableSQL(String schemaName, String originalSQL, DatabendChangeEvent.Schema schema) {
263270
//"CREATE TABLE debeziumcdc_customers_append (__deleted boolean, id bigint, first_name varchar, __op varchar, __source_ts_ms bigint);";
264271
String[] parts = originalSQL.split("\\s", 4);
265272
parts[2] = schemaName + "." + parts[2];
266273
//
267274
String modifiedSQL = String.join(" ", parts);
268275
// String modifiedSQL = originalSQL;
269-
System.out.println("sjh" + modifiedSQL);
270276
// replace `decimal` with `decimal(precision,scale)` by handling schema.valueSchema()
271277
for (JsonNode jsonSchemaFieldNode : schema.valueSchema().get("fields")) {
272278
// if the field is decimal, replace it with decimal(precision,scale)

debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/AppendTableWriter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import java.sql.Connection;
1212

1313
public class AppendTableWriter extends BaseTableWriter {
14-
public AppendTableWriter(Connection connection, String identifierQuoteCharacter) {
15-
super(connection, identifierQuoteCharacter);
14+
public AppendTableWriter(Connection connection, String identifierQuoteCharacter, boolean isSchemaEvolutionEnabled) {
15+
super(connection, identifierQuoteCharacter, isSchemaEvolutionEnabled);
1616
}
1717
}

debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/BaseTableWriter.java

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,22 @@
99
package io.debezium.server.databend.tablewriter;
1010

1111
import com.fasterxml.jackson.databind.JsonNode;
12+
import io.debezium.server.databend.DatabendChangeConsumer;
1213
import io.debezium.server.databend.DatabendChangeEvent;
1314
import io.debezium.server.databend.DatabendUtil;
15+
import org.eclipse.microprofile.config.inject.ConfigProperty;
1416
import org.slf4j.Logger;
1517
import org.slf4j.LoggerFactory;
1618

19+
import javax.enterprise.context.Dependent;
1720
import java.math.BigDecimal;
1821
import java.math.BigInteger;
1922
import java.sql.Connection;
2023
import java.sql.*;
2124
import java.sql.SQLException;
22-
import java.util.Arrays;
23-
import java.util.Base64;
24-
import java.util.List;
25-
import java.util.Map;
25+
import java.util.*;
26+
import java.util.regex.Matcher;
27+
import java.util.regex.Pattern;
2628

2729
import static io.debezium.server.databend.DatabendUtil.addParametersToStatement;
2830

@@ -31,29 +33,83 @@ public abstract class BaseTableWriter {
3133
protected static final Logger LOGGER = LoggerFactory.getLogger(BaseTableWriter.class);
3234
final Connection connection;
3335
final String identifierQuoteCharacter;
36+
final boolean isSchemaEvolutionEnabled;
3437

35-
public BaseTableWriter(final Connection connection, String identifierQuoteCharacter) {
38+
public BaseTableWriter(final Connection connection, String identifierQuoteCharacter, boolean isSchemaEvolutionEnabled) {
3639
this.connection = connection;
3740
this.identifierQuoteCharacter = identifierQuoteCharacter;
41+
this.isSchemaEvolutionEnabled = isSchemaEvolutionEnabled;
3842
}
3943

4044
public void addToTable(final RelationalTable table, final List<DatabendChangeEvent> events) {
4145
final String sql = table.prepareInsertStatement(this.identifierQuoteCharacter);
4246
int inserts = 0;
47+
List<DatabendChangeEvent> schemaEvolutionEvents = new ArrayList<>();
4348
try (PreparedStatement statement = connection.prepareStatement(sql)) {
4449
connection.setAutoCommit(false);
4550
for (DatabendChangeEvent event : events) {
46-
addParametersToStatement(statement, event);
47-
statement.addBatch();
48-
49-
int[] batchResult = statement.executeBatch();
50-
inserts = Arrays.stream(batchResult).sum();
51-
System.out.printf("insert rows %d%n", inserts);
51+
if (DatabendUtil.isSchemaChanged(event.schema()) && isSchemaEvolutionEnabled) {
52+
schemaEvolutionEvents.add(event);
53+
} else {
54+
addParametersToStatement(statement, event);
55+
statement.addBatch();
56+
}
5257
}
58+
59+
// Each batch needs to have the same schemas, so get the buffered records out
60+
int[] batchResult = statement.executeBatch();
61+
inserts = Arrays.stream(batchResult).sum();
62+
System.out.printf("insert rows %d%n", inserts);
5363
} catch (SQLException e) {
5464
throw new RuntimeException(e.getMessage());
5565
}
66+
// handle schema evolution
67+
try {
68+
schemaEvolution(table, schemaEvolutionEvents);
69+
} catch (Exception e) {
70+
throw new RuntimeException(e.getMessage());
71+
}
72+
}
73+
74+
public void schemaEvolution(RelationalTable table, List<DatabendChangeEvent> events) {
75+
for (DatabendChangeEvent event : events) {
76+
Map<String, Object> values = event.valueAsMap();
77+
for (Map.Entry<String, Object> entry : values.entrySet()) {
78+
// String key = entry.getKey();
79+
// Object value = entry.getValue();
80+
// System.out.println("Key: " + key + ", Value: " + value);
81+
if (entry.getKey().contains("ddl") && entry.getValue().toString().toLowerCase().contains("alter table")) {
82+
String tableName = getFirstWordAfterAlterTable(entry.getValue().toString());
83+
String ddlSql = replaceFirstWordAfterTable(entry.getValue().toString(), table.databaseName + "." + tableName);
84+
try (PreparedStatement statement = connection.prepareStatement(ddlSql)) {
85+
System.out.println(ddlSql);
86+
statement.execute(ddlSql);
87+
} catch (SQLException e) {
88+
throw new RuntimeException(e.getMessage());
89+
}
90+
}
91+
}
92+
}
93+
}
94+
95+
public static String replaceFirstWordAfterTable(String statement, String newTableName) {
96+
if (statement == null || newTableName == null) {
97+
return statement;
98+
}
99+
Pattern pattern = Pattern.compile("(?<=table )\\w+");
100+
Matcher matcher = pattern.matcher(statement);
101+
return matcher.replaceFirst(newTableName);
56102
}
57103

104+
public static String getFirstWordAfterAlterTable(String alterStatement) {
105+
if (alterStatement == null) {
106+
return null;
107+
}
108+
String[] parts = alterStatement.split(" ");
109+
if (parts.length >= 3) {
110+
return parts[2];
111+
}
112+
return null;
113+
}
58114
}
59115

debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/RelationalTable.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public class RelationalTable {
2424
protected static final Logger LOGGER = LoggerFactory.getLogger(RelationalTable.class);
2525

2626
public final String tableName;
27-
private final String databaseName;
27+
public final String databaseName;
2828
public final Map<String, DatabendRawType> columns = new HashMap<>();
2929
public final Map<String, Integer> primaryKeysMap = new HashMap<>();
3030
public final String primaryKey;
@@ -59,7 +59,7 @@ public RelationalTable(String primaryKey, String databaseName, String tableName,
5959
if (!primaryKey.isEmpty()) {
6060
primaryKeysMap.put(primaryKey, 1);
6161
}
62-
LOGGER.warn("Loaded Databend table {}.{} \nColumns:{} \nPK:{}", schema, table, columns, primaryKeysMap);
62+
LOGGER.warn("Loaded Databend table {}.{} \nColumns:{} \nPK:{}", schema, table, columns, primaryKeysMap);
6363
}
6464

6565
if (numTablesFound == 0) {

debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/TableWriterFactory.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@ public class TableWriterFactory {
1616
@ConfigProperty(name = "debezium.sink.databend.identifier-quote-char", defaultValue = "")
1717
Optional<String> identifierQuoteCharacter;
1818

19+
@ConfigProperty(name = "debezium.sink.databend.schema.evolution", defaultValue = "false")
20+
boolean isSchemaEvolutionEnabled;
21+
1922
public BaseTableWriter get(final Connection connection) {
2023
if (upsert) {
21-
return new UpsertTableWriter(connection, identifierQuoteCharacter.orElse(""), upsertKeepDeletes);
24+
return new UpsertTableWriter(connection, identifierQuoteCharacter.orElse(""), upsertKeepDeletes, isSchemaEvolutionEnabled);
2225
} else {
23-
return new AppendTableWriter(connection, identifierQuoteCharacter.orElse(""));
26+
return new AppendTableWriter(connection, identifierQuoteCharacter.orElse(""),isSchemaEvolutionEnabled);
2427
}
2528
}
2629
}

debezium-server-databend-sink/src/main/java/io/debezium/server/databend/tablewriter/UpsertTableWriter.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ public class UpsertTableWriter extends BaseTableWriter {
3636
final boolean upsertKeepDeletes;
3737
protected static final Logger LOGGER = LoggerFactory.getLogger(UpsertTableWriter.class);
3838

39-
public UpsertTableWriter(Connection connection, String identifierQuoteCharacter, boolean upsertKeepDeletes) {
40-
super(connection, identifierQuoteCharacter);
39+
public UpsertTableWriter(Connection connection, String identifierQuoteCharacter, boolean upsertKeepDeletes, boolean isSchemaEvolutionEnabled) {
40+
super(connection, identifierQuoteCharacter, isSchemaEvolutionEnabled);
4141
this.upsertKeepDeletes = upsertKeepDeletes;
42-
appendTableWriter = new AppendTableWriter(connection, identifierQuoteCharacter);
42+
appendTableWriter = new AppendTableWriter(connection, identifierQuoteCharacter, isSchemaEvolutionEnabled);
4343
}
4444

4545
@Override
@@ -56,6 +56,7 @@ public void deleteUpsert(final RelationalTable table, final List<DatabendChangeE
5656
final String upsertSql = table.preparedUpsertStatement(this.identifierQuoteCharacter);
5757
int inserts = 0;
5858
List<DatabendChangeEvent> deleteEvents = new ArrayList<>();
59+
List<DatabendChangeEvent> schemaEvolutionEvents = new ArrayList<>();
5960

6061
try (PreparedStatement statement = connection.prepareStatement(upsertSql)) {
6162
connection.setAutoCommit(false);
@@ -71,9 +72,12 @@ public void deleteUpsert(final RelationalTable table, final List<DatabendChangeE
7172
// here use soft delete
7273
// if true delete, we can use this condition event.keyAsMap().containsKey(deleteColumn)
7374
deleteEvents.add(event);
75+
} else if (DatabendUtil.isSchemaChanged(event.schema()) && isSchemaEvolutionEnabled) {
76+
schemaEvolutionEvents.add(event);
7477
}
7578
}
7679

80+
// Each batch needs to have the same schemas, so get the buffered records out
7781
int[] batchResult = statement.executeBatch();
7882
inserts = Arrays.stream(batchResult).sum();
7983

@@ -89,6 +93,12 @@ public void deleteUpsert(final RelationalTable table, final List<DatabendChangeE
8993
throw new RuntimeException(e.getMessage());
9094
}
9195

96+
//handle schema changed events
97+
try {
98+
schemaEvolution(table, schemaEvolutionEvents);
99+
} catch (Exception e) {
100+
throw new RuntimeException(e.getMessage());
101+
}
92102
}
93103

94104
public void deleteFromTable(final RelationalTable table, final List<DatabendChangeEvent> events) throws Exception {
@@ -104,7 +114,6 @@ public void deleteFromTable(final RelationalTable table, final List<DatabendChan
104114
}
105115
}
106116

107-
108117
private String getPrimaryKeyValue(String primaryKey, Map<String, Object> parameters) throws Exception {
109118
String primaryValue = "";
110119
for (Map.Entry<String, Object> entry : parameters.entrySet()) {

debezium-server-databend-sink/src/main/resources/conf/application.properties.example

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,11 @@ debezium.source.database.history=io.debezium.relational.history.FileDatabaseHist
3838
debezium.source.database.history.file.filename=data/status.dat
3939
# do event flattening. unwrap message!
4040
# https://debezium.io/documentation/reference/1.2/configuration/event-flattening.html#extract-new-record-state-drop-tombstones
41-
debezium.transforms=unwrap,a
42-
debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState
43-
debezium.transforms.a.type=org.apache.kafka.connect.transforms.TimestampConverter$Value
44-
debezium.transforms.a.target.type=string
45-
debezium.transforms.a.field=a
41+
#debezium.transforms=unwrap,a
42+
#debezium.transforms.unwrap.type=io.debezium.transforms.ExtractNewRecordState
43+
#debezium.transforms.a.type=org.apache.kafka.connect.transforms.TimestampConverter$Value
44+
#debezium.transforms.a.target.type=string
45+
#debezium.transforms.a.field=a
4646
# datetime format
4747
debezium.transforms.a.format=yyyy-MM-dd
4848
debezium.source.time.precision.mode=connect
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package io.debezium.databend.tablewriter;
2+
3+
import org.junit.Assert;
4+
import org.junit.jupiter.api.Test;
5+
6+
import static io.debezium.server.databend.tablewriter.BaseTableWriter.replaceFirstWordAfterTable;
7+
8+
public class TableWriterTest {
9+
@Test
10+
public void testFirstWordAfterTable() throws Exception {
11+
String statement = "alter table products add column a int";
12+
String newStatement = replaceFirstWordAfterTable(statement, "newTable");
13+
System.out.println(newStatement);
14+
Assert.assertEquals(newStatement, "alter table newTable add column a int");
15+
16+
statement = "alter table products drop column a";
17+
newStatement = replaceFirstWordAfterTable(statement, "yyy");
18+
System.out.println(newStatement);
19+
Assert.assertEquals(newStatement, "alter table yyy drop column a");
20+
}
21+
}

debezium-server-databend-sink/src/test/java/io/debezium/databend/testresources/TestUtil.java

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,43 +15,44 @@
1515

1616
import org.apache.kafka.connect.source.SourceRecord;
1717

18+
@SuppressWarnings("unchecked")
1819
public class TestUtil {
19-
static final String AB = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
20-
static final SecureRandom rnd = new SecureRandom();
21-
22-
23-
public static int randomInt(int low, int high) {
24-
return rnd.nextInt(high - low) + low;
25-
}
26-
27-
public static String randomString(int len) {
28-
StringBuilder sb = new StringBuilder(len);
29-
for (int i = 0; i < len; i++)
30-
sb.append(AB.charAt(rnd.nextInt(AB.length())));
31-
return sb.toString();
32-
}
33-
34-
public static DebeziumEngine.RecordCommitter<ChangeEvent<Object, Object>> getCommitter() {
35-
return new DebeziumEngine.RecordCommitter() {
36-
public synchronized void markProcessed(SourceRecord record) {
37-
}
38-
39-
@Override
40-
public void markProcessed(Object record) {
41-
}
42-
43-
public synchronized void markBatchFinished() {
44-
}
45-
46-
@Override
47-
public void markProcessed(Object record, DebeziumEngine.Offsets sourceOffsets) {
48-
}
49-
50-
@Override
51-
public DebeziumEngine.Offsets buildOffsets() {
52-
return null;
53-
}
54-
};
55-
}
20+
static final String AB = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
21+
static final SecureRandom rnd = new SecureRandom();
22+
23+
24+
public static int randomInt(int low, int high) {
25+
return rnd.nextInt(high - low) + low;
26+
}
27+
28+
public static String randomString(int len) {
29+
StringBuilder sb = new StringBuilder(len);
30+
for (int i = 0; i < len; i++)
31+
sb.append(AB.charAt(rnd.nextInt(AB.length())));
32+
return sb.toString();
33+
}
34+
35+
public static DebeziumEngine.RecordCommitter<ChangeEvent<Object, Object>> getCommitter() {
36+
return new DebeziumEngine.RecordCommitter() {
37+
public synchronized void markProcessed(SourceRecord record) {
38+
}
39+
40+
@Override
41+
public void markProcessed(Object record) {
42+
}
43+
44+
public synchronized void markBatchFinished() {
45+
}
46+
47+
@Override
48+
public void markProcessed(Object record, DebeziumEngine.Offsets sourceOffsets) {
49+
}
50+
51+
@Override
52+
public DebeziumEngine.Offsets buildOffsets() {
53+
return null;
54+
}
55+
};
56+
}
5657

5758
}

0 commit comments

Comments
 (0)