Skip to content

Commit 755b866

Browse files
author
Bingqin Zhou
authored
Merge pull request #233 from wepay/DI-2995
Support non-existent topics in KCBQ.
2 parents 77d2b6e + 4049471 commit 755b866

14 files changed

+171
-288
lines changed

kcbq-connector/quickstart/properties/connector.properties

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ tasks.max=1
2020
topics=kcbq-quickstart
2121
sanitizeTopics=true
2222

23-
autoCreateTables=true
2423
autoUpdateSchemas=true
2524

2625
schemaRetriever=com.wepay.kafka.connect.bigquery.schemaregistry.schemaretriever.SchemaRegistrySchemaRetriever

kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkConnector.java

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import com.wepay.kafka.connect.bigquery.api.SchemaRetriever;
2525

2626
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConfig;
27-
import com.wepay.kafka.connect.bigquery.config.BigQuerySinkConnectorConfig;
2827

2928
import com.wepay.kafka.connect.bigquery.convert.SchemaConverter;
3029

@@ -76,7 +75,7 @@ public BigQuerySinkConnector() {
7675
this.testSchemaManager = schemaManager;
7776
}
7877

79-
private BigQuerySinkConnectorConfig config;
78+
private BigQuerySinkConfig config;
8079
private Map<String, String> configProperties;
8180

8281
private static final Logger logger = LoggerFactory.getLogger(BigQuerySinkConnector.class);
@@ -108,58 +107,35 @@ private SchemaManager getSchemaManager(BigQuery bigQuery) {
108107
return new SchemaManager(schemaRetriever, schemaConverter, bigQuery, kafkaKeyFieldName, kafkaDataFieldName);
109108
}
110109

111-
private void ensureExistingTables(
112-
BigQuery bigQuery,
113-
SchemaManager schemaManager,
114-
Map<String, TableId> topicsToTableIds) {
115-
for (Map.Entry<String, TableId> topicToTableId : topicsToTableIds.entrySet()) {
116-
String topic = topicToTableId.getKey();
117-
TableId tableId = topicToTableId.getValue();
118-
if (bigQuery.getTable(tableId) == null) {
119-
logger.info("Table {} does not exist; attempting to create", tableId);
120-
schemaManager.createTable(tableId, topic);
121-
}
122-
}
123-
}
124-
125-
private void ensureExistingTables(
126-
BigQuery bigQuery,
127-
Map<String, TableId> topicsToTableIds) {
110+
private void ensureExistingTables() {
111+
BigQuery bigQuery = getBigQuery();
112+
Map<String, TableId> topicsToTableIds = TopicToTableResolver.getTopicsToTables(config);
128113
for (TableId tableId : topicsToTableIds.values()) {
129114
if (bigQuery.getTable(tableId) == null) {
130115
logger.warn(
131-
"You may want to enable auto table creation by setting {}=true in the properties file",
132-
config.TABLE_CREATE_CONFIG);
116+
"You may want to enable auto table creation by setting {}=true in the properties file",
117+
config.TABLE_CREATE_CONFIG);
133118
throw new BigQueryConnectException("Table '" + tableId + "' does not exist");
134119
}
135120
}
136121
}
137122

138-
private void ensureExistingTables() {
139-
BigQuery bigQuery = getBigQuery();
140-
Map<String, TableId> topicsToTableIds = TopicToTableResolver.getTopicsToTables(config);
141-
if (config.getBoolean(config.TABLE_CREATE_CONFIG)) {
142-
SchemaManager schemaManager = getSchemaManager(bigQuery);
143-
ensureExistingTables(bigQuery, schemaManager, topicsToTableIds);
144-
} else {
145-
ensureExistingTables(bigQuery, topicsToTableIds);
146-
}
147-
}
148-
149123
@Override
150124
public void start(Map<String, String> properties) {
151125
logger.trace("connector.start()");
152126
try {
153127
configProperties = properties;
154-
config = new BigQuerySinkConnectorConfig(properties);
128+
config = new BigQuerySinkConfig(properties);
155129
} catch (ConfigException err) {
156130
throw new SinkConfigConnectException(
157131
"Couldn't start BigQuerySinkConnector due to configuration error",
158132
err
159133
);
160134
}
161135

162-
ensureExistingTables();
136+
if (!config.getBoolean(config.TABLE_CREATE_CONFIG)) {
137+
ensureExistingTables();
138+
}
163139
}
164140

165141
@Override

kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/BigQuerySinkTask.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ public class BigQuerySinkTask extends SinkTask {
8989

9090
private final BigQuery testBigQuery;
9191
private final Storage testGcs;
92+
private final SchemaManager testSchemaManager;
9293

9394
private final UUID uuid = UUID.randomUUID();
9495
private ScheduledExecutorService gcsLoadExecutor;
@@ -100,6 +101,7 @@ public BigQuerySinkTask() {
100101
testBigQuery = null;
101102
schemaRetriever = null;
102103
testGcs = null;
104+
testSchemaManager = null;
103105
}
104106

105107
/**
@@ -108,12 +110,14 @@ public BigQuerySinkTask() {
108110
* @param testBigQuery {@link BigQuery} to use for testing (likely a mock)
109111
* @param schemaRetriever {@link SchemaRetriever} to use for testing (likely a mock)
110112
* @param testGcs {@link Storage} to use for testing (likely a mock)
113+
* @param testSchemaManager {@link SchemaManager} to use for testing (likely a mock)
111114
* @see BigQuerySinkTask#BigQuerySinkTask()
112115
*/
113-
public BigQuerySinkTask(BigQuery testBigQuery, SchemaRetriever schemaRetriever, Storage testGcs) {
116+
public BigQuerySinkTask(BigQuery testBigQuery, SchemaRetriever schemaRetriever, Storage testGcs, SchemaManager testSchemaManager) {
114117
this.testBigQuery = testBigQuery;
115118
this.schemaRetriever = schemaRetriever;
116119
this.testGcs = testGcs;
120+
this.testSchemaManager = testSchemaManager;
117121
}
118122

119123
@Override
@@ -137,6 +141,8 @@ private PartitionedTableId getRecordTable(SinkRecord record) {
137141

138142
TableId baseTableId = topicsToBaseTableIds.get(record.topic());
139143

144+
maybeCreateTable(record, baseTableId);
145+
140146
PartitionedTableId.Builder builder = new PartitionedTableId.Builder(baseTableId);
141147
if (useMessageTimeDatePartitioning) {
142148
if (record.timestampType() == TimestampType.NO_TIMESTAMP_TYPE) {
@@ -152,6 +158,20 @@ private PartitionedTableId getRecordTable(SinkRecord record) {
152158
return builder.build();
153159
}
154160

161+
/**
162+
* Create the table which doesn't exist in BigQuery for a (record's) topic when autoCreateTables config is set to true.
163+
* @param record Kafka Sink Record to be streamed into BigQuery.
164+
* @param baseTableId BaseTableId in BigQuery.
165+
*/
166+
private void maybeCreateTable(SinkRecord record, TableId baseTableId) {
167+
BigQuery bigQuery = getBigQuery();
168+
boolean autoCreateTables = config.getBoolean(config.TABLE_CREATE_CONFIG);
169+
if (autoCreateTables && bigQuery.getTable(baseTableId) == null) {
170+
getSchemaManager(bigQuery).createTable(baseTableId, record.topic());
171+
logger.info("Table {} does not exist, auto-created table for topic {}", baseTableId, record.topic());
172+
}
173+
}
174+
155175
private RowToInsert getRecordRow(SinkRecord record) {
156176
Map<String, Object> convertedRecord = recordConverter.convertRecord(record, KafkaSchemaRecordType.VALUE);
157177
Optional<String> kafkaKeyFieldName = config.getKafkaKeyFieldName();
@@ -248,6 +268,9 @@ private BigQuery getBigQuery() {
248268
}
249269

250270
private SchemaManager getSchemaManager(BigQuery bigQuery) {
271+
if (testSchemaManager != null) {
272+
return testSchemaManager;
273+
}
251274
schemaRetriever = config.getSchemaRetriever();
252275
SchemaConverter<com.google.cloud.bigquery.Schema> schemaConverter =
253276
config.getSchemaConverter();

kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConfig.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
import org.apache.kafka.common.config.ConfigException;
3333

3434
import org.apache.kafka.connect.sink.SinkConnector;
35+
import org.slf4j.Logger;
36+
import org.slf4j.LoggerFactory;
3537

3638
import java.lang.reflect.Constructor;
3739
import java.lang.reflect.InvocationTargetException;
@@ -52,6 +54,7 @@
5254
public class BigQuerySinkConfig extends AbstractConfig {
5355
private static final ConfigDef config;
5456
private static final Validator validator = new Validator();
57+
private static final Logger logger = LoggerFactory.getLogger(BigQuerySinkConfig.class);
5558

5659
// Values taken from https://github.com/apache/kafka/blob/1.1.1/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SinkConnectorConfig.java#L33
5760
public static final String TOPICS_CONFIG = SinkConnector.TOPICS_CONFIG;
@@ -209,6 +212,13 @@ public class BigQuerySinkConfig extends AbstractConfig {
209212
"If true, no fields in any produced BigQuery schema will be REQUIRED. All "
210213
+ "non-nullable avro fields will be translated as NULLABLE (or REPEATED, if arrays).";
211214

215+
public static final String TABLE_CREATE_CONFIG = "autoCreateTables";
216+
private static final ConfigDef.Type TABLE_CREATE_TYPE = ConfigDef.Type.BOOLEAN;
217+
public static final boolean TABLE_CREATE_DEFAULT = true;
218+
private static final ConfigDef.Importance TABLE_CREATE_IMPORTANCE = ConfigDef.Importance.HIGH;
219+
private static final String TABLE_CREATE_DOC =
220+
"Automatically create BigQuery tables if they don't already exist";
221+
212222
static {
213223
config = new ConfigDef()
214224
.define(
@@ -324,7 +334,13 @@ public class BigQuerySinkConfig extends AbstractConfig {
324334
CONVERT_DOUBLE_SPECIAL_VALUES_DEFAULT,
325335
CONVERT_DOUBLE_SPECIAL_VALUES_IMPORTANCE,
326336
CONVERT_DOUBLE_SPECIAL_VALUES_DOC
327-
);
337+
).define(
338+
TABLE_CREATE_CONFIG,
339+
TABLE_CREATE_TYPE,
340+
TABLE_CREATE_DEFAULT,
341+
TABLE_CREATE_IMPORTANCE,
342+
TABLE_CREATE_DOC
343+
);
328344
}
329345

330346
@SuppressWarnings("unchecked")
@@ -608,6 +624,18 @@ private void verifyBucketSpecified() throws ConfigException {
608624
}
609625
}
610626

627+
private void checkAutoCreateTables() {
628+
629+
Class<?> schemaRetriever = getClass(BigQuerySinkConfig.SCHEMA_RETRIEVER_CONFIG);
630+
boolean autoCreateTables = getBoolean(TABLE_CREATE_CONFIG);
631+
632+
if (autoCreateTables && schemaRetriever == null) {
633+
throw new ConfigException(
634+
"Cannot specify automatic table creation without a schema retriever"
635+
);
636+
}
637+
}
638+
611639
/**
612640
* Return the ConfigDef object used to define this config's fields.
613641
*
@@ -625,6 +653,7 @@ protected BigQuerySinkConfig(ConfigDef config, Map<String, String> properties) {
625653
public BigQuerySinkConfig(Map<String, String> properties) {
626654
super(config, properties);
627655
verifyBucketSpecified();
656+
checkAutoCreateTables();
628657
}
629658

630659
}

kcbq-connector/src/main/java/com/wepay/kafka/connect/bigquery/config/BigQuerySinkConnectorConfig.java

Lines changed: 0 additions & 82 deletions
This file was deleted.

0 commit comments

Comments (0)