Draft
Changes from all commits
45 commits
7a21043
key source config
brandtnewton Jan 8, 2026
27821c9
test fixes
brandtnewton Jan 8, 2026
5cf5c99
Merge branch 'kafka/it-refac' into kafka/key-source
brandtnewton Jan 8, 2026
498597e
imports
brandtnewton Jan 8, 2026
3bedb96
fixed config
brandtnewton Jan 8, 2026
2bb369d
tests
brandtnewton Jan 12, 2026
7ee8b2d
cleaner test assertions
brandtnewton Jan 12, 2026
7051753
adding IT
brandtnewton Jan 12, 2026
8b5caa8
docs: fix typo
brandtnewton Jan 12, 2026
fd54851
Merge branch 'GoogleCloudPlatform:main' into kafka/key-source
brandtnewton Jan 12, 2026
8329db5
Merge branch 'main' of github.com:brandtnewton/cloud-bigtable-ecosystem
brandtnewton Jan 12, 2026
9921427
adding tests
brandtnewton Jan 14, 2026
b4cc9d4
Merge branch 'kafka/key-source' of github.com:brandtnewton/cloud-bigt…
brandtnewton Jan 14, 2026
ea094fb
json schema with value based key test
brandtnewton Jan 14, 2026
b573a42
more tests
brandtnewton Jan 14, 2026
15f99fd
Merge branch 'main' into kafka/key-source
brandtnewton Jan 14, 2026
46ffc97
Merge branch 'kafka/key-source' of github.com:brandtnewton/cloud-bigt…
brandtnewton Jan 14, 2026
06e69ae
removed debug logs
brandtnewton Jan 14, 2026
2e0e841
docs
brandtnewton Jan 14, 2026
5c8c2df
pom fix
brandtnewton Jan 14, 2026
36e1d3c
added schemaless json test
brandtnewton Jan 14, 2026
a426063
config clean up
brandtnewton Jan 14, 2026
c7c3a8f
feat: flatten array element SMT
brandtnewton Jan 15, 2026
3a6b2d9
adding config
brandtnewton Jan 16, 2026
ad67c53
smt test
brandtnewton Jan 16, 2026
6d92880
flatten array works
brandtnewton Jan 16, 2026
d00bfa1
fixed smt test
brandtnewton Jan 16, 2026
b17caee
imports
brandtnewton Jan 16, 2026
9ae60a3
removing key source
brandtnewton Jan 16, 2026
1768237
removed key source
brandtnewton Jan 16, 2026
76dafca
Merge branch 'kafka/root-array' into kafka/root-array-to-cf
brandtnewton Jan 16, 2026
6c1c9e0
tdd
brandtnewton Jan 16, 2026
bee1b57
removed old test
brandtnewton Jan 16, 2026
3c9cb72
Merge branch 'kafka/root-array' into kafka/root-array-to-cf
brandtnewton Jan 16, 2026
5d905fd
unit test passes
brandtnewton Jan 16, 2026
be626b7
clearing root level array
brandtnewton Jan 20, 2026
5e90739
cleaner tests
brandtnewton Jan 20, 2026
50cc8d4
docs
brandtnewton Jan 20, 2026
eb34ec7
Merge branch 'kafka/root-array' into kafka/root-array-to-cf
brandtnewton Jan 20, 2026
f7e2e37
test passes
brandtnewton Jan 20, 2026
9808203
fixed products test
brandtnewton Jan 20, 2026
78af59b
tests
brandtnewton Jan 20, 2026
db63b52
Merge branch 'kafka/root-array' into kafka/root-array-to-cf
brandtnewton Jan 20, 2026
958f3a2
fixed null element error
brandtnewton Jan 20, 2026
698090f
Merge branch 'kafka/root-array' into kafka/root-array-to-cf
brandtnewton Jan 20, 2026
1 change: 1 addition & 0 deletions kafka-connect-bigtable-sink/.gitignore
@@ -1,3 +1,4 @@
target
.idea
*.iml
dependency-reduced-pom.xml
88 changes: 87 additions & 1 deletion kafka-connect-bigtable-sink/README.md
@@ -3,7 +3,8 @@
This [Kafka Connect Sink](https://kafka.apache.org/documentation/#connect) is
for writing Kafka data to the Google Bigtable database.
This was designed to stream data into Bigtable with as little
latency as possible.
latency as possible. This project also includes some [custom SMTs](#smt) to
conveniently prepare your data for Bigtable without any extra plugins.

## Features

@@ -13,6 +14,7 @@ latency as possible.
* Deletes
* At least once delivery
* Dead Letter Queue
* Bundled SMTs for convenience

### Flexible Key Mapping

@@ -31,6 +33,18 @@ type:
complicated delimiters, and string constants are required in your Row Key,
consider configuring an SMT to add relevant fields to the Kafka Record key.

#### Using Message Values for Row Keys

If you need to build the row key from fields of the message value rather than the message key, use the `org.apache.kafka.connect.transforms.ValueToKey` SMT to copy those fields into the key:

```properties
transforms=createKey
transforms.createKey.type=org.apache.kafka.connect.transforms.ValueToKey
transforms.createKey.fields=orderId,userId
row.key.definition=userId,orderId
row.key.delimiter=#
```
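
With this configuration, a record whose value contains, say, `userId=42` and `orderId=99`
(illustrative values) should end up with the row key `42#99`: `row.key.definition` selects
and orders the key fields, and `row.key.delimiter` joins them.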

### Dead Letter Queue

A DLQ can be enabled by setting `errors.tolerance` to `all` and by
@@ -52,6 +66,78 @@ optimizing configs for latency will reduce throughput and efficiency.
When `value.null.mode` is set to `delete`, Kafka messages with a null value will
result in the corresponding row being deleted.
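
For example, a minimal sketch of a configuration enabling this behavior:

```properties
# Kafka tombstones (records with a null value) delete the corresponding Bigtable row
value.null.mode=delete
```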

## SMT

This project includes SMTs that may be useful for preparing your data for Bigtable.

### Flatten Array Element

This SMT flattens the wrapper records that some serialization libraries place around arrays and their elements, leaving a plain array field in the value.

#### Configuration

`array.field`

The name of the root-level array field. Note: this field is not the direct parent of the array; see the example below.

* Type: string
* Default:
* Valid Values: non-empty string
* Importance: high

`array.inner.wrapper`

The name of the intermediate field that directly contains the array (for example, `list` below).

* Type: string
* Default:
* Valid Values: non-empty string
* Importance: high

`array.element.wrapper`

The name of the field wrapping individual elements within the array.

* Type: string
* Default:
* Valid Values: non-empty string
* Importance: high
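
Putting these together, a sketch of how this transform could be declared in a connector
configuration (the `flattenProducts` alias is illustrative):

```properties
transforms=flattenProducts
transforms.flattenProducts.type=com.google.cloud.kafka.connect.bigtable.transformations.FlattenArrayElement
transforms.flattenProducts.array.field=products
transforms.flattenProducts.array.inner.wrapper=list
transforms.flattenProducts.array.element.wrapper=element
```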

#### Example

Given the following input message value, with `array.field="products"`, `array.inner.wrapper="list"`, and `array.element.wrapper="element"`:

```json
{
  "orderId": "order1",
  "products": {
    "list": [
      {
        "element": {
          "name": "Ball",
          "value": "24"
        }
      }
    ]
  }
}
```

The resulting output would be:

```json
{
  "orderId": "order1",
  "products": [
    {
      "name": "Ball",
      "value": "24"
    }
  ]
}
```

## Configuration

See [config/](./config/bigtable-kafka-sink-connector.properties) for a sample
6 changes: 6 additions & 0 deletions kafka-connect-bigtable-sink/doc/tests.md
@@ -113,3 +113,9 @@ cbt -project "$PROJECT" -instance "$INSTANCE" ls | xargs -P 0 -I {} cbt -project
```bash
mvn clean verify -DskipUnitTests
```

#### To run a specific integration test

```bash
mvn clean verify -DskipUnitTests -Dit.test=InsertUpsertIT#testUpsert
```
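
The `-Dit.test=ClassName#methodName` filter runs a single test method (assuming the
integration tests are executed by the Maven Failsafe plugin, which supports this property).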
2 changes: 1 addition & 1 deletion kafka-connect-bigtable-sink/integration-tests/pom.xml
@@ -33,7 +33,7 @@
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<integration.test.plugin.path>${project.basedir}/integration_test_plugins</integration.test.plugin.path>
<google.sink.package.path>${project.basedir}/../sink/target/sink-${project.version}-package</google.sink.package.path>
<google.sink.package.path>${project.basedir}/../sink/target/</google.sink.package.path>
<google.sink.package.plugin.dir>${integration.test.plugin.path}/google-sink</google.sink.package.plugin.dir>
</properties>
<dependencies>
@@ -23,15 +23,20 @@
import com.google.cloud.bigtable.admin.v2.models.CreateTableRequest;
import com.google.cloud.bigtable.admin.v2.models.Table;
import com.google.cloud.bigtable.data.v2.BigtableDataClient;
import com.google.cloud.bigtable.data.v2.models.Filters;
import com.google.cloud.bigtable.data.v2.models.Query;
import com.google.cloud.bigtable.data.v2.models.Row;
import com.google.cloud.bigtable.data.v2.models.TableId;
import com.google.cloud.kafka.connect.bigtable.wrappers.BigtableTableAdminClientInterface;
import com.google.common.util.concurrent.Futures;
import com.google.protobuf.ByteString;

import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.function.Supplier;
@@ -94,7 +99,7 @@ public void createTablesAndColumnFamilies(Map<String, Set<String>> tablesAndColu
public Map<ByteString, Row> readAllRows(BigtableDataClient bigtable, String table) {
Integer numRecords = null;
try {
Query query = Query.create(table);
Query query = Query.create(TableId.of(table)).filter(Filters.FILTERS.limit().cellsPerColumn(1));
Map<ByteString, Row> result =
bigtable.readRows(query).stream().collect(Collectors.toMap(Row::getKey, r -> r));
numRecords = result.size();
@@ -106,6 +111,17 @@ public Map<ByteString, Row> readAllRows(BigtableDataClient bigtable, String tabl
}
}

/** Reads every row in the given table and returns the row keys decoded as UTF-8 strings. */
public String[] readAllRowKeys(BigtableDataClient bigtable, String table) {
  Query query = Query.create(TableId.of(table));
  return bigtable.readRows(query).stream()
      .map(r -> r.getKey().toString(StandardCharsets.UTF_8))
      .toArray(String[]::new);
}

public long cellCount(Map<ByteString, Row> rows) {
return rows.values().stream().mapToLong(r -> r.getCells().size()).sum();
}
@@ -119,6 +135,18 @@ public void waitUntilBigtableContainsNumberOfRows(String tableId, long numberOfR
"Records not consumed in time.");
}

/**
 * Blocks until some cell in {@code tableId} has a write timestamp at or after {@code time}.
 * Bigtable cell timestamps are in microseconds, hence the millisecond-to-microsecond conversion.
 */
public void waitUntilBigtableWriteTime(String tableId, Instant time)
    throws InterruptedException {
  long start = System.currentTimeMillis();
  waitForCondition(
      testConditionIgnoringTransientErrors(
          () ->
              readAllRows(bigtableData, tableId).values().stream()
                  .anyMatch(
                      r ->
                          r.getCells().stream()
                              .anyMatch(c -> c.getTimestamp() >= time.toEpochMilli() * 1000))),
      DEFAULT_BIGTABLE_RETRY_TIMEOUT_MILLIS,
      "No cell with a write timestamp at or after " + time + " was found in time.");
  long elapsed = System.currentTimeMillis() - start;
  System.out.printf("Bigtable rows found in table %s after %dms%n", tableId, elapsed);
}

public void waitUntilBigtableContainsNumberOfCells(String tableId, long numberOfCells)
throws InterruptedException {
waitForCondition(
@@ -169,6 +169,7 @@ private String startConnector(Map<String, String> configProps, Set<String> topic
if (topicNameSuffixes.isEmpty()) {
configProps.put(SinkConnectorConfig.TOPICS_CONFIG, id);
connect.kafka().createTopic(id, numTasks);
logger.info("created topic: " + id);
} else {
configProps.put(SinkConnectorConfig.TOPICS_REGEX_CONFIG, id + ".*");
for (String suffix : topicNameSuffixes) {
@@ -0,0 +1,143 @@
package com.google.cloud.kafka.connect.bigtable.integration;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.google.cloud.bigtable.data.v2.models.Row;
import com.google.cloud.bigtable.data.v2.models.RowCell;
import com.google.cloud.kafka.connect.bigtable.config.BigtableErrorMode;
import com.google.cloud.kafka.connect.bigtable.config.InsertMode;
import com.google.cloud.kafka.connect.bigtable.transformations.FlattenArrayElement;
import com.google.protobuf.ByteString;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.storage.StringConverter;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.ExecutionException;

import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.*;
import static org.apache.kafka.connect.runtime.WorkerConfig.KEY_CONVERTER_CLASS_CONFIG;
import static org.apache.kafka.connect.runtime.WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

@RunWith(JUnit4.class)
public class FlattenArrayElementIT extends BaseKafkaConnectBigtableIT {
private static final String KEY1 = "key1";

@Test
public void testFlattenArrayElementSmt() throws InterruptedException, ExecutionException, JsonProcessingException {
Map<String, String> props = baseConnectorProps();
props.put(INSERT_MODE_CONFIG, InsertMode.UPSERT.name());
props.put("transforms", "flattenElements");
props.put("transforms.flattenElements.type", FlattenArrayElement.class.getName());
props.put("transforms.flattenElements." + FlattenArrayElement.ARRAY_FIELD_NAME, "products");
props.put("transforms.flattenElements." + FlattenArrayElement.ARRAY_INNER_WRAPPER_FIELD_NAME, "list");
props.put("transforms.flattenElements." + FlattenArrayElement.ARRAY_ELEMENT_WRAPPER_FIELD_NAME, "element");
props.put(DEFAULT_COLUMN_FAMILY_CONFIG, "cf");
props.put(ERROR_MODE_CONFIG, BigtableErrorMode.FAIL.name());
props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
props.put(VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());

String testId = startSingleTopicConnector(props);
createTablesAndColumnFamilies(Map.of(testId, Set.of(testId, "cf", "products")));

Schema productSchema = SchemaBuilder.struct()
.field("name", Schema.STRING_SCHEMA)
.field("id", Schema.STRING_SCHEMA)
.field("quantity", Schema.INT32_SCHEMA)
.build();

Schema elementSchema = SchemaBuilder.struct().field("element", productSchema).build();

Schema schema = SchemaBuilder.struct().optional()
.field("orderId", Schema.STRING_SCHEMA)
.field("userId", Schema.STRING_SCHEMA)
.field("products",
SchemaBuilder.struct().field("list", SchemaBuilder.array(elementSchema)).build()
)
.build();

JsonConverter converter = new JsonConverter();
converter.configure(Collections.singletonMap("schemas.enable", "true"), false);

Struct productElement1 = new Struct(elementSchema).put("element", new Struct(productSchema)
.put("name", "Ball")
.put("id", "PROD-123")
.put("quantity", 5)
);
Struct productElement2 = new Struct(elementSchema).put("element", new Struct(productSchema)
.put("name", "Car")
.put("id", "PROD-456")
.put("quantity", 1)
);
Struct productElement3 = new Struct(elementSchema).put("element", new Struct(productSchema)
.put("name", "Tambourine")
.put("id", "PROD-789")
.put("quantity", 2)
);

List<Struct> productList = List.of(productElement1, productElement2, productElement3);

Struct productsWrapper = new Struct(schema.field("products").schema())
.put("list", productList);

Struct value = new Struct(schema)
.put("orderId", "ORD-999")
.put("userId", "USER-42")
.put("products", productsWrapper);


byte[] serializedValue = converter.fromConnectData(testId, schema, value);
System.out.println(new String(serializedValue, StandardCharsets.UTF_8));

connect.kafka().produce(testId, KEY1, new String(serializedValue, StandardCharsets.UTF_8));

waitUntilBigtableContainsNumberOfRows(testId, 1);
Map<ByteString, Row> rows = readAllRows(bigtableData, testId);
ByteString key = ByteString.copyFrom(KEY1.getBytes(StandardCharsets.UTF_8));
Row row1 = rows.get(key);
assertNotNull(row1);
assertEquals(3, row1.getCells().size());

List<RowCell> orderIdCells = row1.getCells("cf", "orderId");
assertEquals(1, orderIdCells.size());
assertEquals("ORD-999", orderIdCells.get(0).getValue().toString(StandardCharsets.UTF_8));

List<RowCell> userIdCells = row1.getCells("cf", "userId");
assertEquals(1, userIdCells.size());
assertEquals("USER-42", userIdCells.get(0).getValue().toString(StandardCharsets.UTF_8));

List<RowCell> productCells = row1.getCells("cf", "products");
assertEquals(1, productCells.size());
String rawProductsJson = productCells.get(0).getValue().toString(StandardCharsets.UTF_8);

ObjectMapper mapper = new ObjectMapper();
ArrayNode productsJson = (ArrayNode) mapper.readTree(rawProductsJson);
assertEquals(3, productsJson.size());

// product 1
assertEquals("Ball", productsJson.get(0).get("name").asText());
assertEquals("PROD-123", productsJson.get(0).get("id").asText());
assertEquals(5, productsJson.get(0).get("quantity").asInt());

// product 2
assertEquals("Car", productsJson.get(1).get("name").asText());
assertEquals("PROD-456", productsJson.get(1).get("id").asText());
assertEquals(1, productsJson.get(1).get("quantity").asInt());

// product 3
assertEquals("Tambourine", productsJson.get(2).get("name").asText());
assertEquals("PROD-789", productsJson.get(2).get("id").asText());
assertEquals(2, productsJson.get(2).get("quantity").asInt());
}
}