
Commit 0531064

CC-7163: Add task information injection (#50)
* Add task id header
* Add task generation header
* Add current iteration header

Signed-off-by: Greg Harris <[email protected]>
1 parent ccca6ec commit 0531064

4 files changed: +38, -3 lines changed


README.md

Lines changed: 13 additions & 0 deletions
@@ -212,6 +212,19 @@ If you are using Avro format for producing data to Kafka, here is the correspond
 
 If you are not using Avro format for producing data to Kafka, there will be no schema in Confluent Schema Registry.
 
+# Utility Headers
+
+The Datagen Connector will capture details about the record's generation in the headers of the records it produces.
+The following fields are populated:
+
+Header Key | Header Value
+-|-
+`task.generation` | Task generation number (starts at 0, incremented each time the task restarts)
+`task.id` | Task id number (0 up to `tasks.max` - 1)
+`current.iteration` | Record iteration number (starts at 0, incremented each time a record is generated)
+
+
+
 # Publishing Docker Images
 
 *Note: The following instructions are only relevant if you are an administrator of this repository and have push access to the https://hub.docker.com/r/cnfldemos/kafka-connect-datagen/ repository. The local Docker daemon must be logged into a proper Docker Hub account.*
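For a quick end-to-end check of the new headers, here is a minimal consumer-side sketch. It assumes the Connect worker runs with its default header converter (`SimpleHeaderConverter`, which serializes header values as UTF-8 strings) and that the connector produces to a topic named `test-topic` on `localhost:9092`; the topic, bootstrap servers, and class name are placeholders, not part of this commit.

```java
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.header.Header;

public class UtilityHeaderReader {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");  // placeholder
    props.put("group.id", "datagen-header-reader");
    props.put("key.deserializer",
        "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer",
        "org.apache.kafka.common.serialization.ByteArrayDeserializer");

    try (KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props)) {
      consumer.subscribe(Collections.singletonList("test-topic"));  // placeholder
      ConsumerRecords<String, byte[]> batch = consumer.poll(Duration.ofSeconds(5));
      for (ConsumerRecord<String, byte[]> record : batch) {
        // With the default SimpleHeaderConverter the long values arrive as
        // their string renderings, e.g. task.id=0, task.generation=0,
        // current.iteration=42.
        for (Header header : record.headers()) {
          System.out.printf("%s=%s%n", header.key(),
              new String(header.value(), StandardCharsets.UTF_8));
        }
      }
    }
  }
}
```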

checkstyle/suppressions.xml

Lines changed: 1 addition & 0 deletions
@@ -6,4 +6,5 @@
 
 <suppressions>
   <suppress checks="ClassDataAbstractionCoupling" files="(DatagenTask|AvroMessageReader|RestService|Errors|SchemaRegistryRestApplication|KafkaSchemaRegistry|KafkaStore|AvroData|KafkaGroupMasterElector).java"/>
+  <suppress checks="(CyclomaticComplexity|NPathComplexity)" files="DatagenTask.java"/>
 </suppressions>

src/main/java/io/confluent/kafka/connect/datagen/DatagenTask.java

Lines changed: 13 additions & 3 deletions
@@ -32,6 +32,7 @@
 import org.apache.avro.generic.GenericRecord;
 import org.apache.kafka.connect.data.SchemaAndValue;
 import org.apache.kafka.connect.errors.ConnectException;
+import org.apache.kafka.connect.header.ConnectHeaders;
 import org.apache.kafka.connect.source.SourceRecord;
 import org.apache.kafka.connect.source.SourceTask;
 import org.apache.kafka.connect.data.Schema;
@@ -66,7 +67,7 @@ public class DatagenTask extends SourceTask {
   private AvroData avroData;
   private int taskId;
   private Map<String, Object> sourcePartition;
-  private int taskGeneration;
+  private long taskGeneration;
   private Random random;
 
   protected enum Quickstart {
@@ -110,6 +111,7 @@ public void start(Map<String, String> props) {
     maxRecords = config.getIterations();
     schemaFilename = config.getSchemaFilename();
     schemaKeyField = config.getSchemaKeyfield();
+    taskGeneration = 0;
     taskId = Integer.parseInt(props.get(TASK_ID));
     sourcePartition = Collections.singletonMap(TASK_ID, taskId);
 
@@ -232,21 +234,29 @@ public List<SourceRecord> poll() throws InterruptedException {
     // Essentially, the "next" state of the connector after this loop completes
     Map<String, Object> sourceOffset = new HashMap<>();
     // The next lifetime will be a member of the next generation.
-    sourceOffset.put(TASK_GENERATION, (long) (taskGeneration + 1));
+    sourceOffset.put(TASK_GENERATION, taskGeneration + 1);
     // We will have produced this record
     sourceOffset.put(CURRENT_ITERATION, count + 1);
     // This is the seed that we just re-seeded for our own next iteration.
     sourceOffset.put(RANDOM_SEED, seed);
 
+    final ConnectHeaders headers = new ConnectHeaders();
+    headers.addLong(TASK_GENERATION, taskGeneration);
+    headers.addLong(TASK_ID, taskId);
+    headers.addLong(CURRENT_ITERATION, count);
+
     final List<SourceRecord> records = new ArrayList<>();
     SourceRecord record = new SourceRecord(
         sourcePartition,
         sourceOffset,
         topic,
+        null,
         KEY_SCHEMA,
         keyString,
         messageSchema,
-        messageValue
+        messageValue,
+        null,
+        headers
     );
     records.add(record);
     count += records.size();
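Note the asymmetry in poll(): the headers carry the current `taskGeneration`, while the committed source offset stores `taskGeneration + 1`, because the offset describes the task's next lifetime. The restore side of that handshake is not part of this diff, so the following is only a sketch of how a restarted task can read the committed generation back through the standard Connect API (`SourceTaskContext` and `OffsetStorageReader` are real interfaces; the class and method names here are illustrative):

```java
import java.util.Collections;
import java.util.Map;
import org.apache.kafka.connect.source.SourceTaskContext;

class GenerationRestoreSketch {
  static final String TASK_ID = "task.id";
  static final String TASK_GENERATION = "task.generation";

  // Called from start(): look up the offset committed under this task's
  // source partition and adopt the generation stored there.
  long restoreGeneration(SourceTaskContext context, int taskId) {
    Map<String, Object> partition = Collections.singletonMap(TASK_ID, taskId);
    Map<String, Object> offset = context.offsetStorageReader().offset(partition);
    if (offset == null) {
      // First lifetime of this task: nothing committed yet, generation 0.
      return 0L;
    }
    // poll() stored taskGeneration + 1, which is exactly the generation
    // this new lifetime should run as.
    return (Long) offset.get(TASK_GENERATION);
  }
}
```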

src/test/java/io/confluent/kafka/connect/datagen/DatagenTaskTest.java

Lines changed: 11 additions & 0 deletions
@@ -152,6 +152,17 @@ public void shouldRestoreFromSourceOffsets() throws Exception {
     assertEquals(pollA.value(), pollB.value());
   }
 
+  @Test
+  public void shouldInjectHeaders() throws Exception {
+    createTaskWith(Quickstart.USERS);
+    generateRecords();
+    for (SourceRecord record : records) {
+      assertEquals((long) TASK_ID, record.headers().lastWithName(DatagenTask.TASK_ID).value());
+      assertEquals(0L, record.headers().lastWithName(DatagenTask.TASK_GENERATION).value());
+      assertNotNull(record.headers().lastWithName(DatagenTask.CURRENT_ITERATION));
+    }
+  }
+
   @Test
   public void shouldFailToGenerateMoreRecordsThanSpecified() throws Exception {
     // Generate the expected number of records
