Add batch staleness, resolution, and offset management to eachBatch

milindl · milindl · commit b2b546a0a062 · 2024-07-02T16:17:47.000+05:30
diff --git a/lib/kafkajs/_consumer.js b/lib/kafkajs/_consumer.js
@@ -130,6 +130,11 @@ class Consumer {
    */
   #userManagedStores = false;
 
+  /**
+   * Whether auto-commit ('enable.auto.commit') is enabled. When false, offsets must be committed explicitly.
+   */
+  #autoCommit = false;
+
   /**
    * Signals an intent to disconnect the consumer.
    */
@@ -161,6 +166,16 @@ class Consumer {
    */
   #fetchInProgress = false;
 
+  /**
+   * Maps topic-partition key to the batch payload currently being processed, for marking staleness.
+   *
+   * Only used with eachBatch: #batchProcessor adds the entry before invoking eachBatch and deletes it
+   * afterwards, while code that records a pending seek or pauses partitions sets `_stale` on the payload.
+   * NOTE: given that size of this map will never exceed #concurrency, a linear search might actually be
+   * faster over what will generally be <10 elems. But a map makes conceptual sense. Revise later if needed.
+   */
+  #topicPartitionToBatchPayload = new Map();
+
   /**
    * TODO: remove this or make it a bit more reliable.
    * This is a debug property for this branch.
@@ -540,6 +555,12 @@ class Consumer {
       this.#userManagedStores = !rdKafkaConfig['enable.auto.offset.store'];
     }
 
+    if (!Object.hasOwn(rdKafkaConfig, 'enable.auto.commit')) {
+      this.#autoCommit = true; /* librdkafka default. */
+    } else {
+      this.#autoCommit = rdKafkaConfig['enable.auto.commit'];
+    }
+
     return rdKafkaConfig;
   }
 
@@ -616,6 +637,66 @@ class Consumer {
     };
   }
 
+  /**
+   * Bound onto each eachBatch payload by #createBatchPayload as `resolveOffset`.
+   * Marks an offset as processed: records it on the payload, stores offset + 1 into librdkafka (unless
+   * the user manages offset stores), and remembers offset + 1 in the lastConsumedOffsets map.
+   * Offsets at or below the last resolved one are ignored. Store failures are logged, never rethrown.
+   *
+   * @param {*} payload The batch payload this method is attached to.
+   * @param {*} offsetToResolve The offset to resolve; coerced to a number with unary plus.
+   * @param {*} leaderEpoch Leader epoch of the message. Optional for API compatibility; defaults to -1.
+   * @throws {error.KafkaJSError} with code ERR__INVALID_ARG when the offset coerces to NaN.
+   */
+  #eachBatchPayload_resolveOffsets(payload, offsetToResolve, leaderEpoch = -1) {
+    const numericOffset = +offsetToResolve;
+
+    if (Number.isNaN(numericOffset)) {
+      /* Nothing sensible can be stored; surface the bad argument to the caller. */
+      throw new error.KafkaJSError(`Invalid offset to resolve: ${offsetToResolve}`, { code: error.ErrorCodes.ERR__INVALID_ARG });
+    }
+
+    /* librdkafka can only hold one stored offset, so a resolve that does not move past the
+     * highest offset resolved so far is a no-op. */
+    if (numericOffset <= payload._lastResolvedOffset.offset) {
+      return;
+    }
+
+    const { topic, partition } = payload.batch;
+    const key = partitionKey({ topic, partition });
+    const nextOffset = numericOffset + 1;
+
+    payload._lastResolvedOffset = { offset: numericOffset, leaderEpoch };
+
+    try {
+      if (!this.#userManagedStores) {
+        this.#internalClient.offsetsStore([
+          { topic, partition, offset: nextOffset, leaderEpoch },
+        ]);
+      }
+      this.#lastConsumedOffsets.set(key, nextOffset);
+    } catch (e) {
+      /* Best effort: report the failure and carry on. */
+      if (this.#logger) {
+        this.#logger.error(`Consumer encountered error while storing offset. Error details: ${e}:${e.stack}`);
+      }
+    }
+  }
+
+  /**
+   * Bound onto each eachBatch payload by #createBatchPayload as `commitOffsetsIfNecessary`.
+   * Calls commitOffsets() only when auto-commit is disabled; otherwise it does nothing.
+   * @returns {Promise<void>} settles once any explicit commit has been awaited.
+   */
+  async #eachBatchPayload_commitOffsetsIfNecessary() {
+    /* With auto-commit enabled, librdkafka internally commits whatever has been stored, so an
+     * explicit commit is needed only in the manual case. Offsets the user resolved themselves
+     * have already gone through resolveOffset() by this point. */
+    if (!this.#autoCommit) {
+      await this.commitOffsets();
+    }
+  }
+
   /**
    * Converts a list of messages returned by node-rdkafka into a message that can be used by the eachBatch callback.
    * @param {import("../..").Message[]} messages - must not be empty. Must contain messages from the same topic and partition.
@@ -666,27 +747,31 @@ class Consumer {
     const batch = {
       topic,
       partition,
-      highWatermark: '-1001', // Invalid - we don't fetch it
+      highWatermark: '-1001', /* We don't fetch it yet. We can call committed() to fetch it but that might incur network calls. */
       messages: messagesConverted,
       isEmpty: () => false,
-      firstOffset: () => messagesConverted[0].offset,
-      lastOffset: () => messagesConverted[messagesConverted.length - 1].offset,
+      firstOffset: () => (messagesConverted[0].offset).toString(),
+      lastOffset: () => (messagesConverted[messagesConverted.length - 1].offset).toString(),
       offsetLag: () => notImplemented(),
       offsetLagLow: () => notImplemented(),
     };
 
     const returnPayload = {
       batch,
-      _messageResolved: false,
-      resolveOffset: () => { returnPayload._messageResolved = true; },
+      _stale: false,
+      _lastResolvedOffset: { offset: -1, leaderEpoch: -1 },
       heartbeat: async () => { /* no op */ },
       pause: this.pause.bind(this, [{ topic, partitions: [partition] }]),
-      commitOffsetsIfNecessary: async () => { /* no op */ },
-      uncommittedOffsets: () => notImplemented(),
+      commitOffsetsIfNecessary: this.#eachBatchPayload_commitOffsetsIfNecessary.bind(this),
       isRunning: () => this.#running,
-      isStale: () => false,
+      isStale: () => returnPayload._stale,
+      /* NOTE: Probably never to be implemented. Not sure exactly how we'd compute this
+       * inexpensively. */
+      uncommittedOffsets: () => notImplemented(),
     };
 
+    returnPayload.resolveOffset = this.#eachBatchPayload_resolveOffsets.bind(this, returnPayload);
+
     return returnPayload;
   }
 
@@ -1020,20 +1105,33 @@ class Consumer {
   /**
    * Processes a batch of messages.
    *
-   * @param ms Messages as obtained from #consumeCachedN.
+   * @param ms Messages as obtained from #consumeCachedN (ms.length !== 0).
    * @param config Config as passed to run().
    * @returns {Promise<number>} the cache index of the message that was processed.
    */
   async #batchProcessor(ms, config) {
-    let eachMessageProcessed = false;
+    const key = partitionKey(ms[0]);
     const payload = this.#createBatchPayload(ms);
+
+    this.#topicPartitionToBatchPayload.set(key, payload);
+
+    let lastOffsetProcessed = { offset: -1, leaderEpoch: -1 };
+    const lastOffset = +(ms[ms.length - 1].offset);
+    const lastLeaderEpoch = ms[ms.length - 1].leaderEpoch;
     try {
       await config.eachBatch(payload);
-      if (config.eachBatchAutoResolve) {
-        eachMessageProcessed = true;
-      } else {
-        eachMessageProcessed = payload._messageResolved;
+
+      /* If the user isn't resolving offsets, we resolve them here. It's significant here to call this method
+       * because besides updating `payload._lastResolvedOffset`, this method is also storing the offsets to
+       * librdkafka, and accounting for any cache invalidations.
+       * Don't bother resolving offsets if payload became stale at some point. We can't know when the payload
+       * became stale, so either the user has been nice enough to keep resolving messages, or we must seek to
+       * the first offset to ensure no message loss. */
+      if (config.eachBatchAutoResolve && !payload._stale) {
+        payload.resolveOffset(lastOffset, lastLeaderEpoch);
       }
+
+      lastOffsetProcessed = payload._lastResolvedOffset;
     } catch (e) {
       /* It's not only possible, but expected that an error will be thrown by eachBatch.
        * This is especially true since the pattern of pause() followed by throwing an error
@@ -1053,38 +1151,24 @@ class Consumer {
       this.#logger.error(`Consumer encountered error while processing message. Error details: ${e}: ${e.stack}. The same message may be reprocessed.`);
 
       /* The value of eachBatchAutoResolve is not important. The only place where a message is marked processed
-       * despite an error is if the user says so, and the user can use resolveOffsets for both the possible
+       * despite an error is if the user says so, and the user can use resolveOffset for both the possible
        * values eachBatchAutoResolve can take. */
-      eachMessageProcessed = payload._messageResolved;
+      lastOffsetProcessed = payload._lastResolvedOffset;
     }
 
-    /* If the message is unprocessed, due to an error, or because the user has not resolved it, we seek back. */
-    /* TODO: currently we're seeking to just the first offset. Fix this to take care of messages we are resolving. */
-    if (!eachMessageProcessed) {
+    this.#topicPartitionToBatchPayload.delete(key);
+
+    /* If any message is unprocessed, either due to an error or due to the user not marking it processed, we must seek
+     * back to get it so it can be reprocessed. */
+    if (lastOffsetProcessed.offset !== lastOffset) {
+      const offsetToSeekTo = lastOffsetProcessed.offset === -1 ? ms[0].offset : (lastOffsetProcessed.offset + 1);
       await this.seek({
         topic: ms[0].topic,
         partition: ms[0].partition,
-        offset: ms[0].offset,
+        offset: offsetToSeekTo,
       });
     }
 
-    /* Store the offsets we need to store, or at least record them for cache invalidation reasons. */
-    /* TODO: currently we just store the last offset of the batch. Fix it to store the last resolved one + 1. */
-    if (eachMessageProcessed) {
-      try {
-        if (!this.#userManagedStores) {
-          this.#internalClient.offsetsStore([{
-            topic: ms[ms.length - 1].topic, partition: ms[ms.length - 1].partition, offset: Number(ms[ms.length - 1].offset) + 1, leaderEpoch: ms[ms.length - 1].leaderEpoch
-          }]);
-        }
-        this.#lastConsumedOffsets.set(partitionKey(ms[ms.length - 1]), Number(ms[ms.length - 1].offset) + 1);
-      } catch (e) {
-        /* Not much we can do, except log the error. */
-        if (this.#logger)
-          this.#logger.error(`Consumer encountered error while storing offset. Error details: ${JSON.stringify(e)}`);
-      }
-    }
-
     /* Force a immediate seek here. It's possible that there are no more messages to be passed to the user,
      * but the user seeked in the call to eachMessage, or else we encountered the error catch block.
      * In that case, the results of that seek will never be reflected unless we do this. */
@@ -1164,8 +1248,18 @@ class Consumer {
     while (!this.#disconnectStarted) {
       this.#workerTerminationScheduled = false;
       const workersToSpawn = Math.max(1, Math.min(this.#concurrency, this.#partitionCount));
-      this.#workers = Array(workersToSpawn).fill().map((_, i) => this.#worker(config, perMessageProcessor.bind(this), fetcher.bind(this), i));
-      await Promise.all(this.#workers);
+      this.#workers =
+        Array(workersToSpawn)
+          .fill()
+          .map((_, i) =>
+            this.#worker(config, perMessageProcessor.bind(this), fetcher.bind(this), i)
+              .catch(e => {
+                if (this.#logger)
+                  this.#logger.error(`Worker ${i} encountered an error: ${e}:${e.stack}`);
+              }));
+
+      /* Best we can do is log errors on worker issues - handled by the catch block above. */
+      await Promise.allSettled(this.#workers)
 
       /* One of the possible reasons for the workers to end is that the cache is globally stale.
        * We need to take care of expiring it. */
@@ -1416,7 +1510,17 @@ class Consumer {
     }
 
     this.#checkPendingSeeks = true;
-    this.#pendingSeeks.set(partitionKey(rdKafkaTopicPartitionOffset), rdKafkaTopicPartitionOffset.offset);
+    const key = partitionKey(rdKafkaTopicPartitionOffset)
+    this.#pendingSeeks.set(key, rdKafkaTopicPartitionOffset.offset);
+
+    /* Only for eachBatch:
+     * Immediately mark the batch it's associated with as stale, even if we don't
+     * do the actual 'seekInternal' at this time. This is because we need read-after-write
+     * consistency for eachBatch, and calling seek(toppar) from within eachBatch(toppar)
+     * should change the result of batch.isStale() immediately. */
+    if (this.#topicPartitionToBatchPayload.has(key)) {
+      this.#topicPartitionToBatchPayload.get(key)._stale = true;
+    }
   }
 
   async describeGroup() {
@@ -1490,6 +1594,11 @@ class Consumer {
      * making it unusable. */
     this.#messageCache.markStale(topics);
 
+    /* If anyone's using eachBatch, mark the batch as stale. */
+    topics.map(partitionKey)
+      .filter(key => this.#topicPartitionToBatchPayload.has(key))
+      .forEach(key => this.#topicPartitionToBatchPayload.get(key)._stale = true);
+
     topics.map(JSON.stringify).forEach(topicPartition => this.#pausedPartitions.add(topicPartition));
 
     return () => this.resume(topics);