Skip to content

Commit 82b57d5

Browse files
committed
Separate eachMessage and eachBatch internal consume loop
1 parent d147944 commit 82b57d5

File tree

1 file changed

+121
-17
lines changed

1 file changed

+121
-17
lines changed

lib/kafkajs/_consumer.js

Lines changed: 121 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -902,11 +902,16 @@ class Consumer {
902902
}
903903

904904
/* We deliberately don't await this. */
905-
this.#runInternal(config);
905+
if (config.eachMessage) {
906+
this.#runInternalEachMessage(config);
907+
} else {
908+
this.#runInternalEachBatch(config);
909+
}
906910
}
907911

908-
/* Internal polling loop. It accepts the same config object that `run` accepts. */
909-
async #runInternal(config) {
912+
/* Internal polling loop.
913+
* It accepts the same config object that `run` accepts, but config.eachMessage must be set. */
914+
async #runInternalEachMessage(config) {
910915
while (this.#state === ConsumerState.CONNECTED) {
911916

912917
/* We need to acquire a lock here, because we need to ensure that we don't
@@ -954,26 +959,125 @@ class Consumer {
954959
}
955960

956961
let eachMessageProcessed = false;
957-
let payload;
958-
if (config.eachMessage) {
959-
payload = this.#createPayload(m);
960-
} else {
961-
payload = this.#createBatchPayload(m);
962+
const payload = this.#createPayload(m);
963+
try {
964+
await config.eachMessage(payload);
965+
eachMessageProcessed = true;
966+
} catch (e) {
967+
/* It's not only possible, but expected that an error will be thrown by eachMessage.
968+
* This is especially true since the pattern of pause() followed by throwing an error
969+
* is encouraged. To meet the API contract, we seek one offset backward (which
970+
* means seeking to the message offset).
971+
* However, we don't do this inside the catch, but just outside it. This is because throwing an
972+
* error is not the only case where we might want to seek back.
973+
*
974+
* So - do nothing but a debug log, but at this point eachMessageProcessed is false.
975+
*/
976+
this.#logger.debug(`Consumer encountered error while processing message. Error details: ${JSON.stringify(e)}. The same message may be reprocessed.`);
977+
}
978+
979+
/* If the message is unprocessed, due to an error, or because the user has not resolved it, we seek back. */
980+
if (!eachMessageProcessed) {
981+
await this.seek({
982+
topic: m.topic,
983+
partition: m.partition,
984+
offset: m.offset,
985+
});
986+
}
987+
988+
/* Store the offsets we need to store, or at least record them for cache invalidation reasons. */
989+
if (eachMessageProcessed) {
990+
try {
991+
if (!this.#userManagedStores) {
992+
this.#internalClient.offsetsStore([{
993+
topic: m.topic, partition: m.partition, offset: Number(m.offset) + 1, leaderEpoch: m.leaderEpoch
994+
}]);
995+
}
996+
this.#lastConsumedOffsets.set(`${m.topic}|${m.partition}`, Number(m.offset) + 1);
997+
} catch (e) {
998+
/* Not much we can do, except log the error. */
999+
if (this.#logger)
1000+
this.#logger.error(`Consumer encountered error while storing offset. Error details: ${JSON.stringify(e)}`);
1001+
}
1002+
}
1003+
1004+
/* Force an immediate seek here. It's possible that there are no more messages to be passed to the user,
1005+
* but the user seeked in the call to eachMessage, or else we encountered the error catch block.
1006+
* In that case, the results of that seek will never be reflected unless we do this. */
1007+
if (this.#checkPendingSeeks)
1008+
await this.#seekInternal();
1009+
1010+
/* TODO: another check we need to do here is to see how kafkaJS is handling
1011+
* commits. Are they committing after a message is _processed_?
1012+
* In that case we need to turn off librdkafka's auto-commit, and commit
1013+
* inside this function.
1014+
*/
1015+
1016+
/* Release the lock so that any pending disconnect can go through. */
1017+
await this.#lock.release();
1018+
}
1019+
}
1020+
1021+
/* Internal polling loop.
1022+
* It accepts the same config object that `run` accepts, but config.eachBatch must be set. */
1023+
async #runInternalEachBatch(config) {
1024+
while (this.#state === ConsumerState.CONNECTED) {
1025+
1026+
/* We need to acquire a lock here, because we need to ensure that we don't
1027+
* disconnect while in the middle of processing a message. */
1028+
if (!(await acquireOrLog(this.#lock, this.#logger)))
1029+
continue;
1030+
1031+
/* Invalidate the message cache if needed. */
1032+
if (this.#messageCache.isStale()) {
1033+
await this.#clearCacheAndResetPositions(true);
1034+
await this.#lock.release();
1035+
continue;
1036+
}
1037+
1038+
const m = await this.#consumeSingleCached().catch(e => {
1039+
/* Since this error cannot be exposed to the user in the current situation, just log and retry.
1040+
* This is due to restartOnFailure being set to always true. */
1041+
if (this.#logger)
1042+
this.#logger.error(`Consumer encountered error while consuming. Retrying. Error details: ${JSON.stringify(e)}`);
1043+
});
1044+
1045+
if (!m) {
1046+
await this.#lock.release();
1047+
continue;
1048+
}
1049+
1050+
/* TODO: add partitionsConsumedConcurrently-based concurrency here.
1051+
* If we maintain a map of topic partitions to promises, and a counter,
1052+
* we can probably achieve it with the correct guarantees of ordering
1053+
* though to maximize performance, we need to consume only from partitions for which
1054+
* an eachMessage call is not already going.
1055+
* It's risky to consume, and then store the message in something like an
1056+
* array/list until it can be processed, because librdkafka marks it as
1057+
* 'stored'... but anyway - we can implement something like this.
1058+
*/
1059+
1060+
/* Make pending seeks 'concrete'. */
1061+
if (this.#checkPendingSeeks) {
1062+
const invalidateMessage = await this.#seekInternal({ topic: m.topic, partition: m.partition });
1063+
if (invalidateMessage) {
1064+
/* Don't pass this message on to the user if this topic partition was seeked to. */
1065+
this.#lock.release();
1066+
continue;
1067+
}
9621068
}
1069+
1070+
let eachMessageProcessed = false;
1071+
const payload = this.#createBatchPayload(m);
9631072
try {
964-
if (config.eachMessage) {
965-
await config.eachMessage(payload);
1073+
await config.eachBatch(payload);
1074+
if (config.eachBatchAutoResolve) {
9661075
eachMessageProcessed = true;
9671076
} else {
968-
await config.eachBatch(payload);
969-
if (config.eachBatchAutoResolve) {
970-
eachMessageProcessed = true;
971-
} else {
972-
eachMessageProcessed = payload._messageResolved;
973-
}
1077+
eachMessageProcessed = payload._messageResolved;
9741078
}
9751079
} catch (e) {
976-
/* It's not only possible, but expected that an error will be thrown by eachMessage or eachBatch.
1080+
/* It's not only possible, but expected that an error will be thrown by eachBatch.
9771081
* This is especially true since the pattern of pause() followed by throwing an error
9781082
* is encouraged. To meet the API contract, we seek one offset backward (which
9791083
* means seeking to the message offset).

0 commit comments

Comments
 (0)