Commit 2f4bd76

Add naive batching (without resolution handling)
1 parent 9a2f9c4 commit 2f4bd76

File tree: 1 file changed (+111 -50 lines)

lib/kafkajs/_consumer.js

Lines changed: 111 additions & 50 deletions
@@ -617,52 +617,60 @@ class Consumer {
   }
 
   /**
-   * Converts a message returned by node-rdkafka into a message that can be used by the eachBatch callback.
-   * @param {import("../..").Message} message
+   * Converts a list of messages returned by node-rdkafka into a message that can be used by the eachBatch callback.
+   * @param {import("../..").Message[]} messages - must not be empty. Must contain messages from the same topic and partition.
    * @returns {import("../../types/kafkajs").EachBatchPayload}
-   * @note Unlike the KafkaJS consumer, a batch here is for API compatibility only. It is always a single message.
    */
-  #createBatchPayload(message) {
-    let key = message.key;
-    if (typeof key === 'string') {
-      key = Buffer.from(key);
-    }
+  #createBatchPayload(messages) {
+    const topic = messages[0].topic;
+    const partition = messages[0].partition;
+
+    const messagesConverted = [];
+    for (let i = 0; i < messages.length; i++) {
+      const message = messages[i];
+      let key = message.key;
+      if (typeof key === 'string') {
+        key = Buffer.from(key);
+      }
 
-    let timestamp = message.timestamp ? String(message.timestamp) : '';
+      let timestamp = message.timestamp ? String(message.timestamp) : '';
 
-    let headers;
-    if (message.headers) {
-      headers = {}
-      for (const [key, value] of Object.entries(message.headers)) {
-        if (!Object.hasOwn(headers, key)) {
-          headers[key] = value;
-        } else if (headers[key].constructor === Array) {
-          headers[key].push(value);
-        } else {
-          headers[key] = [headers[key], value];
+      let headers;
+      if (message.headers) {
+        headers = {}
+        for (const [key, value] of Object.entries(message.headers)) {
+          if (!Object.hasOwn(headers, key)) {
+            headers[key] = value;
+          } else if (headers[key].constructor === Array) {
+            headers[key].push(value);
+          } else {
+            headers[key] = [headers[key], value];
+          }
         }
       }
-    }
 
-    const messageConverted = {
-      key,
-      value: message.value,
-      timestamp,
-      attributes: 0,
-      offset: String(message.offset),
-      size: message.size,
-      leaderEpoch: message.leaderEpoch,
-      headers
-    };
+      const messageConverted = {
+        key,
+        value: message.value,
+        timestamp,
+        attributes: 0,
+        offset: String(message.offset),
+        size: message.size,
+        leaderEpoch: message.leaderEpoch,
+        headers
+      };
+
+      messagesConverted.push(messageConverted);
+    }
 
     const batch = {
-      topic: message.topic,
-      partition: message.partition,
+      topic,
+      partition,
       highWatermark: '-1001', // Invalid - we don't fetch it
-      messages: [messageConverted],
+      messages: messagesConverted,
       isEmpty: () => false,
-      firstOffset: () => messageConverted.offset,
-      lastOffset: () => messageConverted.offset,
+      firstOffset: () => messagesConverted[0].offset,
+      lastOffset: () => messagesConverted[messagesConverted.length - 1].offset,
       offsetLag: () => notImplemented(),
       offsetLagLow: () => notImplemented(),
     };
@@ -672,7 +680,7 @@ class Consumer {
       _messageResolved: false,
       resolveOffset: () => { returnPayload._messageResolved = true; },
       heartbeat: async () => { /* no op */ },
-      pause: this.pause.bind(this, [{ topic: message.topic, partitions: [message.partition] }]),
+      pause: this.pause.bind(this, [{ topic, partitions: [partition] }]),
       commitOffsetsIfNecessary: async () => { /* no op */ },
       uncommittedOffsets: () => notImplemented(),
       isRunning: () => this.#running,
@@ -685,7 +693,7 @@ class Consumer {
   /**
    * Consumes a single message from the internal consumer.
    * @param {number} savedIndex - the index of the message in the cache to return.
-   * @returns {Promise<import("../..").Message>} a promise that resolves to a single message.
+   * @returns {Promise<import("../..").Message | null>} a promise that resolves to a single message or null.
    * @note this method caches messages as well, but returns only a single message.
    */
   async #consumeSingleCached(savedIndex) {
@@ -726,6 +734,52 @@ class Consumer {
     });
   }
 
+  /**
+   * Consumes up to `size` messages from the internal consumer.
+   * @param {number} savedIndex - the index of the message in the cache to return.
+   * @param {number} size - the number of messages to fetch.
+   * @returns {Promise<import("../..").Message[] | null>} a promise that resolves to a list of messages or null.
+   * @note this method caches messages as well.
+   * @sa #consumeSingleCached
+   */
+  async #consumeCachedN(savedIndex, size) {
+    const msgs = this.#messageCache.nextN(savedIndex, size);
+    if (msgs) {
+      return msgs;
+    }
+
+    /* It's possible that we get msgs = null, but that's because partitionConcurrency
+     * exceeds the number of partitions containing messages. So in this case,
+     * we should not call for new fetches, rather, try to focus on what we have left.
+     */
+    if (!msgs && this.#messageCache.pendingSize() !== 0) {
+      return null;
+    }
+
+    if (this.#fetchInProgress) {
+      return null;
+    }
+
+    this.#fetchInProgress = true;
+    return new Promise((resolve, reject) => {
+      this.#internalClient.consume(this.#messageCache.maxSize, (err, messages) => {
+        this.#fetchInProgress = false;
+        if (err) {
+          reject(createKafkaJsErrorFromLibRdKafkaError(err));
+          return;
+        }
+        this.#messageCache.addMessages(messages);
+        const msgsList = this.#messageCache.nextN(-1, size);
+        if (messages.length === this.#messageCache.maxSize) {
+          this.#messageCache.increaseMaxSize();
+        } else {
+          this.#messageCache.decreaseMaxSize(messages.length);
+        }
+        resolve(msgsList);
+      });
+    });
+  }
+
   /**
    * Consumes n messages from the internal consumer.
    * @returns {Promise<import("../..").Message[]>} a promise that resolves to a list of messages.
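
#consumeCachedN relies on a per-consumer message cache whose implementation is outside this diff. The sketch below is a simplified, hypothetical model of the contract the method assumes: nextN returns up to `size` cached messages or null, addMessages stores a fetched batch, pendingSize reports what is left, and maxSize grows when a fetch comes back full and shrinks otherwise. The real MessageCache also does per-partition bookkeeping and staleness tracking, which is omitted here.

    // Hypothetical, simplified model of the cache contract (illustration only;
    // names and growth policy are assumptions, not the repo's MessageCache).
    class SketchMessageCache {
      constructor() {
        this.maxSize = 1;   // how many messages to request from librdkafka per fetch
        this.messages = []; // flat FIFO of cached messages
      }

      addMessages(messages) { this.messages.push(...messages); }

      /* Return up to `size` messages, or null if nothing is cached. */
      nextN(_savedIndex, size) {
        if (this.messages.length === 0) return null;
        return this.messages.splice(0, size);
      }

      pendingSize() { return this.messages.length; }

      increaseMaxSize() { this.maxSize = Math.min(this.maxSize * 2, 1024); }

      decreaseMaxSize(lastFetchSize) { this.maxSize = Math.max(1, lastFetchSize); }
    }
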
@@ -964,15 +1018,15 @@ class Consumer {
   }
 
   /**
-   * Processes a batch message (a single message as of now).
+   * Processes a batch of messages.
    *
-   * @param m Message as obtained from #consumeSingleCached.
+   * @param ms Messages as obtained from #consumeCachedN.
    * @param config Config as passed to run().
    * @returns {Promise<number>} the cache index of the message that was processed.
    */
-  async #batchProcessor(m, config) {
+  async #batchProcessor(ms, config) {
     let eachMessageProcessed = false;
-    const payload = this.#createBatchPayload(m);
+    const payload = this.#createBatchPayload(ms);
     try {
       await config.eachBatch(payload);
       if (config.eachBatchAutoResolve) {
@@ -1005,23 +1059,25 @@ class Consumer {
     }
 
     /* If the message is unprocessed, due to an error, or because the user has not resolved it, we seek back. */
+    /* TODO: currently we're seeking to just the first offset. Fix this to take care of messages we are resolving. */
     if (!eachMessageProcessed) {
       await this.seek({
-        topic: m.topic,
-        partition: m.partition,
-        offset: m.offset,
+        topic: ms[0].topic,
+        partition: ms[0].partition,
+        offset: ms[0].offset,
       });
     }
 
     /* Store the offsets we need to store, or at least record them for cache invalidation reasons. */
+    /* TODO: currently we just store the last offset of the batch. Fix it to store the last resolved one + 1. */
     if (eachMessageProcessed) {
       try {
         if (!this.#userManagedStores) {
           this.#internalClient.offsetsStore([{
-            topic: m.topic, partition: m.partition, offset: Number(m.offset) + 1, leaderEpoch: m.leaderEpoch
+            topic: ms[ms.length - 1].topic, partition: ms[ms.length - 1].partition, offset: Number(ms[ms.length - 1].offset) + 1, leaderEpoch: ms[ms.length - 1].leaderEpoch
           }]);
         }
-        this.#lastConsumedOffsets.set(partitionKey(m), Number(m.offset) + 1);
+        this.#lastConsumedOffsets.set(partitionKey(ms[ms.length - 1]), Number(ms[ms.length - 1].offset) + 1);
       } catch (e) {
         /* Not much we can do, except log the error. */
         if (this.#logger)
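
The TODO above calls for storing the last offset the user actually resolved, plus one, instead of the last offset of the batch. A hedged sketch of that calculation, using a hypothetical helper that is not part of this commit:

    // Hypothetical helper illustrating the fix the TODO describes: store
    // (last resolved offset + 1), or nothing if no message was resolved.
    function offsetToStore(messages, lastResolvedOffset /* string or null */) {
      if (lastResolvedOffset === null)
        return null; // nothing resolved: seek back instead of storing
      return {
        topic: messages[0].topic,
        partition: messages[0].partition,
        offset: Number(lastResolvedOffset) + 1,
      };
    }
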
@@ -1035,7 +1091,7 @@ class Consumer {
     if (this.#checkPendingSeeks)
       await this.#seekInternal();
 
-    return m.index;
+    return ms.index;
   }
 
   /**
@@ -1050,7 +1106,7 @@ class Consumer {
    *
    * Worker termination acts as a async barrier.
    */
-  async #worker(config, perMessageProcessor, id) {
+  async #worker(config, perMessageProcessor, fetcher, id) {
     let nextIdx = -1;
     while (!this.#workerTerminationScheduled) {
       /* Invalidate the message cache if needed */
@@ -1066,7 +1122,7 @@ class Consumer {
         continue;
       }
 
-      const m = await this.#consumeSingleCached(nextIdx).catch(e => {
+      const m = await fetcher(nextIdx).catch(e => {
         /* Since this error cannot be exposed to the user in the current situation, just log and retry.
          * This is due to restartOnFailure being set to always true. */
         if (this.#logger)
@@ -1097,13 +1153,18 @@ class Consumer {
   async #runInternal(config) {
     this.#concurrency = config.partitionsConsumedConcurrently;
     const perMessageProcessor = config.eachMessage ? this.#messageProcessor : this.#batchProcessor;
+    /* TODO: make this dynamic, based on max batch size / size of last message seen. */
+    const maxBatchSize = 30;
+    const fetcher = config.eachMessage
+      ? (savedIdx) => this.#consumeSingleCached(savedIdx)
+      : (savedIdx) => this.#consumeCachedN(savedIdx, maxBatchSize);
     this.#workers = [];
     while (!(await acquireOrLog(this.#lock, this.#logger)));
 
     while (!this.#disconnectStarted) {
       this.#workerTerminationScheduled = false;
       const workersToSpawn = Math.max(1, Math.min(this.#concurrency, this.#partitionCount));
-      this.#workers = Array(workersToSpawn).fill().map((_, i) => this.#worker(config, perMessageProcessor.bind(this), i));
+      this.#workers = Array(workersToSpawn).fill().map((_, i) => this.#worker(config, perMessageProcessor.bind(this), fetcher.bind(this), i));
       await Promise.all(this.#workers);
 
       /* One of the possible reasons for the workers to end is that the cache is globally stale.
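
Putting the pieces together: the worker loop now takes a fetcher so that the eachMessage and eachBatch paths share the same scheduling code. A simplified model of that composition (illustration only, not the actual #worker, which also handles cache invalidation, pending seeks and pauses):

    // Simplified model of how fetcher and processor compose in a worker.
    async function workerLoop(fetcher, processor, shouldStop) {
      let nextIdx = -1;
      while (!shouldStop()) {
        const unit = await fetcher(nextIdx); // a Message, a Message[], or null
        if (!unit) {
          nextIdx = -1; // nothing available right now; start from a fresh fetch
          continue;
        }
        nextIdx = await processor(unit); // processor returns the cache index to reuse
      }
    }
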
