
Commit 4ea26f0

Add per-partition concurrency
1 parent 28d2253 commit 4ea26f0
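
The option this commit wires up mirrors KafkaJS's `partitionsConsumedConcurrently`: up to N partitions are handed to `eachMessage` at the same time, while messages within a single partition are still processed one at a time. A minimal usage sketch follows; the group id, topic name, and handler body are placeholders, and the surrounding client setup is assumed to come from this library's KafkaJS-compatible API.

// Sketch only: `kafka` is assumed to be a client created through the
// KafkaJS-compatible API of this library; all names below are placeholders.
async function runWithConcurrency(kafka) {
  const consumer = kafka.consumer({ groupId: 'example-group' });

  await consumer.connect();
  await consumer.subscribe({ topics: ['example-topic'] });

  await consumer.run({
    // New in this commit: process up to 3 partitions concurrently.
    // Defaults to 1 when the option is omitted.
    partitionsConsumedConcurrently: 3,
    eachMessage: async ({ topic, partition, message }) => {
      console.log(`${topic}[${partition}] @ ${message.offset}`);
    },
  });
}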

File tree

4 files changed: +441 −102 lines changed


lib/kafkajs/_consumer.js

Lines changed: 165 additions & 97 deletions
@@ -130,6 +130,24 @@ class Consumer {
    */
   #userManagedStores = false;
 
+  /**
+   * Populated with Promises for each partition that is being processed concurrently.
+   * Each promise might run eachMessage/eachBatch.
+   */
+  #runningPromises = [];
+
+  /**
+   * Each message that is consumed has an associated cache index.
+   * This array maps an index within runningPromises to the associated cache index,
+   * i.e. runningPromises[i] is associated with the cache index #savedIndexToPromiseIndex[i].
+   */
+  #savedIndexToPromiseIndex = [];
+
+  /**
+   * Signals an intent to disconnect the consumer.
+   */
+  #disconnectStarted = false;
+
   /**
    * @constructor
    * @param {import("../../types/kafkajs").ConsumerConfig} kJSConfig
@@ -637,16 +655,17 @@ class Consumer {
       return msg;
     }
 
-    // TODO: Add this block for concurrency
-    // if (!msg) {
-    // // it's possible that we get msg = null, but that's because partitionConcurrency
-    // // exceeds the number of partitions containing messages. So in this case,
-    // // we should not call for new fetches, rather, try to focus on what we have left.
-    // return null;
-    // }
+    /* It's possible that we get msg = null, but that's because partitionConcurrency
+     * exceeds the number of partitions containing messages. So in this case,
+     * we should not call for new fetches, rather, try to focus on what we have left.
+     */
+    if (!msg && this.#messageCache.pendingSize() !== 0) {
+      return null;
+    }
 
     return new Promise((resolve, reject) => {
       this.#internalClient.consume(this.#messageCache.maxSize, (err, messages) => {
+
         if (err) {
           reject(createKafkaJsErrorFromLibRdKafkaError(err));
           return;
@@ -721,9 +740,6 @@ class Consumer {
     }
 
     const rdKafkaConfig = this.#config();
-    const maxPollInterval = rdKafkaConfig['max.poll.interval.ms'] ?? 300000;
-    this.#messageCache = new MessageCache(Math.floor(maxPollInterval * 0.8), 1);
-
     this.#state = ConsumerState.CONNECTING;
     this.#internalClient = new RdKafka.KafkaConsumer(rdKafkaConfig);
     this.#internalClient.on('ready', this.#readyCb.bind(this));
@@ -815,10 +831,6 @@ class Consumer {
       throw new error.KafkaJSError(CompatibilityErrorMessages.runOptionsAutoCommitThreshold(), { code: error.ErrorCodes.ERR__NOT_IMPLEMENTED });
     }
 
-    if (Object.hasOwn(config, 'partitionsConsumedConcurrently')) {
-      throw new error.KafkaJSError(CompatibilityErrorMessages.runOptionsPartitionsConsumedConcurrently(), { code: error.ErrorCodes.ERR__NOT_IMPLEMENTED });
-    }
-
     if (this.#running) {
       throw new error.KafkaJSError('Consumer is already running.', { code: error.ErrorCodes.ERR__STATE });
     }
@@ -829,6 +841,14 @@ class Consumer {
       config.eachBatchAutoResolve = true;
     }
 
+    if (!Object.hasOwn(config, 'partitionsConsumedConcurrently')) {
+      config.partitionsConsumedConcurrently = 1;
+    }
+
+    const rdKafkaConfig = this.#config();
+    const maxPollInterval = rdKafkaConfig['max.poll.interval.ms'] ?? 300000;
+    this.#messageCache = new MessageCache(Math.floor(maxPollInterval * 0.8), config.partitionsConsumedConcurrently);
+
     /* We deliberately don't await this. */
     if (config.eachMessage) {
       this.#runInternalEachMessage(config);
@@ -837,126 +857,173 @@ class Consumer {
     }
   }
 
+  /**
+   * Processes a single message.
+   *
+   * @param m Message as obtained from #consumeSingleCached.
+   * @param config Config as passed to run().
+   * @returns {Promise<number>} the cache index of the message that was processed.
+   */
+  async #messageProcessor(m, config) {
+    let eachMessageProcessed = false;
+    const payload = this.#createPayload(m);
+
+    try {
+      await config.eachMessage(payload);
+      eachMessageProcessed = true;
+    } catch (e) {
+      /* It's not only possible, but expected that an error will be thrown by eachMessage.
+       * This is especially true since the pattern of pause() followed by throwing an error
+       * is encouraged. To meet the API contract, we seek one offset backward (which
+       * means seeking to the message offset).
+       * However, we don't do this inside the catch, but just outside it. This is because throwing an
+       * error is not the only case where we might want to seek back.
+       *
+       * So - do nothing but a debug log, but at this point eachMessageProcessed is false.
+       */
+      this.#logger.debug(`Consumer encountered error while processing message. Error details: ${e}: ${e.stack}. The same message may be reprocessed.`);
+    }
+
+    /* If the message is unprocessed, due to an error, or because the user has not resolved it, we seek back. */
+    if (!eachMessageProcessed) {
+      await this.seek({
+        topic: m.topic,
+        partition: m.partition,
+        offset: m.offset,
+      });
+    }
+
+    /* Store the offsets we need to store, or at least record them for cache invalidation reasons. */
+    if (eachMessageProcessed) {
+      try {
+        if (!this.#userManagedStores) {
+          this.#internalClient.offsetsStore([{
+            topic: m.topic, partition: m.partition, offset: Number(m.offset) + 1, leaderEpoch: m.leaderEpoch
+          }]);
+        }
+        this.#lastConsumedOffsets.set(partitionKey(m), Number(m.offset) + 1);
+      } catch (e) {
+        /* Not much we can do, except log the error. */
+        if (this.#logger)
+          this.#logger.error(`Consumer encountered error while storing offset. Error details: ${JSON.stringify(e)}`);
+      }
+    }
+
+    /* Force an immediate seek here. It's possible that there are no more messages to be passed to the user,
+     * but the user seeked in the call to eachMessage, or else we encountered the error catch block.
+     * In that case, the results of that seek will never be reflected unless we do this.
+     * TODO: this block can probably be common and not per message. */
+    if (this.#checkPendingSeeks)
+      await this.#seekInternal();
+
+    return m.index;
+  }
+
+  /**
+   * Awaits the completion of a single message's processing.
+   *
+   * @returns {Promise<number>} the cache index of the message in the cache that was processed.
+   */
+  async waitOne() {
+    const savedIndex = await Promise.any(this.#runningPromises);
+    const promiseIndex = this.#savedIndexToPromiseIndex.findIndex(p => p === savedIndex);
+    if (promiseIndex === -1) {
+      console.error("Promise not found in runningPromises");
+      throw new Error("Promise not found in runningPromises");
+    }
+    this.#runningPromises[promiseIndex] = this.#runningPromises[this.#runningPromises.length - 1];
+    this.#savedIndexToPromiseIndex[promiseIndex] = this.#savedIndexToPromiseIndex[this.#savedIndexToPromiseIndex.length - 1];
+    this.#runningPromises.pop();
+    this.#savedIndexToPromiseIndex.pop();
+
+    return savedIndex;
+  }
+
+  /**
+   * Awaits the completion of all messages that are being processed.
+   *
+   * @returns {Promise<number[]>} a list of cache indices of the messages that were processed.
+   */
+  async waitAll() {
+    const indices = await Promise.all(this.#runningPromises);
+    this.#runningPromises = [];
+    this.#savedIndexToPromiseIndex = [];
+    return indices;
+  }
+
   /* Internal polling loop.
    * It accepts the same config object that `run` accepts, but config.eachMessage must be set. */
   async #runInternalEachMessage(config) {
-    let savedIdx = -1;
-    while (this.#state === ConsumerState.CONNECTED) {
+    const concurrency = config.partitionsConsumedConcurrently;
+    let nextIdx = -1;
+    while (!(await acquireOrLog(this.#lock, this.#logger)));
 
-      /* We need to acquire a lock here, because we need to ensure that we don't
-       * disconnect while in the middle of processing a message. */
-      if (!(await acquireOrLog(this.#lock, this.#logger)))
-        continue;
+    while (this.#state === ConsumerState.CONNECTED) {
+      /* Release lock and cleanup if we intend to disconnect. */
+      if (this.#disconnectStarted) {
+        const indices = await this.waitAll();
+        indices.forEach(idx => this.#messageCache.return(idx));
+        if (nextIdx !== -1) {
+          this.#messageCache.return(nextIdx);
+        }
+        nextIdx = -1;
+        this.#lock.release();
+        break;
+      }
 
       /* Invalidate the message cache if needed */
       const locallyStale = this.#messageCache.popLocallyStale();
       if (this.#messageCache.isStale()) { /* global staleness */
-        // TODO: await all concurrent promises for eachMessage here.
+        const indices = await this.waitAll();
+        indices.forEach(idx => this.#messageCache.return(idx));
+        if (nextIdx !== -1) {
+          this.#messageCache.return(nextIdx);
+        }
+        nextIdx = -1;
         await this.#clearCacheAndResetPositions();
-        await this.#lock.release();
         continue;
       } else if (locallyStale.length !== 0) { /* local staleness */
         // TODO: is it correct to await some concurrent promises for eachMessage here?
         // to be safe we can do it, but I don't think we really need to do that for
-        // correctness.
+        // any correctness reason.
         await this.#clearCacheAndResetPositions(locallyStale);
-        await this.#lock.release();
         continue;
       }
 
-      const m = await this.#consumeSingleCached(savedIdx).catch(e => {
+      const m = await this.#consumeSingleCached(nextIdx).catch(e => {
         /* Since this error cannot be exposed to the user in the current situation, just log and retry.
          * This is due to restartOnFailure being set to always true. */
         if (this.#logger)
           this.#logger.error(`Consumer encountered error while consuming. Retrying. Error details: ${e} : ${e.stack}`);
       });
 
+      nextIdx = -1;
+
       if (!m) {
-        // await all concurrency related promises right here if this is null, if any such promise exists.
+        // await any concurrency related promises right here if this is null, if any such promise exists.
         // see note in consumeSingleCached
-        savedIdx = -1;
-        await this.#lock.release();
-        continue;
-      }
-      savedIdx = m.index;
-
-      /* TODO: add partitionsConsumedConcurrently-based concurrency here.
-       * If we maintain a map of topic partitions to promises, and a counter,
-       * we can probably achieve it with the correct guarantees of ordering
-       * though to maximize performance, we need to consume only from partitions for which
-       * an eachMessage call is not already going.
-       * It's risky to consume, and then store the message in something like an
-       * array/list until it can be processed, because librdkafka marks it as
-       * 'stored'... but anyway - we can implement something like this.
-       */
-
-      /* Make pending seeks 'concrete'. */
-      if (this.#checkPendingSeeks) {
-        const invalidateMessage = await this.#seekInternal({ topic: m.topic, partition: m.partition });
-        if (invalidateMessage) {
-          /* Don't pass this message on to the user if this topic partition was seeked to. */
-          this.#lock.release();
-          continue;
+        if (this.#runningPromises.length) {
+          nextIdx = await this.waitOne();
         }
+        continue;
       }
 
-      let eachMessageProcessed = false;
-      const payload = this.#createPayload(m);
-      try {
-        await config.eachMessage(payload);
-        eachMessageProcessed = true;
-      } catch (e) {
-        /* It's not only possible, but expected that an error will be thrown by eachMessage.
-         * This is especially true since the pattern of pause() followed by throwing an error
-         * is encouraged. To meet the API contract, we seek one offset backward (which
-         * means seeking to the message offset).
-         * However, we don't do this inside the catch, but just outside it. This is because throwing an
-         * error is not the only case where we might want to seek back.
-         *
-         * So - do nothing but a debug log, but at this point eachMessageProcessed is false.
-         */
-        this.#logger.debug(`Consumer encountered error while processing message. Error details: ${e}: ${e.stack}. The same message may be reprocessed.`);
-      }
-
-      /* If the message is unprocessed, due to an error, or because the user has not resolved it, we seek back. */
-      if (!eachMessageProcessed) {
-        await this.seek({
-          topic: m.topic,
-          partition: m.partition,
-          offset: m.offset,
-        });
-      }
+      const p = this.#messageProcessor(m, config);
+      this.#runningPromises.push(p);
+      this.#savedIndexToPromiseIndex.push(m.index);
 
-      /* Store the offsets we need to store, or at least record them for cache invalidation reasons. */
-      if (eachMessageProcessed) {
-        try {
-          if (!this.#userManagedStores) {
-            this.#internalClient.offsetsStore([{
-              topic: m.topic, partition: m.partition, offset: Number(m.offset) + 1, leaderEpoch: m.leaderEpoch
-            }]);
-          }
-          this.#lastConsumedOffsets.set(partitionKey(m), Number(m.offset) + 1);
-        } catch (e) {
-          /* Not much we can do, except log the error. */
-          if (this.#logger)
-            this.#logger.error(`Consumer encountered error while storing offset. Error details: ${JSON.stringify(e)}`);
-        }
+      if (this.#runningPromises.length < concurrency) {
+        continue;
       }
 
-      /* Force a immediate seek here. It's possible that there are no more messages to be passed to the user,
-       * but the user seeked in the call to eachMessage, or else we encountered the error catch block.
-       * In that case, the results of that seek will never be reflected unless we do this. */
-      if (this.#checkPendingSeeks)
-        await this.#seekInternal();
+      nextIdx = await this.waitOne();
 
       /* TODO: another check we need to do here is to see how kafkaJS is handling
        * commits. Are they committing after a message is _processed_?
       * In that case we need to turn off librdkafka's auto-commit, and commit
       * inside this function.
       */
-
-      /* Release the lock so that any pending disconnect can go through. */
-      await this.#lock.release();
     }
   }
 
@@ -1497,6 +1564,7 @@ class Consumer {
       return;
     }
 
+    this.#disconnectStarted = true;
    while (!(await acquireOrLog(this.#lock, this.#logger))); /* Just retry... */
 
    this.#state = ConsumerState.DISCONNECTING;
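
Stepping back from the diff: the core of the change is a small promise pool. #runningPromises holds one in-flight #messageProcessor promise per partition being processed, #savedIndexToPromiseIndex records which cache index each slot belongs to, waitOne() uses Promise.any plus a swap-with-last removal to reap whichever handler finishes first, and waitAll() drains everything (used on disconnect and on cache invalidation). The following is a self-contained sketch of that pattern only, independent of the consumer internals; the class and variable names are illustrative, not from the commit.

// Standalone sketch of the pool bookkeeping used by waitOne()/waitAll():
// each in-flight task resolves to an id that identifies it, Promise.any()
// yields whichever finishes first, and that slot is removed via
// swap-with-last + pop (O(1), slot order is not preserved).
class PromisePool {
  constructor() {
    this.running = [];   // pending promises, each resolving to its own id
    this.ids = [];       // ids[i] identifies running[i]
  }

  add(id, promise) {
    this.running.push(promise.then(() => id));
    this.ids.push(id);
  }

  /* Wait for any one task to finish and return its id. */
  async waitOne() {
    const id = await Promise.any(this.running);
    const i = this.ids.indexOf(id);
    this.running[i] = this.running[this.running.length - 1];
    this.ids[i] = this.ids[this.ids.length - 1];
    this.running.pop();
    this.ids.pop();
    return id;
  }

  /* Wait for everything currently in flight and return all ids. */
  async waitAll() {
    const ids = await Promise.all(this.running);
    this.running = [];
    this.ids = [];
    return ids;
  }
}

// Usage: keep at most `concurrency` tasks in flight, reaping one whenever
// the pool is full, then drain the remainder at the end.
async function demo() {
  const pool = new PromisePool();
  const concurrency = 3;
  for (let id = 0; id < 10; id++) {
    pool.add(id, new Promise(res => setTimeout(res, Math.random() * 100)));
    if (pool.running.length >= concurrency) {
      console.log('finished:', await pool.waitOne());
    }
  }
  console.log('drained:', await pool.waitAll());
}

demo();

The swap-with-last removal keeps reaping constant-time at the cost of slot order, which does not matter here because the resolved value (the cache index in the commit, the id above) identifies the work rather than its position in the array.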
