Add message set capability to message cache

milindl · milindl · commit 9a2f9c42e489 · 2024-06-25T11:23:50.000+05:30
diff --git a/lib/kafkajs/_consumer_cache.js b/lib/kafkajs/_consumer_cache.js
@@ -52,6 +52,26 @@ class PerPartitionMessageCache {
     next() {
         return this.currentIndex < this.cache.length ? this.cache[this.currentIndex++] : null;
     }
+
+    /**
+     * Returns up to `n` of the next elements in the cache and moves the read position past them.
+     *
+     * @param {number} n - Maximum number of elements to return; values below 1 yield an empty array.
+     * @returns {Array|null} Up to `n` next elements, or null if none are available.
+     * @warning Does not check for staleness.
+     */
+    nextN(n) {
+        if (this.currentIndex >= this.cache.length) {
+            return null;
+        }
+
+        /* Clamp the window to the end of the cache; a non-positive `n` must never move the read
+         * position backwards or turn into a negative (from-the-end) slice bound. */
+        const end = Math.min(this.currentIndex + Math.max(n, 0), this.cache.length);
+        const res = this.cache.slice(this.currentIndex, end);
+        this.currentIndex = end;
+        return res;
+    }
 }
 
 
@@ -387,6 +407,66 @@ class MessageCache {
         return null; // Caller is responsible for triggering fetch logic here if next == null.
     }
 
+    /**
+     * Returns up to `size` next elements from a single entry of `ppcList` as an array, or null if
+     * none exists. The returned array carries an `index` property, to be passed back as `idx`.
+     *
+     * @param {number} idx - Index attached to a previously returned array, or -1 if there is none.
+     * @param {number} size - Maximum number of elements to return.
+     * @sa next, the behaviour is similar in other aspects.
+     */
+    nextN(idx = -1, size = 1) {
+        let index = idx;
+        if (index !== -1 && !this.pendingIndices.has(index)) {
+            /* The user is behaving well by returning the index to us, but in the meanwhile, it's possible
+             * that we ran out of messages and fetched a new batch. So we just discard what the user is
+             * returning to us. */
+            this.logger.error("Returning unowned index", idx, "to cache. Discarding it.");
+            index = -1;
+        } else if (index !== -1) {
+            this.pendingIndices.delete(index);
+            /* We don't add the index back to the this.indices here because we're just going to remove it again the
+             * first thing in the loop below, so it's slightly better to just avoid doing it. */
+        }
+
+        if (index === -1) {
+            if (this.indices.size() === 0 || this.pendingIndices.size === this.maxConcurrency) {
+                return null;
+            }
+            index = this.indices.pop(); // index cannot be undefined here since indices.size > 0
+        }
+
+        /* This loop will always terminate: each iteration either returns (next is usable) or switches
+         * to a different PPC, and there are a finite number of PPCs. PPCs don't repeat within the loop
+         * since indices are popped from the heap and not put back in, or else a new index is created
+         * bounded by ppcList.length.
+         */
+        while (true) {
+            const next = this.ppcList[index].nextN(size);
+            if (this.ppcList[index].isStale() || next === null) {
+                /* If the current PPC is stale or empty, then we move on to the next one. Any PPC within
+                 * this.indices would be equally valid, but we prefer the next PPC (maxIndicesIndex + 1)
+                 * if available, since that avoids a heap operation. */
+                const toAdd = this.maxIndicesIndex + 1;
+                if (toAdd < this.ppcList.length) {
+                    this.maxIndicesIndex = toAdd;
+                    index = toAdd;
+                } else if (!this.indices.isEmpty()) {
+                    index = this.indices.pop();
+                } else {
+                    break; // nothing left.
+                }
+                continue;
+            }
+
+            this.pendingIndices.add(index);
+            /* Arrays are just objects. Setting a property is odd, but not disallowed. */
+            next.index = index;
+            return next;
+        }
+        return null; // Caller is responsible for triggering fetch logic here if next == null.
+    }
+
     /**
      * Clears the cache completely.
      * This resets it to a base state, and reduces the capacity of the cache back to 1.
diff --git a/test/promisified/unit/cache.spec.js b/test/promisified/unit/cache.spec.js
@@ -8,9 +8,6 @@ describe('MessageCache', () => {
             .fill()
             .map((_, i) => ({ topic: 'topic', partition: i % 3, number: i }));
 
-    beforeEach(() => {
-    });
-
     describe("with concurrency", () => {
         let cache;
         beforeEach(() => {
@@ -37,6 +34,31 @@ describe('MessageCache', () => {
             expect(receivedMessages.slice(61, 30).every((msg, i) => msg.partition === receivedMessages[60].partition && (msg.number - 3) ===  receivedMessages[i].number)).toBeTruthy();
         });
 
+        it('caches messages and retrieves N of them', () => {
+            const msgs = messages.slice(0, 90);
+            cache.addMessages(msgs);
+
+            const receivedMessages = [];
+            let nextIdx = -1;
+            const expectedFetchedSizes = [11, 11, 8];
+            for (let i = 0; i < (90/11); i++) {
+                /* We choose to fetch 11 messages together rather than 10 so that we can test the case where
+                 * remaining messages > 0 but less than requested size. */
+                const next = cache.nextN(nextIdx, 11);
+                /* There are 30 messages per partition, the first fetch will get 11, the second 11, and the last one
+                 * 8, and then it repeats for each partition. Assert non-null first so a null fails cleanly. */
+                expect(next).not.toBeNull();
+                expect(next.length).toBe(expectedFetchedSizes[i % 3]);
+                receivedMessages.push(...next);
+                nextIdx = next.index;
+            }
+
+            /* Results are on a per-partition basis and well-ordered. slice() takes (start, end), not (start, count). */
+            expect(receivedMessages.slice(1, 30).every((msg, i) => msg.partition === receivedMessages[0].partition && (msg.number - 3) ===  receivedMessages[i].number)).toBeTruthy();
+            expect(receivedMessages.slice(31, 60).every((msg, i) => msg.partition === receivedMessages[30].partition && (msg.number - 3) === receivedMessages[30 + i].number)).toBeTruthy();
+            expect(receivedMessages.slice(61, 90).every((msg, i) => msg.partition === receivedMessages[60].partition && (msg.number - 3) === receivedMessages[60 + i].number)).toBeTruthy();
+        });
+
         it('does not allow fetching more than 1 message at a time', () => {
             const msgs = messages.slice(0, 90);
             cache.addMessages(msgs);
@@ -119,8 +141,30 @@ describe('MessageCache', () => {
                 nextIdxs = [next0.index, next1.index];
             }
 
-            /* Results are on a zig-zag basis. */
-            expect(receivedMessages.every((msg, i) => msg.number === receivedMessages.number));
+            expect(receivedMessages.length).toBe(60);
+            expect(receivedMessages.filter(msg => msg.partition === 0).length).toBe(30);
+            expect(receivedMessages.filter(msg => msg.partition === 1).length).toBe(30);
+        });
+
+        it('caches messages and retrieves N of them 2-at-a-time', () => {
+            /* `partition` is i % 3, i.e. 0, 1 or 2: keep only partitions 0 and 1, one per concurrent fetcher. */
+            const msgs = messages.slice(0, 90).filter(msg => msg.partition !== 2);
+            cache.addMessages(msgs);
+
+            const receivedMessages = [];
+            let nextIdxs = [-1, -1];
+            for (let i = 0; i < 30/11; i++) {
+                const next0 = cache.nextN(nextIdxs[0], 11);
+                const next1 = cache.nextN(nextIdxs[1], 11);
+                expect(next0).not.toBeNull();
+                expect(next1).not.toBeNull();
+                receivedMessages.push(...next0, ...next1);
+                nextIdxs = [next0.index, next1.index];
+            }
+
+            expect(receivedMessages.length).toBe(60);
+            expect(receivedMessages.filter(msg => msg.partition === 0).length).toBe(30);
+            expect(receivedMessages.filter(msg => msg.partition === 1).length).toBe(30);
         });
 
         it('does not allow fetching more than 2 message at a time', () => {
@@ -141,6 +185,25 @@ describe('MessageCache', () => {
             expect(next).not.toBeNull();
         });
 
+
+        it('does not allow fetching more than 2 message sets at a time', () => {
+            const msgs = messages.slice(0, 90);
+            cache.addMessages(msgs);
+
+            let next = cache.nextN(-1, 11);
+            expect(next).not.toBeNull(); // Assert before reading `next.index`, so a null fails cleanly.
+            const savedIndex = next.index;
+            next = cache.nextN(-1, 11);
+            expect(next).not.toBeNull();
+            next = cache.nextN(-1, 11);
+            expect(next).toBeNull(); // Both earlier sets are still pending, so the limit is hit.
+            expect(cache.pendingSize()).toBe(2);
+
+            // Fetch after returning index works.
+            next = cache.nextN(savedIndex, 11);
+            expect(next).not.toBeNull();
+        });
+
         it('stops fetching from stale partition', () => {
             const msgs = messages.slice(0, 90);
             cache.addMessages(msgs);
@@ -163,6 +226,29 @@ describe('MessageCache', () => {
             expect(receivedMessages).toEqual(expect.arrayContaining(msgs.slice(0, 3)));
         });
 
+        it('stops fetching message sets from stale partition', () => {
+            const input = messages.slice(0, 90);
+            cache.addMessages(input);
+            const collected = [];
+            let returnedIdx = -1;
+            for (let round = 0; round < 3; round += 1) {
+                const batch = cache.nextN(returnedIdx, 11);
+                expect(batch).not.toBeNull();
+                collected.push(...batch);
+                returnedIdx = batch.index;
+                const [{ topic, partition }] = batch;
+                cache.markStale([{ topic, partition }]);
+            }
+
+            // Every partition served so far is stale, so no further message set can be handed out.
+            expect(cache.nextN(returnedIdx, 11)).toBeNull();
+            // Every index has been handed back, hence nothing remains pending.
+            expect(cache.pendingSize()).toBe(0);
+            // Eleven messages from each of the three toppars, i.e. the first 33 overall.
+            expect(collected.length).toBe(33);
+            expect(collected).toEqual(expect.arrayContaining(input.slice(0, 33)));
+        });
+
         it('one slow processing message should not slow down others', () => {
             const msgs = messages.slice(0, 90);
             cache.addMessages(msgs);
@@ -188,6 +274,31 @@ describe('MessageCache', () => {
             expect(receivedMessages.slice(31, 30).every((msg, i) => msg.partition === receivedMessages[30].partition && (msg.number - 3) ===  receivedMessages[i].number)).toBeTruthy();
         });
 
+        it('one slow processing message set should not slow down others', () => {
+            const msgs = messages.slice(0, 90);
+            cache.addMessages(msgs);
+
+            const receivedMessages = [];
+            let nextIdx = -1;
+            const slowMsg = cache.nextN(nextIdx, 11);
+            expect(slowMsg).not.toBeNull(); // Its index is never returned, so it stays pending.
+            for (let i = 0; i < 60/11; i++) { /* 60 - for non-partition 0 msgs */
+                const next = cache.nextN(nextIdx, 11);
+                expect(next).not.toBeNull();
+                receivedMessages.push(...next);
+                nextIdx = next.index;
+            }
+
+            // We should not be able to get anything more.
+            expect(cache.nextN(nextIdx, 11)).toBeNull();
+            // The slowMsg should be pending.
+            expect(cache.pendingSize()).toBe(1);
+
+            /* Messages should be partition-wise and well-ordered. slice() takes (start, end), not (start, count). */
+            expect(receivedMessages.slice(1, 30).every((msg, i) => msg.partition === receivedMessages[0].partition && (msg.number - 3) ===  receivedMessages[i].number)).toBeTruthy();
+            expect(receivedMessages.slice(31, 60).every((msg, i) => msg.partition === receivedMessages[30].partition && (msg.number - 3) === receivedMessages[30 + i].number)).toBeTruthy();
+        });
+
         it('should not be able to handle cache-clearance in the middle of processing', () => {
             const msgs = messages.slice(0, 90);
             cache.addMessages(msgs);