@@ -617,52 +617,60 @@ class Consumer {
   }
 
   /**
-   * Converts a message returned by node-rdkafka into a message that can be used by the eachBatch callback.
-   * @param {import("../..").Message} message
+   * Converts a list of messages returned by node-rdkafka into a message that can be used by the eachBatch callback.
+   * @param {import("../..").Message[]} messages - must not be empty. Must contain messages from the same topic and partition.
    * @returns {import("../../types/kafkajs").EachBatchPayload}
-   * @note Unlike the KafkaJS consumer, a batch here is for API compatibility only. It is always a single message.
    */
-  #createBatchPayload(message) {
-    let key = message.key;
-    if (typeof key === 'string') {
-      key = Buffer.from(key);
-    }
-
-    let timestamp = message.timestamp ? String(message.timestamp) : '';
-
-    let headers;
-    if (message.headers) {
-      headers = {}
-      for (const [key, value] of Object.entries(message.headers)) {
-        if (!Object.hasOwn(headers, key)) {
-          headers[key] = value;
-        } else if (headers[key].constructor === Array) {
-          headers[key].push(value);
-        } else {
-          headers[key] = [headers[key], value];
-        }
-      }
-    }
-
-    const messageConverted = {
-      key,
-      value: message.value,
-      timestamp,
-      attributes: 0,
-      offset: String(message.offset),
-      size: message.size,
-      leaderEpoch: message.leaderEpoch,
-      headers
-    };
+  #createBatchPayload(messages) {
+    const topic = messages[0].topic;
+    const partition = messages[0].partition;
+
+    const messagesConverted = [];
+    for (let i = 0; i < messages.length; i++) {
+      const message = messages[i];
+      let key = message.key;
+      if (typeof key === 'string') {
+        key = Buffer.from(key);
+      }
+
+      let timestamp = message.timestamp ? String(message.timestamp) : '';
+
+      let headers;
+      if (message.headers) {
+        headers = {}
+        for (const [key, value] of Object.entries(message.headers)) {
+          if (!Object.hasOwn(headers, key)) {
+            headers[key] = value;
+          } else if (headers[key].constructor === Array) {
+            headers[key].push(value);
+          } else {
+            headers[key] = [headers[key], value];
+          }
+        }
+      }
+
+      const messageConverted = {
+        key,
+        value: message.value,
+        timestamp,
+        attributes: 0,
+        offset: String(message.offset),
+        size: message.size,
+        leaderEpoch: message.leaderEpoch,
+        headers
+      };
+
+      messagesConverted.push(messageConverted);
+    }
 
     const batch = {
-      topic: message.topic,
-      partition: message.partition,
+      topic,
+      partition,
       highWatermark: '-1001', // Invalid - we don't fetch it
-      messages: [messageConverted],
+      messages: messagesConverted,
       isEmpty: () => false,
-      firstOffset: () => messageConverted.offset,
-      lastOffset: () => messageConverted.offset,
+      firstOffset: () => messagesConverted[0].offset,
+      lastOffset: () => messagesConverted[messagesConverted.length - 1].offset,
       offsetLag: () => notImplemented(),
       offsetLagLow: () => notImplemented(),
     };
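
A quick illustration of the header folding in the loop above (the key/value pairs are made-up): the first occurrence of a key is stored as a scalar, and any repeated key converts the entry into an array that collects the subsequent values.

    // Hypothetical input entries, in order: ['retry', '1'], ['retry', '2'], ['origin', 'svc-a']
    // After folding:
    // headers === { retry: ['1', '2'], origin: 'svc-a' }
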
@@ -672,7 +680,7 @@ class Consumer {
       _messageResolved: false,
       resolveOffset: () => { returnPayload._messageResolved = true; },
       heartbeat: async () => { /* no op */ },
-      pause: this.pause.bind(this, [{ topic: message.topic, partitions: [message.partition] }]),
+      pause: this.pause.bind(this, [{ topic, partitions: [partition] }]),
       commitOffsetsIfNecessary: async () => { /* no op */ },
       uncommittedOffsets: () => notImplemented(),
       isRunning: () => this.#running,
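
For reference, a minimal sketch of a KafkaJS-style eachBatch handler driving this payload (the handler body and process() are illustrative, not part of this change). Note that in this compatibility layer resolveOffset() ignores its argument and simply flags the whole batch as resolved via _messageResolved:

    await consumer.run({
      eachBatch: async ({ batch, resolveOffset, heartbeat }) => {
        for (const message of batch.messages) {
          await process(message);         // hypothetical per-message handler
          resolveOffset(message.offset);  // marks the batch resolved here
          await heartbeat();              // no-op in this implementation
        }
      },
    });
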
@@ -685,7 +693,7 @@ class Consumer {
   /**
    * Consumes a single message from the internal consumer.
    * @param {number} savedIndex - the index of the message in the cache to return.
-   * @returns {Promise<import("../..").Message>} a promise that resolves to a single message.
+   * @returns {Promise<import("../..").Message | null>} a promise that resolves to a single message or null.
    * @note this method caches messages as well, but returns only a single message.
    */
   async #consumeSingleCached(savedIndex) {
@@ -726,6 +734,52 @@ class Consumer {
     });
   }
 
+  /**
+   * Consumes a batch of up to `size` messages from the internal consumer.
+   * @param {number} savedIndex - the index of the message in the cache to return.
+   * @param {number} size - the number of messages to fetch.
+   * @returns {Promise<import("../..").Message[] | null>} a promise that resolves to a list of messages or null.
+   * @note this method caches messages as well.
+   * @sa #consumeSingleCached
+   */
+  async #consumeCachedN(savedIndex, size) {
+    const msgs = this.#messageCache.nextN(savedIndex, size);
+    if (msgs) {
+      return msgs;
+    }
+
+    /* It's possible that we get msgs = null, but that's because partitionConcurrency
+     * exceeds the number of partitions containing messages. So in this case,
+     * we should not call for new fetches; rather, try to focus on what we have left.
+     */
+    if (!msgs && this.#messageCache.pendingSize() !== 0) {
+      return null;
+    }
+
+    if (this.#fetchInProgress) {
+      return null;
+    }
+
+    this.#fetchInProgress = true;
+    return new Promise((resolve, reject) => {
+      this.#internalClient.consume(this.#messageCache.maxSize, (err, messages) => {
+        this.#fetchInProgress = false;
+        if (err) {
+          reject(createKafkaJsErrorFromLibRdKafkaError(err));
+          return;
+        }
+        this.#messageCache.addMessages(messages);
+        const msgsList = this.#messageCache.nextN(-1, size);
+        if (messages.length === this.#messageCache.maxSize) {
+          this.#messageCache.increaseMaxSize();
+        } else {
+          this.#messageCache.decreaseMaxSize(messages.length);
+        }
+        resolve(msgsList);
+      });
+    });
+  }
+
   /**
    * Consumes n messages from the internal consumer.
    * @returns {Promise<import("../..").Message[]>} a promise that resolves to a list of messages.
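
The increaseMaxSize/decreaseMaxSize calls above form a simple feedback loop on fetch size: a fetch that fills the cache's maxSize suggests more data is waiting, so the next request grows, while a short fetch shrinks the request toward actual demand. A toy model of that dynamic, assuming doubling growth and a floor of one message (the real MessageCache policy may differ):

    class FetchSizeModel {
      constructor() { this.maxSize = 1; }
      // Full fetch: grow the next request (doubling is an assumption).
      increaseMaxSize() { this.maxSize *= 2; }
      // Short fetch: shrink toward what the broker actually returned.
      decreaseMaxSize(returned) { this.maxSize = Math.max(1, returned); }
    }
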
@@ -964,15 +1018,15 @@ class Consumer {
   }
 
   /**
-   * Processes a batch message (a single message as of now).
+   * Processes a batch of messages.
    *
-   * @param m Message as obtained from #consumeSingleCached.
+   * @param ms Messages as obtained from #consumeCachedN.
    * @param config Config as passed to run().
    * @returns {Promise<number>} the cache index of the message that was processed.
    */
-  async #batchProcessor(m, config) {
+  async #batchProcessor(ms, config) {
     let eachMessageProcessed = false;
-    const payload = this.#createBatchPayload(m);
+    const payload = this.#createBatchPayload(ms);
     try {
       await config.eachBatch(payload);
       if (config.eachBatchAutoResolve) {
@@ -1005,23 +1059,25 @@ class Consumer {
     }
 
     /* If the message is unprocessed, due to an error, or because the user has not resolved it, we seek back. */
+    /* TODO: currently we're seeking to just the first offset. Fix this to take care of messages we are resolving. */
     if (!eachMessageProcessed) {
       await this.seek({
-        topic: m.topic,
-        partition: m.partition,
-        offset: m.offset,
+        topic: ms[0].topic,
+        partition: ms[0].partition,
+        offset: ms[0].offset,
       });
     }
 
     /* Store the offsets we need to store, or at least record them for cache invalidation reasons. */
+    /* TODO: currently we just store the last offset of the batch. Fix it to store the last resolved one + 1. */
    if (eachMessageProcessed) {
       try {
         if (!this.#userManagedStores) {
           this.#internalClient.offsetsStore([{
-            topic: m.topic, partition: m.partition, offset: Number(m.offset) + 1, leaderEpoch: m.leaderEpoch
+            topic: ms[ms.length - 1].topic, partition: ms[ms.length - 1].partition, offset: Number(ms[ms.length - 1].offset) + 1, leaderEpoch: ms[ms.length - 1].leaderEpoch
           }]);
         }
-        this.#lastConsumedOffsets.set(partitionKey(m), Number(m.offset) + 1);
+        this.#lastConsumedOffsets.set(partitionKey(ms[ms.length - 1]), Number(ms[ms.length - 1].offset) + 1);
       } catch (e) {
         /* Not much we can do, except log the error. */
         if (this.#logger)
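
A worked example of the offset arithmetic above: for a fully processed batch covering offsets 100..129 of a partition, ms[ms.length - 1].offset is '129', so the store receives 130, i.e. the next offset the consumer should read from that partition:

    // Number(ms[ms.length - 1].offset) + 1 === 130
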
@@ -1035,7 +1091,7 @@ class Consumer {
     if (this.#checkPendingSeeks)
       await this.#seekInternal();
 
-    return m.index;
+    return ms.index;
   }
 
   /**
@@ -1050,7 +1106,7 @@ class Consumer {
    *
    * Worker termination acts as an async barrier.
    */
-  async #worker(config, perMessageProcessor, id) {
+  async #worker(config, perMessageProcessor, fetcher, id) {
    let nextIdx = -1;
    while (!this.#workerTerminationScheduled) {
      /* Invalidate the message cache if needed */
@@ -1066,7 +1122,7 @@ class Consumer {
        continue;
      }
 
-      const m = await this.#consumeSingleCached(nextIdx).catch(e => {
+      const m = await fetcher(nextIdx).catch(e => {
        /* Since this error cannot be exposed to the user in the current situation, just log and retry.
         * This is due to restartOnFailure being set to always true. */
        if (this.#logger)
@@ -1097,13 +1153,18 @@ class Consumer {
   async #runInternal(config) {
     this.#concurrency = config.partitionsConsumedConcurrently;
     const perMessageProcessor = config.eachMessage ? this.#messageProcessor : this.#batchProcessor;
+    /* TODO: make this dynamic, based on max batch size / size of last message seen. */
+    const maxBatchSize = 30;
+    const fetcher = config.eachMessage
+      ? (savedIdx) => this.#consumeSingleCached(savedIdx)
+      : (savedIdx) => this.#consumeCachedN(savedIdx, maxBatchSize);
     this.#workers = [];
     while (!(await acquireOrLog(this.#lock, this.#logger)));
 
     while (!this.#disconnectStarted) {
       this.#workerTerminationScheduled = false;
       const workersToSpawn = Math.max(1, Math.min(this.#concurrency, this.#partitionCount));
-      this.#workers = Array(workersToSpawn).fill().map((_, i) => this.#worker(config, perMessageProcessor.bind(this), i));
+      this.#workers = Array(workersToSpawn).fill().map((_, i) => this.#worker(config, perMessageProcessor.bind(this), fetcher.bind(this), i));
       await Promise.all(this.#workers);
 
       /* One of the possible reasons for the workers to end is that the cache is globally stale.
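
For context, a call like the following takes the eachBatch branch above, so each worker iteration fetches up to maxBatchSize = 30 messages via #consumeCachedN (the config values and handler body are illustrative, not from this diff):

    await consumer.run({
      partitionsConsumedConcurrently: 3,  // capped at the number of assigned partitions
      eachBatchAutoResolve: true,
      eachBatch: async ({ batch }) => {
        console.log(`batch of ${batch.messages.length} from ${batch.topic}[${batch.partition}]`);
      },
    });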