Skip to content

Commit de5fea3

Browse files
author
Collin Van Dyck
committed
Fix bug when seeking offsets in compressed batches
The reader was failing to mark the current message as read when fast forwarding through a compressed message set with more than one inner message. Additionally, it was only discarding the message key instead of both the key and the value. After fixing this, the reader was able to discard the message correctly, and start parsing the next message header.
1 parent c5dc58d commit de5fea3

File tree

2 files changed

+271
-41
lines changed

2 files changed

+271
-41
lines changed

message_reader.go

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ func (r *messageSetReader) readMessageV1(min int64, key readBytesFunc, val readB
157157
if err = r.discardN(4); err != nil {
158158
return
159159
}
160-
r.dumpHex("After discarding 4 bytes")
161160
// read and decompress the contained message set.
162161
var decompressed bytes.Buffer
163162
if err = r.readBytesWith(func(r *bufio.Reader, sz int, n int) (remain int, err error) {
@@ -197,8 +196,6 @@ func (r *messageSetReader) readMessageV1(min int64, key readBytesFunc, val readB
197196
base: offset,
198197
parent: r.readerStack,
199198
}
200-
201-
r.dumpHex("After pushing decompressed")
202199
continue
203200
}
204201

@@ -209,10 +206,23 @@ func (r *messageSetReader) readMessageV1(min int64, key readBytesFunc, val readB
209206
// When the messages are compressed kafka may return messages at an
210207
// earlier offset than the one that was requested, it's the client's
211208
// responsibility to ignore those.
209+
//
210+
// At this point, the message header has been read, so discarding
211+
// the rest of the message means we have to discard the key, and then
212+
// the value. Each of those is preceded by a 4-byte length. Discarding
213+
// them is then reading that length variable and then discarding that
214+
// amount.
212215
if offset < min {
213-
if r.remain, err = discardBytes(r.reader, r.remain); err != nil {
216+
// discard the key
217+
if err = r.discardBytes(); err != nil {
218+
return
219+
}
220+
// discard the value
221+
if err = r.discardBytes(); err != nil {
214222
return
215223
}
224+
// since we have fully consumed the message, mark as read
225+
r.markRead()
216226
continue
217227
}
218228
if err = r.readBytesWith(key); err != nil {
@@ -270,7 +280,6 @@ func (r *messageSetReader) readMessageV2(_ int64, key readBytesFunc, val readByt
270280
// stack. here we set the parent count to 0 so that when the child set is exhausted, the
271281
// reader will then try to read the header of the next message set
272282
r.readerStack.parent.count = 0
273-
r.dumpHex("After pushing stack")
274283
}
275284
}
276285
var length int64
@@ -311,6 +320,11 @@ func (r *messageSetReader) readMessageV2(_ int64, key readBytesFunc, val readByt
311320
return
312321
}
313322

323+
func (r *messageSetReader) discardBytes() (err error) {
324+
r.remain, err = discardBytes(r.reader, r.remain)
325+
return
326+
}
327+
314328
func (r *messageSetReader) discardN(sz int) (err error) {
315329
r.remain, err = discardN(r.reader, r.remain, sz)
316330
return
@@ -322,12 +336,14 @@ func (r *messageSetReader) markRead() {
322336
}
323337
r.count--
324338
r.unwindStack()
339+
r.log("Mark read remain=%d", r.remain)
325340
}
326341

327342
func (r *messageSetReader) unwindStack() {
328343
for r.count == 0 {
329344
if r.remain == 0 {
330345
if r.parent != nil {
346+
r.log("Popped reader stack")
331347
r.readerStack = r.parent
332348
continue
333349
}
@@ -370,11 +386,6 @@ func (r *messageSetReader) readHeader() (err error) {
370386
// currently reading a set of messages, no need to read a header until they are exhausted.
371387
return
372388
}
373-
374-
r.dumpHex("Before reading header")
375-
defer r.dumpHex("After reading header")
376-
377-
r.log("Reading header...")
378389
r.header = messagesHeader{}
379390
if err = r.readInt64(&r.header.firstOffset); err != nil {
380391
return
@@ -406,7 +417,7 @@ func (r *messageSetReader) readHeader() (err error) {
406417
return
407418
}
408419
r.count = 1
409-
r.log("Read v1 header with magic=%d and attributes=%d", r.header.magic, r.header.v1.attributes)
420+
r.log("Read v1 header with remain=%d offset=%d magic=%d and attributes=%d", r.remain, r.header.firstOffset, r.header.magic, r.header.v1.attributes)
410421
case 2:
411422
r.header.v2.leaderEpoch = crcOrLeaderEpoch
412423
if err = r.readInt32(&r.header.crc); err != nil {

0 commit comments

Comments
 (0)