Commit b14efd8
KAFKA-19732, KAFKA-19716: Clear out coordinator snapshots periodically while loading (#20591)

When nested Timeline collections are created and discarded while loading a coordinator partition, references to them accumulate in the current snapshot. Allow the GC to reclaim them by starting a new snapshot and discarding previous snapshots every 16,384 records. Small intervals degrade loading times for non-transactional offset commit workloads, while large intervals degrade loading times for transactional workloads. A default of 16,384 was chosen as a compromise.

Cherry-pick of d067c6c.

Reviewers: David Jacot <[email protected]>
1 parent e76213e commit b14efd8
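The fix rests on one observation: each time the loader commits an offset, the coordinator can discard every snapshot older than that offset, releasing the references that pin replayed temporaries in memory. A minimal sketch of that idea in Scala, using a toy registry (illustrative names only, not Kafka's actual timeline classes):

import scala.collection.mutable

// Illustrative sketch: a "snapshot" here is just a buffer of references that
// pins replayed state until the snapshot is discarded.
final class ToySnapshotRegistry {
  private var snapshots = List(mutable.ArrayBuffer.empty[AnyRef])

  // Replaying a record may create temporary collections; the current snapshot
  // holds references to them so that it can be reverted.
  def onReplay(ref: AnyRef): Unit = snapshots.head += ref

  // Committing starts a fresh snapshot and drops all previous ones, releasing
  // their references so the GC can reclaim the temporaries.
  def commit(): Unit = snapshots = List(mutable.ArrayBuffer.empty[AnyRef])
}

Without a periodic commit() during loading, everything created while replaying the partition stays reachable from the oldest snapshot until loading completes; committing every 16,384 records bounds that working set.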

File tree

3 files changed: +130 -21 lines changed

core/src/main/scala/kafka/coordinator/group/CoordinatorLoaderImpl.scala
(29 additions, 7 deletions)

@@ -34,20 +34,38 @@ import java.util.concurrent.CompletableFuture
 import java.util.concurrent.atomic.AtomicBoolean
 import scala.jdk.CollectionConverters._
 
+object CoordinatorLoaderImpl {
+  /**
+   * The interval between updating the last committed offset during loading, in offsets. Smaller
+   * values commit more often at the expense of loading times when the workload is simple and does
+   * not create collections that need to participate in {@link CoordinatorPlayback} snapshotting.
+   * Larger values commit less often and allow more temporary data to accumulate before the next
+   * commit when the workload creates many temporary collections that need to be snapshotted.
+   *
+   * The value of 16,384 was chosen as a trade-off between the performance of these two workloads.
+   *
+   * When changing this value, please run the GroupCoordinatorShardLoadingBenchmark to evaluate
+   * the relative change in performance.
+   */
+  val DEFAULT_COMMIT_INTERVAL_OFFSETS = 16384L
+}
+
 /**
  * Coordinator loader which reads records from a partition and replays them
  * to a group coordinator.
  *
- * @param replicaManager The replica manager.
- * @param deserializer The deserializer to use.
- * @param loadBufferSize The load buffer size.
+ * @param replicaManager        The replica manager.
+ * @param deserializer          The deserializer to use.
+ * @param loadBufferSize        The load buffer size.
+ * @param commitIntervalOffsets The interval between updating the last committed offset during loading, in offsets.
  * @tparam T The record type.
  */
 class CoordinatorLoaderImpl[T](
   time: Time,
   replicaManager: ReplicaManager,
   deserializer: Deserializer[T],
-  loadBufferSize: Int
+  loadBufferSize: Int,
+  commitIntervalOffsets: Long = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
 ) extends CoordinatorLoader[T] with Logging {
   private val isRunning = new AtomicBoolean(true)
   private val scheduler = new KafkaScheduler(1)

@@ -99,7 +117,7 @@ class CoordinatorLoaderImpl[T](
       // the log end offset but the log is empty. This could happen with compacted topics.
       var readAtLeastOneRecord = true
 
-      var previousHighWatermark = -1L
+      var lastCommittedOffset = -1L
       var numRecords = 0L
       var numBytes = 0L
       while (currentOffset < logEndOffset && readAtLeastOneRecord && isRunning.get) {

@@ -208,10 +226,14 @@ class CoordinatorLoaderImpl[T](
             if (currentOffset >= currentHighWatermark) {
               coordinator.updateLastWrittenOffset(currentOffset)
 
-              if (currentHighWatermark > previousHighWatermark) {
+              if (currentHighWatermark > lastCommittedOffset) {
                 coordinator.updateLastCommittedOffset(currentHighWatermark)
-                previousHighWatermark = currentHighWatermark
+                lastCommittedOffset = currentHighWatermark
               }
+            } else if (currentOffset - lastCommittedOffset >= commitIntervalOffsets) {
+              coordinator.updateLastWrittenOffset(currentOffset)
+              coordinator.updateLastCommittedOffset(currentOffset)
+              lastCommittedOffset = currentOffset
             }
           }
           numBytes = numBytes + memoryRecords.sizeInBytes()
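To make the new else-if branch concrete, here is a small standalone trace (a sketch, not Kafka code) of the commit decisions for the batch layout used by the new test below: batches ending at offsets 2, 5, 6, and 7, with commitIntervalOffsets = 2 and a high watermark of 7.

// Sketch: simulates the loader's commit decisions after each replayed batch.
object CommitIntervalTrace extends App {
  val commitIntervalOffsets = 2L
  val highWatermark = 7L
  var lastCommittedOffset = -1L

  // currentOffset is the next offset to read after replaying each batch.
  for (currentOffset <- Seq(2L, 5L, 6L, 7L)) {
    if (currentOffset >= highWatermark) {
      // At or past the high watermark: commit it exactly once.
      if (highWatermark > lastCommittedOffset) {
        println(s"commit $highWatermark (high watermark)")
        lastCommittedOffset = highWatermark
      }
    } else if (currentOffset - lastCommittedOffset >= commitIntervalOffsets) {
      // Below the high watermark: commit every commitIntervalOffsets offsets
      // so that stale snapshots can be discarded and reclaimed.
      println(s"commit $currentOffset (interval)")
      lastCommittedOffset = currentOffset
    }
  }
  // Prints: commit 2 (interval), commit 5 (interval), commit 7 (high watermark)
}

Offsets 2 and 5 are committed by the interval branch while still below the high watermark; offset 6 accumulates only one offset since the last commit, so it is skipped; offset 7 is committed by the high-watermark branch. This matches the verifications in testUpdateLastWrittenOffsetCommitInterval below.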

core/src/main/scala/kafka/server/BrokerServer.scala
(4 additions, 2 deletions)

@@ -620,7 +620,8 @@ class BrokerServer(
         time,
         replicaManager,
         serde,
-        config.groupCoordinatorConfig.offsetsLoadBufferSize
+        config.groupCoordinatorConfig.offsetsLoadBufferSize,
+        CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
       )
       val writer = new CoordinatorPartitionWriter(
         replicaManager

@@ -650,7 +651,8 @@ class BrokerServer(
         time,
         replicaManager,
         serde,
-        config.shareCoordinatorConfig.shareCoordinatorLoadBufferSize()
+        config.shareCoordinatorConfig.shareCoordinatorLoadBufferSize(),
+        CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
       )
       val writer = new CoordinatorPartitionWriter(
         replicaManager
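Because commitIntervalOffsets has a default value, these call sites could have omitted the new argument; passing DEFAULT_COMMIT_INTERVAL_OFFSETS explicitly keeps the wiring visible. A hedged sketch of a caller overriding it (the value is illustrative, not a recommendation):

val loader = new CoordinatorLoaderImpl[(String, String)](
  time = Time.SYSTEM,
  replicaManager = replicaManager,
  deserializer = serde,
  loadBufferSize = 1000,
  commitIntervalOffsets = 4096L // commit more often than the 16,384 default
)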

core/src/test/scala/unit/kafka/coordinator/group/CoordinatorLoaderImplTest.scala
(97 additions, 12 deletions)

@@ -62,7 +62,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(None)
 
@@ -82,7 +83,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       loader.close()
 
@@ -103,7 +105,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -186,7 +189,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -229,7 +233,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -265,7 +270,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -303,7 +309,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -331,7 +338,8 @@ class CoordinatorLoaderImplTest {
       time,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       val startTimeMs = time.milliseconds()
       when(replicaManager.getLog(tp)).thenReturn(Some(log))

@@ -378,7 +386,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -441,7 +450,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -467,7 +477,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -531,7 +542,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -559,6 +571,79 @@ class CoordinatorLoaderImplTest {
     }
   }
 
+  @Test
+  def testUpdateLastWrittenOffsetCommitInterval(): Unit = {
+    val tp = new TopicPartition("foo", 0)
+    val replicaManager = mock(classOf[ReplicaManager])
+    val serde = new StringKeyValueDeserializer
+    val log = mock(classOf[UnifiedLog])
+    val coordinator = mock(classOf[CoordinatorPlayback[(String, String)]])
+
+    Using.resource(new CoordinatorLoaderImpl[(String, String)](
+      time = Time.SYSTEM,
+      replicaManager = replicaManager,
+      deserializer = serde,
+      loadBufferSize = 1000,
+      commitIntervalOffsets = 2L
+    )) { loader =>
+      when(replicaManager.getLog(tp)).thenReturn(Some(log))
+      when(log.logStartOffset).thenReturn(0L)
+      when(log.highWatermark).thenReturn(7L)
+      when(replicaManager.getLogEndOffset(tp)).thenReturn(Some(7L))
+
+      val readResult1 = logReadResult(startOffset = 0, records = Seq(
+        new SimpleRecord("k1".getBytes, "v1".getBytes),
+        new SimpleRecord("k2".getBytes, "v2".getBytes)
+      ))
+
+      when(log.read(0L, 1000, FetchIsolation.LOG_END, true)).thenReturn(readResult1)
+
+      val readResult2 = logReadResult(startOffset = 2, records = Seq(
+        new SimpleRecord("k3".getBytes, "v3".getBytes),
+        new SimpleRecord("k4".getBytes, "v4".getBytes),
+        new SimpleRecord("k5".getBytes, "v5".getBytes)
+      ))
+
+      when(log.read(2L, 1000, FetchIsolation.LOG_END, true)).thenReturn(readResult2)
+
+      val readResult3 = logReadResult(startOffset = 5, records = Seq(
+        new SimpleRecord("k6".getBytes, "v6".getBytes)
+      ))
+
+      when(log.read(5L, 1000, FetchIsolation.LOG_END, true)).thenReturn(readResult3)
+
+      val readResult4 = logReadResult(startOffset = 6, records = Seq(
+        new SimpleRecord("k7".getBytes, "v7".getBytes)
+      ))
+
+      when(log.read(6L, 1000, FetchIsolation.LOG_END, true)).thenReturn(readResult4)
+
+      assertNotNull(loader.load(tp, coordinator).get(10, TimeUnit.SECONDS))
+
+      verify(coordinator).replay(0L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k1", "v1"))
+      verify(coordinator).replay(1L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k2", "v2"))
+      verify(coordinator).replay(2L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k3", "v3"))
+      verify(coordinator).replay(3L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k4", "v4"))
+      verify(coordinator).replay(4L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k5", "v5"))
+      verify(coordinator).replay(5L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k6", "v6"))
+      verify(coordinator).replay(6L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k7", "v7"))
+      verify(coordinator, times(0)).updateLastWrittenOffset(0L)
+      verify(coordinator, times(1)).updateLastWrittenOffset(2L)
+      verify(coordinator, times(1)).updateLastWrittenOffset(5L)
+      verify(coordinator, times(0)).updateLastWrittenOffset(6L)
+      verify(coordinator, times(1)).updateLastWrittenOffset(7L)
+      verify(coordinator, times(0)).updateLastCommittedOffset(0L)
+      verify(coordinator, times(1)).updateLastCommittedOffset(2L)
+      verify(coordinator, times(1)).updateLastCommittedOffset(5L)
+      verify(coordinator, times(0)).updateLastCommittedOffset(6L)
+      verify(coordinator, times(1)).updateLastCommittedOffset(7L)
+    }
+  }
+
   private def logReadResult(
     startOffset: Long,
     producerId: Long = RecordBatch.NO_PRODUCER_ID,
