Commit 8248d1d

KAFKA-19732, KAFKA-19716: Clear out coordinator snapshots periodically while loading (apache#20590)

When nested Timeline collections are created and discarded while loading a coordinator partition, references to them accumulate in the current snapshot. Allow the GC to reclaim them by starting a new snapshot and discarding previous snapshots every 16,384 records. Small intervals degrade loading times for non-transactional offset commit workloads, while large intervals degrade loading times for transactional workloads. A default of 16,384 was chosen as a compromise.

Cherry pick of d067c6c.

Reviewers: David Jacot <[email protected]>
1 parent 7ba7f5e commit 8248d1d
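
For context on the mechanism, here is a minimal, hypothetical Scala sketch of the idea behind the change, not the actual CoordinatorLoaderImpl: the Playback trait and load function below are stand-ins, and only the updateLastWrittenOffset/updateLastCommittedOffset calls and the commitIntervalOffsets check mirror the diff that follows (the real loader also interacts with the high watermark and the ReplicaManager read loop).

// Minimal sketch (hypothetical, not the real loader): replay records and commit
// progress every `commitIntervalOffsets` offsets so that snapshots taken before
// the commit point can be discarded and their Timeline collections reclaimed by GC.
object CommitIntervalSketch {

  // Stand-in for the coordinator playback interface; only the two update methods
  // mirror the calls made by CoordinatorLoaderImpl in the diff below.
  trait Playback {
    def replay(offset: Long, record: String): Unit
    def updateLastWrittenOffset(offset: Long): Unit   // starts a new snapshot at `offset`
    def updateLastCommittedOffset(offset: Long): Unit // allows older snapshots to be discarded
  }

  def load(
    records: Iterator[(Long, String)],
    playback: Playback,
    commitIntervalOffsets: Long = 16384L
  ): Unit = {
    var lastCommittedOffset = -1L
    records.foreach { case (offset, record) =>
      playback.replay(offset, record)
      // Without this periodic commit, every snapshot created during loading stays
      // reachable and the temporary collections it references cannot be collected.
      if (offset - lastCommittedOffset >= commitIntervalOffsets) {
        playback.updateLastWrittenOffset(offset)
        playback.updateLastCommittedOffset(offset)
        lastCommittedOffset = offset
      }
    }
  }
}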

File tree: 3 files changed, +130 -21 lines changed

core/src/main/scala/kafka/coordinator/group/CoordinatorLoaderImpl.scala

Lines changed: 29 additions & 7 deletions
@@ -34,20 +34,38 @@ import java.util.concurrent.CompletableFuture
 import java.util.concurrent.atomic.AtomicBoolean
 import scala.jdk.CollectionConverters._

+object CoordinatorLoaderImpl {
+  /**
+   * The interval between updating the last committed offset during loading, in offsets. Smaller
+   * values commit more often at the expense of loading times when the workload is simple and does
+   * not create collections that need to participate in {@link CoordinatorPlayback} snapshotting.
+   * Larger values commit less often and allow more temporary data to accumulate before the next
+   * commit when the workload creates many temporary collections that need to be snapshotted.
+   *
+   * The value of 16,384 was chosen as a trade-off between the performance of these two workloads.
+   *
+   * When changing this value, please run the GroupCoordinatorShardLoadingBenchmark to evaluate
+   * the relative change in performance.
+   */
+  val DEFAULT_COMMIT_INTERVAL_OFFSETS = 16384L
+}
+
 /**
  * Coordinator loader which reads records from a partition and replays them
  * to a group coordinator.
  *
- * @param replicaManager The replica manager.
- * @param deserializer The deserializer to use.
- * @param loadBufferSize The load buffer size.
+ * @param replicaManager        The replica manager.
+ * @param deserializer          The deserializer to use.
+ * @param loadBufferSize        The load buffer size.
+ * @param commitIntervalOffsets The interval between updating the last committed offset during loading, in offsets.
  * @tparam T The record type.
  */
 class CoordinatorLoaderImpl[T](
   time: Time,
   replicaManager: ReplicaManager,
   deserializer: Deserializer[T],
-  loadBufferSize: Int
+  loadBufferSize: Int,
+  commitIntervalOffsets: Long = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
 ) extends CoordinatorLoader[T] with Logging {
   private val isRunning = new AtomicBoolean(true)
   private val scheduler = new KafkaScheduler(1)

@@ -99,7 +117,7 @@ class CoordinatorLoaderImpl[T](
       // the log end offset but the log is empty. This could happen with compacted topics.
       var readAtLeastOneRecord = true

-      var previousHighWatermark = -1L
+      var lastCommittedOffset = -1L
       var numRecords = 0L
       var numBytes = 0L
       while (currentOffset < logEndOffset && readAtLeastOneRecord && isRunning.get) {

@@ -213,10 +231,14 @@ class CoordinatorLoaderImpl[T](
             if (currentOffset >= currentHighWatermark) {
               coordinator.updateLastWrittenOffset(currentOffset)

-              if (currentHighWatermark > previousHighWatermark) {
+              if (currentHighWatermark > lastCommittedOffset) {
                 coordinator.updateLastCommittedOffset(currentHighWatermark)
-                previousHighWatermark = currentHighWatermark
+                lastCommittedOffset = currentHighWatermark
               }
+            } else if (currentOffset - lastCommittedOffset >= commitIntervalOffsets) {
+              coordinator.updateLastWrittenOffset(currentOffset)
+              coordinator.updateLastCommittedOffset(currentOffset)
+              lastCommittedOffset = currentOffset
             }
           }
           numBytes = numBytes + memoryRecords.sizeInBytes()

core/src/main/scala/kafka/server/BrokerServer.scala

Lines changed: 4 additions & 2 deletions
@@ -634,7 +634,8 @@ class BrokerServer(
         time,
         replicaManager,
         serde,
-        config.groupCoordinatorConfig.offsetsLoadBufferSize
+        config.groupCoordinatorConfig.offsetsLoadBufferSize,
+        CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
       )
       val writer = new CoordinatorPartitionWriter(
         replicaManager

@@ -673,7 +674,8 @@ class BrokerServer(
         time,
         replicaManager,
         serde,
-        config.shareCoordinatorConfig.shareCoordinatorLoadBufferSize()
+        config.shareCoordinatorConfig.shareCoordinatorLoadBufferSize(),
+        CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
       )
       val writer = new CoordinatorPartitionWriter(
         replicaManager

core/src/test/scala/unit/kafka/coordinator/group/CoordinatorLoaderImplTest.scala

Lines changed: 97 additions & 12 deletions
@@ -63,7 +63,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(None)

@@ -83,7 +84,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       loader.close()

@@ -104,7 +106,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -207,7 +210,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -250,7 +254,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -290,7 +295,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -332,7 +338,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -364,7 +371,8 @@ class CoordinatorLoaderImplTest {
       time,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       val startTimeMs = time.milliseconds()
       when(replicaManager.getLog(tp)).thenReturn(Some(log))

@@ -419,7 +427,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -494,7 +503,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -520,7 +530,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -596,7 +607,8 @@ class CoordinatorLoaderImplTest {
       time = Time.SYSTEM,
       replicaManager = replicaManager,
       deserializer = serde,
-      loadBufferSize = 1000
+      loadBufferSize = 1000,
+      commitIntervalOffsets = CoordinatorLoaderImpl.DEFAULT_COMMIT_INTERVAL_OFFSETS
     )) { loader =>
       when(replicaManager.getLog(tp)).thenReturn(Some(log))
       when(log.logStartOffset).thenReturn(0L)

@@ -632,6 +644,79 @@ class CoordinatorLoaderImplTest {
     }
   }

+  @Test
+  def testUpdateLastWrittenOffsetCommitInterval(): Unit = {
+    val tp = new TopicPartition("foo", 0)
+    val replicaManager = mock(classOf[ReplicaManager])
+    val serde = new StringKeyValueDeserializer
+    val log = mock(classOf[UnifiedLog])
+    val coordinator = mock(classOf[CoordinatorPlayback[(String, String)]])
+
+    Using.resource(new CoordinatorLoaderImpl[(String, String)](
+      time = Time.SYSTEM,
+      replicaManager = replicaManager,
+      deserializer = serde,
+      loadBufferSize = 1000,
+      commitIntervalOffsets = 2L
+    )) { loader =>
+      when(replicaManager.getLog(tp)).thenReturn(Some(log))
+      when(log.logStartOffset).thenReturn(0L)
+      when(log.highWatermark).thenReturn(7L)
+      when(replicaManager.getLogEndOffset(tp)).thenReturn(Some(7L))
+
+      val readResult1 = logReadResult(startOffset = 0, records = Seq(
+        new SimpleRecord("k1".getBytes, "v1".getBytes),
+        new SimpleRecord("k2".getBytes, "v2".getBytes)
+      ))
+
+      when(log.read(0L, 1000, FetchIsolation.LOG_END, true
+      )).thenReturn(readResult1)
+
+      val readResult2 = logReadResult(startOffset = 2, records = Seq(
+        new SimpleRecord("k3".getBytes, "v3".getBytes),
+        new SimpleRecord("k4".getBytes, "v4".getBytes),
+        new SimpleRecord("k5".getBytes, "v5".getBytes)
+      ))
+
+      when(log.read(2L, 1000, FetchIsolation.LOG_END, true
+      )).thenReturn(readResult2)
+
+      val readResult3 = logReadResult(startOffset = 5, records = Seq(
+        new SimpleRecord("k6".getBytes, "v6".getBytes)
+      ))
+
+      when(log.read(5L, 1000, FetchIsolation.LOG_END, true
+      )).thenReturn(readResult3)
+
+      val readResult4 = logReadResult(startOffset = 6, records = Seq(
+        new SimpleRecord("k7".getBytes, "v7".getBytes)
+      ))
+
+      when(log.read(6L, 1000, FetchIsolation.LOG_END, true
+      )).thenReturn(readResult4)
+
+      assertNotNull(loader.load(tp, coordinator).get(10, TimeUnit.SECONDS))
+
+      verify(coordinator).replay(0L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k1", "v1"))
+      verify(coordinator).replay(1L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k2", "v2"))
+      verify(coordinator).replay(2L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k3", "v3"))
+      verify(coordinator).replay(3L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k4", "v4"))
+      verify(coordinator).replay(4L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k5", "v5"))
+      verify(coordinator).replay(5L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k6", "v6"))
+      verify(coordinator).replay(6L, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, ("k7", "v7"))
+      verify(coordinator, times(0)).updateLastWrittenOffset(0L)
+      verify(coordinator, times(1)).updateLastWrittenOffset(2L)
+      verify(coordinator, times(1)).updateLastWrittenOffset(5L)
+      verify(coordinator, times(0)).updateLastWrittenOffset(6L)
+      verify(coordinator, times(1)).updateLastWrittenOffset(7L)
+      verify(coordinator, times(0)).updateLastCommittedOffset(0L)
+      verify(coordinator, times(1)).updateLastCommittedOffset(2L)
+      verify(coordinator, times(1)).updateLastCommittedOffset(5L)
+      verify(coordinator, times(0)).updateLastCommittedOffset(6L)
+      verify(coordinator, times(1)).updateLastCommittedOffset(7L)
+    }
+  }
+
   private def logReadResult(
     startOffset: Long,
     producerId: Long = RecordBatch.NO_PRODUCER_ID,
