Skip to content

Commit 53d06ee

Browse files
committed
(refact) separate progress from abstract commit policy
1 parent 9541107 commit 53d06ee

25 files changed

+628
-609
lines changed

src/main/java/cn/leancloud/kafka/consumer/AbstractCommitPolicy.java

Lines changed: 10 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
package cn.leancloud.kafka.consumer;
22

33
import org.apache.kafka.clients.consumer.Consumer;
4-
import org.apache.kafka.clients.consumer.ConsumerRecord;
54
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
65
import org.apache.kafka.common.TopicPartition;
76
import org.apache.kafka.common.errors.RetriableException;
87

98
import java.time.Duration;
109
import java.util.*;
1110

12-
import static java.util.stream.Collectors.toSet;
11+
import static java.util.Collections.emptySet;
1312

14-
abstract class AbstractCommitPolicy<K, V> implements CommitPolicy<K, V> {
13+
abstract class AbstractCommitPolicy<K, V> implements CommitPolicy {
1514
static SleepFunction sleepFunction = Thread::sleep;
1615

1716
interface SleepFunction {
@@ -45,116 +44,35 @@ void onError(RetriableException e) {
4544
}
4645

4746
protected final Consumer<K, V> consumer;
48-
private final Map<TopicPartition, Long> topicOffsetHighWaterMark;
49-
private final Map<TopicPartition, CompletedOffsets> completedOffsets;
5047
private final long syncCommitRetryIntervalMs;
5148
private final int maxAttemptsForEachSyncCommit;
5249

5350
AbstractCommitPolicy(Consumer<K, V> consumer, Duration syncCommitRetryInterval, int maxAttemptsForEachSyncCommit) {
5451
this.consumer = consumer;
55-
this.topicOffsetHighWaterMark = new HashMap<>();
56-
this.completedOffsets = new HashMap<>();
5752
this.syncCommitRetryIntervalMs = syncCommitRetryInterval.toMillis();
5853
this.maxAttemptsForEachSyncCommit = maxAttemptsForEachSyncCommit;
5954
}
6055

6156
@Override
62-
public void markPendingRecord(ConsumerRecord<K, V> record) {
63-
final TopicPartition topicPartition = new TopicPartition(record.topic(), record.partition());
64-
topicOffsetHighWaterMark.merge(
65-
topicPartition,
66-
record.offset() + 1,
67-
Math::max);
68-
69-
final CompletedOffsets offset = completedOffsets.get(topicPartition);
70-
// please note that if offset exists, it could happen for record.offset() >= offset.nextOffsetToCommit()
71-
// when there're duplicate records which have lower offset than our next offset to commit consumed from broker
72-
if (offset == null) {
73-
completedOffsets.put(topicPartition, new CompletedOffsets(record.offset() - 1L));
74-
}
75-
}
76-
77-
@Override
78-
public void markCompletedRecord(ConsumerRecord<K, V> record) {
79-
final CompletedOffsets offset = completedOffsets.get(new TopicPartition(record.topic(), record.partition()));
80-
// offset could be null, when the partition of the record was revoked before its processing was done
81-
if (offset != null) {
82-
offset.addCompleteOffset(record.offset());
83-
}
84-
}
85-
86-
@Override
87-
public void revokePartitions(Collection<TopicPartition> partitions) {
88-
clearProcessingRecordStatesFor(partitions);
89-
}
90-
91-
@Override
92-
public Set<TopicPartition> partialCommitSync() {
93-
final Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = completedTopicOffsetsToCommit();
57+
public Set<TopicPartition> partialCommitSync(ProcessRecordsProgress progress) {
58+
final Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = progress.completedOffsetsToCommit();
9459
if (offsetsToCommit.isEmpty()) {
95-
return Collections.emptySet();
60+
return emptySet();
9661
}
9762
commitSyncWithRetry(offsetsToCommit);
98-
updatePartialCommittedOffsets(offsetsToCommit);
63+
progress.updateCommittedOffsets(offsetsToCommit);
9964

100-
return clearProcessingRecordStatesForCompletedPartitions(offsetsToCommit);
65+
return progress.clearCompletedPartitions(offsetsToCommit);
10166
}
10267

103-
Set<TopicPartition> fullCommitSync() {
68+
Set<TopicPartition> fullCommitSync(ProcessRecordsProgress progress) {
10469
commitSyncWithRetry();
10570

106-
final Set<TopicPartition> completePartitions = partitionsForAllRecordsStates();
107-
clearAllProcessingRecordStates();
71+
final Set<TopicPartition> completePartitions = progress.allPartitions();
72+
progress.clearAll();
10873
return completePartitions;
10974
}
11075

111-
@VisibleForTesting
112-
Map<TopicPartition, Long> topicOffsetHighWaterMark() {
113-
return topicOffsetHighWaterMark;
114-
}
115-
116-
Map<TopicPartition, OffsetAndMetadata> completedTopicOffsetsToCommit() {
117-
if (noCompletedOffsets()) {
118-
return Collections.emptyMap();
119-
}
120-
121-
final Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
122-
for (Map.Entry<TopicPartition, CompletedOffsets> entry : completedOffsets.entrySet()) {
123-
final CompletedOffsets offset = entry.getValue();
124-
if (offset.hasOffsetToCommit()) {
125-
offsets.put(entry.getKey(), offset.getOffsetToCommit());
126-
}
127-
}
128-
129-
return offsets;
130-
}
131-
132-
boolean noTopicOffsetsToCommit() {
133-
if (noCompletedOffsets()) {
134-
return true;
135-
}
136-
137-
for (Map.Entry<TopicPartition, CompletedOffsets> entry : completedOffsets.entrySet()) {
138-
final CompletedOffsets offset = entry.getValue();
139-
if (offset.hasOffsetToCommit()) {
140-
return false;
141-
}
142-
}
143-
144-
return true;
145-
}
146-
147-
void updatePartialCommittedOffsets(Map<TopicPartition, OffsetAndMetadata> offsets) {
148-
for (Map.Entry<TopicPartition, OffsetAndMetadata> entry : offsets.entrySet()) {
149-
final CompletedOffsets offset = completedOffsets.get(entry.getKey());
150-
offset.updateCommittedOffset(entry.getValue().offset());
151-
}
152-
}
153-
154-
boolean noCompletedOffsets() {
155-
return completedOffsets.isEmpty();
156-
}
157-
15876
void commitSyncWithRetry() {
15977
final RetryContext context = context();
16078
do {
@@ -179,50 +97,6 @@ void commitSyncWithRetry(Map<TopicPartition, OffsetAndMetadata> offsets) {
17997
} while (true);
18098
}
18199

182-
Set<TopicPartition> partitionsForAllRecordsStates() {
183-
return new HashSet<>(topicOffsetHighWaterMark.keySet());
184-
}
185-
186-
void clearAllProcessingRecordStates() {
187-
topicOffsetHighWaterMark.clear();
188-
completedOffsets.clear();
189-
}
190-
191-
Set<TopicPartition> clearProcessingRecordStatesForCompletedPartitions(Map<TopicPartition, OffsetAndMetadata> committedOffsets) {
192-
final Set<TopicPartition> partitions = partitionsToSafeResume(committedOffsets);
193-
clearProcessingRecordStatesFor(partitions);
194-
return partitions;
195-
}
196-
197-
void clearProcessingRecordStatesFor(Collection<TopicPartition> partitions) {
198-
for (TopicPartition p : partitions) {
199-
topicOffsetHighWaterMark.remove(p);
200-
completedOffsets.remove(p);
201-
}
202-
}
203-
204-
Set<TopicPartition> partitionsToSafeResume() {
205-
return partitionsToSafeResume(completedTopicOffsetsToCommit());
206-
}
207-
208-
Set<TopicPartition> partitionsToSafeResume(Map<TopicPartition, OffsetAndMetadata> completedOffsets) {
209-
return completedOffsets
210-
.entrySet()
211-
.stream()
212-
.filter(entry -> topicOffsetMeetHighWaterMark(entry.getKey(), entry.getValue()))
213-
.map(Map.Entry::getKey)
214-
.collect(toSet());
215-
}
216-
217-
private boolean topicOffsetMeetHighWaterMark(TopicPartition topicPartition, OffsetAndMetadata offset) {
218-
final Long offsetHighWaterMark = topicOffsetHighWaterMark.get(topicPartition);
219-
if (offsetHighWaterMark != null) {
220-
return offset.offset() >= offsetHighWaterMark;
221-
}
222-
// maybe this partition revoked before a msg of this partition was processed
223-
return true;
224-
}
225-
226100
private RetryContext context() {
227101
return new RetryContext(syncCommitRetryIntervalMs, maxAttemptsForEachSyncCommit);
228102
}

src/main/java/cn/leancloud/kafka/consumer/AbstractRecommitAwareCommitPolicy.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@ abstract class AbstractRecommitAwareCommitPolicy<K, V> extends AbstractCommitPol
2323
}
2424

2525
@Override
26-
public final Set<TopicPartition> tryCommit(boolean noPendingRecords) {
26+
public final Set<TopicPartition> tryCommit(boolean noPendingRecords, ProcessRecordsProgress progress) {
2727
if (needRecommit()) {
2828
commitSyncWithRetry(offsetsForRecommit());
2929
updateNextRecommitTime();
3030
}
31-
return tryCommit0(noPendingRecords);
31+
return tryCommit0(noPendingRecords, progress);
3232
}
3333

34-
abstract Set<TopicPartition> tryCommit0(boolean noPendingRecords);
34+
abstract Set<TopicPartition> tryCommit0(boolean noPendingRecords, ProcessRecordsProgress progress);
3535

3636
void updateNextRecommitTime() {
3737
updateNextRecommitTime(System.nanoTime());
Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
package cn.leancloud.kafka.consumer;
22

33
import org.apache.kafka.clients.consumer.Consumer;
4-
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
5-
import org.apache.kafka.clients.consumer.OffsetCommitCallback;
64
import org.apache.kafka.common.TopicPartition;
75
import org.slf4j.Logger;
86
import org.slf4j.LoggerFactory;
97

108
import java.time.Duration;
11-
import java.util.Map;
129
import java.util.Set;
1310

1411
import static java.util.Collections.emptySet;
@@ -17,7 +14,6 @@ final class AsyncCommitPolicy<K, V> extends AbstractRecommitAwareCommitPolicy<K,
1714
private static final Logger logger = LoggerFactory.getLogger(AsyncCommitPolicy.class);
1815

1916
private final int maxPendingAsyncCommits;
20-
private final OffsetCommitCallback callback;
2117
private int pendingAsyncCommitCounter;
2218
private boolean forceSync;
2319

@@ -28,19 +24,18 @@ final class AsyncCommitPolicy<K, V> extends AbstractRecommitAwareCommitPolicy<K,
2824
int maxPendingAsyncCommits) {
2925
super(consumer, syncCommitRetryInterval, maxAttemptsForEachSyncCommit, recommitInterval);
3026
this.maxPendingAsyncCommits = maxPendingAsyncCommits;
31-
this.callback = new AsyncCommitCallback();
3227
}
3328

3429
@Override
35-
Set<TopicPartition> tryCommit0(boolean noPendingRecords) {
30+
Set<TopicPartition> tryCommit0(boolean noPendingRecords, ProcessRecordsProgress progress) {
3631
// with forceSync mark it means a previous async commit was failed, so
3732
// we do a sync commit no matter if there's any pending records or completed offsets
38-
if (!forceSync && (!noPendingRecords || noTopicOffsetsToCommit())) {
33+
if (!forceSync && (!noPendingRecords || progress.noOffsetsToCommit())) {
3934
return emptySet();
4035
}
4136

42-
final Set<TopicPartition> partitions = partitionsForAllRecordsStates();
43-
commit();
37+
final Set<TopicPartition> partitions = progress.allPartitions();
38+
commit(progress);
4439

4540
// for our commit policy, no matter syncCommit or asyncCommit we are using, we always
4641
// commit all assigned offsets, so we can update recommit time here safely. And
@@ -60,29 +55,24 @@ boolean forceSync() {
6055
return forceSync;
6156
}
6257

63-
private void commit() {
58+
private void commit(ProcessRecordsProgress progress) {
6459
if (forceSync || pendingAsyncCommitCounter >= maxPendingAsyncCommits) {
6560
commitSyncWithRetry();
6661
pendingAsyncCommitCounter = 0;
6762
forceSync = false;
68-
clearAllProcessingRecordStates();
63+
progress.clearAll();
6964
} else {
7065
++pendingAsyncCommitCounter;
71-
consumer.commitAsync(callback);
72-
}
73-
}
74-
75-
private class AsyncCommitCallback implements OffsetCommitCallback {
76-
@Override
77-
public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception exception) {
78-
--pendingAsyncCommitCounter;
79-
assert pendingAsyncCommitCounter >= 0 : "actual: " + pendingAsyncCommitCounter;
80-
if (exception != null) {
81-
logger.warn("Failed to commit offsets: " + offsets + " asynchronously", exception);
82-
forceSync = true;
83-
} else {
84-
clearProcessingRecordStatesForCompletedPartitions(offsets);
85-
}
66+
consumer.commitAsync(((offsets, exception) -> {
67+
--pendingAsyncCommitCounter;
68+
assert pendingAsyncCommitCounter >= 0 : "actual: " + pendingAsyncCommitCounter;
69+
if (exception != null) {
70+
logger.warn("Failed to commit offsets: " + offsets + " asynchronously", exception);
71+
forceSync = true;
72+
} else {
73+
progress.clearCompletedPartitions(offsets);
74+
}
75+
}));
8676
}
8777
}
8878
}

src/main/java/cn/leancloud/kafka/consumer/AutoCommitPolicy.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,18 @@ final class AutoCommitPolicy<K, V> extends AbstractCommitPolicy<K, V> {
1515
}
1616

1717
@Override
18-
public Set<TopicPartition> tryCommit(boolean noPendingRecords) {
19-
if (noTopicOffsetsToCommit()) {
18+
public Set<TopicPartition> tryCommit(boolean noPendingRecords, ProcessRecordsProgress progress) {
19+
if (progress.noOffsetsToCommit()) {
2020
return emptySet();
2121
}
2222

2323
final Set<TopicPartition> partitions;
2424
if (noPendingRecords) {
25-
partitions = partitionsForAllRecordsStates();
26-
clearAllProcessingRecordStates();
25+
partitions = progress.allPartitions();
26+
progress.clearAll();
2727
} else {
28-
partitions = partitionsToSafeResume();
29-
clearProcessingRecordStatesFor(partitions);
28+
partitions = progress.completedPartitions();
29+
progress.clearFor(partitions);
3030
}
3131

3232
return partitions;

src/main/java/cn/leancloud/kafka/consumer/CommitPolicy.java

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,7 @@
66
import java.util.Collection;
77
import java.util.Set;
88

9-
interface CommitPolicy<K, V> {
10-
/**
11-
* Mark an {@link ConsumerRecord} as pending before processing it. So {@link CommitPolicy} can know which and
12-
* how many records we need to process. It is called by {@link Fetcher} when {@code Fetcher} fetched any
13-
* {@link ConsumerRecord}s from Broker.
14-
*
15-
* @param record the {@link ConsumerRecord} need to process
16-
*/
17-
void markPendingRecord(ConsumerRecord<K, V> record);
18-
19-
/**
20-
* Mark an {@link ConsumerRecord} as completed after processing it. So {@link CommitPolicy} can know which and
21-
* how many records we have processed. It is called by {@link Fetcher} when {@code Fetcher} make sure that
22-
* a {@code ConsumerRecord} was processed successfully.
23-
*
24-
* @param record the {@link ConsumerRecord} processed
25-
*/
26-
void markCompletedRecord(ConsumerRecord<K, V> record);
27-
9+
interface CommitPolicy {
2810
/**
2911
* Try commit offset for any {@link TopicPartition}s which has processed {@link ConsumerRecord}s based on the
3012
* intrinsic policy of this {@link CommitPolicy}. This method is called whenever there're any
@@ -35,7 +17,7 @@ interface CommitPolicy<K, V> {
3517
* calculate this value much quicker
3618
* @return those {@link TopicPartition}s which have no pending {@code ConsumerRecord}s
3719
*/
38-
Set<TopicPartition> tryCommit(boolean noPendingRecords);
20+
Set<TopicPartition> tryCommit(boolean noPendingRecords, ProcessRecordsProgress progress);
3921

4022
/**
4123
* Do a dedicated partition commit synchronously which only commit those {@link ConsumerRecord}s that have
@@ -44,12 +26,5 @@ interface CommitPolicy<K, V> {
4426
*
4527
* @return those {@link TopicPartition}s which have no pending {@code ConsumerRecord}s
4628
*/
47-
Set<TopicPartition> partialCommitSync();
48-
49-
/**
50-
* Revoke internal states for some partitions.
51-
*
52-
* @param partitions which was revoked from consumer
53-
*/
54-
void revokePartitions(Collection<TopicPartition> partitions);
29+
Set<TopicPartition> partialCommitSync(ProcessRecordsProgress progress);
5530
}

0 commit comments

Comments (0)