Skip to content

Commit 275d39a

Browse files
committed
Remove obsolete offset within range check
1 parent ab5b88d commit 275d39a

File tree

3 files changed

+7
-56
lines changed

3 files changed

+7
-56
lines changed

sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaUnboundedReader.java

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ public boolean start() throws IOException {
151151
@Override
152152
public boolean advance() throws IOException {
153153
/* Read first record (if any). we need to loop here because :
154-
* - (a) some records initially need to be skipped if they are before consumedOffset
155154
* - (b) if curBatch is empty, we want to fetch next batch and then advance.
156155
* - (c) curBatch is an iterator of iterators. we interleave the records from each.
157156
* curBatch.next() might return an empty iterator.
@@ -173,19 +172,6 @@ public boolean advance() throws IOException {
173172
elementsReadBySplit.inc();
174173

175174
ConsumerRecord<byte[], byte[]> rawRecord = pState.recordIter.next();
176-
long expected = pState.nextOffset;
177-
long offset = rawRecord.offset();
178-
179-
if (offset < expected) { // -- (a)
180-
// this can happen when compression is enabled in Kafka (seems to be fixed in 0.10)
181-
// should we check if the offset is way off from consumedOffset (say > 1M)?
182-
LOG.warn(
183-
"{}: ignoring already consumed offset {} for {}",
184-
this,
185-
offset,
186-
pState.topicPartition);
187-
continue;
188-
}
189175

190176
// Apply user deserializers. User deserializers might throw, which will be propagated up
191177
// and 'curRecord' remains unchanged. The runner should close this reader.
@@ -212,7 +198,7 @@ public boolean advance() throws IOException {
212198
int recordSize =
213199
(rawRecord.key() == null ? 0 : rawRecord.key().length)
214200
+ (rawRecord.value() == null ? 0 : rawRecord.value().length);
215-
pState.recordConsumed(offset, recordSize);
201+
pState.recordConsumed(rawRecord.offset(), recordSize);
216202
bytesRead.inc(recordSize);
217203
bytesReadBySplit.inc(recordSize);
218204

sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java

Lines changed: 2 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -441,12 +441,9 @@ public ProcessContinuation processElement(
441441
try (Consumer<byte[], byte[]> consumer = consumerFactoryFn.apply(updatedConsumerConfig)) {
442442
ConsumerSpEL.evaluateAssign(
443443
consumer, ImmutableList.of(kafkaSourceDescriptor.getTopicPartition()));
444-
long startOffset = tracker.currentRestriction().getFrom();
445-
long expectedOffset = startOffset;
446-
consumer.seek(kafkaSourceDescriptor.getTopicPartition(), startOffset);
444+
long expectedOffset = tracker.currentRestriction().getFrom();
445+
consumer.seek(kafkaSourceDescriptor.getTopicPartition(), expectedOffset);
447446
ConsumerRecords<byte[], byte[]> rawRecords = ConsumerRecords.empty();
448-
long skippedRecords = 0L;
449-
final Stopwatch sw = Stopwatch.createStarted();
450447

451448
while (true) {
452449
// Fetch the record size accumulator.
@@ -466,36 +463,6 @@ public ProcessContinuation processElement(
466463
return ProcessContinuation.resume();
467464
}
468465
for (ConsumerRecord<byte[], byte[]> rawRecord : rawRecords) {
469-
// If the Kafka consumer returns a record with an offset that is already processed
470-
// the record can be safely skipped. This is needed because there is a possibility
471-
// that the seek() above fails to move the offset to the desired position. In which
472-
// case poll() would return records that are already consumed.
473-
if (rawRecord.offset() < startOffset) {
474-
// If the start offset is not reached even after skipping the records for 10 seconds
475-
// then the processing is stopped with a backoff to give the Kafka server some time to
476-
// catch up.
477-
if (sw.elapsed().getSeconds() > 10L) {
478-
LOG.error(
479-
"The expected offset ({}) was not reached even after"
480-
+ " skipping consumed records for 10 seconds. The offset we could"
481-
+ " reach was {}. The processing of this bundle will be attempted"
482-
+ " at a later time.",
483-
expectedOffset,
484-
rawRecord.offset());
485-
return ProcessContinuation.resume()
486-
.withResumeDelay(org.joda.time.Duration.standardSeconds(10L));
487-
}
488-
skippedRecords++;
489-
continue;
490-
}
491-
if (skippedRecords > 0L) {
492-
LOG.warn(
493-
"{} records were skipped due to seek returning an"
494-
+ " earlier position than requested position of {}",
495-
skippedRecords,
496-
expectedOffset);
497-
skippedRecords = 0L;
498-
}
499466
if (!tracker.tryClaim(rawRecord.offset())) {
500467
return ProcessContinuation.stop();
501468
}

sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter.BAD_RECORD_TAG;
2121
import static org.junit.Assert.assertEquals;
22+
import static org.junit.Assert.assertThrows;
2223
import static org.junit.Assert.assertTrue;
2324

2425
import java.nio.charset.StandardCharsets;
@@ -523,12 +524,9 @@ public void testProcessElementWithEarlierOffset() throws Exception {
523524
new OffsetRangeTracker(new OffsetRange(startOffset, startOffset + 3));
524525
KafkaSourceDescriptor descriptor =
525526
KafkaSourceDescriptor.of(topicPartition, null, null, null, null, null);
526-
ProcessContinuation result =
527-
dofnInstanceWithBrokenSeek.processElement(descriptor, tracker, null, receiver);
528-
assertEquals(ProcessContinuation.stop(), result);
529-
assertEquals(
530-
createExpectedRecords(descriptor, startOffset, 3, "key", "value"),
531-
receiver.getGoodRecords());
527+
assertThrows(
528+
IllegalArgumentException.class,
529+
() -> dofnInstanceWithBrokenSeek.processElement(descriptor, tracker, null, receiver));
532530
}
533531

534532
@Test

0 commit comments

Comments
 (0)