
Commit c9a2732

[FLINK-37509][base&mysql] Increment scan source no need to buffer data when enable skipping backfill. (#3964)
Parent: e35180d

17 files changed: +625 −380 lines
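For context, the behavior this commit optimizes is driven by the source configuration: the scan fetcher below checks taskContext.getSourceConfig().isSkipSnapshotBackfill() and only buffers records while backfill is still enabled. A minimal sketch of building a MySQL CDC source with that option follows; the skipSnapshotBackfill(true) builder call is an assumption inferred from that config flag (the exact setter or table option name is not shown in this commit), and package names follow the 3.x org.apache.flink.cdc layout.

import org.apache.flink.cdc.connectors.mysql.source.MySqlSource;
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;

public class SkipBackfillSourceSketch {
    public static void main(String[] args) {
        // Assumption: the builder exposes skipSnapshotBackfill(boolean), matching the
        // SourceConfig#isSkipSnapshotBackfill() flag read in this commit; verify the
        // exact option name against the connector documentation for your release.
        MySqlSource<String> source =
                MySqlSource.<String>builder()
                        .hostname("localhost")
                        .port(3306)
                        .databaseList("app_db")
                        .tableList("app_db.orders")
                        .username("flink_user")
                        .password("flink_pw")
                        .deserializer(new JsonDebeziumDeserializationSchema())
                        .skipSnapshotBackfill(true) // assumed option name
                        .build();
        // The source can then be attached to a StreamExecutionEnvironment via fromSource(...).
    }
}

Skipping backfill is generally documented as relaxing the snapshot-phase guarantee from exactly-once to at-least-once; in exchange there is no bounded backfill read to merge, which is what allows the fetcher below to stop buffering.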

flink-cdc-connect/flink-cdc-source-connectors/flink-cdc-base/src/main/java/org/apache/flink/cdc/connectors/base/source/reader/IncrementalSourceSplitReader.java

Lines changed: 20 additions & 25 deletions
@@ -45,6 +45,7 @@
 import java.io.IOException;
 import java.util.ArrayDeque;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
@@ -92,7 +93,6 @@ public IncrementalSourceSplitReader(

     @Override
     public RecordsWithSplitIds<SourceRecords> fetch() throws IOException {
-
         try {
             suspendStreamReaderIfNeed();
             return pollSplitRecords();
@@ -145,13 +145,13 @@ private ChangeEventRecords pollSplitRecords() throws InterruptedException {
         Iterator<SourceRecords> dataIt = null;
         if (currentFetcher == null) {
             // (1) Reads stream split firstly and then read snapshot split
-            if (streamSplits.size() > 0) {
+            if (!streamSplits.isEmpty()) {
                 // the stream split may come from:
                 // (a) the initial stream split
                 // (b) added back stream-split in newly added table process
                 StreamSplit nextSplit = streamSplits.poll();
                 submitStreamSplit(nextSplit);
-            } else if (snapshotSplits.size() > 0) {
+            } else if (!snapshotSplits.isEmpty()) {
                 submitSnapshotSplit(snapshotSplits.poll());
             } else {
                 LOG.info("No available split to read.");
@@ -162,19 +162,21 @@ private ChangeEventRecords pollSplitRecords() throws InterruptedException {
             } else {
                 currentSplitId = null;
             }
-            return dataIt == null ? finishedSplit() : forRecords(dataIt);
+            return dataIt == null ? finishedSplit(true) : forUnfinishedRecords(dataIt);
         } else if (currentFetcher instanceof IncrementalSourceScanFetcher) {
-            // (2) try to switch to stream split reading util current snapshot split finished
             dataIt = currentFetcher.pollSplitRecords();
             if (dataIt != null) {
                 // first fetch data of snapshot split, return and emit the records of snapshot split
-                ChangeEventRecords records;
+                return forUnfinishedRecords(dataIt);
+            } else {
+                // (2) try to switch to stream split reading util current snapshot split finished
+                ChangeEventRecords finishedRecords;
                 if (context.isHasAssignedStreamSplit()) {
-                    records = forNewAddedTableFinishedSplit(currentSplitId, dataIt);
+                    finishedRecords = forNewAddedTableFinishedSplit(currentSplitId);
                     closeScanFetcher();
                     closeStreamFetcher();
                 } else {
-                    records = forRecords(dataIt);
+                    finishedRecords = finishedSplit(false);
                     SnapshotSplit nextSplit = snapshotSplits.poll();
                     if (nextSplit != null) {
                         checkState(reusedScanFetcher != null);
@@ -183,9 +185,7 @@ private ChangeEventRecords pollSplitRecords() throws InterruptedException {
                         closeScanFetcher();
                     }
                 }
-                return records;
-            } else {
-                return finishedSplit();
+                return finishedRecords;
             }
         } else if (currentFetcher instanceof IncrementalSourceStreamFetcher) {
             // (3) switch to snapshot split reading if there are newly added snapshot splits
@@ -203,7 +203,7 @@ private ChangeEventRecords pollSplitRecords() throws InterruptedException {
                 // null will be returned after receiving suspend stream event
                 // finish current stream split reading
                 closeStreamFetcher();
-                return finishedSplit();
+                return finishedSplit(true);
             }
         } else {
             throw new IllegalStateException("Unsupported reader type.");
@@ -215,9 +215,12 @@ public boolean canAssignNextSplit() {
         return currentFetcher == null || currentFetcher.isFinished();
     }

-    private ChangeEventRecords finishedSplit() {
+    private ChangeEventRecords finishedSplit(boolean recycleScanFetcher) {
         final ChangeEventRecords finishedRecords =
                 ChangeEventRecords.forFinishedSplit(currentSplitId);
+        if (recycleScanFetcher) {
+            closeScanFetcher();
+        }
         currentSplitId = null;
         return finishedRecords;
     }
@@ -226,24 +229,16 @@ private ChangeEventRecords finishedSplit() {
      * Finishes new added snapshot split, mark the stream split as finished too, we will add the
      * stream split back in {@code MySqlSourceReader}.
      */
-    private ChangeEventRecords forNewAddedTableFinishedSplit(
-            final String splitId, final Iterator<SourceRecords> recordsForSplit) {
+    private ChangeEventRecords forNewAddedTableFinishedSplit(final String splitId) {
         final Set<String> finishedSplits = new HashSet<>();
         finishedSplits.add(splitId);
         finishedSplits.add(STREAM_SPLIT_ID);
         currentSplitId = null;
-        return new ChangeEventRecords(splitId, recordsForSplit, finishedSplits);
+        return new ChangeEventRecords(splitId, Collections.emptyIterator(), finishedSplits);
     }

-    private ChangeEventRecords forRecords(Iterator<SourceRecords> dataIt) {
-        if (currentFetcher instanceof IncrementalSourceScanFetcher) {
-            final ChangeEventRecords finishedRecords =
-                    ChangeEventRecords.forSnapshotRecords(currentSplitId, dataIt);
-            closeScanFetcher();
-            return finishedRecords;
-        } else {
-            return ChangeEventRecords.forRecords(currentSplitId, dataIt);
-        }
+    private ChangeEventRecords forUnfinishedRecords(Iterator<SourceRecords> dataIt) {
+        return ChangeEventRecords.forRecords(currentSplitId, dataIt);
    }

    private void submitSnapshotSplit(SnapshotSplit snapshotSplit) {
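To summarize the reworked control flow without the Flink plumbing: the reader now keeps forwarding scan-fetcher batches as unfinished records and only emits a finished-split signal (with an empty iterator) once the fetcher returns null, rather than bundling the last batch together with the finished marker. A self-contained toy sketch of that pattern follows; every name in it is an illustrative stand-in, not a connector class.

import java.util.Iterator;
import java.util.List;

// Toy stand-ins only: "forward batches until exhausted, then signal finished".
class ToySplitReaderSketch {
    interface ToyFetcher {
        Iterator<List<String>> poll(); // null once the current split is exhausted
    }

    private String currentSplitId = "snapshot-split-0";

    String pollOnce(ToyFetcher fetcher) {
        Iterator<List<String>> dataIt = fetcher.poll();
        if (dataIt != null) {
            // corresponds to forUnfinishedRecords(dataIt): data flows out, split stays open
            return "records for " + currentSplitId;
        }
        // corresponds to finishedSplit(...): no data left, report the split as finished
        String finished = "finished " + currentSplitId;
        currentSplitId = null;
        return finished;
    }
}

In the real reader, the new recycleScanFetcher flag on finishedSplit(...) decides whether the scan fetcher is closed together with the split or kept around for the next queued snapshot split.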

flink-cdc-connect/flink-cdc-source-connectors/flink-cdc-base/src/main/java/org/apache/flink/cdc/connectors/base/source/reader/external/IncrementalSourceScanFetcher.java

Lines changed: 74 additions & 50 deletions
@@ -35,6 +35,7 @@
 import javax.annotation.Nullable;

 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -115,64 +116,87 @@ public Iterator<SourceRecords> pollSplitRecords() throws InterruptedException {
         checkReadException();

         if (hasNextElement.get()) {
-            // eg:
-            // data input: [low watermark event][snapshot events][high watermark event][change
-            // events][end watermark event]
-            // data output: [low watermark event][normalized events][high watermark event]
-            boolean reachChangeLogStart = false;
-            boolean reachChangeLogEnd = false;
-            SourceRecord lowWatermark = null;
-            SourceRecord highWatermark = null;
-            Map<Struct, SourceRecord> outputBuffer = new HashMap<>();
-            while (!reachChangeLogEnd) {
-                checkReadException();
-                List<DataChangeEvent> batch = queue.poll();
-                for (DataChangeEvent event : batch) {
-                    SourceRecord record = event.getRecord();
-                    if (lowWatermark == null) {
-                        lowWatermark = record;
-                        assertLowWatermark(lowWatermark);
-                        continue;
-                    }
+            if (taskContext.getSourceConfig().isSkipSnapshotBackfill()) {
+                return pollWithoutBuffer();
+            } else {
+                return pollWithBuffer();
+            }
+        }
+        // the data has been polled, no more data
+        reachEnd.compareAndSet(false, true);
+        return null;
+    }

-                    if (highWatermark == null && isHighWatermarkEvent(record)) {
-                        highWatermark = record;
-                        // snapshot events capture end and begin to capture stream events
-                        reachChangeLogStart = true;
-                        continue;
-                    }
+    public Iterator<SourceRecords> pollWithoutBuffer() throws InterruptedException {
+        checkReadException();
+        List<DataChangeEvent> batch = queue.poll();
+        final List<SourceRecord> records = new ArrayList<>();
+        for (DataChangeEvent event : batch) {
+            if (isEndWatermarkEvent(event.getRecord())) {
+                hasNextElement.set(false);
+                break;
+            }
+            records.add(event.getRecord());
+        }

-                    if (reachChangeLogStart && isEndWatermarkEvent(record)) {
-                        // capture to end watermark events, stop the loop
-                        reachChangeLogEnd = true;
-                        break;
-                    }
+        return Collections.singletonList(new SourceRecords(records)).iterator();
+    }
+
+    public Iterator<SourceRecords> pollWithBuffer() throws InterruptedException {
+        // eg:
+        // data input: [low watermark event][snapshot events][high watermark event][change
+        // events][end watermark event]
+        // data output: [low watermark event][normalized events][high watermark event]
+        boolean reachChangeLogStart = false;
+        boolean reachChangeLogEnd = false;
+        SourceRecord lowWatermark = null;
+        SourceRecord highWatermark = null;
+        Map<Struct, SourceRecord> outputBuffer = new HashMap<>();
+        while (!reachChangeLogEnd) {
+            checkReadException();
+            List<DataChangeEvent> batch = queue.poll();
+            for (DataChangeEvent event : batch) {
+                SourceRecord record = event.getRecord();
+                if (lowWatermark == null) {
+                    lowWatermark = record;
+                    assertLowWatermark(lowWatermark);
+                    continue;
+                }
+
+                if (highWatermark == null && isHighWatermarkEvent(record)) {
+                    highWatermark = record;
+                    // snapshot events capture end and begin to capture stream events
+                    reachChangeLogStart = true;
+                    continue;
+                }
+
+                if (reachChangeLogStart && isEndWatermarkEvent(record)) {
+                    // capture to end watermark events, stop the loop
+                    reachChangeLogEnd = true;
+                    break;
+                }

-                    if (!reachChangeLogStart) {
-                        outputBuffer.put((Struct) record.key(), record);
-                    } else {
-                        if (isChangeRecordInChunkRange(record)) {
-                            // rewrite overlapping snapshot records through the record key
-                            taskContext.rewriteOutputBuffer(outputBuffer, record);
-                        }
+                if (!reachChangeLogStart) {
+                    outputBuffer.put((Struct) record.key(), record);
+                } else {
+                    if (isChangeRecordInChunkRange(record)) {
+                        // rewrite overlapping snapshot records through the record key
+                        taskContext.rewriteOutputBuffer(outputBuffer, record);
                    }
                }
            }
-            // snapshot split return its data once
-            hasNextElement.set(false);
+        }
+        // snapshot split return its data once
+        hasNextElement.set(false);

-            final List<SourceRecord> normalizedRecords = new ArrayList<>();
-            normalizedRecords.add(lowWatermark);
-            normalizedRecords.addAll(taskContext.formatMessageTimestamp(outputBuffer.values()));
-            normalizedRecords.add(highWatermark);
+        final List<SourceRecord> normalizedRecords = new ArrayList<>();
+        normalizedRecords.add(lowWatermark);
+        normalizedRecords.addAll(taskContext.formatMessageTimestamp(outputBuffer.values()));
+        normalizedRecords.add(highWatermark);

-            final List<SourceRecords> sourceRecordsSet = new ArrayList<>();
-            sourceRecordsSet.add(new SourceRecords(normalizedRecords));
-            return sourceRecordsSet.iterator();
-        }
-        // the data has been polled, no more data
-        reachEnd.compareAndSet(false, true);
-        return null;
+        final List<SourceRecords> sourceRecordsSet = new ArrayList<>();
+        sourceRecordsSet.add(new SourceRecords(normalizedRecords));
+        return sourceRecordsSet.iterator();
     }

     private void checkReadException() {
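The split into pollWithoutBuffer() and pollWithBuffer() can be illustrated outside the connector: with backfill, change events read between the low and high watermark must rewrite the snapshot rows by key, so rows are staged in a keyed buffer first; with backfill skipped, there are no such events and rows can be forwarded as they are polled. The sketch below uses plain strings purely for illustration and is not connector code.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Toy illustration of why the buffer is only needed when a backfill phase exists.
// Each String[] is {key, value}.
public class BackfillMergeSketch {
    // With backfill: snapshot rows are staged in a keyed buffer so that change events
    // read between the low and high watermark can overwrite them before emission.
    static List<String> withBuffer(List<String[]> snapshotRows, List<String[]> backfillChanges) {
        Map<String, String> buffer = new LinkedHashMap<>();
        for (String[] row : snapshotRows) {
            buffer.put(row[0], row[1]);
        }
        for (String[] change : backfillChanges) {
            buffer.put(change[0], change[1]); // rewrite overlapping rows by key
        }
        return new ArrayList<>(buffer.values());
    }

    // Without backfill (skip enabled): nothing to merge, so rows can be forwarded
    // as they are polled, with no per-key buffering.
    static List<String> withoutBuffer(List<String[]> snapshotRows) {
        List<String> out = new ArrayList<>();
        for (String[] row : snapshotRows) {
            out.add(row[1]);
        }
        return out;
    }
}

This is the sense in which the commit title says the incremental scan source no longer needs to buffer data when backfill is skipped: the keyed map, and the memory it holds for large chunks, only exists on the pollWithBuffer() path.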

flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/src/test/java/org/apache/flink/cdc/connectors/mongodb/source/MongoDBFullChangelogITCase.java

Lines changed: 9 additions & 0 deletions
@@ -517,6 +517,15 @@ private List<String> testBackfillWhenWritingEvents(
             mongoCollection.updateOne(
                     Filters.eq("cid", 2000L), Updates.set("address", "Pittsburgh"));
             mongoCollection.deleteOne(Filters.eq("cid", 1019L));
+
+            // Rarely happens, but if there's no operation or heartbeat events between
+            // watermark #a (the ChangeStream opLog caused by the last event in this hook)
+            // and watermark #b (the calculated high watermark that limits the bounded
+            // back-filling stream fetch task), the last event of hook will be missed since
+            // back-filling task reads between [loW, hiW) (high watermark not included).
+            // Workaround: insert a dummy event in another collection to forcefully push
+            // opLog forward.
+            database.getCollection("customers_1").insertOne(new Document());
         };

         switch (hookType) {
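The comment in this hunk hinges on the bounded backfill reading the half-open interval [loW, hiW): an event that sits exactly at the high watermark position is not replayed, which is why the test pushes the oplog forward with one more write. A tiny illustrative check (not connector code) makes the boundary explicit:

public class BackfillRangeSketch {
    // Illustrative only: backfill replays positions p with loW <= p < hiW, so an event
    // exactly at hiW is skipped unless a later write pushes the log past it.
    static boolean inBackfillRange(long position, long lowWatermark, long highWatermark) {
        return position >= lowWatermark && position < highWatermark;
    }

    public static void main(String[] args) {
        System.out.println(inBackfillRange(10, 5, 10)); // false: event at the high watermark is missed
    }
}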

flink-cdc-connect/flink-cdc-source-connectors/flink-connector-mongodb-cdc/src/test/java/org/apache/flink/cdc/connectors/mongodb/source/NewlyAddedTableITCase.java

Lines changed: 2 additions & 0 deletions
@@ -786,6 +786,8 @@ private void testNewlyAddedCollectionOneByOne(
             waitForUpsertSinkSize("sink", fetchedDataList.size());
             MongoDBAssertUtils.assertEqualsInAnyOrder(
                     fetchedDataList, TestValuesTableFactory.getResultsAsStrings("sink"));
+            // Wait 1s until snapshot phase finished, make sure the binlog data is not lost.
+            Thread.sleep(1000L);

             // step 3: make some changelog data for this round
             makeFirstPartOplogForAddressCollection(
