 
 package org.apache.paimon.flink.sink;
 
+import org.apache.paimon.Snapshot;
+import org.apache.paimon.catalog.Identifier;
 import org.apache.paimon.data.BinaryRow;
 import org.apache.paimon.data.GenericRow;
 import org.apache.paimon.data.InternalRow;
 import org.apache.paimon.flink.FlinkRowData;
 import org.apache.paimon.io.DataFileMeta;
+import org.apache.paimon.manifest.ManifestCommittable;
 import org.apache.paimon.operation.WriteRestore;
+import org.apache.paimon.schema.Schema;
 import org.apache.paimon.table.FileStoreTable;
 import org.apache.paimon.table.TableTestBase;
 import org.apache.paimon.table.sink.SinkRecord;
+import org.apache.paimon.table.sink.TableCommitImpl;
+import org.apache.paimon.table.sink.TableWriteImpl;
+import org.apache.paimon.table.source.InnerTableRead;
+import org.apache.paimon.table.source.PlanImpl;
+import org.apache.paimon.table.source.StreamDataTableScan;
+import org.apache.paimon.table.source.TableScan;
+import org.apache.paimon.table.system.CompactBucketsTable;
+import org.apache.paimon.types.DataTypes;
 import org.apache.paimon.utils.Pair;
 import org.apache.paimon.utils.SerializationUtils;
 
 import org.apache.flink.api.common.ExecutionConfig;
 import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.streaming.api.environment.CheckpointConfig;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
 import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
 import org.apache.flink.table.data.RowData;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
 
+import javax.annotation.Nullable;
+
+import java.util.Collections;
 import java.util.List;
+import java.util.Map;
+import java.util.UUID;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 /** Test for {@link StoreCompactOperator}. */
 public class StoreCompactOperatorTest extends TableTestBase {
@@ -86,6 +106,91 @@ public void testCompactExactlyOnce(boolean streamingMode) throws Exception {
         assertThat(compactRememberStoreWrite.compactTime).isEqualTo(3);
     }
 
+    @Test
+    public void testStreamingCompactConflictWithOverwrite() throws Exception {
+        Schema schema =
+                Schema.newBuilder()
+                        .column("pt", DataTypes.INT())
+                        .column("a", DataTypes.INT())
+                        .column("b", DataTypes.INT())
+                        .partitionKeys("pt")
+                        .primaryKey("pt", "a")
+                        .option("bucket", "1")
+                        .build();
+        Identifier identifier = identifier();
+        catalog.createTable(identifier, schema, false);
+        FileStoreTable table = (FileStoreTable) catalog.getTable(identifier);
+
+        String writeJobCommitUser = UUID.randomUUID().toString();
+        String compactJobCommitUser = UUID.randomUUID().toString();
+
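+        // Scan the CompactBucketsTable system table as a streaming source; it emits the
+        // buckets (and their data files) that a dedicated compaction job should compact.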
+        CompactBucketsTable compactBucketsTable = new CompactBucketsTable(table, true);
+        StreamDataTableScan scan = compactBucketsTable.newStreamScan();
+        InnerTableRead read = compactBucketsTable.newRead();
+
+        CheckpointConfig checkpointConfig = new CheckpointConfig();
+        checkpointConfig.setCheckpointInterval(500);
+        StoreCompactOperator.Factory operatorFactory =
+                new StoreCompactOperator.Factory(
+                        table,
+                        StoreSinkWrite.createWriteProvider(
+                                table, checkpointConfig, true, false, false),
+                        compactJobCommitUser,
+                        true);
+
+        TypeSerializer<Committable> serializer =
+                new CommittableTypeInfo().createSerializer(new ExecutionConfig());
+        OneInputStreamOperatorTestHarness<RowData, Committable> harness =
+                new OneInputStreamOperatorTestHarness<>(operatorFactory);
+        harness.setup(serializer);
+        harness.initializeEmptyState();
+        harness.open();
+        StoreCompactOperator operator = (StoreCompactOperator) harness.getOperator();
+
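+        // The write job uses a write-only copy of the table, so it never compacts by itself;
+        // all compaction in this test goes through the StoreCompactOperator above.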
+        FileStoreTable writeOnlyTable = table.copy(Collections.singletonMap("write-only", "true"));
+
+        // write base data
+        batchWriteAndCommit(writeOnlyTable, writeJobCommitUser, null, GenericRow.of(1, 1, 100));
+        read.createReader(scan.plan())
+                .forEachRemaining(
+                        row -> {
+                            try {
+                                harness.processElement(new StreamRecord<>(new FlinkRowData(row)));
+                            } catch (Exception e) {
+                                throw new RuntimeException(e);
+                            }
+                        });
+
+        List<Committable> committables1 = operator.prepareCommit(true, 1);
+        commit(table, compactJobCommitUser, committables1, 1);
+        assertThat(table.snapshotManager().latestSnapshot().commitKind())
+                .isEqualTo(Snapshot.CommitKind.COMPACT);
+
+        // overwrite and insert
+        batchWriteAndCommit(
+                writeOnlyTable,
+                writeJobCommitUser,
+                Collections.singletonMap("pt", "1"),
+                GenericRow.of(1, 2, 200));
+        batchWriteAndCommit(writeOnlyTable, writeJobCommitUser, null, GenericRow.of(1, 3, 300));
+        assertThat(table.snapshotManager().latestSnapshot().id()).isEqualTo(4);
+        TableScan.Plan plan = scan.plan();
+        assertThat(((PlanImpl) plan).snapshotId()).isEqualTo(4);
+        read.createReader(plan)
+                .forEachRemaining(
+                        row -> {
+                            try {
+                                harness.processElement(new StreamRecord<>(new FlinkRowData(row)));
+                            } catch (Exception e) {
+                                throw new RuntimeException(e);
+                            }
+                        });
+
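+        // The compaction prepared below still rewrites files that the overwrite commit has
+        // already deleted, so committing it is expected to fail with a deletion conflict.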
+        List<Committable> committables2 = operator.prepareCommit(true, 2);
+        assertThatThrownBy(() -> commit(table, compactJobCommitUser, committables2, 2))
+                .hasMessageContaining("File deletion conflicts detected! Give up committing.");
+    }
+
     private RowData data(int bucket) {
         GenericRow genericRow =
                 GenericRow.of(
@@ -96,6 +201,40 @@ private RowData data(int bucket) {
         return new FlinkRowData(genericRow);
     }
 
+    private void batchWriteAndCommit(
+            FileStoreTable table,
+            String commitUser,
+            @Nullable Map<String, String> overwritePartition,
+            InternalRow... rows)
+            throws Exception {
+        try (TableWriteImpl<?> write = table.newWrite(commitUser);
+                TableCommitImpl commit =
+                        table.newCommit(commitUser).withOverwrite(overwritePartition)) {
+            for (InternalRow row : rows) {
+                write.write(row);
+            }
+            commit.commit(write.prepareCommit());
+        }
+    }
+
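+    // Combines the operator's committables into a ManifestCommittable via StoreCommitter and
+    // commits it, roughly what the downstream committer operator of a compaction job does.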
+    private void commit(
+            FileStoreTable table,
+            String commitUser,
+            List<Committable> committables,
+            long checkpointId)
+            throws Exception {
+        try (TableCommitImpl commit = table.newCommit(commitUser)) {
+            StoreCommitter committer =
+                    new StoreCommitter(
+                            table,
+                            commit,
+                            Committer.createContext(commitUser, null, true, false, null, 1, 1));
+            ManifestCommittable manifestCommittable =
+                    committer.combine(checkpointId, System.currentTimeMillis(), committables);
+            committer.commit(Collections.singletonList(manifestCommittable));
+        }
+    }
+
     private static class CompactRememberStoreWrite implements StoreSinkWrite {
 
         private final boolean streamingMode;