import org.apache.paimon.io.DataOutputViewStreamWrapper;
import org.apache.paimon.manifest.FileSource;
import org.apache.paimon.stats.SimpleStats;
+import org.apache.paimon.stats.SimpleStatsEvolutions;
+import org.apache.paimon.types.BigIntType;
+import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.DoubleType;
+import org.apache.paimon.types.FloatType;
+import org.apache.paimon.types.IntType;
+import org.apache.paimon.types.SmallIntType;
+import org.apache.paimon.types.TimestampType;
import org.apache.paimon.utils.IOUtils;
import org.apache.paimon.utils.InstantiationUtil;

import org.junit.jupiter.api.Test;

+import javax.annotation.Nullable;
+
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.ThreadLocalRandom;

import static org.apache.paimon.data.BinaryArray.fromLongArray;
@@ -84,6 +96,70 @@ public void testSplitMergedRowCount() {
        assertThat(split.mergedRowCount()).isEqualTo(5700L);
    }

+    @Test
+    public void testSplitMinMaxValue() {
+        Map<Long, List<DataField>> schemas = new HashMap<>();
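+        // Maps schema id -> fields; passed as the lookup that SimpleStatsEvolutions uses to
+        // evolve per-file stats to the table schema.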
+
+        Timestamp minTs = Timestamp.fromLocalDateTime(LocalDateTime.parse("2025-01-01T00:00:00"));
+        Timestamp maxTs1 = Timestamp.fromLocalDateTime(LocalDateTime.parse("2025-03-01T00:00:00"));
+        Timestamp maxTs2 = Timestamp.fromLocalDateTime(LocalDateTime.parse("2025-03-12T00:00:00"));
+        BinaryRow min1 = newBinaryRow(new Object[] {10, 123L, 888.0D, minTs});
+        BinaryRow max1 = newBinaryRow(new Object[] {99, 456L, 999.0D, maxTs1});
+        SimpleStats valueStats1 = new SimpleStats(min1, max1, fromLongArray(new Long[] {0L}));
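+        // SimpleStats carries a file's per-column min values, max values and null counts.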
+
+        BinaryRow min2 = newBinaryRow(new Object[] {5, 0L, 777.0D, minTs});
+        BinaryRow max2 = newBinaryRow(new Object[] {90, 789L, 899.0D, maxTs2});
+        SimpleStats valueStats2 = new SimpleStats(min2, max2, fromLongArray(new Long[] {0L}));
+
+        // test the common case: file stats cover every column of the table schema.
+        DataFileMeta d1 = newDataFile(100, valueStats1, null);
+        DataFileMeta d2 = newDataFile(100, valueStats2, null);
+        DataSplit split1 = newDataSplit(true, Arrays.asList(d1, d2), null);
+
+        DataField intField = new DataField(0, "c_int", new IntType());
+        DataField longField = new DataField(1, "c_long", new BigIntType());
+        DataField doubleField = new DataField(2, "c_double", new DoubleType());
+        DataField tsField = new DataField(3, "c_ts", new TimestampType());
+        schemas.put(1L, Arrays.asList(intField, longField, doubleField, tsField));
+
+        SimpleStatsEvolutions evolutions = new SimpleStatsEvolutions(schemas::get, 1);
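+        // The split-level min/max is the element-wise min/max over the stats of d1 and d2,
+        // e.g. c_int: min(10, 5) = 5 and max(99, 90) = 99.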
+        assertThat(split1.minValue(0, intField, evolutions)).isEqualTo(5);
+        assertThat(split1.maxValue(0, intField, evolutions)).isEqualTo(99);
+        assertThat(split1.minValue(1, longField, evolutions)).isEqualTo(0L);
+        assertThat(split1.maxValue(1, longField, evolutions)).isEqualTo(789L);
+        assertThat(split1.minValue(2, doubleField, evolutions)).isEqualTo(777D);
+        assertThat(split1.maxValue(2, doubleField, evolutions)).isEqualTo(999D);
+        assertThat(split1.minValue(3, tsField, evolutions)).isEqualTo(minTs);
+        assertThat(split1.maxValue(3, tsField, evolutions)).isEqualTo(maxTs2);
+
+        // test the case where a non-null valueStatsCols is provided and the file schema
+        // differs from the table schema.
+        BinaryRow min3 = newBinaryRow(new Object[] {10, 123L, minTs});
+        BinaryRow max3 = newBinaryRow(new Object[] {99, 456L, maxTs1});
+        SimpleStats valueStats3 = new SimpleStats(min3, max3, fromLongArray(new Long[] {0L}));
+        BinaryRow min4 = newBinaryRow(new Object[] {5, 0L, minTs});
+        BinaryRow max4 = newBinaryRow(new Object[] {90, 789L, maxTs2});
+        SimpleStats valueStats4 = new SimpleStats(min4, max4, fromLongArray(new Long[] {0L}));
+        List<String> valueStatsCols2 = Arrays.asList("c_int", "c_long", "c_ts");
+        DataFileMeta d3 = newDataFile(100, valueStats3, valueStatsCols2);
+        DataFileMeta d4 = newDataFile(100, valueStats4, valueStatsCols2);
+        DataSplit split2 = newDataSplit(true, Arrays.asList(d3, d4), null);
+
+        DataField smallField = new DataField(4, "c_small", new SmallIntType());
+        DataField floatField = new DataField(5, "c_float", new FloatType());
+        schemas.put(2L, Arrays.asList(intField, smallField, tsField, floatField));
+
+        evolutions = new SimpleStatsEvolutions(schemas::get, 2);
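+        // c_small and c_float are not covered by the files' valueStatsCols, so their
+        // evolved min/max should be null.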
+        assertThat(split2.minValue(0, intField, evolutions)).isEqualTo(5);
+        assertThat(split2.maxValue(0, intField, evolutions)).isEqualTo(99);
+        assertThat(split2.minValue(1, smallField, evolutions)).isNull();
+        assertThat(split2.maxValue(1, smallField, evolutions)).isNull();
+        assertThat(split2.minValue(2, tsField, evolutions)).isEqualTo(minTs);
+        assertThat(split2.maxValue(2, tsField, evolutions)).isEqualTo(maxTs2);
+        assertThat(split2.minValue(3, floatField, evolutions)).isNull();
+        assertThat(split2.maxValue(3, floatField, evolutions)).isNull();
+    }
+
    @Test
    public void testSerializer() throws IOException {
        DataFileTestDataGenerator gen = DataFileTestDataGenerator.builder().build();
@@ -436,18 +512,23 @@ public void testSerializerCompatibleV5() throws Exception {
    }

    private DataFileMeta newDataFile(long rowCount) {
+        return newDataFile(rowCount, null, null);
+    }
+
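+    /** Like {@link #newDataFile(long)}, but lets the test supply value stats and the columns they cover. */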
+    private DataFileMeta newDataFile(
+            long rowCount, SimpleStats rowStats, @Nullable List<String> valueStatsCols) {
        return DataFileMeta.forAppend(
                "my_data_file.parquet",
                1024 * 1024,
                rowCount,
-                null,
+                rowStats,
                0L,
-                rowCount,
+                rowCount - 1,
                1,
                Collections.emptyList(),
                null,
                null,
-                null,
+                valueStatsCols,
                null);
    }

@@ -467,4 +548,27 @@ private DataSplit newDataSplit(
        }
        return builder.build();
    }
+
+    private BinaryRow newBinaryRow(Object[] objs) {
+        BinaryRow row = new BinaryRow(objs.length);
+        BinaryRowWriter writer = new BinaryRowWriter(row);
+        writer.reset();
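+        // Write each value with the writer method matching its runtime type;
+        // timestamps are written with precision 5.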
+        for (int i = 0; i < objs.length; i++) {
+            if (objs[i] instanceof Integer) {
+                writer.writeInt(i, (Integer) objs[i]);
+            } else if (objs[i] instanceof Long) {
+                writer.writeLong(i, (Long) objs[i]);
+            } else if (objs[i] instanceof Float) {
+                writer.writeFloat(i, (Float) objs[i]);
+            } else if (objs[i] instanceof Double) {
+                writer.writeDouble(i, (Double) objs[i]);
+            } else if (objs[i] instanceof Timestamp) {
+                writer.writeTimestamp(i, (Timestamp) objs[i], 5);
+            } else {
+                throw new UnsupportedOperationException(
+                        "Unsupported value type: " + objs[i].getClass().getName());
+            }
+        }
+        writer.complete();
+        return row;
+    }
}