1717
1818import static org .junit .jupiter .api .Assertions .*;
1919
20+ import java .util .Arrays ;
21+ import java .util .List ;
22+ import java .util .stream .Collectors ;
2023import org .apache .spark .sql .delta .DeltaParquetFileFormat ;
24+ import org .apache .spark .sql .execution .datasources .parquet .ParquetFileFormat ;
2125import org .apache .spark .sql .types .DataTypes ;
2226import org .apache .spark .sql .types .StructType ;
2327import org .junit .jupiter .api .Test ;
28+ import org .junit .jupiter .params .ParameterizedTest ;
29+ import org .junit .jupiter .params .provider .CsvSource ;
2430
2531public class DeletionVectorSchemaContextTest {
2632
@@ -30,27 +36,64 @@ public class DeletionVectorSchemaContextTest {
3036 private static final StructType PARTITION_SCHEMA =
3137 new StructType ().add ("date" , DataTypes .StringType );
3238
39+ @ ParameterizedTest (name = "useMetadataRowIndex={0}" )
40+ @ CsvSource ({"false, 3, 2" , "true, 4, 3" })
41+ void testSchemaWithDvColumn (
42+ boolean useMetadataRowIndex , int expectedFieldCount , int expectedDvIndex ) {
43+ DeletionVectorSchemaContext context =
44+ new DeletionVectorSchemaContext (DATA_SCHEMA , PARTITION_SCHEMA , useMetadataRowIndex );
45+
46+ StructType schemaWithDv = context .getSchemaWithDvColumn ();
47+ assertEquals (expectedFieldCount , schemaWithDv .fields ().length );
48+ assertEquals ("id" , schemaWithDv .fields ()[0 ].name ());
49+ assertEquals ("name" , schemaWithDv .fields ()[1 ].name ());
50+
51+ if (useMetadataRowIndex ) {
52+ assertEquals (
53+ ParquetFileFormat .ROW_INDEX_TEMPORARY_COLUMN_NAME (), schemaWithDv .fields ()[2 ].name ());
54+ }
55+ assertEquals (
56+ DeltaParquetFileFormat .IS_ROW_DELETED_COLUMN_NAME (),
57+ schemaWithDv .fields ()[expectedDvIndex ].name ());
58+ }
59+
60+ @ ParameterizedTest (name = "useMetadataRowIndex={0}" )
61+ @ CsvSource ({"false, 4" , "true, 5" })
62+ void testInputColumnCount (boolean useMetadataRowIndex , int expectedCount ) {
63+ DeletionVectorSchemaContext context =
64+ new DeletionVectorSchemaContext (DATA_SCHEMA , PARTITION_SCHEMA , useMetadataRowIndex );
65+ assertEquals (expectedCount , context .getInputColumnCount ());
66+ }
67+
68+ @ ParameterizedTest (name = "useMetadataRowIndex={0}" )
69+ @ CsvSource ({"false, '0,1,3'" , "true, '0,1,4'" })
70+ void testOutputColumnOrdinals (boolean useMetadataRowIndex , String expectedOrdinalsStr ) {
71+ DeletionVectorSchemaContext context =
72+ new DeletionVectorSchemaContext (DATA_SCHEMA , PARTITION_SCHEMA , useMetadataRowIndex );
73+
74+ List <Integer > expected =
75+ Arrays .stream (expectedOrdinalsStr .split ("," ))
76+ .map (String ::trim )
77+ .map (Integer ::parseInt )
78+ .collect (Collectors .toList ());
79+ assertEquals (expected , context .getOutputColumnOrdinalsAsList ());
80+ }
81+
3382 @ Test
34- void testWithFullSchemas () {
83+ void testOutputSchema () {
3584 DeletionVectorSchemaContext context =
36- new DeletionVectorSchemaContext (DATA_SCHEMA , PARTITION_SCHEMA );
85+ new DeletionVectorSchemaContext (DATA_SCHEMA , PARTITION_SCHEMA , /* useMetadataRowIndex= */ false );
3786
38- StructType expectedSchemaWithDv =
39- DATA_SCHEMA .add (DeltaParquetFileFormat .IS_ROW_DELETED_STRUCT_FIELD ());
40- assertEquals (expectedSchemaWithDv , context .getSchemaWithDvColumn ());
41- assertEquals (2 , context .getDvColumnIndex ());
42- // Input: 2 data + 1 DV + 1 partition = 4.
43- assertEquals (4 , context .getInputColumnCount ());
44- StructType expectedOutputSchema =
87+ StructType expectedSchema =
4588 DATA_SCHEMA .merge (PARTITION_SCHEMA , /* handleDuplicateColumns= */ false );
46- assertEquals (expectedOutputSchema , context .getOutputSchema ());
89+ assertEquals (expectedSchema , context .getOutputSchema ());
4790 }
4891
4992 @ Test
5093 void testEmptyPartitionSchema () {
51- StructType emptyPartition = new StructType ();
94+ StructType emptyPartitionSchema = new StructType ();
5295 DeletionVectorSchemaContext context =
53- new DeletionVectorSchemaContext (DATA_SCHEMA , emptyPartition );
96+ new DeletionVectorSchemaContext (DATA_SCHEMA , emptyPartitionSchema , /* useMetadataRowIndex= */ false );
5497
5598 StructType expectedSchemaWithDv =
5699 DATA_SCHEMA .add (DeltaParquetFileFormat .IS_ROW_DELETED_STRUCT_FIELD ());
@@ -63,12 +106,12 @@ void testEmptyPartitionSchema() {
63106
64107 @ Test
65108 void testEmptyDataSchema () {
66- StructType emptyData = new StructType ();
109+ StructType emptyDataSchema = new StructType ();
67110 DeletionVectorSchemaContext context =
68- new DeletionVectorSchemaContext (emptyData , PARTITION_SCHEMA );
111+ new DeletionVectorSchemaContext (emptyDataSchema , PARTITION_SCHEMA , /* useMetadataRowIndex= */ false );
69112
70113 StructType expectedSchemaWithDv =
71- emptyData .add (DeltaParquetFileFormat .IS_ROW_DELETED_STRUCT_FIELD ());
114+ emptyDataSchema .add (DeltaParquetFileFormat .IS_ROW_DELETED_STRUCT_FIELD ());
72115 assertEquals (expectedSchemaWithDv , context .getSchemaWithDvColumn ());
73116 assertEquals (0 , context .getDvColumnIndex ());
74117 // Input: 1 DV + 1 partition = 2.
@@ -87,7 +130,9 @@ void testDuplicateDvColumnThrowsException() {
87130 IllegalArgumentException exception =
88131 assertThrows (
89132 IllegalArgumentException .class ,
90- () -> new DeletionVectorSchemaContext (schemaWithDv , new StructType ()));
133+ () ->
134+ new DeletionVectorSchemaContext (
135+ schemaWithDv , new StructType (), /* useMetadataRowIndex= */ false ));
91136
92137 assertTrue (
93138 exception .getMessage ().contains (DeltaParquetFileFormat .IS_ROW_DELETED_COLUMN_NAME ()));
0 commit comments