[Kernel-Spark] Phase 3: Enable file splitting for DV tables with _metadata.row_index support

huan233usc · huan233usc · commit b6b2d6c85c63 · 2026-02-05T18:30:36.000Z
diff --git a/spark/v2/src/main/java/io/delta/spark/internal/v2/read/deletionvector/DeletionVectorSchemaContext.java b/spark/v2/src/main/java/io/delta/spark/internal/v2/read/deletionvector/DeletionVectorSchemaContext.java
@@ -16,8 +16,11 @@
 package io.delta.spark.internal.v2.read.deletionvector;
 
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import org.apache.spark.sql.delta.DeltaParquetFileFormat;
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat;
 import org.apache.spark.sql.types.StructType;
 import scala.collection.immutable.Seq;
 
@@ -35,15 +38,18 @@ public class DeletionVectorSchemaContext implements Serializable {
   private final int inputColumnCount;
   private final StructType outputSchema;
   private final Seq<Object> outputColumnOrdinals;
+  private final List<Integer> outputColumnOrdinalsList;
 
   /**
    * Create a DV schema context for encapsulating schema info and indices needed for DV filtering.
    *
    * @param readDataSchema original data schema without DV column
    * @param partitionSchema partition columns schema
+   * @param useMetadataRowIndex whether to include _metadata.row_index for file splitting support
    * @throws IllegalArgumentException if readDataSchema already contains the DV column
    */
-  public DeletionVectorSchemaContext(StructType readDataSchema, StructType partitionSchema) {
+  public DeletionVectorSchemaContext(
+      StructType readDataSchema, StructType partitionSchema, boolean useMetadataRowIndex) {
     // Validate that readDataSchema doesn't already contain the DV column to ensure the DV column
     // is added only once. While Delta uses the "__delta_internal_" prefix as a naming convention
     // for internal columns (listed in DeltaColumnMapping.DELTA_INTERNAL_COLUMNS), there's no
@@ -54,21 +60,47 @@ public DeletionVectorSchemaContext(StructType readDataSchema, StructType partiti
       throw new IllegalArgumentException(
           "readDataSchema already contains the deletion vector column: " + dvColumnName);
     }
+
+    // Build schema: data columns + (optional row_index) + DV column
+    StructType schemaBuilder = readDataSchema;
+    if (useMetadataRowIndex) {
+      schemaBuilder = schemaBuilder.add(ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME(), "long");
+    }
     this.schemaWithDvColumn =
-        readDataSchema.add(DeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD());
+        schemaBuilder.add(DeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD());
+
     this.dvColumnIndex =
         schemaWithDvColumn.fieldIndex(DeltaParquetFileFormat.IS_ROW_DELETED_COLUMN_NAME());
     this.inputColumnCount = schemaWithDvColumn.fields().length + partitionSchema.fields().length;
     this.outputSchema = readDataSchema.merge(partitionSchema, /* handleDuplicateColumns= */ false);
-    // Pre-compute output column ordinals: all indices except dvColumnIndex.
-    int[] ordinals = new int[inputColumnCount - 1];
-    int idx = 0;
-    for (int i = 0; i < inputColumnCount; i++) {
-      if (i != dvColumnIndex) {
-        ordinals[idx++] = i;
-      }
+
+    // Pre-compute output column ordinals: data columns + partition columns (skip row_index and DV)
+    List<Integer> ordinals = new ArrayList<>();
+    int partitionStartIdx = schemaWithDvColumn.fields().length;
+
+    // Add data column indices (0 to readDataSchema.length - 1)
+    for (int i = 0; i < readDataSchema.fields().length; i++) {
+      ordinals.add(i);
+    }
+    // Add partition column indices
+    for (int i = 0; i < partitionSchema.fields().length; i++) {
+      ordinals.add(partitionStartIdx + i);
     }
-    this.outputColumnOrdinals = scala.Predef.wrapIntArray(ordinals).toSeq();
+
+    this.outputColumnOrdinalsList = ordinals;
+    int[] ordinalsArray = ordinals.stream().mapToInt(Integer::intValue).toArray();
+    this.outputColumnOrdinals = scala.Predef.wrapIntArray(ordinalsArray).toSeq();
+  }
+
+  /**
+   * Create a DV schema context without row_index support (passes useMetadataRowIndex=false).
+   *
+   * @param readDataSchema original data schema without DV column
+   * @param partitionSchema partition columns schema
+   * @throws IllegalArgumentException if readDataSchema already contains the DV column
+   */
+  public DeletionVectorSchemaContext(StructType readDataSchema, StructType partitionSchema) {
+    this(readDataSchema, partitionSchema, /* useMetadataRowIndex= */ false);
   }
 
   /** Returns schema with the __delta_internal_is_row_deleted column added. */
@@ -92,4 +124,9 @@ public StructType getOutputSchema() {
   public Seq<Object> getOutputColumnOrdinals() {
     return outputColumnOrdinals;
   }
+
+  /** Returns the pre-computed output column ordinals as a read-only Java List (for testing). */
+  public List<Integer> getOutputColumnOrdinalsAsList() {
+    return java.util.Collections.unmodifiableList(outputColumnOrdinalsList);
+  }
 }
diff --git a/spark/v2/src/test/java/io/delta/spark/internal/v2/read/deletionvector/DeletionVectorSchemaContextTest.java b/spark/v2/src/test/java/io/delta/spark/internal/v2/read/deletionvector/DeletionVectorSchemaContextTest.java
@@ -17,10 +17,16 @@
 
 import static org.junit.jupiter.api.Assertions.*;
 
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
 import org.apache.spark.sql.delta.DeltaParquetFileFormat;
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat;
 import org.apache.spark.sql.types.DataTypes;
 import org.apache.spark.sql.types.StructType;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
 
 public class DeletionVectorSchemaContextTest {
 
@@ -30,27 +36,64 @@ public class DeletionVectorSchemaContextTest {
   private static final StructType PARTITION_SCHEMA =
       new StructType().add("date", DataTypes.StringType);
 
+  @ParameterizedTest(name = "useMetadataRowIndex={0}")
+  @CsvSource({"false, 3, 2", "true, 4, 3"})
+  void testSchemaWithDvColumn(
+      boolean useMetadataRowIndex, int expectedFieldCount, int expectedDvIndex) {
+    // Expected layout: data columns, then the optional row_index column, then the DV column.
+    StructType augmented =
+        new DeletionVectorSchemaContext(DATA_SCHEMA, PARTITION_SCHEMA, useMetadataRowIndex)
+            .getSchemaWithDvColumn();
+
+    assertEquals(expectedFieldCount, augmented.fields().length);
+    assertEquals("id", augmented.fields()[0].name());
+    assertEquals("name", augmented.fields()[1].name());
+    if (useMetadataRowIndex) {
+      assertEquals(
+          ParquetFileFormat.ROW_INDEX_TEMPORARY_COLUMN_NAME(), augmented.fields()[2].name());
+    }
+    assertEquals(
+        DeltaParquetFileFormat.IS_ROW_DELETED_COLUMN_NAME(),
+        augmented.fields()[expectedDvIndex].name());
+  }
+
+  @ParameterizedTest(name = "useMetadataRowIndex={0}")
+  @CsvSource({"false, 4", "true, 5"})
+  void testInputColumnCount(boolean useMetadataRowIndex, int expectedCount) {
+    DeletionVectorSchemaContext context =
+        new DeletionVectorSchemaContext(DATA_SCHEMA, PARTITION_SCHEMA, useMetadataRowIndex);
+    assertEquals(expectedCount, context.getInputColumnCount()); // schemaWithDv + partition fields
+  }
+
+  @ParameterizedTest(name = "useMetadataRowIndex={0}")
+  @CsvSource({"false, '0,1,3'", "true, '0,1,4'"})
+  void testOutputColumnOrdinals(boolean useMetadataRowIndex, String expectedOrdinalsStr) {
+    DeletionVectorSchemaContext schemaContext =
+        new DeletionVectorSchemaContext(DATA_SCHEMA, PARTITION_SCHEMA, useMetadataRowIndex);
+
+    // Parse the comma-separated CSV cell into the expected ordinals, in order.
+    List<Integer> wantedOrdinals =
+        Arrays.stream(expectedOrdinalsStr.split(","))
+            .map(token -> Integer.parseInt(token.trim()))
+            .collect(Collectors.toList());
+    assertEquals(wantedOrdinals, schemaContext.getOutputColumnOrdinalsAsList());
+  }
+
   @Test
-  void testWithFullSchemas() {
+  void testOutputSchema() {
     DeletionVectorSchemaContext context =
-        new DeletionVectorSchemaContext(DATA_SCHEMA, PARTITION_SCHEMA);
+        new DeletionVectorSchemaContext(DATA_SCHEMA, PARTITION_SCHEMA, /* useMetadataRowIndex= */ false);
 
-    StructType expectedSchemaWithDv =
-        DATA_SCHEMA.add(DeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD());
-    assertEquals(expectedSchemaWithDv, context.getSchemaWithDvColumn());
-    assertEquals(2, context.getDvColumnIndex());
-    // Input: 2 data + 1 DV + 1 partition = 4.
-    assertEquals(4, context.getInputColumnCount());
-    StructType expectedOutputSchema =
+    StructType expectedSchema =
         DATA_SCHEMA.merge(PARTITION_SCHEMA, /* handleDuplicateColumns= */ false);
-    assertEquals(expectedOutputSchema, context.getOutputSchema());
+    assertEquals(expectedSchema, context.getOutputSchema());
   }
 
   @Test
   void testEmptyPartitionSchema() {
-    StructType emptyPartition = new StructType();
+    StructType emptyPartitionSchema = new StructType();
     DeletionVectorSchemaContext context =
-        new DeletionVectorSchemaContext(DATA_SCHEMA, emptyPartition);
+        new DeletionVectorSchemaContext(DATA_SCHEMA, emptyPartitionSchema, /* useMetadataRowIndex= */ false);
 
     StructType expectedSchemaWithDv =
         DATA_SCHEMA.add(DeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD());
@@ -63,12 +106,12 @@ void testEmptyPartitionSchema() {
 
   @Test
   void testEmptyDataSchema() {
-    StructType emptyData = new StructType();
+    StructType emptyDataSchema = new StructType();
     DeletionVectorSchemaContext context =
-        new DeletionVectorSchemaContext(emptyData, PARTITION_SCHEMA);
+        new DeletionVectorSchemaContext(emptyDataSchema, PARTITION_SCHEMA, /* useMetadataRowIndex= */ false);
 
     StructType expectedSchemaWithDv =
-        emptyData.add(DeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD());
+        emptyDataSchema.add(DeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD());
     assertEquals(expectedSchemaWithDv, context.getSchemaWithDvColumn());
     assertEquals(0, context.getDvColumnIndex());
     // Input: 1 DV + 1 partition = 2.
@@ -87,7 +130,9 @@ void testDuplicateDvColumnThrowsException() {
     IllegalArgumentException exception =
         assertThrows(
             IllegalArgumentException.class,
-            () -> new DeletionVectorSchemaContext(schemaWithDv, new StructType()));
+            () ->
+                new DeletionVectorSchemaContext(
+                    schemaWithDv, new StructType(), /* useMetadataRowIndex= */ false));
 
     assertTrue(
         exception.getMessage().contains(DeltaParquetFileFormat.IS_ROW_DELETED_COLUMN_NAME()));
diff --git a/spark/v2/src/test/java/io/delta/spark/internal/v2/read/deletionvector/DeletionVectorVectorizedReaderTest.java b/spark/v2/src/test/java/io/delta/spark/internal/v2/read/deletionvector/DeletionVectorVectorizedReaderTest.java
@@ -136,6 +136,37 @@ private Row convertInternalRowToRow(InternalRow internalRow, StructType schema)
     return new org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema(values, schema);
   }
 
+  /**
+   * Test that file splitting is enabled for DV tables when useMetadataRowIndex=true (default). This
+   * verifies PR4's key change: optimizationsEnabled=true enables isSplitable=true.
+   */
+  @Test
+  public void testFileSplittingEnabledForDvTable() throws Exception {
+    String tableName = "dv-partitioned-with-checkpoint";
+    String tablePath = goldenTablePath(tableName);
+
+    SparkTable table =
+        new SparkTable(
+            Identifier.of(new String[] {"spark_catalog", "default"}, tableName), tablePath);
+    SparkScanBuilder scanBuilder =
+        (SparkScanBuilder) table.newScanBuilder(new CaseInsensitiveStringMap(java.util.Map.of()));
+    SparkScan scan = (SparkScan) scanBuilder.build();
+    Batch batch = scan.toBatch();
+
+    // Verify that input partitions can be planned for the DV table
+    InputPartition[] partitions = batch.planInputPartitions();
+    assertTrue(partitions.length > 0, "Should have at least one partition");
+
+    // The key intent: with useMetadataRowIndex=true (default), DV tables allow file splitting.
+    // NOTE(review): this is only checked indirectly, via planInputPartitions() succeeding and
+    // supportColumnarReads() returning true. If optimizationsEnabled were false,
+    // isSplitable would return false, preventing efficient file partitioning.
+    PartitionReaderFactory readerFactory = batch.createReaderFactory();
+    assertTrue(
+        readerFactory.supportColumnarReads(partitions[0]),
+        "DV table with useMetadataRowIndex=true should support columnar reads");
+  }
+
   private String goldenTablePath(String name) {
     return GoldenTableUtils$.MODULE$.goldenTablePath(name);
   }