
Commit 37fff90

[Kernel-Spark] Phase 1: Basic Deletion Vector read support (delta-io#5774)
## 🥞 Stacked PR

Use this [link](https://github.com/delta-io/delta/pull/5774/files) to review incremental changes.

- [**stack/dv_pr2_phase1_basic_read**](delta-io#5774) [[Files changed](https://github.com/delta-io/delta/pull/5774/files)]
- [stack/dv_pr3_phase2_vectorized](delta-io#5775) [[Files changed](https://github.com/delta-io/delta/pull/5775/files/6b90bdd14bd86a5bcfb109fb12d9447b769d4d24..9446011c512f4fa4d7f06417eb4629671813dbe4)]
- [stack/dv_pr4_phase3_file_splitting](delta-io#5776) [[Files changed](https://github.com/delta-io/delta/pull/5776/files/9446011c512f4fa4d7f06417eb4629671813dbe4..5e798ef3a37c07b86f12de6f5465a5db3fbff153)]
- [stack/dv_pr5_streaming_support](delta-io#5877) [[Files changed](https://github.com/delta-io/delta/pull/5877/files/5e798ef3a37c07b86f12de6f5465a5db3fbff153..1c4ddacd08654e2d778bfdfb992320cb6dcead9f)]
- [stack/dv_pr5_test](delta-io#5975) [[Files changed](https://github.com/delta-io/delta/pull/5975/files/6b90bdd14bd86a5bcfb109fb12d9447b769d4d24..24ae4ed8d474449407582b05c0c0ab45f97a72ed)]

#### Which Delta project/connector is this regarding?

- [x] Spark
- [ ] Standalone
- [ ] Flink
- [x] Kernel
- [ ] Other (fill in here)

## Description

Add basic deletion vector (DV) read support for the Spark V2 connector using row-based filtering.

### Changes:

- `DvSchemaContext`: POJO to manage DV schema context (column indices, output schema)
- `DeletionVectorReadFunction`: Wraps the base reader to filter deleted rows and project out the DV column
- `PartitionUtils`: Creates a DV-aware `PartitionReaderFactory` with `DeltaParquetFileFormatV2`
- Add `serializeToBase64()` to Kernel's `DeletionVectorDescriptor`

### How it works:

1. Add the `__delta_internal_is_row_deleted` column to the read schema
2. Filter out rows where the DV column != 0 (deleted)
3. Project out the DV column from the output

## How was this patch tested?

- `DvSchemaContextTest`: Unit tests for schema manipulation
- `DeletionVectorReadFunctionTest`: Unit tests for row filtering and projection
- Golden table tests with DV tables pass

## Does this PR introduce _any_ user-facing changes?

No
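The three steps above can be sketched independently of Spark's `InternalRow` machinery. The following is a simplified illustration only (plain `Object[]` rows and a hypothetical `filterAndProject` helper, not the connector's actual code):

```java
import java.util.ArrayList;
import java.util.List;

// Simplified sketch of the DV read path: each row carries an extra byte column
// (the __delta_internal_is_row_deleted flag); keep rows where it is 0, then
// project that column out of the output.
public class DvFilterSketch {

  /** Keep rows whose DV byte is 0, then drop the DV column at dvIndex. */
  public static List<Object[]> filterAndProject(List<Object[]> rows, int dvIndex) {
    List<Object[]> out = new ArrayList<>();
    for (Object[] row : rows) {
      if ((byte) row[dvIndex] != (byte) 0) {
        continue; // row is marked deleted by the deletion vector
      }
      Object[] projected = new Object[row.length - 1];
      int j = 0;
      for (int i = 0; i < row.length; i++) {
        if (i != dvIndex) {
          projected[j++] = row[i];
        }
      }
      out.add(projected);
    }
    return out;
  }

  public static void main(String[] args) {
    List<Object[]> rows = List.of(
        new Object[] {"a", 1, (byte) 0},  // kept
        new Object[] {"b", 2, (byte) 1},  // deleted
        new Object[] {"c", 3, (byte) 0}); // kept
    List<Object[]> result = filterAndProject(rows, 2);
    System.out.println(result.size()); // 2
  }
}
```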
1 parent c0eebcd commit 37fff90

File tree

11 files changed: +801 −24 lines


spark/v2/src/main/java/io/delta/spark/internal/v2/read/SparkPartitionReader.java

Lines changed: 13 additions & 12 deletions
```diff
@@ -15,26 +15,25 @@
  */
 package io.delta.spark.internal.v2.read;
 
+import java.io.Closeable;
 import java.io.IOException;
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.connector.read.PartitionReader;
 import org.apache.spark.sql.execution.datasources.FilePartition;
 import org.apache.spark.sql.execution.datasources.PartitionedFile;
-import org.apache.spark.sql.execution.datasources.RecordReaderIterator;
 import scala.Function1;
 import scala.collection.Iterator;
 
 public class SparkPartitionReader<T> implements PartitionReader<T> {
-  // Function that produces a Spark RecordReaderIterator for a given file.
+  // Function that produces an Iterator for a given file.
   private final Function1<PartitionedFile, Iterator<InternalRow>> readFunc;
   private final FilePartition partition;
 
   // Index of the next file to read within the partition.
   private int currentFileIndex = 0;
 
-  // Spark's readers return RecordReaderIterator for both row and columnar modes.
-  // Keep a reference so it can be closed when advancing to the next file.
-  private RecordReaderIterator<T> currentIterator = null;
+  // Current iterator for the file being read.
+  private Iterator<T> currentIterator = null;
 
   public SparkPartitionReader(
       Function1<PartitionedFile, Iterator<InternalRow>> readFunc, FilePartition partition) {
@@ -50,18 +49,15 @@ public boolean next() throws IOException {
         return true;
       }
 
-      if (currentIterator != null) {
-        currentIterator.close();
-        currentIterator = null;
-      }
+      closeCurrentIterator();
 
       if (currentFileIndex >= partition.files().length) {
         return false;
       }
 
       final PartitionedFile file = partition.files()[currentFileIndex++];
       @SuppressWarnings("unchecked")
-      RecordReaderIterator<T> it = (RecordReaderIterator<T>) readFunc.apply(file);
+      Iterator<T> it = (Iterator<T>) readFunc.apply(file);
       currentIterator = it;
     }
   }
@@ -71,14 +67,19 @@ public T get() {
     if (currentIterator == null) {
       throw new IllegalStateException("No current record. Call next() before get().");
     }
-    // RecordReaderIterator.next() returns the current record and advances the iterator.
     return currentIterator.next();
   }
 
   @Override
   public void close() throws IOException {
+    closeCurrentIterator();
+  }
+
+  private void closeCurrentIterator() throws IOException {
     if (currentIterator != null) {
-      currentIterator.close();
+      if (currentIterator instanceof Closeable) {
+        ((Closeable) currentIterator).close();
+      }
       currentIterator = null;
     }
   }
```
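The change above generalizes the reader from `RecordReaderIterator` to any `Iterator`, closing it only when it actually implements `Closeable`. A minimal standalone sketch of that pattern (illustrative names, no Spark types):

```java
import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

// Sketch: close the current per-file iterator only if it supports Closeable,
// mirroring the closeCurrentIterator() helper added to SparkPartitionReader.
public class CloseIfCloseableSketch {

  public static void closeIfCloseable(Iterator<?> it) throws IOException {
    if (it instanceof Closeable) {
      ((Closeable) it).close();
    }
  }

  /** A test iterator that records whether close() was called. */
  public static class TrackingIterator implements Iterator<Integer>, Closeable {
    public boolean closed = false;

    @Override public boolean hasNext() { return false; }
    @Override public Integer next() { throw new NoSuchElementException(); }
    @Override public void close() { closed = true; }
  }

  public static void main(String[] args) throws IOException {
    TrackingIterator tracking = new TrackingIterator();
    closeIfCloseable(tracking);                        // Closeable: gets closed
    closeIfCloseable(java.util.List.of(1).iterator()); // plain iterator: no-op
    System.out.println(tracking.closed);               // true
  }
}
```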
Lines changed: 86 additions & 0 deletions
New file (86 lines):

```java
/*
 * Copyright (2026) The Delta Lake Project Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.delta.spark.internal.v2.read.deletionvector;

import io.delta.spark.internal.v2.utils.CloseableIterator;
import java.io.Serializable;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.ProjectingInternalRow;
import org.apache.spark.sql.execution.datasources.PartitionedFile;
import scala.Function1;
import scala.collection.Iterator;
import scala.runtime.AbstractFunction1;

/**
 * Wraps a Parquet reader function to apply deletion vector filtering.
 *
 * <p>This function:
 *
 * <ol>
 *   <li>Reads rows from the base Parquet reader (which includes the is_row_deleted column)
 *   <li>Filters out deleted rows (where is_row_deleted != 0)
 *   <li>Projects out the is_row_deleted column from the output
 * </ol>
 *
 * <p>The returned iterator implements {@link java.io.Closeable} to ensure proper resource cleanup
 * of the underlying Parquet reader, even when the iterator is not fully consumed.
 */
public class DeletionVectorReadFunction
    extends AbstractFunction1<PartitionedFile, Iterator<InternalRow>> implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Byte value in the DV column indicating the row is NOT deleted (row should be kept). */
  private static final byte ROW_NOT_DELETED = 0;

  private final Function1<PartitionedFile, Iterator<InternalRow>> baseReadFunc;
  private final DeletionVectorSchemaContext dvSchemaContext;

  private DeletionVectorReadFunction(
      Function1<PartitionedFile, Iterator<InternalRow>> baseReadFunc,
      DeletionVectorSchemaContext dvSchemaContext) {
    this.baseReadFunc = baseReadFunc;
    this.dvSchemaContext = dvSchemaContext;
  }

  @Override
  public Iterator<InternalRow> apply(PartitionedFile file) {
    int dvColumnIndex = dvSchemaContext.getDvColumnIndex();
    // Use pre-computed ordinals from DeletionVectorSchemaContext.
    ProjectingInternalRow projection =
        ProjectingInternalRow.apply(
            dvSchemaContext.getOutputSchema(), dvSchemaContext.getOutputColumnOrdinals());

    // Wrap the base iterator as CloseableIterator to preserve close() through filter/map.
    // This ensures proper resource cleanup even when the iterator is not fully consumed.
    Iterator<InternalRow> baseIterator = baseReadFunc.apply(file);

    return CloseableIterator.wrap(baseIterator)
        .filterCloseable(row -> row.getByte(dvColumnIndex) == ROW_NOT_DELETED)
        .mapCloseable(
            row -> {
              projection.project(row);
              return (InternalRow) projection;
            });
  }

  /** Factory method to wrap a reader function with DV filtering. */
  public static DeletionVectorReadFunction wrap(
      Function1<PartitionedFile, Iterator<InternalRow>> baseReadFunc,
      DeletionVectorSchemaContext dvSchemaContext) {
    return new DeletionVectorReadFunction(baseReadFunc, dvSchemaContext);
  }
}
```
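The key design point above is that `filterCloseable`/`mapCloseable` return wrappers that still forward `close()` to the source iterator, so the Parquet reader is released even on early termination. A minimal sketch of that idea using `java.util.Iterator` (the class and names here are illustrative, not the connector's `CloseableIterator` API):

```java
import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.function.Predicate;

// Sketch: a filtering iterator that stays Closeable, forwarding close()
// to the underlying source even if iteration stops early.
public class CloseableFilterSketch<T> implements Iterator<T>, Closeable {
  private final Iterator<T> source;
  private final Predicate<T> keep;
  private T nextValue;
  private boolean hasBuffered = false;

  public CloseableFilterSketch(Iterator<T> source, Predicate<T> keep) {
    this.source = source;
    this.keep = keep;
  }

  @Override
  public boolean hasNext() {
    // Pull from the source until a matching element is buffered.
    while (!hasBuffered && source.hasNext()) {
      T candidate = source.next();
      if (keep.test(candidate)) {
        nextValue = candidate;
        hasBuffered = true;
      }
    }
    return hasBuffered;
  }

  @Override
  public T next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    hasBuffered = false;
    return nextValue;
  }

  @Override
  public void close() throws IOException {
    // Forward close() to the source, preserving cleanup through the wrapper.
    if (source instanceof Closeable) {
      ((Closeable) source).close();
    }
  }
}
```

Chaining several such wrappers keeps a single `close()` call at the outermost layer sufficient to release the innermost reader, which is the property the comment in `apply` relies on.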
Lines changed: 95 additions & 0 deletions
New file (95 lines):

```java
/*
 * Copyright (2026) The Delta Lake Project Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.delta.spark.internal.v2.read.deletionvector;

import java.io.Serializable;
import java.util.Arrays;
import org.apache.spark.sql.delta.DeltaParquetFileFormat;
import org.apache.spark.sql.types.StructType;
import scala.collection.immutable.Seq;

/**
 * Schema context for deletion vector processing in the V2 connector.
 *
 * <p>Encapsulates schema with DV column and pre-computed indices needed for DV filtering.
 */
public class DeletionVectorSchemaContext implements Serializable {

  private static final long serialVersionUID = 1L;

  private final StructType schemaWithDvColumn;
  private final int dvColumnIndex;
  private final int inputColumnCount;
  private final StructType outputSchema;
  private final Seq<Object> outputColumnOrdinals;

  /**
   * Create a DV schema context for encapsulating schema info and indices needed for DV filtering.
   *
   * @param readDataSchema original data schema without DV column
   * @param partitionSchema partition columns schema
   * @throws IllegalArgumentException if readDataSchema already contains the DV column
   */
  public DeletionVectorSchemaContext(StructType readDataSchema, StructType partitionSchema) {
    // Validate that readDataSchema doesn't already contain the DV column to ensure the DV column
    // is added only once. While Delta uses the "__delta_internal_" prefix as a naming convention
    // for internal columns (listed in DeltaColumnMapping.DELTA_INTERNAL_COLUMNS), there's no
    // enforced schema validation that prevents users from creating such columns. This check
    // provides a safety guard in the V2 connector.
    String dvColumnName = DeltaParquetFileFormat.IS_ROW_DELETED_COLUMN_NAME();
    if (Arrays.asList(readDataSchema.fieldNames()).contains(dvColumnName)) {
      throw new IllegalArgumentException(
          "readDataSchema already contains the deletion vector column: " + dvColumnName);
    }
    this.schemaWithDvColumn =
        readDataSchema.add(DeltaParquetFileFormat.IS_ROW_DELETED_STRUCT_FIELD());
    this.dvColumnIndex =
        schemaWithDvColumn.fieldIndex(DeltaParquetFileFormat.IS_ROW_DELETED_COLUMN_NAME());
    this.inputColumnCount = schemaWithDvColumn.fields().length + partitionSchema.fields().length;
    this.outputSchema = readDataSchema.merge(partitionSchema, /* handleDuplicateColumns= */ false);
    // Pre-compute output column ordinals: all indices except dvColumnIndex.
    int[] ordinals = new int[inputColumnCount - 1];
    int idx = 0;
    for (int i = 0; i < inputColumnCount; i++) {
      if (i != dvColumnIndex) {
        ordinals[idx++] = i;
      }
    }
    this.outputColumnOrdinals = scala.Predef.wrapIntArray(ordinals).toSeq();
  }

  /** Returns schema with the __delta_internal_is_row_deleted column added. */
  public StructType getSchemaWithDvColumn() {
    return schemaWithDvColumn;
  }

  public int getDvColumnIndex() {
    return dvColumnIndex;
  }

  public int getInputColumnCount() {
    return inputColumnCount;
  }

  public StructType getOutputSchema() {
    return outputSchema;
  }

  /** Returns pre-computed output column ordinals for ProjectingInternalRow. */
  public Seq<Object> getOutputColumnOrdinals() {
    return outputColumnOrdinals;
  }
}
```
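The ordinal pre-computation in the constructor is simple enough to check in isolation. A standalone sketch (with a hypothetical `outputOrdinals` helper name) of the same loop:

```java
import java.util.Arrays;

// Sketch of the ordinal computation above: every column index in
// [0, inputColumnCount) except the DV column's index.
public class OutputOrdinalsSketch {

  public static int[] outputOrdinals(int inputColumnCount, int dvColumnIndex) {
    int[] ordinals = new int[inputColumnCount - 1];
    int idx = 0;
    for (int i = 0; i < inputColumnCount; i++) {
      if (i != dvColumnIndex) {
        ordinals[idx++] = i;
      }
    }
    return ordinals;
  }

  public static void main(String[] args) {
    // e.g. 5 input columns with the DV column at index 2 → project columns 0, 1, 3, 4
    System.out.println(Arrays.toString(outputOrdinals(5, 2))); // [0, 1, 3, 4]
  }
}
```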
