Commit cc08ff2

Spark 4.0: Implement SupportsReportOrdering DSv2 API
1 parent 296a06d commit cc08ff2

15 files changed (+1805, -6 lines)

spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkReadConf.java

Lines changed: 8 additions & 0 deletions
@@ -268,6 +268,14 @@ public boolean preserveDataGrouping() {
         .parse();
   }
 
+  public boolean preserveDataOrdering() {
+    return confParser
+        .booleanConf()
+        .sessionConf(SparkSQLProperties.PRESERVE_DATA_ORDERING)
+        .defaultValue(SparkSQLProperties.PRESERVE_DATA_ORDERING_DEFAULT)
+        .parse();
+  }
+
   public boolean aggregatePushDownEnabled() {
     return confParser
         .booleanConf()
spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/SparkSQLProperties.java

Lines changed: 5 additions & 0 deletions
@@ -43,6 +43,11 @@ private SparkSQLProperties() {}
       "spark.sql.iceberg.planning.preserve-data-grouping";
   public static final boolean PRESERVE_DATA_GROUPING_DEFAULT = false;
 
+  // Controls whether to preserve data ordering and report it to Spark
+  public static final String PRESERVE_DATA_ORDERING =
+      "spark.sql.iceberg.planning.preserve-data-ordering";
+  public static final boolean PRESERVE_DATA_ORDERING_DEFAULT = false;
+
   // Controls whether to push down aggregate (MAX/MIN/COUNT) to Iceberg
   public static final String AGGREGATE_PUSH_DOWN_ENABLED =
       "spark.sql.iceberg.aggregate-push-down.enabled";
Lines changed: 72 additions & 0 deletions

@@ -0,0 +1,72 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.spark.source;

import java.util.Comparator;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.SortOrderComparators;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.types.StructType;

/**
 * A comparator for Spark {@link InternalRow} objects based on an Iceberg {@link SortOrder}.
 *
 * <p>This comparator adapts Spark's InternalRow to Iceberg's StructLike interface and delegates to
 * Iceberg's existing {@link SortOrderComparators} infrastructure, which provides full support for:
 *
 * <ul>
 *   <li>All Iceberg data types
 *   <li>ASC/DESC sort directions
 *   <li>NULLS_FIRST/NULLS_LAST null ordering
 *   <li>Transform functions (identity, bucket, truncate, etc.)
 * </ul>
 *
 * <p><strong>This class is NOT thread-safe.</strong>
 */
class InternalRowComparator implements Comparator<InternalRow> {
  private final Comparator<StructLike> delegate;
  private final InternalRowWrapper leftWrapper;
  private final InternalRowWrapper rightWrapper;

  /**
   * Creates a comparator for the given sort order and schemas.
   *
   * @param sortOrder the Iceberg sort order to use for comparison
   * @param sparkSchema the Spark schema of the rows to compare
   * @param icebergSchema the Iceberg schema of the rows to compare
   */
  InternalRowComparator(SortOrder sortOrder, StructType sparkSchema, Schema icebergSchema) {
    Preconditions.checkArgument(
        sortOrder.isSorted(), "Cannot create comparator for unsorted order");
    Preconditions.checkNotNull(sparkSchema, "Spark schema cannot be null");
    Preconditions.checkNotNull(icebergSchema, "Iceberg schema cannot be null");

    this.delegate = SortOrderComparators.forSchema(icebergSchema, sortOrder);
    this.leftWrapper = new InternalRowWrapper(sparkSchema, icebergSchema.asStruct());
    this.rightWrapper = new InternalRowWrapper(sparkSchema, icebergSchema.asStruct());
  }

  @Override
  public int compare(InternalRow row1, InternalRow row2) {
    return delegate.compare(leftWrapper.wrap(row1), rightWrapper.wrap(row2));
  }
}
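
A minimal usage sketch (not from this commit) showing how a caller could build this comparator from a table's active sort order. It assumes the helper is compiled in the same org.apache.iceberg.spark.source package, since the class is package-private, and that a sorted Iceberg Table handle is already available; the helper name is hypothetical.

// Hypothetical caller-side sketch; assumes the same package as InternalRowComparator.
package org.apache.iceberg.spark.source;

import java.util.Comparator;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.types.StructType;

class ComparatorUsageSketch {
  static Comparator<InternalRow> orderingFor(Table table) {
    Schema schema = table.schema();
    SortOrder sortOrder = table.sortOrder(); // the table's active sort order
    StructType sparkSchema = SparkSchemaUtil.convert(schema);

    // Per-field comparison (direction, null ordering, transforms) is delegated
    // to SortOrderComparators via the wrapper-based adapter above.
    return new InternalRowComparator(sortOrder, sparkSchema, schema);
  }
}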
Lines changed: 150 additions & 0 deletions

@@ -0,0 +1,150 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.spark.source;

import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.util.SortedMerge;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.connector.read.PartitionReader;
import org.apache.spark.sql.types.StructType;

/**
 * A {@link PartitionReader} that performs a k-way merge of multiple sorted readers.
 *
 * <p>This reader takes multiple {@link PartitionReader}s (one per file), each producing sorted data
 * according to the same {@link SortOrder}, and merges them into a single sorted stream using
 * Iceberg's {@link SortedMerge} utility.
 *
 * <p>The merge is performed using a priority queue (heap) to efficiently select the next row from
 * among all readers, maintaining the sort order with O(log k) comparisons per row, where k is the
 * number of files being merged.
 *
 * @param <T> the type of InternalRow being read
 */
class MergingPartitionReader<T extends InternalRow> implements PartitionReader<T> {
  private final List<PartitionReader<T>> readers;
  private final CloseableIterator<T> mergedIterator;
  private T current = null;
  private boolean closed = false;

  MergingPartitionReader(
      List<PartitionReader<T>> readers,
      SortOrder sortOrder,
      StructType sparkSchema,
      Schema icebergSchema) {
    Preconditions.checkNotNull(readers, "Readers cannot be null");
    Preconditions.checkArgument(!readers.isEmpty(), "Readers cannot be empty");
    Preconditions.checkNotNull(sortOrder, "Sort order cannot be null");
    Preconditions.checkArgument(sortOrder.isSorted(), "Sort order must be sorted");

    this.readers = readers;

    Comparator<T> comparator =
        (Comparator<T>) new InternalRowComparator(sortOrder, sparkSchema, icebergSchema);

    List<CloseableIterable<T>> iterables =
        readers.stream().map(this::readerToIterable).collect(Collectors.toList());

    SortedMerge<T> sortedMerge = new SortedMerge<>(comparator, iterables);
    this.mergedIterator = sortedMerge.iterator();
  }

  /** Converts a PartitionReader to a CloseableIterable for use with SortedMerge. */
  private CloseableIterable<T> readerToIterable(PartitionReader<T> reader) {
    return new CloseableIterable<T>() {
      @Override
      public CloseableIterator<T> iterator() {
        return new CloseableIterator<T>() {
          private boolean advanced = false;
          private boolean hasNext = false;

          @Override
          public boolean hasNext() {
            if (!advanced) {
              try {
                hasNext = reader.next();
                advanced = true;
              } catch (IOException e) {
                throw new RuntimeException("Failed to advance reader", e);
              }
            }
            return hasNext;
          }

          @Override
          public T next() {
            if (!advanced) {
              hasNext();
            }
            advanced = false;
            // Spark readers reuse InternalRow objects for performance (see
            // SparkParquetReaders.java:547).
            // Return a copy of the row to avoid corruption.
            return (T) reader.get().copy();
          }

          @Override
          public void close() throws IOException {
            reader.close();
          }
        };
      }

      @Override
      public void close() throws IOException {
        reader.close();
      }
    };
  }

  @Override
  public boolean next() throws IOException {
    if (mergedIterator.hasNext()) {
      this.current = mergedIterator.next();
      return true;
    }
    return false;
  }

  @Override
  public T get() {
    return current;
  }

  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }

    try {
      mergedIterator.close();
    } finally {
      closed = true;
    }
  }
}
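
The javadoc above attributes the O(log k)-per-row cost to a heap-based merge performed by SortedMerge. The sketch below is not Iceberg's SortedMerge implementation; it is a self-contained illustration of the same k-way merge technique using java.util.PriorityQueue, included only to make that cost claim concrete.

// Standalone illustration of a heap-based k-way merge (not Iceberg code).
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

final class KWayMergeSketch {
  /** Merges k already-sorted iterators into one sorted list using a heap of size at most k. */
  static <T> List<T> merge(List<Iterator<T>> sortedInputs, Comparator<T> comparator) {
    // Each heap entry carries the current head element plus the iterator it came from.
    class Head {
      final T value;
      final Iterator<T> source;

      Head(T value, Iterator<T> source) {
        this.value = value;
        this.source = source;
      }
    }

    PriorityQueue<Head> heap =
        new PriorityQueue<>((a, b) -> comparator.compare(a.value, b.value));
    for (Iterator<T> input : sortedInputs) {
      if (input.hasNext()) {
        heap.add(new Head(input.next(), input));
      }
    }

    List<T> merged = new ArrayList<>();
    while (!heap.isEmpty()) {
      Head smallest = heap.poll(); // O(log k) heap operation per emitted row
      merged.add(smallest.value);
      if (smallest.source.hasNext()) {
        heap.add(new Head(smallest.source.next(), smallest.source));
      }
    }
    return merged;
  }
}

The heap never holds more than one head element per input, so emitting each row costs one poll and at most one offer, both O(log k), which matches the complexity described in the class javadoc.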
Lines changed: 139 additions & 0 deletions

@@ -0,0 +1,139 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.spark.source;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.iceberg.BaseScanTaskGroup;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.ScanTaskGroup;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.iceberg.util.SnapshotUtil;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.connector.metric.CustomTaskMetric;
import org.apache.spark.sql.connector.read.PartitionReader;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link PartitionReader} that reads multiple sorted files and merges them into a single sorted
 * stream.
 *
 * <p>This reader is used when {@code preserve-data-ordering} is enabled and the task group contains
 * multiple files that all have the same sort order. It creates one {@link RowDataReader} per file
 * and uses {@link MergingPartitionReader} to perform a k-way merge.
 */
class MergingSortedRowDataReader implements PartitionReader<InternalRow> {
  private static final Logger LOG = LoggerFactory.getLogger(MergingSortedRowDataReader.class);

  private final MergingPartitionReader<InternalRow> mergingReader;
  private final List<RowDataReader> fileReaders;

  MergingSortedRowDataReader(SparkInputPartition partition, int reportableSortOrderId) {
    Table table = partition.table();
    ScanTaskGroup<FileScanTask> taskGroup = partition.taskGroup();
    Schema tableSchema = SnapshotUtil.schemaFor(table, partition.branch());
    Schema expectedSchema = partition.expectedSchema();

    Preconditions.checkArgument(
        reportableSortOrderId > 0, "Invalid sort order ID: %s", reportableSortOrderId);
    Preconditions.checkArgument(
        taskGroup.tasks().size() > 1,
        "Merging reader requires multiple files, got %s",
        taskGroup.tasks().size());

    LOG.info(
        "Creating merging reader for {} files with sort order ID {} in table {}",
        taskGroup.tasks().size(),
        reportableSortOrderId,
        table.name());

    SortOrder sortOrder = table.sortOrders().get(reportableSortOrderId);
    Preconditions.checkNotNull(
        sortOrder,
        "Cannot find sort order with ID %s in table %s",
        reportableSortOrderId,
        table.name());

    this.fileReaders =
        taskGroup.tasks().stream()
            .map(
                task -> {
                  ScanTaskGroup<FileScanTask> singleTaskGroup =
                      new BaseScanTaskGroup<>(java.util.Collections.singletonList(task));

                  return new RowDataReader(
                      table,
                      singleTaskGroup,
                      tableSchema,
                      expectedSchema,
                      partition.isCaseSensitive(),
                      partition.cacheDeleteFilesOnExecutors());
                })
            .collect(Collectors.toList());

    List<PartitionReader<InternalRow>> readers =
        fileReaders.stream()
            .map(reader -> (PartitionReader<InternalRow>) reader)
            .collect(Collectors.toList());

    StructType sparkSchema = SparkSchemaUtil.convert(expectedSchema);
    this.mergingReader =
        new MergingPartitionReader<>(readers, sortOrder, sparkSchema, expectedSchema);
  }

  @Override
  public boolean next() throws IOException {
    return mergingReader.next();
  }

  @Override
  public InternalRow get() {
    return mergingReader.get();
  }

  @Override
  public void close() throws IOException {
    mergingReader.close();
  }

  public CustomTaskMetric[] currentMetricsValues() {
    long totalSplits = fileReaders.size();

    long totalDeletes =
        fileReaders.stream()
            .flatMap(reader -> Arrays.stream(reader.currentMetricsValues()))
            .filter(
                metric -> metric instanceof org.apache.iceberg.spark.source.metrics.TaskNumDeletes)
            .mapToLong(CustomTaskMetric::value)
            .sum();

    return new CustomTaskMetric[] {
      new org.apache.iceberg.spark.source.metrics.TaskNumSplits(totalSplits),
      new org.apache.iceberg.spark.source.metrics.TaskNumDeletes(totalDeletes)
    };
  }
}
