@@ -24,6 +24,7 @@ import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.metric.CustomTaskMetric
 import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory}
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.execution.metric.{CustomMetrics, SQLMetric}
@@ -33,6 +34,19 @@ import org.apache.spark.util.ArrayImplicits._
 class DataSourceRDDPartition(val index: Int, val inputPartition: Option[InputPartition])
   extends Partition with Serializable

+/**
+ * Holds per-thread state, used by the completion listener to access the most recently
+ * created reader and iterator for final metric updates and cleanup.
+ *
+ * @param reader The partition reader
+ * @param iterator The metrics iterator wrapping the reader
+ * @param metrics Optional array of custom task metrics from the previous reader
+ */
+private case class State(
+    reader: PartitionReader[_],
+    iterator: MetricsIterator[_],
+    metrics: Option[Array[CustomTaskMetric]])
+
 // TODO: we should have 2 RDDs: an RDD[InternalRow] for row-based scan, an `RDD[ColumnarBatch]` for
 // columnar scan.
 class DataSourceRDD(
@@ -43,6 +57,10 @@ class DataSourceRDD(
     customMetrics: Map[String, SQLMetric])
   extends RDD[InternalRow](sc, Nil) {

+  // ThreadLocal to store the last State for this thread.
+  // A null value indicates that no completion listener has been added yet.
+  @transient lazy private val lastThreadLocal = new ThreadLocal[State]()
+
   override protected def getPartitions: Array[Partition] = {
     inputPartitions.zipWithIndex.map {
       case (inputPartition, index) => new DataSourceRDDPartition(index, inputPartition)
@@ -59,21 +77,39 @@ class DataSourceRDD(
     val (iter, reader) = if (columnarReads) {
       val batchReader = partitionReaderFactory.createColumnarReader(inputPartition)
       val iter = new MetricsBatchIterator(
-        new PartitionIterator[ColumnarBatch](batchReader, customMetrics))
+        new PartitionIterator[ColumnarBatch](batchReader, customMetrics), lastThreadLocal)
       (iter, batchReader)
     } else {
       val rowReader = partitionReaderFactory.createReader(inputPartition)
       val iter = new MetricsRowIterator(
-        new PartitionIterator[InternalRow](rowReader, customMetrics))
+        new PartitionIterator[InternalRow](rowReader, customMetrics), lastThreadLocal)
       (iter, rowReader)
     }
-    context.addTaskCompletionListener[Unit] { _ =>
-      // In case of early stopping before consuming the entire iterator,
-      // we need to do one more metric update at the end of the task.
-      CustomMetrics.updateMetrics(reader.currentMetricsValues.toImmutableArraySeq, customMetrics)
-      iter.forceUpdateMetrics()
-      reader.close()
+
+    // Add completion listener only once per thread (null means no listener added yet)
+    val last = lastThreadLocal.get()
+    if (last == null) {
+      context.addTaskCompletionListener[Unit] { _ =>
+        // Use the reader and iterator from ThreadLocal (the last ones created in this thread)
+        val last = lastThreadLocal.get()
+        if (last != null) {
+          // In case of early stopping before consuming the entire iterator,
+          // we need to do one more metric update at the end of the task.
+          CustomMetrics.updateMetrics(
+            last.reader.currentMetricsValues.toImmutableArraySeq, customMetrics)
+          last.iterator.forceUpdateMetrics()
+          last.reader.close()
+        }
+        lastThreadLocal.remove()
+      }
+    } else {
+      last.metrics.foreach(reader.initMetricsValues)
     }
+
+    // Store the current reader and iterator in ThreadLocal so the completion listener
+    // can access the most recently created instances
+    lastThreadLocal.set(State(reader, iter, None))
+
     // TODO: SPARK-25083 remove the type erasure hack in data source scan
     new InterruptibleIterator(context, iter.asInstanceOf[Iterator[InternalRow]])
   }
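The hunk above is the heart of the change: a task completion listener is registered at most once per thread, and that single listener always acts on whatever reader and iterator were created last on the thread, tracked through the ThreadLocal. Below is a minimal, Spark-free sketch of the same pattern; every name in it (Resource, openReader, endOfTask, hooksRegistered) is illustrative and not part of Spark's API.

// Sketch: register a single end-of-task hook per thread, keyed off a ThreadLocal sentinel.
object ListenerOncePerThreadSketch {
  final class Resource(val name: String) {
    def close(): Unit = println(s"closed $name")
  }

  // Per-thread pointer to the most recently opened resource; null means no hook registered yet.
  private val last = new ThreadLocal[Resource]()
  private var hooksRegistered = 0

  def openReader(name: String): Resource = {
    val r = new Resource(name)
    if (last.get() == null) {
      hooksRegistered += 1 // first resource on this thread: register the single hook
    }
    last.set(r) // always point the thread-local state at the newest resource
    r
  }

  def endOfTask(): Unit = {
    val r = last.get() // the hook only ever sees the last resource created on this thread
    if (r != null) r.close()
    last.remove()
  }

  def main(args: Array[String]): Unit = {
    openReader("reader-1")
    openReader("reader-2") // same thread: no extra hook; state now points at reader-2
    endOfTask()            // closes only reader-2
    println(s"hooks registered: $hooksRegistered") // prints 1
  }
}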
@@ -113,7 +149,7 @@ private class PartitionIterator[T](
   }
 }

-private class MetricsHandler extends Logging with Serializable {
+private[spark] class MetricsHandler extends Logging with Serializable {
   private val inputMetrics = TaskContext.get().taskMetrics().inputMetrics
   private val startingBytesRead = inputMetrics.bytesRead
   private val getBytesRead = SparkHadoopUtil.get.getFSBytesReadOnThreadCallback()
@@ -128,13 +164,18 @@ private class MetricsHandler extends Logging with Serializable {
   }
 }

-private abstract class MetricsIterator[I](iter: Iterator[I]) extends Iterator[I] {
+private[spark] abstract class MetricsIterator[I](
+    iter: Iterator[I],
+    lastThreadLocal: ThreadLocal[State]
+  ) extends Iterator[I] {
   protected val metricsHandler = new MetricsHandler

   override def hasNext: Boolean = {
     if (iter.hasNext) {
       true
     } else {
+      val last = lastThreadLocal.get()
+      lastThreadLocal.set(last.copy(metrics = Some(last.reader.currentMetricsValues())))
       forceUpdateMetrics()
       false
     }
@@ -143,17 +184,21 @@ private abstract class MetricsIterator[I](iter: Iterator[I]) extends Iterator[I]
   def forceUpdateMetrics(): Unit = metricsHandler.updateMetrics(0, force = true)
 }

-private class MetricsRowIterator(
-    iter: Iterator[InternalRow]) extends MetricsIterator[InternalRow](iter) {
+private[spark] class MetricsRowIterator(
+    iter: Iterator[InternalRow],
+    lastThreadLocal: ThreadLocal[State]
+  ) extends MetricsIterator[InternalRow](iter, lastThreadLocal) {
   override def next(): InternalRow = {
     val item = iter.next()
     metricsHandler.updateMetrics(1)
     item
   }
 }

-private class MetricsBatchIterator(
-    iter: Iterator[ColumnarBatch]) extends MetricsIterator[ColumnarBatch](iter) {
+private[spark] class MetricsBatchIterator(
+    iter: Iterator[ColumnarBatch],
+    lastThreadLocal: ThreadLocal[State]
+  ) extends MetricsIterator[ColumnarBatch](iter, lastThreadLocal) {
   override def next(): ColumnarBatch = {
     val batch: ColumnarBatch = iter.next()
     metricsHandler.updateMetrics(batch.numRows)
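A second, smaller piece of the change is the metric hand-off: when a reader's iterator is exhausted, MetricsIterator.hasNext stores that reader's final custom metric values into the thread-local State, and the next reader created on the same thread is seeded from that snapshot (the else branch in compute above). Below is a Spark-free sketch of that hand-off under the same caveat; the names (Holder, finishReader, seedForNextReader) are illustrative only.

// Sketch: stash a finished reader's metric values in a ThreadLocal and hand them to the next reader.
object MetricHandoffSketch {
  // Mirrors State.metrics: the finished reader's final metric values, if any.
  final case class Holder(metrics: Option[Map[String, Long]])

  private val last = new ThreadLocal[Holder]()

  // Called when an iterator is exhausted: snapshot the reader's final values.
  def finishReader(finalValues: Map[String, Long]): Unit = {
    val h = Option(last.get()).getOrElse(Holder(None))
    last.set(h.copy(metrics = Some(finalValues)))
  }

  // Called when the next reader is created on the same thread: start from the snapshot.
  def seedForNextReader(): Map[String, Long] =
    Option(last.get()).flatMap(_.metrics).getOrElse(Map.empty)

  def main(args: Array[String]): Unit = {
    finishReader(Map("bytesRead" -> 100L))
    println(seedForNextReader()) // Map(bytesRead -> 100)
  }
}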