
Commit 9e183e6

[Spark] Make MergeIntoSuiteBase agnostic to name/path-based access (delta-io#4809)
#### Which Delta project/connector is this regarding?

- [x] Spark
- [ ] Standalone
- [ ] Flink
- [ ] Kernel
- [ ] Other (fill in here)

## Description

This PR is a continuation of delta-io#4808. Here, we continue pushing the `DeltaDMLByPathTestUtils` trait down closer to the MergeInto suites. In the process, we change all explicit path-based accesses to more generic constructs such as `tableSQLIdentifier` and `readDeltaTableByIdentifier`, which can be overridden by either the `DeltaDMLByPathTestUtils` trait or the `DeltaDMLByNameTestUtils` trait (to be added later).

## How was this patch tested?

Existing UTs.

## Does this PR introduce _any_ user-facing changes?

No.
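As an editorial illustration of the refactor (not part of the diff): a minimal sketch of how the two flavors might override the shared accessor. `DeltaDMLByNameTestUtils` does not exist yet in this commit, so the by-name shape and every name other than `tableSQLIdentifier` and `readDeltaTableByIdentifier` are assumptions.

```scala
import org.apache.spark.sql.{DataFrame, SparkSession}

// Sketch only: the real traits live in DeltaTestUtils.scala; the helper
// names here (DmlAccessSketch, ByPathSketch, ByNameSketch, tempPath) are
// invented for illustration.
trait DmlAccessSketch {
  protected def spark: SparkSession

  // Either a catalog table name or a path-style identifier like delta.`/p`.
  protected def tableSQLIdentifier: String

  // Defaults to the test table, mirroring the new signature in this PR.
  protected def readDeltaTableByIdentifier(
      tableIdentifier: String = tableSQLIdentifier): DataFrame =
    spark.read.format("delta").table(tableIdentifier)
}

// Path-based flavor, in the spirit of DeltaDMLByPathTestUtils.
trait ByPathSketch extends DmlAccessSketch {
  protected def tempPath: String
  override protected def tableSQLIdentifier: String = s"delta.`$tempPath`"
}

// Name-based flavor, in the spirit of the DeltaDMLByNameTestUtils trait
// that the description says will be added later.
trait ByNameSketch extends DmlAccessSketch {
  override protected def tableSQLIdentifier: String = "target_table"
}
```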
1 parent 662f4ac commit 9e183e6

File tree: 7 files changed, +193 −204 lines changed


spark/src/test/scala/org/apache/spark/sql/delta/DeltaTestUtils.scala

Lines changed: 17 additions & 14 deletions

```diff
@@ -42,7 +42,7 @@ import org.scalatest.BeforeAndAfterEach
 import org.apache.spark.{SparkContext, SparkFunSuite, SparkThrowable}
 import org.apache.spark.scheduler.{JobFailed, SparkListener, SparkListenerJobEnd, SparkListenerJobStart}
-import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession}
+import org.apache.spark.sql.{AnalysisException, DataFrame, DataFrameWriter, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.util.{quietly, FailFastMode}
@@ -578,20 +578,11 @@ trait DeltaDMLTestUtils
   protected def tableSQLIdentifier: String
 
   protected def append(df: DataFrame, partitionBy: Seq[String] = Nil): Unit = {
-    import DeltaTestUtils.TableIdentifierOrPath
-
     val dfw = df.write.format("delta").mode("append")
     if (partitionBy.nonEmpty) {
       dfw.partitionBy(partitionBy: _*)
     }
-    getTableIdentifierOrPath(tableSQLIdentifier) match {
-      case TableIdentifierOrPath.Identifier(id, _) => dfw.saveAsTable(id.toString)
-      // A cleaner way to write this is to just use `saveAsTable` where the
-      // table name is delta.`path`. However, it will throw an error when
-      // we use "append" mode and the table does not exist, so we use `save`
-      // here instead.
-      case TableIdentifierOrPath.Path(path, _) => dfw.save(path)
-    }
+    writeTable(dfw, tableSQLIdentifier)
   }
 
   protected def withKeyValueData(
@@ -633,10 +624,24 @@ trait DeltaDMLTestUtils
    * Reads a delta table by its identifier. The identifier can either be the table name or table
    * path that is in the form of delta.`tablePath`.
    */
-  protected def readDeltaTableByIdentifier(tableIdentifier: String): DataFrame = {
+  protected def readDeltaTableByIdentifier(
+      tableIdentifier: String = tableSQLIdentifier): DataFrame = {
     spark.read.format("delta").table(tableIdentifier)
   }
 
+  protected def writeTable[T](dfw: DataFrameWriter[T], tableName: String): Unit = {
+    import DeltaTestUtils.TableIdentifierOrPath
+
+    getTableIdentifierOrPath(tableName) match {
+      case TableIdentifierOrPath.Identifier(id, _) => dfw.saveAsTable(id.toString)
+      // A cleaner way to write this is to just use `saveAsTable` where the
+      // table name is delta.`path`. However, it will throw an error when
+      // we use "append" mode and the table does not exist, so we use `save`
+      // here instead.
+      case TableIdentifierOrPath.Path(path, _) => dfw.save(path)
+    }
+  }
+
   /**
    * Finds the latest operation of the given type that ran on the test table and returns the
    * dataframe with the changes of the corresponding table version.
@@ -692,6 +697,4 @@ trait DeltaDMLByPathTestUtils extends DeltaDMLTestUtils {
   protected def readDeltaTable(path: String): DataFrame = {
     spark.read.format("delta").load(path)
   }
-
-  protected def getDeltaFileStmt(path: String): String = s"SELECT * FROM delta.`$path`"
 }
```
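The net effect is that every write in the suite funnels through `writeTable`, which picks `saveAsTable` for catalog names and `save` for paths, while reads can omit the identifier entirely. A hedged sketch of a caller inside a suite mixing in `DeltaDMLTestUtils`; the data values and column names are illustrative, only the helpers come from the diff above:

```scala
// Illustrative test body; testImplicits is Spark's usual test-kit import.
import testImplicits._

val df = Seq((1, "a"), (2, "b")).toDF("key", "value")

// Dispatches via writeTable: saveAsTable for a catalog name,
// save(path) for a delta.`path` identifier.
append(df, partitionBy = Seq("key"))

// The new default argument resolves to tableSQLIdentifier.
checkAnswer(readDeltaTableByIdentifier(), df)
```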

spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoDVsSuite.scala

Lines changed: 6 additions & 6 deletions

```diff
@@ -38,16 +38,16 @@ trait MergeIntoDVsMixin extends MergeIntoSQLMixin with DeletionVectorsTestUtils
 
   override def excluded: Seq[String] = {
     val miscFailures = Seq(
-      "basic case - merge to view on a Delta table by path, " +
+      "basic case - merge to view on a Delta table, " +
        "partitioned: true skippingEnabled: false useSqlView: true",
-      "basic case - merge to view on a Delta table by path, " +
+      "basic case - merge to view on a Delta table, " +
        "partitioned: true skippingEnabled: false useSqlView: false",
-      "basic case - merge to view on a Delta table by path, " +
+      "basic case - merge to view on a Delta table, " +
        "partitioned: false skippingEnabled: false useSqlView: true",
-      "basic case - merge to view on a Delta table by path, " +
+      "basic case - merge to view on a Delta table, " +
        "partitioned: false skippingEnabled: false useSqlView: false",
-      "basic case - merge to Delta table by name, isPartitioned: false skippingEnabled: false",
-      "basic case - merge to Delta table by name, isPartitioned: true skippingEnabled: false",
+      "basic case - merge to Delta table, isPartitioned: false skippingEnabled: false",
+      "basic case - merge to Delta table, isPartitioned: true skippingEnabled: false",
       "not matched by source - all 3 clauses - no changes - " +
        "isPartitioned: true - cdcEnabled: true",
       "not matched by source - all 3 clauses - no changes - " +
```

spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoNotMatchedBySourceSuite.scala

Lines changed: 28 additions & 24 deletions

```diff
@@ -49,10 +49,8 @@ trait MergeIntoNotMatchedBySourceSuite extends MergeIntoSuiteBaseMixin {
       withSQLConf(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED.key -> "true") {
         executeMerge(s"$targetName t", s"$sourceName s", mergeOn, mergeClauses: _*)
       }
-      val deltaPath = if (targetName.startsWith("delta.`")) {
-        targetName.stripPrefix("delta.`").stripSuffix("`")
-      } else targetName
-      checkAnswer(readDeltaTable(deltaPath), result.map { case (k, v) => Row(k, v) })
+      checkAnswer(readDeltaTableByIdentifier(targetName),
+        result.map { case (k, v) => Row(k, v) })
     }
     if (cdcEnabled) {
       checkAnswer(getCDCForLatestOperation(deltaLog, DeltaOperations.OP_MERGE), cdc.toDF())
@@ -504,28 +502,34 @@ trait MergeIntoNotMatchedBySourceSuite extends MergeIntoSuiteBaseMixin {
     cdc = Seq.empty)
 
   test(s"special character in path - not matched by source delete") {
-    val source = s"$tempDir/sou rce^"
-    val target = s"$tempDir/tar get="
-    spark.range(0, 10, 2).write.format("delta").save(source)
-    spark.range(10).write.format("delta").save(target)
-    executeMerge(
-      tgt = s"delta.`$target` t",
-      src = s"delta.`$source` s",
-      cond = "t.id = s.id",
-      clauses = deleteNotMatched())
-    checkAnswer(readDeltaTable(target), Seq(0, 2, 4, 6, 8).toDF("id"))
+    withTempDir { tempDir =>
+      val source = s"$tempDir/sou rce^"
+      val target = s"$tempDir/tar get="
+      spark.range(0, 10, 2).write.format("delta").save(source)
+      spark.range(10).write.format("delta").save(target)
+      executeMerge(
+        tgt = s"delta.`$target` t",
+        src = s"delta.`$source` s",
+        cond = "t.id = s.id",
+        clauses = deleteNotMatched())
+      checkAnswer(readDeltaTableByIdentifier(s"delta.`$target`"), Seq(0, 2, 4, 6, 8).toDF("id"))
+    }
   }
 
   test(s"special character in path - not matched by source update") {
-    val source = s"$tempDir/sou rce@"
-    val target = s"$tempDir/tar get#"
-    spark.range(0, 10, 2).write.format("delta").save(source)
-    spark.range(10).write.format("delta").save(target)
-    executeMerge(
-      tgt = s"delta.`$target` t",
-      src = s"delta.`$source` s",
-      cond = "t.id = s.id",
-      clauses = updateNotMatched(set = "id = t.id * 10"))
-    checkAnswer(readDeltaTable(target), Seq(0, 10, 2, 30, 4, 50, 6, 70, 8, 90).toDF("id"))
+    withTempDir { tempDir =>
+      val source = s"$tempDir/sou rce@"
+      val target = s"$tempDir/tar get#"
+      spark.range(0, 10, 2).write.format("delta").save(source)
+      spark.range(10).write.format("delta").save(target)
+      executeMerge(
+        tgt = s"delta.`$target` t",
+        src = s"delta.`$source` s",
+        cond = "t.id = s.id",
+        clauses = updateNotMatched(set = "id = t.id * 10"))
+      checkAnswer(
+        readDeltaTableByIdentifier(s"delta.`$target`"),
+        Seq(0, 10, 2, 30, 4, 50, 6, 70, 8, 90).toDF("id"))
+    }
   }
 }
```
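Two things happen in this file. First, each test body is wrapped in `withTempDir`, giving the test its own directory instead of relying on a shared `tempDir` field (presumably so the suite no longer depends on the path-based fixture). Second, the manual stripping of the `delta.` wrapper in the first hunk is gone because `spark.read.table` accepts the path form directly. A small sketch of that equivalence, with an illustrative path:

```scala
// For a path-backed Delta table, these two reads are equivalent
// (the path is illustrative).
val path = "/tmp/tar get="
val byLoad  = spark.read.format("delta").load(path)
val byTable = spark.read.format("delta").table(s"delta.`$path`")
assert(byLoad.schema == byTable.schema)
```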

spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSQLSuite.scala

Lines changed: 1 addition & 0 deletions

```diff
@@ -35,6 +35,7 @@ import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
 trait MergeIntoSQLMixin extends MergeIntoSuiteBaseMixin
   with MergeIntoSQLTestUtils
   with DeltaSQLCommandTest
+  with DeltaDMLByPathTestUtils
   with DeltaTestUtilsForTempViews {
 
   override def excluded: Seq[String] = super.excluded ++ Seq(
```

spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoScalaSuite.scala

Lines changed: 1 addition & 0 deletions

```diff
@@ -31,6 +31,7 @@ import org.apache.spark.sql.types.StructType
 trait MergeIntoScalaMixin extends MergeIntoSuiteBaseMixin
   with MergeIntoScalaTestUtils
   with DeltaSQLCommandTest
+  with DeltaDMLByPathTestUtils
   with DeltaTestUtilsForTempViews
   with DeltaExcludedTestMixin {
 
```
spark/src/test/scala/org/apache/spark/sql/delta/MergeIntoSchemaEvolutionSuite.scala

Lines changed: 3 additions & 3 deletions

```diff
@@ -77,14 +77,14 @@ trait MergeIntoSchemaEvolutionMixin {
         errorContains(Utils.exceptionString(ex), error)
       } else {
         executeMerge(s"$tableSQLIdentifier t", "source s", cond, clauses: _*)
-        checkAnswer(readDeltaTableByIdentifier(tableSQLIdentifier), df.collect())
+        checkAnswer(readDeltaTableByIdentifier(), df.collect())
         if (schema != null) {
-          assert(readDeltaTableByIdentifier(tableSQLIdentifier).schema === schema)
+          assert(readDeltaTableByIdentifier().schema === schema)
         } else {
           // Check against the schema of the expected result df if no explicit schema was
           // provided. Nullability of fields will vary depending on the actual data in the df so
           // we ignore it.
-          assert(readDeltaTableByIdentifier(tableSQLIdentifier).schema.asNullable ===
+          assert(readDeltaTableByIdentifier().schema.asNullable ===
             df.schema.asNullable)
         }
       }
```
