Skip to content

Commit 51345be

Browse files
authored
Merge branch-25.04 into main [skip ci] (#12545)
Merge branch-25.04 into main. Note: merge this PR using the **Create a merge commit** option.
2 parents b228bab + 9fc8292 commit 51345be

File tree

4 files changed

+86
-33
lines changed

4 files changed

+86
-33
lines changed

CHANGELOG.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Change log
2-
Generated on 2025-04-14
2+
Generated on 2025-04-17
33

44
## Release 25.04
55

@@ -30,11 +30,16 @@ Generated on 2025-04-14
3030
### Bugs Fixed
3131
|||
3232
|:---|:---|
33+
|[#12530](https://github.com/NVIDIA/spark-rapids/issues/12530)|[BUG] Outer join result is incorrect when Spark is 3.5.x, join side is outer side, join on struct column and there is null|
3334
|[#12410](https://github.com/NVIDIA/spark-rapids/issues/12410)|[BUG] ThrottlingExecutorSuite: test task metrics failed intermittently|
3435
|[#12435](https://github.com/NVIDIA/spark-rapids/issues/12435)|[BUG] Running integration tests with `PERFILE` results in failed tests|
36+
|[#12360](https://github.com/NVIDIA/spark-rapids/issues/12360)|[BUG] delta_lake_test test_delta_deletion_vector cases failed in databricks 14.3 runtime|
3537
|[#12123](https://github.com/NVIDIA/spark-rapids/issues/12123)|[BUG] delta_lake_update_test.test_delta_update_fallback_with_deletion_vectors failed assertion failed: Could not find RapidsDeltaWriteExec in the GPU plans with spark34Xshims|
38+
|[#12405](https://github.com/NVIDIA/spark-rapids/issues/12405)|[BUG] test_delta_deletion_vector_fallback fails on [databricks] 14.3 CI|
39+
|[#12460](https://github.com/NVIDIA/spark-rapids/issues/12460)|[BUG] Fallback to the CPU when FileSourceScan is reading Deletion Vectors on Databricks 14.3|
3640
|[#12027](https://github.com/NVIDIA/spark-rapids/issues/12027)|[BUG] [DB 14.3] `tightBounds` stat in Delta Lake tables is set incorrectly|
3741
|[#12379](https://github.com/NVIDIA/spark-rapids/issues/12379)|[BUG] test_parse_url_supported fails on [databricks] 14.3|
42+
|[#12428](https://github.com/NVIDIA/spark-rapids/issues/12428)|[BUG] Multiple python udf integration test cases failed in DB 14.3|
3843
|[#12408](https://github.com/NVIDIA/spark-rapids/issues/12408)|[BUG] Job timeout registration pathologically fails in some [databricks] CI_PART1 pipelines|
3944
|[#12413](https://github.com/NVIDIA/spark-rapids/issues/12413)|[BUG] nightly shuffle multi-thread/UCX CI failed possibly out of memory or process/resource limits reached|
4045
|[#12376](https://github.com/NVIDIA/spark-rapids/issues/12376)|[BUG] test_col_size_exceeding_cudf_limit fails on [databricks]|
@@ -82,6 +87,8 @@ Generated on 2025-04-14
8287
### PRs
8388
|||
8489
|:---|:---|
90+
|[#12535](https://github.com/NVIDIA/spark-rapids/pull/12535)|Fix bug when join side is outer side|
91+
|[#12494](https://github.com/NVIDIA/spark-rapids/pull/12494)|Update changelog for v25.04.0 release [skip ci]|
8592
|[#12497](https://github.com/NVIDIA/spark-rapids/pull/12497)|[DOC] update the download page for 2504 release [skip ci]|
8693
|[#12473](https://github.com/NVIDIA/spark-rapids/pull/12473)|Update dependency version JNI, private, hybrid to 25.04.0|
8794
|[#12485](https://github.com/NVIDIA/spark-rapids/pull/12485)|Enable the 14.3 Shim|

integration_tests/src/main/python/join_test.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,36 @@ def test_hash_join_side_is_build_side_asymmetric(data_gen, join_type, kudo_enabl
337337
}
338338
hash_join_side_is_build_side(data_gen, join_type, confs)
339339

340+
@ignore_order(local=True)
@pytest.mark.parametrize('join_type', all_asymmetric_sized_join_types, ids=idfn)
def test_hash_join_side_is_build_side_basic(join_type):
    """Check outer-join correctness on a struct key when the build side is
    also the outer (row-preserving) side.

    Regression test for https://github.com/NVIDIA/spark-rapids/issues/12530:
    null-keyed rows on the build side must not be dropped, and a null struct
    child must not match a null root struct.
    """
    def _do_join(spark):
        # Rows cover the interesting key shapes: plain struct, null root
        # struct, and struct whose only child is null.
        left_rows = [
            (1, ("Alice",)),
            (2, ("Bob",)),
            (3, None),
            (4, (None,)),
        ]
        right_rows = [
            (11, ("Alice",)),
            (33, None),
            (333, None),
            (44, (None,)),
        ]
        schema = StructType([
            StructField("id", IntegerType()),
            StructField("name", StructType([
                StructField("value", StringType())]))])
        left = spark.createDataFrame(left_rows, schema)
        right = spark.createDataFrame(right_rows, schema)
        # The SHUFFLE_HASH hint goes on the outer side of the join so the
        # build side matches the outer side.
        if join_type == "LeftOuter":
            return left.hint("SHUFFLE_HASH").join(right, "name", join_type) \
                .select(left.id, left.name, right.id, right.name)
        if join_type == "RightOuter":
            return left.join(right.hint("SHUFFLE_HASH"), "name", join_type) \
                .select(left.id, left.name, right.id, right.name)
        raise RuntimeError("Only supports left join and right join")
    assert_gpu_and_cpu_are_equal_collect(_do_join)
369+
340370
# local sort because of https://github.com/NVIDIA/spark-rapids/issues/84
341371
# After 3.1.0 is the min spark version we can drop this
342372
@ignore_order(local=True)

sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledSizedHashJoinExec.scala

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -86,20 +86,11 @@ object GpuShuffledSizedHashJoinExec {
8686
val flippedCondition = condition.map { c =>
8787
GpuBindReferences.bindGpuReference(c, conditionLeftAttrs ++ conditionRightAttrs)
8888
}
89-
// For join types other than FullOuter and outer joins where the build side matches the
90-
// outer side, we simply set compareNullsEqual as true to adapt struct keys with nullable
91-
// children. Non-nested keys can also be correctly processed with compareNullsEqual = true,
92-
// because we filter all null records from build table before join.
93-
// For full outer and outer joins with build side matching outer side, we need to keep the
94-
// nulls in the build table and thus cannot compare nulls as equal.
95-
// For details, see https://github.com/NVIDIA/spark-rapids/issues/2126.
96-
val treatNullsEqual = joinType match {
97-
case FullOuter => false
98-
case LeftOuter if buildSide == GpuBuildLeft => false
99-
case RightOuter if buildSide == GpuBuildRight => false
100-
case _ => GpuHashJoin.anyNullableStructChild(boundStreamKeys)
101-
}
102-
val needNullFilter = treatNullsEqual && boundStreamKeys.exists(_.nullable)
89+
90+
val treatNullsEqual = GpuHashJoin.compareNullsEqual(joinType, boundStreamKeys)
91+
val needNullFilter = GpuHashJoin.buildSideNeedsNullFilter(
92+
joinType, treatNullsEqual, buildSide, boundBuildKeys)
93+
10394
BoundJoinExprs(boundStreamKeys, streamTypes, streamOutput,
10495
boundBuildKeys, buildTypes, buildOutput,
10596
flippedCondition, conditionLeftAttrs.size, treatNullsEqual, needNullFilter)
@@ -133,16 +124,14 @@ object GpuShuffledSizedHashJoinExec {
133124
val boundCondition = condition.map { c =>
134125
GpuBindReferences.bindGpuReference(c, streamOutput ++ buildOutput)
135126
}
136-
// For join types other than FullOuter, we simply set compareNullsEqual as true to adapt
137-
// struct keys with nullable children. Non-nested keys can also be correctly processed with
138-
// compareNullsEqual = true, because we filter all null records from build table before join.
139-
// For details, see https://github.com/NVIDIA/spark-rapids/issues/2126.
140-
val compareNullsEqual = (joinType != FullOuter) &&
141-
GpuHashJoin.anyNullableStructChild(boundBuildKeys)
142-
val needNullFilter = compareNullsEqual && boundBuildKeys.exists(_.nullable)
127+
128+
val treatNullsEqual = GpuHashJoin.compareNullsEqual(joinType, boundBuildKeys)
129+
val needNullFilter = GpuHashJoin.buildSideNeedsNullFilter(
130+
joinType, treatNullsEqual, buildSide, boundBuildKeys)
131+
143132
BoundJoinExprs(boundBuildKeys, buildTypes, buildOutput,
144133
boundStreamKeys, streamTypes, streamOutput,
145-
boundCondition, streamOutput.size, compareNullsEqual, needNullFilter)
134+
boundCondition, streamOutput.size, treatNullsEqual, needNullFilter)
146135
}
147136
}
148137

sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuHashJoin.scala

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,37 @@ object JoinTypeChecks {
103103

104104
object GpuHashJoin {
105105

106+
/**
 * Decides whether the low-level join should treat nulls in the keys as equal.
 *
 * For joins on struct keys, nulls inside the child columns of a struct compare
 * as equal, but two null root structs do not compare as equal. So for join
 * types other than FullOuter, when a struct key has a nullable child we set
 * compareNullsEqual to true; the Spark plan has already filtered out rows whose
 * root struct key is null, so no spurious root-level matches can occur.
 * For details, see https://github.com/NVIDIA/spark-rapids/issues/2126.
 *
 * Non-nested keys are also handled correctly: this returns false for them
 * (no nullable struct child), which preserves the usual join semantics of
 * null != null.
 *
 * @param joinType  the logical join type
 * @param buildKeys bound key expressions to inspect for nullable struct children
 * @return true when nulls should be compared as equal during the join
 */
def compareNullsEqual(
    joinType: JoinType,
    buildKeys: Seq[Expression]): Boolean =
  (joinType != FullOuter) && GpuHashJoin.anyNullableStructChild(buildKeys)
118+
119+
/**
 * Decides whether rows with null keys must be filtered out of the build table
 * before the join.
 *
 * Filtering build-side null keys is the usual companion to treating nulls as
 * equal: once null-keyed rows are removed from the build table, comparing
 * nulls as equal cannot produce spurious matches. However, for FullOuter
 * joins, and for outer joins where the build side is also the outer
 * (row-preserving) side, the null-keyed build rows must appear in the join
 * output, so they MUST NOT be filtered out.
 *
 * @param joinType          the logical join type
 * @param compareNullsEqual result of [[compareNullsEqual]] for this join
 * @param buildSide         which input of the join is the build side
 * @param buildKeys         bound key expressions of the build side
 * @return true when the build table should have null-keyed rows removed
 */
def buildSideNeedsNullFilter(
    joinType: JoinType,
    compareNullsEqual: Boolean,
    buildSide: GpuBuildSide,
    buildKeys: Seq[Expression]): Boolean = {
  // Null filtering is only legal when the build side is not row-preserving.
  val buildSideIsNotPreserved = joinType match {
    case FullOuter => false
    case LeftOuter if buildSide == GpuBuildLeft => false
    case RightOuter if buildSide == GpuBuildRight => false
    case _ => true
  }
  buildSideIsNotPreserved && compareNullsEqual && buildKeys.exists(_.nullable)
}
136+
106137
def tagJoin(
107138
meta: SparkPlanMeta[_],
108139
joinType: JoinType,
@@ -1114,12 +1145,8 @@ trait GpuHashJoin extends GpuJoinExec {
11141145
(rightData, remappedRightOutput)
11151146
}
11161147

1117-
// For join types other than FullOuter, we simply set compareNullsEqual as true to adapt
1118-
// struct keys with nullable children. Non-nested keys can also be correctly processed with
1119-
// compareNullsEqual = true, because we filter all null records from build table before join.
1120-
// For some details, please refer the issue: https://github.com/NVIDIA/spark-rapids/issues/2126
1121-
protected lazy val compareNullsEqual: Boolean = (joinType != FullOuter) &&
1122-
GpuHashJoin.anyNullableStructChild(buildKeys)
1148+
// Delegate to the shared helper so this trait and the sized shuffled hash
// join agree on when nulls compare equal (struct keys with nullable children,
// non-full-outer joins). See
// https://github.com/NVIDIA/spark-rapids/issues/2126.
protected lazy val compareNullsEqual: Boolean =
  GpuHashJoin.compareNullsEqual(joinType, buildKeys)
11231150

11241151
protected lazy val (boundBuildKeys, boundStreamKeys) = {
11251152
val lkeys = GpuBindReferences.bindGpuReferences(leftKeys, left.output)
@@ -1150,11 +1177,11 @@ trait GpuHashJoin extends GpuJoinExec {
11501177
numOutputBatches: GpuMetric,
11511178
opTime: GpuMetric,
11521179
joinTime: GpuMetric): Iterator[ColumnarBatch] = {
1153-
// Filtering nulls on the build side is a workaround for Struct joins with nullable children
1154-
// see https://github.com/NVIDIA/spark-rapids/issues/2126 for more info
1155-
val builtAnyNullable = compareNullsEqual && buildKeys.exists(_.nullable)
11561180

1157-
val nullFiltered = if (builtAnyNullable) {
1181+
val filterOutNull = GpuHashJoin.buildSideNeedsNullFilter(joinType, compareNullsEqual,
1182+
buildSide, buildKeys)
1183+
1184+
val nullFiltered = if (filterOutNull) {
11581185
val sb = closeOnExcept(builtBatch)(
11591186
SpillableColumnarBatch(_, SpillPriorities.ACTIVE_ON_DECK_PRIORITY))
11601187
GpuHashJoin.filterNullsWithRetryAndClose(sb, boundBuildKeys)

0 commit comments

Comments
 (0)