Skip to content

Commit 005e450

Browse files
committed
Merge branch-25.04 into merge-branch-25.04-to-main
2 parents 7b1fc53 + 9fcf74e commit 005e450

File tree

50 files changed

+876
-400
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+876
-400
lines changed

CHANGELOG.md

Lines changed: 195 additions & 185 deletions
Large diffs are not rendered by default.

delta-lake/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/UpdateCommandMeta.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -19,11 +19,12 @@ package com.nvidia.spark.rapids.delta
1919
import com.databricks.sql.transaction.tahoe.commands.{UpdateCommand, UpdateCommandEdge}
2020
import com.databricks.sql.transaction.tahoe.rapids.{GpuDeltaLog, GpuUpdateCommand}
2121
import com.nvidia.spark.rapids.{DataFromReplacementRule, RapidsConf, RapidsMeta, RunnableCommandMeta}
22+
import com.nvidia.spark.rapids.delta.shims.UpdateCommandMetaShim
2223

2324
import org.apache.spark.sql.execution.command.RunnableCommand
2425

2526
class UpdateCommandMeta(
26-
updateCmd: UpdateCommand,
27+
val updateCmd: UpdateCommand,
2728
conf: RapidsConf,
2829
parent: Option[RapidsMeta[_, _, _]],
2930
rule: DataFromReplacementRule)
@@ -34,6 +35,7 @@ class UpdateCommandMeta(
3435
willNotWorkOnGpu("Delta Lake output acceleration has been disabled. To enable set " +
3536
s"${RapidsConf.ENABLE_DELTA_WRITE} to true")
3637
}
38+
UpdateCommandMetaShim.tagForGpu(this)
3739
RapidsDeltaUtils.tagForDeltaWrite(this, updateCmd.target.schema,
3840
Some(updateCmd.tahoeFileIndex.deltaLog), Map.empty, updateCmd.tahoeFileIndex.spark)
3941
}
@@ -50,7 +52,7 @@ class UpdateCommandMeta(
5052
}
5153

5254
class UpdateCommandEdgeMeta(
53-
updateCmd: UpdateCommandEdge,
55+
val updateCmd: UpdateCommandEdge,
5456
conf: RapidsConf,
5557
parent: Option[RapidsMeta[_, _, _]],
5658
rule: DataFromReplacementRule)
@@ -61,6 +63,7 @@ class UpdateCommandEdgeMeta(
6163
willNotWorkOnGpu("Delta Lake output acceleration has been disabled. To enable set " +
6264
s"${RapidsConf.ENABLE_DELTA_WRITE} to true")
6365
}
66+
UpdateCommandMetaShim.tagForGpu(this)
6467
RapidsDeltaUtils.tagForDeltaWrite(this, updateCmd.target.schema,
6568
Some(updateCmd.tahoeFileIndex.deltaLog), Map.empty, updateCmd.tahoeFileIndex.spark)
6669
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.nvidia.spark.rapids.delta.shims
18+
19+
import com.nvidia.spark.rapids.delta.{UpdateCommandEdgeMeta, UpdateCommandMeta}
20+
21+
object UpdateCommandMetaShim {
22+
def tagForGpu(meta: UpdateCommandMeta): Unit = {}
23+
24+
def tagForGpu(meta: UpdateCommandEdgeMeta): Unit = {}
25+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.nvidia.spark.rapids.delta.shims
18+
19+
import com.nvidia.spark.rapids.delta.{UpdateCommandEdgeMeta, UpdateCommandMeta}
20+
21+
object UpdateCommandMetaShim {
22+
def tagForGpu(meta: UpdateCommandMeta): Unit = {}
23+
24+
def tagForGpu(meta: UpdateCommandEdgeMeta): Unit = {}
25+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.nvidia.spark.rapids.delta.shims
18+
19+
import com.nvidia.spark.rapids.delta.{UpdateCommandEdgeMeta, UpdateCommandMeta}
20+
21+
object UpdateCommandMetaShim {
22+
def tagForGpu(meta: UpdateCommandMeta): Unit = {}
23+
24+
def tagForGpu(meta: UpdateCommandEdgeMeta): Unit = {}
25+
}

delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransaction.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import scala.collection.mutable.ListBuffer
2727

2828
import com.databricks.sql.transaction.tahoe._
2929
import com.databricks.sql.transaction.tahoe.actions.{AddFile, FileAction}
30+
import com.databricks.sql.transaction.tahoe.commands.DeletionVectorUtils
3031
import com.databricks.sql.transaction.tahoe.constraints.{Constraint, Constraints}
3132
import com.databricks.sql.transaction.tahoe.schema.InvariantViolationException
3233
import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf
@@ -109,12 +110,10 @@ class GpuOptimisticTransaction(
109110
}
110111

111112
val _spark = spark
112-
val protocol = deltaLog.unsafeVolatileSnapshot.protocol
113-
114113
val statsCollection = new GpuStatisticsCollection {
115114
override val spark = _spark
116-
override val deletionVectorsSupported =
117-
protocol.isFeatureSupported(DeletionVectorsTableFeature)
115+
override val deletionVectorsSupported: Boolean =
116+
DeletionVectorUtils.deletionVectorsWritable(snapshot, newProtocol, newMetadata)
118117
override val tableDataSchema = tableSchema
119118
override val dataSchema = statsDataSchema.toStructType
120119
override val numIndexedCols = indexedCols

delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,14 @@ case class GpuDeltaParquetFileFormat(
247247

248248
object GpuDeltaParquetFileFormat {
249249
def tagSupportForGpuFileSourceScan(meta: SparkPlanMeta[FileSourceScanExec]): Unit = {
250-
if (!meta.conf.isParquetPerFileReadEnabled) {
251-
meta.willNotWorkOnGpu("Deletion vectors only supported for PERFILE reader")
250+
val format = meta.wrapped.relation.fileFormat.asInstanceOf[DeltaParquetFileFormat]
251+
val requiredSchema = meta.wrapped.requiredSchema
252+
if (requiredSchema.exists(_.name.startsWith("_databricks_internal"))) {
253+
meta.willNotWorkOnGpu(
254+
s"reading metadata columns starting with prefix _databricks_internal is not supported")
255+
}
256+
if (format.hasDeletionVectorMap) {
257+
meta.willNotWorkOnGpu("deletion vectors are not supported")
252258
}
253259
}
254260

delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/DeleteCommandMetaShim.scala

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,28 @@
1616

1717
package com.nvidia.spark.rapids.delta.shims
1818

19+
import com.databricks.sql.transaction.tahoe.commands.DeletionVectorUtils
20+
import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf
1921
import com.nvidia.spark.rapids.delta.{DeleteCommandEdgeMeta, DeleteCommandMeta}
2022

2123
object DeleteCommandMetaShim {
22-
def tagForGpu(meta: DeleteCommandMeta): Unit = {}
24+
def tagForGpu(meta: DeleteCommandMeta): Unit = {
25+
val dvFeatureEnabled = DeletionVectorUtils.deletionVectorsWritable(
26+
meta.deleteCmd.deltaLog.unsafeVolatileSnapshot)
27+
if (dvFeatureEnabled && meta.deleteCmd.conf.getConf(
28+
DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS)) {
29+
// https://github.com/NVIDIA/spark-rapids/issues/8654
30+
meta.willNotWorkOnGpu("Deletion vector writes are not supported on GPU")
31+
}
32+
}
2333

24-
def tagForGpu(meta: DeleteCommandEdgeMeta): Unit = {}
34+
def tagForGpu(meta: DeleteCommandEdgeMeta): Unit = {
35+
val dvFeatureEnabled = DeletionVectorUtils.deletionVectorsWritable(
36+
meta.deleteCmd.deltaLog.unsafeVolatileSnapshot)
37+
if (dvFeatureEnabled && meta.deleteCmd.conf.getConf(
38+
DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS)) {
39+
// https://github.com/NVIDIA/spark-rapids/issues/8654
40+
meta.willNotWorkOnGpu("Deletion vector writes are not supported on GPU")
41+
}
42+
}
2543
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.nvidia.spark.rapids.delta.shims
18+
19+
import com.databricks.sql.transaction.tahoe.commands.DeletionVectorUtils
20+
import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf
21+
import com.nvidia.spark.rapids.delta.{UpdateCommandEdgeMeta, UpdateCommandMeta}
22+
23+
object UpdateCommandMetaShim {
24+
def tagForGpu(meta: UpdateCommandMeta): Unit = {
25+
val deltaLog = meta.updateCmd.tahoeFileIndex.deltaLog
26+
val dvFeatureEnabled =
27+
DeletionVectorUtils.deletionVectorsWritable(deltaLog.unsafeVolatileSnapshot)
28+
29+
if (dvFeatureEnabled && meta.updateCmd.conf.getConf(
30+
DeltaSQLConf.UPDATE_USE_PERSISTENT_DELETION_VECTORS)) {
31+
// https://github.com/NVIDIA/spark-rapids/issues/8654
32+
meta.willNotWorkOnGpu("Deletion vector writes are not supported on GPU")
33+
}
34+
}
35+
36+
def tagForGpu(meta: UpdateCommandEdgeMeta): Unit = {
37+
val deltaLog = meta.updateCmd.tahoeFileIndex.deltaLog
38+
val dvFeatureEnabled =
39+
DeletionVectorUtils.deletionVectorsWritable(deltaLog.unsafeVolatileSnapshot)
40+
41+
if (dvFeatureEnabled && meta.updateCmd.conf.getConf(
42+
DeltaSQLConf.UPDATE_USE_PERSISTENT_DELETION_VECTORS)) {
43+
// https://github.com/NVIDIA/spark-rapids/issues/8654
44+
meta.willNotWorkOnGpu("Deletion vector writes are not supported on GPU")
45+
}
46+
}
47+
}

docs/archive.md

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,97 @@ nav_order: 15
55
---
66
Below are archived releases for RAPIDS Accelerator for Apache Spark.
77

8+
## Release v25.02.1
9+
### Hardware Requirements:
10+
11+
The plugin is tested on the following architectures:
12+
13+
GPU Models: NVIDIA V100, T4, A10/A100, L4, H100 and B100 GPUs
14+
15+
### Software Requirements:
16+
17+
OS: Spark RAPIDS is compatible with any Linux distribution with glibc >= 2.28 (Please check ldd --version output). glibc 2.28 was released August 1, 2018.
18+
Tested on Ubuntu 20.04, Ubuntu 22.04, Rocky Linux 8 and Rocky Linux 9
19+
20+
NVIDIA Driver*: R470+
21+
22+
Runtime:
23+
Scala 2.12, 2.13
24+
Python, Java Virtual Machine (JVM) compatible with your spark-version.
25+
26+
* Check the Spark documentation for Python and Java version compatibility with your specific
27+
Spark version. For instance, visit `https://spark.apache.org/docs/3.4.1` for Spark 3.4.1.
28+
29+
Supported Spark versions:
30+
Apache Spark 3.2.0, 3.2.1, 3.2.2, 3.2.3, 3.2.4
31+
Apache Spark 3.3.0, 3.3.1, 3.3.2, 3.3.3, 3.3.4
32+
Apache Spark 3.4.0, 3.4.1, 3.4.2, 3.4.3, 3.4.4
33+
Apache Spark 3.5.0, 3.5.1, 3.5.2, 3.5.3, 3.5.4, 3.5.5
34+
35+
Supported Databricks runtime versions for Azure and AWS:
36+
Databricks 11.3 ML LTS (GPU, Scala 2.12, Spark 3.3.0)
37+
Databricks 12.2 ML LTS (GPU, Scala 2.12, Spark 3.3.2)
38+
Databricks 13.3 ML LTS (GPU, Scala 2.12, Spark 3.4.1)
39+
40+
Supported Dataproc versions (Debian/Ubuntu/Rocky):
41+
GCP Dataproc 2.1
42+
GCP Dataproc 2.2
43+
44+
Supported Dataproc Serverless versions:
45+
Spark runtime 1.1 LTS
46+
Spark runtime 2.0
47+
Spark runtime 2.1
48+
Spark runtime 2.2
49+
50+
*Some hardware may have a minimum driver version greater than R470. Check the GPU spec sheet
51+
for your hardware's minimum driver version.
52+
53+
*For Cloudera and EMR support, please refer to the
54+
[Distributions](https://docs.nvidia.com/spark-rapids/user-guide/latest/faq.html#which-distributions-are-supported) section of the FAQ.
55+
56+
### RAPIDS Accelerator's Support Policy for Apache Spark
57+
The RAPIDS Accelerator maintains support for Apache Spark versions available for download from [Apache Spark](https://spark.apache.org/downloads.html)
58+
59+
### Download RAPIDS Accelerator for Apache Spark v25.02.1
60+
61+
| Processor | Scala Version | Download Jar | Download Signature |
62+
|-----------|---------------|--------------|--------------------|
63+
| x86_64 | Scala 2.12 | [RAPIDS Accelerator v25.02.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/25.02.1/rapids-4-spark_2.12-25.02.1.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/25.02.1/rapids-4-spark_2.12-25.02.1.jar.asc) |
64+
| x86_64 | Scala 2.13 | [RAPIDS Accelerator v25.02.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/25.02.1/rapids-4-spark_2.13-25.02.1.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/25.02.1/rapids-4-spark_2.13-25.02.1.jar.asc) |
65+
| arm64 | Scala 2.12 | [RAPIDS Accelerator v25.02.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/25.02.1/rapids-4-spark_2.12-25.02.1-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/25.02.1/rapids-4-spark_2.12-25.02.1-cuda11-arm64.jar.asc) |
66+
| arm64 | Scala 2.13 | [RAPIDS Accelerator v25.02.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/25.02.1/rapids-4-spark_2.13-25.02.1-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/25.02.1/rapids-4-spark_2.13-25.02.1-cuda11-arm64.jar.asc) |
67+
68+
This package is built against CUDA 11.8. It is tested on V100, T4, A10, A100, L4 and H100 GPUs with
69+
CUDA 11.8 through CUDA 12.0.
70+
71+
### Verify signature
72+
* Download the [PUB_KEY](https://keys.openpgp.org/search?q=sw-spark@nvidia.com).
73+
* Import the public key: `gpg --import PUB_KEY`
74+
* Verify the signature for Scala 2.12 jar:
75+
`gpg --verify rapids-4-spark_2.12-25.02.1.jar.asc rapids-4-spark_2.12-25.02.1.jar`
76+
* Verify the signature for Scala 2.13 jar:
77+
`gpg --verify rapids-4-spark_2.13-25.02.1.jar.asc rapids-4-spark_2.13-25.02.1.jar`
78+
79+
The output of signature verify:
80+
81+
gpg: Good signature from "NVIDIA Spark (For the signature of spark-rapids release jars) <sw-spark@nvidia.com>"
82+
83+
### Release Notes
84+
* Support the Spark functions Bin and TruncDate
85+
* Support group-limit optimization for ROW_NUMBER
86+
* Improve Spark metrics: Print the batch size information to executor log
87+
* Refine filter push down to avoid double evaluation
88+
* Grab the GPU Semaphore when reading cached batch data with the GPU to avoid a GPU OOM case
89+
* Add an option to disable measuring buffer copy to improve large shuffle large partition serialization
90+
* For updates on RAPIDS Accelerator Tools, please visit [this link](https://github.com/NVIDIA/spark-rapids-tools/releases)
91+
* Upgraded statically linked CUDA toolkit to 12.8, which includes support for GB100 GPUs
92+
93+
Note: There is a known issue in the 25.02.1 release when decompressing gzip files on H100 GPUs.
94+
Please find more details in [issue-16661](https://github.com/rapidsai/cudf/issues/16661).
95+
96+
For a detailed list of changes, please refer to the
97+
[CHANGELOG](https://github.com/NVIDIA/spark-rapids/blob/main/CHANGELOG.md).
98+
899
## Release v25.02.0
9100
### Hardware Requirements:
10101

0 commit comments

Comments
 (0)