Skip to content

Commit d8d533d

Browse files
authored
[Build] Remove the delta-hudi and delta-contribs spark suffix for artifacts. (#6090)
#### Which Delta project/connector is this regarding? - [x] Spark - [ ] Standalone - [ ] Flink - [ ] Kernel - [ ] Other (fill in here) ## Description Currently, after we publish the `./build/sbt release` artifacts to the staging service, it has the following artifacts: ``` io/delta/delta-connect-client_2.13/4.1.0/ io/delta/delta-connect-client_4.0_2.13/4.1.0/ io/delta/delta-connect-client_4.1_2.13/4.1.0/ io/delta/delta-connect-common_2.13/4.1.0/ io/delta/delta-connect-common_4.0_2.13/4.1.0/ io/delta/delta-connect-common_4.1_2.13/4.1.0/ io/delta/delta-connect-server_2.13/4.1.0/ io/delta/delta-connect-server_4.0_2.13/4.1.0/ io/delta/delta-connect-server_4.1_2.13/4.1.0/ io/delta/delta-contribs_2.13/4.1.0/ io/delta/delta-contribs_4.0_2.13/4.1.0/ io/delta/delta-contribs_4.1_2.13/4.1.0/ io/delta/delta-hudi_2.13/4.1.0/ io/delta/delta-hudi_4.0_2.13/4.1.0/ io/delta/delta-hudi_4.1_2.13/4.1.0/ io/delta/delta-iceberg_2.13/4.1.0/ io/delta/delta-kernel-api/4.1.0/ io/delta/delta-kernel-defaults/4.1.0/ io/delta/delta-kernel-unitycatalog/4.1.0/ io/delta/delta-sharing-spark_2.13/4.1.0/ io/delta/delta-sharing-spark_4.0_2.13/4.1.0/ io/delta/delta-sharing-spark_4.1_2.13/4.1.0/ io/delta/delta-spark_2.13/4.1.0/ io/delta/delta-spark_4.0_2.13/4.1.0/ io/delta/delta-spark_4.1_2.13/4.1.0/ io/delta/delta-storage/4.1.0/ io/delta/delta-storage-s3-dynamodb/4.1.0/ ``` For `delta-hudi`, we should remove the `_4.0` and `_4.1` suffixes, since we don't need the Spark suffix for `delta-hudi`. For `delta-contribs`, which only has two `LogStore` classes inside it, we should also remove the Spark suffix. ## How was this patch tested? Used `test_cross_spark_publish.py` to test this. ## Does this PR introduce _any_ user-facing changes? Yes. The Spark suffix is removed for `delta-hudi`, so the `delta-hudi_4.0_2.13` artifacts won't be available any more. --------- Signed-off-by: openinx <openinx@gmail.com>
1 parent 2d9a652 commit d8d533d

File tree

3 files changed

+52
-24
lines changed

3 files changed

+52
-24
lines changed

build.sbt

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,10 @@ lazy val contribs = (project in file("contribs"))
662662
commonSettings,
663663
scalaStyleSettings,
664664
releaseSettings,
665-
CrossSparkVersions.sparkDependentModuleName(sparkVersion),
665+
// Set sparkVersion directly (not sparkDependentModuleName) so that
666+
// runOnlyForReleasableSparkModules discovers this module, but without adding a Spark
667+
// suffix to the artifact name. delta-contribs is only published as delta-contribs_2.13.
668+
sparkVersion := CrossSparkVersions.getSparkVersion(),
666669
Compile / packageBin / mappings := (Compile / packageBin / mappings).value ++
667670
listPythonFiles(baseDirectory.value.getParentFile / "python"),
668671

@@ -1315,15 +1318,30 @@ lazy val hudi = (project in file("hudi"))
13151318
commonSettings,
13161319
scalaStyleSettings,
13171320
releaseSettings,
1318-
CrossSparkVersions.sparkDependentSettings(sparkVersion),
1319-
libraryDependencies ++= Seq(
1320-
"org.apache.hudi" % "hudi-java-client" % "0.15.0" % "compile" excludeAll(
1321-
ExclusionRule(organization = "org.apache.hadoop"),
1322-
ExclusionRule(organization = "org.apache.zookeeper"),
1323-
),
1324-
"org.apache.spark" %% "spark-avro" % sparkVersion.value % "test" excludeAll ExclusionRule(organization = "org.apache.hadoop"),
1325-
"org.apache.parquet" % "parquet-avro" % "1.12.3" % "compile"
1326-
),
1321+
// Set sparkVersion directly (not sparkDependentModuleName) so that
1322+
// runOnlyForReleasableSparkModules discovers this module, but without adding a Spark
1323+
// suffix to the artifact name. delta-hudi is only published as delta-hudi_2.13.
1324+
sparkVersion := CrossSparkVersions.getSparkVersion(),
1325+
libraryDependencies ++= {
1326+
if (supportHudi) {
1327+
Seq(
1328+
"org.apache.hudi" % "hudi-java-client" % "0.15.0" % "compile" excludeAll(
1329+
ExclusionRule(organization = "org.apache.hadoop"),
1330+
ExclusionRule(organization = "org.apache.zookeeper"),
1331+
),
1332+
"org.apache.spark" %% "spark-avro" % sparkVersion.value % "test" excludeAll ExclusionRule(organization = "org.apache.hadoop"),
1333+
"org.apache.parquet" % "parquet-avro" % "1.12.3" % "compile"
1334+
)
1335+
} else {
1336+
Seq.empty
1337+
}
1338+
},
1339+
// Skip compilation and publishing when supportHudi is false
1340+
Compile / skip := !supportHudi,
1341+
Test / skip := !supportHudi,
1342+
publish / skip := !supportHudi,
1343+
publishLocal / skip := !supportHudi,
1344+
publishM2 / skip := !supportHudi,
13271345
assembly / assemblyJarName := s"${name.value}-assembly_${scalaBinaryVersion.value}-${version.value}.jar",
13281346
assembly / logLevel := Level.Info,
13291347
assembly / test := {},

project/CrossSparkVersions.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ import Unidoc._
8282
* 1. Spark-Dependent Published Modules:
8383
* - Use CrossSparkVersions.sparkDependentSettings(sparkVersion)
8484
* - Include releaseSettings (publishable)
85-
* - Examples: delta-spark, delta-connect-*, delta-sharing-spark, delta-iceberg
85+
* - Examples: delta-spark, delta-connect-*, delta-sharing-spark, delta-iceberg, delta-hudi, delta-contribs
8686
* - These modules get version-specific artifact names for non-default Spark versions
8787
* - Automatically included in cross-Spark publishing
8888
*
@@ -277,7 +277,6 @@ object SparkVersionSpec {
277277
targetJvm = "17",
278278
additionalSourceDir = Some("scala-shims/spark-4.1"),
279279
supportIceberg = false,
280-
supportHudi = false,
281280
antlr4Version = "4.13.1",
282281
additionalJavaOptions = java17TestSettings,
283282
jacksonVersion = "2.18.2"
@@ -288,7 +287,6 @@ object SparkVersionSpec {
288287
targetJvm = "17",
289288
additionalSourceDir = Some("scala-shims/spark-4.2"),
290289
supportIceberg = false,
291-
supportHudi = false,
292290
antlr4Version = "4.13.1",
293291
additionalJavaOptions = java17TestSettings,
294292
jacksonVersion = "2.18.2",

project/tests/test_cross_spark_publish.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@
3535
"delta-connect-client{suffix}_2.13-{version}.jar",
3636
"delta-connect-server{suffix}_2.13-{version}.jar",
3737
"delta-sharing-spark{suffix}_2.13-{version}.jar",
38-
"delta-contribs{suffix}_2.13-{version}.jar",
39-
"delta-hudi{suffix}_2.13-{version}.jar",
4038
]
4139

4240
# Iceberg-related modules - only built for Spark versions with supportIceberg=true
@@ -45,15 +43,21 @@
4543
"delta-iceberg_2.13-{version}.jar",
4644
]
4745

46+
# Hudi-related modules - only built for Spark versions with supportHudi=true
47+
# delta-hudi has no Spark suffix (always delta-hudi_2.13)
48+
DELTA_HUDI_JAR_TEMPLATES = [
49+
"delta-hudi_2.13-{version}.jar",
50+
]
51+
4852
# Non-spark-related modules (built once, same for all Spark versions)
4953
# Template format: {version} = Delta version (e.g., "3.4.0-SNAPSHOT")
5054
NON_SPARK_RELATED_JAR_TEMPLATES = [
51-
# Java-only modules (no Scala version)
5255
"delta-storage-{version}.jar",
5356
"delta-kernel-api-{version}.jar",
5457
"delta-kernel-defaults-{version}.jar",
5558
"delta-storage-s3-dynamodb-{version}.jar",
56-
"delta-kernel-unitycatalog-{version}.jar"
59+
"delta-kernel-unitycatalog-{version}.jar",
60+
"delta-contribs_2.13-{version}.jar",
5761
]
5862

5963

@@ -65,6 +69,7 @@ class SparkVersionSpec:
6569
"""
6670
suffix: str # e.g., "" for default, "_X.Y" for other versions
6771
support_iceberg: bool = False # Whether this Spark version supports iceberg integration
72+
support_hudi: bool = True # Whether this Spark version supports hudi integration
6873

6974
def __post_init__(self):
7075
"""Generate JAR templates with the suffix applied."""
@@ -80,22 +85,28 @@ def __post_init__(self):
8085
else:
8186
self.iceberg_jars = []
8287

88+
# Hudi JARs have no Spark suffix (always delta-hudi_2.13)
89+
if self.support_hudi:
90+
self.hudi_jars = list(DELTA_HUDI_JAR_TEMPLATES)
91+
else:
92+
self.hudi_jars = []
93+
8394
# Non-Spark-related JAR templates are the same for all Spark versions
8495
self.non_spark_related_jars = list(NON_SPARK_RELATED_JAR_TEMPLATES)
8596

8697
@property
8798
def all_jars(self) -> List[str]:
88-
"""All JAR templates for this Spark version (Spark-related + non-Spark-related + iceberg if supported)."""
89-
return self.spark_related_jars + self.non_spark_related_jars + self.iceberg_jars
99+
"""All JAR templates for this Spark version."""
100+
return self.spark_related_jars + self.non_spark_related_jars + self.iceberg_jars + self.hudi_jars
90101

91102

92103
# Spark versions to test (key = full version string, value = spec with suffix)
93104
# By default, ALL versions get a Spark suffix (e.g., delta-spark_4.0_2.13)
94105
# skipSparkSuffix=true removes the suffix (used during release for backward compat)
95106
# These should mirror CrossSparkVersions.scala
96107
SPARK_VERSIONS: Dict[str, SparkVersionSpec] = {
97-
"4.0.1": SparkVersionSpec(suffix="_4.0", support_iceberg=True),
98-
"4.1.0": SparkVersionSpec(suffix="_4.1", support_iceberg=False)
108+
"4.0.1": SparkVersionSpec(suffix="_4.0", support_iceberg=True, support_hudi=True),
109+
"4.1.0": SparkVersionSpec(suffix="_4.1", support_iceberg=False, support_hudi=True)
99110
}
100111

101112
# The default Spark version
@@ -223,7 +234,7 @@ def test_backward_compat_publish(self) -> bool:
223234
# Create a spec without suffix for backward compatibility
224235
# Uses the same iceberg support as the default Spark version
225236
default_spark_spec = SPARK_VERSIONS[DEFAULT_SPARK]
226-
spark_spec_no_suffix = SparkVersionSpec(suffix="", support_iceberg=default_spark_spec.support_iceberg)
237+
spark_spec_no_suffix = SparkVersionSpec(suffix="", support_iceberg=default_spark_spec.support_iceberg, support_hudi=default_spark_spec.support_hudi)
227238

228239
print("\n" + "="*70)
229240
print(f"TEST: skipSparkSuffix=true (backward compatibility - no suffix)")
@@ -271,12 +282,12 @@ def test_cross_spark_workflow(self) -> bool:
271282
# Build expected JARs:
272283
# 1. All modules WITHOUT suffix (from Step 1 - backward compat)
273284
# 2. Spark-dependent modules WITH suffix for each non-master version (from Step 2)
274-
# 3. Iceberg JARs for supported versions (with suffix only)
285+
# 3. Iceberg/Hudi JARs for supported versions (no Spark suffix)
275286
expected = set()
276287

277288
# Step 1: All modules without suffix (uses default Spark version's iceberg support)
278289
default_spark_spec = SPARK_VERSIONS[DEFAULT_SPARK]
279-
no_suffix_spec = SparkVersionSpec(suffix="", support_iceberg=default_spark_spec.support_iceberg)
290+
no_suffix_spec = SparkVersionSpec(suffix="", support_iceberg=default_spark_spec.support_iceberg, support_hudi=default_spark_spec.support_hudi)
280291
expected.update(substitute_xversion(no_suffix_spec.all_jars, self.delta_version))
281292

282293
# Step 2: Spark-dependent modules WITH suffix for each non-master version
@@ -286,6 +297,7 @@ def test_cross_spark_workflow(self) -> bool:
286297

287298
expected.update(substitute_xversion(spark_spec.spark_related_jars, self.delta_version))
288299
expected.update(substitute_xversion(spark_spec.iceberg_jars, self.delta_version))
300+
expected.update(substitute_xversion(spark_spec.hudi_jars, self.delta_version))
289301

290302
return self.validate_jars(expected, "Cross-Spark Workflow")
291303

0 commit comments

Comments
 (0)