Skip to content

Commit e0cacce

Browse files
committed
fix spark pom
1 parent ede7356 commit e0cacce

File tree

2 files changed

+96
-10
lines changed

2 files changed

+96
-10
lines changed

build.sbt

Lines changed: 27 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -573,19 +573,39 @@ lazy val spark = (project in file("spark-unified"))
573573
allMappings.distinct
574574
},
575575

576-
// Exclude internal modules from published POM
576+
// Exclude internal modules from published POM and add kernel dependencies.
577+
// Kernel modules are transitive through sparkV2 (an internal module), so they
578+
// are lost when sparkV2 is filtered out. We re-add them explicitly here.
577579
pomPostProcess := { node =>
578580
val internalModules = internalModuleNames.value
581+
val ver = version.value
579582
import scala.xml._
580583
import scala.xml.transform._
584+
585+
def kernelDependencyNode(artifactId: String): Elem = {
586+
<dependency>
587+
<groupId>io.delta</groupId>
588+
<artifactId>{artifactId}</artifactId>
589+
<version>{ver}</version>
590+
</dependency>
591+
}
592+
593+
val kernelDeps = Seq(
594+
kernelDependencyNode("delta-kernel-api"),
595+
kernelDependencyNode("delta-kernel-defaults"),
596+
kernelDependencyNode("delta-kernel-unitycatalog")
597+
)
598+
581599
new RuleTransformer(new RewriteRule {
582600
override def transform(n: Node): Seq[Node] = n match {
583-
case e: Elem if e.label == "dependency" =>
584-
val artifactId = (e \ "artifactId").text
585-
// Check if artifactId starts with any internal module name
586-
// (e.g., "delta-spark-v1_4.1_2.13" starts with "delta-spark-v1")
587-
val isInternal = internalModules.exists(module => artifactId.startsWith(module))
588-
if (isInternal) Seq.empty else Seq(n)
601+
case e: Elem if e.label == "dependencies" =>
602+
val filtered = e.child.filter {
603+
case child: Elem if child.label == "dependency" =>
604+
val artifactId = (child \ "artifactId").text
605+
!internalModules.exists(module => artifactId.startsWith(module))
606+
case _ => true
607+
}
608+
Seq(e.copy(child = filtered ++ kernelDeps))
589609
case _ => Seq(n)
590610
}
591611
}).transform(node).head

project/tests/test_cross_spark_publish.py

Lines changed: 69 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
import json
2121
import subprocess
2222
import sys
23+
import xml.etree.ElementTree as ET
2324
from dataclasses import dataclass
2425
from pathlib import Path
2526
from typing import List, Set, Dict
@@ -56,6 +57,15 @@
5657
"delta-kernel-unitycatalog-{version}.jar"
5758
]
5859

60+
# Kernel modules that must appear as dependencies in delta-spark's POM.
61+
# These are transitive through sparkV2 (an internal module filtered from the POM),
62+
# so they are explicitly added by pomPostProcess in build.sbt.
63+
DELTA_SPARK_EXPECTED_POM_DEPS = [
64+
"delta-kernel-api",
65+
"delta-kernel-defaults",
66+
"delta-kernel-unitycatalog",
67+
]
68+
5969

6070
@dataclass
6171
class SparkVersionSpec:
@@ -201,6 +211,45 @@ def validate_jars(self, expected: Set[str], test_name: str) -> bool:
201211

202212
return False
203213

214+
def validate_delta_spark_pom(self, spark_spec: 'SparkVersionSpec', test_name: str) -> bool:
215+
"""Validates that delta-spark's POM includes kernel module dependencies."""
216+
m2_repo = Path.home() / ".m2" / "repository" / "io" / "delta"
217+
218+
# delta-spark artifact name includes Spark suffix and Scala version
219+
artifact_name = f"delta-spark{spark_spec.suffix}_{self.scala_version}"
220+
pom_dir = m2_repo / artifact_name / self.delta_version
221+
pom_file = pom_dir / f"{artifact_name}-{self.delta_version}.pom"
222+
223+
if not pom_file.exists():
224+
print(f"✗ {test_name} POM - File not found: {pom_file}")
225+
return False
226+
227+
try:
228+
tree = ET.parse(pom_file)
229+
root = tree.getroot()
230+
ns = {"m": "http://maven.apache.org/POM/4.0.0"}
231+
232+
found_artifact_ids = set()
233+
for dep in root.findall(".//m:dependencies/m:dependency", ns):
234+
aid = dep.find("m:artifactId", ns)
235+
if aid is not None:
236+
found_artifact_ids.add(aid.text)
237+
238+
missing = [d for d in DELTA_SPARK_EXPECTED_POM_DEPS if d not in found_artifact_ids]
239+
240+
if not missing:
241+
print(f"✓ {test_name} POM - All kernel dependencies present in {artifact_name}")
242+
return True
243+
244+
print(f"✗ {test_name} POM - Missing kernel dependencies in {artifact_name}:")
245+
for dep in missing:
246+
print(f" ✗ {dep}")
247+
return False
248+
249+
except ET.ParseError as e:
250+
print(f"✗ {test_name} POM - Failed to parse {pom_file}: {e}")
251+
return False
252+
204253
def test_default_publish(self) -> bool:
205254
"""Default publishM2 should publish ALL modules WITH Spark suffix."""
206255
spark_spec = SPARK_VERSIONS[DEFAULT_SPARK]
@@ -219,7 +268,9 @@ def test_default_publish(self) -> bool:
219268

220269
# Default behavior: all Spark-dependent modules have suffix (e.g., delta-spark_4.0_2.13)
221270
expected = substitute_xversion(spark_spec.all_jars, self.delta_version)
222-
return self.validate_jars(expected, "Default publishM2 (with suffix)")
271+
jars_ok = self.validate_jars(expected, "Default publishM2 (with suffix)")
272+
pom_ok = self.validate_delta_spark_pom(spark_spec, "Default publishM2 (with suffix)")
273+
return jars_ok and pom_ok
223274

224275
def test_backward_compat_publish(self) -> bool:
225276
"""skipSparkSuffix=true should publish ALL modules WITHOUT Spark suffix."""
@@ -242,7 +293,10 @@ def test_backward_compat_publish(self) -> bool:
242293

243294
# Expect artifacts WITHOUT suffix (e.g., delta-spark_2.13 instead of delta-spark_4.0_2.13)
244295
expected = substitute_xversion(spark_spec_no_suffix.all_jars, self.delta_version)
245-
return self.validate_jars(expected, "skipSparkSuffix=true (backward compat)")
296+
jars_ok = self.validate_jars(expected, "skipSparkSuffix=true (backward compat)")
297+
pom_ok = self.validate_delta_spark_pom(
298+
spark_spec_no_suffix, "skipSparkSuffix=true (backward compat)")
299+
return jars_ok and pom_ok
246300

247301
def test_cross_spark_workflow(self) -> bool:
248302
"""Full cross-Spark workflow: backward-compat (no suffix) + all versions (with suffix)."""
@@ -290,7 +344,19 @@ def test_cross_spark_workflow(self) -> bool:
290344
expected.update(substitute_xversion(spark_spec.spark_related_jars, self.delta_version))
291345
expected.update(substitute_xversion(spark_spec.iceberg_jars, self.delta_version))
292346

293-
return self.validate_jars(expected, "Cross-Spark Workflow")
347+
jars_ok = self.validate_jars(expected, "Cross-Spark Workflow")
348+
349+
# Validate POM for each Spark version's delta-spark artifact
350+
pom_ok = True
351+
pom_ok = self.validate_delta_spark_pom(
352+
no_suffix_spec, "Cross-Spark Workflow (no suffix)") and pom_ok
353+
for spark_version, spark_spec in SPARK_VERSIONS.items():
354+
if "SNAPSHOT" in spark_version:
355+
continue
356+
pom_ok = self.validate_delta_spark_pom(
357+
spark_spec, f"Cross-Spark Workflow ({spark_version})") and pom_ok
358+
359+
return jars_ok and pom_ok
294360

295361
def validate_spark_versions(self) -> None:
296362
"""

0 commit comments

Comments (0)