2020import json
2121import subprocess
2222import sys
23+ import xml .etree .ElementTree as ET
2324from dataclasses import dataclass
2425from pathlib import Path
2526from typing import List , Set , Dict
5657 "delta-kernel-unitycatalog-{version}.jar"
5758]
5859
# artifactIds of the kernel modules that delta-spark's published POM must
# declare as dependencies. They reach delta-spark transitively via sparkV2
# (an internal module filtered out of the POM), so build.sbt's pomPostProcess
# injects them explicitly; this list pins that expectation.
DELTA_SPARK_EXPECTED_POM_DEPS = [
    "delta-kernel-api",
    "delta-kernel-defaults",
    "delta-kernel-unitycatalog",
]
68+
5969
6070@dataclass
6171class SparkVersionSpec :
@@ -201,6 +211,45 @@ def validate_jars(self, expected: Set[str], test_name: str) -> bool:
201211
202212 return False
203213
def validate_delta_spark_pom(self, spark_spec: 'SparkVersionSpec', test_name: str,
                             m2_repo: Path = None,
                             expected_deps: List[str] = None) -> bool:
    """Validate that delta-spark's published POM declares the kernel deps.

    The kernel modules are transitive through sparkV2 (an internal module
    filtered from the POM), so build.sbt's pomPostProcess must add them
    explicitly; this check guards against that injection regressing.

    Args:
        spark_spec: Spec whose ``suffix`` forms part of the artifact name
            (empty suffix for backward-compat publishing).
        test_name: Label used in the pass/fail console output.
        m2_repo: Root of the io.delta artifacts in the local Maven repo.
            Defaults to ``~/.m2/repository/io/delta``.
        expected_deps: artifactIds that must appear as dependencies.
            Defaults to DELTA_SPARK_EXPECTED_POM_DEPS.

    Returns:
        True iff the POM exists, parses, and contains every expected
        dependency; otherwise False (details are printed).
    """
    if m2_repo is None:
        m2_repo = Path.home() / ".m2" / "repository" / "io" / "delta"
    if expected_deps is None:
        expected_deps = DELTA_SPARK_EXPECTED_POM_DEPS

    # delta-spark artifact name includes the Spark suffix and Scala version,
    # e.g. delta-spark_4.0_2.13.
    artifact_name = f"delta-spark{spark_spec.suffix}_{self.scala_version}"
    pom_dir = m2_repo / artifact_name / self.delta_version
    pom_file = pom_dir / f"{artifact_name}-{self.delta_version}.pom"

    if not pom_file.exists():
        print(f"✗ {test_name} POM - File not found: {pom_file}")
        return False

    # Keep the try minimal: only ET.parse can raise ParseError.
    try:
        root = ET.parse(pom_file).getroot()
    except ET.ParseError as e:
        print(f"✗ {test_name} POM - Failed to parse {pom_file}: {e}")
        return False

    # Derive the XML namespace from the root tag so both standard Maven POMs
    # (xmlns="http://maven.apache.org/POM/4.0.0") and namespace-less POMs are
    # handled; a hard-coded namespace would misreport the latter as missing
    # every dependency.
    if root.tag.startswith("{"):
        ns = {"m": root.tag[1:root.tag.index("}")]}
        dep_xpath, aid_xpath = ".//m:dependencies/m:dependency", "m:artifactId"
    else:
        ns = {}
        dep_xpath, aid_xpath = ".//dependencies/dependency", "artifactId"

    found_artifact_ids = set()
    for dep in root.findall(dep_xpath, ns):
        aid = dep.find(aid_xpath, ns)
        if aid is not None and aid.text:
            found_artifact_ids.add(aid.text)

    missing = [d for d in expected_deps if d not in found_artifact_ids]

    if not missing:
        print(f"✓ {test_name} POM - All kernel dependencies present in {artifact_name}")
        return True

    print(f"✗ {test_name} POM - Missing kernel dependencies in {artifact_name}:")
    for dep in missing:
        print(f"  ✗ {dep}")
    return False
252+
204253 def test_default_publish (self ) -> bool :
205254 """Default publishM2 should publish ALL modules WITH Spark suffix."""
206255 spark_spec = SPARK_VERSIONS [DEFAULT_SPARK ]
@@ -219,7 +268,9 @@ def test_default_publish(self) -> bool:
219268
220269 # Default behavior: all Spark-dependent modules have suffix (e.g., delta-spark_4.0_2.13)
221270 expected = substitute_xversion (spark_spec .all_jars , self .delta_version )
222- return self .validate_jars (expected , "Default publishM2 (with suffix)" )
271+ jars_ok = self .validate_jars (expected , "Default publishM2 (with suffix)" )
272+ pom_ok = self .validate_delta_spark_pom (spark_spec , "Default publishM2 (with suffix)" )
273+ return jars_ok and pom_ok
223274
224275 def test_backward_compat_publish (self ) -> bool :
225276 """skipSparkSuffix=true should publish ALL modules WITHOUT Spark suffix."""
@@ -242,7 +293,10 @@ def test_backward_compat_publish(self) -> bool:
242293
243294 # Expect artifacts WITHOUT suffix (e.g., delta-spark_2.13 instead of delta-spark_4.0_2.13)
244295 expected = substitute_xversion (spark_spec_no_suffix .all_jars , self .delta_version )
245- return self .validate_jars (expected , "skipSparkSuffix=true (backward compat)" )
296+ jars_ok = self .validate_jars (expected , "skipSparkSuffix=true (backward compat)" )
297+ pom_ok = self .validate_delta_spark_pom (
298+ spark_spec_no_suffix , "skipSparkSuffix=true (backward compat)" )
299+ return jars_ok and pom_ok
246300
247301 def test_cross_spark_workflow (self ) -> bool :
248302 """Full cross-Spark workflow: backward-compat (no suffix) + all versions (with suffix)."""
@@ -290,7 +344,19 @@ def test_cross_spark_workflow(self) -> bool:
290344 expected .update (substitute_xversion (spark_spec .spark_related_jars , self .delta_version ))
291345 expected .update (substitute_xversion (spark_spec .iceberg_jars , self .delta_version ))
292346
293- return self .validate_jars (expected , "Cross-Spark Workflow" )
347+ jars_ok = self .validate_jars (expected , "Cross-Spark Workflow" )
348+
349+ # Validate POM for each Spark version's delta-spark artifact
350+ pom_ok = True
351+ pom_ok = self .validate_delta_spark_pom (
352+ no_suffix_spec , "Cross-Spark Workflow (no suffix)" ) and pom_ok
353+ for spark_version , spark_spec in SPARK_VERSIONS .items ():
354+ if "SNAPSHOT" in spark_version :
355+ continue
356+ pom_ok = self .validate_delta_spark_pom (
357+ spark_spec , f"Cross-Spark Workflow ({ spark_version } )" ) and pom_ok
358+
359+ return jars_ok and pom_ok
294360
295361 def validate_spark_versions (self ) -> None :
296362 """
0 commit comments