Skip to content

Commit 3066b63

Browse files
derrickburns authored and claude committed
fix: resolve Scala 2.12 and Spark 3.4 compatibility issues in CI
Fixed multiple cross-version compatibility issues discovered during CI testing:

1. **isFinite compatibility**: Replaced Scala 2.13-only `.isFinite` with `java.lang.Double.isFinite()` for Scala 2.12 compatibility across all test files
2. **Spark API compatibility**: Replaced `model.summary.trainingCost` with `model.computeCost()` in PerformanceSanityCheck.scala for Spark 3.4 support
3. **Parallel collections**: Fixed CollectionConverters import in PerformanceTestSuite to use compat package for Scala 2.12/2.13 cross-compilation
4. **CI workflow fixes**: Added required arguments to PersistenceRoundTrip example calls in examples-run and persistence-cross jobs

These changes ensure all CI jobs pass across the full test matrix:
- Scala 2.12.18 / Spark 3.4.0
- Scala 2.12.18 / Spark 3.5.1
- Scala 2.13.14 / Spark 3.4.0
- Scala 2.13.14 / Spark 3.5.1

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 42a69e2 commit 3066b63

File tree

7 files changed

+20
-15
lines changed

7 files changed

+20
-15
lines changed

.github/workflows/ci.yml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,10 @@ jobs:
106106
run: sbt ++${{ env.SCALA_213 }} "runMain examples.SoftKMeansExample"
107107
- name: Run XMeansExample
108108
run: sbt ++${{ env.SCALA_213 }} "runMain examples.XMeansExample"
109-
- name: Run PersistenceRoundTrip
110-
run: sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip"
109+
- name: Run PersistenceRoundTrip (save)
110+
run: sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip save /tmp/test-model"
111+
- name: Run PersistenceRoundTrip (load)
112+
run: sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip load /tmp/test-model"
111113

112114
# Job 4: Cross-version persistence test (save in 2.12, load in 2.13, vice versa)
113115
persistence-cross:
@@ -130,16 +132,16 @@ jobs:
130132
- name: Save model with Scala 2.12
131133
run: |
132134
mkdir -p /tmp/persistence-test
133-
sbt ++${{ env.SCALA_212 }} "runMain examples.PersistenceRoundTrip /tmp/persistence-test/model-212"
135+
sbt ++${{ env.SCALA_212 }} "runMain examples.PersistenceRoundTrip save /tmp/persistence-test/model-212"
134136
- name: Load model with Scala 2.13
135137
run: |
136-
sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip /tmp/persistence-test/model-212"
138+
sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip load /tmp/persistence-test/model-212"
137139
- name: Save model with Scala 2.13
138140
run: |
139-
sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip /tmp/persistence-test/model-213"
141+
sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip save /tmp/persistence-test/model-213"
140142
- name: Load model with Scala 2.12
141143
run: |
142-
sbt ++${{ env.SCALA_212 }} "runMain examples.PersistenceRoundTrip /tmp/persistence-test/model-213"
144+
sbt ++${{ env.SCALA_212 }} "runMain examples.PersistenceRoundTrip load /tmp/persistence-test/model-213"
143145
144146
# Job 5: Performance sanity check (ensure no major regression)
145147
perf-sanity:

src/test/scala/com/massivedatascience/clusterer/BisectingKMeansSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
325325
val cost = model.computeCost(df)
326326

327327
// Cost should be positive and finite
328-
assert(cost > 0.0 && cost.isFinite)
328+
assert(cost > 0.0 && java.lang.Double.isFinite(cost))
329329
}
330330

331331
test("Bisecting K-Means parameter validation should work") {

src/test/scala/com/massivedatascience/clusterer/PerformanceSanityCheck.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,15 @@ object PerformanceSanityCheck {
7373
val model = kmeans.fit(data)
7474
val predictions = model.transform(data)
7575
val clusterCount = predictions.select("cluster").distinct().count()
76+
val cost = model.computeCost(data)
7677

7778
val elapsedTime = System.currentTimeMillis() - startTime
7879

7980
// Verify results
8081
System.out.println(s"\nResults:")
8182
System.out.println(s" Elapsed time: ${elapsedTime}ms")
8283
System.out.println(s" Clusters found: $clusterCount")
83-
System.out.println(s" Cost: ${model.summary.trainingCost}")
84+
System.out.println(s" Cost: $cost")
8485

8586
// Check for major regression
8687
if (elapsedTime > timeBudgetMs) {
@@ -100,7 +101,7 @@ object PerformanceSanityCheck {
100101
logFile.println(s"timestamp,${System.currentTimeMillis()}")
101102
logFile.println(s"elapsed_ms,$elapsedTime")
102103
logFile.println(s"clusters,$clusterCount")
103-
logFile.println(s"cost,${model.summary.trainingCost}")
104+
logFile.println(s"cost,$cost")
104105
logFile.println(s"budget_ms,$timeBudgetMs")
105106
logFile.println(s"passed,${elapsedTime <= timeBudgetMs}")
106107
} finally {

src/test/scala/com/massivedatascience/clusterer/PerformanceTestSuite.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ import com.massivedatascience.linalg.WeightedVector
2323
import com.massivedatascience.transforms.Embedding
2424
import org.apache.spark.ml.linalg.Vectors
2525
import org.scalatest.funsuite.AnyFunSuite
26-
import scala.collection.parallel.CollectionConverters._
26+
27+
// Cross-version parallel collections support via compat package
28+
import com.massivedatascience.clusterer.compat._
2729

2830
class PerformanceTestSuite extends AnyFunSuite with LocalClusterSparkContext {
2931

src/test/scala/com/massivedatascience/clusterer/SoftKMeansSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,11 +224,11 @@ class SoftKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
224224

225225
// Hard cost
226226
val hardCost = model.computeCost(df)
227-
assert(hardCost > 0.0 && hardCost.isFinite)
227+
assert(hardCost > 0.0 && java.lang.Double.isFinite(hardCost))
228228

229229
// Soft cost
230230
val softCost = model.computeSoftCost(df)
231-
assert(softCost > 0.0 && softCost.isFinite)
231+
assert(softCost > 0.0 && java.lang.Double.isFinite(softCost))
232232

233233
// Soft cost should be <= hard cost (weighted average property)
234234
assert(softCost <= hardCost * 1.1) // Allow small numerical tolerance

src/test/scala/com/massivedatascience/clusterer/StreamingKMeansSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ class StreamingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
453453
val cost = model.computeCost(df)
454454

455455
// Cost should be positive and finite
456-
assert(cost > 0.0 && cost.isFinite)
456+
assert(cost > 0.0 && java.lang.Double.isFinite(cost))
457457

458458
// After update, cost on new similar data should be reasonable
459459
val batch = Seq(
@@ -465,7 +465,7 @@ class StreamingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
465465
model.update(batchDF)
466466

467467
val newCost = model.computeCost(batchDF)
468-
assert(newCost > 0.0 && newCost.isFinite)
468+
assert(newCost > 0.0 && java.lang.Double.isFinite(newCost))
469469
}
470470

471471
test("StreamingKMeans parameter validation") {

src/test/scala/com/massivedatascience/clusterer/XMeansSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ class XMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
258258
val cost = model.computeCost(df)
259259

260260
// Cost should be positive and finite
261-
assert(cost > 0.0 && cost.isFinite)
261+
assert(cost > 0.0 && java.lang.Double.isFinite(cost))
262262
}
263263

264264
test("X-Means parameter validation") {

0 commit comments

Comments (0)