Skip to content

Commit 3066b63

Browse files
derrickburns authored and claude committed
fix: resolve Scala 2.12 and Spark 3.4 compatibility issues in CI
Fixed multiple cross-version compatibility issues discovered during CI testing:

1. **isFinite compatibility**: Replaced Scala 2.13-only `.isFinite` with `java.lang.Double.isFinite()` for Scala 2.12 compatibility across all test files
2. **Spark API compatibility**: Replaced `model.summary.trainingCost` with `model.computeCost()` in PerformanceSanityCheck.scala for Spark 3.4 support
3. **Parallel collections**: Fixed CollectionConverters import in PerformanceTestSuite to use compat package for Scala 2.12/2.13 cross-compilation
4. **CI workflow fixes**: Added required arguments to PersistenceRoundTrip example calls in examples-run and persistence-cross jobs

These changes ensure all CI jobs pass across the full test matrix:
- Scala 2.12.18 / Spark 3.4.0
- Scala 2.12.18 / Spark 3.5.1
- Scala 2.13.14 / Spark 3.4.0
- Scala 2.13.14 / Spark 3.5.1

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 42a69e2 commit 3066b63

File tree

7 files changed

+20
-15
lines changed

7 files changed

+20
-15
lines changed

.github/workflows/ci.yml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,10 @@ jobs:
106106
run: sbt ++${{ env.SCALA_213 }} "runMain examples.SoftKMeansExample"
107107
- name: Run XMeansExample
108108
run: sbt ++${{ env.SCALA_213 }} "runMain examples.XMeansExample"
109-
- name: Run PersistenceRoundTrip
110-
run: sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip"
109+
- name: Run PersistenceRoundTrip (save)
110+
run: sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip save /tmp/test-model"
111+
- name: Run PersistenceRoundTrip (load)
112+
run: sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip load /tmp/test-model"
111113

112114
# Job 4: Cross-version persistence test (save in 2.12, load in 2.13, vice versa)
113115
persistence-cross:
@@ -130,16 +132,16 @@ jobs:
130132
- name: Save model with Scala 2.12
131133
run: |
132134
mkdir -p /tmp/persistence-test
133-
sbt ++${{ env.SCALA_212 }} "runMain examples.PersistenceRoundTrip /tmp/persistence-test/model-212"
135+
sbt ++${{ env.SCALA_212 }} "runMain examples.PersistenceRoundTrip save /tmp/persistence-test/model-212"
134136
- name: Load model with Scala 2.13
135137
run: |
136-
sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip /tmp/persistence-test/model-212"
138+
sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip load /tmp/persistence-test/model-212"
137139
- name: Save model with Scala 2.13
138140
run: |
139-
sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip /tmp/persistence-test/model-213"
141+
sbt ++${{ env.SCALA_213 }} "runMain examples.PersistenceRoundTrip save /tmp/persistence-test/model-213"
140142
- name: Load model with Scala 2.12
141143
run: |
142-
sbt ++${{ env.SCALA_212 }} "runMain examples.PersistenceRoundTrip /tmp/persistence-test/model-213"
144+
sbt ++${{ env.SCALA_212 }} "runMain examples.PersistenceRoundTrip load /tmp/persistence-test/model-213"
143145
144146
# Job 5: Performance sanity check (ensure no major regression)
145147
perf-sanity:

src/test/scala/com/massivedatascience/clusterer/BisectingKMeansSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ class BisectingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
325325
val cost = model.computeCost(df)
326326

327327
// Cost should be positive and finite
328-
assert(cost > 0.0 && cost.isFinite)
328+
assert(cost > 0.0 && java.lang.Double.isFinite(cost))
329329
}
330330

331331
test("Bisecting K-Means parameter validation should work") {

src/test/scala/com/massivedatascience/clusterer/PerformanceSanityCheck.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,15 @@ object PerformanceSanityCheck {
7373
val model = kmeans.fit(data)
7474
val predictions = model.transform(data)
7575
val clusterCount = predictions.select("cluster").distinct().count()
76+
val cost = model.computeCost(data)
7677

7778
val elapsedTime = System.currentTimeMillis() - startTime
7879

7980
// Verify results
8081
System.out.println(s"\nResults:")
8182
System.out.println(s" Elapsed time: ${elapsedTime}ms")
8283
System.out.println(s" Clusters found: $clusterCount")
83-
System.out.println(s" Cost: ${model.summary.trainingCost}")
84+
System.out.println(s" Cost: $cost")
8485

8586
// Check for major regression
8687
if (elapsedTime > timeBudgetMs) {
@@ -100,7 +101,7 @@ object PerformanceSanityCheck {
100101
logFile.println(s"timestamp,${System.currentTimeMillis()}")
101102
logFile.println(s"elapsed_ms,$elapsedTime")
102103
logFile.println(s"clusters,$clusterCount")
103-
logFile.println(s"cost,${model.summary.trainingCost}")
104+
logFile.println(s"cost,$cost")
104105
logFile.println(s"budget_ms,$timeBudgetMs")
105106
logFile.println(s"passed,${elapsedTime <= timeBudgetMs}")
106107
} finally {

src/test/scala/com/massivedatascience/clusterer/PerformanceTestSuite.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ import com.massivedatascience.linalg.WeightedVector
2323
import com.massivedatascience.transforms.Embedding
2424
import org.apache.spark.ml.linalg.Vectors
2525
import org.scalatest.funsuite.AnyFunSuite
26-
import scala.collection.parallel.CollectionConverters._
26+
27+
// Cross-version parallel collections support via compat package
28+
import com.massivedatascience.clusterer.compat._
2729

2830
class PerformanceTestSuite extends AnyFunSuite with LocalClusterSparkContext {
2931

src/test/scala/com/massivedatascience/clusterer/SoftKMeansSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,11 +224,11 @@ class SoftKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
224224

225225
// Hard cost
226226
val hardCost = model.computeCost(df)
227-
assert(hardCost > 0.0 && hardCost.isFinite)
227+
assert(hardCost > 0.0 && java.lang.Double.isFinite(hardCost))
228228

229229
// Soft cost
230230
val softCost = model.computeSoftCost(df)
231-
assert(softCost > 0.0 && softCost.isFinite)
231+
assert(softCost > 0.0 && java.lang.Double.isFinite(softCost))
232232

233233
// Soft cost should be <= hard cost (weighted average property)
234234
assert(softCost <= hardCost * 1.1) // Allow small numerical tolerance

src/test/scala/com/massivedatascience/clusterer/StreamingKMeansSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ class StreamingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
453453
val cost = model.computeCost(df)
454454

455455
// Cost should be positive and finite
456-
assert(cost > 0.0 && cost.isFinite)
456+
assert(cost > 0.0 && java.lang.Double.isFinite(cost))
457457

458458
// After update, cost on new similar data should be reasonable
459459
val batch = Seq(
@@ -465,7 +465,7 @@ class StreamingKMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
465465
model.update(batchDF)
466466

467467
val newCost = model.computeCost(batchDF)
468-
assert(newCost > 0.0 && newCost.isFinite)
468+
assert(newCost > 0.0 && java.lang.Double.isFinite(newCost))
469469
}
470470

471471
test("StreamingKMeans parameter validation") {

src/test/scala/com/massivedatascience/clusterer/XMeansSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ class XMeansSuite extends AnyFunSuite with BeforeAndAfterAll {
258258
val cost = model.computeCost(df)
259259

260260
// Cost should be positive and finite
261-
assert(cost > 0.0 && cost.isFinite)
261+
assert(cost > 0.0 && java.lang.Double.isFinite(cost))
262262
}
263263

264264
test("X-Means parameter validation") {

0 commit comments

Comments (0)