Skip to content

Commit f04c9aa

Browse files
committed
Updated ci.yml
1 parent 8d1f8f5 commit f04c9aa

File tree

1 file changed

+164
-55
lines changed

1 file changed

+164
-55
lines changed

.github/workflows/ci.yml

Lines changed: 164 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,3 @@
1-
# .github/workflows/ci.yml
2-
# This workflow is refactored to match the "Validation DAG" principle:
3-
# Build once (implicitly in each job) and validate concerns in parallel.
4-
51
name: CI
62

73
on:
@@ -10,106 +6,219 @@ on:
106
pull_request:
117
branches: [ master, main ]
128

9+
concurrency:
10+
group: ${{ github.workflow }}-${{ github.ref }}
11+
cancel-in-progress: true
12+
13+
env:
14+
DEFAULT_SPARK: "3.5.1"
15+
PYSPARK_PIN: "3.5.1"
16+
JAVA_VERSION: "17"
17+
SCALA_212: "2.12.18"
18+
SCALA_213: "2.13.14"
19+
SBT_OPTS: "-Xms2g -Xmx4g -XX:+UseG1GC -Dsbt.log.noformat=true"
20+
1321
jobs:
14-
# Job 1: Linting and Code Style (Fast, Independent Check)
22+
# Job 0: Linting and Code Style (fast gate)
1523
lint:
1624
runs-on: ubuntu-latest
1725
name: Lint & Style Check
1826
steps:
1927
- uses: actions/checkout@v4
2028
- uses: actions/setup-java@v4
2129
with:
22-
distribution: 'temurin'
23-
java-version: '17'
30+
distribution: temurin
31+
java-version: ${{ env.JAVA_VERSION }}
32+
cache: sbt
2433
- uses: sbt/setup-sbt@v1
25-
- uses: actions/cache@v4
26-
with:
27-
path: |
28-
~/.ivy2/cache
29-
~/.sbt
30-
~/.cache/coursier
31-
key: ${{ runner.os }}-sbt-lint-${{ hashFiles('**/build.sbt', '**/project/build.properties') }}
32-
restore-keys: |
33-
${{ runner.os }}-sbt-lint-
34-
3534
- name: Check Formatting and Style
3635
run: |
37-
sbt ++2.13.14 scalafmtCheckAll
38-
sbt ++2.13.14 scalastyle
36+
sbt ++${{ env.SCALA_213 }} scalafmtCheckAll
37+
sbt ++${{ env.SCALA_213 }} scalastyle
38+
39+
# Job 1: Build once (produce 2.12 & 2.13 jars for reuse)
40+
build:
41+
runs-on: ubuntu-latest
42+
needs: lint
43+
name: Build JARs (2.12 & 2.13)
44+
steps:
45+
- uses: actions/checkout@v4
46+
- uses: actions/setup-java@v4
47+
with:
48+
distribution: temurin
49+
java-version: ${{ env.JAVA_VERSION }}
50+
cache: sbt
51+
- uses: sbt/setup-sbt@v1
52+
- name: Package 2.12 (Spark ${{ env.DEFAULT_SPARK }})
53+
run: sbt ++${{ env.SCALA_212 }} -Dspark.version=${{ env.DEFAULT_SPARK }} clean package
54+
- name: Package 2.13 (Spark ${{ env.DEFAULT_SPARK }})
55+
run: sbt ++${{ env.SCALA_213 }} -Dspark.version=${{ env.DEFAULT_SPARK }} clean package
56+
- name: Upload JARs
57+
uses: actions/upload-artifact@v4
58+
with:
59+
name: jars
60+
path: |
61+
target/scala-2.12/*.jar
62+
target/scala-2.13/*.jar
63+
if-no-files-found: error
3964

40-
# Job 2: Core JVM Tests across the full matrix (The Longest Job)
65+
# Job 2: Core JVM tests across matrix (re-compiles per combo; ok)
4166
test-jvm:
4267
runs-on: ubuntu-latest
43-
needs: lint # Optional: ensure style passes before running long tests
68+
needs: build
4469
strategy:
4570
fail-fast: false
4671
matrix:
47-
java-version: [11, 17]
48-
scala-version: ['2.13.14', '2.12.18'] # 2.13 is now the default
49-
spark-version: ['3.4.3', '3.5.1']
72+
scala-version: [ '2.13.14', '2.12.18' ]
73+
spark-version: [ '3.4.3', '3.5.1' ]
74+
include:
75+
# Force Java 17 for Spark 3.5/3.4
76+
- java-version: '17'
5077
exclude:
78+
# Spark 3.4 typically ships with Scala 2.12 artifacts
5179
- scala-version: '2.13.14'
52-
spark-version: '3.4.3' # Spark 3.4 typically uses Scala 2.12
53-
54-
name: Test (Java ${{ matrix.java-version }}, Scala ${{ matrix.scala-version }}, Spark ${{ matrix.spark-version }})
80+
spark-version: '3.4.3'
81+
name: Test (Scala ${{ matrix.scala-version }}, Spark ${{ matrix.spark-version }})
5582
steps:
5683
- uses: actions/checkout@v4
5784
- uses: actions/setup-java@v4
5885
with:
59-
distribution: 'temurin'
86+
distribution: temurin
6087
java-version: ${{ matrix.java-version }}
88+
cache: sbt
6189
- uses: sbt/setup-sbt@v1
62-
- uses: actions/cache@v4
63-
with:
64-
path: |
65-
~/.ivy2/cache
66-
~/.sbt
67-
~/.cache/coursier
68-
key: ${{ runner.os }}-sbt-${{ matrix.scala-version }}-${{ matrix.spark-version }}-${{ hashFiles('**/build.sbt', '**/project/build.properties') }}
69-
restore-keys: |
70-
${{ runner.os }}-sbt-${{ matrix.scala-version }}-${{ matrix.spark-version }}-
71-
7290
- name: Run All JVM Tests
73-
# The 'test' command automatically compiles. No separate compile step needed.
74-
# Use -Dspark.version as per the final plan.
7591
run: sbt ++${{ matrix.scala-version }} -Dspark.version=${{ matrix.spark-version }} test
92+
- name: Preserve test reports
93+
if: always()
94+
run: |
95+
mkdir -p artifacts/test-reports/${{ matrix.scala-version }}_${{ matrix.spark-version }}
96+
(test -d target/test-reports && cp -r target/test-reports/* artifacts/test-reports/${{ matrix.scala-version }}_${{ matrix.spark-version }}/) || true
97+
- uses: actions/upload-artifact@v4
98+
if: always()
99+
with:
100+
name: test-reports-${{ matrix.scala-version }}-${{ matrix.spark-version }}
101+
path: artifacts/test-reports/${{ matrix.scala-version }}_${{ matrix.spark-version }}
76102

77-
# Job 3: Python Smoke Test (Independent Check)
103+
# Job 3: Python smoke test (downloads jar built once; uses non-SE divergence)
78104
test-python:
79105
runs-on: ubuntu-latest
80-
needs: lint
81-
name: Python Smoke Test (PySpark 3.5.1)
106+
needs: build
107+
name: Python Smoke Test (PySpark ${{ env.PYSPARK_PIN }})
108+
steps:
109+
- uses: actions/checkout@v4
110+
- uses: actions/setup-python@v5
111+
with:
112+
python-version: "3.11"
113+
- uses: actions/setup-java@v4
114+
with:
115+
distribution: temurin
116+
java-version: ${{ env.JAVA_VERSION }}
117+
- name: Download JARs
118+
uses: actions/download-artifact@v4
119+
with:
120+
name: jars
121+
path: jars
122+
- name: Install PySpark
123+
run: python -m pip install --upgrade pip && pip install pyspark==${{ env.PYSPARK_PIN }}
124+
- name: Run smoke (local[*], non-SE)
125+
run: |
126+
JAR_212=$(ls jars/*scala-2.12*.jar | head -n1)
127+
test -f "$JAR_212"
128+
spark-submit --jars "$JAR_212" python/smoke_test.py
129+
130+
# Job 4: Cross-version persistence round-trip
131+
persistence-cross:
132+
runs-on: ubuntu-latest
133+
needs: build
134+
name: Persistence Cross-Version (3.4 ↔ 3.5)
135+
steps:
136+
- uses: actions/checkout@v4
137+
- uses: actions/setup-java@v4
138+
with:
139+
distribution: temurin
140+
java-version: ${{ env.JAVA_VERSION }}
141+
cache: sbt
142+
- uses: sbt/setup-sbt@v1
143+
144+
- name: Save 3.4.x
145+
run: sbt ++${{ env.SCALA_213 }} -Dspark.version=3.4.3 "testOnly *PersistenceSuite -- -z save_3_4"
146+
- name: Load (3.4) on 3.5.x
147+
run: sbt ++${{ env.SCALA_213 }} -Dspark.version=3.5.1 "testOnly *PersistenceSuite -- -z load_3_4_in_3_5"
148+
149+
- name: Save 3.5.x
150+
run: sbt ++${{ env.SCALA_213 }} -Dspark.version=3.5.1 "testOnly *PersistenceSuite -- -z save_3_5"
151+
- name: Load (3.5) on 3.4.x
152+
run: sbt ++${{ env.SCALA_213 }} -Dspark.version=3.4.3 "testOnly *PersistenceSuite -- -z load_3_5_in_3_4"
153+
154+
# Job 5: Run Scala examples to keep docs executable
155+
run-examples:
156+
runs-on: ubuntu-latest
157+
needs: build
158+
name: Run Scala Examples
82159
steps:
83160
- uses: actions/checkout@v4
84161
- uses: actions/setup-java@v4
85162
with:
86-
distribution: 'temurin'
87-
java-version: '17' # Pin to a modern JDK
163+
distribution: temurin
164+
java-version: ${{ env.JAVA_VERSION }}
165+
cache: sbt
88166
- uses: sbt/setup-sbt@v1
89-
- name: Build Scala 2.12 JAR for PySpark
90-
run: sbt ++2.12.18 -Dspark.version=3.5.1 package
167+
- name: Execute examples (spark-shell -i)
168+
run: |
169+
# Adjust the list to match files present in your repo
170+
if [ -f examples/ml-wrapper.scala ]; then spark-shell -i examples/ml-wrapper.scala <<<'':; fi
171+
if [ -f examples/bisecting.scala ]; then spark-shell -i examples/bisecting.scala <<<'':; fi
172+
if [ -f examples/streaming.scala ]; then spark-shell -i examples/streaming.scala <<<'':; fi
91173
92-
- name: Run Python Smoke Test
174+
# Job 6: Perf sanity (prints a single metric line)
175+
perf-sanity:
176+
runs-on: ubuntu-latest
177+
needs: build
178+
name: Perf sanity
179+
steps:
180+
- uses: actions/checkout@v4
181+
- uses: actions/setup-java@v4
182+
with:
183+
distribution: temurin
184+
java-version: ${{ env.JAVA_VERSION }}
185+
cache: sbt
186+
- uses: sbt/setup-sbt@v1
187+
- name: Run perf sanity (expects 'perf_sanity_seconds=' in logs)
93188
run: |
94-
pip install pyspark==3.5.1
95-
spark-submit --jars target/scala-2.12/*generalized-kmeans*.jar python/smoke_test.py
189+
sbt ++${{ env.SCALA_213 }} -Dspark.version=${{ env.DEFAULT_SPARK }} "testOnly *GeneralizedKMeansSuite -- -z \"perf sanity\""
190+
(grep -R "perf_sanity_seconds=" -n target/test-reports || true)
96191
97-
# Job 4: Code Coverage (Independent Check, runs only once)
192+
# Job 7: Coverage (unchanged, runs once)
98193
coverage:
99194
runs-on: ubuntu-latest
100-
needs: lint
195+
needs: build
101196
name: Code Coverage
102197
steps:
103198
- uses: actions/checkout@v4
104199
- uses: actions/setup-java@v4
105200
with:
106-
distribution: 'temurin'
107-
java-version: '17'
201+
distribution: temurin
202+
java-version: ${{ env.JAVA_VERSION }}
203+
cache: sbt
108204
- uses: sbt/setup-sbt@v1
109205
- name: Generate Coverage Report
110-
run: sbt ++2.12.18 -Dspark.version=3.5.1 coverage test coverageReport
206+
run: sbt ++${{ env.SCALA_212 }} -Dspark.version=${{ env.DEFAULT_SPARK }} coverage test coverageReport
111207
- name: Upload coverage to Codecov
112208
uses: codecov/codecov-action@v4
113209
with:
114210
file: ./target/scala-2.12/scoverage-report/scoverage.xml
115211
fail_ci_if_error: false
212+
213+
# Final gate: only reports success if all validations pass
214+
release-ready:
215+
runs-on: ubuntu-latest
216+
needs:
217+
- test-jvm
218+
- test-python
219+
- persistence-cross
220+
- run-examples
221+
- perf-sanity
222+
- coverage
223+
steps:
224+
- run: echo "All validations passed."

0 commit comments

Comments (0)