|
1 | | -# .github/workflows/ci.yml |
2 | | -# This workflow is refactored to match the "Validation DAG" principle: |
3 | | -# Build once (implicitly in each job) and validate concerns in parallel. |
4 | | - |
5 | 1 | name: CI |
6 | 2 |
|
7 | 3 | on: |
|
10 | 6 | pull_request: |
11 | 7 | branches: [ master, main ] |
12 | 8 |
|
| 9 | +concurrency: |
| 10 | + group: ${{ github.workflow }}-${{ github.ref }} |
| 11 | + cancel-in-progress: true |
| 12 | + |
| 13 | +env: |
| 14 | + DEFAULT_SPARK: "3.5.1" |
| 15 | + PYSPARK_PIN: "3.5.1" |
| 16 | + JAVA_VERSION: "17" |
| 17 | + SCALA_212: "2.12.18" |
| 18 | + SCALA_213: "2.13.14" |
| 19 | + SBT_OPTS: "-Xms2g -Xmx4g -XX:+UseG1GC -Dsbt.log.noformat=true" |
| 20 | + |
13 | 21 | jobs: |
14 | | - # Job 1: Linting and Code Style (Fast, Independent Check) |
| 22 | + # Job 0: Linting and Code Style (fast gate) |
15 | 23 | lint: |
16 | 24 | runs-on: ubuntu-latest |
17 | 25 | name: Lint & Style Check |
18 | 26 | steps: |
19 | 27 | - uses: actions/checkout@v4 |
20 | 28 | - uses: actions/setup-java@v4 |
21 | 29 | with: |
22 | | - distribution: 'temurin' |
23 | | - java-version: '17' |
| 30 | + distribution: temurin |
| 31 | + java-version: ${{ env.JAVA_VERSION }} |
| 32 | + cache: sbt |
24 | 33 | - uses: sbt/setup-sbt@v1 |
25 | | - - uses: actions/cache@v4 |
26 | | - with: |
27 | | - path: | |
28 | | - ~/.ivy2/cache |
29 | | - ~/.sbt |
30 | | - ~/.cache/coursier |
31 | | - key: ${{ runner.os }}-sbt-lint-${{ hashFiles('**/build.sbt', '**/project/build.properties') }} |
32 | | - restore-keys: | |
33 | | - ${{ runner.os }}-sbt-lint- |
34 | | -
|
35 | 34 | - name: Check Formatting and Style |
36 | 35 | run: | |
37 | | - sbt ++2.13.14 scalafmtCheckAll |
38 | | - sbt ++2.13.14 scalastyle |
| 36 | + sbt ++${{ env.SCALA_213 }} scalafmtCheckAll |
| 37 | + sbt ++${{ env.SCALA_213 }} scalastyle |
| 38 | +
|
| 39 | + # Job 1: Build once (produce 2.12 & 2.13 jars for reuse) |
| 40 | + build: |
| 41 | + runs-on: ubuntu-latest |
| 42 | + needs: lint |
| 43 | + name: Build JARs (2.12 & 2.13) |
| 44 | + steps: |
| 45 | + - uses: actions/checkout@v4 |
| 46 | + - uses: actions/setup-java@v4 |
| 47 | + with: |
| 48 | + distribution: temurin |
| 49 | + java-version: ${{ env.JAVA_VERSION }} |
| 50 | + cache: sbt |
| 51 | + - uses: sbt/setup-sbt@v1 |
| 52 | + - name: Package 2.12 (Spark ${{ env.DEFAULT_SPARK }}) |
| 53 | + run: sbt ++${{ env.SCALA_212 }} -Dspark.version=${{ env.DEFAULT_SPARK }} clean package |
| 54 | + - name: Package 2.13 (Spark ${{ env.DEFAULT_SPARK }}) |
| 55 | + run: sbt ++${{ env.SCALA_213 }} -Dspark.version=${{ env.DEFAULT_SPARK }} clean package |
| 56 | + - name: Upload JARs |
| 57 | + uses: actions/upload-artifact@v4 |
| 58 | + with: |
| 59 | + name: jars |
| 60 | + path: | |
| 61 | + target/scala-2.12/*.jar |
| 62 | + target/scala-2.13/*.jar |
| 63 | + if-no-files-found: error |
39 | 64 |
|
40 | | - # Job 2: Core JVM Tests across the full matrix (The Longest Job) |
| 65 | + # Job 2: Core JVM tests across matrix (re-compiles per combo; ok) |
41 | 66 | test-jvm: |
42 | 67 | runs-on: ubuntu-latest |
43 | | - needs: lint # Optional: ensure style passes before running long tests |
| 68 | + needs: build |
44 | 69 | strategy: |
45 | 70 | fail-fast: false |
46 | 71 | matrix: |
47 | | - java-version: [11, 17] |
48 | | - scala-version: ['2.13.14', '2.12.18'] # 2.13 is now the default |
49 | | - spark-version: ['3.4.3', '3.5.1'] |
| 72 | + scala-version: [ '2.13.14', '2.12.18' ] |
| 73 | + spark-version: [ '3.4.3', '3.5.1' ] |
| 74 | + include: |
| 75 | + # Force Java 17 for Spark 3.5/3.4 |
| 76 | + - java-version: '17' |
50 | 77 | exclude: |
| 78 | + # Spark 3.4 typically ships with Scala 2.12 artifacts |
51 | 79 | - scala-version: '2.13.14' |
52 | | - spark-version: '3.4.3' # Spark 3.4 typically uses Scala 2.12 |
53 | | - |
54 | | - name: Test (Java ${{ matrix.java-version }}, Scala ${{ matrix.scala-version }}, Spark ${{ matrix.spark-version }}) |
| 80 | + spark-version: '3.4.3' |
| 81 | + name: Test (Scala ${{ matrix.scala-version }}, Spark ${{ matrix.spark-version }}) |
55 | 82 | steps: |
56 | 83 | - uses: actions/checkout@v4 |
57 | 84 | - uses: actions/setup-java@v4 |
58 | 85 | with: |
59 | | - distribution: 'temurin' |
| 86 | + distribution: temurin |
60 | 87 | java-version: ${{ matrix.java-version }} |
| 88 | + cache: sbt |
61 | 89 | - uses: sbt/setup-sbt@v1 |
62 | | - - uses: actions/cache@v4 |
63 | | - with: |
64 | | - path: | |
65 | | - ~/.ivy2/cache |
66 | | - ~/.sbt |
67 | | - ~/.cache/coursier |
68 | | - key: ${{ runner.os }}-sbt-${{ matrix.scala-version }}-${{ matrix.spark-version }}-${{ hashFiles('**/build.sbt', '**/project/build.properties') }} |
69 | | - restore-keys: | |
70 | | - ${{ runner.os }}-sbt-${{ matrix.scala-version }}-${{ matrix.spark-version }}- |
71 | | -
|
72 | 90 | - name: Run All JVM Tests |
73 | | - # The 'test' command automatically compiles. No separate compile step needed. |
74 | | - # Use -Dspark.version as per the final plan. |
75 | 91 | run: sbt ++${{ matrix.scala-version }} -Dspark.version=${{ matrix.spark-version }} test |
| 92 | + - name: Preserve test reports |
| 93 | + if: always() |
| 94 | + run: | |
| 95 | + mkdir -p artifacts/test-reports/${{ matrix.scala-version }}_${{ matrix.spark-version }} |
| 96 | + (test -d target/test-reports && cp -r target/test-reports/* artifacts/test-reports/${{ matrix.scala-version }}_${{ matrix.spark-version }}/) || true |
| 97 | + - uses: actions/upload-artifact@v4 |
| 98 | + if: always() |
| 99 | + with: |
| 100 | + name: test-reports-${{ matrix.scala-version }}-${{ matrix.spark-version }} |
| 101 | + path: artifacts/test-reports/${{ matrix.scala-version }}_${{ matrix.spark-version }} |
76 | 102 |
|
77 | | - # Job 3: Python Smoke Test (Independent Check) |
| 103 | + # Job 3: Python smoke test (downloads jar built once; uses non-SE divergence) |
78 | 104 | test-python: |
79 | 105 | runs-on: ubuntu-latest |
80 | | - needs: lint |
81 | | - name: Python Smoke Test (PySpark 3.5.1) |
| 106 | + needs: build |
| 107 | +    name: Python Smoke Test (PySpark 3.5.1)  # env context is unavailable in job-level `name`; keep in sync with env.PYSPARK_PIN |
| 108 | + steps: |
| 109 | + - uses: actions/checkout@v4 |
| 110 | + - uses: actions/setup-python@v5 |
| 111 | + with: |
| 112 | + python-version: "3.11" |
| 113 | + - uses: actions/setup-java@v4 |
| 114 | + with: |
| 115 | + distribution: temurin |
| 116 | + java-version: ${{ env.JAVA_VERSION }} |
| 117 | + - name: Download JARs |
| 118 | + uses: actions/download-artifact@v4 |
| 119 | + with: |
| 120 | + name: jars |
| 121 | + path: jars |
| 122 | + - name: Install PySpark |
| 123 | + run: python -m pip install --upgrade pip && pip install pyspark==${{ env.PYSPARK_PIN }} |
| 124 | + - name: Run smoke (local[*], non-SE) |
| 125 | + run: | |
| 126 | +          # upload-artifact@v4 roots multi-path artifacts at the common ancestor (target/), so the jar lands in jars/scala-2.12/ |
| 127 | +          JAR_212=$(find jars -name '*.jar' -path '*scala-2.12*' | head -n1) |
| 127 | + test -f "$JAR_212" |
| 128 | + spark-submit --jars "$JAR_212" python/smoke_test.py |
| 129 | +
|
| 130 | + # Job 4: Cross-version persistence round-trip |
| 131 | + persistence-cross: |
| 132 | + runs-on: ubuntu-latest |
| 133 | + needs: build |
| 134 | + name: Persistence Cross-Version (3.4 ↔ 3.5) |
| 135 | + steps: |
| 136 | + - uses: actions/checkout@v4 |
| 137 | + - uses: actions/setup-java@v4 |
| 138 | + with: |
| 139 | + distribution: temurin |
| 140 | + java-version: ${{ env.JAVA_VERSION }} |
| 141 | + cache: sbt |
| 142 | + - uses: sbt/setup-sbt@v1 |
| 143 | + |
| 144 | +      # Use Scala 2.12 throughout: it is the one Scala line both Spark 3.4 and 3.5 support |
| 145 | +      # (the test matrix above excludes 2.13 + Spark 3.4 for the same reason). |
| 146 | +      - name: Save 3.4.x |
| 147 | +        run: sbt ++${{ env.SCALA_212 }} -Dspark.version=3.4.3 "testOnly *PersistenceSuite -- -z save_3_4" |
| 148 | +      - name: Load (3.4) on 3.5.x |
| 149 | +        run: sbt ++${{ env.SCALA_212 }} -Dspark.version=3.5.1 "testOnly *PersistenceSuite -- -z load_3_4_in_3_5" |
| 150 | + |
| 151 | +      - name: Save 3.5.x |
| 152 | +        run: sbt ++${{ env.SCALA_212 }} -Dspark.version=3.5.1 "testOnly *PersistenceSuite -- -z save_3_5" |
| 153 | +      - name: Load (3.5) on 3.4.x |
| 154 | +        run: sbt ++${{ env.SCALA_212 }} -Dspark.version=3.4.3 "testOnly *PersistenceSuite -- -z load_3_5_in_3_4" |
| 153 | + |
| 154 | + # Job 5: Run Scala examples to keep docs executable |
| 155 | + run-examples: |
| 156 | + runs-on: ubuntu-latest |
| 157 | + needs: build |
| 158 | + name: Run Scala Examples |
82 | 159 | steps: |
83 | 160 | - uses: actions/checkout@v4 |
84 | 161 | - uses: actions/setup-java@v4 |
85 | 162 | with: |
86 | | - distribution: 'temurin' |
87 | | - java-version: '17' # Pin to a modern JDK |
| 163 | + distribution: temurin |
| 164 | + java-version: ${{ env.JAVA_VERSION }} |
| 165 | + cache: sbt |
88 | 166 | - uses: sbt/setup-sbt@v1 |
89 | | - - name: Build Scala 2.12 JAR for PySpark |
90 | | - run: sbt ++2.12.18 -Dspark.version=3.5.1 package |
| 167 | +      - name: Install Spark (pip pyspark ships bin/spark-shell) |
| 168 | +        run: python -m pip install pyspark==${{ env.PYSPARK_PIN }} |
| 169 | +      - name: Execute examples (spark-shell -i) |
| 170 | +        run: | |
| 171 | +          # Adjust the list to match files present in your repo; ':quit' exits the REPL after the script runs |
| 172 | +          if [ -f examples/ml-wrapper.scala ]; then spark-shell -i examples/ml-wrapper.scala <<< ':quit'; fi |
| 173 | +          if [ -f examples/bisecting.scala ]; then spark-shell -i examples/bisecting.scala <<< ':quit'; fi |
| 174 | +          if [ -f examples/streaming.scala ]; then spark-shell -i examples/streaming.scala <<< ':quit'; fi |
91 | 173 |
|
92 | | - - name: Run Python Smoke Test |
| 174 | + # Job 6: Perf sanity (prints a single metric line) |
| 175 | + perf-sanity: |
| 176 | + runs-on: ubuntu-latest |
| 177 | + needs: build |
| 178 | + name: Perf sanity |
| 179 | + steps: |
| 180 | + - uses: actions/checkout@v4 |
| 181 | + - uses: actions/setup-java@v4 |
| 182 | + with: |
| 183 | + distribution: temurin |
| 184 | + java-version: ${{ env.JAVA_VERSION }} |
| 185 | + cache: sbt |
| 186 | + - uses: sbt/setup-sbt@v1 |
| 187 | + - name: Run perf sanity (expects 'perf_sanity_seconds=' in logs) |
93 | 188 | run: | |
94 | | - pip install pyspark==3.5.1 |
95 | | - spark-submit --jars target/scala-2.12/*generalized-kmeans*.jar python/smoke_test.py |
| 189 | +          set -o pipefail |
| 190 | +          # The metric is printed to sbt stdout, not the JUnit XML dir — capture the console log and grep that |
| 191 | +          sbt ++${{ env.SCALA_213 }} -Dspark.version=${{ env.DEFAULT_SPARK }} "testOnly *GeneralizedKMeansSuite -- -z \"perf sanity\"" | tee perf-sanity.log |
| 192 | +          grep -q "perf_sanity_seconds=" perf-sanity.log || echo "::warning::perf_sanity_seconds metric not found in test output" |
96 | 191 |
|
97 | | - # Job 4: Code Coverage (Independent Check, runs only once) |
| 192 | + # Job 7: Coverage (unchanged, runs once) |
98 | 193 | coverage: |
99 | 194 | runs-on: ubuntu-latest |
100 | | - needs: lint |
| 195 | + needs: build |
101 | 196 | name: Code Coverage |
102 | 197 | steps: |
103 | 198 | - uses: actions/checkout@v4 |
104 | 199 | - uses: actions/setup-java@v4 |
105 | 200 | with: |
106 | | - distribution: 'temurin' |
107 | | - java-version: '17' |
| 201 | + distribution: temurin |
| 202 | + java-version: ${{ env.JAVA_VERSION }} |
| 203 | + cache: sbt |
108 | 204 | - uses: sbt/setup-sbt@v1 |
109 | 205 | - name: Generate Coverage Report |
110 | | - run: sbt ++2.12.18 -Dspark.version=3.5.1 coverage test coverageReport |
| 206 | + run: sbt ++${{ env.SCALA_212 }} -Dspark.version=${{ env.DEFAULT_SPARK }} coverage test coverageReport |
111 | 207 | - name: Upload coverage to Codecov |
112 | 208 | uses: codecov/codecov-action@v4 |
113 | 209 | with: |
114 | 210 | file: ./target/scala-2.12/scoverage-report/scoverage.xml |
115 | 211 | fail_ci_if_error: false |
| 212 | + |
| 213 | + # Final gate: only reports success if all validations pass |
| 214 | + release-ready: |
| 215 | + runs-on: ubuntu-latest |
| 216 | + needs: |
| 217 | + - test-jvm |
| 218 | + - test-python |
| 219 | + - persistence-cross |
| 220 | + - run-examples |
| 221 | + - perf-sanity |
| 222 | + - coverage |
| 223 | + steps: |
| 224 | + - run: echo "All validations passed." |
0 commit comments