forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
478 lines (426 loc) · 19.6 KB
/
unit-tests.yml
File metadata and controls
478 lines (426 loc) · 19.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
name: Unit tests

# The workflow token only gets read access to the repository contents.
permissions:
  contents: read

on:
  push:
  pull_request:
  schedule:
    # Nightly run, every day at 02:30 UTC
    - cron: '30 2 * * *'
  # Allow the workflow to be started by hand
  workflow_dispatch:

# Runs are grouped by workflow name plus the PR head ref when there is one,
# otherwise by the (unique) run id; a newer run in the same group cancels the
# one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# Defaults shared by every job; individual jobs and matrix entries may
# override them.
env:
  VIRTUALENV: testvenv
  TEST_DIR: ${{ github.workspace }}/tmp_folder
  CCACHE_DIR: ${{ github.workspace }}/ccache
  COVERAGE: "true"
  JUNITXML: "test-data.xml"

jobs:
lint:
  name: Lint
  # Restricted to the upstream repository: the job is skipped everywhere else.
  if: github.repository == 'scikit-learn/scikit-learn'
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v6

    - uses: actions/setup-python@v6
      with:
        python-version: "3.12"
        cache: pip

    # The linter versions come from the `get_dep` helper that is made
    # available by sourcing build_tools/shared.sh.
    - name: Install linters
      run: |
        source build_tools/shared.sh
        # Include pytest compatibility with mypy
        pip install pytest $(get_dep ruff min) $(get_dep mypy min) cython-lint

    - name: Run linters
      run: ./build_tools/linting.sh

    - name: Run Meson OpenMP checks
      run: |
        pip install ninja meson scipy
        python build_tools/check-meson-openmp-dependencies.py
retrieve-commit-message:
  # Publishes the full message of the checked-out commit as the `message`
  # job output so that later jobs can look for markers in it.
  name: Retrieve the latest commit message
  runs-on: ubuntu-latest
  if: github.repository == 'scikit-learn/scikit-learn'
  outputs:
    message: ${{ steps.git-log.outputs.message }}
  steps:
    - uses: actions/checkout@v6
      with:
        # Set to the PR head commit for pull_request events; the expression
        # is empty for every other event.
        ref: ${{ github.event.pull_request.head.sha }}
    - id: git-log
      name: Retrieve the latest commit message
      shell: bash
      run: |
        set -eu
        message=$(git log --format=%B -n 1)
        # The commit message is arbitrary (PR-controlled) text, so a fixed
        # heredoc delimiter could appear on a line of its own and either
        # truncate the output or inject additional outputs. Use a random
        # delimiter that cannot be predicted by the commit author.
        delimiter="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
        {
          echo "message<<${delimiter}"
          # printf instead of echo: a message such as "-n" must be written
          # verbatim rather than be interpreted as an echo option.
          printf '%s\n' "${message}"
          echo "${delimiter}"
        } >> "${GITHUB_OUTPUT}"
retrieve-selected-tests:
  # Inspect the commit message to decide whether
  # `build_tools/github/test_script.sh` should only run a subset of the tests.
  #
  # When a subset is requested, those tests are run with
  # SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all".
  #
  # Expected layout of the commit message:
  #     <title> [all random seeds]
  #     <test_name_1>
  #     <test_name_2>
  #     ...
  name: Retrieve the selected tests
  if: github.repository == 'scikit-learn/scikit-learn'
  runs-on: ubuntu-latest
  needs:
    - retrieve-commit-message
  outputs:
    tests: ${{ steps.selected-tests.outputs.tests }}
  steps:
    - id: selected-tests
      name: Retrieve the selected tests
      shell: python
      env:
        COMMIT_MESSAGE: ${{ needs.retrieve-commit-message.outputs.message }}
      run: |
        import os

        MARKER = "[all random seeds]"
        message = os.environ["COMMIT_MESSAGE"]

        if MARKER in message:
            # Everything that follows the marker is a newline-separated list
            # of test names; join them into a single "a or b or ..." string.
            names = message.split(MARKER)[1].strip()
            # repr() quotes the expression so that several selected tests
            # are still passed around as one value.
            expression = repr(names.replace("\n", " or "))
        else:
            expression = ""

        # Publish the result as the `tests` step output.
        with open(os.environ["GITHUB_OUTPUT"], "a") as output_file:
            output_file.write(f"tests={expression}\n")
unit-tests:
  # Main test matrix. Every key of a matrix entry is exported as an
  # environment variable of the job through `env: ${{ matrix }}`.
  name: ${{ matrix.name }}
  runs-on: ${{ matrix.os }}
  if: github.repository == 'scikit-learn/scikit-learn'
  needs: [lint, retrieve-commit-message, retrieve-selected-tests]
  strategy:
    # Ensures that all builds run to completion even if one of them fails
    fail-fast: false
    matrix:
      include:
        - name: Linux pymin_conda_forge_arm
          os: ubuntu-24.04-arm
          DISTRIB: conda
          LOCK_FILE: build_tools/github/pymin_conda_forge_arm_linux-aarch64_conda.lock

        - name: Linux x86-64 pylatest_conda_forge_mkl
          os: ubuntu-22.04
          DISTRIB: conda
          LOCK_FILE: build_tools/github/pylatest_conda_forge_mkl_linux-64_conda.lock
          COVERAGE: true
          SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 42  # default global random seed
          SCIPY_ARRAY_API: 1
          # Tests that require large downloads over the network are skipped in CI.
          # Here we make sure that they are still run on a regular basis
          # (scheduled and manually dispatched runs).
          SKLEARN_SKIP_NETWORK_TESTS: ${{ (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && '0' || '1' }}

        # Check compilation with Ubuntu 22.04 LTS (Jammy Jellyfish) and scipy from conda-forge
        - name: Linux x86-64 pymin_conda_forge_openblas_ubuntu_2204
          os: ubuntu-22.04
          DISTRIB: conda
          LOCK_FILE: build_tools/github/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock
          SKLEARN_WARNINGS_AS_ERRORS: 1
          COVERAGE: false
          SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 0  # non-default seed

        # Linux build with minimum supported version of dependencies
        - name: Linux x86-64 pymin_conda_forge_openblas_min_dependencies
          os: ubuntu-22.04
          DISTRIB: conda
          LOCK_FILE: build_tools/github/pymin_conda_forge_openblas_min_dependencies_linux-64_conda.lock
          # Enable debug Cython directives to capture IndexError exceptions in
          # combination with the -Werror::pytest.PytestUnraisableExceptionWarning
          # flag for pytest.
          # https://github.com/scikit-learn/scikit-learn/pull/24438
          SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES: 1
          SKLEARN_RUN_FLOAT32_TESTS: 1
          SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 2  # non-default seed

        # Linux environment to test the latest available dependencies.
        # It runs tests requiring lightgbm, pandas and PyAMG.
        - name: Linux pylatest_pip_openblas_pandas
          os: ubuntu-24.04
          DISTRIB: conda
          LOCK_FILE: build_tools/github/pylatest_pip_openblas_pandas_linux-64_conda.lock
          SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 3  # non-default seed
          SCIPY_ARRAY_API: 1
          CHECK_PYTEST_SOFT_DEPENDENCY: true
          SKLEARN_WARNINGS_AS_ERRORS: 1
          # disable pytest-xdist to have 1 job where OpenMP and BLAS are not single
          # threaded because by default the tests configuration (sklearn/conftest.py)
          # makes sure that they are single threaded in each xdist subprocess.
          PYTEST_XDIST_VERSION: none
          PIP_BUILD_ISOLATION: true

        # Linux environment to test that scikit-learn can be built against
        # versions of numpy, scipy with ATLAS that comes with Ubuntu 24.04
        # Noble Numbat i.e. numpy 1.26.4 and scipy 1.11.4
        - name: Linux x86-64 ubuntu_atlas
          os: ubuntu-24.04
          DISTRIB: ubuntu
          LOCK_FILE: build_tools/github/ubuntu_atlas_lock.txt
          COVERAGE: false
          SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 1  # non-default seed

        - name: macOS pylatest_conda_forge_arm
          os: macos-15
          DISTRIB: conda
          LOCK_FILE: build_tools/github/pylatest_conda_forge_osx-arm64_conda.lock
          SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 5  # non-default seed
          SCIPY_ARRAY_API: 1
          PYTORCH_ENABLE_MPS_FALLBACK: 1
          CHECK_PYTEST_SOFT_DEPENDENCY: true

        - name: macOS x86-64 pylatest_conda_forge_mkl_no_openmp
          os: macos-15-intel
          DISTRIB: conda
          LOCK_FILE: build_tools/github/pylatest_conda_forge_mkl_no_openmp_osx-64_conda.lock
          SKLEARN_TEST_NO_OPENMP: true
          SKLEARN_SKIP_OPENMP_TEST: true
          SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 6  # non-default seed

        - name: Windows x64 pymin_conda_forge_openblas
          os: windows-latest
          DISTRIB: conda
          LOCK_FILE: build_tools/github/pymin_conda_forge_openblas_win-64_conda.lock
          SKLEARN_WARNINGS_AS_ERRORS: 1
          # The Windows runner is typically much slower than other CI runners
          # due to the lack of compiler cache. Running the tests with coverage
          # enabled makes them run extra slow. Since very few parts of the
          # code should have windows-specific code branches, code coverage
          # collection is only done for the non-windows runners.
          COVERAGE: false
          # Enable debug Cython directives to capture IndexError exceptions in
          # combination with the -Werror::pytest.PytestUnraisableExceptionWarning
          # flag for pytest.
          # https://github.com/scikit-learn/scikit-learn/pull/24438
          SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES: 1
          SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 7  # non-default seed

  env: ${{ matrix }}

  # The step list is anchored so that the non-matrix jobs of this workflow can
  # reuse it through the `*unit-tests-steps` alias.
  steps: &unit-tests-steps
    - name: Checkout
      uses: actions/checkout@v6

    # This step is necessary to access the job name the same way in both matrix and
    # non-matrix jobs (like free-threaded or scipy-dev builds): non-matrix jobs
    # set JOB_NAME in their `env`, matrix jobs get it from `matrix.name` here.
    - name: Set JOB_NAME variable
      shell: bash
      run: |
        if [[ -z "$JOB_NAME" ]]; then
          echo "JOB_NAME=${{ matrix.name }}" >> $GITHUB_ENV
        fi

    - name: Create cache for ccache
      uses: actions/cache@v5
      with:
        path: ${{ env.CCACHE_DIR }}
        key: ccache-v1-${{ env.JOB_NAME }}-${{ hashFiles('**/*.pyx*', '**/*.pxd*', '**/*.pxi*', '**/*.h', '**/*.c', '**/*.cpp', format('{0}', env.LOCK_FILE)) }}
        # restore-keys are matched as prefixes of saved keys, so the fallback
        # must share the `ccache-v1-<job name>-` prefix of `key`; without the
        # `v1-` part it could never match a saved cache.
        restore-keys: ccache-v1-${{ env.JOB_NAME }}-

    - name: Set up conda
      uses: conda-incubator/setup-miniconda@v3
      if: ${{ startsWith(env.DISTRIB, 'conda') }}
      with:
        miniforge-version: latest
        auto-activate-base: true
        activate-environment: ""

    - name: Build scikit-learn
      run: bash -l build_tools/github/install.sh

    # Enable global random seed randomization to discover seed-sensitive tests
    # only on nightly builds.
    # https://scikit-learn.org/stable/computing/parallelism.html#sklearn-tests-global-random-seed
    - name: Set random global random seed for nightly/manual runs
      if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
      shell: bash
      run: |
        SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$((RANDOM % 20))
        echo "SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$SKLEARN_TESTS_GLOBAL_RANDOM_SEED" >> $GITHUB_ENV
        echo "To reproduce this test run, set the following environment variable:"
        echo "    SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$SKLEARN_TESTS_GLOBAL_RANDOM_SEED"
        echo "See: https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed"

    # Enable global dtype fixture for all nightly builds to discover
    # numerical-sensitive tests.
    # https://scikit-learn.org/stable/computing/parallelism.html#sklearn-run-float32-tests
    - name: Run float32 tests for nightly/manual runs
      if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
      shell: bash
      run: |
        echo SKLEARN_RUN_FLOAT32_TESTS=1 >> $GITHUB_ENV

    - name: Run tests
      env:
        COMMIT_MESSAGE: ${{ needs.retrieve-commit-message.outputs.message }}
        SELECTED_TESTS: ${{ needs.retrieve-selected-tests.outputs.tests }}
        # Coverage is only collected when the whole test suite runs.
        COVERAGE: ${{ env.COVERAGE == 'true' && needs.retrieve-selected-tests.outputs.tests == ''}}
      run: bash -l build_tools/github/test_script.sh

    - name: Run doctests in .py and .rst files
      run: bash -l build_tools/github/test_docs.sh
      if: ${{ needs.retrieve-selected-tests.outputs.tests == ''}}

    - name: Run pytest soft dependency test
      run: bash -l build_tools/github/test_pytest_soft_dependency.sh
      if: ${{ env.CHECK_PYTEST_SOFT_DEPENDENCY == 'true' && needs.retrieve-selected-tests.outputs.tests == ''}}

    - name: Combine coverage reports from parallel test runners
      run: bash -l build_tools/github/combine_coverage_reports.sh
      if: ${{ env.COVERAGE == 'true' && needs.retrieve-selected-tests.outputs.tests == ''}}

    - name: Upload coverage report to Codecov
      uses: codecov/codecov-action@v5
      if: ${{ env.COVERAGE == 'true' && needs.retrieve-selected-tests.outputs.tests == ''}}
      with:
        files: ./coverage.xml
        token: ${{ secrets.CODECOV_TOKEN }}
        disable_search: true

    # `always()` makes this step run even when an earlier step failed, but
    # only for scheduled and manually dispatched runs.
    - name: Update tracking issue
      if: ${{ always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')}}
      shell: bash
      env:
        # Hand the secret over through the environment instead of expanding
        # it into the script text: it stays a single, quoted argument even
        # when it is empty, so the positional arguments below cannot shift.
        BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
      run: |
        set -ex
        pip install defusedxml PyGithub
        python maint_tools/update_tracking_issue.py \
          "$BOT_GITHUB_TOKEN" \
          "$GITHUB_WORKFLOW $JOB_NAME" \
          "$GITHUB_REPOSITORY" \
          https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID \
          --junit-file $TEST_DIR/$JUNITXML \
          --auto-close false \
          --job-name "$JOB_NAME"
free-threaded:
  name: &free-threaded-job-name Linux x86-64 pylatest_free_threaded
  runs-on: ubuntu-latest
  needs:
    - lint
    - retrieve-commit-message
    - retrieve-selected-tests
  # Runs when the commit message contains `[free-threaded]`, and on every
  # scheduled or manually dispatched run.
  if: >-
    contains(needs.retrieve-commit-message.outputs.message, '[free-threaded]')
    || github.event_name == 'schedule'
    || github.event_name == 'workflow_dispatch'
  env:
    DISTRIB: conda-free-threaded
    LOCK_FILE: build_tools/github/pylatest_free_threaded_linux-64_conda.lock
    COVERAGE: false
    # pytest-xdist is turned off so that pytest-run-parallel can use several
    # cores for stress-testing.
    PYTEST_XDIST_VERSION: none
    # The shared steps read the job name from the environment, so it has to
    # be exported here.
    JOB_NAME: *free-threaded-job-name
  steps: *unit-tests-steps
scipy-dev:
  name: &scipy-dev-job-name Linux x86-64 pylatest_pip_scipy_dev
  runs-on: ubuntu-22.04
  needs:
    - lint
    - retrieve-commit-message
    - retrieve-selected-tests
  # Runs when the commit message contains `[scipy-dev]`, and on every
  # scheduled or manually dispatched run.
  if: >-
    contains(needs.retrieve-commit-message.outputs.message, '[scipy-dev]')
    || github.event_name == 'schedule'
    || github.event_name == 'workflow_dispatch'
  env:
    DISTRIB: conda-pip-scipy-dev
    LOCK_FILE: build_tools/github/pylatest_pip_scipy_dev_linux-64_conda.lock
    SKLEARN_WARNINGS_AS_ERRORS: 1
    CHECK_PYTEST_SOFT_DEPENDENCY: true
    # The shared steps read the job name from the environment, so it has to
    # be exported here.
    JOB_NAME: *scipy-dev-job-name
  steps: *unit-tests-steps
debian-32bit:
  # 32-bit build: the build and the tests run inside a Docker container
  # (image given by DOCKER_CONTAINER) that is started once and then driven
  # with `docker exec`.
  name: &debian-32bit-job-name
    Linux i386 debian_32bit
  runs-on: ubuntu-24.04
  needs: [lint, retrieve-commit-message, retrieve-selected-tests]
  env:
    DISTRIB: debian-32
    LOCK_FILE: build_tools/github/debian_32bit_lock.txt
    SKLEARN_TESTS_GLOBAL_RANDOM_SEED: 4  # non-default seed
    DOCKER_CONTAINER: i386/debian:trixie
    # To be able to access the job name in the steps, it must be set as an env variable.
    JOB_NAME: *debian-32bit-job-name
  steps:
    - name: Checkout
      uses: actions/checkout@v6

    - name: Create cache for ccache
      uses: actions/cache@v5
      with:
        path: ${{ env.CCACHE_DIR }}
        key: ccache-v1-${{ env.JOB_NAME }}-${{ hashFiles('**/*.pyx*', '**/*.pxd*', '**/*.pxi*', '**/*.h', '**/*.c', '**/*.cpp', format('{0}', env.LOCK_FILE)) }}
        # restore-keys are matched as prefixes of saved keys, so the fallback
        # must share the `ccache-v1-<job name>-` prefix of `key`; without the
        # `v1-` part it could never match a saved cache.
        restore-keys: ccache-v1-${{ env.JOB_NAME }}-

    # Skipped in this job: DISTRIB is `debian-32`, which does not start with
    # `conda`.
    - name: Set up conda
      uses: conda-incubator/setup-miniconda@v3
      if: ${{ startsWith(env.DISTRIB, 'conda') }}
      with:
        miniforge-version: latest
        auto-activate-base: true
        activate-environment: ""

    # Enable global random seed randomization to discover seed-sensitive tests
    # only on nightly builds.
    # https://scikit-learn.org/stable/computing/parallelism.html#sklearn-tests-global-random-seed
    - name: Set random global random seed for nightly/manual runs
      if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
      shell: bash
      run: |
        SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$((RANDOM % 20))
        echo "SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$SKLEARN_TESTS_GLOBAL_RANDOM_SEED" >> $GITHUB_ENV
        echo "To reproduce this test run, set the following environment variable:"
        echo "    SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$SKLEARN_TESTS_GLOBAL_RANDOM_SEED"
        echo "See: https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed"

    # Enable global dtype fixture for all nightly builds to discover
    # numerical-sensitive tests.
    # https://scikit-learn.org/stable/computing/parallelism.html#sklearn-run-float32-tests
    - name: Run float32 tests for nightly/manual runs
      if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
      shell: bash
      run: |
        echo SKLEARN_RUN_FLOAT32_TESTS=1 >> $GITHUB_ENV

    - name: Start container
      # Environment variables are passed when starting the container, rather
      # than in the "Run tests" step as the more standard jobs do, because
      # the later steps run inside this container via `docker exec`.
      env:
        COMMIT_MESSAGE: ${{ needs.retrieve-commit-message.outputs.message }}
        SELECTED_TESTS: ${{ needs.retrieve-selected-tests.outputs.tests }}
        COVERAGE: ${{ env.COVERAGE == 'true' && needs.retrieve-selected-tests.outputs.tests == ''}}
      # Folded scalar: the lines below form one single `docker` command line.
      # `-e NAME` without a value forwards the variable from the runner
      # environment; the container is kept alive with `sleep`.
      run: >
        docker container run --rm
        --volume $TEST_DIR:/temp_dir
        --volume $PWD:/scikit-learn
        --volume $CCACHE_DIR:/ccache
        -w /scikit-learn
        --detach
        --name skcontainer
        -e TEST_DIR=/temp_dir
        -e CCACHE_DIR=/ccache
        -e COVERAGE
        -e DISTRIB
        -e LOCK_FILE
        -e JUNITXML
        -e VIRTUALENV
        -e PYTEST_XDIST_VERSION
        -e SKLEARN_SKIP_NETWORK_TESTS
        -e SELECTED_TESTS
        -e CCACHE_COMPRESS
        -e COMMIT_MESSAGE
        -e JOB_NAME
        -e SKLEARN_TESTS_GLOBAL_RANDOM_SEED
        -e SKLEARN_RUN_FLOAT32_TESTS
        $DOCKER_CONTAINER
        sleep 1000000

    - name: Build scikit-learn
      run: docker exec skcontainer bash -l build_tools/github/install.sh

    - name: Run tests
      run: docker exec skcontainer bash -l build_tools/github/test_script.sh

    - name: Run doctests in .py and .rst files
      run: docker exec skcontainer bash -l build_tools/github/test_docs.sh
      if: ${{ needs.retrieve-selected-tests.outputs.tests == ''}}

    # Invoked through `bash -l` like every other script run in the container.
    # NOTE(review): CHECK_PYTEST_SOFT_DEPENDENCY is not set for this job in
    # this file, so this step looks currently inactive - confirm.
    - name: Run pytest soft dependency test
      run: docker exec skcontainer bash -l build_tools/github/test_pytest_soft_dependency.sh
      if: ${{ env.CHECK_PYTEST_SOFT_DEPENDENCY == 'true' && needs.retrieve-selected-tests.outputs.tests == ''}}

    - name: Combine coverage reports from parallel test runners
      run: docker exec skcontainer bash -l build_tools/github/combine_coverage_reports.sh
      if: ${{ env.COVERAGE == 'true' && needs.retrieve-selected-tests.outputs.tests == ''}}

    - name: Upload coverage report to Codecov
      uses: codecov/codecov-action@v5
      if: ${{ env.COVERAGE == 'true' && needs.retrieve-selected-tests.outputs.tests == ''}}
      with:
        files: ./coverage.xml
        token: ${{ secrets.CODECOV_TOKEN }}
        disable_search: true

    # `always()` makes this step run even when an earlier step failed, but
    # only for scheduled and manually dispatched runs. It runs on the host
    # runner, not inside the container.
    - name: Update tracking issue
      if: ${{ always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')}}
      env:
        # Hand the secret over through the environment instead of expanding
        # it into the script text: it stays a single, quoted argument even
        # when it is empty, so the positional arguments below cannot shift.
        BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
      run: |
        set -ex
        pip install defusedxml PyGithub
        python maint_tools/update_tracking_issue.py \
          "$BOT_GITHUB_TOKEN" \
          "$GITHUB_WORKFLOW $JOB_NAME" \
          "$GITHUB_REPOSITORY" \
          https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID \
          --junit-file $TEST_DIR/$JUNITXML \
          --auto-close false \
          --job-name "$JOB_NAME"