Skip to content

Commit d6cf755

Browse files
authored
Merge pull request #255 from ROCm/ci-upstream-sync-135_1
CI: 03/04/25 upstream sync
2 parents a701022 + 07cd809 commit d6cf755

File tree

172 files changed

+9849
-1758
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

172 files changed

+9849
-1758
lines changed

.bazelrc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# #############################################################################
22
# All default build options below. These apply to all build commands.
33
# #############################################################################
4+
# TODO: Enable Bzlmod
5+
common --noenable_bzlmod
6+
7+
# TODO: Migrate for https://github.com/bazelbuild/bazel/issues/7260
8+
common --noincompatible_enable_cc_toolchain_resolution
9+
410
# Make Bazel print out all options from rc files.
511
common --announce_rc
612

@@ -41,6 +47,13 @@ build:linux --copt=-Wno-array-parameter
4147
build:macos --config=posix
4248
build:macos --apple_platform_type=macos
4349

50+
# Bazel 7.0.0 no longer supports dynamic symbol lookup on macOS. To resolve
51+
# undefined symbol errors in macOS arm64 builds, explicitly add the necessary
52+
# linker flags until dependencies are well defined. See
53+
# https://github.com/bazelbuild/bazel/issues/19730.
54+
build:macos --linkopt=-Wl,-undefined,dynamic_lookup
55+
build:macos --host_linkopt=-Wl,-undefined,dynamic_lookup
56+
4457
# Windows has a relatively short command line limit, which JAX has begun to hit.
4558
# See https://docs.bazel.build/versions/main/windows.html
4659
build:windows --features=compiler_param_file
@@ -267,6 +280,12 @@ build:resultstore --bes_instance_name="tensorflow-testing"
267280
build:resultstore --bes_results_url="https://source.cloud.google.com/results/invocations"
268281
build:resultstore --bes_timeout=600s
269282

283+
# Configs for RBE cache. When using resultstore, we need to use these configs
284+
# as well to ensure that the logs that get uploaded to resultstore can be read
285+
# without any errors.
286+
build:rbe_cache --remote_cache=remotebuildexecution.googleapis.com
287+
build:rbe_cache --remote_instance_name=projects/tensorflow-testing/instances/default_instance
288+
270289
build:rbe --config=resultstore
271290
build:rbe --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
272291
build:rbe --define=EXECUTOR=remote

.bazelversion

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
6.5.0
1+
7.4.1

.github/workflows/asan.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,4 @@ jobs:
9090
echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
9191
echo "JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS"
9292
# The LD_PRELOAD works around https://github.com/google/sanitizers/issues/934#issuecomment-649516500
93-
LD_PRELOAD=/lib/x86_64-linux-gnu/libstdc++.so.6 python -m pytest -n auto --tb=short --maxfail=20 tests
93+
LD_PRELOAD=/lib/x86_64-linux-gnu/libstdc++.so.6 python -m pytest -n 32 --tb=short --maxfail=20 tests

.github/workflows/build_artifacts.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ on:
1717
options:
1818
- "linux-x86-n2-16"
1919
- "linux-arm64-c4a-64"
20-
- "windows-x86-n2-16"
20+
- "windows-x86-n2-64"
2121
artifact:
2222
description: "Which JAX artifact to build?"
2323
type: choice
@@ -119,11 +119,11 @@ jobs:
119119

120120
steps:
121121
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
122-
- name: Enable RBE if building on Linux x86 or Windows x86
123-
if: contains(inputs.runner, 'linux-x86') || contains(inputs.runner, 'windows-x86')
122+
- name: Enable RBE if building on Linux x86
123+
if: contains(inputs.runner, 'linux-x86')
124124
run: echo "JAXCI_BUILD_ARTIFACT_WITH_RBE=1" >> $GITHUB_ENV
125-
- name: Enable Bazel remote cache (with writes enabled) if building on Linux Aarch64
126-
if: contains(inputs.runner, 'linux-arm64')
125+
- name: Enable Bazel remote cache (with writes enabled) if building on Linux Aarch64 or Windows x86
126+
if: contains(inputs.runner, 'linux-arm64') || contains(inputs.runner, 'windows-x86')
127127
run: echo "JAXCI_WRITE_TO_BAZEL_REMOTE_CACHE=1" >> $GITHUB_ENV
128128
# Halt for testing
129129
- name: Wait For Connection

.github/workflows/ci-build.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ jobs:
8080
JAX_SKIP_SLOW_TESTS: true
8181
PY_COLORS: 1
8282
run: |
83-
pip install -e .
83+
uv pip install --system -e .
8484
echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
8585
echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
8686
echo "JAX_ENABLE_CUSTOM_PRNG=$JAX_ENABLE_CUSTOM_PRNG"
@@ -171,7 +171,7 @@ jobs:
171171
JAX_SKIP_SLOW_TESTS: true
172172
PY_COLORS: 1
173173
run: |
174-
pip install -e .
174+
uv pip install --system -e .
175175
echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
176176
echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
177177
echo "JAX_ENABLE_CHECKS=$JAX_ENABLE_CHECKS"

.github/workflows/cloud-tpu-ci-nightly.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
python-version: ["3.10"]
3434
name: "TPU test (jaxlib=${{ matrix.jaxlib-version }}, ${{ matrix.tpu.type }})"
3535
env:
36-
LIBTPU_OLDEST_VERSION_DATE: 20241118
36+
LIBTPU_OLDEST_VERSION_DATE: 20241205
3737
PYTHON: python${{ matrix.python-version }}
3838
runs-on: ${{ matrix.tpu.runner }}
3939
container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"

.github/workflows/pytest_cpu.yml

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,15 @@ jobs:
7979
8080
# Get the major and minor version of Python.
8181
# E.g., if JAXCI_HERMETIC_PYTHON_VERSION=3.10, then python_major_minor=310
82-
python_major_minor=$(echo "$JAXCI_HERMETIC_PYTHON_VERSION" | tr -d '.')
82+
# E.g., if JAXCI_HERMETIC_PYTHON_VERSION=3.13-nogil, then python_major_minor=313t
83+
python_major_minor=$(echo "${JAXCI_HERMETIC_PYTHON_VERSION//-nogil/t}" | tr -d '.')
8384
8485
echo "OS=${os}" >> $GITHUB_ENV
8586
echo "ARCH=${arch}" >> $GITHUB_ENV
86-
echo "PYTHON_MAJOR_MINOR=${python_major_minor}" >> $GITHUB_ENV
87+
# Python wheels follow a naming convention: standard wheels use the pattern
88+
# `*-cp<py_version>-cp<py_version>-*`, while free-threaded wheels use
89+
# `*-cp<py_version>-cp<py_version>t-*`.
90+
echo "PYTHON_MAJOR_MINOR=cp${python_major_minor%t}-cp${python_major_minor}-" >> $GITHUB_ENV
8791
- name: Download jaxlib wheel from GCS (non-Windows runs)
8892
id: download-wheel-artifacts-nw
8993
# Set continue-on-error to true to prevent actions from failing the workflow if this step
@@ -125,7 +129,19 @@ jobs:
125129
echo "Skipping the test run."
126130
exit 1
127131
- name: Install Python dependencies
128-
run: $JAXCI_PYTHON -m uv pip install -r build/requirements.in
132+
run: |
133+
# TODO(srnitin): Remove after uv is installed in the Windows Dockerfile
134+
$JAXCI_PYTHON -m pip install uv~=0.5.30
135+
# Python 3.13t cannot compile zstandard 0.23.0 due to
136+
# https://github.com/indygreg/python-zstandard/issues/231. Remove this once zstandard
137+
# has a prebuilt wheel for 3.13t or an env marker is available for free-threaded Python
138+
# in requirements.in.
139+
if [[ $JAXCI_PYTHON =~ "python3.13-nogil" ]]; then
140+
grep -v "zstandard" build/requirements.in > build/requirements_without_zstandard.txt
141+
$JAXCI_PYTHON -m uv pip install -r build/requirements_without_zstandard.txt
142+
else
143+
$JAXCI_PYTHON -m uv pip install -r build/requirements.in
144+
fi
129145
# Halt for testing
130146
- name: Wait For Connection
131147
uses: google-ml-infra/actions/ci_connection@main

.github/workflows/pytest_cuda.yml

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,15 @@ jobs:
7777
7878
# Get the major and minor version of Python.
7979
# E.g., if JAXCI_HERMETIC_PYTHON_VERSION=3.10, then python_major_minor=310
80-
python_major_minor=$(echo "$JAXCI_HERMETIC_PYTHON_VERSION" | tr -d '.')
80+
# E.g., if JAXCI_HERMETIC_PYTHON_VERSION=3.13-nogil, then python_major_minor=313t
81+
python_major_minor=$(echo "${JAXCI_HERMETIC_PYTHON_VERSION//-nogil/t}" | tr -d '.')
8182
8283
echo "OS=${os}" >> $GITHUB_ENV
8384
echo "ARCH=${arch}" >> $GITHUB_ENV
84-
echo "PYTHON_MAJOR_MINOR=${python_major_minor}" >> $GITHUB_ENV
85+
# Python wheels follow a naming convention: standard wheels use the pattern
86+
# `*-cp<py_version>-cp<py_version>-*`, while free-threaded wheels use
87+
# `*-cp<py_version>-cp<py_version>t-*`.
88+
echo "PYTHON_MAJOR_MINOR=cp${python_major_minor%t}-cp${python_major_minor}-" >> $GITHUB_ENV
8589
- name: Download the wheel artifacts from GCS
8690
id: download-wheel-artifacts
8791
# Set continue-on-error to true to prevent actions from failing the workflow if this step
@@ -106,7 +110,17 @@ jobs:
106110
echo "Skipping the test run."
107111
exit 1
108112
- name: Install Python dependencies
109-
run: $JAXCI_PYTHON -m uv pip install -r build/requirements.in
113+
run: |
114+
# Python 3.13t cannot compile zstandard 0.23.0 due to
115+
# https://github.com/indygreg/python-zstandard/issues/231. Remove this once zstandard
116+
# has a prebuilt wheel for 3.13t or an env marker is available for free-threaded Python
117+
# in requirements.in.
118+
if [[ $JAXCI_PYTHON =~ "python3.13-nogil" ]]; then
119+
grep -v "zstandard" build/requirements.in > build/requirements_without_zstandard.txt
120+
$JAXCI_PYTHON -m uv pip install -r build/requirements_without_zstandard.txt
121+
else
122+
$JAXCI_PYTHON -m uv pip install -r build/requirements.in
123+
fi
110124
# Halt for testing
111125
- name: Wait For Connection
112126
uses: google-ml-infra/actions/ci_connection@main

.github/workflows/tsan-suppressions.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ race:dump_traceback
1717
# https://github.com/python/cpython/issues/128137
1818
# Fixed in Python 3.14, but not backported to 3.13.
1919
race:immortalize_interned
20+
race:_PyUnicode_InternMortal
2021

2122
# https://github.com/python/cpython/issues/128144
2223
# Fixed in Python 3.14, but not backported to 3.13.
@@ -31,6 +32,7 @@ race:split_keys_entry_added
3132
# https://github.com/python/cpython/issues/129748
3233
race:mi_block_set_nextx
3334

35+
3436
# Races because the LAPACK and BLAS in our scipy aren't TSAN-instrumented.
3537
race:heevd_ffi
3638
race:gesdd_ffi
@@ -59,3 +61,6 @@ race:gemm_oncopy
5961

6062
# https://github.com/python/cpython/issues/128133
6163
# race:bytes_hash
64+
65+
# https://github.com/python/cpython/issues/130571
66+
# race:_PyObject_GetMethod

.github/workflows/tsan.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,5 @@ jobs:
199199
--local_test_jobs=32 \
200200
--test_timeout=600 \
201201
--config=resultstore \
202-
--spawn_strategy=local \
203-
--remote_cache=remotebuildexecution.googleapis.com \
204-
--remote_instance_name=projects/tensorflow-testing/instances/default_instance \
202+
--config=rbe_cache \
205203
//tests:cpu_tests

0 commit comments

Comments
 (0)