Skip to content

Commit dfa9903

Browse files
authored
Merge branch 'rocm-main' into ci_rnn_final
2 parents 3f8ed59 + 6dc4dee commit dfa9903

File tree

475 files changed

+30885
-12548
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

475 files changed

+30885
-12548
lines changed

.bazelrc

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ build:mkl_open_source_only --define=tensorflow_mkldnn_contraction_kernel=1
104104
build:clang --copt=-Wno-gnu-offsetof-extensions
105105
# Disable clang extension that rejects unknown arguments.
106106
build:clang --copt=-Qunused-arguments
107+
# Error on struct/class mismatches, since this causes link failures on Windows.
108+
build:clang --copt=-Werror=mismatched-tags
107109

108110
# Configs for CUDA
109111
build:cuda --repo_env TF_NEED_CUDA=1
@@ -183,6 +185,7 @@ build:macos_cache_push --config=macos_cache --remote_upload_local_results=true -
183185
build:ci_linux_x86_64 --config=avx_linux --config=avx_posix
184186
build:ci_linux_x86_64 --config=mkl_open_source_only
185187
build:ci_linux_x86_64 --config=clang --verbose_failures=true
188+
build:ci_linux_x86_64 --color=yes
186189

187190
# TODO(b/356695103): We do not have a CPU only toolchain so we use the CUDA
188191
# toolchain for both CPU and GPU builds.
@@ -203,6 +206,7 @@ build:ci_linux_x86_64_cuda --config=ci_linux_x86_64
203206
# Linux Aarch64 CI configs
204207
build:ci_linux_aarch64_base --config=clang --verbose_failures=true
205208
build:ci_linux_aarch64_base --action_env=TF_SYSROOT="/dt10"
209+
build:ci_linux_aarch64_base --color=yes
206210

207211
build:ci_linux_aarch64 --config=ci_linux_aarch64_base
208212
build:ci_linux_aarch64 --host_crosstool_top="@ml2014_clang_aarch64_config_aarch64//crosstool:toolchain"
@@ -221,18 +225,21 @@ build:ci_linux_aarch64_cuda --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm
221225
build:ci_darwin_x86_64 --macos_minimum_os=10.14
222226
build:ci_darwin_x86_64 --config=macos_cache_push
223227
build:ci_darwin_x86_64 --verbose_failures=true
228+
build:ci_darwin_x86_64 --color=yes
224229

225230
# Mac Arm64 CI configs
226231
build:ci_darwin_arm64 --macos_minimum_os=11.0
227232
build:ci_darwin_arm64 --config=macos_cache_push
228233
build:ci_darwin_arm64 --verbose_failures=true
234+
build:ci_darwin_arm64 --color=yes
229235

230236
# Windows x86 CI configs
231237
build:ci_windows_amd64 --config=avx_windows
232238
build:ci_windows_amd64 --compiler=clang-cl --config=clang --verbose_failures=true
233239
build:ci_windows_amd64 --crosstool_top="@xla//tools/toolchains/win/20240424:toolchain"
234240
build:ci_windows_amd64 --extra_toolchains="@xla//tools/toolchains/win/20240424:cc-toolchain-x64_windows-clang-cl"
235241
build:ci_windows_amd64 --host_linkopt=/FORCE:MULTIPLE --linkopt=/FORCE:MULTIPLE
242+
build:ci_windows_amd64 --color=yes
236243

237244
# #############################################################################
238245
# RBE config options below. These inherit the CI configs above and set the
@@ -379,4 +386,4 @@ build:debug --config debug_symbols -c fastbuild
379386
try-import %workspace%/.jax_configure.bazelrc
380387

381388
# Load rc file with user-specific options.
382-
try-import %workspace%/.bazelrc.user
389+
try-import %workspace%/.bazelrc.user

.github/workflows/asan.yaml

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ on:
1212
branches:
1313
- main
1414
paths:
15-
- '**/workflows/asan.yml'
15+
- '**/workflows/asan.yaml'
1616

1717
jobs:
1818
asan:
@@ -25,14 +25,8 @@ jobs:
2525
run:
2626
shell: bash -l {0}
2727
steps:
28-
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
29-
with:
30-
path: jax
31-
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
32-
with:
33-
repository: python/cpython
34-
path: cpython
35-
ref: v3.13.0
28+
# Install git before actions/checkout as otherwise it will download the code with the GitHub
29+
# REST API and therefore any subsequent git commands will fail.
3630
- name: Install clang 18
3731
env:
3832
DEBIAN_FRONTEND: noninteractive
@@ -42,6 +36,14 @@ jobs:
4236
zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git \
4337
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
4438
libffi-dev liblzma-dev
39+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
40+
with:
41+
path: jax
42+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
43+
with:
44+
repository: python/cpython
45+
path: cpython
46+
ref: v3.13.0
4547
- name: Build CPython with ASAN enabled
4648
env:
4749
ASAN_OPTIONS: detect_leaks=0
@@ -65,7 +67,7 @@ jobs:
6567
run: |
6668
source ${GITHUB_WORKSPACE}/venv/bin/activate
6769
cd jax
68-
python build/build.py \
70+
python build/build.py build --wheels=jaxlib --verbose \
6971
--bazel_options=--color=yes \
7072
--bazel_options=--copt=-fsanitize=address \
7173
--clang_path=/usr/bin/clang-18
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: CI - Bazel CPU tests (RBE)
2+
3+
on:
4+
workflow_dispatch:
5+
inputs:
6+
halt-for-connection:
7+
description: 'Should this workflow run wait for a remote connection?'
8+
type: choice
9+
required: true
10+
default: 'no'
11+
options:
12+
- 'yes'
13+
- 'no'
14+
15+
concurrency:
16+
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
17+
cancel-in-progress: true
18+
19+
jobs:
20+
run_tests:
21+
if: github.event.repository.fork == false
22+
strategy:
23+
matrix:
24+
runner: ["linux-x86-n2-16", "linux-arm64-t2a-16"]
25+
26+
runs-on: ${{ matrix.runner }}
27+
# TODO(b/369382309): Replace Linux Arm64 container with the ml-build container once it is available
28+
container: ${{ (contains(matrix.runner, 'linux-x86') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest') ||
29+
(contains(matrix.runner, 'linux-arm64') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/linux-arm64-arc-container:latest') }}
30+
31+
env:
32+
JAXCI_HERMETIC_PYTHON_VERSION: "3.12"
33+
34+
steps:
35+
- uses: actions/checkout@v3
36+
- name: Wait For Connection
37+
uses: google-ml-infra/actions/ci_connection@main
38+
with:
39+
halt-dispatch-input: ${{ inputs.halt-for-connection }}
40+
- name: Run Bazel CPU Tests with RBE
41+
run: ./ci/run_bazel_test_cpu_rbe.sh
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
name: CI - Bazel GPU tests (RBE)
2+
3+
on:
4+
workflow_dispatch:
5+
inputs:
6+
halt-for-connection:
7+
description: 'Should this workflow run wait for a remote connection?'
8+
type: choice
9+
required: true
10+
default: 'no'
11+
options:
12+
- 'yes'
13+
- 'no'
14+
15+
concurrency:
16+
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
17+
cancel-in-progress: true
18+
19+
jobs:
20+
run_tests:
21+
if: github.event.repository.fork == false
22+
strategy:
23+
matrix:
24+
runner: ["linux-x86-n2-16"]
25+
26+
runs-on: ${{ matrix.runner }}
27+
container: 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest'
28+
29+
env:
30+
JAXCI_HERMETIC_PYTHON_VERSION: "3.12"
31+
32+
steps:
33+
- uses: actions/checkout@v3
34+
- name: Wait For Connection
35+
uses: google-ml-infra/actions/ci_connection@main
36+
with:
37+
halt-dispatch-input: ${{ inputs.halt-for-connection }}
38+
- name: Run Bazel GPU Tests with RBE
39+
run: ./ci/run_bazel_test_gpu_rbe.sh

.github/workflows/ci-build.yaml

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: CI
1+
name: ROCm CPU CI
22

33
# We test all supported Python versions as follows:
44
# - 3.10 : Documentation build
@@ -11,10 +11,10 @@ on:
1111
# but only for the main branch
1212
push:
1313
branches:
14-
- main
14+
- rocm-main
1515
pull_request:
1616
branches:
17-
- main
17+
- rocm-main
1818

1919
permissions:
2020
contents: read # to fetch code
@@ -29,18 +29,21 @@ jobs:
2929
runs-on: ubuntu-latest
3030
timeout-minutes: 5
3131
steps:
32-
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
32+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
3333
- name: Set up Python 3.11
34-
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
34+
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
3535
with:
3636
python-version: 3.11
37-
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
37+
- run: python -m pip install pre-commit
38+
- uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
39+
with:
40+
path: ~/.cache/pre-commit
41+
key: pre-commit-${{ env.pythonLocation }}-${{ hashFiles('.pre-commit-config.yaml', 'setup.py') }}
42+
- run: pre-commit run --show-diff-on-failure --color=always --all-files
3843

3944
build:
4045
name: "build ${{ matrix.name-prefix }} (py ${{ matrix.python-version }} on ubuntu-20.04, x64=${{ matrix.enable-x64}})"
41-
runs-on: linux-x86-n2-32
42-
container:
43-
image: index.docker.io/library/ubuntu@sha256:6d8d9799fe6ab3221965efac00b4c34a2bcc102c086a58dff9e19a08b913c7ef # ratchet:ubuntu:20.04
46+
runs-on: ROCM-Ubuntu
4447
timeout-minutes: 60
4548
strategy:
4649
matrix:
@@ -57,13 +60,9 @@ jobs:
5760
prng-upgrade: 0
5861
num_generated_cases: 1
5962
steps:
60-
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
61-
- name: Image Setup
62-
run: |
63-
apt update
64-
apt install -y libssl-dev
63+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
6564
- name: Set up Python ${{ matrix.python-version }}
66-
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
65+
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
6766
with:
6867
python-version: ${{ matrix.python-version }}
6968
- name: Get pip cache dir
@@ -72,7 +71,7 @@ jobs:
7271
python -m pip install --upgrade pip wheel
7372
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
7473
- name: pip cache
75-
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
74+
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
7675
with:
7776
path: ${{ steps.pip-cache.outputs.dir }}
7877
key: ${{ runner.os }}-py${{ matrix.python-version }}-pip-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt') }}
@@ -102,15 +101,15 @@ jobs:
102101
103102
documentation:
104103
name: Documentation - test code snippets
105-
runs-on: ubuntu-latest
104+
runs-on: ROCM-Ubuntu
106105
timeout-minutes: 10
107106
strategy:
108107
matrix:
109108
python-version: ['3.10']
110109
steps:
111-
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
110+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
112111
- name: Set up Python ${{ matrix.python-version }}
113-
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
112+
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
114113
with:
115114
python-version: ${{ matrix.python-version }}
116115
- name: Get pip cache dir
@@ -119,7 +118,7 @@ jobs:
119118
python -m pip install --upgrade pip wheel
120119
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
121120
- name: pip cache
122-
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
121+
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
123122
with:
124123
path: ${{ steps.pip-cache.outputs.dir }}
125124
key: ${{ runner.os }}-pip-docs-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt') }}
@@ -140,14 +139,14 @@ jobs:
140139
documentation_render:
141140
name: Documentation - render documentation
142141
runs-on: ubuntu-latest
143-
timeout-minutes: 10
142+
timeout-minutes: 20
144143
strategy:
145144
matrix:
146145
python-version: ['3.10']
147146
steps:
148-
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
147+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
149148
- name: Set up Python ${{ matrix.python-version }}
150-
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
149+
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
151150
with:
152151
python-version: ${{ matrix.python-version }}
153152
- name: Get pip cache dir
@@ -156,7 +155,7 @@ jobs:
156155
python -m pip install --upgrade pip wheel
157156
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
158157
- name: pip cache
159-
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
158+
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
160159
with:
161160
path: ${{ steps.pip-cache.outputs.dir }}
162161
key: ${{ runner.os }}-pip-docs-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt') }}
@@ -165,8 +164,7 @@ jobs:
165164
pip install -r docs/requirements.txt
166165
- name: Render documentation
167166
run: |
168-
sphinx-build --color -W --keep-going -b html -D nb_execution_mode=off docs docs/build/html
169-
167+
sphinx-build -j auto --color -W --keep-going -b html -D nb_execution_mode=off docs docs/build/html
170168
171169
jax2tf_test:
172170
name: "jax2tf_test (py ${{ matrix.python-version }} on ${{ matrix.os }}, x64=${{ matrix.enable-x64}})"
@@ -181,9 +179,9 @@ jobs:
181179
enable-x64: 0
182180
num_generated_cases: 10
183181
steps:
184-
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
182+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
185183
- name: Set up Python ${{ matrix.python-version }}
186-
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
184+
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
187185
with:
188186
python-version: ${{ matrix.python-version }}
189187
- name: Get pip cache dir
@@ -192,7 +190,7 @@ jobs:
192190
python -m pip install --upgrade pip wheel
193191
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
194192
- name: pip cache
195-
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
193+
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
196194
with:
197195
path: ${{ steps.pip-cache.outputs.dir }}
198196
key: ${{ runner.os }}-py${{ matrix.python-version }}-pip-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt') }}
@@ -217,21 +215,21 @@ jobs:
217215
218216
ffi:
219217
name: FFI example
220-
runs-on: ubuntu-latest
221-
timeout-minutes: 5
218+
runs-on: ROCM-Ubuntu
219+
timeout-minutes: 30
222220
steps:
223-
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
224-
- name: Set up Python 3.11
225-
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
221+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
222+
- name: Set up Python
223+
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
226224
with:
227-
python-version: 3.11
225+
python-version: 3.12
228226
- name: Get pip cache dir
229227
id: pip-cache
230228
run: |
231229
python -m pip install --upgrade pip wheel
232230
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
233231
- name: pip cache
234-
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
232+
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
235233
with:
236234
path: ${{ steps.pip-cache.outputs.dir }}
237235
key: ${{ runner.os }}-pip-ffi-examples-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt', 'examples/**/pyproject.toml') }}
@@ -245,6 +243,10 @@ jobs:
245243
# a different toolchain. GCC is the default compiler on the
246244
# 'ubuntu-latest' runner (NOTE: this job now runs on 'ROCM-Ubuntu'; confirm its default compiler), but we still set this explicitly just to be
247245
# clear.
248-
CMAKE_ARGS: -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++
249-
- name: Run tests
246+
CMAKE_ARGS: -DCMAKE_CXX_COMPILER=g++ #-DJAX_FFI_EXAMPLE_ENABLE_CUDA=ON
247+
- name: Run CPU tests
248+
run: python -m pytest examples/ffi/tests
249+
env:
250+
JAX_PLATFORM_NAME: cpu
251+
- name: Run GPU tests
250252
run: python -m pytest examples/ffi/tests

0 commit comments

Comments
 (0)