Skip to content

Commit 2f0f001

Browse files
yuriivcs authored and Google-ML-Automation committed
Use a local clang config for TSAN builds
Update the TSAN GitHub workflow to use `rbe_linux_x86_64_clang_local`, which ensures clang-based builds run locally, as required for ThreadSanitizer.

PiperOrigin-RevId: 864992841
1 parent 8ca2421 commit 2f0f001

File tree

2 files changed

+144
-95
lines changed

2 files changed

+144
-95
lines changed

.bazelrc

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -292,7 +292,7 @@ common:ci_linux_x86_64 --color=yes
292292
# toolchain for both CPU and GPU builds.
293293
common:ci_linux_x86_64_clang_local --config=ci_linux_x86_64
294294
common:ci_linux_x86_64_clang_local --config=clang_local
295-
common:ci_linux_x86_64_clang_local --repo_env=TF_SYSROOT="/dt9"
295+
#common:ci_linux_x86_64_clang_local --repo_env=TF_SYSROOT="/dt9"
296296

297297
# Clang path needs to be set for remote toolchain to be configured correctly.
298298
common:ci_linux_x86_64_clang_local --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
@@ -429,6 +429,8 @@ common:rbe_linux_x86_64_base --platforms="@ml_build_config_platform//:platform"
429429
common:rbe_linux_x86_64 --config=rbe_linux_x86_64_base
430430
common:rbe_linux_x86_64 --config=ci_linux_x86_64
431431

432+
common:rbe_linux_x86_64_clang_local --config=ci_linux_x86_64_clang_local
433+
432434
common:rbe_linux_x86_64_cuda_common --config=rbe_linux_x86_64_base
433435
common:rbe_linux_x86_64_cuda_common --config=rbe_gpu_pool
434436
# Update UMD version when RBE CUDA driver is updated.

.github/workflows/tsan.yaml

Lines changed: 141 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ concurrency:
55
cancel-in-progress: true
66
on:
77
schedule:
8-
- cron: "0 5 * * *" # Daily at 05:00 UTC == 00:00 EST == 21:00 PST
8+
- cron: "0 5 * * *" # Daily at 05:00 UTC == 00:00 EST == 21:00 PST
99
workflow_dispatch: # allows triggering the workflow run manually
1010
pull_request: # Automatically trigger on pull requests affecting this file
1111
branches:
@@ -14,20 +14,19 @@ on:
1414
- '**/workflows/tsan.yaml'
1515
- '**/workflows/tsan-suppressions*.txt'
1616
permissions: {}
17-
18-
env:
19-
UV_DEFAULT_INDEX: "https://us-python.pkg.dev/ml-oss-artifacts-published/pypi-mirror/simple"
20-
PIP_INDEX_URL: "https://us-python.pkg.dev/ml-oss-artifacts-published/pypi-mirror/simple"
21-
2217
jobs:
2318
tsan:
2419
runs-on: linux-x86-n4-64
2520
container:
26-
image: us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build@sha256:ea67e8453d8b09c2ba48853da5e79efef4b65804b4a48dfae4b4da89ffd38405 # ml-build container (based on Ubuntu 22.04)
21+
image: index.docker.io/library/ubuntu@sha256:b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b # ratchet:ubuntu:24.04
2722
strategy:
2823
fail-fast: false
2924
matrix:
3025
include:
26+
- name-prefix: "with 3.13"
27+
python-version: "3.13"
28+
github_branch: "3.13"
29+
requirements_lock_name: "requirements_lock_3_13_ft"
3130
- name-prefix: "with 3.14"
3231
python-version: "3.14"
3332
github_branch: "3.14"
@@ -38,56 +37,32 @@ jobs:
3837
steps:
3938
# Install git before actions/checkout as otherwise it will download the code with the GitHub
4039
# REST API and therefore any subsequent git commands will fail.
41-
# Also install dependencies for Google Cloud SDK (curl, python3, etc)
42-
- name: Install dependencies
40+
- name: Install clang 18
4341
env:
4442
DEBIAN_FRONTEND: noninteractive
4543
run: |
46-
apt-get update
47-
apt-get install -q -y \
48-
clang-18 \
49-
libclang-common-18-dev \
50-
libclang-rt-18-dev \
51-
libc++abi-18-dev \
52-
lld-18 \
53-
libstdc++-12-dev \
54-
libc++-18-dev \
55-
build-essential \
56-
libssl-dev \
57-
zlib1g-dev \
58-
libbz2-dev \
59-
libreadline-dev \
60-
libsqlite3-dev \
61-
curl \
62-
git \
63-
libncursesw5-dev \
64-
xz-utils \
65-
tk-dev \
66-
libxml2-dev \
67-
libxmlsec1-dev \
68-
libffi-dev \
69-
liblzma-dev \
70-
file \
71-
vim \
72-
wget \
73-
zip \
74-
zstd
75-
- name: Install Google Cloud SDK
76-
run: |
77-
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
78-
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
79-
apt update && apt install -y google-cloud-cli
80-
81-
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
44+
apt update
45+
apt install -q -y clang-18 libstdc++-14-dev build-essential libssl-dev \
46+
zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git \
47+
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
48+
libffi-dev liblzma-dev file zip
49+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
8250
with:
8351
path: jax
8452
persist-credentials: false
85-
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
53+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
8654
with:
8755
repository: numpy/numpy
8856
path: numpy
8957
submodules: true
9058
persist-credentials: false
59+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
60+
if: ${{ matrix.python-version == '3.14' }}
61+
with:
62+
repository: scipy/scipy
63+
path: scipy
64+
submodules: true
65+
persist-credentials: false
9166

9267
- name: Get year & week number
9368
id: get-date
@@ -96,13 +71,13 @@ jobs:
9671

9772
- name: Restore cached TSAN CPython ${{ matrix.python-version }}
9873
id: cache-cpython-tsan-restore
99-
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
74+
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
10075
with:
10176
path: |
10277
./python-tsan.tgz
10378
key: ${{ runner.os }}-cpython-tsan-${{ matrix.python-version }}-${{ steps.get-date.outputs.date }}
10479

105-
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
80+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
10681
if: steps.cache-cpython-tsan-restore.outputs.cache-hit != 'true'
10782
with:
10883
repository: python/cpython
@@ -116,7 +91,7 @@ jobs:
11691
run: |
11792
cd cpython
11893
mkdir ${GITHUB_WORKSPACE}/cpython-tsan
119-
CC=clang-18 CXX=clang++-18 CFLAGS=" -O0 -g" CXXFLAGS="-stdlib=libc++" ./configure --prefix ${GITHUB_WORKSPACE}/cpython-tsan --disable-gil --with-thread-sanitizer --with-mimalloc
94+
CC=clang-18 CXX=clang++-18 ./configure --prefix ${GITHUB_WORKSPACE}/cpython-tsan --disable-gil --with-thread-sanitizer
12095
make -j64
12196
make install -j64
12297
# Check whether free-threading mode is enabled
@@ -128,38 +103,22 @@ jobs:
128103
- name: Save TSAN CPython ${{ matrix.python-version }}
129104
id: cache-cpython-tsan-save
130105
if: steps.cache-cpython-tsan-restore.outputs.cache-hit != 'true'
131-
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
106+
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
132107
with:
133108
path: |
134109
./python-tsan.tgz
135110
key: ${{ runner.os }}-cpython-tsan-${{ matrix.python-version }}-${{ steps.get-date.outputs.date }}
136111

137-
# Upload the Python tarball to GCS so RBE can access it.
138-
- name: Upload TSAN CPython to GCS
139-
run: |
140-
GCS_DEST="gs://general-ml-ci-transient/jax-github-actions/jax/${{ github.workflow }}/${{ github.run_number }}/${{ github.run_attempt }}"
141-
echo "Uploading python-tsan.tgz to $GCS_DEST"
142-
gcloud storage cp python-tsan.tgz "$GCS_DEST/"
143-
144-
# Output the HTTP URL for Bazel
145-
BASE_URL="https://storage.googleapis.com/general-ml-ci-transient/jax-github-actions/jax/${{ github.workflow }}/${{ github.run_number }}/${{ github.run_attempt }}/python-tsan.tgz"
146-
# URL-encode the path (handles spaces in workflow name)
147-
PUBLIC_URL=$(python3 -c "import urllib.parse; print(urllib.parse.quote('${BASE_URL}', safe=':/'))")
148-
echo "HERMETIC_PYTHON_URL=$PUBLIC_URL" >> $GITHUB_ENV
149-
150-
# --- Conditional NumPy steps for pre-release Python ---
151-
152112
- name: Restore cached TSAN Numpy
153113
id: cache-numpy-tsan-restore
154-
if: matrix.python-version == '3.15'
155-
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
114+
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
156115
with:
157116
path: |
158117
./wheelhouse
159118
key: ${{ runner.os }}-numpy-tsan-${{ matrix.python-version }}-${{ hashFiles('numpy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
160119

161120
- name: Build TSAN Numpy wheel
162-
if: matrix.python-version == '3.15' && steps.cache-numpy-tsan-restore.outputs.cache-hit != 'true'
121+
if: steps.cache-numpy-tsan-restore.outputs.cache-hit != 'true'
163122
run: |
164123
set -eux
165124
cd numpy
@@ -177,6 +136,7 @@ jobs:
177136
export PATH=${GITHUB_WORKSPACE}/cpython-tsan/bin/:$PATH
178137
179138
python3 -m pip install uv~=0.5.30
139+
python3 -m pip install --upgrade pip
180140
python3 -m uv pip install -r requirements/build_requirements.txt
181141
182142
CC=clang-18 CXX=clang++-18 python3 -m pip wheel --wheel-dir dist -v . --no-build-isolation -Csetup-args=-Db_sanitize=thread -Csetup-args=-Dbuildtype=debugoptimized
@@ -205,17 +165,91 @@ jobs:
205165
206166
- name: Save TSAN Numpy wheel
207167
id: cache-numpy-tsan-save
208-
if: matrix.python-version == '3.15' && steps.cache-numpy-tsan-restore.outputs.cache-hit != 'true'
209-
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
168+
if: steps.cache-numpy-tsan-restore.outputs.cache-hit != 'true'
169+
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
210170
with:
211171
path: |
212172
./wheelhouse
213173
key: ${{ runner.os }}-numpy-tsan-${{ matrix.python-version }}-${{ hashFiles('numpy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
214174

215-
# --- End Conditional NumPy Steps ---
175+
- name: Restore cached Scipy
176+
if: ${{ matrix.python-version == '3.14' }}
177+
id: cache-scipy-restore
178+
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
179+
with:
180+
path: |
181+
./wheelhouse
182+
key: ${{ runner.os }}-scipy-${{ matrix.python-version }}-${{ hashFiles('scipy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
183+
184+
- name: Build Scipy wheel
185+
if: ${{ steps.cache-scipy-restore.outputs.cache-hit != 'true' && matrix.python-version == '3.14' }}
186+
env:
187+
DEBIAN_FRONTEND: noninteractive
188+
run: |
189+
# Install scipy dependencies:
190+
apt install -q -y gfortran libopenblas-dev liblapack-dev pkg-config --no-install-recommends
191+
192+
cd scipy
193+
194+
# If we restored cpython from cache, we need to get python interpreter from python-tsan.tgz
195+
if [ ! -d ${GITHUB_WORKSPACE}/cpython-tsan/bin/ ]; then
196+
echo "Extract cpython from python-tsan.tgz"
197+
pushd .
198+
ls ${GITHUB_WORKSPACE}/python-tsan.tgz
199+
cd ${GITHUB_WORKSPACE} && tar -xzf python-tsan.tgz
200+
ls ${GITHUB_WORKSPACE}/cpython-tsan/bin/
201+
popd
202+
fi
203+
204+
export PATH=${GITHUB_WORKSPACE}/cpython-tsan/bin/:$PATH
205+
206+
python3 -m pip install uv~=0.5.30
207+
python3 -m pip install --upgrade pip
208+
209+
python3 -m uv pip install -U --pre numpy --extra-index-url file://${GITHUB_WORKSPACE}/wheelhouse/
210+
python3 -m uv pip install cython pythran pybind11 meson-python ninja
211+
212+
python3 -m uv pip list | grep -E "(numpy|pythran|cython|pybind11)"
213+
214+
export CC=clang-18
215+
export CXX=clang++-18
216+
python3 -m pip wheel --wheel-dir dist -vvv . --no-build-isolation --no-deps -Csetup-args=-Dbuildtype=debugoptimized
217+
218+
# Create simple index and copy the wheel
219+
mkdir -p ${GITHUB_WORKSPACE}/wheelhouse/scipy
220+
221+
scipy_whl_name=($(cd dist && ls scipy*.whl))
222+
if [ -z "${scipy_whl_name}" ]; then exit 1; fi
223+
224+
echo "Built TSAN Scipy wheel: ${scipy_whl_name}"
225+
226+
cp dist/${scipy_whl_name} ${GITHUB_WORKSPACE}/wheelhouse/scipy
227+
228+
# Recreate wheelhouse index with Numpy and Scipy
229+
cat << EOF > ${GITHUB_WORKSPACE}/wheelhouse/index.html
230+
<!DOCTYPE html><html><body>
231+
<a href="numpy">numpy></a></br>
232+
<a href="scipy">scipy></a></br>
233+
</body></html>
234+
EOF
235+
236+
cat << EOF > ${GITHUB_WORKSPACE}/wheelhouse/scipy/index.html
237+
<!DOCTYPE html><html><body>
238+
<a href="${scipy_whl_name}">${scipy_whl_name}</a></br>
239+
</body></html>
240+
EOF
241+
242+
- name: Save Scipy wheel
243+
id: cache-scipy-save
244+
if: ${{ steps.cache-scipy-restore.outputs.cache-hit != 'true' && matrix.python-version == '3.14' }}
245+
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
246+
with:
247+
path: |
248+
./wheelhouse
249+
key: ${{ runner.os }}-scipy-${{ matrix.python-version }}-${{ hashFiles('scipy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
216250

217251
- name: Build Jax and run tests
218-
timeout-minutes: 180
252+
timeout-minutes: 120
219253
env:
220254
JAX_NUM_GENERATED_CASES: 1
221255
JAX_ENABLE_X64: true
@@ -226,57 +260,70 @@ jobs:
226260
set -x
227261
cd jax
228262
229-
# Calculate SHA256 of the Python tarball
230-
export PYTHON_SHA256=($(sha256sum ${GITHUB_WORKSPACE}/python-tsan.tgz | awk '{ print $1 }'))
263+
export PYTHON_SHA256=($(sha256sum ${GITHUB_WORKSPACE}/python-tsan.tgz))
231264
echo "Python sha256: ${PYTHON_SHA256}"
232-
echo "Python URL: $HERMETIC_PYTHON_URL"
233265
234-
# Configure Bazel
235266
python3 build/build.py build --configure_only \
236267
--python_version=${{ matrix.python-version }}-ft \
237-
--bazel_options=--repo_env=HERMETIC_PYTHON_URL="${HERMETIC_PYTHON_URL}" \
268+
--bazel_options=--repo_env=HERMETIC_PYTHON_URL="file://${GITHUB_WORKSPACE}/python-tsan.tgz" \
238269
--bazel_options=--repo_env=HERMETIC_PYTHON_SHA256=${PYTHON_SHA256} \
239270
--bazel_options=--repo_env=HERMETIC_PYTHON_PREFIX="cpython-tsan/" \
240271
--bazel_options=--color=yes \
241272
--bazel_options=--copt=-fsanitize=thread \
242273
--bazel_options=--linkopt="-fsanitize=thread" \
243-
--bazel_options=--copt=-g
274+
--bazel_options=--copt=-g \
275+
--clang_path=/usr/bin/clang-18
244276
245277
mkdir -p dist
278+
# Check whether we have numpy wheel or exit with error
279+
ls ${GITHUB_WORKSPACE}/wheelhouse/numpy/*.whl || exit 1
280+
cp -v ${GITHUB_WORKSPACE}/wheelhouse/numpy/*.whl dist/
281+
if [ "${{ matrix.python-version }}" == "3.14" ]; then
282+
# Check whether we have scipy wheel or exit with error
283+
ls ${GITHUB_WORKSPACE}/wheelhouse/scipy/*.whl || exit 1
284+
cp -v ${GITHUB_WORKSPACE}/wheelhouse/scipy/*.whl dist/
285+
286+
# Patch build/requirements_lock_3_14_ft.txt to use TSAN instrumented NumPy and Scipy
287+
sed -i "s|--extra-index-url.*|--extra-index-url file://${GITHUB_WORKSPACE}/wheelhouse/|" build/${{ matrix.requirements_lock_name }}.txt
246288
247-
# Copy custom Numpy wheel only if using 3.15 (and if it exists)
248-
if [ "${{ matrix.python-version }}" == "3.15" ]; then
249-
ls ${GITHUB_WORKSPACE}/wheelhouse/numpy/*.whl || exit 1
250-
cp -v ${GITHUB_WORKSPACE}/wheelhouse/numpy/*.whl dist/
289+
# We should install jpeg dev package to be able to build Pillow from source:
290+
apt install -q -y libjpeg-dev --no-install-recommends
251291
252-
# Patch requirements lock to use TSAN-instrumented NumPy
253-
sed -i "s|--extra-index-url.*|--extra-index-url file://${GITHUB_WORKSPACE}/wheelhouse/|" build/${{ matrix.requirements_lock_name }}.txt
292+
# Install scipy runtime dependencies (in case we restore scipy wheel from cache):
293+
apt install -q -y libopenblas-dev liblapack-dev --no-install-recommends
254294
fi
255295
256296
echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
257297
echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
258298
echo "JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS"
259299
300+
# Set symlink to the bazel executable
301+
bazel_exec=($(ls bazel-*))
302+
ln -s ${bazel_exec} bazel
260303
261304
# Check python version
262-
python_version="${{ matrix.python-version }}"
263-
bazel run --config=rbe_linux_x86_64 --@rules_python//python/config_settings:py_freethreaded="yes" @python_${python_version//./_}_host//:python -- -VV
305+
./bazel run --@rules_python//python/config_settings:py_freethreaded="yes" @python//:python3 -- -VV
264306
265307
# Check numpy version
266-
bazel cquery @pypi_numpy//:* | grep whl
308+
./bazel cquery @pypi_numpy//:* | grep whl
309+
310+
if [ "${{ matrix.python-version }}" == "3.14" ]; then
311+
# Check scipy version
312+
./bazel cquery @pypi_scipy//:* | grep whl
313+
fi
267314
268315
# Build JAX and run tests
269-
bazel test \
316+
./bazel test \
270317
--test_env=JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES \
271318
--test_env=JAX_ENABLE_X64=$JAX_ENABLE_X64 \
272319
--test_env=JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS \
273320
--test_env=PYTHON_GIL=0 \
274-
--test_env=TSAN_OPTIONS="halt_on_error=1,suppressions=tests/config/tsan-suppressions_${{ matrix.python-version }}.txt" \
321+
--test_env=TSAN_OPTIONS=halt_on_error=1,suppressions=$PWD/.github/workflows/tsan-suppressions_${{ matrix.python-version }}.txt \
275322
--test_env=JAX_TEST_NUM_THREADS=8 \
276-
--@rules_python//python/config_settings:py_freethreaded="yes" \
277323
--test_output=errors \
324+
--local_test_jobs=32 \
278325
--test_timeout=1800 \
279-
--config=rbe_linux_x86_64 \
280-
--test_tag_filters=-notsan \
281-
--run_under=//tests/config:oss_tsan_wrapper_sh \
282-
//tests:cpu_tests
326+
--config=resultstore \
327+
--config=ci_rbe_cache \
328+
--config=clang_local \
329+
//tests:cpu_tests

0 commit comments

Comments
 (0)