xla/tensorflow.bazelrc at main · atoiiayaa-design/xla · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
# TensorFlow Bazel configuration file.
# This file tries to group and simplify build options for TensorFlow
#
# ----CONFIG OPTIONS----
# macOS options:
#     macos_arm64:            Settings for macOS on ARM CPUs.
#
# Compiler options:
#     cuda_clang:             Use Clang when building CUDA code.
#     avx_linux:              Build with avx instruction set on linux.
#     avx_win:                Build with avx instruction set on windows
#
# Other build options:
#     short_logs:       Only log errors during build, skip warnings.
#     verbose_logs:     Show all compiler warnings during build.
#     monolithic:       Build all TF C++ code into a single shared object.
#     dynamic_kernels:  Try to link all kernels dynamically (experimental).
#     dbg:              Build with debug info
#
# TF version options:
#     v2: Build TF v2
#
# Feature and Third party library support options:
#     xla:          Build TF with XLA
#     tpu:          Build TF with TPU support
#     cuda:         Build with CUDA support.
#     cuda_clang:   Build with CUDA Clang support.
#     rocm:         Build with AMD GPU support (rocm)
#     mkl:          Enable full mkl support.
#     nonccl:       Disable nccl support.
#
# PJRT Releases:
#     pjrt_{x86,aarch64}: Used to build PJRT CPU plugins.
#     pjrt_{x86,aarch64}_{cuda12,cuda13}: Used to build PJRT GPU plugins.
#
#
# Remote build execution options (only configured to work with TF team projects for now.)
#     rbe_base:                       General RBE options shared by all flavors.
#     rbe_linux:                      General RBE options used on all linux builds.
#     rbe_win_base:                   General RBE options used on all Windows builds. Not to be used standalone.
#     rbe_windows_x86_cpu_2022:       Windows-specific RBE options.
#
#     rbe_linux_cpu:                  RBE options to build with only CPU support.
#     rbe_linux_cuda:                 RBE options to build with GPU support using clang.
#     rbe_linux_cuda_nvcc:            RBE options to build with GPU support using nvcc.
#
# Embedded Linux options (experimental and only tested with TFLite build yet)
#     elinux:          General Embedded Linux options shared by all flavors.
#     elinux_aarch64:  Embedded Linux options for aarch64 (ARM64) CPU support.
#     elinux_armhf:    Embedded Linux options for armhf (ARMv7) CPU support.
#
# Default build options. These are applied first and unconditionally.

# For projects which use TensorFlow as part of a Bazel build process, putting
# nothing in a bazelrc will default to a monolithic build. The following line
# opts in to modular op registration support by default.
common --define framework_shared_object=true
common --define tsl_protobuf_header_only=true

common --define=allow_oversize_protos=true

common --spawn_strategy=standalone
common -c opt

common --repo_env=USE_PYWRAP_RULES=True
common --copt=-DGRPC_BAZEL_BUILD
common --host_copt=-DGRPC_BAZEL_BUILD
common --action_env=GRPC_BAZEL_RUNTIME=1
common --repo_env=PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=upb
common --action_env=PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=upb
# Some targets have the same py source file, but use different
# configurations via `requires-` tags. This results in an action
# conflict when precompiling. Disable to avoid that problem.
# See https://github.com/bazel-contrib/rules_python/issues/2445
common --@rules_python//python/config_settings:precompile=force_disabled
common --@rules_python//python/config_settings:bootstrap_impl=script --repo_env=RULES_PYTHON_ENABLE_PIPSTAR=0

# Do not do this. This is how gRPC builds itself by default, but we don't want
# that as it would link protobuf into its own set of dynamic libraries, which
# would conflict with our protobuf linkage.
#common --define=use_fast_cpp_protos=true

common --incompatible_default_to_explicit_init_py

# For local build, without dependency on rbe: enable AVX, link librt and libm,
# and relax two Clang diagnostics.
# NOTE: avx_linux is listed exactly once. Expanding the same --config twice
# makes Bazel warn and adds its repeatable flags (--copt=-mavx,
# --host_copt=-mavx) to the command line twice.
common:linux_cpu --config=avx_linux
common:linux_cpu --linkopt=-lrt
common:linux_cpu --host_linkopt=-lrt
common:linux_cpu --linkopt=-lm
common:linux_cpu --host_linkopt=-lm
common:linux_cpu --copt=-Wno-error=unused-command-line-argument
common:linux_cpu --copt=-Wno-gnu-offsetof-extensions

# Make Bazel print out all options from rc files.
common --announce_rc

# TODO(mihaimaruseac): Document this option or remove if no longer needed
common --define=grpc_no_ares=true

# See https://github.com/bazelbuild/bazel/issues/7362 for information on what
# --incompatible_remove_legacy_whole_archive flag does.
# This flag is set to true in Bazel 1.0 and newer versions. We tried to migrate
# Tensorflow to the default, however test coverage wasn't enough to catch the
# errors.
# There is ongoing work on Bazel team's side to provide support for transitive
# shared libraries. As part of migrating to transitive shared libraries, we
# hope to provide a better mechanism for control over symbol exporting, and
# then tackle this issue again.
#
# TODO: Remove the following two lines once TF doesn't depend on Bazel wrapping
# all library archives in -whole_archive -no_whole_archive.
common --noincompatible_remove_legacy_whole_archive
common --features=-force_no_whole_archive

# TODO(mihaimaruseac): Document this option or remove if no longer needed
common --enable_platform_specific_config

# Apply the short_logs config (defined further down as
# --output_filter=DONT_MATCH_ANYTHING) to every invocation by default.
# TODO(mihaimaruseac): Document this option or remove if no longer needed
common --config=short_logs

# TF now has `cc_shared_library` targets, so it needs the experimental flag
# TODO(rostam): Remove when `cc_shared_library` is enabled by default
common --experimental_cc_shared_library

# cc_shared_library ensures no library is linked statically more than once.
common --experimental_link_static_libraries_once=false

# Prevent regressions on those two incompatible changes
# TODO: remove those flags when they are flipped in the default Bazel version TF uses.
common --incompatible_enforce_config_setting_visibility
# TODO: also enable this flag after fixing the visibility violations
# common --incompatible_config_setting_private_default_visibility

# Print a stacktrace when a test is killed
test --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1"


# Default options should come above this line.

# Sets the default Apple platform to macOS.
common:macos --apple_platform_type=macos

# Use cc toolchains from apple_support for Apple builds.
# https://github.com/bazelbuild/apple_support/tree/master?tab=readme-ov-file#bazel-6-setup
common:macos --apple_crosstool_top=@local_config_apple_cc//:toolchain
common:macos --crosstool_top=@local_config_apple_cc//:toolchain
common:macos --host_crosstool_top=@local_config_apple_cc//:toolchain

# gRPC on MacOS requires this #define
common:macos --copt=-DGRPC_BAZEL_BUILD

# Avoid hitting command line argument limit
common:macos --features=archive_param_file

# Settings for MacOS on ARM CPUs.
common:macos_arm64 --cpu=darwin_arm64
common:macos_arm64 --macos_minimum_os=11.0
common:macos_arm64 --platforms=@build_bazel_apple_support//platforms:darwin_arm64

# Config to use a mostly-static build and disable modular op registration
# support (this will revert to loading TensorFlow with RTLD_GLOBAL in Python).
# By default, TensorFlow will build with a dependence on
# //tensorflow:libtensorflow_framework.so.
common:monolithic --define framework_shared_object=false
common:monolithic --define tsl_protobuf_header_only=false
common:monolithic --experimental_link_static_libraries_once=false  # b/229868128

# Please note that MKL on MacOS is still not supported.
# If you would like to use a local MKL instead of downloading, please set the
# environment variable "TF_MKL_ROOT" every time before build.
common:mkl --define=build_with_mkl=true --define=enable_mkl=true
common:mkl --define=tensorflow_mkldnn_contraction_kernel=0
common:mkl --define=build_with_openmp=true
common:mkl -c opt

# config to build OneDNN backend with a user specified threadpool.
common:mkl_threadpool --define=build_with_mkl=true --define=enable_mkl=true
common:mkl_threadpool --define=tensorflow_mkldnn_contraction_kernel=0
common:mkl_threadpool --define=build_with_mkl_opensource=true
common:mkl_threadpool -c opt

# Config setting to build oneDNN with Compute Library for the Arm Architecture (ACL).
# with Eigen threadpool support
common:mkl_aarch64_threadpool --define=build_with_mkl_aarch64=true
common:mkl_aarch64_threadpool -c opt

# This is an alias for the mkl_aarch64_threadpool build.
common:mkl_aarch64 --config=mkl_aarch64_threadpool

# Default CUDA, CUDNN, NCCL and NVSHMEM versions.
common:cuda_version --repo_env=HERMETIC_CUDA_VERSION="12.9.1"
common:cuda_version --repo_env=HERMETIC_CUDNN_VERSION="9.8.0"
common:cuda_version --repo_env=HERMETIC_NVSHMEM_VERSION="3.2.5"
common:cuda_version --repo_env=HERMETIC_NCCL_VERSION="2.28.9"

# CUDA: This config refers to building CUDA op kernels with nvcc.
common:cuda --repo_env TF_NEED_CUDA=1
common:cuda --@rules_ml_toolchain//common:enable_cuda
common:cuda --config=cuda_version
# This flag is needed to include CUDA libraries.
common:cuda --@local_config_cuda//cuda:include_cuda_libs=true

# This configuration is used for building the wheels.
common:cuda_wheel --@local_config_cuda//cuda:include_cuda_libs=false

common:hermetic_cuda_umd --@cuda_driver//:include_cuda_umd_libs=true

# CUDA: This config refers to building CUDA op kernels with clang.
common:cuda_clang --config=cuda
common:cuda_clang --@local_config_cuda//:cuda_compiler=clang
common:cuda_clang --copt=-Qunused-arguments
# Select supported compute capabilities (supported graphics cards).
# This is the same as the official TensorFlow builds.
# See https://developer.nvidia.com/cuda-gpus#compute
# `compute_XY` enables PTX embedding in addition to SASS. PTX
# is forward compatible beyond the current compute capability major
# release while SASS is only forward compatible inside the current
# major release. Example: sm_80 kernels can run on sm_89 GPUs but
# not on sm_90 GPUs. compute_80 kernels though can also run on sm_90 GPUs.
common:cuda_clang --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
# Permit newer CUDA versions than Clang is aware of
common:cuda_clang --copt="-Wno-unknown-cuda-version"
# Set lld as the linker.
common:cuda_clang --host_linkopt="-fuse-ld=lld"
common:cuda_clang --host_linkopt="-lm"
common:cuda_clang --linkopt="-fuse-ld=lld"
common:cuda_clang --linkopt="-lm"

common:cuda_clang_local --config=cuda_clang
common:cuda_clang_local --config=clang_local
common:cuda_clang_local --crosstool_top=@local_config_cuda//crosstool:toolchain

# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
# The default CUDA/CUDNN/NVSHMEM/NCCL versions are inherited through
# cuda_clang -> cuda -> cuda_version, so cuda_version is not expanded a second
# time here (Bazel warns when a config is expanded more than once).
common:cuda_clang_official --config=cuda_clang
common:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
common:cuda_clang_official --crosstool_top="@local_config_cuda//crosstool:toolchain"

# Build with nvcc for CUDA and clang for host
common:cuda_nvcc --config=cuda
common:cuda_nvcc --action_env=TF_NVCC_CLANG="1"
common:cuda_nvcc --@local_config_cuda//:cuda_compiler=nvcc
# Old config for backward compatibility
common:nvcc_clang --config=cuda_nvcc

# Deprecated cuda_nvcc config with non-hermetic toolchains
common:cuda_nvcc_clang_local --config=cuda_nvcc
common:cuda_nvcc_clang_local --config=clang_local
common:cuda_nvcc_clang_local --crosstool_top=@local_config_cuda//crosstool:toolchain

# Debug config. Enables Bazel's 'dbg' compilation mode, build with debugging enabled
common:dbg -c dbg
# Compiling all dependencies with debug info can cause linker failures
# and significantly increase binary size.
# The following setting disables debug info (-g0) for all files,
# except those under the xla/* path. This helps reduce the size of debug sections
# in the ELF binary, which can otherwise become too large and lead to errors.
# For more details, see: https://github.com/tensorflow/tensorflow/issues/48919.
common:dbg --per_file_copt=+.*,-xla.*@-g0
# AWS SDK must be compiled in release mode. see: https://github.com/tensorflow/tensorflow/issues/37498
common:dbg --copt -DDEBUG_BUILD

# AddressSanitizer config: compiles and links with -fsanitize=address and
# defines ADDRESS_SANITIZER for the code being built. The build also uses -O1,
# -g and -fno-omit-frame-pointer, and --strip=never so binaries are not
# stripped.
common:asan --strip=never
common:asan --copt -fsanitize=address
common:asan --copt -DADDRESS_SANITIZER
common:asan --copt -O1
common:asan --copt -g
common:asan --copt -fno-omit-frame-pointer
common:asan --linkopt -fsanitize=address

common:rocm_base --config=clang_local
common:rocm_base --copt=-Wno-gnu-offsetof-extensions
common:rocm_base --crosstool_top=@local_config_rocm//crosstool:toolchain
common:rocm_base --define=using_rocm_hipcc=true
common:rocm_base --define=tensorflow_mkldnn_contraction_kernel=0
common:rocm_base --repo_env TF_NEED_ROCM=1

common:rocm_clang_official --config=rocm_base
common:rocm_clang_official --action_env=CLANG_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
common:rocm_clang_official --action_env=HIPCC_COMPILE_FLAGS_APPEND="--offload-compress"
common:rocm_clang_official --action_env=TF_ROCM_CLANG="1"
common:rocm_clang_official --linkopt="-fuse-ld=lld"
common:rocm_clang_official --host_linkopt="-fuse-ld=lld"

common:rocm --config=rocm_clang_official
common:rocm_ci --config=rocm

common:rocm_ci_hermetic --dynamic_mode=off
common:rocm_ci_hermetic --config=rocm_clang_official
common:rocm_ci_hermetic --repo_env="ROCM_DISTRO_VERSION=rocm_7.10.0_gfx90X"
common:rocm_ci_hermetic --@local_config_rocm//rocm:rocm_path_type=hermetic

# This config option is used for SYCL as GPU backend.
# SYCL Configuration (non-hermetic)
common:sycl --@rules_ml_toolchain//common:enable_sycl=True
common:sycl --repo_env=TF_NEED_SYCL=1
common:sycl --define=tensorflow_mkldnn_contraction_kernel=0
common:sycl --cxxopt=-std=c++17
common:sycl --host_cxxopt=-std=c++17
common:sycl --repo_env=SYCL_BUILD_HERMETIC=0

# Enable Clang for host and icpx for SYCL
common:icpx_clang --repo_env TF_ICPX_CLANG=1
common:icpx_clang --copt=-fclang-abi-compat=17

# Hermetic SYCL Configuration
common:sycl_hermetic --config=sycl
common:sycl_hermetic --repo_env=SYCL_BUILD_HERMETIC=1

# Options to disable default on features
common:nonccl --define=no_nccl_support=true

# Suppress compiler warnings: for host tools on Linux, for target code on
# macOS, and for both on Windows.
common:linux --host_copt=-w
common:macos --copt=-w
common:windows --copt=/W0
common:windows --host_copt=/W0

# Linux ARM64 specific options
common:linux_arm64 --copt="-mtune=generic" --copt="-march=armv8-a" --copt="-O3"


# On Windows, `__cplusplus` is wrongly defined without this switch
# See https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/
common:windows --copt=/Zc:__cplusplus
common:windows --host_copt=/Zc:__cplusplus

# Tensorflow uses M_* math constants that only get defined by MSVC headers if
# _USE_MATH_DEFINES is defined.
common:windows --copt=/D_USE_MATH_DEFINES
common:windows --host_copt=/D_USE_MATH_DEFINES

# Windows has a relatively short command line limit, which TF has begun to hit.
# See https://docs.bazel.build/versions/main/windows.html
common:windows --features=compiler_param_file
common:windows --features=archive_param_file

# Speed Windows compile times. Available in VS 16.4 (we are on 16.11). See
# https://groups.google.com/a/tensorflow.org/d/topic/build/SsW98Eo7l3o/discussion
common:windows --copt=/d2ReducedOptimizeHugeFunctions
common:windows --host_copt=/d2ReducedOptimizeHugeFunctions

# Before VS 2017 15.8, the member "type" would non-conformingly have an
# alignment of only alignof(max_align_t). VS 2017 15.8 was fixed to handle this
# correctly, but the fix inherently changes layout and breaks binary
# compatibility (*only* for uses of aligned_storage with extended alignments).
common:windows --copt=-D_ENABLE_EXTENDED_ALIGNED_STORAGE
common:windows --host_copt=-D_ENABLE_EXTENDED_ALIGNED_STORAGE

# Enable the runfiles symlink tree on Windows. This makes it possible to build
# the pip package on Windows without an intermediate data-file archive, as the
# build_pip_package script in its current form (as of Aug 2023) uses the
# runfiles symlink tree to decide what to put into the Python wheel.
startup --windows_enable_symlinks
common:windows --enable_runfiles
common:windows --nobuild_python_zip
common:windows --dynamic_mode=off

# Default paths for TF_SYSTEM_LIBS
common:linux --define=PREFIX=/usr
common:linux --define=LIBDIR=$(PREFIX)/lib
common:linux --define=INCLUDEDIR=$(PREFIX)/include
common:linux --define=PROTOBUF_INCLUDE_PATH=$(PREFIX)/include
common:macos --define=PREFIX=/usr
common:macos --define=LIBDIR=$(PREFIX)/lib
common:macos --define=INCLUDEDIR=$(PREFIX)/include
common:macos --define=PROTOBUF_INCLUDE_PATH=$(PREFIX)/include
# TF_SYSTEM_LIBS do not work on windows.

# By default, build TF in C++ 17 mode.
common:linux --cxxopt=-std=c++17
common:linux --host_cxxopt=-std=c++17
common:macos --cxxopt=-std=c++17
common:macos --host_cxxopt=-std=c++17
common:windows --cxxopt=/std:c++17
common:windows --host_cxxopt=/std:c++17

# On windows, we still link everything into a single DLL.
common:windows --config=monolithic

# Make sure to include as little of windows.h as possible
common:windows --copt=-DWIN32_LEAN_AND_MEAN
common:windows --host_copt=-DWIN32_LEAN_AND_MEAN
common:windows --copt=-DNOGDI
common:windows --host_copt=-DNOGDI

# MSVC (Windows): Standards-conformant preprocessor mode
# See https://docs.microsoft.com/en-us/cpp/preprocessor/preprocessor-experimental-overview
common:windows --copt=/Zc:preprocessor
common:windows --host_copt=/Zc:preprocessor

# Misc build options we need for windows.
common:windows --linkopt=/DEBUG
common:windows --host_linkopt=/DEBUG
common:windows --linkopt=/OPT:REF
common:windows --host_linkopt=/OPT:REF
common:windows --linkopt=/OPT:ICF
common:windows --host_linkopt=/OPT:ICF

# Verbose failure logs when something goes wrong
common:windows --verbose_failures

# Work around potential issues with large command lines on windows.
# See: https://github.com/bazelbuild/bazel/issues/5163
# NOTE(review): compiler_param_file is already enabled for windows in the
# command-line-limit section earlier in this file (alongside
# archive_param_file); this line repeats that setting.
common:windows --features=compiler_param_file

# Do not risk cache corruption. See:
# https://github.com/bazelbuild/bazel/issues/3360
common:linux --experimental_guard_against_concurrent_changes

# Configure short or long logs
common:short_logs --output_filter=DONT_MATCH_ANYTHING
common:verbose_logs --output_filter=

# Instruction set optimizations
# TODO(gunan): Create a feature in toolchains for avx/avx2 to
#   avoid having to define linux/win separately.
common:avx_linux --copt=-mavx
common:avx_linux --host_copt=-mavx
common:avx_win --copt=/arch:AVX

common:win_clang_base --@com_google_protobuf//build_defs:use_dlls=True
common:win_clang_base --@com_google_absl//absl:use_dlls=True
common:win_clang_base --linkopt=/demangle:no --host_linkopt=/demangle:no
common:win_clang_base --linkopt=/errorlimit:0 --host_linkopt=/errorlimit:0
common:win_clang_base --copt=/clang:-Weverything
common:win_clang_base --host_copt=/clang:-Weverything
common:win_clang_base --compiler=clang-cl
common:win_clang_base --linkopt=/FORCE:MULTIPLE
common:win_clang_base --host_linkopt=/FORCE:MULTIPLE
common:win_clang_base --action_env=PATHEXT=.COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.PYW
test:win_clang_base --linkopt=/FORCE:MULTIPLE
test:win_clang_base --host_linkopt=/FORCE:MULTIPLE
test:win_clang_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true --test_summary=short

common:win_clang --config=win_clang_base
common:win_clang --extra_toolchains=@local_config_cc//:cc-toolchain-x64_windows-clang-cl
common:win_clang --extra_execution_platforms=//tensorflow/tools/toolchains/win:x64_windows-clang-cl
common:win_clang --host_platform=//tensorflow/tools/toolchains/win:x64_windows-clang-cl

common:windows_x86_cpu_2022 --config=win_clang_base
common:windows_x86_cpu_2022 --crosstool_top="//tensorflow/tools/toolchains/win2022/20241118:toolchain"
common:windows_x86_cpu_2022 --extra_toolchains="//tensorflow/tools/toolchains/win2022/20241118:cc-toolchain-x64_windows-clang-cl"
common:windows_x86_cpu_2022 --extra_execution_platforms="//tensorflow/tools/toolchains/win2022:windows_ltsc2022_clang"
common:windows_x86_cpu_2022 --host_platform="//tensorflow/tools/toolchains/win2022:windows_ltsc2022_clang"
common:windows_x86_cpu_2022 --platforms="//tensorflow/tools/toolchains/win2022:windows_ltsc2022_clang"

common:xla_windows_x86_cpu_2022 --config=win_clang_base
common:xla_windows_x86_cpu_2022 --crosstool_top="//tools/toolchains/win2022/20241118:toolchain"
common:xla_windows_x86_cpu_2022 --extra_toolchains="//tools/toolchains/win2022/20241118:cc-toolchain-x64_windows-clang-cl"
common:xla_windows_x86_cpu_2022 --extra_execution_platforms="//tools/toolchains/win2022:windows_ltsc2022_clang"
common:xla_windows_x86_cpu_2022 --host_platform="//tools/toolchains/win2022:windows_ltsc2022_clang"
common:xla_windows_x86_cpu_2022 --platforms="//tools/toolchains/win2022:windows_ltsc2022_clang"

# Options to build TensorFlow 1.x or 2.x.
# TODO(kanglan): Change v2's define to default behavior
common:v2 --define=tf_api_version=2 --action_env=TF2_BEHAVIOR=1

# Enable all targets in XLA
common:cpu_cross --define=with_cross_compiler_support=true

# BEGIN TF REMOTE BUILD EXECUTION OPTIONS
# Options when using remote execution
# WARNING: THESE OPTIONS WONT WORK IF YOU DO NOT HAVE PROPER AUTHENTICATION AND PERMISSIONS

# Allow creation of resultstore URLs for any bazel invocation
common:resultstore --google_default_credentials
common:resultstore --bes_backend=buildeventservice.googleapis.com
common:resultstore --bes_instance_name="tensorflow-testing"
common:resultstore --bes_results_url="https://source.cloud.google.com/results/invocations"
common:resultstore --bes_timeout=600s

# Flag to enable remote config
common --experimental_repo_remote_exec

common:use_tar_archive_files --repo_env=USE_CUDA_TAR_ARCHIVE_FILES=1
common:use_tar_archive_files --repo_env=USE_NVSHMEM_TAR_ARCHIVE_FILES=1
common:use_tar_archive_files --repo_env=USE_LLVM_TAR_ARCHIVE_FILES=1
common:use_tar_archive_files --repo_env=USE_MIRRORED_TAR_ARCHIVE_FILES=1

# General RBE options shared by all flavors.
common:rbe_base --config=resultstore
common:rbe_base --define=EXECUTOR=remote
common:rbe_base --jobs=800
common:rbe_base --remote_executor=grpcs://remotebuildexecution.googleapis.com
common:rbe_base --remote_timeout=3600
common:rbe_base --spawn_strategy=remote,worker,standalone,local
# Attempt to minimize the amount of data transfer between bazel and the remote
# workers:
common:rbe_base --remote_download_toplevel
test:rbe_base --test_env=USER=anon

# TODO(kanglan): Check if we want to merge rbe_linux into rbe_linux_cpu.
common:rbe_linux --config=rbe_base
common:rbe_linux --action_env=PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/go/bin"
# Non-rbe settings we should include because we do not run configure
common:rbe_linux --config=avx_linux
# TODO(gunan): Check why we need this specified in rbe, but not in other builds.
common:rbe_linux --linkopt=-lrt
common:rbe_linux --host_linkopt=-lrt
common:rbe_linux --linkopt=-lm
common:rbe_linux --host_linkopt=-lm

common:rbe_linux_cpu --config=rbe_linux
# Linux cpu and cuda builds share the same toolchain now.
common:rbe_linux_cpu --extra_execution_platforms="@ml_build_config_platform//:platform"
common:rbe_linux_cpu --host_platform="@ml_build_config_platform//:platform"
common:rbe_linux_cpu --platforms="@ml_build_config_platform//:platform"
# This is needed for all Clang17 builds but must not be present in GCC builds.
common:rbe_linux_cpu --copt=-Wno-error=unused-command-line-argument
# This was added in clang-16 by https://reviews.llvm.org/D133574.
# Can be removed once upb is updated, since a type definition is used within
# offsetof in the current version of upb.
# See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183.
common:rbe_linux_cpu --copt=-Wno-gnu-offsetof-extensions
# Python config is the same across all containers because the binary is the same
common:rbe_linux_cpu --python_path="/usr/bin/python3"
# These you may need to change for your own GCP project.
common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instances/default_instance

# Deprecated RBE config with non-hermetic toolchains.
common:rbe_linux_cpu_clang_local --config=clang_local
common:rbe_linux_cpu_clang_local --config=rbe_linux_cpu
common:rbe_linux_cpu_clang_local --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
common:rbe_linux_cpu_clang_local --host_crosstool_top="@local_config_cuda//crosstool:toolchain"
common:rbe_linux_cpu_clang_local --crosstool_top="@local_config_cuda//crosstool:toolchain"
common:rbe_linux_cpu_clang_local --extra_toolchains="@local_config_cuda//crosstool:toolchain-linux-x86_64"
common:rbe_linux_cpu_clang_local --repo_env=CC="/usr/lib/llvm-18/bin/clang"
common:rbe_linux_cpu_clang_local --repo_env=TF_SYSROOT="/dt9"

common:rbe_linux_cuda --config=cuda_clang_official
common:rbe_linux_cuda --config=rbe_linux_cpu
# dt9 is based on glibc 2.17, which is outdated and incompatible with CUDA 12.8.0
# unset TF_SYSROOT
common:rbe_linux_cuda --repo_env=TF_SYSROOT=
# For Remote build execution -- GPU configuration
common:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
# Update UMD version when RBE CUDA driver is updated.
common:rbe_linux_cuda --repo_env=HERMETIC_CUDA_UMD_VERSION="13.0.2"

common:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
common:rbe_linux_cuda_nvcc --config=cuda_nvcc
common:rbe_linux_cuda_nvcc --repo_env TF_NCCL_USE_STUB=1

common:rbe_win_base --config=clang_local
common:rbe_win_base --config=rbe_base
common:rbe_win_base --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
common:rbe_win_base --shell_executable=C:\\tools\\msys64\\usr\\bin\\bash.exe
common:rbe_win_base --remote_instance_name=projects/tensorflow-testing/instances/windows
# Don't build the python zip archive in the RBE build.
common:rbe_win_base --remote_download_minimal
common:rbe_win_base --enable_runfiles
common:rbe_win_base --nobuild_python_zip
# Don't build the runfile links in the RBE build which is expensive on Windows.
common:rbe_win_base --nobuild_runfile_links
common:rbe_win_base --define=override_eigen_strong_inline=true

common:rbe_windows_x86_cpu_2022 --config=rbe_win_base --config=windows_x86_cpu_2022

# Override the remote instance and BES instance names to use the new RBE pools
common:rbe_windows_x86_cpu_2022 --remote_instance_name=projects/ml-oss-rbe-general/instances/default_instance
common:rbe_windows_x86_cpu_2022 --bes_instance_name="ml-oss-rbe-general"

common:rbe_windows_x86_cpu_2022 --host_platform="//tensorflow/tools/toolchains/win2022:windows_ltsc2022_clang_new"
common:rbe_windows_x86_cpu_2022 --extra_execution_platforms="//tensorflow/tools/toolchains/win2022:windows_ltsc2022_clang_new"
common:rbe_windows_x86_cpu_2022 --platforms="//tensorflow/tools/toolchains/win2022:windows_ltsc2022_clang_new"

common:rbe_xla_windows_x86_cpu_2022 --config=rbe_win_base --config=xla_windows_x86_cpu_2022
common:rbe_xla_windows_x86_cpu_2022 --remote_instance_name=projects/ml-oss-rbe-general/instances/default_instance
common:rbe_xla_windows_x86_cpu_2022 --bes_instance_name="ml-oss-rbe-general"

common:rbe_xla_windows_x86_cpu_2022 --host_platform="//tools/toolchains/win2022:windows_ltsc2022_clang_new"
common:rbe_xla_windows_x86_cpu_2022 --extra_execution_platforms="//tools/toolchains/win2022:windows_ltsc2022_clang_new"
common:rbe_xla_windows_x86_cpu_2022 --platforms="//tools/toolchains/win2022:windows_ltsc2022_clang_new"

# END TF REMOTE BUILD EXECUTION OPTIONS

# TFLite build configs for generic embedded Linux
common:elinux --config=clang_local
common:elinux --crosstool_top=@local_config_embedded_arm//:toolchain
common:elinux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
common:elinux_aarch64 --config=elinux
common:elinux_aarch64 --cpu=aarch64
common:elinux_aarch64 --platforms=@org_tensorflow//tensorflow/tools/toolchains/linux:linux_aarch64
common:elinux_armhf --config=elinux
common:elinux_armhf --cpu=armhf
common:elinux_armhf --platforms=@org_tensorflow//tensorflow/tools/toolchains/linux:linux_armhf
common:elinux_armhf --copt -mfp16-format=ieee

# Config-specific options should come above this line.

# Load rc file written by ./configure.
try-import %workspace%/.tf_configure.bazelrc

# Load rc file with user-specific options.
try-import %workspace%/.bazelrc.user

common:release_cpu_macos --config=avx_linux

# Base build configs for macOS
common:release_macos_base --define=no_nccl_support=true --output_filter=^$

# Ensure release_base is set on mac
common:release_macos_base --config=cpu_cross
common:release_macos_base --config=clang_local

# Build configs for macOS x86
# Start from the shared macOS base config.
common:release_macos_x86 --config=release_macos_base
# Build with the AVX instruction set when on macOS x86
common:release_macos_x86 --config=avx_linux
# Select the darwin CPU and the darwin_x86_64 platform.
common:release_macos_x86 --cpu=darwin
common:release_macos_x86 --platforms=@build_bazel_apple_support//platforms:darwin_x86_64
# Target Catalina (10.15) as the minimum compatible OS version; the SDK version
# is set to the same value.
common:release_macos_x86 --macos_minimum_os=10.15
common:release_macos_x86 --macos_sdk_version=10.15

# Build configs for macOS Arm64
# Start from the shared macOS base config, then select the darwin_arm64 CPU and
# platform.
common:release_macos_arm64 --config=release_macos_base
common:release_macos_arm64 --cpu=darwin_arm64
common:release_macos_arm64 --platforms=@build_bazel_apple_support//platforms:darwin_arm64
# Sets the tensorflow_mkldnn_contraction_kernel define to 0; presumably this
# disables the MKL-DNN contraction kernel on Arm64 -- confirm.
common:release_macos_arm64 --define=tensorflow_mkldnn_contraction_kernel=0
# Target Monterey (12.0) as the minimum compatible OS version; the SDK version
# is set to the same value.
common:release_macos_arm64 --macos_minimum_os=12.0
common:release_macos_arm64 --macos_sdk_version=12.0

# Base test configs for macOS
# Print verbose failure details and size the local test job count from the
# number of host CPUs.
test:release_macos_base --verbose_failures=true --local_test_jobs=HOST_CPUS
# Timeouts in seconds for short, moderate, long, and eternal tests; print test
# output only for failing tests.
test:release_macos_base --test_timeout=300,450,1200,3600 --test_output=errors
# Build only the test targets, and keep going after a failure.
test:release_macos_base --build_tests_only --keep_going
# Give a failing test up to 3 attempts before reporting it as failed.
test:release_macos_base --flaky_test_attempts=3

# Test configs for macOS x86
# The x86 test config reuses the base test settings unchanged.
test:release_macos_x86 --config=release_macos_base

# BEGIN TF CACHE HELPER OPTIONS
# Options for using a remote build cache
# WARNING: THESE OPTIONS WON'T WORK IF YOU DO NOT HAVE PROPER AUTHENTICATION AND PERMISSIONS

# Use --config=tf_public_cache to try and use the TensorFlow public build cache
# to build TensorFlow. Look at ci/official/envs to find which types of jobs
# push to the cache. For macOS, use --config=tf_public_macos_cache
# This config is read-only: uploading local results is disabled.
common:tf_public_cache --remote_cache="https://storage.googleapis.com/tensorflow-devinfra-bazel-cache/january2024" --remote_upload_local_results=false
# Cache pushes are limited to TF's CI system.
# The push variant re-enables uploads and uses Google default credentials.
common:tf_public_cache_push --config=tf_public_cache --remote_upload_local_results=true --google_default_credentials
# Public cache for macOS builds (uploads disabled, like tf_public_cache)
common:tf_public_macos_cache --remote_cache="https://storage.googleapis.com/tensorflow-macos-bazel-cache/oct2023" --remote_upload_local_results=false
# Cache pushes are limited to TF's CI system.
# The push variant re-enables uploads and uses Google default credentials.
common:tf_public_macos_cache_push --config=tf_public_macos_cache --remote_upload_local_results=true --google_default_credentials

# END TF CACHE HELPER OPTIONS

# START CROSS-COMPILE CONFIGS
# Set execution platform to Linux x86
# Note: Many of the "host_" flags, such as "host_cpu" and "host_crosstool_top",
# appear to actually specify the execution platform details. This seems to be
# because these flags are old and predate the distinction between host and
# execution platform.
common:cross_compile_base --config=clang_local
common:cross_compile_base --host_cpu=k8
common:cross_compile_base --host_crosstool_top=//tools/toolchains/cross_compile/cc:cross_compile_toolchain_suite
common:cross_compile_base --extra_execution_platforms=//tools/toolchains/cross_compile/config:linux_x86_64

# RBE base for cross-compilation: builds on rbe_base, sets
# BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1 in the repository environment, and
# selects the remote instance.
common:rbe_cross_compile_base --config=rbe_base
common:rbe_cross_compile_base --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
common:rbe_cross_compile_base --remote_instance_name=projects/tensorflow-testing/instances/default_instance

# Test-related settings below this point
# We cannot run cross-compiled tests on the remote Linux x86 VMs so we need to
# force all tests to run locally on the Aarch64 host.
test:rbe_cross_compile_base --strategy=TestRunner=local --build_tests_only
# Print verbose failure details, size the local test job count from the number
# of host CPUs, and print test output only for failing tests.
test:rbe_cross_compile_base --verbose_failures=true --local_test_jobs=HOST_CPUS --test_output=errors

# START LINUX AARCH64 CROSS-COMPILE CONFIGS
# Start from the shared cross-compile base config.
common:cross_compile_linux_arm64 --config=cross_compile_base

# Set the target CPU to Aarch64
# The target crosstool is the same toolchain suite that cross_compile_base uses
# as the host crosstool.
common:cross_compile_linux_arm64 --platforms=//tools/toolchains/cross_compile/config:linux_aarch64
common:cross_compile_linux_arm64 --cpu=aarch64
common:cross_compile_linux_arm64 --crosstool_top=//tools/toolchains/cross_compile/cc:cross_compile_toolchain_suite

# RBE cross-compile configs for Linux Aarch64
# Combines the local cross-compile config with the RBE cross-compile base.
common:rbe_cross_compile_linux_arm64 --config=cross_compile_linux_arm64
common:rbe_cross_compile_linux_arm64 --config=rbe_cross_compile_base
test:rbe_cross_compile_linux_arm64 --config=rbe_cross_compile_base

# END LINUX AARCH64 CROSS-COMPILE CONFIGS

# START MACOS CROSS-COMPILE CONFIGS
# Start from the shared cross-compile base config and add the nonccl config.
common:cross_compile_macos_x86 --config=cross_compile_base
common:cross_compile_macos_x86 --config=nonccl
# Target Catalina (10.15) as the minimum supported OS
common:cross_compile_macos_x86 --action_env  MACOSX_DEPLOYMENT_TARGET=10.15

# Set the target CPU to Darwin x86
common:cross_compile_macos_x86 --platforms=//tools/toolchains/cross_compile/config:darwin_x86_64
common:cross_compile_macos_x86 --cpu=darwin
common:cross_compile_macos_x86 --crosstool_top=//tools/toolchains/cross_compile/cc:cross_compile_toolchain_suite
# When RBE cross-compiling for macOS, we need to explicitly register the
# toolchain. Otherwise, oddly, RBE complains that a "docker container must be
# specified".
common:cross_compile_macos_x86 --extra_toolchains=//tools/toolchains/cross_compile/config:macos-x86-cross-compile-cc-toolchain
# Map --platforms=darwin_x86_64 to --cpu=darwin and vice-versa to make select()s
# and transitions that use these flags work.
common:cross_compile_macos_x86 --platform_mappings=tools/toolchains/cross_compile/config/platform_mappings

# RBE cross-compile configs for Darwin x86
# Start from the local macOS cross-compile config and download only the minimal
# set of remote outputs.
common:rbe_cross_compile_macos_x86 --config=cross_compile_macos_x86 --remote_download_minimal
# Clear the BES backend and results URL, set the BES timeout to 0s, and request
# minimal remote build event uploads.
common:rbe_cross_compile_macos_x86 --bes_backend="" --bes_results_url="" --bes_timeout="0s"
common:rbe_cross_compile_macos_x86 --experimental_remote_build_event_upload="minimal"
# Add the RBE cross-compile base, then set the BES upload mode.
common:rbe_cross_compile_macos_x86 --config=rbe_cross_compile_base
common:rbe_cross_compile_macos_x86 --bes_upload_mode=nowait_for_upload_complete
test:rbe_cross_compile_macos_x86 --config=rbe_cross_compile_base
# Increase the test timeout as tests often take longer on mac.
test:rbe_cross_compile_macos_x86 --test_timeout=300,450,1200,3600
# Limit jobs to 100 to avoid running into "out of memory" issues (b/316266643)
common:rbe_cross_compile_macos_x86 --jobs=100
test:rbe_cross_compile_macos_x86 --jobs=100
# END MACOS CROSS-COMPILE CONFIGS
# END CROSS-COMPILE CONFIGS

# START PJRT RELEASE CONFIGS
# Release and nightly both build with -c opt; they differ only in the
# include_cuda_libs setting (false for release, true for nightly).
build:pjrt_release -c opt --@local_config_cuda//cuda:include_cuda_libs=false
build:pjrt_nightly -c opt --@local_config_cuda//cuda:include_cuda_libs=true

# Target CPU selection.
build:pjrt_x86 --cpu=k8
build:pjrt_aarch64 --cpu=aarch64

# CUDA 12 settings: the cuda_nvcc config plus hermetic CUDA 12.9.1,
# cuDNN 9.8.0, NVSHMEM 3.2.5, and the compute capability list below.
build:pjrt_cuda12 --config=cuda_nvcc
build:pjrt_cuda12 --repo_env=HERMETIC_CUDA_VERSION="12.9.1"
build:pjrt_cuda12 --repo_env=HERMETIC_CUDNN_VERSION="9.8.0"
build:pjrt_cuda12 --repo_env=HERMETIC_NVSHMEM_VERSION="3.2.5"
build:pjrt_cuda12 --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_50,sm_60,sm_70,sm_80,sm_90,sm_100,compute_120"

# CUDA 13 settings: the cuda_nvcc config plus hermetic CUDA 13.0.0,
# cuDNN 9.12.0, and NVSHMEM 3.3.20. Compared with the CUDA 12 list, the compute
# capability list drops sm_50, sm_60, and sm_70 and adds sm_75.
build:pjrt_cuda13 --config=cuda_nvcc
build:pjrt_cuda13 --repo_env=HERMETIC_CUDA_VERSION="13.0.0"
build:pjrt_cuda13 --repo_env=HERMETIC_CUDNN_VERSION="9.12.0"
build:pjrt_cuda13 --repo_env=HERMETIC_NVSHMEM_VERSION="3.3.20"
build:pjrt_cuda13 --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_75,sm_80,sm_90,sm_100,compute_120"

# Composite configs: each one combines a release/nightly flavor with a CPU
# architecture.
build:pjrt_x86_release --config=pjrt_release --config=pjrt_x86
build:pjrt_aarch64_release --config=pjrt_release --config=pjrt_aarch64
build:pjrt_x86_nightly --config=pjrt_nightly --config=pjrt_x86
build:pjrt_aarch64_nightly --config=pjrt_nightly --config=pjrt_aarch64

# Release configs per architecture and CUDA version.
build:pjrt_x86_cuda12_release --config=pjrt_x86_release --config=pjrt_cuda12
build:pjrt_aarch64_cuda12_release --config=pjrt_aarch64_release --config=pjrt_cuda12
build:pjrt_x86_cuda13_release --config=pjrt_x86_release --config=pjrt_cuda13
build:pjrt_aarch64_cuda13_release --config=pjrt_aarch64_release --config=pjrt_cuda13

# Nightly configs per architecture and CUDA version.
build:pjrt_x86_cuda12_nightly --config=pjrt_x86_nightly --config=pjrt_cuda12
build:pjrt_aarch64_cuda12_nightly --config=pjrt_aarch64_nightly --config=pjrt_cuda12
build:pjrt_x86_cuda13_nightly --config=pjrt_x86_nightly --config=pjrt_cuda13
build:pjrt_aarch64_cuda13_nightly --config=pjrt_aarch64_nightly --config=pjrt_cuda13
# END PJRT RELEASE CONFIGS