diff --git a/.bazelrc b/.bazelrc
index 612a1e59aeb806..4ff08fd221f5dc 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -231,7 +231,7 @@ common:apple-toolchain --host_crosstool_top=@local_config_apple_cc//:toolchain
 common:macos_arm64 --cpu=darwin_arm64
 common:macos_arm64 --macos_minimum_os=11.0
 common:macos_arm64 --config=clang_local
-common:macos_arm64 --platforms=@build_bazel_apple_support//configs/platforms:darwin_arm64
+common:macos_arm64 --platforms=@build_bazel_apple_support//platforms:darwin_arm64

 # iOS configs for each architecture and the fat binary builds.
 common:ios --apple_platform_type=ios
@@ -244,16 +244,16 @@ common:ios_armv7 --cpu=ios_armv7
 common:ios_armv7 --platforms=@org_tensorflow//tensorflow/tools/toolchains/ios:ios_armv7
 common:ios_arm64 --config=ios
 common:ios_arm64 --cpu=ios_arm64
-common:ios_arm64 --platforms=@build_bazel_apple_support//configs/platforms:ios_arm64
+common:ios_arm64 --platforms=@build_bazel_apple_support//platforms:ios_arm64
 common:ios_arm64e --config=ios
 common:ios_arm64e --cpu=ios_arm64e
-common:ios_arm64e --platforms=@build_bazel_apple_support//configs/platforms:ios_arm64e
+common:ios_arm64e --platforms=@build_bazel_apple_support//platforms:ios_arm64e
 common:ios_sim_arm64 --config=ios
 common:ios_sim_arm64 --cpu=ios_sim_arm64
-common:ios_sim_arm64 --platforms=@build_bazel_apple_support//configs/platforms:ios_sim_arm64
+common:ios_sim_arm64 --platforms=@build_bazel_apple_support//platforms:ios_sim_arm64
 common:ios_x86_64 --config=ios
 common:ios_x86_64 --cpu=ios_x86_64
-common:ios_x86_64 --platforms=@build_bazel_apple_support//configs/platforms:ios_x86_64
+common:ios_x86_64 --platforms=@build_bazel_apple_support//platforms:ios_x86_64
 common:ios_fat --config=ios
 common:ios_fat --ios_multi_cpus=armv7,arm64,i386,x86_64

@@ -282,19 +282,20 @@ common:mkl_threadpool -c opt
 # Config setting to build oneDNN with Compute Library for the Arm Architecture (ACL).
 # with Eigen threadpool support
 common:mkl_aarch64_threadpool --define=build_with_mkl_aarch64=true
+common:mkl_aarch64_threadpool --@compute_library//:openmp=false
 common:mkl_aarch64_threadpool -c opt

 # This is an alias for the mkl_aarch64_threadpool build.
 common:mkl_aarch64 --config=mkl_aarch64_threadpool

-# Default CUDA, CUDNN and NVSHMEM versions.
+# Default CUDA, CUDNN, NCCL and NVSHMEM versions.
 common:cuda_version --repo_env=HERMETIC_CUDA_VERSION="12.5.1"
 common:cuda_version --repo_env=HERMETIC_CUDNN_VERSION="9.3.0"
 common:cuda_version --repo_env=HERMETIC_NVSHMEM_VERSION="3.2.5"
+common:cuda_version --repo_env=HERMETIC_NCCL_VERSION="2.27.7"

 # CUDA: This config refers to building CUDA op kernels with nvcc.
 common:cuda --repo_env TF_NEED_CUDA=1
-common:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 common:cuda --@local_config_cuda//:enable_cuda
 common:cuda --config=cuda_version
 # This flag is needed to include CUDA libraries.
@@ -329,8 +330,6 @@ common:cuda_clang --linkopt="-lm"
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 common:cuda_clang_official --config=cuda_clang
 common:cuda_clang_official --config=cuda_version
-common:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
-common:cuda_clang_official --crosstool_top="@local_config_cuda//crosstool:toolchain"

 # Build with nvcc for CUDA and clang for host
 common:cuda_nvcc --config=cuda
@@ -653,12 +652,6 @@ common:rbe_linux_cpu --python_path="/usr/bin/python3"
 # These you may need to change for your own GCP project.
common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instances/default_instance -# Download CUDA/CUDNN redistributions to preserve the repositories cache between -# CPU and GPU builds. -# TODO(ybaturina): Uncomment when RBE is ready to support this. -common:rbe_linux_cpu --repo_env USE_CUDA_REDISTRIBUTIONS=1 -common:rbe_linux_cpu --config=cuda_version - # Deprecated RBE config with non-hermetic toolchains. common:rbe_linux_cpu_clang_local --config=rbe_linux_cpu common:rbe_linux_cpu_clang_local --config=clang_local @@ -785,14 +778,18 @@ common:release_gpu_linux_clang_local --config=release_cpu_linux_clang_local common:release_arm64_linux --config=release_linux_base common:release_arm64_linux --config=linux_arm64 -common:release_arm64_linux --config=clang_local -common:release_arm64_linux --repo_env=CC="/usr/lib/llvm-18/bin/clang" -common:release_arm64_linux --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/clang" -common:release_arm64_linux --crosstool_top="@ml2014_clang_aarch64_config_aarch64//crosstool:toolchain" common:release_arm64_linux --config=mkl_aarch64_threadpool common:release_arm64_linux --copt=-flax-vector-conversions test:release_arm64_linux --flaky_test_attempts=3 +# Deprecated release CPU config with non-hermetic toolchains. +common:release_arm64_linux_clang_local --config=release_arm64_linux +common:release_arm64_linux_clang_local --config=clang_local +common:release_arm64_linux_clang_local --repo_env=CC="/usr/lib/llvm-18/bin/clang" +common:release_arm64_linux_clang_local --repo_env=BAZEL_COMPILER="/usr/lib/llvm-18/bin/clang" +common:release_arm64_linux_clang_local --crosstool_top="@ml2014_clang_aarch64_config_aarch64//crosstool:toolchain" +test:release_arm64_linux_clang_local --flaky_test_attempts=3 + common:release_cpu_macos --config=avx_linux # Base build configs for macOS diff --git a/.github/workflows/arm-cd.yml b/.github/workflows/arm-cd.yml index 2e3912041d9cf2..5430fc1c8151e8 100644 --- a/.github/workflows/arm-cd.yml +++ b/.github/workflows/arm-cd.yml @@ -52,12 +52,12 @@ jobs: run: find /home/ubuntu/actions-runner/_work/tensorflow/tensorflow/. -name . -o -prune -exec sudo rm -rf -- {} + || true - name: Checkout repository for nightly (skipped for releases) if: ${{ github.event_name == 'schedule' }} - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: 'nightly' - name: Checkout repository for releases (skipped for nightly) if: ${{ github.event_name == 'push' }} - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Build and test pip wheel shell: bash run: | diff --git a/.github/workflows/arm-ci-extended-cpp.yml b/.github/workflows/arm-ci-extended-cpp.yml index 54903a6998b090..09085e814daba1 100644 --- a/.github/workflows/arm-ci-extended-cpp.yml +++ b/.github/workflows/arm-ci-extended-cpp.yml @@ -50,12 +50,12 @@ jobs: run: find /home/ubuntu/actions-runner/_work/tensorflow/tensorflow/. -name . 
-o -prune -exec sudo rm -rf -- {} + || true - name: Checkout repository for nightly (skipped for releases) if: ${{ github.event_name == 'schedule' }} - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: 'nightly' - name: Checkout repository if: ${{ github.event_name == 'push' }} - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Build binary and run C++ tests shell: bash run: | diff --git a/.github/workflows/arm-ci-extended.yml b/.github/workflows/arm-ci-extended.yml index 2235cfc2d986da..94237fcaa6cca5 100644 --- a/.github/workflows/arm-ci-extended.yml +++ b/.github/workflows/arm-ci-extended.yml @@ -51,12 +51,12 @@ jobs: run: find /home/ubuntu/actions-runner/_work/tensorflow/tensorflow/. -name . -o -prune -exec sudo rm -rf -- {} + || true - name: Checkout repository for nightly (skipped for releases) if: ${{ github.event_name == 'schedule' }} - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: 'nightly' - name: Checkout repository if: ${{ github.event_name == 'push' }} - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Build binary and run python tests on nightly for all python versions shell: bash run: | diff --git a/.github/workflows/arm-ci.yml b/.github/workflows/arm-ci.yml index a141bdd4676852..12d8ab4a2cf719 100644 --- a/.github/workflows/arm-ci.yml +++ b/.github/workflows/arm-ci.yml @@ -47,7 +47,7 @@ jobs: shell: bash run: find /home/ubuntu/actions-runner/_work/tensorflow/tensorflow/. -name . 
-o -prune -exec sudo rm -rf -- {} + || true - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Build binary and run python tests shell: bash run: | diff --git a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml index 6421e08ccf0839..de578ffec96327 100644 --- a/.github/workflows/cffconvert.yml +++ b/.github/workflows/cffconvert.yml @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out a copy of the repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Check whether the citation metadata from CITATION.cff is valid uses: citation-file-format/cffconvert-github-action@4cf11baa70a673bfdf9dad0acc7ee33b3f4b6084 # v2.0.0 diff --git a/.github/workflows/issue-on-pr-rollback.yml b/.github/workflows/issue-on-pr-rollback.yml index d5e0661a5f356b..1d548e9204e563 100644 --- a/.github/workflows/issue-on-pr-rollback.yml +++ b/.github/workflows/issue-on-pr-rollback.yml @@ -33,7 +33,7 @@ jobs: startsWith(github.event.head_commit.message, 'Rollback of PR #') steps: - name: Checkout repo - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Create a new Github Issue uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: diff --git a/.github/workflows/osv-scanner-scheduled.yml b/.github/workflows/osv-scanner-scheduled.yml index 07896a48470753..984dead9db7388 100644 --- a/.github/workflows/osv-scanner-scheduled.yml +++ b/.github/workflows/osv-scanner-scheduled.yml @@ -28,7 +28,7 @@ permissions: jobs: scan-scheduled: if: github.repository == 'tensorflow/tensorflow' - uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@v2.2.4" + uses: "google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@v2.3.0" with: scan-args: |- --lockfile=requirements.txt:./requirements_lock_3_9.txt diff --git a/.github/workflows/pylint-presubmit.yml b/.github/workflows/pylint-presubmit.yml index 59068d9d86f45d..483cf5bfc0addf 100644 --- a/.github/workflows/pylint-presubmit.yml +++ b/.github/workflows/pylint-presubmit.yml @@ -28,7 +28,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Get file changes id: get_file_changes uses: trilom/file-changes-action@a6ca26c14274c33b15e6499323aac178af06ad4b # v1.2.4 @@ -38,7 +38,7 @@ jobs: run: | echo Changed files: ${{ steps.get_file_changes.outputs.files }} - name: Set up Python 3.9 - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.9" - name: Install Python dependencies diff --git a/.github/workflows/release-branch-cherrypick.yml b/.github/workflows/release-branch-cherrypick.yml index 69e03a040ae1a2..fc643c92d304d1 100644 --- a/.github/workflows/release-branch-cherrypick.yml +++ b/.github/workflows/release-branch-cherrypick.yml @@ -45,7 +45,7 @@ jobs: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: 
actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.inputs.release_branch }} - name: Get some helpful info for formatting @@ -58,7 +58,7 @@ jobs: echo "SHORTSHA=$(git log -1 ${{ github.event.inputs.git_commit }} --format="%h")" >> "$GITHUB_OUTPUT" echo "TITLE=$(git log -1 ${{ github.event.inputs.git_commit }} --format="%s")" >> "$GITHUB_OUTPUT" - name: Create Pull Request with changes - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 + uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9 with: title: '${{ github.event.inputs.release_branch }} cherry-pick: ${{ steps.cherrypick.outputs.SHORTSHA }} "${{ steps.cherrypick.outputs.TITLE }}"' committer: TensorFlow Release Automation diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml index e635c4cd8ccc88..ce2d7075019b5d 100644 --- a/.github/workflows/scorecards-analysis.yml +++ b/.github/workflows/scorecards-analysis.yml @@ -41,7 +41,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: persist-credentials: false @@ -64,6 +64,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # v3.29.5 + uses: github/codeql-action/upload-sarif@fe4161a26a8629af62121b670040955b330f9af2 # v3.29.5 with: sarif_file: results.sarif diff --git a/.github/workflows/update-rbe.yml b/.github/workflows/update-rbe.yml index a8dba883f5ff14..d2cc83b7f5c2c2 100644 --- a/.github/workflows/update-rbe.yml +++ b/.github/workflows/update-rbe.yml @@ -30,7 +30,7 @@ jobs: if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Update the RBE Configs run: | function map() { @@ -130,7 +130,7 @@ jobs: map sigbuild-r2.17-clang-python3.11 2.17-python3.11 map sigbuild-r2.17-clang-python3.12 2.17-python3.12 - name: Create Pull Request with changes - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 + uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9 with: title: Update the RBE images to the latest container versions committer: TensorFlow Release Automation diff --git a/WORKSPACE b/WORKSPACE index 0fc24cb3edd116..0c4c70e21101bc 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -4,26 +4,28 @@ workspace(name = "org_tensorflow") # buildifier: disable=load-on-top -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") -http_archive( +tf_http_archive( name = "rules_shell", sha256 = "bc61ef94facc78e20a645726f64756e5e285a045037c7a61f65af2941f4c25e1", strip_prefix = "rules_shell-0.4.1", - url = "https://github.com/bazelbuild/rules_shell/releases/download/v0.4.1/rules_shell-v0.4.1.tar.gz", + urls = tf_mirror_urls( + "https://github.com/bazelbuild/rules_shell/releases/download/v0.4.1/rules_shell-v0.4.1.tar.gz", + ), ) # Initialize toolchains for ML projects. 
# # A hermetic build system is designed to produce completely reproducible builds for C++. # Details: https://github.com/google-ml-infra/rules_ml_toolchain -http_archive( +tf_http_archive( name = "rules_ml_toolchain", - sha256 = "7f00b3e94bbca1a4737ded6b9ed5358f6d1c86430c2ec97c90081343c0482f18", - strip_prefix = "rules_ml_toolchain-29d54c875da37e74b8548924ed30e78cb28126b9", - urls = [ - "https://github.com/yuriivcs/rules_ml_toolchain/archive/29d54c875da37e74b8548924ed30e78cb28126b9.tar.gz", - ], + sha256 = "1c2c530a054e9e8b3c811ec21ed8a687fc865bec3abbc8ff65beb829b1d67ae4", + strip_prefix = "rules_ml_toolchain-6734d2a174bf29e731d3f473743d1cc1a86100c3", + urls = tf_mirror_urls( + "https://github.com/google-ml-infra/rules_ml_toolchain/archive/6734d2a174bf29e731d3f473743d1cc1a86100c3.tar.gz", + ), ) load( diff --git a/ci/official/containers/ml_build/Dockerfile b/ci/official/containers/ml_build/Dockerfile index a4fb0cd9b1640a..ba090e65c95b33 100644 --- a/ci/official/containers/ml_build/Dockerfile +++ b/ci/official/containers/ml_build/Dockerfile @@ -58,10 +58,10 @@ RUN if [ -e "/usr/local/cuda/compat/libcuda.so.1" ]; then ln -s /usr/local/cuda/ # - buildozer: clean bazel build deps # - gcloud SDK: communicate with Google Cloud Platform (GCP) for RBE, CI # - patchelf: Utility tool to modify existing ELF executables and libraries -RUN git clone --branch v1.11.0 https://github.com/bats-core/bats-core.git && bats-core/install.sh /usr/local && rm -rf bats-core -RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.21.0/bazelisk-linux-amd64 -O /usr/local/bin/bazel && chmod +x /usr/local/bin/bazel -RUN wget https://github.com/bazelbuild/buildtools/releases/download/v7.3.1/buildifier-linux-amd64 -O /usr/local/bin/buildifier && chmod +x /usr/local/bin/buildifier -RUN wget https://github.com/bazelbuild/buildtools/releases/download/v7.3.1/buildozer-linux-amd64 -O /usr/local/bin/buildozer && chmod +x /usr/local/bin/buildozer +RUN git clone --branch v1.13.0 https://github.com/bats-core/bats-core.git && bats-core/install.sh /usr/local && rm -rf bats-core +RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.27.0/bazelisk-linux-amd64 -O /usr/local/bin/bazel && chmod +x /usr/local/bin/bazel +RUN wget https://github.com/bazelbuild/buildtools/releases/download/v8.2.1/buildifier-linux-amd64 -O /usr/local/bin/buildifier && chmod +x /usr/local/bin/buildifier +RUN wget https://github.com/bazelbuild/buildtools/releases/download/v8.2.1/buildozer-linux-amd64 -O /usr/local/bin/buildozer && chmod +x /usr/local/bin/buildozer RUN curl https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz | \ tar zxf - google-cloud-sdk && \ diff --git a/ci/official/containers/ml_build_arm64/Dockerfile b/ci/official/containers/ml_build_arm64/Dockerfile deleted file mode 100644 index 379162d0d1af76..00000000000000 --- a/ci/official/containers/ml_build_arm64/Dockerfile +++ /dev/null @@ -1,75 +0,0 @@ -################################################################################ -FROM ubuntu:20.04@sha256:8e5c4f0285ecbb4ead070431d29b576a530d3166df73ec44affc1cd27555141b as devel -################################################################################ - -# Install devtoolset build dependencies -COPY setup.sources.sh /setup.sources.sh -COPY setup.packages.sh /setup.packages.sh -COPY builder.packages.txt /builder.packages.txt - -RUN /setup.sources.sh && /setup.packages.sh /builder.packages.txt - -RUN update-ca-certificates -# Install devtoolset-9 in /dt10 with glibc 
2.17 and libstdc++ 4.8, for building -# manylinux2014-compatible packages. -COPY builder.devtoolset/fixlinks_aarch64.sh /fixlinks.sh -COPY builder.devtoolset/rpm-patch.sh /rpm-patch.sh -COPY builder.devtoolset/build_devtoolset.sh /build_devtoolset.sh -COPY builder.devtoolset/gcc9-fixups.patch /gcc9-fixups.patch -COPY builder.devtoolset/stringop_trunc.patch /stringop_trunc.patch - -RUN /build_devtoolset.sh devtoolset-10 /dt10 - -# Build later version of patchelf that is not so buggy -RUN wget https://github.com/NixOS/patchelf/releases/download/0.18.0/patchelf-0.18.0-aarch64.tar.gz && tar -zxvf patchelf-0.18.0-aarch64.tar.gz -C /usr && rm -rf patchelf-0.18.0-aarch64.tar.gz - -RUN curl https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-arm.tar.gz | \ - tar zxf - google-cloud-sdk && \ - google-cloud-sdk/install.sh --quiet -ENV PATH="$PATH:/google-cloud-sdk/bin/" - -# Install various tools. -# - bats: bash unit testing framework -# NOTE: v1.6.0 seems to have a bug that made "git" in setup_file break -# - bazelisk: always use the correct bazel version -# - buildifier: clean bazel build depshttps://github.com/bazelbuild/buildtools/releases/download/v7.3.1/buildifier-linux-arm64 -# - buildozer: clean bazel build deps -RUN git clone --branch v1.11.0 https://github.com/bats-core/bats-core.git && bats-core/install.sh /usr/local && rm -rf bats-core -RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.21.0/bazelisk-linux-arm64 -O /usr/local/bin/bazel && chmod +x /usr/local/bin/bazel -RUN wget https://github.com/bazelbuild/buildtools/releases/download/v7.3.1/buildifier-linux-arm64 -O /usr/local/bin/buildifier && chmod +x /usr/local/bin/buildifier -RUN wget https://github.com/bazelbuild/buildtools/releases/download/v7.3.1/buildozer-linux-arm64 -O /usr/local/bin/buildozer && chmod +x /usr/local/bin/buildozer - -RUN groupadd -g 1001 buildslave && useradd -m -u 1001 -g buildslave buildslave -RUN mkdir -p /tf/venv -RUN chown -R buildslave:buildslave /tf - -# All lines past this point are reset when $CACHEBUSTER is set. We need this -# for Python specifically because we install some nightly packages which are -# likely to change daily. -ARG CACHEBUSTER=0 -RUN echo $CACHEBUSTER - -# Setup build and environment -COPY devel.bashrc /root/.bashrc -COPY ld.so.conf /dt10/etc/ - -# Make sure clang is on the path -RUN ln -s /usr/lib/llvm-18/bin/clang /usr/bin/clang - -# Setup JAX Python environment. -COPY requirements.txt /requirements.txt -COPY setup.python.sh /setup.python.sh -RUN /setup.python.sh python3.9 requirements.txt -RUN /setup.python.sh python3.10 requirements.txt -RUN /setup.python.sh python3.11 requirements.txt -RUN /setup.python.sh python3.12 requirements.txt -RUN /setup.python.sh python3.13 requirements.txt -# python3.13-nogil is a free-threaded build of python3.13. 
-RUN /setup.python.sh python3.13-nogil requirements.txt -RUN /setup.python.sh python3.14 requirements.txt -RUN /setup.python.sh python3.14-nogil requirements.txt - -# Python commands by default run under 3.11 -RUN ln -sf /usr/bin/python3.11 /usr/bin/python3 -RUN ln -sf /usr/bin/python3.11 /usr/bin/python -RUN ln -sf /usr/lib/python3.11 /usr/lib/tf_python diff --git a/ci/official/containers/ml_build_arm64/builder.devtoolset/build_devtoolset.sh b/ci/official/containers/ml_build_arm64/builder.devtoolset/build_devtoolset.sh deleted file mode 100755 index d59923d405a8c8..00000000000000 --- a/ci/official/containers/ml_build_arm64/builder.devtoolset/build_devtoolset.sh +++ /dev/null @@ -1,157 +0,0 @@ -#!/bin/bash -eu -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# Builds a devtoolset cross-compiler targeting manylinux2014 (glibc 2.17 / libstdc++ 4.8). - -VERSION="$1" -TARGET="$2" - -case "${VERSION}" in -devtoolset-9) - LIBSTDCXX_VERSION="6.0.28" - LIBSTDCXX_ABI="new" - ;; -devtoolset-10) - LIBSTDCXX_VERSION="6.0.28" - LIBSTDCXX_ABI="new" - ;; -*) - echo "Usage: $0 {devtoolset-9|devtoolset-10} " - echo "Use 'devtoolset-9' to build a manylinux2014 compatible toolchain" - exit 1 - ;; -esac - -mkdir -p "${TARGET}" - -mkdir -p ${TARGET}/usr/include - -# Put the current kernel headers from ubuntu in place. -ln -s "/usr/include/linux" "${TARGET}/usr/include/linux" -ln -s "/usr/include/asm-generic" "${TARGET}/usr/include/asm-generic" -ln -s "/usr/include/aarch64-linux-gnu/asm" "${TARGET}/usr/include/asm" - -# Download glibc's shared and development libraries based on the value of the -# `VERSION` parameter. -# Note: 'Templatizing' this and the other conditional branches would require -# defining several variables (version, os, path) making it difficult to maintain -# and extend for future modifications. -mkdir -p glibc-src -mkdir -p glibc-build -cd glibc-src -wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 "https://vault.centos.org/centos/7/os/Source/SPackages/glibc-2.17-317.el7.src.rpm" -rpm2cpio "glibc-2.17-317.el7.src.rpm" |cpio -idmv -tar -xvzf "glibc-2.17-c758a686.tar.gz" --strip 1 -tar -xvzf "glibc-2.17-c758a686-releng.tar.gz" --strip 1 -sed -i '/patch0060/d' glibc.spec -/rpm-patch.sh "glibc.spec" -rm -f "glibc-2.17-317.el7.src.rpm" "glibc-2.17-c758a686.tar.gz" "glibc-2.17-c758a686-releng.tar.gz" -patch -p1 < /gcc9-fixups.patch -patch -p1 < /stringop_trunc.patch -cd ../glibc-build -../glibc-src/configure --prefix=/usr --disable-werror --enable-obsolete-rpc --disable-profile -make -j$(nproc) -make install DESTDIR=${TARGET} -cd .. - -# Symlinks in the binary distribution are set up for installation in /usr, we -# need to fix up all the links to stay within /${TARGET}. -/fixlinks.sh "/${TARGET}" - -# Patch to allow non-glibc 2.12 compatible builds to work. 
-sed -i '54i#define TCP_USER_TIMEOUT 18' "/${TARGET}/usr/include/netinet/tcp.h" - -# Download specific version of libstdc++ shared library based on the value of -# the `VERSION` parameter - # Download binary libstdc++ 4.8 shared library release -wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 "http://old-releases.ubuntu.com/ubuntu/pool/main/g/gcc-4.8/libstdc++6_4.8.1-10ubuntu8_arm64.deb" && \ - unar "libstdc++6_4.8.1-10ubuntu8_arm64.deb" && \ - tar -C "${TARGET}" -xvzf "libstdc++6_4.8.1-10ubuntu8_arm64/data.tar.gz" "./usr/lib/aarch64-linux-gnu/libstdc++.so.6.0.18" && \ - rm -rf "libstdc++6_4.8.1-10ubuntu8_arm64.deb" "libstdc++6_4.8.1-10ubuntu8_arm64" - -mkdir -p "${TARGET}-src" -cd "${TARGET}-src" - -# Build a devtoolset cross-compiler based on our glibc 2.12/glibc 2.17 sysroot setup. -case "${VERSION}" in -devtoolset-9) - wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 "https://vault.centos.org/centos/7/sclo/Source/rh/devtoolset-9-gcc-9.3.1-2.2.el7.src.rpm" - rpm2cpio "devtoolset-9-gcc-9.3.1-2.2.el7.src.rpm" |cpio -idmv - tar -xvf "gcc-9.3.1-20200408.tar.xz" --strip 1 - ;; -devtoolset-10) - wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 "https://vault.centos.org/centos/7/sclo/Source/rh/devtoolset-10-gcc-10.2.1-11.2.el7.src.rpm" - rpm2cpio "devtoolset-10-gcc-10.2.1-11.2.el7.src.rpm" |cpio -idmv - tar -xvf "gcc-10.2.1-20210130.tar.xz" --strip 1 - ;; -esac - -# Apply the devtoolset patches to gcc. -/rpm-patch.sh "gcc.spec" - -./contrib/download_prerequisites - -mkdir -p "${TARGET}-build" -cd "${TARGET}-build" - -"${TARGET}-src/configure" \ - --prefix="${TARGET}/usr" \ - --with-sysroot="/${TARGET}" \ - --disable-bootstrap \ - --disable-libmpx \ - --enable-libsanitizer \ - --disable-libunwind-exceptions \ - --disable-libunwind-exceptions \ - --disable-lto \ - --disable-multilib \ - --enable-__cxa_atexit \ - --enable-gnu-indirect-function \ - --enable-gnu-unique-object \ - --enable-initfini-array \ - --enable-languages="c,c++" \ - --enable-linker-build-id \ - --enable-plugin \ - --enable-shared \ - --enable-threads=posix \ - --with-default-libstdcxx-abi=${LIBSTDCXX_ABI} \ - --with-gcc-major-version-only \ - --with-linker-hash-style="gnu" \ - && \ - make -j$(nproc) && \ - make install - - -# Create the devtoolset libstdc++ linkerscript that links dynamically against -# the system libstdc++ 4.4 and provides all other symbols statically. -# Note that the installation path for libstdc++ here is ${TARGET}/usr/lib64/ -mv "${TARGET}/usr/lib64/libstdc++.so.${LIBSTDCXX_VERSION}" \ - "${TARGET}/usr/lib64/libstdc++.so.${LIBSTDCXX_VERSION}.backup" -echo -e "OUTPUT_FORMAT(elf64-littleaarch64)\nINPUT ( libstdc++.so.6.0.18 -lstdc++_nonshared44 )" \ - > "${TARGET}/usr/lib64/libstdc++.so.${LIBSTDCXX_VERSION}" -cp "./aarch64-unknown-linux-gnu/libstdc++-v3/src/.libs/libstdc++_nonshared44.a" \ - "${TARGET}/usr/lib64" - - -# Link in architecture specific includes from the system; note that we cannot -# link in the whole aarch64-linux-gnu folder, as otherwise we're overlaying -# system gcc paths that we do not want to find. -# TODO(klimek): Automate linking in all non-gcc / non-kernel include -# directories. 
-mkdir -p "${TARGET}/usr/include/aarch64-linux-gnu" -PYTHON_VERSIONS=("python3.9" "python3.10" "python3.11" "python3.12") -for v in "${PYTHON_VERSIONS[@]}"; do - ln -s "/usr/local/include/${v}" "${TARGET}/usr/include/aarch64-linux-gnu/${v}" -done diff --git a/ci/official/containers/ml_build_arm64/builder.devtoolset/fixlinks_aarch64.sh b/ci/official/containers/ml_build_arm64/builder.devtoolset/fixlinks_aarch64.sh deleted file mode 100755 index 09a5f9854d42ef..00000000000000 --- a/ci/official/containers/ml_build_arm64/builder.devtoolset/fixlinks_aarch64.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# Re-direct all links in $1 that are relative to be canonical - -BASE="$1" -find "${BASE}" -type l | \ - while read l ; do - if [[ "$(readlink "$l")" == \.\./* ]]; then - CANONICAL="$(readlink "$l")"; - rm "$l"; - ln -s "${CANONICAL}" "$l" - fi - done - diff --git a/ci/official/containers/ml_build_arm64/builder.devtoolset/gcc9-fixups.patch b/ci/official/containers/ml_build_arm64/builder.devtoolset/gcc9-fixups.patch deleted file mode 100644 index 7b9bbf358ada74..00000000000000 --- a/ci/official/containers/ml_build_arm64/builder.devtoolset/gcc9-fixups.patch +++ /dev/null @@ -1,270 +0,0 @@ -diff --git a/iconv/gconv.h b/iconv/gconv.h -index 3f9112e..8e60197 100644 ---- a/iconv/gconv.h -+++ b/iconv/gconv.h -@@ -174,7 +174,7 @@ typedef struct __gconv_info - { - size_t __nsteps; - struct __gconv_step *__steps; -- __extension__ struct __gconv_step_data __data __flexarr; -+ __extension__ struct __gconv_step_data __data[0]; - } *__gconv_t; - - #endif /* gconv.h */ -diff --git a/include/libc-symbols.h b/include/libc-symbols.h -index c555bf2..143b26d 100644 ---- a/include/libc-symbols.h -+++ b/include/libc-symbols.h -@@ -107,6 +107,11 @@ - # endif - #endif - -+#ifndef __attribute_copy__ -+/* Provide an empty definition when cdefs.h is not included. */ -+# define __attribute_copy__(arg) -+#endif -+ - #ifndef __ASSEMBLER__ - /* GCC understands weak symbols and aliases; use its interface where - possible, instead of embedded assembly language. */ -@@ -114,7 +119,8 @@ - /* Define ALIASNAME as a strong alias for NAME. */ - # define strong_alias(name, aliasname) _strong_alias(name, aliasname) - # define _strong_alias(name, aliasname) \ -- extern __typeof (name) aliasname __attribute__ ((alias (#name))); -+ extern __typeof (name) aliasname __attribute__ ((alias (#name))) \ -+ __attribute_copy__ (name); - - /* This comes between the return type and function name in - a function definition to make that definition weak. */ -@@ -125,14 +131,16 @@ - If weak aliases are not available, this defines a strong alias. 
*/ - # define weak_alias(name, aliasname) _weak_alias (name, aliasname) - # define _weak_alias(name, aliasname) \ -- extern __typeof (name) aliasname __attribute__ ((weak, alias (#name))); -+ extern __typeof (name) aliasname __attribute__ ((weak, alias (#name))) \ -+ __attribute_copy__ (name); - - /* Same as WEAK_ALIAS, but mark symbol as hidden. */ - # define weak_hidden_alias(name, aliasname) \ - _weak_hidden_alias (name, aliasname) - # define _weak_hidden_alias(name, aliasname) \ - extern __typeof (name) aliasname \ -- __attribute__ ((weak, alias (#name), __visibility__ ("hidden"))); -+ __attribute__ ((weak, alias (#name), __visibility__ ("hidden"))) \ -+ __attribute_copy__ (name); - - /* Declare SYMBOL as weak undefined symbol (resolved to 0 if not defined). */ - # define weak_extern(symbol) _weak_extern (weak symbol) -@@ -528,7 +536,8 @@ for linking") - # define __hidden_ver1(local, internal, name) \ - extern __typeof (name) __EI_##name __asm__(__hidden_asmname (#internal)); \ - extern __typeof (name) __EI_##name \ -- __attribute__((alias (__hidden_asmname (#local)))) -+ __attribute__((alias (__hidden_asmname (#local)))) \ -+ __attribute_copy__ (name) - # define hidden_ver(local, name) __hidden_ver1(local, __GI_##name, name); - # define hidden_data_ver(local, name) hidden_ver(local, name) - # define hidden_def(name) __hidden_ver1(__GI_##name, name, name); -@@ -541,7 +550,8 @@ for linking") - # define __hidden_nolink1(local, internal, name, version) \ - __hidden_nolink2 (local, internal, name, version) - # define __hidden_nolink2(local, internal, name, version) \ -- extern __typeof (name) internal __attribute__ ((alias (#local))); \ -+ extern __typeof (name) internal __attribute__ ((alias (#local))) \ -+ __attribute_copy__ (name); \ - __hidden_nolink3 (local, internal, #name "@" #version) - # define __hidden_nolink3(local, internal, vername) \ - __asm__ (".symver " #internal ", " vername); -diff --git a/locale/weightwc.h b/locale/weightwc.h -index e966c03..22ab790 100644 ---- a/locale/weightwc.h -+++ b/locale/weightwc.h -@@ -79,19 +79,19 @@ findidx (const wint_t **cpp, size_t len) - if (cp[cnt] != usrc[cnt]) - break; - -- if (cnt < nhere - 1) -+ if (cnt < nhere - 1 || cnt == len) - { - cp += 2 * nhere; - continue; - } - -- if (cp[nhere - 1] > usrc[nhere -1]) -+ if (cp[nhere - 1] > usrc[nhere - 1]) - { - cp += 2 * nhere; - continue; - } - -- if (cp[2 * nhere - 1] < usrc[nhere -1]) -+ if (cp[2 * nhere - 1] < usrc[nhere - 1]) - { - cp += 2 * nhere; - continue; -diff --git a/locale/xlocale.h b/locale/xlocale.h -index 98c080b..843bd45 100644 ---- a/locale/xlocale.h -+++ b/locale/xlocale.h -@@ -20,6 +20,9 @@ - #ifndef _XLOCALE_H - #define _XLOCALE_H 1 - -+#ifndef _BITS_TYPES___LOCALE_T_H -+#define _BITS_TYPES___LOCALE_T_H 1 -+ - /* Structure for reentrant locale using functions. This is an - (almost) opaque type for the user level programs. The file and - this data structure is not standardized. Don't rely on it. It can -@@ -41,4 +44,6 @@ typedef struct __locale_struct - /* POSIX 2008 makes locale_t official. */ - typedef __locale_t locale_t; - -+#endif /* bits/types/__locale_t.h */ -+ - #endif /* xlocale.h */ -diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h -index d1cb3dd..30482a1 100644 ---- a/misc/sys/cdefs.h -+++ b/misc/sys/cdefs.h -@@ -423,4 +423,14 @@ - # endif - #endif - -+/* Undefine (also defined in libc-symbols.h). */ -+#undef __attribute_copy__ -+#if __GNUC_PREREQ (9, 0) -+/* Copies attributes from the declaration or type referenced by -+ the argument. 
*/ -+# define __attribute_copy__(arg) __attribute__ ((__copy__ (arg))) -+#else -+# define __attribute_copy__(arg) -+#endif -+ - #endif /* sys/cdefs.h */ -diff --git a/stdlib/setenv.c b/stdlib/setenv.c -index 45efe2e..06bfab0 100644 ---- a/stdlib/setenv.c -+++ b/stdlib/setenv.c -@@ -319,6 +319,7 @@ unsetenv (const char *name) - - ep = __environ; - if (ep != NULL) -+ { - while (*ep != NULL) - if (!strncmp (*ep, name, len) && (*ep)[len] == '=') - { -@@ -332,6 +333,7 @@ unsetenv (const char *name) - } - else - ++ep; -+ } - - UNLOCK; - -diff --git a/support/Makefile b/support/Makefile -index a253698..2f4e2a9 100644 ---- a/support/Makefile -+++ b/support/Makefile -@@ -167,13 +167,6 @@ CFLAGS-support_paths.c = \ - -DINSTDIR_PATH=\"$(prefix)\" \ - -DLIBDIR_PATH=\"$(libdir)\" - --ifeq (,$(CXX)) --LINKS_DSO_PROGRAM = links-dso-program-c --else --LINKS_DSO_PROGRAM = links-dso-program --LDLIBS-links-dso-program = -lstdc++ -lgcc -lgcc_s $(libunwind) --endif -- - LDLIBS-test-container = $(libsupport) - - others += test-container -@@ -182,9 +175,6 @@ others-noinstall += test-container - others += shell-container echo-container true-container - others-noinstall += shell-container echo-container true-container - --others += $(LINKS_DSO_PROGRAM) --others-noinstall += $(LINKS_DSO_PROGRAM) -- - $(objpfx)test-container : $(libsupport) - $(objpfx)shell-container : $(libsupport) - $(objpfx)echo-container : $(libsupport) -diff --git a/support/links-dso-program.cc b/support/links-dso-program.cc -index 8ff3155..f9d2b77 100644 ---- a/support/links-dso-program.cc -+++ b/support/links-dso-program.cc -@@ -3,6 +3,11 @@ - backported. */ - #define _ISOMAC 1 - -+#define __GLIBC_USE(F) __GLIBC_USE_ ## F -+ -+# define __attribute_alloc_size__(params) \ -+ __attribute__ ((__alloc_size__ params)) -+ - #include - - using namespace std; -diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h -index 185402f..bbdeae0 100644 ---- a/sysdeps/aarch64/dl-machine.h -+++ b/sysdeps/aarch64/dl-machine.h -@@ -49,23 +49,11 @@ elf_machine_load_address (void) - /* To figure out the load address we use the definition that for any symbol: - dynamic_addr(symbol) = static_addr(symbol) + load_addr - -- The choice of symbol is arbitrary. The static address we obtain -- by constructing a non GOT reference to the symbol, the dynamic -- address of the symbol we compute using adrp/add to compute the -- symbol's address relative to the PC. */ -- -- ElfW(Addr) static_addr; -- ElfW(Addr) dynamic_addr; -- -- asm (" \n\ -- adrp %1, _dl_start; \n\ -- add %1, %1, #:lo12:_dl_start \n\ -- ldr %w0, 1f \n\ -- b 2f \n\ --1: .word _dl_start \n\ --2: \n\ -- " : "=r" (static_addr), "=r" (dynamic_addr)); -- return dynamic_addr - static_addr; -+ _DYNAMIC sysmbol is used here as its link-time address stored in -+ the special unrelocated first GOT entry. 
*/ -+ -+ extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; -+ return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic (); - } - - /* Set up the loaded object described by L so its unrelocated PLT -diff --git a/sysdeps/ieee754/dbl-64/k_rem_pio2.c b/sysdeps/ieee754/dbl-64/k_rem_pio2.c -index fcf956a..e2c5d29 100644 ---- a/sysdeps/ieee754/dbl-64/k_rem_pio2.c -+++ b/sysdeps/ieee754/dbl-64/k_rem_pio2.c -@@ -172,7 +172,8 @@ int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int3 - - /* compute q[0],q[1],...q[jk] */ - for (i=0;i<=jk;i++) { -- for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; -+ for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; -+ q[i] = fw; - } - - jz = jk; -diff --git a/sysdeps/ieee754/flt-32/k_rem_pio2f.c b/sysdeps/ieee754/flt-32/k_rem_pio2f.c -index e54a067..215b0e0 100644 ---- a/sysdeps/ieee754/flt-32/k_rem_pio2f.c -+++ b/sysdeps/ieee754/flt-32/k_rem_pio2f.c -@@ -65,7 +65,8 @@ int __kernel_rem_pio2f(float *x, float *y, int e0, int nx, int prec, const int32 - - /* compute q[0],q[1],...q[jk] */ - for (i=0;i<=jk;i++) { -- for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; -+ for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; -+ q[i] = fw; - } - - jz = jk; diff --git a/ci/official/containers/ml_build_arm64/builder.devtoolset/rpm-patch.sh b/ci/official/containers/ml_build_arm64/builder.devtoolset/rpm-patch.sh deleted file mode 100755 index 892ae2af86a3fa..00000000000000 --- a/ci/official/containers/ml_build_arm64/builder.devtoolset/rpm-patch.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -eu -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# Given an RPM spec file $1, apply its patches. - -SPEC="$1" -grep '%patch' "${SPEC}" |while read cmd ; do - N=$(echo "${cmd}" |sed 's,%patch\([0-9]\+\).*,\1,') - file=$(grep "Patch$N:" "${SPEC}" |sed 's,.*: ,,') - parg=$(echo "${cmd}" |sed 's,.*\(-p[0-9]\).*,\1,') - if [[ ! "${file}" =~ doxygen && "${cmd}" != \#* ]]; then - echo "patch ${parg} -s < ${file}" - patch ${parg} -s < "${file}" - fi -done diff --git a/ci/official/containers/ml_build_arm64/builder.devtoolset/stringop_trunc.patch b/ci/official/containers/ml_build_arm64/builder.devtoolset/stringop_trunc.patch deleted file mode 100644 index bd9e5533118d6c..00000000000000 --- a/ci/official/containers/ml_build_arm64/builder.devtoolset/stringop_trunc.patch +++ /dev/null @@ -1,1204 +0,0 @@ -diff --git a/bits/utmp.h b/bits/utmp.h -index 775123d..bf28c6d 100644 ---- a/bits/utmp.h -+++ b/bits/utmp.h -@@ -1,5 +1,5 @@ --/* The `struct utmp' type, describing entries in the utmp file. Generic/BSDish -- Copyright (C) 1993, 1996, 1997 Free Software Foundation, Inc. -+/* The `struct utmp' type, describing entries in the utmp file. -+ Copyright (C) 1993-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or -@@ -14,7 +14,7 @@ - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see -- . */ -+ . */ - - #ifndef _UTMP_H - # error "Never include directly; use instead." -@@ -24,11 +24,13 @@ - #include - - --#define UT_NAMESIZE 8 --#define UT_LINESIZE 8 --#define UT_HOSTSIZE 16 -+#define UT_LINESIZE 32 -+#define UT_NAMESIZE 32 -+#define UT_HOSTSIZE 256 - - -+/* The structure describing an entry in the database of -+ previous logins. */ - struct lastlog - { - time_t ll_time; -@@ -36,12 +38,16 @@ struct lastlog - char ll_host[UT_HOSTSIZE]; - }; - -+/* The structure describing an entry in the user accounting database. */ - struct utmp - { -- char ut_line[UT_LINESIZE]; -- char ut_user[UT_NAMESIZE]; -+ char ut_line[UT_LINESIZE] -+ __attribute_nonstring__; /* Devicename. */ -+ char ut_user[UT_NAMESIZE] -+ __attribute_nonstring__; /* Username. */ - #define ut_name ut_user -- char ut_host[UT_HOSTSIZE]; -+ char ut_host[UT_HOSTSIZE] -+ __attribute_nonstring__; /* Hostname for remote login. */ - long int ut_time; - }; - -diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h -index 30482a1..551d5fd 100644 ---- a/misc/sys/cdefs.h -+++ b/misc/sys/cdefs.h -@@ -391,6 +391,15 @@ - - #include - -+#if __GNUC_PREREQ (8, 0) -+/* Describes a char array whose address can safely be passed as the first -+ argument to strncpy and strncat, as the char array is not necessarily -+ a NUL-terminated string. */ -+# define __attribute_nonstring__ __attribute__ ((__nonstring__)) -+#else -+# define __attribute_nonstring__ -+#endif -+ - #if defined __LONG_DOUBLE_MATH_OPTIONAL && defined __NO_LONG_DOUBLE_MATH - # define __LDBL_COMPAT 1 - # ifdef __REDIRECT -diff --git a/nis/nss_nisplus/nisplus-parser.c b/nis/nss_nisplus/nisplus-parser.c -index a4d76fb..41600f0 100644 ---- a/nis/nss_nisplus/nisplus-parser.c -+++ b/nis/nss_nisplus/nisplus-parser.c -@@ -82,7 +82,7 @@ _nss_nisplus_parse_pwent (nis_result *result, struct passwd *pw, - - char *numstr = NISOBJVAL (2, obj); - len = NISOBJLEN (2, obj); -- if (len == 0 && numstr[len - 1] != '\0') -+ if (len == 0 || numstr[len - 1] != '\0') - { - if (len >= room_left) - goto no_more_room; -@@ -98,7 +98,7 @@ _nss_nisplus_parse_pwent (nis_result *result, struct passwd *pw, - - numstr = NISOBJVAL (3, obj); - len = NISOBJLEN (3, obj); -- if (len == 0 && numstr[len - 1] != '\0') -+ if (len == 0 || numstr[len - 1] != '\0') - { - if (len >= room_left) - goto no_more_room; -diff --git a/string/bits/string2.h b/string/bits/string2.h -index c9bf593..f461fc1 100644 ---- a/string/bits/string2.h -+++ b/string/bits/string2.h -@@ -47,29 +47,7 @@ - #endif - - #if _STRING_ARCH_unaligned --/* If we can do unaligned memory accesses we must know the endianess. 
*/ --# include - # include -- --# if __BYTE_ORDER == __LITTLE_ENDIAN --# define __STRING2_SMALL_GET16(src, idx) \ -- (((const unsigned char *) (const char *) (src))[idx + 1] << 8 \ -- | ((const unsigned char *) (const char *) (src))[idx]) --# define __STRING2_SMALL_GET32(src, idx) \ -- (((((const unsigned char *) (const char *) (src))[idx + 3] << 8 \ -- | ((const unsigned char *) (const char *) (src))[idx + 2]) << 8 \ -- | ((const unsigned char *) (const char *) (src))[idx + 1]) << 8 \ -- | ((const unsigned char *) (const char *) (src))[idx]) --# else --# define __STRING2_SMALL_GET16(src, idx) \ -- (((const unsigned char *) (const char *) (src))[idx] << 8 \ -- | ((const unsigned char *) (const char *) (src))[idx + 1]) --# define __STRING2_SMALL_GET32(src, idx) \ -- (((((const unsigned char *) (const char *) (src))[idx] << 8 \ -- | ((const unsigned char *) (const char *) (src))[idx + 1]) << 8 \ -- | ((const unsigned char *) (const char *) (src))[idx + 2]) << 8 \ -- | ((const unsigned char *) (const char *) (src))[idx + 3]) --# endif - #else - /* These are a few types we need for the optimizations if we cannot - use unaligned memory accesses. */ -@@ -94,148 +72,11 @@ __STRING2_COPY_TYPE (8); - - /* Set N bytes of S to C. */ - #if !defined _HAVE_STRING_ARCH_memset --# if !__GNUC_PREREQ (3, 0) --# if _STRING_ARCH_unaligned --# define memset(s, c, n) \ -- (__extension__ (__builtin_constant_p (n) && (n) <= 16 \ -- ? ((n) == 1 \ -- ? __memset_1 (s, c) \ -- : __memset_gc (s, c, n)) \ -- : (__builtin_constant_p (c) && (c) == '\0' \ -- ? ({ void *__s = (s); __bzero (__s, n); __s; }) \ -- : memset (s, c, n)))) -- --# define __memset_1(s, c) ({ void *__s = (s); \ -- *((__uint8_t *) __s) = (__uint8_t) c; __s; }) -- --# define __memset_gc(s, c, n) \ -- ({ void *__s = (s); \ -- union { \ -- unsigned int __ui; \ -- unsigned short int __usi; \ -- unsigned char __uc; \ -- } *__u = __s; \ -- __uint8_t __c = (__uint8_t) (c); \ -- \ -- /* This `switch' statement will be removed at compile-time. 
*/ \ -- switch ((unsigned int) (n)) \ -- { \ -- case 15: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 11: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 7: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 3: \ -- __u->__usi = (unsigned short int) __c * 0x0101; \ -- __u = __extension__ ((void *) __u + 2); \ -- __u->__uc = (unsigned char) __c; \ -- break; \ -- \ -- case 14: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 10: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 6: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 2: \ -- __u->__usi = (unsigned short int) __c * 0x0101; \ -- break; \ -- \ -- case 13: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 9: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 5: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 1: \ -- __u->__uc = (unsigned char) __c; \ -- break; \ -- \ -- case 16: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 12: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 8: \ -- __u->__ui = __c * 0x01010101; \ -- __u = __extension__ ((void *) __u + 4); \ -- case 4: \ -- __u->__ui = __c * 0x01010101; \ -- case 0: \ -- break; \ -- } \ -- \ -- __s; }) --# else --# define memset(s, c, n) \ -- (__extension__ (__builtin_constant_p (c) && (c) == '\0' \ -- ? ({ void *__s = (s); __bzero (__s, n); __s; }) \ -- : memset (s, c, n))) --# endif --# endif -- --/* GCC < 3.0 optimizes memset(s, 0, n) but not bzero(s, n). -- The optimization is broken before EGCS 1.1. -- GCC 3.0+ has __builtin_bzero as well, but at least till GCC 3.4 -- if it decides to call the library function, it calls memset -- and not bzero. */ --# if __GNUC_PREREQ (2, 91) --# define __bzero(s, n) __builtin_memset (s, '\0', n) --# endif -- -+# define __bzero(s, n) __builtin_memset (s, '\0', n) - #endif - -- --/* Copy N bytes from SRC to DEST, returning pointer to byte following the -- last copied. */ --#ifdef __USE_GNU --# if !defined _HAVE_STRING_ARCH_mempcpy || defined _FORCE_INLINES --# ifndef _HAVE_STRING_ARCH_mempcpy --# if __GNUC_PREREQ (3, 4) --# define __mempcpy(dest, src, n) __builtin_mempcpy (dest, src, n) --# elif __GNUC_PREREQ (3, 0) --# define __mempcpy(dest, src, n) \ -- (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ -- && __string2_1bptr_p (src) && n <= 8 \ -- ? __builtin_memcpy (dest, src, n) + (n) \ -- : __mempcpy (dest, src, n))) --# else --# define __mempcpy(dest, src, n) \ -- (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ -- && __string2_1bptr_p (src) && n <= 8 \ -- ? __mempcpy_small (dest, __mempcpy_args (src), n) \ -- : __mempcpy (dest, src, n))) --# endif --/* In glibc we use this function frequently but for namespace reasons -- we have to use the name `__mempcpy'. 
*/ --# define mempcpy(dest, src, n) __mempcpy (dest, src, n) --# endif -- --# if !__GNUC_PREREQ (3, 0) || defined _FORCE_INLINES --# if _STRING_ARCH_unaligned --# ifndef _FORCE_INLINES --# define __mempcpy_args(src) \ -- ((const char *) (src))[0], ((const char *) (src))[2], \ -- ((const char *) (src))[4], ((const char *) (src))[6], \ -- __extension__ __STRING2_SMALL_GET16 (src, 0), \ -- __extension__ __STRING2_SMALL_GET16 (src, 4), \ -- __extension__ __STRING2_SMALL_GET32 (src, 0), \ -- __extension__ __STRING2_SMALL_GET32 (src, 4) --# endif --__STRING_INLINE void *__mempcpy_small (void *, char, char, char, char, -- __uint16_t, __uint16_t, __uint32_t, -- __uint32_t, size_t); -+#if defined _FORCE_INLINES -+# if _STRING_ARCH_unaligned - __STRING_INLINE void * - __mempcpy_small (void *__dest1, - char __src0_1, char __src2_1, char __src4_1, char __src6_1, -@@ -298,44 +139,7 @@ __mempcpy_small (void *__dest1, - } - return (void *) __u; - } --# else --# ifndef _FORCE_INLINES --# define __mempcpy_args(src) \ -- ((const char *) (src))[0], \ -- __extension__ ((__STRING2_COPY_ARR2) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1] } }), \ -- __extension__ ((__STRING2_COPY_ARR3) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2] } }), \ -- __extension__ ((__STRING2_COPY_ARR4) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3] } }), \ -- __extension__ ((__STRING2_COPY_ARR5) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4] } }), \ -- __extension__ ((__STRING2_COPY_ARR6) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], ((const char *) (src))[5] } }), \ -- __extension__ ((__STRING2_COPY_ARR7) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], ((const char *) (src))[5], \ -- ((const char *) (src))[6] } }), \ -- __extension__ ((__STRING2_COPY_ARR8) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], ((const char *) (src))[5], \ -- ((const char *) (src))[6], ((const char *) (src))[7] } }) --# endif --__STRING_INLINE void *__mempcpy_small (void *, char, __STRING2_COPY_ARR2, -- __STRING2_COPY_ARR3, -- __STRING2_COPY_ARR4, -- __STRING2_COPY_ARR5, -- __STRING2_COPY_ARR6, -- __STRING2_COPY_ARR7, -- __STRING2_COPY_ARR8, size_t); -+# else - __STRING_INLINE void * - __mempcpy_small (void *__dest, char __src1, - __STRING2_COPY_ARR2 __src2, __STRING2_COPY_ARR3 __src3, -@@ -382,8 +186,6 @@ __mempcpy_small (void *__dest, char __src1, - } - return __extension__ ((void *) __u + __srclen); - } --# endif --# endif - # endif - #endif - -@@ -391,44 +193,17 @@ __mempcpy_small (void *__dest, char __src1, - /* Return pointer to C in S. */ - #ifndef _HAVE_STRING_ARCH_strchr - extern void *__rawmemchr (const void *__s, int __c); --# if __GNUC_PREREQ (3, 2) - # define strchr(s, c) \ - (__extension__ (__builtin_constant_p (c) && !__builtin_constant_p (s) \ - && (c) == '\0' \ - ? (char *) __rawmemchr (s, c) \ - : __builtin_strchr (s, c))) --# else --# define strchr(s, c) \ -- (__extension__ (__builtin_constant_p (c) && (c) == '\0' \ -- ? 
(char *) __rawmemchr (s, c) \ -- : strchr (s, c))) --# endif - #endif - - - /* Copy SRC to DEST. */ --#if (!defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0)) \ -- || defined _FORCE_INLINES --# if !defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0) --# define strcpy(dest, src) \ -- (__extension__ (__builtin_constant_p (src) \ -- ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ -- ? __strcpy_small (dest, __strcpy_args (src), \ -- strlen (src) + 1) \ -- : (char *) memcpy (dest, src, strlen (src) + 1)) \ -- : strcpy (dest, src))) --# endif -- -+#if defined _FORCE_INLINES - # if _STRING_ARCH_unaligned --# ifndef _FORCE_INLINES --# define __strcpy_args(src) \ -- __extension__ __STRING2_SMALL_GET16 (src, 0), \ -- __extension__ __STRING2_SMALL_GET16 (src, 4), \ -- __extension__ __STRING2_SMALL_GET32 (src, 0), \ -- __extension__ __STRING2_SMALL_GET32 (src, 4) --# endif --__STRING_INLINE char *__strcpy_small (char *, __uint16_t, __uint16_t, -- __uint32_t, __uint32_t, size_t); - __STRING_INLINE char * - __strcpy_small (char *__dest, - __uint16_t __src0_2, __uint16_t __src4_2, -@@ -482,42 +257,6 @@ __strcpy_small (char *__dest, - return __dest; - } - # else --# ifndef _FORCE_INLINES --# define __strcpy_args(src) \ -- __extension__ ((__STRING2_COPY_ARR2) \ -- { { ((const char *) (src))[0], '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR3) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR4) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR5) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR6) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR7) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], ((const char *) (src))[5], \ -- '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR8) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], ((const char *) (src))[5], \ -- ((const char *) (src))[6], '\0' } }) --# endif --__STRING_INLINE char *__strcpy_small (char *, __STRING2_COPY_ARR2, -- __STRING2_COPY_ARR3, -- __STRING2_COPY_ARR4, -- __STRING2_COPY_ARR5, -- __STRING2_COPY_ARR6, -- __STRING2_COPY_ARR7, -- __STRING2_COPY_ARR8, size_t); - __STRING_INLINE char * - __strcpy_small (char *__dest, - __STRING2_COPY_ARR2 __src2, __STRING2_COPY_ARR3 __src3, -@@ -570,44 +309,15 @@ __strcpy_small (char *__dest, - - /* Copy SRC to DEST, returning pointer to final NUL byte. */ - #ifdef __USE_GNU --# if !defined _HAVE_STRING_ARCH_stpcpy || defined _FORCE_INLINES --# ifndef _HAVE_STRING_ARCH_stpcpy --# if __GNUC_PREREQ (3, 4) --# define __stpcpy(dest, src) __builtin_stpcpy (dest, src) --# elif __GNUC_PREREQ (3, 0) --# define __stpcpy(dest, src) \ -- (__extension__ (__builtin_constant_p (src) \ -- ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ -- ? 
__builtin_strcpy (dest, src) + strlen (src) \ -- : ((char *) (__mempcpy) (dest, src, strlen (src) + 1) \ -- - 1)) \ -- : __stpcpy (dest, src))) --# else --# define __stpcpy(dest, src) \ -- (__extension__ (__builtin_constant_p (src) \ -- ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ -- ? __stpcpy_small (dest, __stpcpy_args (src), \ -- strlen (src) + 1) \ -- : ((char *) (__mempcpy) (dest, src, strlen (src) + 1) \ -- - 1)) \ -- : __stpcpy (dest, src))) --# endif -+# ifndef _HAVE_STRING_ARCH_stpcpy -+# define __stpcpy(dest, src) __builtin_stpcpy (dest, src) - /* In glibc we use this function frequently but for namespace reasons - we have to use the name `__stpcpy'. */ --# define stpcpy(dest, src) __stpcpy (dest, src) --# endif -+# define stpcpy(dest, src) __stpcpy (dest, src) -+# endif - --# if !__GNUC_PREREQ (3, 0) || defined _FORCE_INLINES --# if _STRING_ARCH_unaligned --# ifndef _FORCE_INLINES --# define __stpcpy_args(src) \ -- __extension__ __STRING2_SMALL_GET16 (src, 0), \ -- __extension__ __STRING2_SMALL_GET16 (src, 4), \ -- __extension__ __STRING2_SMALL_GET32 (src, 0), \ -- __extension__ __STRING2_SMALL_GET32 (src, 4) --# endif --__STRING_INLINE char *__stpcpy_small (char *, __uint16_t, __uint16_t, -- __uint32_t, __uint32_t, size_t); -+# ifndef _FORCE_INLINES -+# if _STRING_ARCH_unaligned - __STRING_INLINE char * - __stpcpy_small (char *__dest, - __uint16_t __src0_2, __uint16_t __src4_2, -@@ -665,43 +375,7 @@ __stpcpy_small (char *__dest, - } - return &__u->__c; - } --# else --# ifndef _FORCE_INLINES --# define __stpcpy_args(src) \ -- __extension__ ((__STRING2_COPY_ARR2) \ -- { { ((const char *) (src))[0], '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR3) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR4) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR5) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR6) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR7) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], ((const char *) (src))[5], \ -- '\0' } }), \ -- __extension__ ((__STRING2_COPY_ARR8) \ -- { { ((const char *) (src))[0], ((const char *) (src))[1], \ -- ((const char *) (src))[2], ((const char *) (src))[3], \ -- ((const char *) (src))[4], ((const char *) (src))[5], \ -- ((const char *) (src))[6], '\0' } }) --# endif --__STRING_INLINE char *__stpcpy_small (char *, __STRING2_COPY_ARR2, -- __STRING2_COPY_ARR3, -- __STRING2_COPY_ARR4, -- __STRING2_COPY_ARR5, -- __STRING2_COPY_ARR6, -- __STRING2_COPY_ARR7, -- __STRING2_COPY_ARR8, size_t); -+# else - __STRING_INLINE char * - __stpcpy_small (char *__dest, - __STRING2_COPY_ARR2 __src2, __STRING2_COPY_ARR3 __src3, -@@ -748,27 +422,11 @@ __stpcpy_small (char *__dest, - } - return __dest + __srclen - 1; - } --# endif - # endif - # endif - #endif - - --/* Copy no more than N characters of SRC to DEST. 
*/ --#ifndef _HAVE_STRING_ARCH_strncpy --# if __GNUC_PREREQ (3, 2) --# define strncpy(dest, src, n) __builtin_strncpy (dest, src, n) --# else --# define strncpy(dest, src, n) \ -- (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ -- ? (strlen (src) + 1 >= ((size_t) (n)) \ -- ? (char *) memcpy (dest, src, n) \ -- : strncpy (dest, src, n)) \ -- : strncpy (dest, src, n))) --# endif --#endif -- -- - /* Append no more than N characters from SRC onto DEST. */ - #ifndef _HAVE_STRING_ARCH_strncat - # ifdef _USE_STRING_ARCH_strchr -@@ -780,380 +438,29 @@ __stpcpy_small (char *__dest, - : (*((char *) __mempcpy (strchr (__dest, '\0'), \ - src, n)) = '\0', __dest)) \ - : strncat (dest, src, n); })) --# elif __GNUC_PREREQ (3, 2) --# define strncat(dest, src, n) __builtin_strncat (dest, src, n) - # else --# define strncat(dest, src, n) \ -- (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ -- ? (strlen (src) < ((size_t) (n)) \ -- ? strcat (dest, src) \ -- : strncat (dest, src, n)) \ -- : strncat (dest, src, n))) --# endif --#endif -- -- --/* Compare characters of S1 and S2. */ --#ifndef _HAVE_STRING_ARCH_strcmp --# if __GNUC_PREREQ (3, 2) --# define strcmp(s1, s2) \ -- __extension__ \ -- ({ size_t __s1_len, __s2_len; \ -- (__builtin_constant_p (s1) && __builtin_constant_p (s2) \ -- && (__s1_len = strlen (s1), __s2_len = strlen (s2), \ -- (!__string2_1bptr_p (s1) || __s1_len >= 4) \ -- && (!__string2_1bptr_p (s2) || __s2_len >= 4)) \ -- ? __builtin_strcmp (s1, s2) \ -- : (__builtin_constant_p (s1) && __string2_1bptr_p (s1) \ -- && (__s1_len = strlen (s1), __s1_len < 4) \ -- ? (__builtin_constant_p (s2) && __string2_1bptr_p (s2) \ -- ? __builtin_strcmp (s1, s2) \ -- : __strcmp_cg (s1, s2, __s1_len)) \ -- : (__builtin_constant_p (s2) && __string2_1bptr_p (s2) \ -- && (__s2_len = strlen (s2), __s2_len < 4) \ -- ? (__builtin_constant_p (s1) && __string2_1bptr_p (s1) \ -- ? __builtin_strcmp (s1, s2) \ -- : __strcmp_gc (s1, s2, __s2_len)) \ -- : __builtin_strcmp (s1, s2)))); }) --# else --# define strcmp(s1, s2) \ -- __extension__ \ -- ({ size_t __s1_len, __s2_len; \ -- (__builtin_constant_p (s1) && __builtin_constant_p (s2) \ -- && (__s1_len = strlen (s1), __s2_len = strlen (s2), \ -- (!__string2_1bptr_p (s1) || __s1_len >= 4) \ -- && (!__string2_1bptr_p (s2) || __s2_len >= 4)) \ -- ? memcmp ((const char *) (s1), (const char *) (s2), \ -- (__s1_len < __s2_len ? __s1_len : __s2_len) + 1) \ -- : (__builtin_constant_p (s1) && __string2_1bptr_p (s1) \ -- && (__s1_len = strlen (s1), __s1_len < 4) \ -- ? (__builtin_constant_p (s2) && __string2_1bptr_p (s2) \ -- ? __strcmp_cc (s1, s2, __s1_len) \ -- : __strcmp_cg (s1, s2, __s1_len)) \ -- : (__builtin_constant_p (s2) && __string2_1bptr_p (s2) \ -- && (__s2_len = strlen (s2), __s2_len < 4) \ -- ? (__builtin_constant_p (s1) && __string2_1bptr_p (s1) \ -- ? 
__strcmp_cc (s1, s2, __s2_len) \ -- : __strcmp_gc (s1, s2, __s2_len)) \ -- : strcmp (s1, s2)))); }) -+# define strncat(dest, src, n) __builtin_strncat (dest, src, n) - # endif -- --# define __strcmp_cc(s1, s2, l) \ -- (__extension__ ({ int __result = \ -- (((const unsigned char *) (const char *) (s1))[0] \ -- - ((const unsigned char *) (const char *)(s2))[0]); \ -- if (l > 0 && __result == 0) \ -- { \ -- __result = (((const unsigned char *) \ -- (const char *) (s1))[1] \ -- - ((const unsigned char *) \ -- (const char *) (s2))[1]); \ -- if (l > 1 && __result == 0) \ -- { \ -- __result = \ -- (((const unsigned char *) \ -- (const char *) (s1))[2] \ -- - ((const unsigned char *) \ -- (const char *) (s2))[2]); \ -- if (l > 2 && __result == 0) \ -- __result = \ -- (((const unsigned char *) \ -- (const char *) (s1))[3] \ -- - ((const unsigned char *) \ -- (const char *) (s2))[3]); \ -- } \ -- } \ -- __result; })) -- --# define __strcmp_cg(s1, s2, l1) \ -- (__extension__ ({ const unsigned char *__s2 = \ -- (const unsigned char *) (const char *) (s2); \ -- int __result = \ -- (((const unsigned char *) (const char *) (s1))[0] \ -- - __s2[0]); \ -- if (l1 > 0 && __result == 0) \ -- { \ -- __result = (((const unsigned char *) \ -- (const char *) (s1))[1] - __s2[1]); \ -- if (l1 > 1 && __result == 0) \ -- { \ -- __result = (((const unsigned char *) \ -- (const char *) (s1))[2] - __s2[2]); \ -- if (l1 > 2 && __result == 0) \ -- __result = (((const unsigned char *) \ -- (const char *) (s1))[3] \ -- - __s2[3]); \ -- } \ -- } \ -- __result; })) -- --# define __strcmp_gc(s1, s2, l2) \ -- (__extension__ ({ const unsigned char *__s1 = \ -- (const unsigned char *) (const char *) (s1); \ -- register int __result = \ -- __s1[0] - ((const unsigned char *) \ -- (const char *) (s2))[0]; \ -- if (l2 > 0 && __result == 0) \ -- { \ -- __result = (__s1[1] \ -- - ((const unsigned char *) \ -- (const char *) (s2))[1]); \ -- if (l2 > 1 && __result == 0) \ -- { \ -- __result = \ -- (__s1[2] - ((const unsigned char *) \ -- (const char *) (s2))[2]); \ -- if (l2 > 2 && __result == 0) \ -- __result = \ -- (__s1[3] \ -- - ((const unsigned char *) \ -- (const char *) (s2))[3]); \ -- } \ -- } \ -- __result; })) --#endif -- -- --/* Compare N characters of S1 and S2. */ --#ifndef _HAVE_STRING_ARCH_strncmp --# define strncmp(s1, s2, n) \ -- (__extension__ (__builtin_constant_p (n) \ -- && ((__builtin_constant_p (s1) \ -- && strlen (s1) < ((size_t) (n))) \ -- || (__builtin_constant_p (s2) \ -- && strlen (s2) < ((size_t) (n)))) \ -- ? strcmp (s1, s2) : strncmp (s1, s2, n))) - #endif - - - /* Return the length of the initial segment of S which - consists entirely of characters not in REJECT. */ --#if !defined _HAVE_STRING_ARCH_strcspn || defined _FORCE_INLINES --# ifndef _HAVE_STRING_ARCH_strcspn --# if __GNUC_PREREQ (3, 2) --# define strcspn(s, reject) \ -- __extension__ \ -- ({ char __r0, __r1, __r2; \ -- (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ -- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ -- ? __builtin_strcspn (s, reject) \ -- : ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \ -- ? strlen (s) \ -- : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \ -- ? __strcspn_c1 (s, __r0) \ -- : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ -- ? __strcspn_c2 (s, __r0, __r1) \ -- : (((const char *) (reject))[3] == '\0' \ -- ? 
__strcspn_c3 (s, __r0, __r1, __r2) \ -- : __builtin_strcspn (s, reject)))))) \ -- : __builtin_strcspn (s, reject)); }) --# else --# define strcspn(s, reject) \ -- __extension__ \ -- ({ char __r0, __r1, __r2; \ -- (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ -- ? ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \ -- ? strlen (s) \ -- : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \ -- ? __strcspn_c1 (s, __r0) \ -- : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ -- ? __strcspn_c2 (s, __r0, __r1) \ -- : (((const char *) (reject))[3] == '\0' \ -- ? __strcspn_c3 (s, __r0, __r1, __r2) \ -- : strcspn (s, reject))))) \ -- : strcspn (s, reject)); }) --# endif --# endif -- --__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject); --__STRING_INLINE size_t --__strcspn_c1 (const char *__s, int __reject) --{ -- size_t __result = 0; -- while (__s[__result] != '\0' && __s[__result] != __reject) -- ++__result; -- return __result; --} -- --__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1, -- int __reject2); --__STRING_INLINE size_t --__strcspn_c2 (const char *__s, int __reject1, int __reject2) --{ -- size_t __result = 0; -- while (__s[__result] != '\0' && __s[__result] != __reject1 -- && __s[__result] != __reject2) -- ++__result; -- return __result; --} -- --__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1, -- int __reject2, int __reject3); --__STRING_INLINE size_t --__strcspn_c3 (const char *__s, int __reject1, int __reject2, -- int __reject3) --{ -- size_t __result = 0; -- while (__s[__result] != '\0' && __s[__result] != __reject1 -- && __s[__result] != __reject2 && __s[__result] != __reject3) -- ++__result; -- return __result; --} -+#ifndef _HAVE_STRING_ARCH_strcspn -+# define strcspn(s, reject) __builtin_strcspn (s, reject) - #endif - - - /* Return the length of the initial segment of S which - consists entirely of characters in ACCEPT. */ --#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES --# ifndef _HAVE_STRING_ARCH_strspn --# if __GNUC_PREREQ (3, 2) --# define strspn(s, accept) \ -- __extension__ \ -- ({ char __a0, __a1, __a2; \ -- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ -- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ -- ? __builtin_strspn (s, accept) \ -- : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ -- ? ((void) (s), (size_t) 0) \ -- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ -- ? __strspn_c1 (s, __a0) \ -- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ -- ? __strspn_c2 (s, __a0, __a1) \ -- : (((const char *) (accept))[3] == '\0' \ -- ? __strspn_c3 (s, __a0, __a1, __a2) \ -- : __builtin_strspn (s, accept)))))) \ -- : __builtin_strspn (s, accept)); }) --# else --# define strspn(s, accept) \ -- __extension__ \ -- ({ char __a0, __a1, __a2; \ -- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ -- ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ -- ? ((void) (s), (size_t) 0) \ -- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ -- ? __strspn_c1 (s, __a0) \ -- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ -- ? __strspn_c2 (s, __a0, __a1) \ -- : (((const char *) (accept))[3] == '\0' \ -- ? 
__strspn_c3 (s, __a0, __a1, __a2) \ -- : strspn (s, accept))))) \ -- : strspn (s, accept)); }) --# endif --# endif -- --__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept); --__STRING_INLINE size_t --__strspn_c1 (const char *__s, int __accept) --{ -- size_t __result = 0; -- /* Please note that __accept never can be '\0'. */ -- while (__s[__result] == __accept) -- ++__result; -- return __result; --} -- --__STRING_INLINE size_t __strspn_c2 (const char *__s, int __accept1, -- int __accept2); --__STRING_INLINE size_t --__strspn_c2 (const char *__s, int __accept1, int __accept2) --{ -- size_t __result = 0; -- /* Please note that __accept1 and __accept2 never can be '\0'. */ -- while (__s[__result] == __accept1 || __s[__result] == __accept2) -- ++__result; -- return __result; --} -- --__STRING_INLINE size_t __strspn_c3 (const char *__s, int __accept1, -- int __accept2, int __accept3); --__STRING_INLINE size_t --__strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3) --{ -- size_t __result = 0; -- /* Please note that __accept1 to __accept3 never can be '\0'. */ -- while (__s[__result] == __accept1 || __s[__result] == __accept2 -- || __s[__result] == __accept3) -- ++__result; -- return __result; --} -+#ifndef _HAVE_STRING_ARCH_strspn -+# define strspn(s, accept) __builtin_strspn (s, accept) - #endif - - - /* Find the first occurrence in S of any character in ACCEPT. */ --#if !defined _HAVE_STRING_ARCH_strpbrk || defined _FORCE_INLINES --# ifndef _HAVE_STRING_ARCH_strpbrk --# if __GNUC_PREREQ (3, 2) --# define strpbrk(s, accept) \ -- __extension__ \ -- ({ char __a0, __a1, __a2; \ -- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ -- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ -- ? __builtin_strpbrk (s, accept) \ -- : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ -- ? ((void) (s), (char *) NULL) \ -- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ -- ? __builtin_strchr (s, __a0) \ -- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ -- ? __strpbrk_c2 (s, __a0, __a1) \ -- : (((const char *) (accept))[3] == '\0' \ -- ? __strpbrk_c3 (s, __a0, __a1, __a2) \ -- : __builtin_strpbrk (s, accept)))))) \ -- : __builtin_strpbrk (s, accept)); }) --# else --# define strpbrk(s, accept) \ -- __extension__ \ -- ({ char __a0, __a1, __a2; \ -- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ -- ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ -- ? ((void) (s), (char *) NULL) \ -- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ -- ? strchr (s, __a0) \ -- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ -- ? __strpbrk_c2 (s, __a0, __a1) \ -- : (((const char *) (accept))[3] == '\0' \ -- ? __strpbrk_c3 (s, __a0, __a1, __a2) \ -- : strpbrk (s, accept))))) \ -- : strpbrk (s, accept)); }) --# endif --# endif -- --__STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1, -- int __accept2); --__STRING_INLINE char * --__strpbrk_c2 (const char *__s, int __accept1, int __accept2) --{ -- /* Please note that __accept1 and __accept2 never can be '\0'. */ -- while (*__s != '\0' && *__s != __accept1 && *__s != __accept2) -- ++__s; -- return *__s == '\0' ? NULL : (char *) (size_t) __s; --} -- --__STRING_INLINE char *__strpbrk_c3 (const char *__s, int __accept1, -- int __accept2, int __accept3); --__STRING_INLINE char * --__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3) --{ -- /* Please note that __accept1 to __accept3 never can be '\0'. 
*/
--  while (*__s != '\0' && *__s != __accept1 && *__s != __accept2
--	 && *__s != __accept3)
--    ++__s;
--  return *__s == '\0' ? NULL : (char *) (size_t) __s;
--}
--#endif
--
--
--/* Find the first occurrence of NEEDLE in HAYSTACK.  Newer gcc versions
--   do this itself.  */
--#if !defined _HAVE_STRING_ARCH_strstr && !__GNUC_PREREQ (2, 97)
--# define strstr(haystack, needle) \
--  (__extension__ (__builtin_constant_p (needle) && __string2_1bptr_p (needle) \
--		  ? (((const char *) (needle))[0] == '\0' \
--		     ? (char *) (size_t) (haystack) \
--		     : (((const char *) (needle))[1] == '\0' \
--			? strchr (haystack, \
--				  ((const char *) (needle))[0]) \
--			: strstr (haystack, needle))) \
--		  : strstr (haystack, needle)))
-+#ifndef _HAVE_STRING_ARCH_strpbrk
-+# define strpbrk(s, accept) __builtin_strpbrk (s, accept)
- #endif
- 
- 
-diff --git a/string/strncat.c b/string/strncat.c
-index dcfb04d..a9cb913 100644
---- a/string/strncat.c
-+++ b/string/strncat.c
-@@ -1,4 +1,4 @@
--/* Copyright (C) 1991,1997,2011 Free Software Foundation, Inc.
-+/* Copyright (C) 1991-2022 Free Software Foundation, Inc.
-    This file is part of the GNU C Library.
- 
-    The GNU C Library is free software; you can redistribute it and/or
-@@ -13,14 +13,10 @@
- 
-    You should have received a copy of the GNU Lesser General Public
-    License along with the GNU C Library; if not, see
--   <http://www.gnu.org/licenses/>.  */
-+   <https://www.gnu.org/licenses/>.  */
- 
- #include <string.h>
- 
--#ifdef _LIBC
--# include <memcopy.h>
--#endif
--
- #ifndef STRNCAT
- # undef strncat
- # define STRNCAT strncat
-@@ -29,54 +25,16 @@
- char *
- STRNCAT (char *s1, const char *s2, size_t n)
- {
--  char c;
-   char *s = s1;
- 
-   /* Find the end of S1.  */
--  do
--    c = *s1++;
--  while (c != '\0');
--
--  /* Make S1 point before next character, so we can increment
--     it while memory is read (wins on pipelined cpus).  */
--  s1 -= 2;
-+  s1 += strlen (s1);
- 
--  if (n >= 4)
--    {
--      size_t n4 = n >> 2;
--      do
--	{
--	  c = *s2++;
--	  *++s1 = c;
--	  if (c == '\0')
--	    return s;
--	  c = *s2++;
--	  *++s1 = c;
--	  if (c == '\0')
--	    return s;
--	  c = *s2++;
--	  *++s1 = c;
--	  if (c == '\0')
--	    return s;
--	  c = *s2++;
--	  *++s1 = c;
--	  if (c == '\0')
--	    return s;
--	} while (--n4 > 0);
--      n &= 3;
--    }
-+  size_t ss = __strnlen (s2, n);
- 
--  while (n > 0)
--    {
--      c = *s2++;
--      *++s1 = c;
--      if (c == '\0')
--	return s;
--      n--;
--    }
--
--  if (c != '\0')
--    *++s1 = '\0';
-+  s1[ss] = '\0';
-+  memcpy (s1, s2, ss);
- 
-   return s;
- }
-+
-diff --git a/string/strncpy.c b/string/strncpy.c
-index 19d501e..83fb610 100644
---- a/string/strncpy.c
-+++ b/string/strncpy.c
-@@ -1,4 +1,4 @@
--/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
-+/* Copyright (C) 1991-2022 Free Software Foundation, Inc.
-    This file is part of the GNU C Library.
- 
-    The GNU C Library is free software; you can redistribute it and/or
-@@ -13,75 +13,22 @@
- 
-    You should have received a copy of the GNU Lesser General Public
-    License along with the GNU C Library; if not, see
--   <http://www.gnu.org/licenses/>.  */
-+   <https://www.gnu.org/licenses/>.  */
- 
- #include <string.h>
--#include <memcopy.h>
- 
- #undef strncpy
- 
- #ifndef STRNCPY
--#define STRNCPY strncpy
-+ #define STRNCPY strncpy
- #endif
- 
- char *
- STRNCPY (char *s1, const char *s2, size_t n)
- {
--  char c;
--  char *s = s1;
--
--  --s1;
--
--  if (n >= 4)
--    {
--      size_t n4 = n >> 2;
--
--      for (;;)
--	{
--	  c = *s2++;
--	  *++s1 = c;
--	  if (c == '\0')
--	    break;
--	  c = *s2++;
--	  *++s1 = c;
--	  if (c == '\0')
--	    break;
--	  c = *s2++;
--	  *++s1 = c;
--	  if (c == '\0')
--	    break;
--	  c = *s2++;
--	  *++s1 = c;
--	  if (c == '\0')
--	    break;
--	  if (--n4 == 0)
--	    goto last_chars;
--	}
--      n = n - (s1 - s) - 1;
--      if (n == 0)
--	return s;
--      goto zero_fill;
--    }
--
-- last_chars:
--  n &= 3;
--  if (n == 0)
--    return s;
--
--  do
--    {
--      c = *s2++;
--      *++s1 = c;
--      if (--n == 0)
--	return s;
--    }
--  while (c != '\0');
--
-- zero_fill:
--  do
--    *++s1 = '\0';
--  while (--n > 0);
--
--  return s;
-+  size_t size = __strnlen (s2, n);
-+  if (size != n)
-+    memset (s1 + size, '\0', n - size);
-+  return memcpy (s1, s2, size);
- }
- libc_hidden_builtin_def (strncpy)
-diff --git a/sysdeps/gnu/bits/utmp.h b/sysdeps/gnu/bits/utmp.h
-index aed2750..434a533 100644
---- a/sysdeps/gnu/bits/utmp.h
-+++ b/sysdeps/gnu/bits/utmp.h
-@@ -59,10 +59,14 @@ struct utmp
- {
-   short int ut_type;		/* Type of login.  */
-   pid_t ut_pid;			/* Process ID of login process.  */
--  char ut_line[UT_LINESIZE];	/* Devicename.  */
--  char ut_id[4];		/* Inittab ID.  */
--  char ut_user[UT_NAMESIZE];	/* Username.  */
--  char ut_host[UT_HOSTSIZE];	/* Hostname for remote login.  */
-+  char ut_line[UT_LINESIZE]
-+    __attribute_nonstring__;	/* Devicename.  */
-+  char ut_id[4]
-+    __attribute_nonstring__;	/* Inittab ID.  */
-+  char ut_user[UT_NAMESIZE]
-+    __attribute_nonstring__;	/* Username.  */
-+  char ut_host[UT_HOSTSIZE]
-+    __attribute_nonstring__;	/* Hostname for remote login.  */
-   struct exit_status ut_exit;	/* Exit status of a process marked
- 				   as DEAD_PROCESS.  */
-   /* The ut_session and ut_tv fields must be the same size when compiled
-diff --git a/sysdeps/gnu/bits/utmpx.h b/sysdeps/gnu/bits/utmpx.h
-index f8716ca..13d84e4 100644
---- a/sysdeps/gnu/bits/utmpx.h
-+++ b/sysdeps/gnu/bits/utmpx.h
-@@ -56,10 +56,14 @@ struct utmpx
- {
-   short int ut_type;		/* Type of login.  */
-   __pid_t ut_pid;		/* Process ID of login process.  */
--  char ut_line[__UT_LINESIZE];	/* Devicename.  */
--  char ut_id[4];		/* Inittab ID.  */
--  char ut_user[__UT_NAMESIZE];	/* Username.  */
--  char ut_host[__UT_HOSTSIZE];	/* Hostname for remote login.  */
-+  char ut_line[__UT_LINESIZE]
-+    __attribute_nonstring__;	/* Devicename.  */
-+  char ut_id[4]
-+    __attribute_nonstring__;	/* Inittab ID.  */
-+  char ut_user[__UT_NAMESIZE]
-+    __attribute_nonstring__;	/* Username.  */
-+  char ut_host[__UT_HOSTSIZE]
-+    __attribute_nonstring__;	/* Hostname for remote login.  */
-   struct __exit_status ut_exit;	/* Exit status of a process marked
- 				   as DEAD_PROCESS.
*/ - -diff --git a/sysdeps/unix/sysv/linux/if_index.c b/sysdeps/unix/sysv/linux/if_index.c -index 8ba5eae..b620d21 100644 ---- a/sysdeps/unix/sysv/linux/if_index.c -+++ b/sysdeps/unix/sysv/linux/if_index.c -@@ -38,12 +38,19 @@ __if_nametoindex (const char *ifname) - return 0; - #else - struct ifreq ifr; -+ if (strlen (ifname) >= IFNAMSIZ) -+ { -+ __set_errno (ENODEV); -+ return 0; -+ } -+ -+ strncpy (ifr.ifr_name, ifname, sizeof (ifr.ifr_name)); -+ - int fd = __opensock (); - - if (fd < 0) - return 0; - -- strncpy (ifr.ifr_name, ifname, sizeof (ifr.ifr_name)); - if (__ioctl (fd, SIOCGIFINDEX, &ifr) < 0) - { - int saved_errno = errno; -diff --git a/timezone/zic.c b/timezone/zic.c -index a5202a1..772d081 100644 ---- a/timezone/zic.c -+++ b/timezone/zic.c -@@ -1609,7 +1609,7 @@ writezone(const char *const name, const char *const string) - } - #define DO(field) ((void) fwrite(tzh.field, sizeof tzh.field, 1, fp)) - tzh = tzh0; -- (void) strncpy(tzh.tzh_magic, TZ_MAGIC, sizeof tzh.tzh_magic); -+ memcpy(tzh.tzh_magic, TZ_MAGIC, sizeof tzh.tzh_magic); - tzh.tzh_version[0] = ZIC_VERSION; - convert(eitol(thistypecnt), tzh.tzh_ttisgmtcnt); - convert(eitol(thistypecnt), tzh.tzh_ttisstdcnt); diff --git a/ci/official/containers/ml_build_arm64/builder.packages.txt b/ci/official/containers/ml_build_arm64/builder.packages.txt deleted file mode 100644 index 2be317ca4e256b..00000000000000 --- a/ci/official/containers/ml_build_arm64/builder.packages.txt +++ /dev/null @@ -1,47 +0,0 @@ -# Packages needed to build devtoolset -file -flex -make -build-essential -patch -rpm2cpio -unar -wget -xz-utils -cpio -gawk -texinfo -gettext - -# Other build-related tools -software-properties-common -apt-transport-https -autoconf -automake -ca-certificates -pkg-config -libcurl3-dev -libcurl4-openssl-dev -libfreetype6-dev -libhdf5-serial-dev -libomp-18-dev -libssl-dev -libtool -libssl-dev -libxml2-dev -libxslt1-dev -libzmq3-dev -llvm-18 -clang-18 -clang-tidy-18 -lld-18 -clang-format-12 -curl -git -parallel -sudo -swig -unzip -zip -openjdk-21-jdk -vim diff --git a/ci/official/containers/ml_build_arm64/devel.bashrc b/ci/official/containers/ml_build_arm64/devel.bashrc deleted file mode 100644 index 755d48783b1b9a..00000000000000 --- a/ci/official/containers/ml_build_arm64/devel.bashrc +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
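Note on the glibc patch quoted and removed above: it rewrites strncat and strncpy in terms of __strnlen, memcpy and memset, and the if_index.c and zic.c hunks stop relying on strncpy for NUL-termination (strncpy leaves the destination unterminated whenever the source is at least n bytes long, which is why __if_nametoindex now rejects names of IFNAMSIZ or more up front and zic.c copies the TZ_MAGIC bytes with memcpy). The following is only a minimal C sketch of those simplified semantics, not code from the patch: sketch_strncpy and sketch_strncat are illustrative names, and standard strnlen stands in for glibc's internal __strnlen.

#include <string.h>

/* Same observable behaviour as strncpy: copy at most n bytes of src into
   dst, zero-fill the rest of the n-byte destination, and return dst.  When
   strnlen (src, n) == n the result is NOT NUL-terminated, hence the explicit
   length checks added to callers in the patch above.  */
char *
sketch_strncpy (char *dst, const char *src, size_t n)
{
  size_t len = strnlen (src, n);         /* bytes actually copied */
  if (len != n)
    memset (dst + len, '\0', n - len);   /* pad the tail with NUL bytes */
  return memcpy (dst, src, len);
}

/* Same observable behaviour as strncat: append at most n bytes of src to
   the end of dst and always NUL-terminate the result.  */
char *
sketch_strncat (char *dst, const char *src, size_t n)
{
  char *end = dst + strlen (dst);        /* find the end of dst */
  size_t len = strnlen (src, n);         /* bytes to append */
  end[len] = '\0';                       /* terminate first, as in the patch */
  memcpy (end, src, len);
  return dst;
}

The related bits/utmp.h and bits/utmpx.h hunks mark the fixed-width ut_* fields with __attribute_nonstring__, telling GCC that these arrays are deliberately not NUL-terminated strings so that bounded copies into them do not trigger -Wstringop-truncation.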
-# -# ============================================================================== - -# Do not print anything if this is not being used interactively -[ -z "$PS1" ] && return - -# Set up attractive prompt -export PS1="\[\e[31m\]tf-docker\[\e[m\] \[\e[33m\]\w\[\e[m\] > " -export TERM=xterm-256color -alias grep="grep --color=auto" -alias ls="ls --color=auto" -# Fix nvidia-docker -ldconfig diff --git a/ci/official/containers/ml_build_arm64/ld.so.conf b/ci/official/containers/ml_build_arm64/ld.so.conf deleted file mode 100644 index e2aa028720ed2c..00000000000000 --- a/ci/official/containers/ml_build_arm64/ld.so.conf +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2023 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# Builds a devtoolset cross-compiler targeting manylinux2014 (glibc 2.17 / libstdc++ 4.8). - -/lib64 diff --git a/ci/official/containers/ml_build_arm64/requirements.txt b/ci/official/containers/ml_build_arm64/requirements.txt deleted file mode 100644 index 6ae6deda141234..00000000000000 --- a/ci/official/containers/ml_build_arm64/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -portpicker==1.6.0 -# For wheel verification, and uploading -auditwheel ~= 6.1.0 -twine ~= 6.1.0 - -# uv is faster than pip for installing Python packages. -uv ~= 0.5.30 \ No newline at end of file diff --git a/ci/official/containers/ml_build_arm64/setup.packages.sh b/ci/official/containers/ml_build_arm64/setup.packages.sh deleted file mode 100755 index 347b853e349385..00000000000000 --- a/ci/official/containers/ml_build_arm64/setup.packages.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# setup.packages.sh: Given a list of Ubuntu packages, install them and clean up. 
-# Usage: setup.packages.sh -set -e - -# Prevent apt install tzinfo from asking our location (assumes UTC) -export DEBIAN_FRONTEND=noninteractive - -apt-get update -# Remove commented lines and blank lines -apt-get install -y --no-install-recommends $(sed -e '/^\s*#.*$/d' -e '/^\s*$/d' "$1" | sort -u) -rm -rf /var/lib/apt/lists/* \ No newline at end of file diff --git a/ci/official/containers/ml_build_arm64/setup.python.sh b/ci/official/containers/ml_build_arm64/setup.python.sh deleted file mode 100755 index ff5ade526536fa..00000000000000 --- a/ci/official/containers/ml_build_arm64/setup.python.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# setup.python.sh: Install a specific Python version and packages for it. -# Usage: setup.python.sh -set -xe - -source ~/.bashrc -VERSION=$1 -REQUIREMENTS=$2 - -add-apt-repository ppa:deadsnakes/ppa -# Install Python packages for this container's version -if [[ ${VERSION} == "python3.13-nogil" ]]; then - cat >pythons.txt <pythons.txt <pythons.txt < requirements_without_twine.txt - REQUIREMENTS=requirements_without_twine.txt -fi - -# Disable the cache dir to save image space, and install packages -/usr/bin/$VERSION -m pip install --no-cache-dir -r $REQUIREMENTS -U diff --git a/ci/official/containers/ml_build_arm64/setup.sources.sh b/ci/official/containers/ml_build_arm64/setup.sources.sh deleted file mode 100755 index f8c87d4ceade60..00000000000000 --- a/ci/official/containers/ml_build_arm64/setup.sources.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# setup.python.sh: Install a specific Python version and packages for it. -# Usage: setup.python.sh - -# Sets up custom apt sources for our TF images. 
- -# Prevent apt install tzinfo from asking our location (assumes UTC) -export DEBIAN_FRONTEND=noninteractive - -# Set up shared custom sources -apt-get update -apt-get install -y gnupg ca-certificates - -# Deadsnakes: https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa -apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F23C5A6CF475977595C89F51BA6932366A755776 - -# LLVM/Clang: https://apt.llvm.org/ -apt-key adv --fetch-keys https://apt.llvm.org/llvm-snapshot.gpg.key - -# Set up custom sources -cat >/etc/apt/sources.list.d/custom.list <(data); } -TFE_TensorHandle* MakeLoggedTensorHandle( - TFE_Context* context, const tensorflow::string& logging_device_name, - std::unique_ptr t, TF_Status* status) { +TFE_TensorHandle* MakeLoggedTensorHandle(TFE_Context* context, + const std::string& logging_device_name, + std::unique_ptr t, + TF_Status* status) { auto dtype = TFE_TensorHandleDataType(t->tensor); TFE_CustomDeviceTensorHandleMethods handle_methods; handle_methods.num_dims = &LoggedTensorNumDims; diff --git a/tensorflow/c/experimental/ops/gen/cpp/BUILD b/tensorflow/c/experimental/ops/gen/cpp/BUILD index 1e1d4eca98106a..05bd307fd499ec 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/BUILD +++ b/tensorflow/c/experimental/ops/gen/cpp/BUILD @@ -28,7 +28,6 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:op_gen_lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/platform:str_util", "@com_google_absl//absl/strings", ], alwayslink = 1, diff --git a/tensorflow/c/experimental/ops/gen/cpp/cpp_generator.cc b/tensorflow/c/experimental/ops/gen/cpp/cpp_generator.cc index 45e7b87069e361..e4b82c59072123 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/cpp_generator.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/cpp_generator.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/c/experimental/ops/gen/cpp/cpp_generator.h" +#include #include #include "tensorflow/c/experimental/ops/gen/common/path_config.h" diff --git a/tensorflow/c/experimental/ops/gen/cpp/cpp_generator_test.cc b/tensorflow/c/experimental/ops/gen/cpp/cpp_generator_test.cc index e1db2c9b8ce14b..d97bd7ee2d921f 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/cpp_generator_test.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/cpp_generator_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/c/experimental/ops/gen/cpp/cpp_generator.h" #include +#include #include #include "tensorflow/c/experimental/ops/gen/common/path_config.h" diff --git a/tensorflow/c/experimental/ops/gen/cpp/renderers/cpp_config.cc b/tensorflow/c/experimental/ops/gen/cpp/renderers/cpp_config.cc index 7c8231a71133f5..cd4e0af1ec8454 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/renderers/cpp_config.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/renderers/cpp_config.cc @@ -14,6 +14,8 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/c/experimental/ops/gen/cpp/renderers/cpp_config.h" +#include + #include "absl/strings/ascii.h" #include "absl/strings/str_split.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/c/experimental/ops/gen/cpp/renderers/guard_renderer.cc b/tensorflow/c/experimental/ops/gen/cpp/renderers/guard_renderer.cc index 50db08df1db988..b3d33c379549b5 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/renderers/guard_renderer.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/renderers/guard_renderer.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/c/experimental/ops/gen/cpp/renderers/guard_renderer.h" #include +#include #include "tensorflow/c/experimental/ops/gen/common/case_format.h" #include "tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.h" diff --git a/tensorflow/c/experimental/ops/gen/cpp/renderers/include_renderer.cc b/tensorflow/c/experimental/ops/gen/cpp/renderers/include_renderer.cc index 0ec8108bee7aaf..5aea065a45dffc 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/renderers/include_renderer.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/renderers/include_renderer.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/c/experimental/ops/gen/cpp/renderers/include_renderer.h" +#include + #include "tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.h" #include "tensorflow/c/experimental/ops/gen/cpp/renderers/renderer_context.h" #include "tensorflow/core/platform/path.h" diff --git a/tensorflow/c/experimental/ops/gen/cpp/renderers/namespace_renderer.cc b/tensorflow/c/experimental/ops/gen/cpp/renderers/namespace_renderer.cc index b490cc7fe9e86a..96f317f6201286 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/renderers/namespace_renderer.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/renderers/namespace_renderer.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/c/experimental/ops/gen/cpp/renderers/namespace_renderer.h" +#include + #include "tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.h" #include "tensorflow/c/experimental/ops/gen/cpp/renderers/renderer_context.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/c/experimental/ops/gen/cpp/renderers/op_renderer.cc b/tensorflow/c/experimental/ops/gen/cpp/renderers/op_renderer.cc index 63cb5f30eb1d9d..766adae9a558a1 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/renderers/op_renderer.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/renderers/op_renderer.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/c/experimental/ops/gen/cpp/renderers/op_renderer.h" #include +#include #include #include "absl/strings/str_cat.h" diff --git a/tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.cc b/tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.cc index 6a608d759a3753..5acf000cd71169 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.cc @@ -14,9 +14,12 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.h" +#include + #include "absl/log/log.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "absl/strings/substitute.h" #include "tensorflow/c/experimental/ops/gen/cpp/renderers/renderer_context.h" #include "tensorflow/core/lib/strings/str_util.h" diff --git a/tensorflow/c/experimental/ops/gen/cpp/renderers/renderer_test.cc b/tensorflow/c/experimental/ops/gen/cpp/renderers/renderer_test.cc index 6621d1aea2c217..cdcbad089a556e 100644 --- a/tensorflow/c/experimental/ops/gen/cpp/renderers/renderer_test.cc +++ b/tensorflow/c/experimental/ops/gen/cpp/renderers/renderer_test.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/c/experimental/ops/gen/cpp/renderers/renderer.h" +#include + #include "tensorflow/c/experimental/ops/gen/common/path_config.h" #include "tensorflow/c/experimental/ops/gen/common/source_code.h" #include "tensorflow/c/experimental/ops/gen/cpp/renderers/cpp_config.h" diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index 6e8dbc8512fa86..a7e93841a98627 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -117,6 +117,7 @@ tf_cc_test( "//tensorflow/core:testlib", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", "@local_xla//xla/tsl/protobuf:error_codes_proto_impl_cc", ], ) diff --git a/tensorflow/c/kernels/bitcast_op_test.cc b/tensorflow/c/kernels/bitcast_op_test.cc index c44bc832547dab..e7ae841194f226 100644 --- a/tensorflow/c/kernels/bitcast_op_test.cc +++ b/tensorflow/c/kernels/bitcast_op_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "absl/container/inlined_vector.h" #include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "xla/tsl/protobuf/error_codes.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" diff --git a/tensorflow/c/kernels/merge_summary_op.cc b/tensorflow/c/kernels/merge_summary_op.cc index ddbc3440d47dc1..9945f473874e20 100644 --- a/tensorflow/c/kernels/merge_summary_op.cc +++ b/tensorflow/c/kernels/merge_summary_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include #include +#include #include #include "absl/log/check.h" diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc index b8b8b2f29cfe13..3064224e9b12bf 100644 --- a/tensorflow/c/kernels_test.cc +++ b/tensorflow/c/kernels_test.cc @@ -405,7 +405,7 @@ TEST_F(TestKernelAttr, String) { /*max_length*/ 5, status); EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - EXPECT_EQ("bunny", string(static_cast(val.get()), 5)); + EXPECT_EQ("bunny", std::string(static_cast(val.get()), 5)); TF_DeleteStatus(status); return static_cast(s); }; @@ -421,7 +421,7 @@ TEST_F(TestKernelAttr, StringList) { s->created = true; s->compute_called = false; - std::vector list = {"bugs", "bunny", "duck"}; + std::vector list = {"bugs", "bunny", "duck"}; int list_total_size = 0; for (const auto& s : list) { list_total_size += s.size(); @@ -440,7 +440,8 @@ TEST_F(TestKernelAttr, StringList) { for (size_t i = 0; i < list.size(); ++i) { EXPECT_EQ(list[i].size(), lens[i]) << i; - EXPECT_EQ(list[i], string(static_cast(values[i]), lens[i])) + EXPECT_EQ(list[i], + std::string(static_cast(values[i]), lens[i])) << i; } TF_DeleteStatus(status); @@ -823,7 +824,7 @@ TEST(TestKernel, TestInputAndOutputCount) { TF_Status* s = TF_NewStatus(); TF_GetInput(ctx, 0, &input, s); EXPECT_EQ(TF_OK, TF_GetCode(s)) << "Failed to get input: " << TF_Message(s); - EXPECT_EQ(123, *static_cast(TF_TensorData(input))); + EXPECT_EQ(123, *static_cast(TF_TensorData(input))); TF_GetInput(ctx, -1, &input, s); EXPECT_EQ(TF_OUT_OF_RANGE, TF_GetCode(s)); TF_GetInput(ctx, 3, &input, s); @@ -866,7 +867,7 @@ TEST(TestKernel, TestInputAndOutputCount) { p.device = &dummy_device; p.step_id = 43; - Tensor t(tensorflow::uint8(123)); + Tensor t(uint8_t(123)); absl::InlinedVector inputs; // Simulate 2 inputs @@ -886,7 +887,7 @@ TEST(TestKernel, TestInputAndOutputCount) { ASSERT_EQ(2, num_inputs); ASSERT_EQ(1, num_outputs); - ASSERT_EQ(123, ctx.mutable_output(0)->scalar()()); + ASSERT_EQ(123, ctx.mutable_output(0)->scalar()()); } } diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index c2a4d73f8ad620..e49f5a099ee72d 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -84,7 +84,7 @@ std::string GetHandleShapeAndType(TF_Graph* graph, TF_Output output) { *out_shape_and_type->mutable_type() = p.type; } } - string result; + std::string result; handle_data.SerializeToString(&result); return result; } diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index bfa665a09f7588..3131284b4802bd 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -359,6 +359,7 @@ cc_library( "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", ], alwayslink = 1, ) diff --git a/tensorflow/cc/experimental/libexport/load.cc b/tensorflow/cc/experimental/libexport/load.cc index fd7f2d159e6166..670fa4f51f5cc1 100644 --- a/tensorflow/cc/experimental/libexport/load.cc +++ b/tensorflow/cc/experimental/libexport/load.cc @@ -31,8 +31,9 @@ using protobuf::RepeatedPtrField; absl::StatusOr TFPackage::Load(const std::string& path) { // Load the proto TFPackage tf_package; - const string saved_model_pb_path = io::JoinPath(path, kSavedModelFilenamePb); - const string saved_model_pbtxt_path = + const std::string saved_model_pb_path = + io::JoinPath(path, kSavedModelFilenamePb); + const std::string saved_model_pbtxt_path = io::JoinPath(path, kSavedModelFilenamePbTxt); 
if (Env::Default()->FileExists(saved_model_pb_path).ok()) { TF_RETURN_IF_ERROR(ReadBinaryProto(Env::Default(), saved_model_pb_path, diff --git a/tensorflow/cc/experimental/libexport/save_test.cc b/tensorflow/cc/experimental/libexport/save_test.cc index fbcc3c2e53b426..1a0ba4f0662a92 100644 --- a/tensorflow/cc/experimental/libexport/save_test.cc +++ b/tensorflow/cc/experimental/libexport/save_test.cc @@ -25,7 +25,7 @@ namespace libexport { namespace { TEST(SaveTest, TestDirectoryStructure) { - const string base_dir = tensorflow::io::JoinPath( + const std::string base_dir = tensorflow::io::JoinPath( tensorflow::testing::TmpDir(), "test_directory_structure"); TF_ASSERT_OK(Save(base_dir)); TF_ASSERT_OK(Env::Default()->IsDirectory(base_dir)); diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc index 0185fd11d37dec..b457f602b4a5b0 100644 --- a/tensorflow/cc/framework/scope.cc +++ b/tensorflow/cc/framework/scope.cc @@ -79,7 +79,7 @@ Scope Scope::DisabledShapeInferenceScope() { /* disable_shape_inference */ true)); } -Scope::Impl::Impl(const Scope& other, Tags::ScopeName, const string& name, +Scope::Impl::Impl(const Scope& other, Tags::ScopeName, const std::string& name, bool copy_names) : graph_(other.impl()->graph_), status_(other.impl()->status_), @@ -98,8 +98,8 @@ Scope::Impl::Impl(const Scope& other, Tags::ScopeName, const string& name, colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} -Scope::Impl::Impl(const Scope& other, Tags::OpName, const string& name, - const string& op_name) +Scope::Impl::Impl(const Scope& other, Tags::OpName, const std::string& name, + const std::string& op_name) : graph_(other.impl()->graph_), status_(other.impl()->status_), name_map_(other.impl()->name_map_), @@ -140,7 +140,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ControlDeps, colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} -Scope::Impl::Impl(const Scope& other, Tags::Device, const string& device) +Scope::Impl::Impl(const Scope& other, Tags::Device, const std::string& device) : graph_(other.impl()->graph_), status_(other.impl()->status_), name_map_(other.impl()->name_map_), @@ -158,7 +158,7 @@ Scope::Impl::Impl(const Scope& other, Tags::Device, const string& device) disable_shape_inference_(other.impl()->disable_shape_inference_) {} Scope::Impl::Impl(const Scope& other, Tags::SingleUseScope, - const string& op_name) + const std::string& op_name) : graph_(other.impl()->graph_), status_(other.impl()->status_), name_map_(other.impl()->name_map_), @@ -193,7 +193,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ExitOnError) disable_shape_inference_(other.impl()->disable_shape_inference_) {} Scope::Impl::Impl(const Scope& other, Tags::KernelLabel, - const string& kernel_label) + const std::string& kernel_label) : graph_(other.impl()->graph_), status_(other.impl()->status_), name_map_(other.impl()->name_map_), @@ -227,12 +227,12 @@ Scope::Impl::Impl(const Scope& other, Tags::Colocate, xla_cluster_(other.impl()->xla_cluster_), colocation_constraints_( clear_colocations - ? std::unordered_set() + ? 
std::unordered_set() : other.impl()->GetColocationConstraints(colocate_with_op)), disable_shape_inference_(other.impl()->disable_shape_inference_) {} Scope::Impl::Impl(const Scope& other, Tags::AssignedDevice, - const string& assigned_device) + const std::string& assigned_device) : graph_(other.impl()->graph_), status_(other.impl()->status_), name_map_(other.impl()->name_map_), @@ -250,7 +250,7 @@ Scope::Impl::Impl(const Scope& other, Tags::AssignedDevice, disable_shape_inference_(other.impl()->disable_shape_inference_) {} Scope::Impl::Impl(const Scope& other, Tags::XlaCluster, - const string& xla_cluster) + const std::string& xla_cluster) : graph_(other.impl()->graph_), status_(other.impl()->status_), name_map_(other.impl()->name_map_), @@ -267,13 +267,13 @@ Scope::Impl::Impl(const Scope& other, Tags::XlaCluster, colocation_constraints_(other.impl()->colocation_constraints_), disable_shape_inference_(other.impl()->disable_shape_inference_) {} -std::unordered_set Scope::Impl::GetColocationConstraints( +std::unordered_set Scope::Impl::GetColocationConstraints( const Operation& colocate_with_op) const { - std::unordered_set current_constraints(colocation_constraints_); + std::unordered_set current_constraints(colocation_constraints_); const AttrSlice attrs = colocate_with_op.node()->attrs(); - std::vector node_constraints; + std::vector node_constraints; if (TryGetNodeAttr(attrs, kColocationAttrName, &node_constraints)) { - for (const string& entry : node_constraints) { + for (const std::string& entry : node_constraints) { absl::string_view s(entry); if (absl::ConsumePrefix(&s, kColocationGroupPrefix)) { current_constraints.emplace(s); @@ -335,13 +335,14 @@ void Scope::UpdateBuilder(NodeBuilder* builder) const { } if (!impl()->colocation_constraints_.empty()) { - std::vector constraints(impl()->colocation_constraints_.begin(), - impl()->colocation_constraints_.end()); + std::vector constraints( + impl()->colocation_constraints_.begin(), + impl()->colocation_constraints_.end()); // Sort the set. std::sort(constraints.begin(), constraints.end()); // Add loc:@ prefix std::transform(constraints.begin(), constraints.end(), constraints.begin(), - [](const string& s) { + [](const std::string& s) { return absl::StrCat(kColocationGroupPrefix, s); }); builder->Attr(kColocationAttrName, constraints); @@ -357,8 +358,8 @@ void Scope::UpdateBuilder(NodeBuilder* builder) const { } } -string Scope::Impl::GetUniqueName(const string& prefix, - bool check_single_use) const { +std::string Scope::Impl::GetUniqueName(const std::string& prefix, + bool check_single_use) const { if (check_single_use && single_use_scope()) { if (*scope_used_) { *status_ = @@ -373,7 +374,7 @@ string Scope::Impl::GetUniqueName(const string& prefix, name_map_->insert({prefix, 0}); return prefix; } - string unique_name; + std::string unique_name; do { unique_name = absl::StrCat(prefix, kSuffixSeparator, ++entry->second); } while (name_map_->find(unique_name) != name_map_->end()); @@ -381,15 +382,15 @@ string Scope::Impl::GetUniqueName(const string& prefix, return unique_name; } -string Scope::Impl::GetNameForOp(const string& default_name) const { - const string unique_name = +std::string Scope::Impl::GetNameForOp(const std::string& default_name) const { + const std::string unique_name = GetUniqueName(default_name, true /* check_single_use */); - const string sep = + const std::string sep = name_.empty() || unique_name.empty() ? 
"" : kScopeSeparator; return absl::StrCat(name_, sep, unique_name); } -string Scope::GetUniqueNameForOp(const string& default_name) const { +std::string Scope::GetUniqueNameForOp(const std::string& default_name) const { if (impl()->single_use_scope()) { if (impl()->op_name_.empty() || *impl()->scope_used_) { *impl()->status_ = @@ -403,21 +404,21 @@ string Scope::GetUniqueNameForOp(const string& default_name) const { : impl()->GetNameForOp(impl()->op_name_); } -Scope Scope::NewSubScope(const string& child_scope_name) const { +Scope Scope::NewSubScope(const std::string& child_scope_name) const { if (child_scope_name.empty()) { return Scope(new Impl(*this, Impl::Tags::ScopeName(), impl()->name_, true /* copy_names */)); } - const string unique_name = + const std::string unique_name = impl()->GetUniqueName(child_scope_name, false /* check_single_use */); - const string sep = + const std::string sep = impl()->name_.empty() || unique_name.empty() ? "" : kScopeSeparator; return Scope(new Impl(*this, Impl::Tags::ScopeName(), absl::StrCat(impl()->name_, sep, unique_name), false /* copy_names */)); } -Scope Scope::WithOpNameImpl(const string& op_name) const { +Scope Scope::WithOpNameImpl(const std::string& op_name) const { if (impl()->single_use_scope()) { UpdateStatus(errors::InvalidArgument("Cannot set op name ", op_name, " on this scope")); @@ -446,15 +447,15 @@ Scope Scope::WithNoControlDependencies() const { /* clear_control_deps */ true)); } -Scope Scope::WithDevice(const string& device) const { +Scope Scope::WithDevice(const std::string& device) const { return Scope(new Impl(*this, Impl::Tags::Device(), device)); } -Scope Scope::WithAssignedDevice(const string& assigned_device) const { +Scope Scope::WithAssignedDevice(const std::string& assigned_device) const { return Scope(new Impl(*this, Impl::Tags::AssignedDevice(), assigned_device)); } -Scope Scope::WithXlaCluster(const string& xla_cluster) const { +Scope Scope::WithXlaCluster(const std::string& xla_cluster) const { return Scope(new Impl(*this, Impl::Tags::XlaCluster(), xla_cluster)); } @@ -472,12 +473,12 @@ Scope Scope::ExitOnError() const { return Scope(new Impl(*this, Impl::Tags::ExitOnError())); } -Scope Scope::WithKernelLabel(const string& kernel_label) const { +Scope Scope::WithKernelLabel(const std::string& kernel_label) const { return Scope(new Impl(*this, Impl::Tags::KernelLabel(), kernel_label)); } CompositeOpScopes Scope::GetCompositeOpScopes( - const string& composite_op_name) const { + const std::string& composite_op_name) const { if (impl()->op_name_.empty() && composite_op_name.empty()) { UpdateStatus(errors::InvalidArgument( "Cannot create composite op scopes with empty name")); @@ -486,8 +487,9 @@ CompositeOpScopes Scope::GetCompositeOpScopes( if (!impl()->single_use_scope()) { Scope child = NewSubScope(impl()->op_name_.empty() ? composite_op_name : impl()->op_name_); - const string child_op_sep = impl()->name_.empty() ? "" : kSuffixSeparator; - const string child_name = + const std::string child_op_sep = + impl()->name_.empty() ? 
"" : kSuffixSeparator; + const std::string child_name = absl::StrCat(impl()->name_, child_op_sep, child.impl()->name_); return {child, Scope(new Impl(child, Impl::Tags::SingleUseScope(), child_name))}; @@ -510,11 +512,11 @@ class InternalScope { ShapeRefiner* refiner) { Scope::Impl::NameMap* name_map = new Scope::Impl::NameMap; for (const Node* node : graph->nodes()) { - const string& name = node->name(); + const std::string& name = node->name(); (*name_map)[name] = 0; // Add all name prefixes ('/' separated). size_t idx = -1; - while ((idx = name.find(kScopeSeparator, idx + 1)) != string::npos) { + while ((idx = name.find(kScopeSeparator, idx + 1)) != std::string::npos) { (*name_map)[name.substr(0, idx)] = 0; } } @@ -533,7 +535,7 @@ Scope NewInternalScope(Graph* graph, absl::Status* status, return InternalScope::NewScope(graph, status, refiner); } -absl::Status CreateOutputWithScope(string op_name, +absl::Status CreateOutputWithScope(std::string op_name, absl::Span inputs, const Scope& scope, Output* output) { TF_RETURN_IF_ERROR(scope.status()); diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc index f3c3fd045a3d6f..f0189c60c714e1 100644 --- a/tensorflow/cc/gradients/array_grad.cc +++ b/tensorflow/cc/gradients/array_grad.cc @@ -14,9 +14,11 @@ limitations under the License. ==============================================================================*/ #include +#include #include #include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "tensorflow/cc/framework/grad_op_registry.h" #include "tensorflow/cc/framework/gradients.h" #include "tensorflow/cc/ops/array_ops_internal.h" diff --git a/tensorflow/cc/gradients/image_grad.cc b/tensorflow/cc/gradients/image_grad.cc index deb90eec264ee7..bb37c90b3f32a8 100644 --- a/tensorflow/cc/gradients/image_grad.cc +++ b/tensorflow/cc/gradients/image_grad.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include "absl/status/status.h" diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index c785af15f95447..af39009ad3f2a5 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include #include +#include #include #include "absl/status/status.h" diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 6309080492c1da..9b980bd9e8321d 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include #include #include #include diff --git a/tensorflow/cc/saved_model/experimental/tests/BUILD b/tensorflow/cc/saved_model/experimental/tests/BUILD index 3270ca916e14a0..995f2a18d6979b 100644 --- a/tensorflow/cc/saved_model/experimental/tests/BUILD +++ b/tensorflow/cc/saved_model/experimental/tests/BUILD @@ -23,5 +23,6 @@ tf_cc_test( "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", + "@com_google_absl//absl/strings:string_view", ], ) diff --git a/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc b/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc index ac85bd728cb7e4..baa3b6be991076 100644 --- a/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc +++ b/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/strings/string_view.h" #include "tensorflow/c/tf_status.h" #include "tensorflow/cc/experimental/base/public/runtime.h" #include "tensorflow/cc/experimental/base/public/runtime_builder.h" diff --git a/tensorflow/cc/training/queue_runner.cc b/tensorflow/cc/training/queue_runner.cc index 1d23f9d87e2d7d..87b696edc39681 100644 --- a/tensorflow/cc/training/queue_runner.cc +++ b/tensorflow/cc/training/queue_runner.cc @@ -118,7 +118,7 @@ absl::Status QueueRunner::StartAndCollectCostGraph( absl::Status QueueRunner::Start(Session* sess, int wait_for) { counter_ = std::make_unique(runs_); - for (const string& enqueue_op : enqueue_op_names_) { + for (const std::string& enqueue_op : enqueue_op_names_) { thread_pool_->Schedule( std::bind(&QueueRunner::Run, this, sess, enqueue_op)); } @@ -182,7 +182,7 @@ void QueueRunner::UpdateStatus(const absl::Status& status) { } } -void QueueRunner::Run(Session* sess, const string& enqueue_op) { +void QueueRunner::Run(Session* sess, const std::string& enqueue_op) { bool first_iteration = true; absl::Status status; while (status.ok()) { @@ -245,7 +245,7 @@ void QueueRunner::SetRunArgumentsAndCostGraph(const RunOptions& run_options) { run_options_ = run_options; } -absl::Status QueueRunner::RealRun(Session* sess, const string& op, +absl::Status QueueRunner::RealRun(Session* sess, const std::string& op, bool update_costs) { absl::Status s; if (update_costs && cg_mu_) { diff --git a/tensorflow/cc/training/queue_runner.h b/tensorflow/cc/training/queue_runner.h index d5d6ca10a56821..ffba8987c6d518 100644 --- a/tensorflow/cc/training/queue_runner.h +++ b/tensorflow/cc/training/queue_runner.h @@ -97,7 +97,7 @@ class QueueRunner : public RunnerInterface { absl::Status Init(const QueueRunnerDef& queue_runner_def); // The Run function for each thread. - void Run(Session* sess, const string& enqueue_op); + void Run(Session* sess, const std::string& enqueue_op); // Updates the internal status; it only keeps OK or the first unexpected error // status. @@ -112,12 +112,12 @@ class QueueRunner : public RunnerInterface { void SetRunArgumentsAndCostGraph(const RunOptions& run_options); - absl::Status RealRun(Session* sess, const string& op, bool update_costs); + absl::Status RealRun(Session* sess, const std::string& op, bool update_costs); - string queue_name_; - std::vector enqueue_op_names_; - string close_op_name_; - string cancel_op_name_; + std::string queue_name_; + std::vector enqueue_op_names_; + std::string close_op_name_; + std::string cancel_op_name_; // code::Code casted to int to avoid a hash function. 
std::unordered_set queue_closed_exception_types_; diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc index 87cb051b75df63..1042ff1fa7a896 100644 --- a/tensorflow/compiler/aot/codegen.cc +++ b/tensorflow/compiler/aot/codegen.cc @@ -1206,9 +1206,9 @@ absl::StatusOr GenerateConstantBuffersData( auto aot_thunk_result_temp, xla::cpu::CpuAotCompilationResult::FromString(serialized, nullptr)); - TF_ASSIGN_OR_RETURN( - auto executable, - std::move(*aot_thunk_result_temp).LoadExecutable(nullptr, nullptr)); + TF_ASSIGN_OR_RETURN(auto executable, + std::move(*aot_thunk_result_temp) + .LoadExecutable(/*stream_exec=*/nullptr)); xla::cpu::CpuExecutable* cpu_executable = tsl::down_cast(executable.get()); diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 7c1772c084750c..91313abca45a24 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -65,7 +65,10 @@ filegroup( # Please use the individual targets in the deps list as needed. See b/336889334. cc_library( name = "jit", - visibility = internal_visibility([":legacy_jit_users"]), + visibility = internal_visibility([ + ":legacy_jit_users", + "//tensorflow/python/profiler:__pkg__", + ]), deps = [ ":xla_cpu_device", ":xla_cpu_jit", @@ -253,7 +256,6 @@ cc_library( hdrs = ["xla_device_context.h"], visibility = ["//visibility:public"], deps = [ - ":xla_launch_util", ":xla_tensor", "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:layout_util", @@ -261,14 +263,29 @@ cc_library( "//tensorflow/compiler/tf2xla:xla_helpers", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", "//tensorflow/core:lib_proto_parsing", - "//tensorflow/core/common_runtime:device", "//tensorflow/core/common_runtime:dma_helper", "//tensorflow/core/framework:allocator", + "//tensorflow/core/platform:errors", + "//tensorflow/core/platform:status", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/synchronization", - "@local_tsl//tsl/platform:statusor", - "@local_xla//xla:util", + "@local_xla//xla:literal", + "@local_xla//xla:shape_util", + "@local_xla//xla:status_macros", "@local_xla//xla/client:local_client", + "@local_xla//xla/service:stream_pool", + "@local_xla//xla/stream_executor:allocator_stats", + "@local_xla//xla/stream_executor:event", + "@local_xla//xla/stream_executor:stream", + "@local_xla//xla/stream_executor:stream_executor_h", + "@local_xla//xla/tsl/platform:errors", + "@local_xla//xla/tsl/platform:statusor", ], ) @@ -682,7 +699,9 @@ tf_cc_test( name = "xla_launch_util_test", srcs = ["xla_launch_util_test.cc"], deps = [ + ":device_compilation_profiler", ":device_compiler", + ":device_executable_persistor", ":flags_headers", ":pjrt_device_compiler_client", ":variable_info", @@ -691,25 +710,35 @@ tf_cc_test( ":xla_cpu_jit", ":xla_device_no_jit_rewrite_registration", ":xla_launch_util", + "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:portable_gif_internal", + "//tensorflow/core:session_options", "//tensorflow/core/framework:fake_input", "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/kernels:ops_testutil", "//tensorflow/core/platform:refcount", "//tensorflow/core/tfrt/common:create_pjrt_client_util", 
"//tensorflow/core/tfrt/common:pjrt_util", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", - "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", + "@local_xla//xla:literal", + "@local_xla//xla:literal_util", "@local_xla//xla/pjrt:pjrt_client", "@local_xla//xla/pjrt:pjrt_common", + "@local_xla//xla/pjrt:pjrt_executable", "@local_xla//xla/pjrt/plugin/xla_cpu:cpu_client_options", "@local_xla//xla/pjrt/plugin/xla_cpu:xla_cpu_pjrt_client", "@local_xla//xla/tests:literal_test_util", "@local_xla//xla/tsl/framework:device_id_utils", "@local_xla//xla/tsl/lib/core:status_test_util", + "@local_xla//xla/tsl/platform:statusor", ], ) @@ -976,8 +1005,7 @@ tf_cc_test( "//tensorflow/core:session_options", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "@com_google_absl//absl/memory", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", ], ) @@ -1057,6 +1085,9 @@ cc_library( "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", "//tensorflow/core:lib", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", "@local_xla//xla:status_macros", ], ) @@ -1075,11 +1106,10 @@ tf_cc_test( "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", "//tensorflow/core:ops", - "//tensorflow/core:portable_gif_internal", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/kernels:constant_op", - "@local_tsl//tsl/platform:status", + "@com_google_absl//absl/log:check", ], ) @@ -1106,12 +1136,15 @@ tf_cc_test( deps = [ ":encapsulate_util", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:ops", "//tensorflow/cc:scope", + "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", "//tensorflow/core:ops", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", + "@com_google_absl//absl/log:check", ], ) @@ -1216,11 +1249,16 @@ cc_library( "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/common_runtime:function_body", "//tensorflow/core/framework:bounds_check", + "//tensorflow/core/platform:hash", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", @@ -1363,6 +1401,9 @@ tf_cc_test( "//tensorflow/core:testlib", "//tensorflow/core/common_runtime:device_set", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", @@ -1414,6 +1455,8 @@ cc_library( "//tensorflow/core:graph", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", @@ -1450,7 +1493,7 @@ cc_library( 
":xla_activity_proto_cc", ":xla_cluster_util", "//tensorflow/compiler/tf2xla:resource_operation_table", - "//tensorflow/compiler/tf2xla:tf2xla_util", + "//tensorflow/compiler/tf2xla:tf2xla_defs", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/core:core_cpu", @@ -1458,12 +1501,17 @@ cc_library( "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/common_runtime:function_body", + "//tensorflow/core/common_runtime:function_utils", "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", "@local_xla//xla:union_find", "@local_xla//xla:util", "@local_xla//xla/service/graphcycles", @@ -1474,6 +1522,7 @@ tf_cc_test( name = "compilability_check_util_test", srcs = ["compilability_check_util_test.cc"], deps = [ + ":common", ":compilability_check_util", ":xla_cpu_device", ":xla_cpu_jit", @@ -1482,17 +1531,17 @@ tf_cc_test( "//tensorflow/cc:functional_ops", "//tensorflow/cc:ops", "//tensorflow/cc:scope", - "//tensorflow/compiler/tf2xla:test_util", "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/tf2xla/cc:xla_jit_ops", - "//tensorflow/compiler/tf2xla/cc:xla_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:ops", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", - "@com_google_absl//absl/memory", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", ], ) @@ -2052,12 +2101,15 @@ tf_cuda_cc_test( srcs = ["xla_platform_info_test.cc"], tags = tf_cuda_tests_tags() + ["config-cuda-only"], deps = [ + ":device_compilation_profiler", + ":device_compiler", ":flags_headers", ":test_util", ":xla_device_no_jit_rewrite_registration", ":xla_gpu_device", ":xla_gpu_jit", "//tensorflow/compiler/tf2xla:layout_util", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/core:framework", "//tensorflow/core:framework_types_hdr", "//tensorflow/core:lib_proto_parsing", @@ -2069,8 +2121,12 @@ tf_cuda_cc_test( "//tensorflow/core/tfrt/common:create_pjrt_client_util", "//tensorflow/core/tfrt/common:pjrt_util", "//tensorflow/core/tpu:tpu_defs", + "@com_google_absl//absl/log:check", "@com_google_googletest//:gtest_main", + "@local_xla//xla/client:local_client", + "@local_xla//xla/pjrt:pjrt_client", "@local_xla//xla/pjrt/plugin/xla_cpu:cpu_client_options", "@local_xla//xla/pjrt/plugin/xla_cpu:xla_cpu_pjrt_client", + "@local_xla//xla/tsl/platform:statusor", ], ) diff --git a/tensorflow/compiler/jit/compilability_check_util.cc b/tensorflow/compiler/jit/compilability_check_util.cc index 6c77648817f808..8da8b2055c6c2b 100644 --- a/tensorflow/compiler/jit/compilability_check_util.cc +++ b/tensorflow/compiler/jit/compilability_check_util.cc @@ -15,52 +15,40 @@ limitations under the License. 
#include "tensorflow/compiler/jit/compilability_check_util.h" -#include -#include -#include +#include #include -#include #include -#include -#include #include +#include #include "absl/algorithm/container.h" -#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "tensorflow/compiler/jit/defs.h" -#include "tensorflow/compiler/jit/device_util.h" -#include "tensorflow/compiler/jit/flags.h" -#include "tensorflow/compiler/jit/resource_operation_safety_analysis.h" #include "tensorflow/compiler/jit/xla_activity.pb.h" #include "tensorflow/compiler/jit/xla_activity_listener.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" #include "tensorflow/compiler/tf2xla/const_analysis.h" -#include "tensorflow/compiler/tf2xla/resource_operation_table.h" -#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include "tensorflow/compiler/tf2xla/tf2xla_defs.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "xla/service/graphcycles/graphcycles.h" -#include "xla/union_find.h" -#include "xla/util.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/common_runtime/graph_constructor.h" +#include "xla/tsl/platform/errors.h" +#include "tensorflow/core/common_runtime/function_body.h" +#include "tensorflow/core/common_runtime/function_utils.h" #include "tensorflow/core/framework/attr_value.pb.h" -#include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/memory_types.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/graph/algorithm.h" -#include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/gtl/cleanup.h" -#include "tensorflow/core/lib/strings/stringprintf.h" -#include "tensorflow/core/public/version.h" -#include "tensorflow/core/util/dump_graph.h" namespace tensorflow { @@ -341,8 +329,8 @@ bool RecursiveCompilabilityChecker::IsCompilableCall( return false; } - auto release_handle_on_return = gtl::MakeCleanup( - [&] { TF_CHECK_OK(lib_runtime->ReleaseHandle(handle)); }); + auto release_handle_on_return = + gtl::MakeCleanup([&] { CHECK_OK(lib_runtime->ReleaseHandle(handle)); }); const FunctionBody* fbody = lib_runtime->GetFunctionBody(handle); bool is_compilable = true; for (const Node* node : fbody->graph->op_nodes()) { diff --git a/tensorflow/compiler/jit/compilability_check_util_test.cc b/tensorflow/compiler/jit/compilability_check_util_test.cc index ea24176bb04a4a..185afab797ee1e 100644 --- a/tensorflow/compiler/jit/compilability_check_util_test.cc +++ b/tensorflow/compiler/jit/compilability_check_util_test.cc @@ -15,21 +15,32 @@ limitations under the License. 
#include "tensorflow/compiler/jit/compilability_check_util.h" -#include "absl/memory/memory.h" +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/strings/match.h" +#include "absl/types/span.h" +#include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" -#include "tensorflow/cc/ops/function_ops.h" +#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/functional_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "xla/tsl/lib/core/status_test_util.h" #include "tensorflow/core/common_runtime/graph_def_builder_util.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/function.h" -#include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/graph/graph_def_builder.h" -#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/version.h" namespace tensorflow { namespace { @@ -260,7 +271,7 @@ TEST_F(CompilabilityCheckUtilTest, CheckFunctionalWhileNode) { GraphDef graph_def; TF_EXPECT_OK(builder.ToGraphDef(&graph_def)); std::unique_ptr graph(new Graph(flib_def_.get())); - TF_CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); + CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); auto while_node_it = std::find_if( graph->nodes().begin(), graph->nodes().end(), diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index 94b136a02b99cf..776ec3915e2f73 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -15,26 +15,52 @@ limitations under the License. 
#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" +#include +#include +#include #include +#include +#include +#include #include +#include +#include "absl/log/check.h" +#include "absl/status/status.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" +#include "absl/types/span.h" #include "tensorflow/cc/framework/ops.h" -#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/math_ops.h" +#include "tensorflow/cc/ops/state_ops.h" #include "tensorflow/compiler/jit/encapsulate_util.h" #include "tensorflow/compiler/jit/extract_outside_compilation_pass.h" #include "tensorflow/compiler/jit/test_util.h" #include "tensorflow/compiler/tf2xla/side_effect_util.h" -#include "tensorflow/core/common_runtime/device_factory.h" -#include "tensorflow/core/common_runtime/function.h" +#include "xla/tsl/lib/core/status_test_util.h" +#include "xla/tsl/platform/errors.h" +#include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/graph_constructor.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/device.h" +#include "tensorflow/core/framework/device_factory.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/equal_graph_def.h" @@ -499,7 +525,7 @@ absl::Status Encapsulate( // Create FunctionLibraryRuntime. SessionOptions session_options; std::vector> devices; - TF_CHECK_OK(DeviceFactory::AddDevices( + CHECK_OK(DeviceFactory::AddDevices( session_options, "/job:localhost/replica:0/task:0", &devices)); OptimizerOptions opts; auto device_mgr = std::make_unique(std::move(devices)); diff --git a/tensorflow/compiler/jit/encapsulate_util_test.cc b/tensorflow/compiler/jit/encapsulate_util_test.cc index 6d1661222e3eaf..4d2b71327b3250 100644 --- a/tensorflow/compiler/jit/encapsulate_util_test.cc +++ b/tensorflow/compiler/jit/encapsulate_util_test.cc @@ -15,12 +15,19 @@ limitations under the License. 
#include "tensorflow/compiler/jit/encapsulate_util.h" +#include + +#include "absl/log/check.h" +#include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/array_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/math_ops.h" #include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -35,16 +42,16 @@ TEST(PerformStaticShapeInferenceBeforeEncapsulationTest, Basic) { Output add = ops::Add(s.WithOpName("add"), const_0, const_1); Output identity = ops::Identity(s.WithOpName("identity"), add); Graph g(OpRegistry::Global()); - TF_CHECK_OK(s.ToGraph(&g)); + CHECK_OK(s.ToGraph(&g)); - TF_CHECK_OK(PerformStaticShapeInferenceBeforeEncapsulation(&g)); + CHECK_OK(PerformStaticShapeInferenceBeforeEncapsulation(&g)); // Check that "add" node now has _xla_inferred_shapes attr. auto node_index = g.BuildNodeNameIndex(); Node *add_node = node_index["add"]; std::vector output_shapes; - TF_CHECK_OK(GetNodeAttr(add_node->attrs(), kXlaInferredShapesAttrName, - &output_shapes)); + CHECK_OK(GetNodeAttr(add_node->attrs(), kXlaInferredShapesAttrName, + &output_shapes)); EXPECT_EQ(output_shapes.size(), 1); TensorShapeProto shape_proto; output_shapes[0].AsProto(&shape_proto); diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc index acd5319cf8ed16..6b0570b704e2d7 100644 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc @@ -15,19 +15,31 @@ limitations under the License. 
#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h" +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/function_ops.h" +#include "tensorflow/cc/ops/math_ops.h" #include "tensorflow/cc/ops/resource_variable_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/compiler/jit/defs.h" -#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" #include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h" #include "tensorflow/compiler/tf2xla/test_util.h" +#include "xla/tsl/lib/core/status_test_util.h" #include "tensorflow/core/common_runtime/graph_constructor.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph_to_functiondef.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/hash/hash.h" -#include "tensorflow/core/lib/strings/proto_serialization.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/util/equal_graph_def.h" @@ -47,23 +59,23 @@ static std::unique_ptr MakeOuterGraph( auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); NodeDef def; - TF_CHECK_OK(NodeDefBuilder("launch0", function, &flib_def) - .Input(a.node()->name(), 0, DT_INT32) - .Input(b.node()->name(), 0, DT_FLOAT) - .Input(c.node()->name(), 0, DT_INT32) - .Input(d.node()->name(), 0, DT_FLOAT) - .Input(u.node()->name(), 0, DT_RESOURCE) - .Input(v.node()->name(), 0, DT_RESOURCE) - .Input(w.node()->name(), 0, DT_RESOURCE) - .Device("/gpu:0") - .Attr(kXlaClusterIdAttr, "launch0") - .Attr("_variable_start_index", 4) - .Finalize(&def)); + CHECK_OK(NodeDefBuilder("launch0", function, &flib_def) + .Input(a.node()->name(), 0, DT_INT32) + .Input(b.node()->name(), 0, DT_FLOAT) + .Input(c.node()->name(), 0, DT_INT32) + .Input(d.node()->name(), 0, DT_FLOAT) + .Input(u.node()->name(), 0, DT_RESOURCE) + .Input(v.node()->name(), 0, DT_RESOURCE) + .Input(w.node()->name(), 0, DT_RESOURCE) + .Device("/gpu:0") + .Attr(kXlaClusterIdAttr, "launch0") + .Attr("_variable_start_index", 4) + .Finalize(&def)); absl::Status status; Node* launch = scope.graph()->AddNode(def, &status); - TF_CHECK_OK(status); - TF_CHECK_OK(scope.DoShapeInference(launch)); + CHECK_OK(status); + CHECK_OK(scope.DoShapeInference(launch)); scope.graph()->AddEdge(a.node(), 0, launch, 0); scope.graph()->AddEdge(b.node(), 0, launch, 1); scope.graph()->AddEdge(c.node(), 0, launch, 2); @@ -89,7 +101,7 @@ static std::unique_ptr MakeOuterGraph( auto consumer3 = ops::Identity(scope.WithOpName("consumer3"), out3); std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(scope.ToGraph(graph.get())); + CHECK_OK(scope.ToGraph(graph.get())); return graph; } @@ -135,7 +147,7 @@ static std::unique_ptr MakeBodyGraph() { ops::_Retval(scope.WithOpName("readu_0_retval_RetVal"), read_u, 3); std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(scope.ToGraph(graph.get())); + CHECK_OK(scope.ToGraph(graph.get())); return graph; } @@ -160,7 +172,7 @@ TEST(EncapsulateXlaComputations, DeterministicEncapsulate) { }; add_attrs(e.node()); - TF_CHECK_OK(scope.ToGraph(graph.get())); + 
CHECK_OK(scope.ToGraph(graph.get())); auto get_node_in_graph = [&graph](Node* node) { return graph->FindNodeId(node->id()); }; @@ -178,7 +190,7 @@ TEST(EncapsulateXlaComputations, DeterministicEncapsulate) { get_node_in_graph(e.node()), true); } } - TF_CHECK_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def)); + CHECK_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def)); return SerializeGraphDeterministic(*graph).value(); }; diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc index 1a6441a80726a0..aa6ad2e4eeed8c 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc @@ -15,22 +15,39 @@ limitations under the License. #include "tensorflow/compiler/jit/extract_outside_compilation_pass.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/status/status.h" #include "absl/strings/match.h" +#include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/functional_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/cc/ops/math_ops.h" #include "tensorflow/compiler/jit/encapsulate_util.h" #include "xla/hlo/testlib/test.h" -#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/device_factory.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/public/version.h" @@ -50,7 +67,7 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, Basic) { auto ret0 = ops::_Retval(s.WithOpName("ret0"), add, 0); auto ret1 = ops::_Retval(s.WithOpName("ret1"), arg1, 1); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); auto node_name_image = g->BuildNodeNameIndex(); Node *add_node = node_name_image["add"]; EXPECT_NE(add_node, nullptr); @@ -61,7 +78,7 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, Basic) { std::vector arg_source_tensors; NodeDef call_node_def; call_node_def.set_op("0"); - TF_CHECK_OK( + CHECK_OK( rewrite_fn(arg_source_tensors, &g, nullptr, nullptr, &call_node_def)); node_name_image = g->BuildNodeNameIndex(); @@ -75,7 +92,7 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, Basic) { Node *recv_at_host = node_name_image["outside_compilation_cluster__0_recv"]; EXPECT_NE(recv_at_host, nullptr); std::vector recv_at_host_dtypes; - TF_CHECK_OK( + CHECK_OK( GetNodeAttr(recv_at_host->attrs(), "Toutputs", &recv_at_host_dtypes)); EXPECT_EQ(recv_at_host_dtypes.size(), 3); EXPECT_EQ(recv_at_host_dtypes[0], DT_INT32); @@ -88,7 +105,7 @@ 
TEST(RewriteOutsideCompilationSubgraphFnTest, Basic) { Node *send_from_host = node_name_image["outside_compilation_cluster__0_send"]; EXPECT_NE(send_from_host, nullptr); std::vector send_from_host_dtypes; - TF_CHECK_OK( + CHECK_OK( GetNodeAttr(send_from_host->attrs(), "Tinputs", &send_from_host_dtypes)); EXPECT_EQ(send_from_host_dtypes.size(), 2); EXPECT_EQ(send_from_host_dtypes[0], DT_INT32); @@ -115,8 +132,8 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, Basic) { EXPECT_TRUE(has_control_edge_to_send_from_host); // Verify step 7: necessary attrs added to call_node_def. NameAttrList shape_inference_graph; - TF_CHECK_OK(GetNodeAttr(AttrSlice(&call_node_def.attr()), - "shape_inference_graph", &shape_inference_graph)); + CHECK_OK(GetNodeAttr(AttrSlice(&call_node_def.attr()), + "shape_inference_graph", &shape_inference_graph)); EXPECT_EQ(shape_inference_graph.name(), "_outside_compilation_shape_inference_cluster__0"); } @@ -126,13 +143,13 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, NoSendFromHost) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output arg0 = ops::_Arg(s.WithOpName("arg0"), DT_INT32, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); RewriteOutsideCompilationSubgraphFn rewrite_fn("_xla", "_oc", "cluster", ""); std::vector arg_source_tensors; NodeDef call_node_def; call_node_def.set_op("0"); - TF_CHECK_OK( + CHECK_OK( rewrite_fn(arg_source_tensors, &g, nullptr, nullptr, &call_node_def)); auto node_name_image = g->BuildNodeNameIndex(); @@ -152,13 +169,13 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, NoRecvAtHost) { Output const0 = ops::Const(s.WithOpName("const0"), 1, {2}); auto ret = ops::_Retval(s.WithOpName("ret"), const0, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); RewriteOutsideCompilationSubgraphFn rewrite_fn("_xla", "_oc", "cluster", ""); std::vector arg_source_tensors; NodeDef call_node_def; call_node_def.set_op("0"); - TF_CHECK_OK( + CHECK_OK( rewrite_fn(arg_source_tensors, &g, nullptr, nullptr, &call_node_def)); auto node_name_image = g->BuildNodeNameIndex(); @@ -176,13 +193,13 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, NoKeyPlaceholder) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output const0 = ops::Const(s.WithOpName("const0"), 1, {2}); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); RewriteOutsideCompilationSubgraphFn rewrite_fn("_xla", "_oc", "cluster", ""); std::vector arg_source_tensors; NodeDef call_node_def; call_node_def.set_op("0"); - TF_CHECK_OK( + CHECK_OK( rewrite_fn(arg_source_tensors, &g, nullptr, nullptr, &call_node_def)); auto node_name_image = g->BuildNodeNameIndex(); @@ -202,7 +219,7 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, ShapesInferred) { Output const0 = ops::Const(s.WithOpName("const0"), 1, {2}); auto ret = ops::_Retval(s.WithOpName("ret"), const0, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); auto node_name_image = g->BuildNodeNameIndex(); Node *const0_node = node_name_image["const0"]; EXPECT_NE(const0_node, nullptr); @@ -214,13 +231,13 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, ShapesInferred) { std::vector arg_source_tensors; NodeDef call_node_def; call_node_def.set_op("0"); - TF_CHECK_OK( + CHECK_OK( rewrite_fn(arg_source_tensors, &g, nullptr, nullptr, &call_node_def)); node_name_image = 
g->BuildNodeNameIndex(); // Check "shape" attr is available in call_node_def. std::vector shapes; - TF_CHECK_OK(GetNodeAttr(AttrSlice(&call_node_def.attr()), "shapes", &shapes)); + CHECK_OK(GetNodeAttr(AttrSlice(&call_node_def.attr()), "shapes", &shapes)); EXPECT_EQ(shapes.size(), 1); EXPECT_EQ(shapes[0].dim_size(), 1); } @@ -230,7 +247,7 @@ class ExtractOutsideCompilationForFunctionTest : public ::testing::Test { void SetUp() override { SessionOptions session_options; std::vector> devices; - TF_CHECK_OK(DeviceFactory::AddDevices( + CHECK_OK(DeviceFactory::AddDevices( session_options, "/job:localhost/replica:0/task:0", &devices)); device_mgr_ = std::make_unique(std::move(devices)); } @@ -275,7 +292,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, Basic) { Output identity1 = ops::Identity(s.WithOpName("identity1"), identity0); Output identity2 = ops::Identity(s.WithOpName("identity2"), identity1); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); auto node_name_image = g->BuildNodeNameIndex(); node_name_image["identity0"]->AddAttr("_oc", "0"); node_name_image["identity1"]->AddAttr("_oc", "1"); @@ -284,7 +301,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, Basic) { kXlaInferredShapesAttrName, std::vector{shape}); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); @@ -295,15 +312,15 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, Basic) { NameAttrList name_attrs; name_attrs.set_name("cluster"); *name_attrs.mutable_attr() = attrs; - TF_CHECK_OK(ExtractOutsideCompilationTest( + CHECK_OK(ExtractOutsideCompilationTest( "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph", host_compute_core, &fld, &shape_inference_graphs, &has_outside_compilation)); // Get rewritten XLA computation function. std::unique_ptr xla_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), - AttrSlice(), &fld, &xla_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), AttrSlice(), + &fld, &xla_fbody)); auto node_name_index = xla_fbody->graph->BuildNodeNameIndex(); // Check XlaHostCompute nodes. @@ -313,26 +330,26 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, Basic) { EXPECT_NE(host_compute_1, nullptr); // Check XlaHostCompute nodes' "tpu_core" attr. int tpu_core; - TF_CHECK_OK(GetNodeAttr(host_compute_0->attrs(), "tpu_core", &tpu_core)); + CHECK_OK(GetNodeAttr(host_compute_0->attrs(), "tpu_core", &tpu_core)); EXPECT_EQ(tpu_core, 1); - TF_CHECK_OK(GetNodeAttr(host_compute_1->attrs(), "tpu_core", &tpu_core)); + CHECK_OK(GetNodeAttr(host_compute_1->attrs(), "tpu_core", &tpu_core)); EXPECT_EQ(tpu_core, 0); // Check XlaHostCompute nodes' "shapes" attr. "0" should not have shapes, and // "1" should have shapes. std::vector shapes; - TF_CHECK_OK(GetNodeAttr(host_compute_0->attrs(), "shapes", &shapes)); + CHECK_OK(GetNodeAttr(host_compute_0->attrs(), "shapes", &shapes)); EXPECT_EQ(shapes.size(), 0); - TF_CHECK_OK(GetNodeAttr(host_compute_1->attrs(), "shapes", &shapes)); + CHECK_OK(GetNodeAttr(host_compute_1->attrs(), "shapes", &shapes)); EXPECT_EQ(shapes.size(), 1); EXPECT_EQ(shapes[0].dim_size(), 1); // Check XlaHostCompute nodes' "shape_inference_graph" attr. Both should have // empty values. 
NameAttrList shape_inference_graph; - TF_CHECK_OK(GetNodeAttr(host_compute_0->attrs(), "shape_inference_graph", - &shape_inference_graph)); + CHECK_OK(GetNodeAttr(host_compute_0->attrs(), "shape_inference_graph", + &shape_inference_graph)); EXPECT_EQ(shape_inference_graph.name(), ""); - TF_CHECK_OK(GetNodeAttr(host_compute_1->attrs(), "shape_inference_graph", - &shape_inference_graph)); + CHECK_OK(GetNodeAttr(host_compute_1->attrs(), "shape_inference_graph", + &shape_inference_graph)); EXPECT_EQ(shape_inference_graph.name(), ""); // Check `shape_inference_graphs`. @@ -344,7 +361,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, Basic) { device_ordinal_temp_value.set_i(0); protobuf::Map host_func_attrs; host_func_attrs["_device_ordinal"] = device_ordinal_temp_value; - TF_CHECK_OK(FunctionDefToBodyHelper( + CHECK_OK(FunctionDefToBodyHelper( *fld.Find("host_graph"), AttrSlice(&host_func_attrs), &fld, &host_fbody)); Graph *host_graph = host_fbody->graph; Node *key_placeholder = nullptr, *sequencer = nullptr; @@ -377,7 +394,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, Basic) { EXPECT_EQ(num_recv_at_host, 1); for (Node *n : send_recv_nodes) { Node *input_node; - TF_CHECK_OK(n->input_node(n->num_inputs() - 1, &input_node)); + CHECK_OK(n->input_node(n->num_inputs() - 1, &input_node)); EXPECT_EQ(input_node, key_placeholder); bool has_control_edge_to_sequencer = false; @@ -399,10 +416,10 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, NoHostGraph) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output const0 = ops::Const(s.WithOpName("const0"), 1, {2}); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); @@ -413,7 +430,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, NoHostGraph) { NameAttrList name_attrs; name_attrs.set_name("cluster"); *name_attrs.mutable_attr() = attrs; - TF_CHECK_OK(ExtractOutsideCompilationTest( + CHECK_OK(ExtractOutsideCompilationTest( "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph", host_compute_core, &fld, &shape_inference_graphs, &has_outside_compilation)); @@ -435,7 +452,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { Output identity = ops::Identity(s.WithOpName("identity_true_fn"), arg); ops::_Retval retval(s.WithOpName("retval"), identity, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); auto node_name_image = g->BuildNodeNameIndex(); node_name_image["identity_true_fn"]->AddAttr("_oc", "0"); PartialTensorShape shape({2}); @@ -443,7 +460,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { kXlaInferredShapesAttrName, std::vector{shape}); FunctionDef *true_fn_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "true_fn", true_fn_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "true_fn", true_fn_fdef)); } { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); @@ -451,7 +468,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { Output identity = ops::Identity(s.WithOpName("identity_false_fn"), arg); ops::_Retval retval(s.WithOpName("retval"), identity, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + 
CHECK_OK(s.ToGraph(g.get())); auto node_name_image = g->BuildNodeNameIndex(); node_name_image["identity_false_fn"]->AddAttr("_oc", "0"); PartialTensorShape shape({2}); @@ -459,7 +476,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { kXlaInferredShapesAttrName, std::vector{shape}); FunctionDef *false_fn_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "false_fn", false_fn_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "false_fn", false_fn_fdef)); } { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); @@ -474,10 +491,10 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { true_fn, false_fn); ops::_Retval retval(s.WithOpName("retval"), if_op.output[0], 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); @@ -488,7 +505,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { NameAttrList name_attrs; name_attrs.set_name("cluster"); *name_attrs.mutable_attr() = attrs; - TF_CHECK_OK(ExtractOutsideCompilationTest( + CHECK_OK(ExtractOutsideCompilationTest( "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph", host_compute_core, &fld, &shape_inference_graphs, &has_outside_compilation)); @@ -500,9 +517,9 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { device_ordinal_temp_value.set_i(0); protobuf::Map host_func_attrs; host_func_attrs["_device_ordinal"] = device_ordinal_temp_value; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("host_graph"), - AttrSlice(&host_func_attrs), &fld, - &host_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("host_graph"), + AttrSlice(&host_func_attrs), &fld, + &host_fbody)); Graph *host_graph = host_fbody->graph; auto node_name_index = host_graph->BuildNodeNameIndex(); @@ -515,7 +532,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { Node *if_oc_node = node_name_index["oc_if_if"]; EXPECT_NE(if_oc_node, nullptr); Node *if_oc_node_cond_input; - TF_CHECK_OK(if_oc_node->input_node(0, &if_oc_node_cond_input)); + CHECK_OK(if_oc_node->input_node(0, &if_oc_node_cond_input)); EXPECT_EQ(if_oc_node_cond_input, recv_if_pred_node); // Check that then_branch outside compilation has node "identity_true_fn". @@ -546,8 +563,8 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { // Check XLA graph. 
{ std::unique_ptr xla_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), - AttrSlice(), &fld, &xla_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), + AttrSlice(), &fld, &xla_fbody)); Graph *xla_graph = xla_fbody->graph; auto node_name_index = xla_graph->BuildNodeNameIndex(); @@ -569,7 +586,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { Node *if_node = node_name_index["if"]; EXPECT_NE(if_node, nullptr); std::vector token_inputs; - TF_CHECK_OK( + CHECK_OK( GetNodeAttr(if_node->def(), "_xla_token_input_nodes", &token_inputs)); EXPECT_THAT(token_inputs, ::testing::ElementsAre("send_oc_if_pred_if")); } @@ -586,7 +603,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInWhile) { Output identity = ops::Identity(s.WithOpName("identity_cond_fn"), arg); ops::_Retval retval(s.WithOpName("retval"), identity, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); auto node_name_image = g->BuildNodeNameIndex(); node_name_image["identity_cond_fn"]->AddAttr("_oc", "0"); PartialTensorShape shape({2}); @@ -594,7 +611,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInWhile) { kXlaInferredShapesAttrName, std::vector{shape}); FunctionDef *cond_fn_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "cond_fn", cond_fn_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "cond_fn", cond_fn_fdef)); } { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); @@ -602,7 +619,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInWhile) { Output identity = ops::Identity(s.WithOpName("identity_body_fn"), arg); ops::_Retval retval(s.WithOpName("retval"), identity, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); auto node_name_image = g->BuildNodeNameIndex(); node_name_image["identity_body_fn"]->AddAttr("_oc", "0"); PartialTensorShape shape({2}); @@ -610,7 +627,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInWhile) { kXlaInferredShapesAttrName, std::vector{shape}); FunctionDef *body_fn_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "body_fn", body_fn_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "body_fn", body_fn_fdef)); } { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); @@ -624,10 +641,10 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInWhile) { cond_fn, body_fn); ops::_Retval retval(s.WithOpName("retval"), while_op.output[0], 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); @@ -638,7 +655,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInWhile) { NameAttrList name_attrs; name_attrs.set_name("cluster"); *name_attrs.mutable_attr() = attrs; - TF_CHECK_OK(ExtractOutsideCompilationTest( + CHECK_OK(ExtractOutsideCompilationTest( "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph", host_compute_core, &fld, &shape_inference_graphs, &has_outside_compilation)); @@ -650,9 +667,9 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInWhile) { device_ordinal_temp_value.set_i(0); protobuf::Map host_func_attrs; 
host_func_attrs["_device_ordinal"] = device_ordinal_temp_value; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("host_graph"), - AttrSlice(&host_func_attrs), &fld, - &host_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("host_graph"), + AttrSlice(&host_func_attrs), &fld, + &host_fbody)); Graph *host_graph = host_fbody->graph; auto node_name_index = host_graph->BuildNodeNameIndex(); @@ -713,7 +730,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInFunction) { Output identity = ops::Identity(s.WithOpName("identity"), arg); ops::_Retval retval(s.WithOpName("retval"), identity, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); auto node_name_image = g->BuildNodeNameIndex(); node_name_image["identity"]->AddAttr("_oc", "0"); PartialTensorShape shape({2}); @@ -721,7 +738,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInFunction) { kXlaInferredShapesAttrName, std::vector{shape}); FunctionDef *true_fn_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "fn", true_fn_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "fn", true_fn_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); { @@ -736,35 +753,35 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInFunction) { tensor_proto.add_int_val(1); } NodeDef const_def; - TF_CHECK_OK(NodeDefBuilder("const", "Const") - .Attr("dtype", DT_INT32) - .Attr("value", tensor_proto) - .Finalize(&const_def)); + CHECK_OK(NodeDefBuilder("const", "Const") + .Attr("dtype", DT_INT32) + .Attr("value", tensor_proto) + .Finalize(&const_def)); absl::Status s; Node *const_node = g->AddNode(const_def, &s); - TF_CHECK_OK(s); + CHECK_OK(s); NodeDef fn_def; - TF_CHECK_OK(NodeDefBuilder("fn", "fn", &fld) - .Input("const", 0, DT_INT32) - .Finalize(&fn_def)); + CHECK_OK(NodeDefBuilder("fn", "fn", &fld) + .Input("const", 0, DT_INT32) + .Finalize(&fn_def)); Node *fn_node = g->AddNode(fn_def, &s); - TF_CHECK_OK(s); + CHECK_OK(s); g->AddEdge(const_node, 0, fn_node, 0); NodeDef ret_def; - TF_CHECK_OK(NodeDefBuilder("ret", "_Retval") - .Attr("index", 0) - .Attr("T", DT_INT32) - .Input("fn", 0, DT_INT32) - .Finalize(&ret_def)); + CHECK_OK(NodeDefBuilder("ret", "_Retval") + .Attr("index", 0) + .Attr("T", DT_INT32) + .Input("fn", 0, DT_INT32) + .Finalize(&ret_def)); Node *ret_node = g->AddNode(ret_def, &s); - TF_CHECK_OK(s); + CHECK_OK(s); g->AddEdge(fn_node, 0, ret_node, 0); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); - TF_CHECK_OK(fld.AddFunctionDef(*xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); + CHECK_OK(fld.AddFunctionDef(*xla_fdef)); } protobuf::Map attrs; @@ -774,7 +791,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInFunction) { NameAttrList name_attrs; name_attrs.set_name("cluster"); *name_attrs.mutable_attr() = attrs; - TF_CHECK_OK(ExtractOutsideCompilationTest( + CHECK_OK(ExtractOutsideCompilationTest( "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph", host_compute_core, &fld, &shape_inference_graphs, &has_outside_compilation)); @@ -786,9 +803,9 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInFunction) { device_ordinal_temp_value.set_i(0); protobuf::Map host_func_attrs; host_func_attrs["_device_ordinal"] = device_ordinal_temp_value; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("host_graph"), - AttrSlice(&host_func_attrs), &fld, - &host_fbody)); + 
CHECK_OK(FunctionDefToBodyHelper(*fld.Find("host_graph"), + AttrSlice(&host_func_attrs), &fld, + &host_fbody)); Graph *host_graph = host_fbody->graph; auto node_name_index = host_graph->BuildNodeNameIndex(); @@ -797,9 +814,9 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInFunction) { EXPECT_NE(call_node, nullptr); std::unique_ptr call_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("oc_func_call_host_fn"), - AttrSlice(&host_func_attrs), &fld, - &call_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("oc_func_call_host_fn"), + AttrSlice(&host_func_attrs), &fld, + &call_fbody)); // Verify we have _XlaRecvAtHost and _XlaSendFromHost nodes. bool has_recv = false, has_send = false; @@ -817,8 +834,8 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInFunction) { // Check XLA graph. { std::unique_ptr xla_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), - AttrSlice(), &fld, &xla_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), + AttrSlice(), &fld, &xla_fbody)); Graph *xla_graph = xla_fbody->graph; auto node_name_index = xla_graph->BuildNodeNameIndex(); @@ -828,8 +845,8 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInFunction) { EXPECT_EQ(fn_node->type_string(), "fn_oc"); std::unique_ptr call_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("fn_oc"), AttrSlice(), &fld, - &call_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("fn_oc"), AttrSlice(), &fld, + &call_fbody)); // Verify we have XlaHostCompute nodes. bool has_hc = false; @@ -857,7 +874,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, Output identity1 = ops::Identity(s.WithOpName("identity1"), identity0); Output identity2 = ops::Identity(s.WithOpName("identity2"), identity1); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); std::cout << "Graph is " << (*g).ToGraphDefDebug().DebugString() << std::endl; auto node_name_image = g->BuildNodeNameIndex(); @@ -869,7 +886,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, kXlaInferredShapesAttrName, std::vector{shape}); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); @@ -880,15 +897,15 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, NameAttrList name_attrs; name_attrs.set_name("cluster"); *name_attrs.mutable_attr() = attrs; - TF_CHECK_OK(ExtractOutsideCompilationTest( + CHECK_OK(ExtractOutsideCompilationTest( "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph", host_compute_core, &fld, &shape_inference_graphs, &has_outside_compilation)); // Get rewritten XLA computation function. std::unique_ptr xla_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), - AttrSlice(), &fld, &xla_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), AttrSlice(), + &fld, &xla_fbody)); auto node_name_index = xla_fbody->graph->BuildNodeNameIndex(); // Check XlaHostCompute nodes. @@ -899,8 +916,8 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, // Check XlaHostCompute nodes' "_xla_token_input_nodes" attr. 
std::vector token_input_nodes; - TF_CHECK_OK(GetNodeAttr(AttrSlice(host_compute_0->attrs()), - "_xla_token_input_nodes", &token_input_nodes)); + CHECK_OK(GetNodeAttr(AttrSlice(host_compute_0->attrs()), + "_xla_token_input_nodes", &token_input_nodes)); std::vector expected_token_input_nodes_0( {"_xla_token_arg_node"}); @@ -908,8 +925,8 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, token_input_nodes.clear(); std::vector expected_token_input_nodes_1( {"_xla_token_arg_node", "outside_compilation_0_host_compute"}); - TF_CHECK_OK(GetNodeAttr(AttrSlice(host_compute_1->attrs()), - "_xla_token_input_nodes", &token_input_nodes)); + CHECK_OK(GetNodeAttr(AttrSlice(host_compute_1->attrs()), + "_xla_token_input_nodes", &token_input_nodes)); EXPECT_EQ(token_input_nodes, expected_token_input_nodes_1); // Check there is a control edge from host_compute_0 to host_compute_1. @@ -940,7 +957,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, s.WithOpName("identity1").WithControlDependencies(identity0), const0); Output identity2 = ops::Identity(s.WithOpName("identity2"), identity1); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); std::cout << "Graph is " << (*g).ToGraphDefDebug().DebugString() << std::endl; auto node_name_image = g->BuildNodeNameIndex(); @@ -952,7 +969,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, kXlaInferredShapesAttrName, std::vector{shape}); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); @@ -963,15 +980,15 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, NameAttrList name_attrs; name_attrs.set_name("cluster"); *name_attrs.mutable_attr() = attrs; - TF_CHECK_OK(ExtractOutsideCompilationTest( + CHECK_OK(ExtractOutsideCompilationTest( "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph", host_compute_core, &fld, &shape_inference_graphs, &has_outside_compilation)); // Get rewritten XLA computation function. std::unique_ptr xla_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), - AttrSlice(), &fld, &xla_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"), AttrSlice(), + &fld, &xla_fbody)); auto node_name_index = xla_fbody->graph->BuildNodeNameIndex(); // Check XlaHostCompute nodes. @@ -982,8 +999,8 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, // Check XlaHostCompute nodes' "_xla_token_input_nodes" attr. std::vector token_input_nodes; - TF_CHECK_OK(GetNodeAttr(AttrSlice(host_compute_0->attrs()), - "_xla_token_input_nodes", &token_input_nodes)); + CHECK_OK(GetNodeAttr(AttrSlice(host_compute_0->attrs()), + "_xla_token_input_nodes", &token_input_nodes)); std::vector expected_token_input_nodes_0( {"_xla_token_arg_node"}); @@ -991,8 +1008,8 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, token_input_nodes.clear(); std::vector expected_token_input_nodes_1( {"_xla_token_arg_node", "outside_compilation_0_host_compute"}); - TF_CHECK_OK(GetNodeAttr(AttrSlice(host_compute_1->attrs()), - "_xla_token_input_nodes", &token_input_nodes)); + CHECK_OK(GetNodeAttr(AttrSlice(host_compute_1->attrs()), + "_xla_token_input_nodes", &token_input_nodes)); EXPECT_EQ(token_input_nodes, expected_token_input_nodes_1); // Check there is a control edge from host_compute_0 to host_compute_1. 
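
The test-file hunks above consistently swap the TensorFlow-specific TF_CHECK_OK macro for CHECK_OK from "absl/log/check.h" (note the added absl/log:check deps and includes). Both forms abort the process when the status is not OK; the absl form works directly on any absl::Status and drops the extra TF header dependency. A minimal, self-contained sketch of that pattern is below — the DoSetup helper and its argument are invented purely for illustration and are not part of the patch.

    // Sketch only: illustrates CHECK_OK on an absl::Status, as used in the
    // updated tests above. Requires linking against Abseil's status and log
    // libraries.
    #include "absl/log/check.h"
    #include "absl/status/status.h"

    // Hypothetical helper returning a status, standing in for calls such as
    // Scope::ToGraph or GraphToFunctionDef in the real tests.
    absl::Status DoSetup(bool ready) {
      if (!ready) return absl::FailedPreconditionError("not ready");
      return absl::OkStatus();
    }

    int main() {
      // CHECK_OK logs the status message and terminates if DoSetup fails,
      // mirroring what TF_CHECK_OK did in the pre-patch tests.
      CHECK_OK(DoSetup(/*ready=*/true));
      return 0;
    }
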
diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index 325f79b95e3a5e..54d6276c05cc32 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -166,7 +166,7 @@ class ExecutableClosureStore { public: ExecutableClosureStore() : key_counter_(0) {} - using KeyT = string; + using KeyT = std::string; KeyT Produce(ExecutableClosure result) { mutex_lock l(mutex_); @@ -217,7 +217,8 @@ se::Stream* GetStream(OpKernelContext* ctx) { XlaComputationLaunchContext GetLaunchContext( const XlaPlatformInfo& platform_info, OpKernelContext* ctx, - xla::LocalClient* client, se::DeviceMemoryAllocator* allocator) { + xla::LocalClient* client, + stream_executor::DeviceAddressAllocator* allocator) { se::Stream* stream = GetStream(ctx); int device_ordinal = stream ? stream->parent()->device_ordinal() : client->default_device_ordinal(); @@ -230,7 +231,7 @@ XlaComputationLaunchContext GetLaunchContext( absl::Status GetTaskName(const absl::string_view device_name, std::string* task_name) { - string ignored; + std::string ignored; if (!DeviceNameUtils::SplitDeviceName(device_name, task_name, &ignored)) { return errors::InvalidArgument("Unable to parse device name: ", device_name); @@ -246,7 +247,7 @@ xla::SendDeviceMemoryFunction GetSendDeviceMemoryFunction( return [ctx, program_key]( int64_t channel_id, se::Stream* stream, const xla::Shape& shape, - const se::DeviceMemoryBase& device_memory_base, + const stream_executor::DeviceAddressBase& device_memory_base, const absl::flat_hash_map& frontend_attrs) -> absl::StatusOr>> { auto iter = frontend_attrs.find("_xla_host_transfer_rendezvous"); @@ -293,7 +294,7 @@ xla::RecvDeviceMemoryFunction GetRecvDeviceMemoryFunction( return [ctx, program_key]( int64_t channel_id, se::Stream* stream, const xla::Shape& shape, - se::DeviceMemoryBase* device_memory_base, + stream_executor::DeviceAddressBase* device_memory_base, const absl::flat_hash_map& frontend_attrs) -> absl::StatusOr>> { auto iter = frontend_attrs.find("_xla_host_transfer_rendezvous"); @@ -339,7 +340,7 @@ absl::StatusOr RunExecutable( const XlaComputationLaunchContext& launch_context, std::vector execution_inputs, xla::ExecutableRunOptions run_options, xla::LocalExecutable* executable, - OpKernelContext* ctx, se::DeviceMemoryAllocator* allocator) { + OpKernelContext* ctx, stream_executor::DeviceAddressAllocator* allocator) { VLOG(2) << "Executing Xla Computation."; Env* env = Env::Default(); auto start_time = env->NowMicros(); @@ -620,7 +621,7 @@ void XlaLocalLaunchBase::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { resource_var_ptrs[resources[i]] = variable_infos[i].var()->tensor(); } - std::shared_ptr allocator = + std::shared_ptr allocator = GetAllocator(ctx->device(), GetStream(ctx), platform_info); XlaComputationLaunchContext launch_context = GetLaunchContext(platform_info, ctx, client, allocator.get()); @@ -928,7 +929,7 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { XlaExecutableClosure closure = XlaExecutableClosureStore::Global()->Consume(key); - std::shared_ptr allocator = + std::shared_ptr allocator = GetAllocator(ctx->device(), GetStream(ctx), platform_info_); XlaComputationLaunchContext launch_context = GetLaunchContext(platform_info_, ctx, closure.client(), allocator.get()); diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index 1d4031a4ffc926..89d5ea8863151b 100644 --- 
a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -16,6 +16,8 @@ limitations under the License. #include "tensorflow/compiler/jit/mark_for_compilation_pass.h" #include +#include +#include #include #include #include @@ -24,33 +26,50 @@ limitations under the License. #include #include "absl/container/flat_hash_map.h" -#include "absl/memory/memory.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" #include "absl/strings/match.h" -#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/control_flow_ops.h" #include "tensorflow/cc/ops/control_flow_ops_internal.h" +#include "tensorflow/cc/ops/data_flow_ops.h" #include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/functional_ops.h" #include "tensorflow/cc/ops/list_ops.h" +#include "tensorflow/cc/ops/logging_ops.h" +#include "tensorflow/cc/ops/math_ops.h" +#include "tensorflow/cc/ops/no_op.h" +#include "tensorflow/cc/ops/random_ops.h" #include "tensorflow/cc/ops/resource_variable_ops.h" #include "tensorflow/cc/ops/sendrecv_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/cc/ops/state_ops.h" #include "tensorflow/compiler/jit/defs.h" +#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.h" #include "tensorflow/compiler/jit/node_matchers.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/core/common_runtime/graph_constructor.h" +#include "xla/tsl/lib/core/status_test_util.h" #include "tensorflow/core/common_runtime/graph_def_builder_util.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_def_builder.h" -#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/test.h" @@ -503,7 +522,7 @@ TEST(XlaCompilationTest, CyclesWithAllDifferentScopesGlobalJitOverridden) { ops::BinaryOp( "MatMul", a, b, builder.opts().WithName("C").WithAttr(kXlaScopeAttr, "ScopeC")); - TF_CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); + CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); } FunctionDefLibrary flib; @@ -536,7 +555,7 @@ TEST(XlaCompilationTest, CyclesWithAllDifferentScopes) { ops::BinaryOp( "MatMul", a, b, builder.opts().WithName("C").WithAttr(kXlaScopeAttr, "ScopeC")); - TF_CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); + CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); } TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation( @@ -574,7 +593,7 @@ TEST(XlaCompilationTest, CyclesWithSplittingScopes) { .WithName("D") 
.WithAttr(kXlaCompileAttr, true) .WithAttr(kXlaScopeAttr, "Scope2")); - TF_CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); + CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); } TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation( @@ -607,7 +626,7 @@ TEST(XlaCompilationTest, CyclesWithDifferentScopesAndBridge) { .WithAttr(kXlaCompileAttr, true) .WithAttr(kXlaScopeAttr, "ScopeB")); ops::BinaryOp("MatMul", a, b, builder.opts().WithName("C")); - TF_CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); + CHECK_OK(GraphDefBuilderToGraph(builder, graph.get())); } TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation( @@ -797,11 +816,11 @@ TEST(XlaCompilationTest, IllegalCycle_UsefulErrorMessage) { auto BuildNoopNode = [](absl::string_view name, Graph* graph) { NodeDefBuilder builder(name, "NoOp"); NodeDef def; - TF_CHECK_OK(builder.Finalize(&def)); + CHECK_OK(builder.Finalize(&def)); absl::Status status; Node* node = graph->AddNode(def, &status); - TF_CHECK_OK(status); + CHECK_OK(status); return node; }; diff --git a/tensorflow/compiler/jit/node_matchers.cc b/tensorflow/compiler/jit/node_matchers.cc index db158fc84a0173..93c07d5539ccc2 100644 --- a/tensorflow/compiler/jit/node_matchers.cc +++ b/tensorflow/compiler/jit/node_matchers.cc @@ -15,16 +15,31 @@ limitations under the License. #include "tensorflow/compiler/jit/node_matchers.h" +#include +#include +#include +#include +#include +#include +#include #include +#include #include "absl/algorithm/container.h" +#include "absl/log/check.h" +#include "absl/log/log.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" -#include "absl/strings/str_replace.h" #include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "tensorflow/cc/framework/ops.h" #include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_node_util.h" namespace tensorflow { @@ -515,7 +530,7 @@ impl::NodeMatcherProperties impl::Attr(std::string name) { NodeMatcherProperties ConstantValue( const ::tensorflow::Input::Initializer& val) { - TF_CHECK_OK(val.status); + CHECK_OK(val.status); NodeMatcherProperties props; props.set_constant_value(val.tensor); return props; diff --git a/tensorflow/compiler/jit/rearrange_function_argument_pass_test.cc b/tensorflow/compiler/jit/rearrange_function_argument_pass_test.cc index a833e9827c028a..6f3450f67e0e38 100644 --- a/tensorflow/compiler/jit/rearrange_function_argument_pass_test.cc +++ b/tensorflow/compiler/jit/rearrange_function_argument_pass_test.cc @@ -13,25 +13,28 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "absl/strings/match.h" +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" -#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/functional_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/compiler/jit/encapsulate_util.h" #include "tensorflow/compiler/tf2xla/rearrange_function_argument.h" -#include "tensorflow/core/common_runtime/device_factory.h" +#include "xla/tsl/platform/errors.h" #include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/protobuf/error_codes.pb.h" -#include "tensorflow/core/public/session_options.h" -#include "tensorflow/core/public/version.h" namespace tensorflow { @@ -49,9 +52,9 @@ TEST(RearrangeFunctionArgumentForFunctionTest, Basic) { auto ret0 = ops::_Retval(s.WithOpName("ret0"), arg1, 0); auto ret1 = ops::_Retval(s.WithOpName("ret1"), arg0, 1); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "f1", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "f1", xla_fdef)); } { // Function for While's "body". @@ -64,9 +67,9 @@ TEST(RearrangeFunctionArgumentForFunctionTest, Basic) { auto ret0 = ops::_Retval(s.WithOpName("ret0"), arg0, 0); auto ret1 = ops::_Retval(s.WithOpName("ret1"), arg1, 1); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "f2", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "f2", xla_fdef)); } { // Function for While's "cond". 
@@ -77,9 +80,9 @@ TEST(RearrangeFunctionArgumentForFunctionTest, Basic) { Output arg1 = ops::_Arg(s.WithOpName("arg1"), DT_BOOL, 1); auto ret0 = ops::_Retval(s.WithOpName("ret0"), arg1, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "f3", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "f3", xla_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); @@ -106,11 +109,11 @@ TEST(RearrangeFunctionArgumentForFunctionTest, Basic) { auto ret2 = ops::_Retval(s.WithOpName("ret2"), while_op.output[0], 2); auto ret3 = ops::_Retval(s.WithOpName("ret3"), while_op.output[1], 3); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); std::vector> fbodies; - TF_CHECK_OK(RearrangeFunctionArguments( - [&](const NameAttrList &function, const FunctionBody **fbody) { + CHECK_OK(RearrangeFunctionArguments( + [&](const NameAttrList& function, const FunctionBody** fbody) { std::unique_ptr new_fbody; TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(*fld.Find(function.name()), AttrSlice(&function.attr()), @@ -136,33 +139,33 @@ TEST(RearrangeFunctionArgumentForFunctionTest, Basic) { const Node *if_node = node_name_index.at("if"); ASSERT_NE(if_node, nullptr); const Node *input_node; - TF_CHECK_OK(if_node->input_node(1, &input_node)); + CHECK_OK(if_node->input_node(1, &input_node)); EXPECT_EQ(input_node->name(), "arg1"); - TF_CHECK_OK(if_node->input_node(2, &input_node)); + CHECK_OK(if_node->input_node(2, &input_node)); EXPECT_EQ(input_node->name(), "arg0"); const Node *ret0_node = node_name_index.at("ret0"); ASSERT_NE(ret0_node, nullptr); - TF_CHECK_OK(ret0_node->input_node(0, &input_node)); + CHECK_OK(ret0_node->input_node(0, &input_node)); EXPECT_EQ(input_node->name(), "if"); const Node *ret1_node = node_name_index.at("ret1"); ASSERT_NE(ret1_node, nullptr); - TF_CHECK_OK(ret1_node->input_node(0, &input_node)); + CHECK_OK(ret1_node->input_node(0, &input_node)); EXPECT_EQ(input_node->name(), "arg0"); // Check node "while" input and output edges. const Node *while_node = node_name_index.at("while"); ASSERT_NE(while_node, nullptr); - TF_CHECK_OK(while_node->input_node(0, &input_node)); + CHECK_OK(while_node->input_node(0, &input_node)); EXPECT_EQ(input_node->name(), "arg1"); - TF_CHECK_OK(while_node->input_node(1, &input_node)); + CHECK_OK(while_node->input_node(1, &input_node)); EXPECT_EQ(input_node->name(), "arg0"); const Node *ret2_node = node_name_index.at("ret2"); ASSERT_NE(ret2_node, nullptr); - TF_CHECK_OK(ret2_node->input_node(0, &input_node)); + CHECK_OK(ret2_node->input_node(0, &input_node)); EXPECT_EQ(input_node->name(), "arg0"); const Node *ret3_node = node_name_index.at("ret3"); ASSERT_NE(ret3_node, nullptr); - TF_CHECK_OK(ret3_node->input_node(0, &input_node)); + CHECK_OK(ret3_node->input_node(0, &input_node)); EXPECT_EQ(input_node->name(), "while"); } @@ -182,9 +185,9 @@ TEST(RearrangeFunctionArgumentForFunctionTest, auto ret1 = ops::_Retval(s.WithOpName("ret1"), arg0, 1); auto ret2 = ops::_Retval(s.WithOpName("ret2"), arg2, 2); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "f2", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "f2", xla_fdef)); } { // Function for While's "cond". 
@@ -197,9 +200,9 @@ TEST(RearrangeFunctionArgumentForFunctionTest, Output cond = ops::Const(s.WithOpName("const"), true, TensorShape({})); auto ret0 = ops::_Retval(s.WithOpName("ret0"), cond, 0); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); FunctionDef *xla_fdef = fdl.add_function(); - TF_CHECK_OK(GraphToFunctionDef(*g, "f1", xla_fdef)); + CHECK_OK(GraphToFunctionDef(*g, "f1", xla_fdef)); } FunctionLibraryDefinition fld(OpRegistry::Global(), fdl); @@ -217,7 +220,7 @@ TEST(RearrangeFunctionArgumentForFunctionTest, std::initializer_list{arg0, arg1, arg2}, cond_fn, body_fn); std::unique_ptr g(new Graph(OpRegistry::Global())); - TF_CHECK_OK(s.ToGraph(g.get())); + CHECK_OK(s.ToGraph(g.get())); std::vector> fbodies; absl::Status status = RearrangeFunctionArguments( diff --git a/tensorflow/compiler/jit/shape_inference_test.cc b/tensorflow/compiler/jit/shape_inference_test.cc index 599d442de4b092..807505672357cb 100644 --- a/tensorflow/compiler/jit/shape_inference_test.cc +++ b/tensorflow/compiler/jit/shape_inference_test.cc @@ -17,27 +17,28 @@ limitations under the License. #include "tensorflow/compiler/jit/shape_inference.h" +#include +#include #include #include +#include #include +#include "absl/log/check.h" #include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/control_flow_ops.h" #include "tensorflow/cc/ops/control_flow_ops_internal.h" #include "tensorflow/cc/ops/math_ops.h" #include "tensorflow/cc/ops/resource_variable_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/compiler/jit/test_util.h" #include "xla/tsl/lib/core/status_test_util.h" #include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" -#include "tensorflow/core/platform/types.h" -#include "tsl/platform/status.h" namespace tensorflow { namespace { @@ -55,7 +56,7 @@ TEST(ShapeInferenceTest, Basics) { auto g = ops::AddN(root.WithOpName("G"), std::initializer_list{e, f}); std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(root.ToGraph(graph.get())); + CHECK_OK(root.ToGraph(graph.get())); GraphShapeInfo shape_info; TF_ASSERT_OK(InferShapes(graph.get(), /*arg_shapes=*/{}, @@ -84,7 +85,7 @@ TEST(ShapeInferenceTest, UseArgShapesForVariableBatchSize) { b.node()->AddAttr("_index", 1); std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(root.ToGraph(graph.get())); + CHECK_OK(root.ToGraph(graph.get())); std::map arg_shapes; arg_shapes[0].shape = TensorShape({2, 3}); @@ -118,7 +119,7 @@ TEST(ShapeInferenceTest, UseArgShapesForVariableBatchSizeIncompleteUserArgs) { b.node()->AddAttr("_index", 0); std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(root.ToGraph(graph.get())); + CHECK_OK(root.ToGraph(graph.get())); std::map arg_shapes; arg_shapes[0].shape = TensorShape({2, 3}); diff --git a/tensorflow/compiler/jit/test_util.cc b/tensorflow/compiler/jit/test_util.cc index 30a9ab51faf105..b72fd6e7aaa6eb 100644 --- a/tensorflow/compiler/jit/test_util.cc +++ b/tensorflow/compiler/jit/test_util.cc @@ -15,14 +15,28 @@ limitations under the License. 
#include "tensorflow/compiler/jit/test_util.h" +#include #include #include #include #include +#include +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" #include "tensorflow/compiler/jit/shape_inference.h" #include "xla/status_macros.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/graph_runner.h" +#include "tensorflow/core/common_runtime/process_function_library_runtime.h" #include "tensorflow/core/framework/device_factory.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/public/version.h" namespace tensorflow { @@ -71,15 +85,15 @@ void DeviceSetup::AddDevicesAndSetUp( } std::vector> devices; - TF_CHECK_OK(DeviceFactory::AddDevices( - options, "/job:localhost/replica:0/task:0", &devices)); + CHECK_OK(DeviceFactory::AddDevices(options, "/job:localhost/replica:0/task:0", + &devices)); device_mgr_ = std::make_unique(std::move(devices)); OptimizerOptions opts; lib_def_ = std::make_unique(OpRegistry::Global(), FunctionDefLibrary()); if (fdef.has_value()) { - TF_CHECK_OK(lib_def_->AddFunctionDef(*fdef)); + CHECK_OK(lib_def_->AddFunctionDef(*fdef)); } pflr_ = std::make_unique( device_mgr_.get(), Env::Default(), /*config=*/nullptr, @@ -96,7 +110,7 @@ Device* DeviceSetup::GetDevice(const std::string& device_name) { std::string full_device_name = absl::StrCat( "/job:localhost/replica:0/task:0/device:", device_name, ":0"); Device* device; - TF_CHECK_OK(device_mgr_->LookupDevice(full_device_name, &device)); + CHECK_OK(device_mgr_->LookupDevice(full_device_name, &device)); return device; } diff --git a/tensorflow/compiler/jit/tests/BUILD b/tensorflow/compiler/jit/tests/BUILD index 40de3e19dfd6d1..4c6a59e3f682fc 100644 --- a/tensorflow/compiler/jit/tests/BUILD +++ b/tensorflow/compiler/jit/tests/BUILD @@ -49,6 +49,8 @@ tf_cc_test( deps = [ ":auto_clustering_test_helper", "//tensorflow/core:test", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/compiler/jit/tests/auto_clustering_test.cc b/tensorflow/compiler/jit/tests/auto_clustering_test.cc index d108bc51b5ee33..806abbeb8e6d6a 100644 --- a/tensorflow/compiler/jit/tests/auto_clustering_test.cc +++ b/tensorflow/compiler/jit/tests/auto_clustering_test.cc @@ -13,9 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include + +#include "absl/log/check.h" +#include "absl/status/status.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "tensorflow/compiler/jit/tests/auto_clustering_test_helper.h" -#include "tensorflow/core/lib/core/status_test_util.h" +#include "xla/tsl/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" namespace tensorflow { namespace { @@ -85,7 +91,7 @@ absl::Status BenchmarkHelper(absl::string_view key, benchmark::State& state) { } void BM_MarkForCompilationPass_KerasImagenetMain(benchmark::State& state) { - TF_CHECK_OK(BenchmarkHelper("keras_imagenet_main", state)); + CHECK_OK(BenchmarkHelper("keras_imagenet_main", state)); } BENCHMARK(BM_MarkForCompilationPass_KerasImagenetMain); diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc index 6d7e5518524c29..1d51d4d1ca2b90 100644 --- a/tensorflow/compiler/jit/xla_cluster_util.cc +++ b/tensorflow/compiler/jit/xla_cluster_util.cc @@ -15,25 +15,50 @@ limitations under the License. #include "tensorflow/compiler/jit/xla_cluster_util.h" +#include +#include +#include +#include #include #include +#include +#include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/strings/match.h" -#include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" #include "tensorflow/compiler/jit/flags.h" +#include "xla/service/graphcycles/graphcycles.h" #include "xla/status_macros.h" -#include "tensorflow/core/common_runtime/function.h" +#include "xla/tsl/platform/errors.h" +#include "xla/tsl/platform/statusor.h" +#include "tensorflow/core/common_runtime/function_body.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/bounds_check.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_def_builder.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/core/graph/edgeset.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/platform/errors.h" -#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/platform/hash.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/xla_config_registry.h" @@ -460,8 +485,8 @@ absl::StatusOr DoesAnyCalleeHaveRefNodes( return true; } - auto release_handle_on_return = gtl::MakeCleanup( - [&] { TF_CHECK_OK(lib_runtime->ReleaseHandle(handle)); }); + auto release_handle_on_return = + gtl::MakeCleanup([&] { CHECK_OK(lib_runtime->ReleaseHandle(handle)); }); const FunctionBody* fbody = lib_runtime->GetFunctionBody(handle); 
TF_RETURN_IF_ERROR(GetNodesRelatedToRefVariablesInDirection( diff --git a/tensorflow/compiler/jit/xla_device_compiler_client.cc b/tensorflow/compiler/jit/xla_device_compiler_client.cc index 71be1f7ec6b25d..ff565042347ae1 100644 --- a/tensorflow/compiler/jit/xla_device_compiler_client.cc +++ b/tensorflow/compiler/jit/xla_device_compiler_client.cc @@ -81,7 +81,7 @@ absl::StatusOr XlaDeviceCompilerClient::BuildSerializedExecutable( xla::ExecutableBuildOptions build_options = GetExecutableBuildOptions( options, result, client_->default_device_ordinal()); TF_ASSIGN_OR_RETURN( - std::vector> aot_results, + std::vector> aot_results, client_->CompileAheadOfTime(*result.computation, argument_layouts, build_options)); TF_RET_CHECK(aot_results.size() == 1); diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index 8b38d79f58e415..027fd494ed8af5 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/jit/xla_device_context.h" +#include #include #include #include @@ -22,15 +23,37 @@ limitations under the License. #include #include -#include "tensorflow/compiler/jit/xla_launch_util.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "tensorflow/compiler/jit/xla_tensor.h" +#include "tensorflow/compiler/tf2xla/layout_util.h" #include "tensorflow/compiler/tf2xla/literal_util.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" -#include "xla/util.h" -#include "tensorflow/core/common_runtime/device.h" +#include "xla/client/local_client.h" +#include "xla/layout_util.h" +#include "xla/literal.h" +#include "xla/service/stream_pool.h" +#include "xla/shape.h" +#include "xla/shape_util.h" +#include "xla/status_macros.h" +#include "xla/stream_executor/allocator_stats.h" +#include "xla/stream_executor/event.h" +#include "xla/stream_executor/stream.h" +#include "xla/stream_executor/stream_executor.h" +#include "xla/tsl/platform/errors.h" +#include "xla/tsl/platform/statusor.h" #include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/device.h" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_reference.h" -#include "tsl/platform/statusor.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/threadpool.h" namespace tensorflow { @@ -249,7 +272,7 @@ void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor, // shape as it is derived from the cpu_tensor's shape using // shape_representation_fn_. xla::MutableBorrowingLiteral literal; - TF_CHECK_OK(HostTensorToMutableBorrowingLiteral( + CHECK_OK(HostTensorToMutableBorrowingLiteral( xla::LayoutUtil::GetWithDefaultLayout( xla_tensor->shaped_buffer().on_host_shape()), cpu_tensor, &literal)); diff --git a/tensorflow/compiler/jit/xla_kernel_creator_test.cc b/tensorflow/compiler/jit/xla_kernel_creator_test.cc index 12ab76a7c1ce37..1804b1728c8c7f 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_test.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator_test.cc @@ -15,16 +15,23 @@ limitations under the License. 
#include "tensorflow/compiler/jit/xla_kernel_creator.h" -#include "absl/memory/memory.h" +#include +#include +#include + +#include "absl/log/check.h" #include "absl/status/status.h" -#include "tensorflow/core/common_runtime/device_factory.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/framework/function_testlib.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/process_function_library_runtime.h" +#include "tensorflow/core/framework/device.h" +#include "tensorflow/core/framework/device_factory.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/node_properties.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/public/version.h" @@ -63,7 +70,7 @@ class XlaKernelCreatorTest : public ::testing::Test { auto* device_count = options.config.mutable_device_count(); device_count->insert({"CPU", 1}); std::vector> devices; - TF_CHECK_OK(DeviceFactory::AddDevices( + CHECK_OK(DeviceFactory::AddDevices( options, "/job:localhost/replica:0/task:0", &devices)); FunctionDefLibrary proto; diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 8ccb236897ce39..c35a7d0457c6ff 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -66,7 +66,6 @@ limitations under the License. #include "xla/tsl/framework/device_id_utils.h" #include "xla/tsl/framework/serving_device_selector_policies.h" #include "xla/tsl/platform/errors.h" -#include "xla/tsl/platform/status.h" #include "xla/tsl/platform/statusor.h" #include "xla/util.h" #include "tensorflow/core/common_runtime/dma_helper.h" @@ -85,6 +84,7 @@ limitations under the License. #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/status.h" #include "tensorflow/core/tfrt/common/async_value_tensor.h" +#include "tsl/platform/casts.h" namespace tensorflow { namespace { @@ -323,7 +323,7 @@ absl::Status SetOutputForConstant( } ctx->op_device_context()->CopyCPUTensorToDevice( &const_tensor, device, output_tensor, - [&](absl::Status status) { TF_CHECK_OK(status); }); + [&](absl::Status status) { CHECK_OK(status); }); if (device->device_type() == DEVICE_GPU) { // The GPUDeviceContext enqueues the host->device transfer in a @@ -562,7 +562,7 @@ XlaComputationLaunchContext::BuildXlaCompilerArguments( } absl::flat_hash_map variable_info_lookup; - TF_CHECK_OK(CreateVariableInfoLookup(variable_args, variable_info_lookup)); + CHECK_OK(CreateVariableInfoLookup(variable_args, variable_info_lookup)); for (int64_t input_num = 0; input_num < inputs.size(); ++input_num) { const Tensor* input = inputs[input_num]; XlaCompiler::Argument& arg = out.emplace_back(); diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index 2876b3a7b96373..401f15587fcf39 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -29,6 +29,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "xla/client/local_client.h" #include "xla/pjrt/pjrt_client.h" +#include "xla/service/maybe_owning_device_memory.h" #include "xla/service/shaped_buffer.h" #include "xla/stream_executor/device_memory_allocator.h" #include "tensorflow/core/framework/allocation_description.pb.h" diff --git a/tensorflow/compiler/jit/xla_launch_util_gpu_test.cc b/tensorflow/compiler/jit/xla_launch_util_gpu_test.cc index 563e75c5d61b28..e3f32f8403379a 100644 --- a/tensorflow/compiler/jit/xla_launch_util_gpu_test.cc +++ b/tensorflow/compiler/jit/xla_launch_util_gpu_test.cc @@ -112,7 +112,7 @@ class PjRtExecutionUtilGpuTest : public OpsTestBase { // Create the DeviceCompiler to help with compiling executables. auto pjrt_client_or = GetOrCreatePjRtClient(device_type_); - TF_CHECK_OK(pjrt_client_or.status()); + CHECK_OK(pjrt_client_or.status()); pjrt_client_ = pjrt_client_or.value(); device_compiler_ = new PjRtDeviceCompiler( std::make_unique( diff --git a/tensorflow/compiler/jit/xla_launch_util_test.cc b/tensorflow/compiler/jit/xla_launch_util_test.cc index d8ed5feac79f12..a2eb031da6c38c 100644 --- a/tensorflow/compiler/jit/xla_launch_util_test.cc +++ b/tensorflow/compiler/jit/xla_launch_util_test.cc @@ -21,33 +21,50 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/status/statusor.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/jit/device_compilation_profiler.h" #include "tensorflow/compiler/jit/device_compiler.h" +#include "tensorflow/compiler/jit/device_executable_persistor.h" #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/pjrt_device_compiler_client.h" #include "tensorflow/compiler/jit/variable_info.h" #include "tensorflow/compiler/jit/variable_info_util.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "xla/literal.h" +#include "xla/literal_util.h" #include "xla/pjrt/pjrt_client.h" #include "xla/pjrt/pjrt_common.h" +#include "xla/pjrt/pjrt_executable.h" #include "xla/pjrt/plugin/xla_cpu/cpu_client_options.h" #include "xla/pjrt/plugin/xla_cpu/xla_cpu_pjrt_client.h" #include "xla/tests/literal_test_util.h" #include "xla/tsl/framework/device_id_utils.h" #include "xla/tsl/lib/core/status_test_util.h" +#include "xla/tsl/platform/statusor.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device.h" +#include "tensorflow/core/framework/device_base.h" +#include "tensorflow/core/framework/device_factory.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/resource_handle.h" +#include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/type_index.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/refcount.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session_options.h" #include "tensorflow/core/tfrt/common/create_pjrt_client_util.h" #include "tensorflow/core/tfrt/common/pjrt_util.h" -#include "tsl/platform/status.h" -#include "tsl/platform/statusor.h" namespace tensorflow { namespace { @@ 
-93,11 +110,11 @@ class PjRtExecutionUtilTest : public OpsTestBase { xla::CpuClientOptions options; options.asynchronous = true; options.cpu_device_count = 1; - TF_CHECK_OK(SetPjRtClientInTFGlobalResourceManager( + CHECK_OK(SetPjRtClientInTFGlobalResourceManager( device_type, xla::GetXlaPjrtCpuClient(options).value())); // device_context_ should be a PjRtDeviceContext. - TF_CHECK_OK(device_->TryGetDeviceContext(&device_context_)); + CHECK_OK(device_->TryGetDeviceContext(&device_context_)); // Get the host allocator. AllocatorAttributes host_alloc_attr; @@ -111,7 +128,7 @@ class PjRtExecutionUtilTest : public OpsTestBase { // Create the DeviceCompiler to help with compiling executables. auto pjrt_client_or = GetOrCreatePjRtClient(device_type_); - TF_CHECK_OK(pjrt_client_or.status()); + CHECK_OK(pjrt_client_or.status()); pjrt_client_ = pjrt_client_or.value(); device_compiler_ = new PjRtDeviceCompiler( std::make_unique( diff --git a/tensorflow/compiler/jit/xla_platform_info_test.cc b/tensorflow/compiler/jit/xla_platform_info_test.cc index 84fd60ef6c7e33..7b45521daf2827 100644 --- a/tensorflow/compiler/jit/xla_platform_info_test.cc +++ b/tensorflow/compiler/jit/xla_platform_info_test.cc @@ -18,17 +18,21 @@ limitations under the License. #include #include +#include "absl/log/check.h" +#include "tensorflow/compiler/jit/device_compilation_profiler.h" +#include "tensorflow/compiler/jit/device_compiler.h" #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/test_util.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "xla/client/local_client.h" +#include "xla/pjrt/pjrt_client.h" #include "xla/pjrt/plugin/xla_cpu/cpu_client_options.h" #include "xla/pjrt/plugin/xla_cpu/xla_cpu_pjrt_client.h" +#include "xla/tsl/lib/core/status_test_util.h" +#include "xla/tsl/platform/statusor.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/refcount.h" -#include "tensorflow/core/platform/status_matchers.h" -#include "tensorflow/core/platform/statusor.h" #include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/tfrt/common/create_pjrt_client_util.h" #include "tensorflow/core/tfrt/common/pjrt_util.h" @@ -65,7 +69,7 @@ TEST_F(XlaPlatformInfoTest, BuildXlaDeviceCompilerXlaDeviceMetadata) { Device* device = device_setup_.GetDevice(DEVICE_XLA_GPU); const XlaDevice::Metadata* metadata = nullptr; - TF_CHECK_OK(XlaDevice::GetMetadataFromDevice(device, &metadata)); + CHECK_OK(XlaDevice::GetMetadataFromDevice(device, &metadata)); XlaPlatformInfo platform_info = XlaPlatformInfoFromDevice(device); TF_ASSERT_OK_AND_ASSIGN( @@ -91,7 +95,7 @@ TEST_F(XlaPlatformInfoTest, BuildXlaDeviceCompilerXlaDeviceCacheEnabled) { Device* device = device_setup_.GetDevice(DEVICE_XLA_GPU); const XlaDevice::Metadata* metadata = nullptr; - TF_CHECK_OK(XlaDevice::GetMetadataFromDevice(device, &metadata)); + CHECK_OK(XlaDevice::GetMetadataFromDevice(device, &metadata)); XlaPlatformInfo platform_info = XlaPlatformInfoFromDevice(device); TF_ASSERT_OK_AND_ASSIGN( @@ -134,7 +138,7 @@ TEST_F(XlaPlatformInfoTest, GetOrCreatePjRtDeviceCompilerAndProfilerXlaDevice) { Device* device = device_setup_.GetDevice(device_type.type()); const XlaDevice::Metadata* metadata = nullptr; - TF_CHECK_OK(XlaDevice::GetMetadataFromDevice(device, &metadata)); + CHECK_OK(XlaDevice::GetMetadataFromDevice(device, &metadata)); 
XlaPlatformInfo platform_info = XlaPlatformInfoFromDevice(device); ResourceMgr resource_mgr(""); @@ -254,7 +258,7 @@ TEST_F(XlaPlatformInfoTest, xla::CpuClientOptions options; options.asynchronous = true; options.cpu_device_count = 1; - TF_CHECK_OK(SetPjRtClientInTFGlobalResourceManager( + CHECK_OK(SetPjRtClientInTFGlobalResourceManager( device_type, xla::GetXlaPjrtCpuClient(options).value())); TF_ASSERT_OK_AND_ASSIGN(auto pjrt_client, GetOrCreatePjRtClient(device_type)); diff --git a/tensorflow/compiler/jit/xla_tensor.cc b/tensorflow/compiler/jit/xla_tensor.cc index e9cdad219dd28d..d6792cd7802d96 100644 --- a/tensorflow/compiler/jit/xla_tensor.cc +++ b/tensorflow/compiler/jit/xla_tensor.cc @@ -55,7 +55,7 @@ absl::Status XlaTensor::AllocateShapedBuffer(DataType dtype, xla::ShapeUtil::GetSubshape(on_device_shape, index_to_buffer.first); uint64 size = client->backend().transfer_manager()->GetByteSizeRequirement(subshape); - TF_ASSIGN_OR_RETURN(se::OwningDeviceMemory buffer, + TF_ASSIGN_OR_RETURN(se::ScopedDeviceAddress buffer, client->backend().memory_allocator()->Allocate( device_ordinal, size, /*retry_on_failure=*/false, subshape.layout().memory_space())); diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index ab6c5abeca86f0..b48a8ef6411711 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -1699,6 +1699,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -1714,7 +1715,6 @@ cc_library( "@llvm-project//mlir:Support", "@llvm-project//mlir:TranslateLib", "@local_xla//xla/tsl/platform:errors", - "@local_xla//xla/tsl/platform:status", "@local_xla//xla/tsl/platform:statusor", "@stablehlo//:stablehlo_ops", "@stablehlo//:vhlo_ops", diff --git a/tensorflow/compiler/mlir/lite/debug/debug_test.cc b/tensorflow/compiler/mlir/lite/debug/debug_test.cc index b82d5725182745..a9337c0c84f944 100644 --- a/tensorflow/compiler/mlir/lite/debug/debug_test.cc +++ b/tensorflow/compiler/mlir/lite/debug/debug_test.cc @@ -103,20 +103,21 @@ class InitPassManagerTest : public testing::Test { context_.loadAllAvailableDialects(); mlir::OpBuilder builder(&context_); - module_ = builder.create(builder.getUnknownLoc()); + module_ = mlir::ModuleOp::create(builder, builder.getUnknownLoc()); builder.setInsertionPointToStart(module_->getBody()); - auto func = builder.create( // - builder.getUnknownLoc(), "main", builder.getFunctionType({}, {})); + auto func = mlir::func::FuncOp::create(builder, // + builder.getUnknownLoc(), "main", + builder.getFunctionType({}, {})); func->setAttr("tfl.func", builder.getUnitAttr()); builder.setInsertionPointToStart(func.addEntryBlock()); llvm::SmallVector shape{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - builder.create( - builder.getUnknownLoc(), + mlir::arith::ConstantOp::create( + builder, builder.getUnknownLoc(), mlir::DenseIntElementsAttr::get( mlir::RankedTensorType::get(shape.size(), builder.getI32Type()), shape)); - builder.create(builder.getUnknownLoc()); + mlir::func::ReturnOp::create(builder, builder.getUnknownLoc()); } absl::Status GetDumpDir(std::string* dump_dir) { diff --git a/tensorflow/compiler/mlir/lite/experimental/common/outline_operations.cc b/tensorflow/compiler/mlir/lite/experimental/common/outline_operations.cc index 533a69bdfd9efa..614f9738356019 100644 --- 
a/tensorflow/compiler/mlir/lite/experimental/common/outline_operations.cc +++ b/tensorflow/compiler/mlir/lite/experimental/common/outline_operations.cc @@ -163,8 +163,8 @@ func::FuncOp BuildFuncOp(const Subgraph& subgraph, OpBuilder& builder, Value cloned_output = values_in_scope.lookup(result); return_operands.push_back(cloned_output); } - function_builder.create(new_func.getLoc(), - return_operands); + mlir::func::ReturnOp::create(function_builder, new_func.getLoc(), + return_operands); ops_added.func_op = new_func; module.push_back(new_func); return new_func; @@ -179,8 +179,8 @@ void ExtractSubgraphToFunc(const Subgraph& subgraph, OpBuilder& builder, Operation* last_output = subgraph.partition_ops_.back(); builder.setInsertionPoint(last_output); - auto call_op = builder.create(last_output->getLoc(), func, - subgraph.FuncArguments()); + auto call_op = func::CallOp::create(builder, last_output->getLoc(), func, + subgraph.FuncArguments()); ops_added.call_op = call_op; // FuncOutputs refer to the original `Values` in input module which are now // invalid after pulling out the defining ops. The values in diff --git a/tensorflow/compiler/mlir/lite/experimental/tac/transforms/device_transform.cc b/tensorflow/compiler/mlir/lite/experimental/tac/transforms/device_transform.cc index 787190318b63ad..c5c8c040c2bb28 100644 --- a/tensorflow/compiler/mlir/lite/experimental/tac/transforms/device_transform.cc +++ b/tensorflow/compiler/mlir/lite/experimental/tac/transforms/device_transform.cc @@ -89,8 +89,8 @@ void ConvertQuantizedOpToFloat(mlir::func::FuncOp func, OpBuilder* builder) { auto dequantized_input_type = mlir::quant::QuantizedType::castToExpressedType(input_type); builder->setInsertionPoint(op); - auto dequantize_op = builder->create( - op->getLoc(), dequantized_input_type, input.get()); + auto dequantize_op = TFL::DequantizeOp::create( + *builder, op->getLoc(), dequantized_input_type, input.get()); dequantized_inputs.push_back(dequantize_op); } else { dequantized_inputs.push_back(input.get()); @@ -126,8 +126,9 @@ void ConvertQuantizedOpToFloat(mlir::func::FuncOp func, OpBuilder* builder) { Value new_result = new_op->getResult(i); if (IsQI8Type(result_type) || IsQUI8Type(result_type)) { builder->setInsertionPoint(op); - TFL::QuantizeOp quant_op = builder->create( - op->getLoc(), result_type, new_result, TypeAttr::get(result_type)); + TFL::QuantizeOp quant_op = + TFL::QuantizeOp::create(*builder, op->getLoc(), result_type, + new_result, TypeAttr::get(result_type)); new_result = quant_op.getResult(); } diff --git a/tensorflow/compiler/mlir/lite/experimental/tac/transforms/device_transform_patterns.cc b/tensorflow/compiler/mlir/lite/experimental/tac/transforms/device_transform_patterns.cc index d701254f333322..e6d7c6425abafe 100644 --- a/tensorflow/compiler/mlir/lite/experimental/tac/transforms/device_transform_patterns.cc +++ b/tensorflow/compiler/mlir/lite/experimental/tac/transforms/device_transform_patterns.cc @@ -85,11 +85,11 @@ TFL::ReshapeOp InsertReshapeOp(Location loc, Value input, Type element_type, auto new_shape_attr = mlir::DenseIntElementsAttr::get(reshape_shape_type, new_shape_array_i32); - auto new_shape = builder->create(loc, new_shape_attr); + auto new_shape = TFL::ConstOp::create(*builder, loc, new_shape_attr); auto reshape_out_type = RankedTensorType::get(new_shape_array, element_type); - return builder->create(loc, reshape_out_type, input, - new_shape); + return TFL::ReshapeOp::create(*builder, loc, reshape_out_type, input, + new_shape); } LogicalResult EnsureBias(Operation* 
op, int bias_idx, @@ -148,7 +148,7 @@ TF::ConstOp PadConstValues(Operation* input_op, int value_to_pad, auto new_value_i32_attr = mlir::DenseIntElementsAttr::get(value_shape_type, value_i32); - return builder->create(loc, new_value_i32_attr); + return TF::ConstOp::create(*builder, loc, new_value_i32_attr); } SmallVector SliceOutputs(Operation* split_op, Value input, @@ -186,13 +186,13 @@ SmallVector SliceOutputs(Operation* split_op, Value input, mlir::DenseIntElementsAttr::get(slice_type, slice_size); auto slice_begin_const = - rewriter->create(split_op->getLoc(), slice_begin_attr); + TFL::ConstOp::create(*rewriter, split_op->getLoc(), slice_begin_attr); auto slice_size_const = - rewriter->create(split_op->getLoc(), slice_size_attr); + TFL::ConstOp::create(*rewriter, split_op->getLoc(), slice_size_attr); - auto slice_op = rewriter->create( - split_op->getLoc(), current_output_type, input, slice_begin_const, - slice_size_const); + auto slice_op = + TFL::SliceOp::create(*rewriter, split_op->getLoc(), current_output_type, + input, slice_begin_const, slice_size_const); // Rewire output. slice_outputs.push_back(slice_op.getResult()); diff --git a/tensorflow/compiler/mlir/lite/experimental/tac/transforms/pick_subgraphs.cc b/tensorflow/compiler/mlir/lite/experimental/tac/transforms/pick_subgraphs.cc index 58940205edf1ab..300daee0f9a40d 100644 --- a/tensorflow/compiler/mlir/lite/experimental/tac/transforms/pick_subgraphs.cc +++ b/tensorflow/compiler/mlir/lite/experimental/tac/transforms/pick_subgraphs.cc @@ -430,8 +430,8 @@ void PickSubgraphsPass::RewireSubgraphs( if (call.getCallee() != impl.getName()) { // We need to rebuild the call op. :( builder->setInsertionPoint(call); - auto new_call = builder->create(call.getLoc(), impl, - call.getOperands()); + auto new_call = func::CallOp::create(*builder, call.getLoc(), impl, + call.getOperands()); // Set interface_name & target to the call_op as well. new_call->setAttr(kInterfaceNameAttr, diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index 41dffc228a6b2c..67eef87eb872ad 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -269,7 +269,7 @@ static StatusOr GetTFLiteType(Type type, static bool IsConst(Operation* op) { return isa(op); } @@ -632,6 +632,12 @@ class Translator { std::optional> BuildBuffer( Value value, bool can_be_deduplicated, int& index); + // Builds external buffer and external buffer group from the given value. If + // the value is not defined by a constant op with external buffer attributes, + // returns std::nullopt. + std::optional> BuildExternalBuffer( + Value value, uint32_t external_buffer_id); + // Build TFLite tensor from the given type. This function is for tfl.lstm // intermediates, which should have UniformQuantizedType. std::optional> BuildTensorFromType( @@ -647,6 +653,7 @@ class Translator { // corresponding buffer. Emits error and returns std::nullopt on failure. std::optional> BuildTensor( Value value, const std::string& name, unsigned buffer_idx, + unsigned external_buffer_id, const std::optional>& quant_parameters); @@ -858,6 +865,13 @@ class Translator { BufferOffset empty_buffer_; std::vector> buffers_; + + // External buffers + std::vector> external_buffers_; + std::vector> + external_buffer_groups_; + absl::flat_hash_map external_buffer_group_map_; + // Maps subgraph index and tensor name in the graph to the tensor index. 
absl::flat_hash_map> tensor_index_map_; @@ -986,6 +1000,44 @@ std::string Translator::UniqueName(mlir::Value val) { return std::string(name_mapper_.GetUniqueName(val)); } +std::optional> +Translator::BuildExternalBuffer(mlir::Value value, + uint32_t external_buffer_id) { + if (value.getDefiningOp() == nullptr) { + return std::nullopt; + } + auto inst = mlir::dyn_cast(value.getDefiningOp()); + if (!inst) { + return std::nullopt; + } + auto meta = inst.getExternalBufferAttr(); + if (!meta) { + return std::nullopt; + } + + std::string group_name = meta.getGroupName().str(); + uint64_t offset = meta.getOffset(); + uint64_t length = meta.getLength(); + std::string packing = meta.getPacking().str(); + + uint32_t group_index = 0; + if (auto it = external_buffer_group_map_.find(group_name); + it != external_buffer_group_map_.end()) { + group_index = it->second; + } else { + int index = external_buffer_groups_.size(); + external_buffer_groups_.push_back(tflite::CreateExternalBufferGroup( + builder_, builder_.CreateString(group_name))); + external_buffer_group_map_[group_name] = index; + group_index = index; + } + + auto external_buffer = tflite::CreateExternalBuffer( + builder_, external_buffer_id, group_index, offset, length, + builder_.CreateString(packing)); + return external_buffer; +} + std::optional> Translator::BuildBuffer( mlir::Value value, bool can_be_deduplicated, int& index) { can_be_deduplicated = can_be_deduplicated && !disable_buffer_deduping_; @@ -1241,11 +1293,13 @@ std::optional> Translator::BuildTensorFromType( /*buffer=*/0, builder_.CreateString(name), q_params, /*is_variable=*/false, /*sparsity=*/0, /*shape_signature=*/0, /*has_rank=*/tensor_type.hasRank(), - variant_params->empty() ? 0 : builder_.CreateVector(*variant_params)); + variant_params->empty() ? 0 : builder_.CreateVector(*variant_params), + /*external_buffer=*/0); } std::optional> Translator::BuildTensor( Value value, const std::string& name, unsigned buffer_idx, + unsigned external_buffer_id, const std::optional>& quant_parameters) { auto type = mlir::cast(value.getType()); @@ -1371,7 +1425,8 @@ std::optional> Translator::BuildTensor( (is_variable ? 0 : buffer_idx), builder_.CreateString(name), q_params, /*is_variable=*/is_variable, s_params, /*shape_signature=*/0, /*has_rank=*/has_rank, - variant_params->empty() ? 0 : builder_.CreateVector(*variant_params)); + variant_params->empty() ? 0 : builder_.CreateVector(*variant_params), + external_buffer_id); } else { return tflite::CreateTensor( builder_, builder_.CreateVector(shape), tflite_element_type, @@ -1379,7 +1434,8 @@ std::optional> Translator::BuildTensor( /*is_variable=*/is_variable, s_params, /*shape_signature=*/builder_.CreateVector(shape_signature), /*has_rank=*/has_rank, - variant_params->empty() ? 0 : builder_.CreateVector(*variant_params)); + variant_params->empty() ? 0 : builder_.CreateVector(*variant_params), + external_buffer_id); } } @@ -3292,27 +3348,41 @@ std::optional> Translator::BuildSubGraph( } } + // External buffer id is enforced to have MSB set to 1 to distinguish from + // buffer index/id, with the assumption that the number of external buffers + // are less than 2^31. + uint32_t external_buffer_id = + (1 << 31) | static_cast(external_buffers_.size()); int buffer_index = buffers_.size(); - // If a constant is returned as subgraph's output, this constant cannot be - // deduplicated. 
- const bool not_returned_by_subgraph = llvm::none_of( - value.getUsers(), - [](Operation* user) { return llvm::isa(user); }); + // TODO(ashwinm): Check if for stateful tensors, if it is also needed to // make the Buffer empty apart from setting the buffer_idx=0 in the // Tensor. This does not seem to affect runtime behavior for RNN/LSTM, // but would be good for reducing memory footprint. - if (value.getDefiningOp()) { + if (auto external_buffer_or = + BuildExternalBuffer(value, external_buffer_id); + external_buffer_or.has_value()) { + buffer_index = 0; + external_buffers_.push_back(*external_buffer_or); + } else if (value.getDefiningOp()) { + // If a constant is returned as subgraph's output, this constant cannot be + // deduplicated. + const bool not_returned_by_subgraph = + llvm::none_of(value.getUsers(), [](Operation* user) { + return llvm::isa(user); + }); auto buffer_or = BuildBuffer(value, not_returned_by_subgraph, buffer_index); if (!buffer_or) return false; + external_buffer_id = 0; buffers_.push_back(*buffer_or); } else { + external_buffer_id = 0; buffers_.push_back(empty_buffer_); } - auto tensor_or = - BuildTensor(value, tensor_name, buffer_index, quant_parameters); + auto tensor_or = BuildTensor(value, tensor_name, buffer_index, + external_buffer_id, quant_parameters); if (!tensor_or) return false; tensors.push_back(*tensor_or); @@ -4192,11 +4262,15 @@ std::optional Translator::TranslateInternal() { } auto signature_defs = CreateSignatureDefs(signature_defs_vec); - auto model = tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, - builder_.CreateVector(opcodes_), - builder_.CreateVector(subgraphs_), - description, builder_.CreateVector(buffers_), - metadata_buffer, *metadata, *signature_defs); + bool has_external_buffers = !external_buffers_.empty(); + auto model = tflite::CreateModel( + builder_, TFLITE_SCHEMA_VERSION, builder_.CreateVector(opcodes_), + builder_.CreateVector(subgraphs_), description, + builder_.CreateVector(buffers_), metadata_buffer, *metadata, + *signature_defs, + has_external_buffers ? builder_.CreateVector(external_buffer_groups_) : 0, + has_external_buffers ? builder_.CreateVector(external_buffers_) : 0); + tflite::FinishModelBuffer(builder_, model); // There is a limit of 2GB for a flatbuffer. bool flatbuffer_limit_exceeded = builder_.GetSize() > flatbuffer_size_max; diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc index 4bd8ae5ce0dbb3..ab7d782dba8d33 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc @@ -29,6 +29,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" #include "absl/log/log.h" #include "absl/status/status.h" #include "absl/status/statusor.h" @@ -101,7 +102,6 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/utils/dynamic_shape_utils.h" #include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" #include "xla/tsl/platform/errors.h" -#include "xla/tsl/platform/status.h" #include "xla/tsl/platform/statusor.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" @@ -457,9 +457,9 @@ std::string GetMlirOpName(const tflite::OperatorT& op, return mlir::GetMlirOpNameFromOpCode(op_code); } -StatusOr BuildExternalConstOp(const tflite::TensorT& tensor, - int32_t buffer_index, - OpBuilder builder, Location loc) { +StatusOr BuildExternalConstOpWithBufferIndex( + const tflite::TensorT& tensor, int32_t buffer_index, OpBuilder builder, + Location loc) { TF_ASSIGN_OR_RETURN(mlir::TensorType type, tfl::GetTensorType(tensor, builder, /*is_constant=*/true)); @@ -468,7 +468,45 @@ StatusOr BuildExternalConstOp(const tflite::TensorT& tensor, return errors::Internal("Constant doesn't have a shape"); } auto op = builder.create( - loc, shaped_type, builder.getI32IntegerAttr(buffer_index)); + loc, shaped_type, + /*buffer_index=*/builder.getI32IntegerAttr(buffer_index), + /*external_buffer=*/nullptr); + return op.getOperation(); +} + +StatusOr BuildExternalConstOpWithExternalBuffer( + const tflite::ModelT& model, const tflite::TensorT& tensor, + OpBuilder builder, Location loc) { + TF_ASSIGN_OR_RETURN(mlir::TensorType type, + tfl::GetTensorType(tensor, builder, + /*is_constant=*/true)); + auto shaped_type = llvm::dyn_cast(type); + if (!shaped_type) { + return errors::Internal("Constant doesn't have a shape"); + } + + tflite::ExternalBufferT* external_buffer = nullptr; + for (const auto& extbuf : model.external_buffers) { + if (extbuf->id == tensor.external_buffer) { + external_buffer = extbuf.get(); + break; + } + } + if (external_buffer == nullptr) { + return errors::Internal("External buffer not found"); + } + + std::string group_name = + model.external_buffer_groups[external_buffer->group]->name; + auto op = builder.create( + loc, shaped_type, /*buffer_index=*/nullptr, + /*external_buffer=*/ + tfl::ExternalBufferAttr::get( + builder.getContext(), + /*group_name=*/builder.getStringAttr(group_name), + /*offset=*/external_buffer->offset, + /*length=*/external_buffer->length, + /*packing=*/builder.getStringAttr(external_buffer->packing))); return op.getOperation(); } @@ -936,8 +974,8 @@ StatusOr ConvertOp( if (op_name == "tfl.lstm") { // TODO(b/147587779): add the right region if region is empty. op_state.addRegion(); - TF_CHECK_OK(AddOpIntermediatesForLstm(op, intermediate_types, op_state, loc, - builder)); + CHECK_OK(AddOpIntermediatesForLstm(op, intermediate_types, op_state, loc, + builder)); } if (op_name == "tfl.while") { // Adds two empty regions for "tfl.while". We will fill the regions after @@ -948,8 +986,8 @@ StatusOr ConvertOp( op_state.addRegion(); } if (op_name == "tfl.unidirectional_sequence_lstm") { - TF_CHECK_OK(AddOpIntermediatesForLstm(op, intermediate_types, op_state, loc, - builder)); + CHECK_OK(AddOpIntermediatesForLstm(op, intermediate_types, op_state, loc, + builder)); } if (op_name == "tfl.reshape") { // Flattens reshape ops when more than one dimension shape operand is given. @@ -1347,7 +1385,8 @@ mlir::ResultRange MaybeWrapInControlNode(mlir::Operation* op, // ordered_output_arrays in the same order. If signature is not null, then the // inputs/outputs in signature will be attached to the FuncOp. 
StatusOr ConvertSubgraph( - const tflite::SubGraphT& subgraph, llvm::StringRef name, + const tflite::ModelT& model, const tflite::SubGraphT& subgraph, + llvm::StringRef name, const std::vector>& op_codes, const std::vector& func_names, const std::vector>& buffers, @@ -1511,22 +1550,30 @@ StatusOr ConvertSubgraph( StatusOr op_or_err; std::vector buffer; // Check if constant tensor is stored outside of the flatbuffers. - if (IsValidBufferOffset(buffers[const_tensor.buffer]->offset)) { - const uint8_t* file_begin_ptr = - reinterpret_cast(model_ptr->allocation()->base()); - buffer = std::vector( - file_begin_ptr + buffers[const_tensor.buffer]->offset, - file_begin_ptr + buffers[const_tensor.buffer]->offset + - buffers[const_tensor.buffer]->size); + if (const_tensor.external_buffer != 0) { + op_or_err = BuildExternalConstOpWithExternalBuffer( + model, const_tensor, op_builder, const_loc); } else { - buffer = buffers[const_tensor.buffer]->data; + if (IsValidBufferOffset(buffers[const_tensor.buffer]->offset)) { + const uint8_t* file_begin_ptr = reinterpret_cast( + model_ptr->allocation()->base()); + + buffer = std::vector( + file_begin_ptr + buffers[const_tensor.buffer]->offset, + file_begin_ptr + buffers[const_tensor.buffer]->offset + + buffers[const_tensor.buffer]->size); + } else { + buffer = buffers[const_tensor.buffer]->data; + } + op_or_err = + use_external_constant + ? BuildExternalConstOpWithBufferIndex(const_tensor, + const_tensor.buffer, + op_builder, const_loc) + : BuildConstOp(const_tensor, buffer, const_tensor.is_variable, + op_builder, const_loc, use_stablehlo_constant); } - op_or_err = - use_external_constant - ? BuildExternalConstOp(const_tensor, const_tensor.buffer, - op_builder, const_loc) - : BuildConstOp(const_tensor, buffer, const_tensor.is_variable, - op_builder, const_loc, use_stablehlo_constant); + if (!op_or_err.ok()) { return emitError(const_loc, op_or_err.status().ToString()), op_or_err.status(); @@ -1584,23 +1631,29 @@ StatusOr ConvertSubgraph( StatusOr op_or_err; std::vector buffer; // Check if constant tensor is stored outside of the flatbuffers. - if (IsValidBufferOffset(buffers[const_tensor.buffer]->offset)) { - const uint8_t* file_begin_ptr = - reinterpret_cast(model_ptr->allocation()->base()); - - buffer = std::vector( - file_begin_ptr + buffers[const_tensor.buffer]->offset, - file_begin_ptr + buffers[const_tensor.buffer]->offset + - buffers[const_tensor.buffer]->size); + if (const_tensor.external_buffer != 0) { + op_or_err = BuildExternalConstOpWithExternalBuffer( + model, const_tensor, op_builder, const_loc); } else { - buffer = buffers[const_tensor.buffer]->data; + if (IsValidBufferOffset(buffers[const_tensor.buffer]->offset)) { + const uint8_t* file_begin_ptr = + reinterpret_cast(model_ptr->allocation()->base()); + + buffer = std::vector( + file_begin_ptr + buffers[const_tensor.buffer]->offset, + file_begin_ptr + buffers[const_tensor.buffer]->offset + + buffers[const_tensor.buffer]->size); + } else { + buffer = buffers[const_tensor.buffer]->data; + } + op_or_err = + use_external_constant + ? BuildExternalConstOpWithBufferIndex( + const_tensor, const_tensor.buffer, op_builder, const_loc) + : BuildConstOp(const_tensor, buffer, const_tensor.is_variable, + op_builder, const_loc, use_stablehlo_constant); } - op_or_err = - use_external_constant - ? 
BuildExternalConstOp(const_tensor, const_tensor.buffer, - op_builder, const_loc) - : BuildConstOp(const_tensor, buffer, const_tensor.is_variable, - op_builder, const_loc, use_stablehlo_constant); + if (!op_or_err.ok()) { return emitError(const_loc, op_or_err.status().ToString()), op_or_err.status(); @@ -1862,8 +1915,8 @@ OwningOpRef tflite::FlatBufferToMlir( SubgraphName(set_implicit_main_func, e.index(), *subgraph); uint32_t subgraph_index = static_cast(e.index()); auto func_or_error = ConvertSubgraph( - *subgraph, name, model->operator_codes, func_names, model->buffers, - base_loc, builder, + *model, *subgraph, name, model->operator_codes, func_names, + model->buffers, base_loc, builder, /*is_entry_point=*/ set_implicit_main_func ? e.index() == 0 diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td b/tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td index 57e4ec22976df3..6fa287a8c8b013 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td @@ -166,4 +166,16 @@ def TFL_ConstBytesAttr : AttrDef { let hasCustomAssemblyFormat = 1; } +def TFL_ExternalBufferAttr : AttrDef { + let mnemonic = "external_buffer"; + let parameters = (ins + "::mlir::StringAttr":$group_name, + "uint64_t":$offset, + "uint64_t":$length, + "::mlir::StringAttr":$packing + ); + let summary = "Flatbuffer external buffer metadata."; + let assemblyFormat = "`<` struct(params) `>`"; +} + #endif // TFL_OP_ENUMS diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index c90859cd6accfe..4c7e784d5069fd 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -951,11 +951,15 @@ def TFL_ExternalConstOp : Op:$buffer_index, + OptionalAttr:$external_buffer + ); let results = (outs AnyTensor:$output); } diff --git a/tensorflow/compiler/mlir/lite/quantization/common/quantization_lib/quantization_utils.h b/tensorflow/compiler/mlir/lite/quantization/common/quantization_lib/quantization_utils.h index 66d307dd2fbd86..1da38c2c9f466e 100644 --- a/tensorflow/compiler/mlir/lite/quantization/common/quantization_lib/quantization_utils.h +++ b/tensorflow/compiler/mlir/lite/quantization/common/quantization_lib/quantization_utils.h @@ -346,10 +346,10 @@ void CreateVerifier(mlir::Operation* quantizing_op, BoolAttr log = rewriter.getBoolAttr(quant_params.numeric_verify_spec.log_if_failed_flag); // Verify the quantized value by sending the result to the verifier. 
- rewriter.create( - quantizing_op->getLoc(), quantized_op->getResult(result_idx).getType(), - quantized_op->getResult(result_idx), quantizing_op->getResult(result_idx), - tolerance, log); + VerifierT::create(rewriter, quantizing_op->getLoc(), + quantized_op->getResult(result_idx).getType(), + quantized_op->getResult(result_idx), + quantizing_op->getResult(result_idx), tolerance, log); } template <> @@ -645,8 +645,8 @@ class QuantizationPattern : public RewritePattern { if (!matchPattern(q.getOperand(), m_Constant(&attr))) { continue; } - auto cst = rewriter.create( - quantized_op->getLoc(), attr); + auto cst = arith::ConstantOp::create(rewriter, + quantized_op->getLoc(), attr); quantizing_op->setOperand(i, cst.getResult()); } } diff --git a/tensorflow/compiler/mlir/lite/quantization/lite/quantize_weights_test.cc b/tensorflow/compiler/mlir/lite/quantization/lite/quantize_weights_test.cc index 1e1f79af16cbd6..b131a5f0e1060b 100644 --- a/tensorflow/compiler/mlir/lite/quantization/lite/quantize_weights_test.cc +++ b/tensorflow/compiler/mlir/lite/quantization/lite/quantize_weights_test.cc @@ -43,7 +43,7 @@ limitations under the License. // Note: branched from tensorflow/lite/tools/optimize/quantize_weights_test.cc namespace { -tensorflow::string* g_test_model_dir = nullptr; +std::string* g_test_model_dir = nullptr; } // namespace namespace tflite { @@ -766,7 +766,7 @@ TEST_F(QuantizeWeightsTest, DequantizeConvBlocklisted) { } // namespace tflite int main(int argc, char** argv) { - tensorflow::string model_file; + std::string model_file; const std::vector flag_list = { tensorflow::Flag("test_model_file", &model_file, "Path to test tflite model file."), @@ -777,8 +777,7 @@ int main(int argc, char** argv) { std::cerr << "Required test_model_file\n"; std::abort(); } - g_test_model_dir = - new tensorflow::string(tensorflow::io::Dirname(model_file)); + g_test_model_dir = new std::string(tensorflow::io::Dirname(model_file)); ::tensorflow::port::InitMain(argv[0], &argc, &argv); return RUN_ALL_TESTS(); } diff --git a/tensorflow/compiler/mlir/lite/quantization/tensorflow/tf_to_quant.cc b/tensorflow/compiler/mlir/lite/quantization/tensorflow/tf_to_quant.cc index 6c43167a78cbae..529b5d2161be32 100644 --- a/tensorflow/compiler/mlir/lite/quantization/tensorflow/tf_to_quant.cc +++ b/tensorflow/compiler/mlir/lite/quantization/tensorflow/tf_to_quant.cc @@ -150,10 +150,10 @@ struct InsertQuantOpsAfterTFFakeQuantOp // dequantize ops, and insert them between the tf.FakeQuantWithMinMaxVarsOp // and its users. 
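// Note on the rewiring below: `value.replaceAllUsesWith(dequantize)` also
// redirects the operand of the freshly created quantize op (it, too, is a
// user of `value`), which would leave a quantize <- dequantize <- quantize
// cycle. The follow-up `quantize.getOperation()->replaceUsesOfWith(dequantize,
// value)` undoes that single edge, leaving the intended chain:
//
//   value -> quantfork::QuantizeCastOp -> quantfork::DequantizeCastOp -> users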
Value value = tf_op.getOutputs(); - auto quantize = rewriter.create( - tf_op.getLoc(), qtype.getValue(), value); - auto dequantize = rewriter.create( - tf_op.getLoc(), res_type, quantize.getResult()); + auto quantize = quantfork::QuantizeCastOp::create(rewriter, tf_op.getLoc(), + qtype.getValue(), value); + auto dequantize = quantfork::DequantizeCastOp::create( + rewriter, tf_op.getLoc(), res_type, quantize.getResult()); value.replaceAllUsesWith(dequantize); quantize.getOperation()->replaceUsesOfWith(dequantize, value); diff --git a/tensorflow/compiler/mlir/lite/schema/BUILD b/tensorflow/compiler/mlir/lite/schema/BUILD index 0c0381439d2b5e..649e198336c911 100644 --- a/tensorflow/compiler/mlir/lite/schema/BUILD +++ b/tensorflow/compiler/mlir/lite/schema/BUILD @@ -1,5 +1,10 @@ load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library") load("@rules_cc//cc:cc_library.bzl", "cc_library") + +# copybara:uncomment_begin(google-only) +# load("@flatbuffers//:flatbuffers.bzl", "flatbuffers_library", "ts_flatbuffers_library") +# copybara:uncomment_end + load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.default.bzl", "get_compatible_with_portable") @@ -131,3 +136,15 @@ tf_cc_test( "@flatbuffers//:flatc_library", ], ) + +# copybara:uncomment_begin(google-only) +# flatbuffers_library( +# name = "schema_fbslib", +# srcs = ["schema.fbs"], +# ) +# +# ts_flatbuffers_library( +# name = "schema_ts_fbs", +# deps = [":schema_fbslib"], +# ) +# copybara:uncomment_end diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/compose_uniform_quantized_type_pass.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/compose_uniform_quantized_type_pass.cc index 4107859b7412af..0dd7e1f3b97a1c 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/compose_uniform_quantized_type_pass.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/compose_uniform_quantized_type_pass.cc @@ -677,13 +677,12 @@ class ComposeUniformQuantizedConvolutionOp CreateI8F32UniformQuantizedType( uniform_quantize_call_op.getLoc(), *rewriter.getContext(), input_scale_value, input_zero_point_value); - auto input_uniform_quantize_op = - rewriter.create( - uniform_quantize_call_op.getLoc(), - /*result=*/ - mlir::cast(input_value.getType()) - .clone(input_quantized_element_type), - /*operand=*/input_value); + auto input_uniform_quantize_op = stablehlo::UniformQuantizeOp::create( + rewriter, uniform_quantize_call_op.getLoc(), + /*result=*/ + mlir::cast(input_value.getType()) + .clone(input_quantized_element_type), + /*operand=*/input_value); rewriter.replaceAllUsesWith(input_i8_to_f32_convert_op.getResult(), input_uniform_quantize_op.getResult()); @@ -754,8 +753,8 @@ class ComposeUniformQuantizedConvolutionOp /*quantization_dimension=*/3); // Create a new constant op for the filter in i8. 
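// Rough outline of the composition performed by this pattern, with names taken
// from the surrounding code:
//   1. CreateI8F32UniformQuantizedType derives the quantized element type from
//      the matched scale / zero-point constants.
//   2. stablehlo::UniformQuantizeOp is re-created on the input, its result
//      type being the original tensor type cloned with that element type.
//   3. The filter constant is re-created in i8, the convolution is rebuilt
//      over the quantized operands, and a stablehlo::UniformDequantizeOp is
//      appended so downstream consumers keep seeing float values.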
- auto quantized_filter_constant_op = rewriter.create( - filter_op->getLoc(), + auto quantized_filter_constant_op = stablehlo::ConstantOp::create( + rewriter, filter_op->getLoc(), /*output=*/ filter_i8_value_attr.getType().clone(filter_quantized_element_type), /*value=*/filter_i8_value_attr); @@ -797,18 +796,16 @@ class ComposeUniformQuantizedConvolutionOp SmallVector new_conv_output_types = { output_uniform_quantized_tensor_type}; - auto new_conv_op_with_output_type = - rewriter.create( - op.getLoc(), new_conv_output_types, op.getOperands(), - op->getAttrs()); + auto new_conv_op_with_output_type = stablehlo::ConvolutionOp::create( + rewriter, op.getLoc(), new_conv_output_types, op.getOperands(), + op->getAttrs()); rewriter.replaceAllUsesWith(op.getResult(), new_conv_op_with_output_type.getResult()); - auto new_output_dequant_op = - rewriter.create( - rewriter.getUnknownLoc(), - /*operand=*/new_conv_op_with_output_type); + auto new_output_dequant_op = stablehlo::UniformDequantizeOp::create( + rewriter, rewriter.getUnknownLoc(), + /*operand=*/new_conv_op_with_output_type); auto output_uniform_dequantize_call_op = cast( *output_uniform_quantize_call_op.getResult(0).user_begin()); @@ -1035,13 +1032,12 @@ class ComposeUniformQuantizedDotGeneralOp input_scale_value, input_zero_point_value); Value input_value = input_uniform_quantize_call_pattern->GetInputValue(); - auto input_uniform_quantize_op = - rewriter.create( - input_i8_to_f32_convert_op.getLoc(), - /*result=*/ - mlir::cast(input_value.getType()) - .clone(input_uniform_quantized_type), - /*operand=*/input_value); + auto input_uniform_quantize_op = stablehlo::UniformQuantizeOp::create( + rewriter, input_i8_to_f32_convert_op.getLoc(), + /*result=*/ + mlir::cast(input_value.getType()) + .clone(input_uniform_quantized_type), + /*operand=*/input_value); rewriter.replaceAllUsesWith(input_i8_to_f32_convert_op.getResult(), input_uniform_quantize_op.getResult()); @@ -1116,8 +1112,8 @@ class ComposeUniformQuantizedDotGeneralOp quantization_dimension); // Create a new constant op for the filter in i8. 
- auto quantized_filter_constant_op = rewriter.create( - filter_constant_op.getLoc(), + auto quantized_filter_constant_op = stablehlo::ConstantOp::create( + rewriter, filter_constant_op.getLoc(), /*output=*/ mlir::cast(filter_constant_op.getResult().getType()) .clone(filter_uniform_quantized_type), @@ -1157,8 +1153,8 @@ class ComposeUniformQuantizedDotGeneralOp output_uniform_quantize_call_op.getLoc(), *rewriter.getContext(), output_scale_value, output_zero_point_value); - auto new_dot_general_op = rewriter.create( - op.getLoc(), /*resultType0=*/ + auto new_dot_general_op = stablehlo::DotGeneralOp::create( + rewriter, op.getLoc(), /*resultType0=*/ mlir::cast(op.getResult().getType()) .clone(output_uniform_quantized_type), /*lhs=*/op.getLhs(), /*rhs=*/op.getRhs(), @@ -1168,10 +1164,9 @@ class ComposeUniformQuantizedDotGeneralOp rewriter.replaceAllUsesWith(op.getResult(), new_dot_general_op.getResult()); - auto new_output_dequant_op = - rewriter.create( - output_uniform_dequantize_call_op.getLoc(), - /*operand=*/new_dot_general_op); + auto new_output_dequant_op = stablehlo::UniformDequantizeOp::create( + rewriter, output_uniform_dequantize_call_op.getLoc(), + /*operand=*/new_dot_general_op); rewriter.replaceAllUsesWith(output_uniform_dequantize_call_op.getResult(0), new_output_dequant_op.getResult()); @@ -1423,13 +1418,12 @@ class ComposeUniformQuantizedDotGeneralOpWithTwoQuantizedActivations input1_scale_value, input1_zero_point_value); Value input1_value = input1_uniform_quantize_call_pattern->GetInputValue(); - auto input1_uniform_quantize_op = - rewriter.create( - input1_uniform_quantize_call_op.getLoc(), - /*result=*/ - mlir::cast(input1_value.getType()) - .clone(input1_uniform_quantized_type), - /*operand=*/input1_value); + auto input1_uniform_quantize_op = stablehlo::UniformQuantizeOp::create( + rewriter, input1_uniform_quantize_call_op.getLoc(), + /*result=*/ + mlir::cast(input1_value.getType()) + .clone(input1_uniform_quantized_type), + /*operand=*/input1_value); rewriter.replaceAllUsesWith(input1_zero_point_subtract_op.getResult(), input1_uniform_quantize_op.getResult()); @@ -1462,13 +1456,12 @@ class ComposeUniformQuantizedDotGeneralOpWithTwoQuantizedActivations input2_scale_value, input2_zero_point_value); Value input2_value = input2_uniform_quantize_call_pattern->GetInputValue(); - auto input2_uniform_quantize_op = - rewriter.create( - input2_uniform_quantize_call_op.getLoc(), - /*result=*/ - mlir::cast(input2_value.getType()) - .clone(input2_uniform_quantized_type), - /*operand=*/input2_value); + auto input2_uniform_quantize_op = stablehlo::UniformQuantizeOp::create( + rewriter, input2_uniform_quantize_call_op.getLoc(), + /*result=*/ + mlir::cast(input2_value.getType()) + .clone(input2_uniform_quantized_type), + /*operand=*/input2_value); rewriter.replaceAllUsesWith(input2_zero_point_subtract_op.getResult(), input2_uniform_quantize_op.getResult()); @@ -1512,8 +1505,8 @@ class ComposeUniformQuantizedDotGeneralOpWithTwoQuantizedActivations output_uniform_quantize_call_op.getLoc(), *rewriter.getContext(), output_scale_value, output_zero_point_value); - auto new_dot_general_op = rewriter.create( - op.getLoc(), /*resultType0=*/ + auto new_dot_general_op = stablehlo::DotGeneralOp::create( + rewriter, op.getLoc(), /*resultType0=*/ mlir::cast(op.getResult().getType()) .clone(output_uniform_quantized_type), /*lhs=*/op.getLhs(), /*rhs=*/op.getRhs(), @@ -1523,10 +1516,9 @@ class ComposeUniformQuantizedDotGeneralOpWithTwoQuantizedActivations rewriter.replaceAllUsesWith(op.getResult(), 
new_dot_general_op.getResult()); - auto new_output_dequant_op = - rewriter.create( - output_uniform_dequantize_call_op.getLoc(), - /*operand=*/new_dot_general_op); + auto new_output_dequant_op = stablehlo::UniformDequantizeOp::create( + rewriter, output_uniform_dequantize_call_op.getLoc(), + /*operand=*/new_dot_general_op); rewriter.replaceAllUsesWith(output_uniform_dequantize_call_op.getResult(0), new_output_dequant_op.getResult()); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc index 7608ff985f1eb9..0d8688b2c8855a 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo.cc @@ -328,22 +328,22 @@ class ConvertNdConvOp : public OpConversionPattern { size.push_back(input_shape[i] - pre_slice - post_slice); } - auto start_attr = rewriter.create( - value.getLoc(), + auto start_attr = TF::ConstOp::create( + rewriter, value.getLoc(), DenseIntElementsAttr::get( RankedTensorType::get({static_cast(start.size())}, rewriter.getI64Type()), start)); - auto size_attr = rewriter.create( - value.getLoc(), + auto size_attr = TF::ConstOp::create( + rewriter, value.getLoc(), DenseIntElementsAttr::get( RankedTensorType::get({static_cast(size.size())}, rewriter.getI64Type()), size)); auto output_type = RankedTensorType::get(size, input_type.getElementType()); - return rewriter.create(value.getLoc(), output_type, value, - start_attr, size_attr); + return TF::SliceOp::create(rewriter, value.getLoc(), output_type, value, + start_attr, size_attr); } void CreateConvOp(mhlo::ConvolutionOp conv_op, ArrayRef strides, @@ -381,14 +381,15 @@ class ConvertNdConvOp : public OpConversionPattern { mlir::dyn_cast(conv_op.getLhs().getType()); RankedTensorType padding_attr_type = mlir::RankedTensorType::get( {lhs_type.getRank(), 2}, rewriter.getIntegerType(64)); - auto padding_const = rewriter.create( - conv_op->getLoc(), + auto padding_const = TF::ConstOp::create( + rewriter, conv_op->getLoc(), mlir::DenseElementsAttr::get(padding_attr_type, ArrayRef(new_padding))); // Add Pad op. 
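// Explicit-padding path, sketched with the names used below (here
// `padding_values_attr` stands for the DenseElementsAttr built from
// `new_padding`): the (low, high) pad amounts are materialized as a [rank, 2]
// TF::ConstOp, a TF::PadOp is placed in front of the convolution input, and
// the convolution itself is then emitted with padding = "VALID" so the pad is
// not applied twice.
//
//   auto padding_const = TF::ConstOp::create(rewriter, conv_op->getLoc(),
//                                            padding_values_attr);
//   sliced_lhs = TF::PadOp::create(rewriter, conv_op->getLoc(),
//                                  pad_output_type, sliced_lhs, padding_const);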
auto pad_output_type = UnrankedTensorType::get(lhs_type.getElementType()); - sliced_lhs = rewriter.create( - conv_op->getLoc(), pad_output_type, sliced_lhs, padding_const); + sliced_lhs = + TF::PadOp::create(rewriter, conv_op->getLoc(), pad_output_type, + sliced_lhs, padding_const); padding = "VALID"; } @@ -422,28 +423,28 @@ class ConvertNdConvOp : public OpConversionPattern { hlo_filter_shape.end()); tf_filter_shape[2] = input_channels; tf_filter_shape[3] = hlo_filter_shape.back() / input_channels; - auto reshaped_filter = rewriter.create( - rhs.getLoc(), + auto reshaped_filter = mhlo::ReshapeOp::create( + rewriter, rhs.getLoc(), RankedTensorType::get(tf_filter_shape, filter_type.getElementType()), rhs); - output = rewriter.create( - conv_op.getLoc(), conv_output_type, sliced_lhs, reshaped_filter, - rewriter.getI64ArrayAttr(strides), + output = TF::DepthwiseConv2dNativeOp::create( + rewriter, conv_op.getLoc(), conv_output_type, sliced_lhs, + reshaped_filter, rewriter.getI64ArrayAttr(strides), /*padding=*/rewriter.getStringAttr(padding), /*explicit_paddings=*/rewriter.getI64ArrayAttr(new_padding), /*data_format=*/rewriter.getStringAttr("NHWC"), /*dilations=*/rewriter.getI64ArrayAttr(dilation)); } else if (num_spatial_dims == 3) { - output = rewriter.create( - conv_op.getLoc(), conv_output_type, sliced_lhs, rhs, + output = TF::Conv3DOp::create( + rewriter, conv_op.getLoc(), conv_output_type, sliced_lhs, rhs, rewriter.getI64ArrayAttr(strides), /*padding=*/rewriter.getStringAttr(padding), /*data_format=*/rewriter.getStringAttr("NDHWC"), /*dilations=*/rewriter.getI64ArrayAttr(dilation)); } else { - output = rewriter.create( - conv_op.getLoc(), conv_output_type, sliced_lhs, rhs, + output = TF::Conv2DOp::create( + rewriter, conv_op.getLoc(), conv_output_type, sliced_lhs, rhs, rewriter.getI64ArrayAttr(strides), /*use_cudnn_on_gpu=*/rewriter.getBoolAttr(true), /*padding=*/rewriter.getStringAttr(padding), @@ -462,8 +463,8 @@ class ConvertNdConvOp : public OpConversionPattern { dnums.getOutputFeatureDimension(), *dnums.getOutputSpatialDimensions().begin(), num_spatial_dims, conv_output_type, rewriter); - output = rewriter.create( - conv_op.getLoc(), conv_op.getType(), output, permutation); + output = mhlo::TransposeOp::create( + rewriter, conv_op.getLoc(), conv_op.getType(), output, permutation); } rewriter.replaceOp(conv_op, {output}); } @@ -513,8 +514,8 @@ class Convert1DConvOp : public OpConversionPattern { auto image_2d_type = RankedTensorType::get(image_2d_shape, image_type.getElementType()); auto loc = conv_op.getLoc(); - auto image_2d_op = rewriter.create( - conv_op.getLoc(), image_2d_type, conv_op.getLhs()); + auto image_2d_op = mhlo::ReshapeOp::create(rewriter, conv_op.getLoc(), + image_2d_type, conv_op.getLhs()); // Transpose image to get it into NWHC form (where H is the added dim). SmallVector image_permutation = { @@ -523,9 +524,9 @@ class Convert1DConvOp : public OpConversionPattern { dnums.getInputFeatureDimension()}; auto image_permutation_and_shape = GetPermutationAndTransposedShape( image_permutation, image_2d_type, rewriter); - auto transposed_image_2d_op = rewriter.create( - loc, image_permutation_and_shape.shape, image_2d_op->getResult(0), - image_permutation_and_shape.permutation); + auto transposed_image_2d_op = mhlo::TransposeOp::create( + rewriter, loc, image_permutation_and_shape.shape, + image_2d_op->getResult(0), image_permutation_and_shape.permutation); // Reshape kernel to add a new spatial dimension. 
auto kernel_type = mlir::cast(conv_op.getRhs().getType()); @@ -536,8 +537,8 @@ class Convert1DConvOp : public OpConversionPattern { kernel_2d_shape.push_back(1); auto kernel_2d_type = RankedTensorType::get(kernel_2d_shape, kernel_type.getElementType()); - auto kernel_2d_op = - rewriter.create(loc, kernel_2d_type, conv_op.getRhs()); + auto kernel_2d_op = mhlo::ReshapeOp::create(rewriter, loc, kernel_2d_type, + conv_op.getRhs()); // Transpose kernel to get it into WHIO form (where H is the added dim). SmallVector kernel_permutation = { @@ -547,9 +548,9 @@ class Convert1DConvOp : public OpConversionPattern { dnums.getKernelOutputFeatureDimension()}; auto kernel_permutation_and_shape = GetPermutationAndTransposedShape( kernel_permutation, kernel_2d_type, rewriter); - auto transposed_kernel_2d_op = rewriter.create( - loc, kernel_permutation_and_shape.shape, kernel_2d_op->getResult(0), - kernel_permutation_and_shape.permutation); + auto transposed_kernel_2d_op = mhlo::TransposeOp::create( + rewriter, loc, kernel_permutation_and_shape.shape, + kernel_2d_op->getResult(0), kernel_permutation_and_shape.permutation); // // Create 2d equivalents for 1d convolution attributes. @@ -638,12 +639,12 @@ class Convert1DConvOp : public OpConversionPattern { rewriter) .shape; - auto conv2d_op = rewriter.create( - loc, transposed_output_2d_shape, transposed_image_2d_op.getResult(), - transposed_kernel_2d_op.getResult(), window_strides_2d, padding_2d, - lhs_dilation_2d, rhs_dilation_2d, window_reversal_2d, dnums_2d, - conv_op.getFeatureGroupCount(), conv_op.getBatchGroupCount(), - conv_op.getPrecisionConfigAttr()); + auto conv2d_op = mhlo::ConvolutionOp::create( + rewriter, loc, transposed_output_2d_shape, + transposed_image_2d_op.getResult(), transposed_kernel_2d_op.getResult(), + window_strides_2d, padding_2d, lhs_dilation_2d, rhs_dilation_2d, + window_reversal_2d, dnums_2d, conv_op.getFeatureGroupCount(), + conv_op.getBatchGroupCount(), conv_op.getPrecisionConfigAttr()); OpResult conv2d_output = conv2d_op->getResult(0); auto conv2d_output_type = mlir::cast(conv2d_output.getType()); @@ -656,8 +657,8 @@ class Convert1DConvOp : public OpConversionPattern { // affectively applied. auto output_permutation_and_shape = GetInversePermutationAndShape( output_permutation, conv2d_output_type, rewriter); - auto transposed_output_2d_op = rewriter.create( - loc, output_permutation_and_shape.shape, conv2d_output, + auto transposed_output_2d_op = mhlo::TransposeOp::create( + rewriter, loc, output_permutation_and_shape.shape, conv2d_output, output_permutation_and_shape.permutation); // Drop the trailing spatial dimension from the output. 
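// Convert1DConvOp in brief: the 1-D mhlo.convolution is routed through the
// 2-D lowering. mhlo::ReshapeOp adds a dummy spatial dimension to image and
// kernel, mhlo::TransposeOp brings them into NWHC / WHIO order, a 2-D
// mhlo::ConvolutionOp is created from the *_2d strides/padding/dilation and
// dimension numbers built above, and the result is transposed back with the
// inverse permutation before the trailing spatial dimension is dropped again.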
@@ -804,11 +805,10 @@ class ConvertToResizeBilinearOpOrDepthwiseTransposedConvOp } else { limit_indices[channel_idx] = depth_idx + 1; } - return rewriter.create( - conv_op.getLoc(), tensor, - GetI64ElementsAttr(start_indices, &rewriter), - GetI64ElementsAttr(limit_indices, &rewriter), - GetI64ElementsAttr(strides, &rewriter)); + return mhlo::SliceOp::create(rewriter, conv_op.getLoc(), tensor, + GetI64ElementsAttr(start_indices, &rewriter), + GetI64ElementsAttr(limit_indices, &rewriter), + GetI64ElementsAttr(strides, &rewriter)); }; // Storage for smaller convolution results @@ -832,18 +832,19 @@ class ConvertToResizeBilinearOpOrDepthwiseTransposedConvOp RankedTensorType::get(new_output_shape, output_type.getElementType()); // Create a Smaller Convolution (Ensure compatibility) - auto conv_result = rewriter.create( - conv_op.getLoc(), new_output_type, sliced_input, sliced_kernel, - conv_op.getWindowStridesAttr(), conv_op.getPaddingAttr(), - conv_op.getLhsDilationAttr(), conv_op.getRhsDilationAttr(), - conv_op.getWindowReversalAttr(), conv_op.getDimensionNumbers(), 1, 1, + auto conv_result = mhlo::ConvolutionOp::create( + rewriter, conv_op.getLoc(), new_output_type, sliced_input, + sliced_kernel, conv_op.getWindowStridesAttr(), + conv_op.getPaddingAttr(), conv_op.getLhsDilationAttr(), + conv_op.getRhsDilationAttr(), conv_op.getWindowReversalAttr(), + conv_op.getDimensionNumbers(), 1, 1, conv_op.getPrecisionConfigAttr()); conv_results.push_back(conv_result); } - auto final_output = rewriter.create( - conv_op.getLoc(), conv_results, + auto final_output = mhlo::ConcatenateOp::create( + rewriter, conv_op.getLoc(), conv_results, rewriter.getI64IntegerAttr(dnums.getOutputFeatureDimension())); rewriter.replaceOp(conv_op, final_output.getResult()); return mlir::success(); @@ -854,8 +855,8 @@ class ConvertToResizeBilinearOpOrDepthwiseTransposedConvOp llvm::ArrayRef output_sizes, bool align_corners, ConversionPatternRewriter& rewriter) const { - Value output_sizes_attr = rewriter.create( - conv_op.getLoc(), + Value output_sizes_attr = TF::ConstOp::create( + rewriter, conv_op.getLoc(), DenseIntElementsAttr::get( RankedTensorType::get({static_cast(output_sizes.size())}, rewriter.getI32Type()), @@ -863,8 +864,8 @@ class ConvertToResizeBilinearOpOrDepthwiseTransposedConvOp // The value of half_pixel_centers couldn't be inferred from the IR and XLA // only support half_pixel_centers=True as in 01/11/2022. Here // half_pixel_centers=False is hardcoded. - Value output = rewriter.create( - conv_op.getLoc(), conv_op.getType(), conv_op.getLhs(), + Value output = TF::ResizeBilinearOp::create( + rewriter, conv_op.getLoc(), conv_op.getType(), conv_op.getLhs(), output_sizes_attr, /*align_corners=*/rewriter.getBoolAttr(align_corners), /*half_pixel_centers=*/rewriter.getBoolAttr(false)); @@ -1071,8 +1072,8 @@ class ConvertNonTrivialConvOp permutation.push_back(dnums.getKernelOutputFeatureDimension()); permutation.push_back(dnums.getKernelInputFeatureDimension()); - auto filter_transposed = rewriter.create( - conv_op.getLoc(), conv_op.getRhs(), + auto filter_transposed = mhlo::TransposeOp::create( + rewriter, conv_op.getLoc(), conv_op.getRhs(), DenseIntElementsAttr::get( RankedTensorType::get({static_cast(permutation.size())}, rewriter.getI64Type()), @@ -1082,8 +1083,9 @@ class ConvertNonTrivialConvOp // Lets hard-code the reverse indexes to be {0, 1} as the expectation is // that the kernel is always in HWOI format, with the above code. 
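// Sketch of the transposed-convolution lowering below, using names from the
// surrounding code: the kernel is transposed into HWOI order, its two spatial
// dimensions are reversed (hard-coded indexes {0, 1}), and the result feeds a
// TF::Conv2DBackpropInputOp together with a constant of the computed output
// sizes; a final mhlo::TransposeOp is added when the output layout is not
// already [b, 0, 1, f].
//
//   mhlo::ReverseOp filter = mhlo::ReverseOp::create(
//       rewriter, conv_op.getLoc(), reverse_filter_in,
//       rewriter.getI64TensorAttr({0, 1}));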
- mhlo::ReverseOp filter = rewriter.create( - conv_op.getLoc(), reverse_filter_in, rewriter.getI64TensorAttr({0, 1})); + mhlo::ReverseOp filter = + mhlo::ReverseOp::create(rewriter, conv_op.getLoc(), reverse_filter_in, + rewriter.getI64TensorAttr({0, 1})); // if output is not in [b, 0, 1, f] format, insert transpose to go back if (dnums.getOutputBatchDimension() != 0 || @@ -1112,23 +1114,23 @@ class ConvertNonTrivialConvOp auto output_type = RankedTensorType::get( transposed_output_shape, mlir::cast(conv_op.getRhs().getType()).getElementType()); - auto output_sizes = rewriter.create( - conv_op.getLoc(), + auto output_sizes = TF::ConstOp::create( + rewriter, conv_op.getLoc(), DenseIntElementsAttr::get( RankedTensorType::get( {static_cast(transposed_output_shape_i32.size())}, rewriter.getI32Type()), transposed_output_shape_i32)); - auto new_conv = rewriter.create( - conv_op.getLoc(), output_type, output_sizes, filter, conv_input, - rewriter.getI64ArrayAttr(strides), + auto new_conv = TF::Conv2DBackpropInputOp::create( + rewriter, conv_op.getLoc(), output_type, output_sizes, filter, + conv_input, rewriter.getI64ArrayAttr(strides), /*use_cudnn_on_gpu=*/rewriter.getBoolAttr(true), /*padding=*/rewriter.getStringAttr(padding), /*explicit_paddings=*/rewriter.getI64ArrayAttr({}), /*data_format=*/rewriter.getStringAttr("NHWC"), /*dilations=*/rewriter.getI64ArrayAttr(dilation)); - auto output_transpose = rewriter.create( - conv_op.getLoc(), new_conv.getResult(), + auto output_transpose = mhlo::TransposeOp::create( + rewriter, conv_op.getLoc(), new_conv.getResult(), rewriter.getI64TensorAttr(transpose_order)); conv_op->replaceAllUsesWith(output_transpose); rewriter.eraseOp(conv_op); @@ -1139,8 +1141,8 @@ class ConvertNonTrivialConvOp .getShape()) { output_shape_i32.push_back(dim); } - auto output_sizes = rewriter.create( - conv_op.getLoc(), + auto output_sizes = TF::ConstOp::create( + rewriter, conv_op.getLoc(), DenseIntElementsAttr::get( RankedTensorType::get( {static_cast(output_shape_i32.size())}, @@ -1255,12 +1257,12 @@ class ConvertSliceOp : public OpConversionPattern { LogicalResult matchAndRewrite( mhlo::SliceOp slice_op, OpAdaptor adaptor, ConversionPatternRewriter& rewriter) const final { - auto begin = rewriter.create(slice_op.getLoc(), - slice_op.getStartIndices()); - auto end = rewriter.create(slice_op.getLoc(), - slice_op.getLimitIndices()); + auto begin = TF::ConstOp::create(rewriter, slice_op.getLoc(), + slice_op.getStartIndices()); + auto end = TF::ConstOp::create(rewriter, slice_op.getLoc(), + slice_op.getLimitIndices()); auto strides = - rewriter.create(slice_op.getLoc(), slice_op.getStrides()); + TF::ConstOp::create(rewriter, slice_op.getLoc(), slice_op.getStrides()); rewriter.replaceOpWithNewOp( slice_op, slice_op.getType(), slice_op.getOperand(), begin, end, strides); @@ -1294,22 +1296,24 @@ class ConvertDynamicSliceOp : public OpConversionPattern { // Clamp indices to [0, input_size - output_size] llvm::SmallVector start_indices_vector; start_indices_vector.reserve(op.getStartIndices().size()); - Value clamp_min = rewriter.create( - op.getLoc(), + Value clamp_min = TF::ConstOp::create( + rewriter, op.getLoc(), rewriter.getIntegerAttr(signed_start_indices_element_type, 0)); for (uint64_t i = 0, e = op.getStartIndices().size(); i < e; ++i) { // Always put a cast there. 
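// ConvertDynamicSliceOp, sketched with the names used below: every start
// index is cast to the signed index type, clamped into
// [0, input_size - slice_size] via two TF::ConstOp bounds and an
// mhlo::ClampOp, the clamped scalars are packed into one 1-D tensor with
// TF::PackOp, and the op is replaced by a TF slice driven by that tensor plus
// a constant of the requested slice sizes.
//
//   Value clamped_index = mhlo::ClampOp::create(
//       rewriter, op.getLoc(), cast_type, clamp_min, cast_op, clamp_max);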
auto start = op.getStartIndices()[i]; auto cast_type = mlir::cast(start.getType()) .clone(signed_start_indices_element_type); - auto cast_op = rewriter.create(op.getLoc(), cast_type, start); - Value clamp_max = rewriter.create( - op.getLoc(), rewriter.getIntegerAttr( - signed_start_indices_element_type, - input_type.getShape()[i] - - op.getSliceSizes().getValues()[i])); - Value clamped_index = rewriter.create( - op.getLoc(), cast_type, clamp_min, cast_op, clamp_max); + auto cast_op = + TF::CastOp::create(rewriter, op.getLoc(), cast_type, start); + Value clamp_max = TF::ConstOp::create( + rewriter, op.getLoc(), + rewriter.getIntegerAttr( + signed_start_indices_element_type, + input_type.getShape()[i] - + op.getSliceSizes().getValues()[i])); + Value clamped_index = mhlo::ClampOp::create( + rewriter, op.getLoc(), cast_type, clamp_min, cast_op, clamp_max); start_indices_vector.push_back(clamped_index); } @@ -1317,11 +1321,12 @@ class ConvertDynamicSliceOp : public OpConversionPattern { Type start_indices_type = RankedTensorType::get( {static_cast(start_indices_vector.size())}, signed_start_indices_element_type); - Value start_indices_op = rewriter.create( - op.getLoc(), start_indices_type, ValueRange(start_indices_vector)); + Value start_indices_op = + TF::PackOp::create(rewriter, op.getLoc(), start_indices_type, + ValueRange(start_indices_vector)); Value slice_sices_op = - rewriter.create(op.getLoc(), op.getSliceSizes()); + TF::ConstOp::create(rewriter, op.getLoc(), op.getSliceSizes()); rewriter.replaceOpWithNewOp(op, op.getType(), op.getOperand(), start_indices_op, slice_sices_op); return success(); @@ -1378,8 +1383,8 @@ Value BuildReshapeOp(ImplicitLocOpBuilder& builder, ArrayRef shape, Type idx_type, Type element_type) { Value shape_cst = BuildIntArrayConstOp(builder, rewriter, shape, idx_type); - Value reshaped_input = builder.create( - RankedTensorType::get(shape, element_type), input, shape_cst); + Value reshaped_input = TF::ReshapeOp::create( + builder, RankedTensorType::get(shape, element_type), input, shape_cst); return reshaped_input; } @@ -1389,8 +1394,9 @@ Value BuildSliceOp(ImplicitLocOpBuilder& builder, Value begin, ArrayRef shape, Type idx_type, Type element_type) { Value shape_cst = BuildIntArrayConstOp(builder, rewriter, shape, idx_type); - Value slice_result = builder.create( - RankedTensorType::get(shape, element_type), input, begin, shape_cst); + Value slice_result = + TF::SliceOp::create(builder, RankedTensorType::get(shape, element_type), + input, begin, shape_cst); return slice_result; } @@ -1416,8 +1422,8 @@ class ConvertDynamicUpdateSliceOp llvm::SmallVector start_indices_vector; Append(start_indices_vector, op.getStartIndices()); auto shape_tensor_type = RankedTensorType::get({shape_dim}, idx_type); - Value start_indices_tensor = rewriter.create( - op.getLoc(), shape_tensor_type, start_indices_vector); + Value start_indices_tensor = TF::PackOp::create( + rewriter, op.getLoc(), shape_tensor_type, start_indices_vector); rewriter.replaceOpWithNewOp( op, op.getType(), op.getOperand(), op.getUpdate(), start_indices_tensor); @@ -1584,7 +1590,7 @@ Value BuildDotOperandFlattenedShapeOp(Value operand, bool is_lhs) { auto operand_type = mlir::cast(operand.getType()); BoolAttr true_attr = builder.getBoolAttr(true); - auto operand_shape = builder.create(operand, true_attr); + auto operand_shape = TF::ShapeOp::create(builder, operand, true_attr); const int64_t operand_rank = operand_type.getRank(); // Compute flattened out dimension and contracting dimension using // 
TF::UnsortedSegmentProdOp. @@ -1600,26 +1606,28 @@ Value BuildDotOperandFlattenedShapeOp(Value operand, } auto seg_prod_result_type = RankedTensorType::get(static_cast(1), builder.getI32Type()); - auto out_segids_cst = builder.create( - builder.getI32TensorAttr(flattened_out_segids)); - auto contracting_segids_cst = builder.create( - builder.getI32TensorAttr(flattened_contracting_segids)); + auto out_segids_cst = TF::ConstOp::create( + builder, builder.getI32TensorAttr(flattened_out_segids)); + auto contracting_segids_cst = TF::ConstOp::create( + builder, builder.getI32TensorAttr(flattened_contracting_segids)); auto num_segids_tensor = - builder.create(builder.getI32IntegerAttr(1)); - auto flattened_out_dims = builder.create( - seg_prod_result_type, operand_shape, out_segids_cst, num_segids_tensor); - auto flattened_contracting_dims = builder.create( - seg_prod_result_type, operand_shape, contracting_segids_cst, + TF::ConstOp::create(builder, builder.getI32IntegerAttr(1)); + auto flattened_out_dims = TF::UnsortedSegmentProdOp::create( + builder, seg_prod_result_type, operand_shape, out_segids_cst, + num_segids_tensor); + auto flattened_contracting_dims = TF::UnsortedSegmentProdOp::create( + builder, seg_prod_result_type, operand_shape, contracting_segids_cst, num_segids_tensor); llvm::SmallVector flattend_shape_values; // Gather the batch dimensions. if (!dot_dimensions_info.batch_dimensions().AxesArray().empty()) { if (ShapedType::isDynamicShape( dot_dimensions_info.batch_dimensions().SizesArray())) { - auto batch_axes_tensor = - builder.create(builder.getI64TensorAttr( - dot_dimensions_info.batch_dimensions().AxesArray())); - auto batch_dims = builder.create( + auto batch_axes_tensor = TF::ConstOp::create( + builder, builder.getI64TensorAttr( + dot_dimensions_info.batch_dimensions().AxesArray())); + auto batch_dims = TF::GatherOp::create( + builder, RankedTensorType::get( {static_cast( dot_dimensions_info.batch_dimensions().AxesArray().size())}, @@ -1633,7 +1641,7 @@ Value BuildDotOperandFlattenedShapeOp(Value operand, batch_i32_vec.push_back(static_cast(element)); } auto batch_dims = - builder.create(builder.getI32TensorAttr(batch_i32_vec)); + TF::ConstOp::create(builder, builder.getI32TensorAttr(batch_i32_vec)); flattend_shape_values.push_back(batch_dims); } } @@ -1649,9 +1657,9 @@ Value BuildDotOperandFlattenedShapeOp(Value operand, builder.getIntegerType(32)); // Concatenate the batch dimensions, flattened out dimension and flattened // contracting dimension. 
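// BuildDotOperandFlattenedShapeOp, sketched: for dynamically shaped operands
// the flattened [batch..., out, contracting] shape is computed at runtime.
// Segment-id constants assign every dimension to either the flattened "out"
// group or the flattened "contracting" group, TF::UnsortedSegmentProdOp over
// the operand's shape tensor multiplies the sizes inside each group, batch
// sizes are gathered (or folded to an i32 constant when static), and the
// pieces are concatenated along axis 0 to form the target shape of the later
// mhlo::DynamicReshapeOp.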
- return builder.create( - concat_result_type, - builder.create(builder.getI32IntegerAttr(0)), + return TF::ConcatOp::create( + builder, concat_result_type, + TF::ConstOp::create(builder, builder.getI32IntegerAttr(0)), flattend_shape_values); } @@ -1682,8 +1690,8 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, lhs_dot_dimensions_info.batch_dimensions().SizesArray(), lhs_dot_dimensions_info.out_dimensions().SizesArray(), lhs_dot_dimensions_info.contracting_dimensions().SizesArray()); - auto lhs_transposed = rewriter.create( - loc, + auto lhs_transposed = mhlo::TransposeOp::create( + rewriter, loc, RankedTensorType::get(lhs_transposed_shape, lhs_type.getElementType()), lhs, DenseIntElementsAttr::get( @@ -1700,8 +1708,8 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, rhs_dot_dimensions_info.batch_dimensions().SizesArray(), rhs_dot_dimensions_info.contracting_dimensions().SizesArray(), rhs_dot_dimensions_info.out_dimensions().SizesArray()); - auto rhs_transposed = rewriter.create( - loc, + auto rhs_transposed = mhlo::TransposeOp::create( + rewriter, loc, RankedTensorType::get(rhs_transposed_shape, rhs_type.getElementType()), rhs, DenseIntElementsAttr::get( @@ -1717,15 +1725,15 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, lhs_dot_dimensions_info.FlattenedContractingDimensionSize()}); Value lhs_flattend; if (lhs_type.hasStaticShape()) { - lhs_flattend = rewriter.create( - loc, + lhs_flattend = mhlo::ReshapeOp::create( + rewriter, loc, RankedTensorType::get(lhs_flattened_shape, lhs_type.getElementType()), lhs_transposed.getResult()); } else { auto lhs_flattend_shape_op = BuildDotOperandFlattenedShapeOp( lhs, lhs_dot_dimensions_info, builder, /*is_lhs=*/true); - lhs_flattend = rewriter.create( - loc, + lhs_flattend = mhlo::DynamicReshapeOp::create( + rewriter, loc, RankedTensorType::get(lhs_flattened_shape, lhs_type.getElementType()), lhs_transposed, lhs_flattend_shape_op); } @@ -1739,15 +1747,15 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, rhs_dot_dimensions_info.FlattenedOutDimensionSize()}); Value rhs_flattend; if (rhs_type.hasStaticShape()) { - rhs_flattend = rewriter.create( - loc, + rhs_flattend = mhlo::ReshapeOp::create( + rewriter, loc, RankedTensorType::get(rhs_flattened_shape, rhs_type.getElementType()), rhs_transposed.getResult()); } else { auto rhs_flattend_shape_op = BuildDotOperandFlattenedShapeOp( rhs, rhs_dot_dimensions_info, builder, /*is_lhs=*/false); - rhs_flattend = rewriter.create( - loc, + rhs_flattend = mhlo::DynamicReshapeOp::create( + rewriter, loc, RankedTensorType::get(rhs_flattened_shape, rhs_type.getElementType()), rhs_transposed, rhs_flattend_shape_op); } @@ -1759,36 +1767,38 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, lhs_dot_dimensions_info.FlattenedOutDimensionSize()}, llvm::ArrayRef{ rhs_dot_dimensions_info.FlattenedOutDimensionSize()}); - auto matmul = rewriter.create( - loc, RankedTensorType::get(matmul_shape, result_type.getElementType()), + auto matmul = TF::BatchMatMulV3Op::create( + rewriter, loc, + RankedTensorType::get(matmul_shape, result_type.getElementType()), lhs_flattend, rhs_flattend); if (result_type.hasStaticShape()) { auto reshaped = - rewriter.create(loc, result_type, matmul.getResult()); + mhlo::ReshapeOp::create(rewriter, loc, result_type, matmul.getResult()); return reshaped.getResult(); } // Reshape for dynamic shaped operands. The result shape is // [lhs_batch_dimensions, lhs_out_dimensions, rhs_out_dimensions]. 
BoolAttr true_attr = rewriter.getBoolAttr(true); - auto lhs_shape = rewriter.create(loc, lhs, true_attr); - auto rhs_shape = rewriter.create(loc, rhs, true_attr); + auto lhs_shape = TF::ShapeOp::create(rewriter, loc, lhs, true_attr); + auto rhs_shape = TF::ShapeOp::create(rewriter, loc, rhs, true_attr); llvm::SmallVector lhs_batch_and_out = Concat(lhs_dot_dimensions_info.batch_dimensions().AxesArray(), lhs_dot_dimensions_info.out_dimensions().AxesArray()); - auto lhs_batch_and_out_cst = rewriter.create( - loc, rewriter.getI64TensorAttr(lhs_batch_and_out)); - auto lhs_batch_and_out_dims = rewriter.create( - loc, + auto lhs_batch_and_out_cst = TF::ConstOp::create( + rewriter, loc, rewriter.getI64TensorAttr(lhs_batch_and_out)); + auto lhs_batch_and_out_dims = TF::GatherOp::create( + rewriter, loc, RankedTensorType::get({static_cast(lhs_batch_and_out.size())}, rewriter.getIntegerType(32)), lhs_shape, lhs_batch_and_out_cst, true_attr); - auto rhs_out_cst = rewriter.create( - loc, rewriter.getI64TensorAttr( - rhs_dot_dimensions_info.out_dimensions().AxesArray())); - auto rhs_out_dims = rewriter.create( - loc, + auto rhs_out_cst = TF::ConstOp::create( + rewriter, loc, + rewriter.getI64TensorAttr( + rhs_dot_dimensions_info.out_dimensions().AxesArray())); + auto rhs_out_dims = TF::GatherOp::create( + rewriter, loc, RankedTensorType::get( {static_cast( rhs_dot_dimensions_info.out_dimensions().AxesArray().size())}, @@ -1800,13 +1810,13 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, lhs_dot_dimensions_info.out_dimensions().AxesArray().size() + rhs_dot_dimensions_info.out_dimensions().AxesArray().size())}, rewriter.getIntegerType(32)); - auto result_shape = rewriter.create( - loc, result_shape_type, - rewriter.create(loc, rewriter.getI32IntegerAttr(0)), + auto result_shape = TF::ConcatOp::create( + rewriter, loc, result_shape_type, + TF::ConstOp::create(rewriter, loc, rewriter.getI32IntegerAttr(0)), ValueRange{lhs_batch_and_out_dims, rhs_out_dims}); - auto reshaped = rewriter.create( - loc, result_type, matmul.getResult(), result_shape); + auto reshaped = mhlo::DynamicReshapeOp::create( + rewriter, loc, result_type, matmul.getResult(), result_shape); return reshaped.getResult(); } @@ -1844,9 +1854,10 @@ template LogicalResult rewriteNonMatchInitValue(mhlo::ReduceOp reduce_op, Value input, TF::ConstOp reduction_indices, ConversionPatternRewriter& rewriter) { - Value reduce_result = rewriter.create( - reduce_op.getLoc(), reduce_op.getType(0), input, reduction_indices, - /*keep_dim=*/rewriter.getBoolAttr(false)); + Value reduce_result = + TfReduceOp::create(rewriter, reduce_op.getLoc(), reduce_op.getType(0), + input, reduction_indices, + /*keep_dim=*/rewriter.getBoolAttr(false)); rewriter.replaceOpWithNewOp(reduce_op, reduce_op.getType(0), reduce_result, reduce_op.getInitValues()[0]); @@ -1902,8 +1913,9 @@ class ConvertReduceOpToTfOp : public OpConversionPattern { } auto dim_type = RankedTensorType::get( {static_cast(reduce_dims.size())}, rewriter.getI64Type()); - auto reduction_indices = rewriter.create( - reduce_op.getLoc(), dim_type, rewriter.getI64TensorAttr(reduce_dims)); + auto reduction_indices = + TF::ConstOp::create(rewriter, reduce_op.getLoc(), dim_type, + rewriter.getI64TensorAttr(reduce_dims)); // In `MatchReduceOpOperand` function, we already match that the // "mhlo::ReduceOp" only has one operand, one init_value and one result. 
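// rewriteNonMatchInitValue, sketched: when the init value is not the natural
// identity of the reduction, the mhlo.reduce is still lowered to the matching
// TF reduce op over constant reduction indices with keep_dim = false, and the
// original init value appears to be folded back in by the replaceOpWithNewOp
// call that combines it with the reduce result, preserving the numerics.
//
//   Value reduce_result = TfReduceOp::create(
//       rewriter, reduce_op.getLoc(), reduce_op.getType(0), input,
//       reduction_indices, /*keep_dim=*/rewriter.getBoolAttr(false));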
@@ -2103,25 +2115,26 @@ class ConvertIotaOpToTfRange : public OpConversionPattern { auto range_type = RankedTensorType::get({type.getShape()[dimension]}, element_type); - Value start_op = rewriter.create(iota_op.getLoc(), start); - Value limit_op = rewriter.create(iota_op.getLoc(), limit); - Value delta_op = rewriter.create(iota_op.getLoc(), delta); - Value result = rewriter.create(iota_op.getLoc(), range_type, - start_op, limit_op, delta_op); + Value start_op = TF::ConstOp::create(rewriter, iota_op.getLoc(), start); + Value limit_op = TF::ConstOp::create(rewriter, iota_op.getLoc(), limit); + Value delta_op = TF::ConstOp::create(rewriter, iota_op.getLoc(), delta); + Value result = TF::RangeOp::create(rewriter, iota_op.getLoc(), range_type, + start_op, limit_op, delta_op); if (type.getRank() > 1) { std::vector reshape_shape(type.getRank(), 1); reshape_shape[iota_op.getIotaDimension()] = type.getShape()[dimension]; auto reshape_type = RankedTensorType::get(reshape_shape, element_type); - Value reshape_shape_op = rewriter.create( - iota_op.getLoc(), rewriter.getI64TensorAttr(reshape_shape)); - result = rewriter.create(iota_op.getLoc(), reshape_type, - result, reshape_shape_op); + Value reshape_shape_op = TF::ConstOp::create( + rewriter, iota_op.getLoc(), rewriter.getI64TensorAttr(reshape_shape)); + result = TF::ReshapeOp::create(rewriter, iota_op.getLoc(), reshape_type, + result, reshape_shape_op); - Value broadcast_shape_op = rewriter.create( - iota_op.getLoc(), rewriter.getI64TensorAttr(type.getShape())); - result = rewriter.create(iota_op.getLoc(), type, - result, broadcast_shape_op); + Value broadcast_shape_op = + TF::ConstOp::create(rewriter, iota_op.getLoc(), + rewriter.getI64TensorAttr(type.getShape())); + result = TF::BroadcastToOp::create(rewriter, iota_op.getLoc(), type, + result, broadcast_shape_op); } rewriter.replaceOp(iota_op, result); @@ -2314,8 +2327,8 @@ class ConvertLoweredCumOp : public OpConversionPattern { if (right_padding != 0) return failure(); } - auto axis = rewriter.create( - rw->getLoc(), + auto axis = TF::ConstOp::create( + rewriter, rw->getLoc(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), cumulative_axis)); rewriter.replaceOpWithNewOp(rw, rw.getType(0), rw.getInputs()[0], @@ -2585,7 +2598,7 @@ arith::ConstantOp ShapeToConst(PatternRewriter& rewriter, Value value) { auto attr_type = RankedTensorType::get({static_cast(shape.size())}, rewriter.getIntegerType(64)); auto attr = DenseElementsAttr::get(attr_type, shape); - return rewriter.create(value.getLoc(), attr_type, attr); + return arith::ConstantOp::create(rewriter, value.getLoc(), attr_type, attr); } bool IsSign(APInt a, APInt sign) { @@ -2841,8 +2854,8 @@ class ConvertGatherOp : public OpConversionPattern { TF::CastOp cast_op = nullptr; if (canonical_start_indices_type.getElementType().isUnsignedInteger(32)) { - cast_op = rewriter.create( - gather_op->getLoc(), + cast_op = TF::CastOp::create( + rewriter, gather_op->getLoc(), RankedTensorType::get(canonical_start_indices_type.getShape(), rewriter.getI64Type()), canonical_start_indices); @@ -2861,8 +2874,8 @@ class ConvertGatherOp : public OpConversionPattern { auto canonical_result_type = RankedTensorType::get( canonical_result_shape, result_type.getElementType()); - auto canonical_result = rewriter.create( - gather_op->getLoc(), canonical_result_type, canonical_operand, + auto canonical_result = TF::GatherNdOp::create( + rewriter, gather_op->getLoc(), canonical_result_type, canonical_operand, cast_op ? 
cast_op.getResult() : canonical_start_indices); auto offset_dims = gather_op.getDimensionNumbers().getOffsetDims(); @@ -2968,24 +2981,24 @@ class ConvertGatherOp : public OpConversionPattern { auto min_start_indices = BuildIntArrayConstOp( builder, rewriter, llvm::SmallVector({0, 0}), start_indices_type.getElementType()); - auto start_indices_max_op = rewriter.create( - gather_op.getLoc(), start_indices, min_start_indices); - auto clamped_start_indices_op = rewriter.create( - gather_op.getLoc(), start_indices_max_op, max_start_indices); + auto start_indices_max_op = TF::MaximumOp::create( + rewriter, gather_op.getLoc(), start_indices, min_start_indices); + auto clamped_start_indices_op = TF::MinimumOp::create( + rewriter, gather_op.getLoc(), start_indices_max_op, max_start_indices); int64_t batch_size = start_indices_type.getDimSize(batch_dim); auto slice_size = BuildIntArrayConstOp( builder, rewriter, slice_sizes_vector, rewriter.getI32Type()); if (batch_size == 1) { - auto squeeze_op = rewriter.create( - gather_op.getLoc(), + auto squeeze_op = TF::SqueezeOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get({rank_two}, start_indices_type.getElementType()), clamped_start_indices_op, rewriter.getI64ArrayAttr(llvm::ArrayRef({batch_dim}))); auto slice_op = - rewriter.create(gather_op.getLoc(), gather_op.getType(), - operand, squeeze_op, slice_size); + TF::SliceOp::create(rewriter, gather_op.getLoc(), gather_op.getType(), + operand, squeeze_op, slice_size); rewriter.replaceOp(gather_op, slice_op); return mlir::success(); } @@ -2999,29 +3012,29 @@ class ConvertGatherOp : public OpConversionPattern { auto two = BuildIntArrayConstOp(builder, rewriter, llvm::SmallVector({1, 2}), rewriter.getI32Type()); - auto begin = rewriter.create( - gather_op.getLoc(), + auto begin = TF::SliceOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get({1, 2}, start_indices_type.getElementType()), clamped_start_indices_op, zero, two); - auto squeeze_op = rewriter.create( - gather_op.getLoc(), + auto squeeze_op = TF::SqueezeOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get({rank_two}, start_indices_type.getElementType()), begin, rewriter.getI64ArrayAttr(llvm::ArrayRef({batch_dim}))); - auto slice_op = rewriter.create( - gather_op.getLoc(), + auto slice_op = TF::SliceOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get({1, slice_sizes_vector[1]}, operand_type.getElementType()), operand, squeeze_op, slice_size); slices.push_back(slice_op); } auto scalar_type = RankedTensorType::get({}, rewriter.getI32Type()); - auto zero_scalar = rewriter.create( - gather_op.getLoc(), + auto zero_scalar = TF::ConstOp::create( + rewriter, gather_op.getLoc(), DenseIntElementsAttr::get(scalar_type, static_cast(0))); - auto concat_op = rewriter.create( - gather_op.getLoc(), result_type, slices, zero_scalar); + auto concat_op = TF::ConcatV2Op::create(rewriter, gather_op.getLoc(), + result_type, slices, zero_scalar); rewriter.replaceOp(gather_op, concat_op); return mlir::success(); } @@ -3116,12 +3129,13 @@ class ConvertGatherOp : public OpConversionPattern { if (canonical_result_type.hasStaticShape()) { auto unflattened_result_type = RankedTensorType::get( unflattened_shape, original_result_type.getElementType()); - canonical_result = rewriter.create( - gather_op.getLoc(), unflattened_result_type, canonical_result); + canonical_result = + mhlo::ReshapeOp::create(rewriter, gather_op.getLoc(), + unflattened_result_type, canonical_result); } // Transpose back to the original result shape. 
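// ConvertGatherOp canonical path, in brief: operand and start_indices are
// transposed and flattened into a canonical layout, unsigned 32-bit indices
// are cast to i64 when necessary, TF::GatherNdOp produces the canonical
// result, and that result is reshaped (when statically shaped) and finally
// transposed back with the inverse of the canonicalization permutation so it
// matches the original mhlo.gather result type.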
- return rewriter.create( - gather_op.getLoc(), original_result_type, canonical_result, + return mhlo::TransposeOp::create( + rewriter, gather_op.getLoc(), original_result_type, canonical_result, rewriter.getI64TensorAttr( GetInversePermutationArray(permutation_to_canonical))); } @@ -3168,13 +3182,13 @@ class ConvertGatherOp : public OpConversionPattern { // Transpose the dimensions and flatten the batching dimensions. RankedTensorType transposed_type = RankedTensorType::get(transposed_shape, operand_type.getElementType()); - auto transposed_operand = rewriter.create( - gather_op.getLoc(), transposed_type, operand, + auto transposed_operand = mhlo::TransposeOp::create( + rewriter, gather_op.getLoc(), transposed_type, operand, rewriter.getI64TensorAttr(permutation)); auto flattened_type = RankedTensorType::get(flattened_shape, operand_type.getElementType()); - auto flattened_operand = rewriter.create( - gather_op.getLoc(), flattened_type, transposed_operand); + auto flattened_operand = mhlo::ReshapeOp::create( + rewriter, gather_op.getLoc(), flattened_type, transposed_operand); return flattened_operand; } @@ -3233,13 +3247,13 @@ class ConvertGatherOp : public OpConversionPattern { reshaped_shape.push_back(index_vector_size); // Transpose the dimensions and flatten the batching dimensions. - auto transposed_start_indices = rewriter.create( - gather_op.getLoc(), + auto transposed_start_indices = mhlo::TransposeOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get(transposed_shape, start_indices_type.getElementType()), start_indices, rewriter.getI64TensorAttr(permutation)); - start_indices = rewriter.create( - gather_op.getLoc(), + start_indices = mhlo::ReshapeOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get(reshaped_shape, start_indices_type.getElementType()), transposed_start_indices); @@ -3275,32 +3289,33 @@ class ConvertGatherOp : public OpConversionPattern { llvm::SmallVector offsets_shape(start_indices_shape.size(), 1); offsets_shape[non_trivial_sliced_dim] = slice_sizes[operand_dim]; start_indices_shape[non_trivial_sliced_dim] = slice_sizes[operand_dim]; - auto offsets = rewriter.create( - gather_op.getLoc(), + auto offsets = mhlo::IotaOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get(offsets_shape, start_indices_type.getElementType()), rewriter.getI64IntegerAttr(non_trivial_sliced_dim)); non_trivial_sliced_dim++; // Pad with 0s on the other operand dimensions. - Value zero = rewriter.create( - gather_op.getLoc(), rewriter.getZeroAttr(RankedTensorType::get( - {}, start_indices_type.getElementType()))); + Value zero = arith::ConstantOp::create( + rewriter, gather_op.getLoc(), + rewriter.getZeroAttr( + RankedTensorType::get({}, start_indices_type.getElementType()))); int rank = offsets_shape.size(); llvm::SmallVector padding_low(rank, 0); llvm::SmallVector padding_high(rank, 0); llvm::SmallVector padding_interior(rank, 0); padding_low.back() = i; padding_high.back() = start_indices_shape.back() - i - 1; - auto padded_offsets = rewriter.create( - gather_op.getLoc(), offsets, zero, - GetI64ElementsAttr(padding_low, &rewriter), - GetI64ElementsAttr(padding_high, &rewriter), - GetI64ElementsAttr(padding_interior, &rewriter)); + auto padded_offsets = + mhlo::PadOp::create(rewriter, gather_op.getLoc(), offsets, zero, + GetI64ElementsAttr(padding_low, &rewriter), + GetI64ElementsAttr(padding_high, &rewriter), + GetI64ElementsAttr(padding_interior, &rewriter)); // Add the padded offsets to the start indices (with broadcasting). 
- start_indices = rewriter.create(gather_op.getLoc(), - start_indices, padded_offsets); + start_indices = TF::AddOp::create(rewriter, gather_op.getLoc(), + start_indices, padded_offsets); } if (!start_indices_batching_dims.empty()) { @@ -3308,15 +3323,15 @@ class ConvertGatherOp : public OpConversionPattern { // operand. llvm::SmallVector offsets_shape = start_indices_shape; offsets_shape.back() = 1; - auto offsets = rewriter.create( - gather_op.getLoc(), + auto offsets = mhlo::IotaOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get(offsets_shape, start_indices_type.getElementType()), rewriter.getI64IntegerAttr(0)); start_indices_shape.back()++; - start_indices = rewriter.create( - gather_op.getLoc(), + start_indices = mhlo::ConcatenateOp::create( + rewriter, gather_op.getLoc(), RankedTensorType::get(start_indices_shape, start_indices_type.getElementType()), ValueRange{offsets, start_indices}, @@ -3345,8 +3360,9 @@ class ConvertWhileOp : public OpConversionPattern { // Creates a TF::WhileRegionOp to replace the mhlo::WhileOp. HLO WhileOp // currently doesn't support stateless and shape invariant, so these // parameters are set to the default values. - auto new_while = rewriter.create( - while_op.getLoc(), while_op->getResultTypes(), while_op->getOperands(), + auto new_while = TF::WhileRegionOp::create( + rewriter, while_op.getLoc(), while_op->getResultTypes(), + while_op->getOperands(), /*parallel_iterations=*/10, /*is_stateless=*/false, /*shape_invariant=*/false); new_while.getCond().takeBody(while_op.getCond()); @@ -3366,8 +3382,8 @@ class ConvertIfOp : public OpConversionPattern { mhlo::IfOp op, OpAdaptor adaptor, ConversionPatternRewriter& rewriter) const final { // HLO IfOp currently doesn't support stateless - auto new_op = rewriter.create( - op.getLoc(), op->getResultTypes(), op.getPred(), + auto new_op = TF::IfRegionOp::create( + rewriter, op.getLoc(), op->getResultTypes(), op.getPred(), /*is_stateless=*/false, /*_then_func_name=*/nullptr, /*_else_func_name=*/nullptr); new_op.getThenBranch().takeBody(op.getTrueBranch()); @@ -3427,10 +3443,10 @@ Value ConvertPadOp(PatternRewriter& rewriter, Operation* old_op) { {pad_op.getEdgePaddingLow().size(), 2}, rewriter.getI64Type()); auto padding_attr = DenseIntElementsAttr::get(padding_attr_type, padding); auto padding_amount_const_op = - rewriter.create(loc, padding_attr_type, padding_attr); - auto new_pad_op = rewriter.create( - loc, pad_op.getType().clone(pad_output_shape), pad_op.getOperand(), - padding_amount_const_op, pad_op.getPaddingValue()); + arith::ConstantOp::create(rewriter, loc, padding_attr_type, padding_attr); + auto new_pad_op = TF::PadV2Op::create( + rewriter, loc, pad_op.getType().clone(pad_output_shape), + pad_op.getOperand(), padding_amount_const_op, pad_op.getPaddingValue()); if (!has_negative_padding_amount) { return new_pad_op; } @@ -3438,15 +3454,14 @@ Value ConvertPadOp(PatternRewriter& rewriter, Operation* old_op) { // Convert negative padding amount into slice. 
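// ConvertPadOp, sketched: the non-negative part of the edge padding becomes a
// TF::PadV2Op fed by an arith::ConstantOp of shape [rank, 2]; if any edge
// padding amount is negative, the padded result is additionally narrowed by a
// TF::SliceOp whose begin/size constants encode those negative amounts, so
// the pair reproduces mhlo.pad semantics.
//
//   return TF::SliceOp::create(rewriter, loc, pad_op.getType(), new_pad_op,
//                              slice_begins_const_op, slice_sizes_const_op);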
auto slice_attr_type = RankedTensorType::get( {pad_op.getEdgePaddingLow().size()}, rewriter.getI64Type()); - auto slice_begins_const_op = rewriter.create( - loc, slice_attr_type, + auto slice_begins_const_op = arith::ConstantOp::create( + rewriter, loc, slice_attr_type, DenseIntElementsAttr::get(slice_attr_type, slice_begins)); - auto slice_sizes_const_op = rewriter.create( - loc, slice_attr_type, + auto slice_sizes_const_op = arith::ConstantOp::create( + rewriter, loc, slice_attr_type, DenseIntElementsAttr::get(slice_attr_type, slice_sizes)); - return rewriter.create(loc, pad_op.getType(), new_pad_op, - slice_begins_const_op, - slice_sizes_const_op); + return TF::SliceOp::create(rewriter, loc, pad_op.getType(), new_pad_op, + slice_begins_const_op, slice_sizes_const_op); } class ConvertPopulationCountOp @@ -3459,8 +3474,8 @@ class ConvertPopulationCountOp ConversionPatternRewriter& rewriter) const final { auto output_type = op.getType().clone( rewriter.getIntegerType(/*width=*/8, /*isSigned=*/false)); - auto pop_cnt = rewriter.create( - op.getLoc(), output_type, op.getOperand()); + auto pop_cnt = TF::PopulationCountOp::create(rewriter, op.getLoc(), + output_type, op.getOperand()); auto cast_or_pop_cnt = rewriter.createOrFold(op.getLoc(), op.getType(), pop_cnt); rewriter.replaceOp(op, {cast_or_pop_cnt}); @@ -3608,9 +3623,9 @@ class ConvertCustomCallWithApproxTopK } auto is_max_k = rewriter.getBoolAttr(true); - auto approx_top_k = rewriter.create( - op.getLoc(), op->getResultTypes(), op.getInputs()[0], top_k_attr, - reduction_dim_attr, recall_target_attr, is_max_k, + auto approx_top_k = TF::ApproxTopKOp::create( + rewriter, op.getLoc(), op->getResultTypes(), op.getInputs()[0], + top_k_attr, reduction_dim_attr, recall_target_attr, is_max_k, reduction_input_size_override_attr, aggregate_to_topk_attr); rewriter.replaceOp(op, approx_top_k.getResults()); @@ -3661,8 +3676,8 @@ class ConvertGetDimensionSizeOp mhlo::GetDimensionSizeOp op, OpAdaptor adaptor, ConversionPatternRewriter& rewriter) const final { ImplicitLocOpBuilder builder(op.getLoc(), rewriter); - Value shape_op = rewriter.create(op.getLoc(), op.getOperand(), - rewriter.getBoolAttr(true)); + Value shape_op = TF::ShapeOp::create(rewriter, op.getLoc(), op.getOperand(), + rewriter.getBoolAttr(true)); Value size = BuildIntArrayConstOp(builder, rewriter, llvm::SmallVector({1}), rewriter.getI32Type()); @@ -3670,13 +3685,13 @@ class ConvertGetDimensionSizeOp builder, rewriter, llvm::SmallVector({static_cast(op.getDimension())}), rewriter.getI64Type()); - Value slice_op = rewriter.create( - op.getLoc(), + Value slice_op = TF::SliceOp::create( + rewriter, op.getLoc(), RankedTensorType::get({static_cast(1)}, op.getType().getElementType()), shape_op, begin, size); - Value squeeze_op = rewriter.create( - op.getLoc(), op.getType(), slice_op, + Value squeeze_op = TF::SqueezeOp::create( + rewriter, op.getLoc(), op.getType(), slice_op, rewriter.getI64ArrayAttr(llvm::ArrayRef({0}))); rewriter.replaceOp(op, {squeeze_op}); return success(); @@ -3749,25 +3764,26 @@ class ConvertDynamicIotaOp : public OpConversionPattern { if (mlir::isa(element_type)) { auto cast_type = mlir::cast(output_shape.getType()).clone(element_type); - output_shape = rewriter.create(dynamic_iota_op.getLoc(), - cast_type, output_shape); + output_shape = TF::CastOp::create(rewriter, dynamic_iota_op.getLoc(), + cast_type, output_shape); } DenseIntElementsAttr scalar_attr = DenseIntElementsAttr::get( RankedTensorType::get({0}, rewriter.getI32Type()), llvm::ArrayRef({})); auto 
scalar_shape = - rewriter.create(dynamic_iota_op.getLoc(), scalar_attr); - auto limit_scalar = rewriter.create( - dynamic_iota_op.getLoc(), RankedTensorType::get({}, element_type), - output_shape, scalar_shape); + TF::ConstOp::create(rewriter, dynamic_iota_op.getLoc(), scalar_attr); + auto limit_scalar = TF::ReshapeOp::create( + rewriter, dynamic_iota_op.getLoc(), + RankedTensorType::get({}, element_type), output_shape, scalar_shape); auto range_type = RankedTensorType::get({type.getShape()[dimension]}, element_type); Value start_op = - rewriter.create(dynamic_iota_op.getLoc(), start); + TF::ConstOp::create(rewriter, dynamic_iota_op.getLoc(), start); Value delta_op = - rewriter.create(dynamic_iota_op.getLoc(), delta); - Value range_op = rewriter.create( - dynamic_iota_op.getLoc(), range_type, start_op, limit_scalar, delta_op); + TF::ConstOp::create(rewriter, dynamic_iota_op.getLoc(), delta); + Value range_op = + TF::RangeOp::create(rewriter, dynamic_iota_op.getLoc(), range_type, + start_op, limit_scalar, delta_op); rewriter.replaceOp(dynamic_iota_op, range_op); return success(); } @@ -3820,7 +3836,7 @@ arith::ConstantOp ExpandedShape(PatternRewriter& rewriter, Value input, RankedTensorType::get({static_cast(expanded_shape.size())}, rewriter.getIntegerType(64)); auto attr = DenseElementsAttr::get(attr_type, expanded_shape); - return rewriter.create(output.getLoc(), attr_type, attr); + return arith::ConstantOp::create(rewriter, output.getLoc(), attr_type, attr); } Value ExpandedDynamicShape(PatternRewriter& rewriter, Value input, @@ -3843,9 +3859,9 @@ Value ExpandedDynamicShape(PatternRewriter& rewriter, Value input, for (int64_t i : expanded_dimensions) { auto index_attr = DenseIntElementsAttr::get( RankedTensorType::get({}, rewriter.getI64Type()), {i}); - Value index = rewriter.create(output.getLoc(), index_attr); - expanded_input = rewriter.create(output.getLoc(), - expanded_input, index); + Value index = TF::ConstOp::create(rewriter, output.getLoc(), index_attr); + expanded_input = TF::ExpandDimsOp::create(rewriter, output.getLoc(), + expanded_input, index); } return expanded_input; } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/conv_util.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/conv_util.cc index e5ea3d2ebc5e93..096de88c16055f 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/conv_util.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/conv_util.cc @@ -111,11 +111,11 @@ Value CreatePadOpFromConvPadding(OpBuilder& b, mhlo::ConvolutionOp op) { auto padding_value_type = RankedTensorType::get({}, data.ElementType()); auto padding_value_attr = b.getZeroAttr(padding_value_type); auto padding_value_op = - b.create(op->getLoc(), padding_value_attr); + arith::ConstantOp::create(b, op->getLoc(), padding_value_attr); - auto pad_op = b.create(padding_value_op->getLoc(), op.getLhs(), - padding_value_op, lo_padding_attr, - hi_padding_attr, interior_padding_attr); + auto pad_op = mhlo::PadOp::create(b, padding_value_op->getLoc(), op.getLhs(), + padding_value_op, lo_padding_attr, + hi_padding_attr, interior_padding_attr); return pad_op; } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/custom_call.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/custom_call.cc index f89f8acd446315..18d9b10d677259 100644 --- 
a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/custom_call.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/custom_call.cc @@ -82,9 +82,9 @@ LogicalResult ConvertCustomCallOp::matchAndRewrite( if (!call_target_name.starts_with("custom_call.")) { return failure(); } - auto tfl_custom = rewriter.create( - mhlo_custom_call.getLoc(), mhlo_custom_call.getResultTypes(), - mhlo_custom_call.getInputs()); + auto tfl_custom = TFL::CustomOp::create(rewriter, mhlo_custom_call.getLoc(), + mhlo_custom_call.getResultTypes(), + mhlo_custom_call.getInputs()); tfl_custom.setCustomCodeAttr(rewriter.getStringAttr(call_target_name)); if (auto bc = mhlo_custom_call.getBackendConfig()) { diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/dot_general.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/dot_general.cc index 940c75256b9e75..347817d3cc6d59 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/dot_general.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/dot_general.cc @@ -178,7 +178,8 @@ Value BuildDotOperandFlattenedShapeOp(Value operand, ImplicitLocOpBuilder& builder, bool is_lhs) { auto operand_type = mlir::cast(operand.getType()); - auto operand_shape = builder.create( + auto operand_shape = TFL::ShapeOp::create( + builder, RankedTensorType::get(static_cast(operand_type.getRank()), builder.getIntegerType(32)), operand); @@ -197,27 +198,29 @@ Value BuildDotOperandFlattenedShapeOp(Value operand, } auto seg_prod_result_type = RankedTensorType::get(static_cast(1), builder.getI32Type()); - auto out_segids_cst = builder.create( - builder.getI32TensorAttr(flattened_out_segids)); - auto contracting_segids_cst = builder.create( - builder.getI32TensorAttr(flattened_contracting_segids)); - auto num_segids_tensor = - builder.create(DenseIntElementsAttr::get( - RankedTensorType::get({}, builder.getIntegerType(32)), 1)); - auto flattened_out_dims = builder.create( - seg_prod_result_type, operand_shape, out_segids_cst, num_segids_tensor); - auto flattened_contracting_dims = builder.create( - seg_prod_result_type, operand_shape, contracting_segids_cst, + auto out_segids_cst = TFL::ConstOp::create( + builder, builder.getI32TensorAttr(flattened_out_segids)); + auto contracting_segids_cst = TFL::ConstOp::create( + builder, builder.getI32TensorAttr(flattened_contracting_segids)); + auto num_segids_tensor = TFL::ConstOp::create( + builder, DenseIntElementsAttr::get( + RankedTensorType::get({}, builder.getIntegerType(32)), 1)); + auto flattened_out_dims = TFL::UnsortedSegmentProdOp::create( + builder, seg_prod_result_type, operand_shape, out_segids_cst, + num_segids_tensor); + auto flattened_contracting_dims = TFL::UnsortedSegmentProdOp::create( + builder, seg_prod_result_type, operand_shape, contracting_segids_cst, num_segids_tensor); llvm::SmallVector flattend_shape_values; // Gather the batch dimensions. 
if (!dot_dimensions_info.batch_dimensions().AxesArray().empty()) { if (ShapedType::isDynamicShape( dot_dimensions_info.batch_dimensions().SizesArray())) { - auto batch_axes_tensor = - builder.create(builder.getI64TensorAttr( - dot_dimensions_info.batch_dimensions().AxesArray())); - auto batch_dims = builder.create( + auto batch_axes_tensor = TFL::ConstOp::create( + builder, builder.getI64TensorAttr( + dot_dimensions_info.batch_dimensions().AxesArray())); + auto batch_dims = TFL::GatherOp::create( + builder, RankedTensorType::get( {static_cast( dot_dimensions_info.batch_dimensions().AxesArray().size())}, @@ -230,8 +233,8 @@ Value BuildDotOperandFlattenedShapeOp(Value operand, dot_dimensions_info.batch_dimensions().SizesArray()) { batch_i32_vec.push_back(static_cast(element)); } - auto batch_dims = - builder.create(builder.getI32TensorAttr(batch_i32_vec)); + auto batch_dims = TFL::ConstOp::create( + builder, builder.getI32TensorAttr(batch_i32_vec)); flattend_shape_values.push_back(batch_dims); } } @@ -247,9 +250,9 @@ Value BuildDotOperandFlattenedShapeOp(Value operand, builder.getIntegerType(32)); // Concatenate the batch dimensions, flattened out dimension and flattened // contracting dimension. - return builder.create( - concat_result_type, flattend_shape_values, /*axis*/ 0, - /*fused_activation_function*/ "NONE"); + return TFL::ConcatenationOp::create(builder, concat_result_type, + flattend_shape_values, /*axis*/ 0, + /*fused_activation_function*/ "NONE"); } } // namespace @@ -280,8 +283,8 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, lhs_dot_dimensions_info.batch_dimensions().SizesArray(), lhs_dot_dimensions_info.out_dimensions().SizesArray(), lhs_dot_dimensions_info.contracting_dimensions().SizesArray()); - auto lhs_transposed = rewriter.create( - loc, + auto lhs_transposed = mhlo::TransposeOp::create( + rewriter, loc, RankedTensorType::get(lhs_transposed_shape, lhs_type.getElementType()), lhs, DenseIntElementsAttr::get( @@ -298,8 +301,8 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, rhs_dot_dimensions_info.batch_dimensions().SizesArray(), rhs_dot_dimensions_info.contracting_dimensions().SizesArray(), rhs_dot_dimensions_info.out_dimensions().SizesArray()); - auto rhs_transposed = rewriter.create( - loc, + auto rhs_transposed = mhlo::TransposeOp::create( + rewriter, loc, RankedTensorType::get(rhs_transposed_shape, rhs_type.getElementType()), rhs, DenseIntElementsAttr::get( @@ -314,15 +317,15 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, lhs_dot_dimensions_info.FlattenedContractingDimensionSize()}); Value lhs_flattend; if (lhs_type.hasStaticShape()) { - lhs_flattend = rewriter.create( - loc, + lhs_flattend = mhlo::ReshapeOp::create( + rewriter, loc, RankedTensorType::get(lhs_flattened_shape, lhs_type.getElementType()), lhs_transposed.getResult()); } else { auto lhs_flattend_shape_op = BuildDotOperandFlattenedShapeOp( lhs, lhs_dot_dimensions_info, builder, /*is_lhs=*/true); - lhs_flattend = rewriter.create( - loc, + lhs_flattend = mhlo::DynamicReshapeOp::create( + rewriter, loc, RankedTensorType::get(lhs_flattened_shape, lhs_type.getElementType()), lhs_transposed, lhs_flattend_shape_op); } @@ -336,15 +339,15 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, rhs_dot_dimensions_info.FlattenedOutDimensionSize()}); Value rhs_flattend; if (rhs_type.hasStaticShape()) { - rhs_flattend = rewriter.create( - loc, + rhs_flattend = mhlo::ReshapeOp::create( + rewriter, loc, RankedTensorType::get(rhs_flattened_shape, 
rhs_type.getElementType()), rhs_transposed.getResult()); } else { auto rhs_flattend_shape_op = BuildDotOperandFlattenedShapeOp( rhs, rhs_dot_dimensions_info, builder, /*is_lhs=*/false); - rhs_flattend = rewriter.create( - loc, + rhs_flattend = mhlo::DynamicReshapeOp::create( + rewriter, loc, RankedTensorType::get(rhs_flattened_shape, rhs_type.getElementType()), rhs_transposed, rhs_flattend_shape_op); } @@ -357,44 +360,46 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, llvm::ArrayRef{ rhs_dot_dimensions_info.FlattenedOutDimensionSize()}); BoolAttr false_attr = rewriter.getBoolAttr(false); - auto matmul = rewriter.create( - loc, RankedTensorType::get(matmul_shape, result_type.getElementType()), + auto matmul = TFL::BatchMatMulOp::create( + rewriter, loc, + RankedTensorType::get(matmul_shape, result_type.getElementType()), lhs_flattend, rhs_flattend, /*adj_x*/ false_attr, /*adj_y*/ false_attr, /*asym_quant_input*/ false_attr); if (result_type.hasStaticShape()) { auto reshaped = - rewriter.create(loc, result_type, matmul.getResult()); + mhlo::ReshapeOp::create(rewriter, loc, result_type, matmul.getResult()); return reshaped.getResult(); } // Reshape for dynamic shaped operands. The result shape is // [lhs_batch_dimensions, lhs_out_dimensions, rhs_out_dimensions]. - auto lhs_shape = rewriter.create( - loc, + auto lhs_shape = TFL::ShapeOp::create( + rewriter, loc, RankedTensorType::get(static_cast(lhs_type.getRank()), builder.getIntegerType(32)), lhs); - auto rhs_shape = rewriter.create( - loc, + auto rhs_shape = TFL::ShapeOp::create( + rewriter, loc, RankedTensorType::get(static_cast(rhs_type.getRank()), builder.getIntegerType(32)), rhs); llvm::SmallVector lhs_batch_and_out = Concat(lhs_dot_dimensions_info.batch_dimensions().AxesArray(), lhs_dot_dimensions_info.out_dimensions().AxesArray()); - auto lhs_batch_and_out_cst = rewriter.create( - loc, rewriter.getI64TensorAttr(lhs_batch_and_out)); - auto lhs_batch_and_out_dims = rewriter.create( - loc, + auto lhs_batch_and_out_cst = TFL::ConstOp::create( + rewriter, loc, rewriter.getI64TensorAttr(lhs_batch_and_out)); + auto lhs_batch_and_out_dims = TFL::GatherOp::create( + rewriter, loc, RankedTensorType::get({static_cast(lhs_batch_and_out.size())}, rewriter.getIntegerType(32)), lhs_shape, lhs_batch_and_out_cst, /*axis*/ 0, /*batch_dims*/ 0); - auto rhs_out_cst = rewriter.create( - loc, rewriter.getI64TensorAttr( - rhs_dot_dimensions_info.out_dimensions().AxesArray())); - auto rhs_out_dims = rewriter.create( - loc, + auto rhs_out_cst = TFL::ConstOp::create( + rewriter, loc, + rewriter.getI64TensorAttr( + rhs_dot_dimensions_info.out_dimensions().AxesArray())); + auto rhs_out_dims = TFL::GatherOp::create( + rewriter, loc, RankedTensorType::get( {static_cast( rhs_dot_dimensions_info.out_dimensions().AxesArray().size())}, @@ -407,12 +412,12 @@ Value ConvertDot(PatternRewriter& rewriter, Value lhs, Value rhs, lhs_dot_dimensions_info.out_dimensions().AxesArray().size() + rhs_dot_dimensions_info.out_dimensions().AxesArray().size())}, rewriter.getIntegerType(32)); - auto result_shape = rewriter.create( - loc, result_shape_type, ValueRange{lhs_batch_and_out_dims, rhs_out_dims}, - 0, "NONE"); + auto result_shape = TFL::ConcatenationOp::create( + rewriter, loc, result_shape_type, + ValueRange{lhs_batch_and_out_dims, rhs_out_dims}, 0, "NONE"); - auto reshaped = rewriter.create( - loc, result_type, matmul.getResult(), result_shape); + auto reshaped = mhlo::DynamicReshapeOp::create( + rewriter, loc, result_type, matmul.getResult(), 
result_shape); return reshaped.getResult(); } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/fft.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/fft.cc index f2d29774c31c89..34b1b60fd1b825 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/fft.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/fft.cc @@ -74,9 +74,11 @@ bool IsSupportedRfftOp(mhlo::FftOp fft_op) { // concatenate with other dimension sizes. Value GetDimensionSizeTensor(OpBuilder& rewriter, Location loc, Value input, int64_t dim) { - auto size_scalar = rewriter.create(loc, input, dim); - return rewriter.create( - loc, RankedTensorType::get({1}, rewriter.getI32Type()), size_scalar); + auto size_scalar = + mhlo::GetDimensionSizeOp::create(rewriter, loc, input, dim); + return mhlo::ReshapeOp::create( + rewriter, loc, RankedTensorType::get({1}, rewriter.getI32Type()), + size_scalar); } // Convert rfft to rfft2d. @@ -154,13 +156,13 @@ class ConvertNDFftTo2DFftOp : public OpRewritePattern { expanded_input_shape_values.push_back(GetDimensionSizeTensor( rewriter, fft_op.getLoc(), fft_operand, i)); } - expanded_input_shape_values.push_back(rewriter.create( - fft_op.getLoc(), rewriter.getI32TensorAttr({1}))); + expanded_input_shape_values.push_back(mhlo::ConstantOp::create( + rewriter, fft_op.getLoc(), rewriter.getI32TensorAttr({1}))); expanded_input_shape_values.push_back(GetDimensionSizeTensor( rewriter, fft_op.getLoc(), fft_operand, input_shape.size() - 1)); - auto expanded_input_shape_tensor = rewriter.create( - fft_op.getLoc(), + auto expanded_input_shape_tensor = mhlo::ConcatenateOp::create( + rewriter, fft_op.getLoc(), RankedTensorType::get( {static_cast(expanded_input_shape_values.size())}, rewriter.getI32Type()), @@ -168,12 +170,12 @@ class ConvertNDFftTo2DFftOp : public OpRewritePattern { // Create a new mhlo.dynamic_reshape op with the expanded input and // expanded input shape. SHAPE tensor is created in the previous step. - fft_operand = rewriter.create( - fft_op.getLoc(), expanded_input_type, fft_operand, + fft_operand = mhlo::DynamicReshapeOp::create( + rewriter, fft_op.getLoc(), expanded_input_type, fft_operand, expanded_input_shape_tensor); } else { - fft_operand = rewriter.create( - fft_op.getLoc(), expanded_input_type, fft_operand); + fft_operand = mhlo::ReshapeOp::create(rewriter, fft_op.getLoc(), + expanded_input_type, fft_operand); } SmallVector new_output_shape = {output_shape.begin(), @@ -186,8 +188,8 @@ class ConvertNDFftTo2DFftOp : public OpRewritePattern { } auto new_fft = - rewriter.create(fft_op.getLoc(), output_type, fft_operand, - fft_op.getFftType(), new_fft_lengths_attr); + mhlo::FftOp::create(rewriter, fft_op.getLoc(), output_type, fft_operand, + fft_op.getFftType(), new_fft_lengths_attr); if (input_shape[input_shape.size() - 2] != 1) { // Squeeze the output dimensions back to 2D. 
@@ -202,19 +204,20 @@ class ConvertNDFftTo2DFftOp : public OpRewritePattern { rewriter, fft_op.getLoc(), new_fft.getResult(), new_fft.getResult().getType().getShape().size() - 1)); - auto shape_tensor = rewriter.create( - fft_op.getLoc(), + auto shape_tensor = mhlo::ConcatenateOp::create( + rewriter, fft_op.getLoc(), RankedTensorType::get( {static_cast(output_shape_values.size())}, rewriter.getI32Type()), output_shape_values, 0); - auto squeeze_op = rewriter.create( - fft_op.getLoc(), fft_op.getResult().getType(), new_fft.getResult(), - shape_tensor); + auto squeeze_op = mhlo::DynamicReshapeOp::create( + rewriter, fft_op.getLoc(), fft_op.getResult().getType(), + new_fft.getResult(), shape_tensor); rewriter.replaceOp(fft_op, squeeze_op.getResult()); } else { - auto squeeze_op = rewriter.create( - fft_op.getLoc(), fft_op.getResult().getType(), new_fft.getResult()); + auto squeeze_op = mhlo::ReshapeOp::create(rewriter, fft_op.getLoc(), + fft_op.getResult().getType(), + new_fft.getResult()); rewriter.replaceOp(fft_op, squeeze_op.getResult()); } } else { @@ -256,9 +259,10 @@ class LegalizeRfftOp : public OpConversionPattern { auto output_type = mlir::cast(fft_op.getResult().getType()); auto fft_len_const = - rewriter.create(fft_op.getLoc(), fft_len_f32_attr); - auto tfl_rfft2d = rewriter.create( - fft_op.getLoc(), output_type, fft_op.getOperand(), fft_len_const); + arith::ConstantOp::create(rewriter, fft_op.getLoc(), fft_len_f32_attr); + auto tfl_rfft2d = + TFL::RFFT2dOp::create(rewriter, fft_op.getLoc(), output_type, + fft_op.getOperand(), fft_len_const); rewriter.replaceOp(fft_op, tfl_rfft2d.getResult()); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/gelu.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/gelu.cc index 539a9934f75e5a..9833b3415f3059 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/gelu.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/gelu.cc @@ -119,9 +119,9 @@ LogicalResult LowerGELU::matchAndRewrite(Operation* op, if (!HasSplatArg(rhs_mul, kOneOverRoot2, 1)) return failure(); auto is_approx_attr = rewriter.getBoolAttr(false); - auto gelu = rewriter.create( - output_mul.getLoc(), output_mul.getResult().getType(), - erf_input->getOperand(0), is_approx_attr); + auto gelu = TFL::GeluOp::create(rewriter, output_mul.getLoc(), + output_mul.getResult().getType(), + erf_input->getOperand(0), is_approx_attr); rewriter.replaceAllOpUsesWith(output_mul, gelu); // Note these must be erased in reverse topo order to avoid // failing in debug mode. 
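Note: every hunk in these conversion files applies the same mechanical change, replacing the member-template builder rewriter.create<OpTy>(loc, args...) with the static OpTy::create(rewriter, loc, args...) form. A minimal sketch of the two spellings, modeled on the arith::ConstantOp calls above (illustrative only, not part of the patch; the BuildI64VectorConst helper is hypothetical):

#include "llvm/ADT/ArrayRef.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/PatternMatch.h"

// Builds a rank-1 i64 tensor constant; the two return statements are
// equivalent, and the patch standardizes on the second spelling.
static mlir::Value BuildI64VectorConst(mlir::PatternRewriter& rewriter,
                                       mlir::Location loc,
                                       llvm::ArrayRef<int64_t> values) {
  auto type = mlir::RankedTensorType::get(
      {static_cast<int64_t>(values.size())}, rewriter.getI64Type());
  auto attr = mlir::DenseIntElementsAttr::get(type, values);
  // Old spelling, removed throughout this patch:
  //   return rewriter.create<mlir::arith::ConstantOp>(loc, type, attr);
  // New spelling, added throughout this patch:
  return mlir::arith::ConstantOp::create(rewriter, loc, type, attr);
}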
diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/if.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/if.cc index e43f342aec2cdc..6b377c0eee933c 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/if.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/if.cc @@ -33,8 +33,8 @@ class LegalizeIfOp : public OpConversionPattern { LogicalResult matchAndRewrite( mhlo::IfOp if_op, OpAdaptor adaptor, ConversionPatternRewriter& rewriter) const final { - auto new_op = rewriter.create( - if_op.getLoc(), if_op.getResultTypes(), if_op.getPred()); + auto new_op = TFL::IfOp::create(rewriter, if_op.getLoc(), + if_op.getResultTypes(), if_op.getPred()); new_op.getThenRegion().takeBody(if_op.getTrueBranch()); new_op.getElseRegion().takeBody(if_op.getFalseBranch()); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/reduce.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/reduce.cc index f237a7168e5660..5b5368ac1f5522 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/reduce.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/reduce.cc @@ -248,8 +248,8 @@ LogicalResult ConvertReduceOpToArgMinMax< int64_t axis = reduce_op.getDimensions().getValues()[0]; auto dim_type = RankedTensorType::get({1}, rewriter.getI32Type()); - auto reduction_indices = rewriter.create( - reduce_op.getLoc(), dim_type, + auto reduction_indices = arith::ConstantOp::create( + rewriter, reduce_op.getLoc(), dim_type, rewriter.getI32TensorAttr({static_cast(axis)})); // Generate a Max and an ArgMax of as the mhlo op returns both while in TF @@ -260,24 +260,24 @@ LogicalResult ConvertReduceOpToArgMinMax< if (operand_type.getElementType().isInteger(1)) { // TF does not support min or max on boolean (int1) arguments. // Use AnyOp for MaxOp and AllOp for MinOp. 
- auto tf_reduce_op = rewriter.create( - reduce_op.getLoc(), reduce_op->getResult(0).getType(), operand, - reduction_indices, + auto tf_reduce_op = BooleanReduce::create( + rewriter, reduce_op.getLoc(), reduce_op->getResult(0).getType(), + operand, reduction_indices, /*keep_dim=*/rewriter.getBoolAttr(false)); - auto tf_argreduce_op = rewriter.create( - reduce_op.getLoc(), reduce_op->getResult(1).getType(), operand, - reduction_indices); + auto tf_argreduce_op = ArgReduce::create(rewriter, reduce_op.getLoc(), + reduce_op->getResult(1).getType(), + operand, reduction_indices); rewriter.replaceOp(reduce_op, {tf_reduce_op, tf_argreduce_op}); } else { - auto tf_reduce_op = rewriter.create( - reduce_op.getLoc(), reduce_op->getResult(0).getType(), operand, - reduction_indices, + auto tf_reduce_op = Reduce::create( + rewriter, reduce_op.getLoc(), reduce_op->getResult(0).getType(), + operand, reduction_indices, /*keep_dim=*/rewriter.getBoolAttr(false)); - auto tf_argreduce_op = rewriter.create( - reduce_op.getLoc(), reduce_op->getResult(1).getType(), operand, - reduction_indices); + auto tf_argreduce_op = ArgReduce::create(rewriter, reduce_op.getLoc(), + reduce_op->getResult(1).getType(), + operand, reduction_indices); rewriter.replaceOp(reduce_op, {tf_reduce_op, tf_argreduce_op}); } @@ -366,9 +366,10 @@ template LogicalResult rewriteNonMatchInitValue(mhlo::ReduceOp reduce_op, Value input, arith::ConstantOp reduction_indices, ConversionPatternRewriter& rewriter) { - Value reduce_result = rewriter.create( - reduce_op.getLoc(), reduce_op.getType(0), input, reduction_indices, - /*keep_dim=*/rewriter.getBoolAttr(false)); + Value reduce_result = + ReduceOp::create(rewriter, reduce_op.getLoc(), reduce_op.getType(0), + input, reduction_indices, + /*keep_dim=*/rewriter.getBoolAttr(false)); if constexpr (BuilderHasFAF) { rewriter.replaceOpWithNewOp(reduce_op, reduce_result, @@ -455,7 +456,7 @@ class ConvertReduce : public OpConversionPattern { auto tfl_dims = GetDimsAsI32Elements(rewriter, reduce_op); auto tfl_dims_op = - rewriter.create(reduce_op.getLoc(), tfl_dims); + arith::ConstantOp::create(rewriter, reduce_op.getLoc(), tfl_dims); // // replace with new reduce op, chaining binary op if needed. 
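The OpConversionPattern skeletons in these files are otherwise unchanged; only the builder calls inside matchAndRewrite move to the static form. A schematic pattern in the new style, with hypothetical SourceOp/TargetOp placeholders standing in for the concrete mhlo-to-TFL pairs handled here (sketch only, assuming the generic ODS builder that takes result types and operands):

#include "mlir/IR/PatternMatch.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/DialectConversion.h"

// Schematic only: shows where OpTy::create(rewriter, ...) replaces
// rewriter.create<OpTy>(...) inside a conversion pattern.
template <typename SourceOp, typename TargetOp>
class LegalizeWithStaticCreate : public mlir::OpConversionPattern<SourceOp> {
 public:
  using mlir::OpConversionPattern<SourceOp>::OpConversionPattern;
  using OpAdaptor = typename SourceOp::Adaptor;

  mlir::LogicalResult matchAndRewrite(
      SourceOp op, OpAdaptor adaptor,
      mlir::ConversionPatternRewriter& rewriter) const final {
    // Previously: rewriter.create<TargetOp>(op.getLoc(), ...).
    auto new_op = TargetOp::create(rewriter, op.getLoc(), op->getResultTypes(),
                                   adaptor.getOperands());
    rewriter.replaceOp(op, new_op->getResults());
    return mlir::success();
  }
};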
diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/reduce_window.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/reduce_window.cc index 4382a9864cac02..c4a3dc62fd58f0 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/reduce_window.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/reduce_window.cc @@ -126,7 +126,7 @@ Value TransposeTensor(OpBuilder& b, Value tensor, const int64_t perm_size = perm.size(); auto perm_attr_type = RankedTensorType::get({perm_size}, b.getI64Type()); auto perm_attr = DenseIntElementsAttr::get(perm_attr_type, perm); - return b.create(tensor.getLoc(), tensor, perm_attr); + return mhlo::TransposeOp::create(b, tensor.getLoc(), tensor, perm_attr); } DenseIntElementsAttr BuildDenseI64(OpBuilder& b, ArrayRef shape, @@ -289,9 +289,10 @@ LogicalResult RelayoutReduceWindow::matchAndRewrite( // transpose input and build new reduce_window auto new_input = TransposeTensor(rewriter, input, perm_for_inputs); - auto new_rw = rewriter.create( - op.getLoc(), new_out_type, new_input, init_val, new_window_dims_attr, - new_window_strides_attr, BuildDenseI64(rewriter, view.BaseDilations()), + auto new_rw = mhlo::ReduceWindowOp::create( + rewriter, op.getLoc(), new_out_type, new_input, init_val, + new_window_dims_attr, new_window_strides_attr, + BuildDenseI64(rewriter, view.BaseDilations()), BuildDenseI64(rewriter, view.WindowDilations()), new_paddings_attr); IRMapping ir_map; op.getBody().cloneInto(&new_rw.getBody(), ir_map); @@ -412,7 +413,7 @@ LogicalResult LegalizeCumSum::matchAndRewrite( RankedTensorType::get({}, rewriter.getI32Type()), static_cast(axis)); auto axis_cst = - rewriter.create(op->getLoc(), axis_cst_attr); + arith::ConstantOp::create(rewriter, op->getLoc(), axis_cst_attr); auto tfl_exclusive_attr = rewriter.getBoolAttr(false); auto tfl_reverse_attr = rewriter.getBoolAttr(false); @@ -476,7 +477,7 @@ TFL::PadV2Op LegalizeMaxPool::BuildExplicitPadOp( llvm::ArrayRef(padding_values)); auto padding_values_op = - rewriter.create(op.getLoc(), padding_dense_attr); + arith::ConstantOp::create(rewriter, op.getLoc(), padding_dense_attr); llvm::SmallVector pad_output_shape_vector; pad_output_shape_vector.push_back(input_type.getDimSize(0)); @@ -489,8 +490,8 @@ TFL::PadV2Op LegalizeMaxPool::BuildExplicitPadOp( pad_output_shape_vector.push_back(input_type.getDimSize(3)); auto pad_output_type = mlir::RankedTensorType::get( pad_output_shape_vector, output_type.getElementType()); - return rewriter.create(op.getLoc(), pad_output_type, input, - padding_values_op, init); + return TFL::PadV2Op::create(rewriter, op.getLoc(), pad_output_type, input, + padding_values_op, init); } LogicalResult LegalizeMaxPool::matchAndRewrite( @@ -575,13 +576,12 @@ void ReplaceWithAvgPool(mhlo::DivOp op, Value rw_lhs_input, auto [fh, fw, sh, sw, p, faf] = BuildTFLPoolAttrs(rewriter, lhs_view, padding); - Value final_op = rewriter.create( - op->getLoc(), out_type, rw_lhs_input, fh, fw, p, sh, sw, faf); + Value final_op = TFL::AveragePool2DOp::create( + rewriter, op->getLoc(), out_type, rw_lhs_input, fh, fw, p, sh, sw, faf); if (opt_final_tpose) { - final_op = rewriter - .create(final_op.getLoc(), final_op, - opt_final_tpose.getPermutation()) + final_op = mhlo::TransposeOp::create(rewriter, final_op.getLoc(), final_op, + opt_final_tpose.getPermutation()) .getResult(); } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/scatter.cc 
b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/scatter.cc index 87bf7770a20ddf..303c446d536b47 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/scatter.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/scatter.cc @@ -69,8 +69,8 @@ LogicalResult CanonicalizeScatterUpdates( auto permutation_and_shape = GetPermutationAndTransposedShape( permutation_array, updates_type, rewriter); - auto transposed_updates = rewriter.create( - scatter_op->getLoc(), permutation_and_shape.shape, updates, + auto transposed_updates = mhlo::TransposeOp::create( + rewriter, scatter_op->getLoc(), permutation_and_shape.shape, updates, permutation_and_shape.permutation); updates = transposed_updates; @@ -163,9 +163,9 @@ LogicalResult ConvertScatterOp::matchAndRewrite( permutation_array, operand_type, rewriter); Location loc = scatter_op.getLoc(); - auto transposed_operand = rewriter.create( - loc, permutation_and_shape.shape, operands[0], - permutation_and_shape.permutation); + auto transposed_operand = + mhlo::TransposeOp::create(rewriter, loc, permutation_and_shape.shape, + operands[0], permutation_and_shape.permutation); Value new_indices = indices; int64_t index_depth = @@ -181,8 +181,8 @@ LogicalResult ConvertScatterOp::matchAndRewrite( builder, rewriter, llvm::SmallVector({num_updates, index_depth}), rewriter.getI32Type()); - new_indices = rewriter.create( - loc, + new_indices = TF::ReshapeOp::create( + rewriter, loc, RankedTensorType::get({num_updates, index_depth}, indices_type.getElementType()), indices, indices_shape); @@ -190,8 +190,8 @@ LogicalResult ConvertScatterOp::matchAndRewrite( builder, rewriter, llvm::SmallVector({num_updates, updates_type.getDimSize(0)}), rewriter.getI32Type()); - new_updates = rewriter.create( - loc, + new_updates = TF::ReshapeOp::create( + rewriter, loc, RankedTensorType::get({1, updates_type.getDimSize(0)}, updates_type.getElementType()), new_updates, updates_shape); @@ -200,8 +200,8 @@ LogicalResult ConvertScatterOp::matchAndRewrite( // Apply TF scatter to update the trailing dimensions of the // transposed operand. auto tf_scatter_op = - rewriter.create(loc, permutation_and_shape.shape, - transposed_operand, new_indices, new_updates); + TfOp::create(rewriter, loc, permutation_and_shape.shape, + transposed_operand, new_indices, new_updates); // Reverse the earlier transpose. auto inverse_permutation = GetInversePermutation(permutation_array, rewriter); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/slice.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/slice.cc index e43c0c665ff9db..548951c1ae43e0 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/slice.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/slice.cc @@ -46,8 +46,8 @@ Value PackScalarIndices(mlir::ValueRange indices, OpBuilder& b) { auto values_count_attr = b.getI32IntegerAttr(num_indices); auto pack_axis_attr = b.getI32IntegerAttr(0); - return b.create(indices.back().getLoc(), packed_indices_type, - indices, values_count_attr, pack_axis_attr); + return TFL::PackOp::create(b, indices.back().getLoc(), packed_indices_type, + indices, values_count_attr, pack_axis_attr); } //===----------------------------------------------------------------------===// @@ -56,8 +56,8 @@ Value PackScalarIndices(mlir::ValueRange indices, OpBuilder& b) { // Cast the value to i32. 
Value BuildTFLCastOp(OpBuilder& b, Value value) { - return b.create( - value.getLoc(), + return TFL::CastOp::create( + b, value.getLoc(), RankedTensorType::get(llvm::cast(value.getType()).getShape(), b.getI32Type()), value); @@ -70,12 +70,12 @@ class LegalizeSliceOp : public OpConversionPattern { LogicalResult matchAndRewrite( mhlo::SliceOp slice_op, OpAdaptor adaptor, ConversionPatternRewriter& rewriter) const final { - auto begin = rewriter.create(slice_op.getLoc(), - slice_op.getStartIndices()); - auto end = rewriter.create(slice_op.getLoc(), - slice_op.getLimitIndices()); - auto strides = rewriter.create(slice_op.getLoc(), - slice_op.getStrides()); + auto begin = arith::ConstantOp::create(rewriter, slice_op.getLoc(), + slice_op.getStartIndices()); + auto end = arith::ConstantOp::create(rewriter, slice_op.getLoc(), + slice_op.getLimitIndices()); + auto strides = arith::ConstantOp::create(rewriter, slice_op.getLoc(), + slice_op.getStrides()); auto zero = rewriter.getIntegerAttr(rewriter.getI32Type(), 0); auto no_offset = rewriter.getBoolAttr(false); @@ -116,8 +116,8 @@ LogicalResult CastSliceIndicesToSignless::matchAndRewrite( llvm::SmallVector casted_start_inds; for (auto start_ind_opr : op.getStartIndices()) { - auto casted_start_ind_opr = rewriter.create( - start_ind_opr.getLoc(), start_ind_opr, new_start_e_type); + auto casted_start_ind_opr = mhlo::ConvertOp::create( + rewriter, start_ind_opr.getLoc(), start_ind_opr, new_start_e_type); casted_start_inds.push_back(casted_start_ind_opr.getResult()); } @@ -161,8 +161,8 @@ LogicalResult LegalizeDynamicSliceOp::matchAndRewrite( // clamp start indices between zero and shape(operand) - slice_sizes //=----- - Value clamp_left_cst = rewriter.create( - op->getLoc(), rewriter.getZeroAttr(start_type)); + Value clamp_left_cst = arith::ConstantOp::create( + rewriter, op->getLoc(), rewriter.getZeroAttr(start_type)); llvm::SmallVector new_start_indices; const auto stride_sizes = UnrollI64Splat(op.getSliceSizes()); @@ -170,15 +170,15 @@ LogicalResult LegalizeDynamicSliceOp::matchAndRewrite( for (auto [dim_size, start_ind_opr, stride_size] : llvm::zip(input_type.getShape(), op.getStartIndices(), stride_sizes)) { const int64_t clamp_right_val = dim_size - stride_size; - auto clamp_right_cst = rewriter.create( - op->getLoc(), + auto clamp_right_cst = arith::ConstantOp::create( + rewriter, op->getLoc(), DenseElementsAttr::get(start_type, rewriter.getIntegerAttr( start_e_type, clamp_right_val))); - Value new_start_ind = rewriter.create( - op->getLoc(), start_type, clamp_left_cst, start_ind_opr); - new_start_ind = rewriter.create( - op->getLoc(), start_type, clamp_right_cst, new_start_ind); + Value new_start_ind = TFL::MaximumOp::create( + rewriter, op->getLoc(), start_type, clamp_left_cst, start_ind_opr); + new_start_ind = TFL::MinimumOp::create(rewriter, op->getLoc(), start_type, + clamp_right_cst, new_start_ind); new_start_indices.push_back(new_start_ind); } @@ -190,7 +190,7 @@ LogicalResult LegalizeDynamicSliceOp::matchAndRewrite( auto packed_indices = PackScalarIndices(new_start_indices, rewriter); auto slice_sizes_cst = - rewriter.create(op->getLoc(), op.getSliceSizes()); + arith::ConstantOp::create(rewriter, op->getLoc(), op.getSliceSizes()); rewriter.replaceOpWithNewOp(op, op.getType(), op.getOperand(), packed_indices, slice_sizes_cst); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.cc index 
a64012415729e4..6dcf03b1600244 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.cc @@ -70,7 +70,7 @@ Value BuildIntConstOp(ImplicitLocOpBuilder& builder, ConversionPatternRewriter& rewriter, int64_t const_value, Type type) { Value result_const = - builder.create(rewriter.getIntegerAttr(type, const_value)); + TF::ConstOp::create(builder, rewriter.getIntegerAttr(type, const_value)); return result_const; } @@ -115,8 +115,8 @@ LogicalResult NormalizeIndexVector(Operation* parent_op, Value& indices, new_start_indices_shape.push_back(1); indices_type = RankedTensorType::get(new_start_indices_shape, indices_type.getElementType()); - indices = rewriter.create(parent_op->getLoc(), - indices_type, indices); + indices = mhlo::ReshapeOp::create(rewriter, parent_op->getLoc(), + indices_type, indices); } else if (index_vector_dim != indices_type.getRank() - 1) { // If index_vector_dim isn't the last dimension in indices then it isn't // supported yet. @@ -197,8 +197,8 @@ Value InsertTranspose(Value value, int batch_dim, int feature_dim, default_batch_dim, default_feature_dim, default_spatial_dim_start, num_spatial_dims, type, rewriter); - return rewriter.create(value.getLoc(), type, value, - permutation); + return mhlo::TransposeOp::create(rewriter, value.getLoc(), type, value, + permutation); } Value CreateCastToInt32(Value val, Location loc, PatternRewriter& rewriter) { @@ -206,10 +206,10 @@ Value CreateCastToInt32(Value val, Location loc, PatternRewriter& rewriter) { if (auto shaped_type = mlir::dyn_cast(val.getType())) { ShapedType new_type = RankedTensorType::get(shaped_type.getShape(), new_ele_type); - return rewriter.create(loc, new_type, val); + return TFL::CastOp::create(rewriter, loc, new_type, val); } - return rewriter.create( - loc, UnrankedTensorType::get(new_ele_type), val); + return TFL::CastOp::create(rewriter, loc, + UnrankedTensorType::get(new_ele_type), val); } // Replaces `region`'s terminator to TFL::Yield. diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h index c72fce3ffc6a84..1bf33c1d0d993e 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/util.h @@ -75,7 +75,7 @@ Value BuildIntArrayConstOp(ImplicitLocOpBuilder& builder, } const_value_raw = rewriter.getI32TensorAttr(const_i32_vec); } - Value result_const = builder.create(const_value_raw); + Value result_const = ConstOpT::create(builder, const_value_raw); return result_const; } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/while.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/while.cc index c2323b63b9370c..0de2ccafedbe16 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/while.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_hlo_conversions/while.cc @@ -51,9 +51,10 @@ class LeagalizeWhileOp : public OpConversionPattern { // currently doesn't support stateless, so this // parameters are set to the default values. 
auto is_stateless = rewriter.getBoolAttr(false); - auto new_while = rewriter.create( - while_op.getLoc(), while_op->getResultTypes(), while_op->getOperands(), - /*is_stateless=*/is_stateless); + auto new_while = TFL::WhileOp::create(rewriter, while_op.getLoc(), + while_op->getResultTypes(), + while_op->getOperands(), + /*is_stateless=*/is_stateless); new_while.getCond().takeBody(while_op.getCond()); new_while.getBody().takeBody(while_op.getBody()); TFLReplaceReturnOp(new_while.getCond(), rewriter); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_stablehlo_custom_call_to_composite.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_stablehlo_custom_call_to_composite.cc index 113293596536c9..c7f88bb2ebeebc 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_stablehlo_custom_call_to_composite.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_stablehlo_custom_call_to_composite.cc @@ -69,9 +69,9 @@ struct ReplaceCustomCallWithComposite final auto decomposition = mlir::cast(calledComputations[0]); - auto composite = rewriter.create( - op.getLoc(), op.getResultTypes(), op.getOperands(), name.str(), attrs, - decomposition.getValue()); + auto composite = mlir::stablehlo::CompositeOp::create( + rewriter, op.getLoc(), op.getResultTypes(), op.getOperands(), + name.str(), attrs, decomposition.getValue()); rewriter.replaceOp(op, composite.getResults()); return success(); } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_stablehlo_to_vhlo.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_stablehlo_to_vhlo.cc index 704dbf37d680dd..836598d19a7516 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_stablehlo_to_vhlo.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_stablehlo_to_vhlo.cc @@ -59,7 +59,7 @@ namespace { Value MaterializeIllegalCast(OpBuilder &builder, Type type, ValueRange inputs, Location loc) { - return builder.create(loc, type, inputs) + return UnrealizedConversionCastOp::create(builder, loc, type, inputs) ->getResult(0); } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_tf_xla_call_module_to_stablehlo_pass.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_tf_xla_call_module_to_stablehlo_pass.cc index 78da8b153f47fc..614bd070748267 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_tf_xla_call_module_to_stablehlo_pass.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/legalize_tf_xla_call_module_to_stablehlo_pass.cc @@ -177,8 +177,8 @@ class ConvertTFXlaCallModuleOp : public OpRewritePattern { SmallVector call_op_operands(op.getOperands()); if (ContainsPlatformIndexArg(op)) { - Value dummy_const = rewriter.create( - op.getLoc(), + Value dummy_const = TF::ConstOp::create( + rewriter, op.getLoc(), DenseIntElementsAttr::get( RankedTensorType::get({}, rewriter.getIntegerType(32)), {0})); call_op_operands.insert(call_op_operands.begin(), dummy_const); @@ -196,16 +196,16 @@ class ConvertTFXlaCallModuleOp : public OpRewritePattern { Value operand = std::get<0>(operand_and_type); Type expected_type = std::get<1>(operand_and_type); if (operand.getType() != expected_type) { - operand = rewriter.create( - op.getLoc(), expected_type, operand, - /*Truncate=*/rewriter.getBoolAttr(false)); + operand = + TF::CastOp::create(rewriter, op.getLoc(), expected_type, operand, + /*Truncate=*/rewriter.getBoolAttr(false)); } casted_operands.push_back(operand); } - auto call = rewriter.create( 
- op->getLoc(), main_fn.getSymName(), main_fn.getResultTypes(), - casted_operands); + auto call = + func::CallOp::create(rewriter, op->getLoc(), main_fn.getSymName(), + main_fn.getResultTypes(), casted_operands); rewriter.replaceOp(op, call->getResults()); return success(); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/optimize.cc index 282c44a958c27f..1effffd9aa00e3 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/optimize.cc @@ -269,9 +269,9 @@ LogicalResult LiftDotConcatLHS(mhlo::ConcatenateOp concat, mlir::dyn_cast(v.getType()).getShape()[new_concat_dim]; } - auto new_concat = rewriter.create( - concat->getLoc(), concat.getType().clone(new_concat_shape), all_dot_lhs, - rewriter.getI64IntegerAttr(new_concat_dim)); + auto new_concat = mhlo::ConcatenateOp::create( + rewriter, concat->getLoc(), concat.getType().clone(new_concat_shape), + all_dot_lhs, rewriter.getI64IntegerAttr(new_concat_dim)); rewriter.replaceOpWithNewOp( concat, concat.getType(), new_concat, first_dot.getRhs(), first_dot.getDotDimensionNumbers(), first_dot.getPrecisionConfigAttr(), @@ -368,11 +368,11 @@ LogicalResult LiftDotConcatLHSAndRHS(mhlo::ConcatenateOp concat, mlir::dyn_cast(v.getType()).getShape()[rhs_batch_dim]; } - auto lhs_new_concat = rewriter.create( - concat->getLoc(), concat.getType().clone(lhs_new_concat_shape), + auto lhs_new_concat = mhlo::ConcatenateOp::create( + rewriter, concat->getLoc(), concat.getType().clone(lhs_new_concat_shape), all_dot_lhs, rewriter.getI64IntegerAttr(lhs_batch_dim)); - auto rhs_new_concat = rewriter.create( - concat->getLoc(), concat.getType().clone(rhs_new_concat_shape), + auto rhs_new_concat = mhlo::ConcatenateOp::create( + rewriter, concat->getLoc(), concat.getType().clone(rhs_new_concat_shape), all_dot_rhs, rewriter.getI64IntegerAttr(rhs_batch_dim)); rewriter.replaceOpWithNewOp( concat, concat.getType(), lhs_new_concat, rhs_new_concat, @@ -439,7 +439,8 @@ LogicalResult FuseSliceConcat(mhlo::ConcatenateOp concat, new_slice_shape.push_back(second_limit - first_start); } - auto new_slice = rewriter.create( + auto new_slice = mhlo::SliceOp::create( + rewriter, FusedLoc::get(first->getContext(), {first.getLoc(), second.getLoc()}), first.getType().clone(new_slice_shape), first.getOperand(), /*start_indices=*/rewriter.getI64TensorAttr(new_start), @@ -730,8 +731,8 @@ class SimplifyBroadcastInDimsReshape auto new_broadcast_input_type = RankedTensorType::get( new_broadcast_input_shape, broadcast_type.getElementType()); - auto new_broadcast_input = rewriter.create( - op->getLoc(), new_broadcast_input_type, op.getOperand()); + auto new_broadcast_input = mhlo::ReshapeOp::create( + rewriter, op->getLoc(), new_broadcast_input_type, op.getOperand()); auto new_broadcast_dims_attr = rewriter.getI64TensorAttr(new_broadcast_dims); diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/smuggle_disallowed_ops.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/smuggle_disallowed_ops.cc index 249a1018e091f4..13f981c8714f46 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/smuggle_disallowed_ops.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/smuggle_disallowed_ops.cc @@ -44,8 +44,8 @@ LogicalResult SmuggleOp(Operation* op, PatternRewriter& rewriter) { rewriter.getNamedAttr("call_target_name", op->getName().getIdentifier()); SmallVector attrs{op->getAttrs()}; attrs.push_back(call_target); - auto 
custom_call = rewriter.create( - op->getLoc(), op->getResultTypes(), op->getOperands(), attrs); + auto custom_call = mlir::stablehlo::CustomCallOp::create( + rewriter, op->getLoc(), op->getResultTypes(), op->getOperands(), attrs); rewriter.replaceOp(op, custom_call.getResults()); return success(); } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/stablehlo_fuse_convolution_pass.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/stablehlo_fuse_convolution_pass.cc index fcecd557aeab1c..557b721bfaf35f 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/stablehlo_fuse_convolution_pass.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/stablehlo_fuse_convolution_pass.cc @@ -143,13 +143,15 @@ class FuseStablehloMulAndConvolutionPattern broadcast_dims = DenseI64ArrayAttr::get(rewriter.getContext(), {filter_rank - 1}); } - Value broadcast_multiplier = rewriter.create( - mul_op.getLoc(), filter.getType(), multiplier, broadcast_dims); - Value new_filter = rewriter.create( - mul_op.getLoc(), filter.getType(), filter, broadcast_multiplier); - Value new_conv = rewriter.create( - mul_op.getLoc(), conv_op.getType(), conv_op.getLhs(), new_filter, - conv_op.getWindowStridesAttr(), conv_op.getPaddingAttr(), + Value broadcast_multiplier = stablehlo::BroadcastInDimOp::create( + rewriter, mul_op.getLoc(), filter.getType(), multiplier, + broadcast_dims); + Value new_filter = + stablehlo::MulOp::create(rewriter, mul_op.getLoc(), filter.getType(), + filter, broadcast_multiplier); + Value new_conv = stablehlo::ConvolutionOp::create( + rewriter, mul_op.getLoc(), conv_op.getType(), conv_op.getLhs(), + new_filter, conv_op.getWindowStridesAttr(), conv_op.getPaddingAttr(), conv_op.getLhsDilationAttr(), conv_op.getRhsDilationAttr(), conv_op.getWindowReversalAttr(), conv_op.getDimensionNumbers(), conv_op.getFeatureGroupCount(), conv_op.getBatchGroupCount(), @@ -169,8 +171,8 @@ class FuseStablehloMulAndConvolutionPattern conv_op) { return failure(); } - Value new_shape_of = rewriter.create( - mul_op.getLoc(), shape_of_op.getType(), new_conv); + Value new_shape_of = shape::ShapeOfOp::create( + rewriter, mul_op.getLoc(), shape_of_op.getType(), new_conv); shape_of_op.replaceAllUsesWith(new_shape_of); rewriter.replaceOp(mul_op, {new_conv}); } diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/tflite_legalize_hlo.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/tflite_legalize_hlo.cc index 0c43a5c4047a64..b283dea3098232 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/tflite_legalize_hlo.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/tflite_legalize_hlo.cc @@ -69,7 +69,7 @@ arith::ConstantOp ShapeToConst(PatternRewriter& rewriter, Value value) { auto attr_type = RankedTensorType::get({static_cast(shape.size())}, rewriter.getIntegerType(64)); auto attr = DenseElementsAttr::get(attr_type, shape); - return rewriter.create(value.getLoc(), attr_type, attr); + return arith::ConstantOp::create(rewriter, value.getLoc(), attr_type, attr); } // Returns true if broadcast_dimensions obey Tensorflow convention, as in new @@ -107,7 +107,7 @@ arith::ConstantOp ExpandedShape(OpBuilder& b, Value input, auto attr_type = RankedTensorType::get( {static_cast(expanded_shape.size())}, b.getIntegerType(32)); auto attr = DenseElementsAttr::get(attr_type, expanded_shape); - return b.create(output.getLoc(), attr_type, attr); + return arith::ConstantOp::create(b, output.getLoc(), attr_type, attr); } Value ExpandedDynamicShape(OpBuilder& b, Value input, @@ -132,7 
+132,7 @@ Value ExpandedDynamicShape(OpBuilder& b, Value input, for (int64_t i : expanded_dimensions) { auto index_attr = DenseIntElementsAttr::get( RankedTensorType::get({}, b.getI64Type()), {i}); - Value index = b.create(output.getLoc(), index_attr); + Value index = arith::ConstantOp::create(b, output.getLoc(), index_attr); auto cur_type = llvm::cast(expanded_input.getType()); auto cur_shape = cur_type.getShape(); @@ -145,8 +145,8 @@ Value ExpandedDynamicShape(OpBuilder& b, Value input, auto new_type = RankedTensorType::get(new_shape, cur_type.getElementType()); - expanded_input = b.create(output.getLoc(), new_type, - expanded_input, index); + expanded_input = TFL::ExpandDimsOp::create(b, output.getLoc(), new_type, + expanded_input, index); } return expanded_input; diff --git a/tensorflow/compiler/mlir/lite/stablehlo/transforms/unfold_splat_constant_pass.cc b/tensorflow/compiler/mlir/lite/stablehlo/transforms/unfold_splat_constant_pass.cc index b0a023494f1ca4..b5aded528cdc25 100644 --- a/tensorflow/compiler/mlir/lite/stablehlo/transforms/unfold_splat_constant_pass.cc +++ b/tensorflow/compiler/mlir/lite/stablehlo/transforms/unfold_splat_constant_pass.cc @@ -74,18 +74,17 @@ class UnfoldSplatConstantPass return; } op_builder->setInsertionPoint(const_op); - Value scalar = op_builder->create( - const_op->getLoc(), + Value scalar = mhlo::ConstantOp::create( + *op_builder, const_op->getLoc(), DenseElementsAttr::get( RankedTensorType::get(/*shape=*/{}, element_type), splat_elements_attr.getSplatValue())); auto broadcast_dims = DenseIntElementsAttr::get( RankedTensorType::get(/*shape=*/{0}, op_builder->getI64Type()), llvm::SmallVector{}); - mhlo::BroadcastInDimOp broadcast_in_dim_op = - op_builder->create( - const_op->getLoc(), splat_elements_attr.getType(), scalar, - broadcast_dims); + mhlo::BroadcastInDimOp broadcast_in_dim_op = mhlo::BroadcastInDimOp::create( + *op_builder, const_op->getLoc(), splat_elements_attr.getType(), scalar, + broadcast_dims); const_op->replaceAllUsesWith(broadcast_in_dim_op); const_op->erase(); } diff --git a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/external_buffer.mlir b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/external_buffer.mlir new file mode 100644 index 00000000000000..987f5a90e374f5 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/external_buffer.mlir @@ -0,0 +1,14 @@ +// RUN: flatbuffer_translate -mlir-to-tflite-flatbuffer %s -o - | flatbuffer_translate --tflite-flatbuffer-to-mlir - -o - | FileCheck %s + +module { + func.func public @main(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + %0 = "tfl.external_const"() <{external_buffer = #tfl.external_buffer}> : () -> tensor<2x2xf32> + %1 = tfl.add %arg0, %0 {fused_activation_function = "NONE"} : tensor<2x2xf32> + return %1 : tensor<2x2xf32> + } +} + +// CHECK-LABEL: @main +// CHECK: %0 = "tfl.external_const"() <{external_buffer = #tfl.external_buffer}> +// CHECK-NEXT: %1 = tfl.add %arg0, %0 {fused_activation_function = "NONE"} : tensor<2x2xf32> +// CHECK-NEXT: return %1 diff --git a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/external_buffer.mlir b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/external_buffer.mlir new file mode 100644 index 00000000000000..09d7e764b1f7a2 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/external_buffer.mlir @@ -0,0 +1,34 @@ +// RUN: flatbuffer_translate -mlir-to-tflite-flatbuffer %s -o - | flatbuffer_to_string - | FileCheck %s + +module { + func.func public @main(%arg0: tensor<2x2xf32>) -> 
tensor<2x2xf32> { + %0 = "tfl.external_const"() <{external_buffer = #tfl.external_buffer}> : () -> tensor<2x2xf32> + %1 = tfl.add %arg0, %0 {fused_activation_function = "NONE"} : tensor<2x2xf32> + return %1 : tensor<2x2xf32> + } +} + +// CHECK: tensors: [ { +// CHECK: shape: [ 2, 2 ], +// CHECK: buffer: 1, +// CHECK: name: "arg0", +// CHECK: has_rank: true +// CHECK: }, { +// CHECK: shape: [ 2, 2 ], +// CHECK: name: "tfl.external_const", +// CHECK: has_rank: true, +// CHECK: external_buffer: 2147483648 +// CHECK: }, { +// CHECK: shape: [ 2, 2 ], +// CHECK: buffer: 2, +// CHECK: name: "tfl.add", +// CHECK: has_rank: true +// CHECK: } ], +// CHECK: external_buffer_groups: [ { +// CHECK: name: "test.bin" +// CHECK: } ], +// CHECK: external_buffers: [ { +// CHECK: id: 2147483648, +// CHECK: length: 13, +// CHECK: packing: "unpacked" +// CHECK: } ] diff --git a/tensorflow/compiler/mlir/lite/transforms/decompose_hybrid_quantization.cc b/tensorflow/compiler/mlir/lite/transforms/decompose_hybrid_quantization.cc index 6b92b5f63ee66f..e04be6148b7b1d 100644 --- a/tensorflow/compiler/mlir/lite/transforms/decompose_hybrid_quantization.cc +++ b/tensorflow/compiler/mlir/lite/transforms/decompose_hybrid_quantization.cc @@ -93,7 +93,7 @@ class DequantizeConverter : public OpRewritePattern { if (QuantizedType::getQuantizedElementType(operand.getType())) { auto newTy = QuantizedType::castToExpressedType(operand.getType()); newOperands.push_back( - rewriter.create(loc, newTy, operand)); + TFL::DequantizeOp::create(rewriter, loc, newTy, operand)); continue; } @@ -109,9 +109,8 @@ class DequantizeConverter : public OpRewritePattern { newResultTys.push_back(resultTy); } - auto newResults = rewriter - .create(loc, newResultTys, newOperands, - op->getAttrDictionary().getValue()) + auto newResults = SrcOp::create(rewriter, loc, newResultTys, newOperands, + op->getAttrDictionary().getValue()) .getOperation() ->getResults(); @@ -120,8 +119,8 @@ class DequantizeConverter : public OpRewritePattern { Value result = newResults[i]; Type resultTy = op->getOpResult(i).getType(); if (QuantizedType::getQuantizedElementType(resultTy)) { - replaceResults.push_back(rewriter.create( - loc, resultTy, result, TypeAttr::get(resultTy))); + replaceResults.push_back(TFL::QuantizeOp::create( + rewriter, loc, resultTy, result, TypeAttr::get(resultTy))); continue; } diff --git a/tensorflow/compiler/mlir/lite/transforms/if_outline.cc b/tensorflow/compiler/mlir/lite/transforms/if_outline.cc index 0e7c03dd32b35f..c45d5f74b8988d 100644 --- a/tensorflow/compiler/mlir/lite/transforms/if_outline.cc +++ b/tensorflow/compiler/mlir/lite/transforms/if_outline.cc @@ -84,7 +84,7 @@ func::FuncOp CreateOutlineFuncAndEraseRegion( type = FunctionType::get(context, types, result_types); // Create outlined function and move region body to it. - auto outlined_func = func_builder.create(loc, name, type); + auto outlined_func = func::FuncOp::create(func_builder, loc, name, type); outlined_func.getBody().takeBody(region); Region& func_region = outlined_func.getBody(); @@ -97,8 +97,8 @@ func::FuncOp CreateOutlineFuncAndEraseRegion( // Replace yield op with return. 
Operation* yield_op = outlined_func.getBody().front().getTerminator(); OpBuilder return_builder(yield_op); - return_builder.create(yield_op->getLoc(), - yield_op->getOperands()); + func::ReturnOp::create(return_builder, yield_op->getLoc(), + yield_op->getOperands()); yield_op->erase(); SymbolTable(region.getParentOfType()).insert(outlined_func); @@ -121,8 +121,8 @@ void ReplaceRegionWithCall(StringRef name, Region& region, new_operands.push_back(block->addArgument(t, loc)); } new_operands.append(extern_values.begin(), extern_values.end()); - auto call = b.create(loc, func, new_operands); - b.create(loc, call.getResults()); + auto call = func::CallOp::create(b, loc, func, new_operands); + YieldOp::create(b, loc, call.getResults()); } void IfOutlinePass::OutlineIf(IfOp if_op) { diff --git a/tensorflow/compiler/mlir/lite/transforms/insert_call_once_op.cc b/tensorflow/compiler/mlir/lite/transforms/insert_call_once_op.cc index 4e1fe8e012211a..7a85d60b51d6eb 100644 --- a/tensorflow/compiler/mlir/lite/transforms/insert_call_once_op.cc +++ b/tensorflow/compiler/mlir/lite/transforms/insert_call_once_op.cc @@ -51,7 +51,7 @@ void InsertCallOnceOpFromSessionInitializerPass::runOnOperation() { OpBuilder builder(func.getContext()); builder.setInsertionPointToStart(&func.getBlocks().front()); - builder.create(func.getLoc(), init_func_op.getName()); + TFL::CallOnceOp::create(builder, func.getLoc(), init_func_op.getName()); } } } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index 36091686021e2c..26c5496ff3b08b 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -75,14 +75,14 @@ def CreateInt32ConstOrCast : NativeCodeCall< // Creates an int32 constant op from an integer attribute $0. def CreateInt32ConstOpFromIntAttr - : NativeCodeCall<"$_builder.create($_loc, DenseElementsAttr::get(RankedTensorType::get({}, $_builder.getI32Type()), {static_cast(llvm::cast($0).getInt())}))">; + : NativeCodeCall<"TF::ConstOp::create($_builder, $_loc, DenseElementsAttr::get(RankedTensorType::get({}, $_builder.getI32Type()), {static_cast(llvm::cast($0).getInt())}))">; //===----------------------------------------------------------------------===// // Nullary ops patterns. 
//===----------------------------------------------------------------------===// def createConstOp - : NativeCodeCall<"$_builder.create($_loc, $0.getType(), $1)">; + : NativeCodeCall<"ConstOp::create($_builder, $_loc, $0.getType(), $1)">; def LegalizeTFConstToTFLConst: Pat<(TF_ConstOp:$res ElementsAttr:$value), (createConstOp $res, $value)>; diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.td b/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.td index 9894e7df7587f9..ce9b6af564d2a4 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tensorlist.td @@ -20,7 +20,7 @@ include "mlir/IR/OpBase.td" include "mlir/Dialect/Arith/IR/ArithOps.td" def ConstDenseElementsI32ZeroAttr - : NativeCodeCall<"$_builder.create($_loc, DenseElementsAttr::get(RankedTensorType::get({}, $_builder.getI32Type()), {0}))">; + : NativeCodeCall<"TFL::ConstOp::create($_builder, $_loc, DenseElementsAttr::get(RankedTensorType::get({}, $_builder.getI32Type()), {0}))">; def Size1InputRange : NativeCodeCall< "SmallVector{$0}">; diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_batch_matmul_pass.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_batch_matmul_pass.cc index bfeea6d6e6373a..668493eca931e7 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_batch_matmul_pass.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_batch_matmul_pass.cc @@ -135,8 +135,8 @@ struct ConvertBatchMatMulOp2FullyConnectedOp_Rank2ConstantRhs // mapped to X and Z dimension. std::iter_swap(permute.begin() + input_rank - 1, permute.begin() + input_rank - 2); - auto permutation_tensor_op = rewriter.create( - bmm_op->getLoc(), permuation_tensor_type, + auto permutation_tensor_op = arith::ConstantOp::create( + rewriter, bmm_op->getLoc(), permuation_tensor_type, DenseElementsAttr::get(permuation_tensor_type, permute)); auto input_shape = input_type.getShape(); @@ -181,9 +181,8 @@ struct ConvertBatchMatMulOp2FullyConnectedOp_Rank2ConstantRhs RankedTensorType::get(permuted_shape, input_type.getElementType()); } - return rewriter.create( - bmm_op->getLoc(), output_type, input, - permutation_tensor_op.getResult()); + return TFL::TransposeOp::create(rewriter, bmm_op->getLoc(), output_type, + input, permutation_tensor_op.getResult()); }; Value input_lhs = bmm_op.getX(); @@ -198,10 +197,11 @@ struct ConvertBatchMatMulOp2FullyConnectedOp_Rank2ConstantRhs !bmm_op.getAdjY() ? 
create_z_x_transpose_op(input_rhs) : input_rhs; Type output_type = bmm_op.getResult().getType(); - auto no_input = rewriter.create( - bmm_op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr()); - auto fc_op = rewriter.create( - bmm_op->getLoc(), ArrayRef{output_type}, + auto no_input = + TFL::NoValueOp::create(rewriter, bmm_op->getLoc(), + rewriter.getNoneType(), rewriter.getUnitAttr()); + auto fc_op = TFL::FullyConnectedOp::create( + rewriter, bmm_op->getLoc(), ArrayRef{output_type}, /*input=*/output_lhs, /*filter=*/output_rhs, /*bias=*/no_input, /*fused_activation_function=*/rewriter.getStringAttr("NONE"), /*weights_format=*/rewriter.getStringAttr("DEFAULT"), @@ -257,13 +257,14 @@ struct ConvertBatchMatMulOpToReduceSum cY = rhs_shape.size() - 1; } - auto reduce_dim_op = rewriter.create( - bmm_op->getLoc(), + auto reduce_dim_op = TFL::ConstOp::create( + rewriter, bmm_op->getLoc(), DenseIntElementsAttr::get( RankedTensorType::get({1}, rewriter.getI32Type()), {cY})); - auto sum_op = rewriter.create( - bmm_op->getLoc(), bmm_op.getType(), bmm_op.getY(), reduce_dim_op, - /*keep_dims=*/rewriter.getBoolAttr(true)); + auto sum_op = + TFL::SumOp::create(rewriter, bmm_op->getLoc(), bmm_op.getType(), + bmm_op.getY(), reduce_dim_op, + /*keep_dims=*/rewriter.getBoolAttr(true)); rewriter.replaceOp(bmm_op, sum_op); return success(); }; @@ -368,19 +369,21 @@ struct FuseRhsTransposeIntoBatchMatMulOp new_reshape_input_shape.push_back( rhs_contracting_dimensions.SizesArray().front()); - Value new_reshape_shape_value = rewriter.create( - bmm_op->getLoc(), + Value new_reshape_shape_value = arith::ConstantOp::create( + rewriter, bmm_op->getLoc(), GetI32ElementsAttr(new_reshape_input_shape, &rewriter)); - auto new_reshape_value = rewriter.create( - bmm_op->getLoc(), transpose_op.getInput(), new_reshape_shape_value); + auto new_reshape_value = TFL::ReshapeOp::create(rewriter, bmm_op->getLoc(), + transpose_op.getInput(), + new_reshape_shape_value); // Replace the BatchMatMulOp with a FullyConnectedOp, if the RHS of BMM has // no broadcasting dimensions. I.e. RHS of BMM is of Rank 2. if (rhs_dimensions_info.batch_dimensions().AxesArray().empty()) { - auto no_input = rewriter.create( - bmm_op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr()); - auto fc_op = rewriter.create( - bmm_op->getLoc(), ArrayRef{bmm_op.getType()}, + auto no_input = TFL::NoValueOp::create(rewriter, bmm_op->getLoc(), + rewriter.getNoneType(), + rewriter.getUnitAttr()); + auto fc_op = TFL::FullyConnectedOp::create( + rewriter, bmm_op->getLoc(), ArrayRef{bmm_op.getType()}, /*input=*/bmm_op.getX(), /*filter=*/new_reshape_value, /*bias=*/no_input, /*fused_activation_function=*/rewriter.getStringAttr("NONE"), @@ -391,9 +394,10 @@ struct FuseRhsTransposeIntoBatchMatMulOp } else { // Replace the BatchMatMulOp with a BatchMatMulOp with adj_y = true and // transpose fused into RHS. 
- auto bmm_op_with_adj_y = rewriter.create( - bmm_op->getLoc(), bmm_op.getType(), bmm_op.getX(), new_reshape_value, - bmm_op.getAdjX(), /*adj_y=*/true, mlir::BoolAttr()); + auto bmm_op_with_adj_y = TFL::BatchMatMulOp::create( + rewriter, bmm_op->getLoc(), bmm_op.getType(), bmm_op.getX(), + new_reshape_value, bmm_op.getAdjX(), /*adj_y=*/true, + mlir::BoolAttr()); rewriter.replaceOp(bmm_op, {bmm_op_with_adj_y.getResult()}); } diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_broadcast_like_pass.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_broadcast_like_pass.cc index aed2946db17ba3..21b1963998d0d5 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_broadcast_like_pass.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_broadcast_like_pass.cc @@ -131,8 +131,9 @@ Value PrepareBroadcastLikeOpInput(Operation* op, PatternRewriter& rewriter) { RankedTensorType::get({}, elements_attr.getType().getElementType()), elements_attr.getSplatValue()); - return rewriter.create( - op->getLoc(), scalar_elements_attr.getType(), scalar_elements_attr); + return arith::ConstantOp::create(rewriter, op->getLoc(), + scalar_elements_attr.getType(), + scalar_elements_attr); } return nullptr; } @@ -380,10 +381,10 @@ LogicalResult ReorderBroadcastToCast::matchAndRewrite( : static_cast(UnrankedTensorType::get( old_cast_op_output_type.getElementType())); - auto new_cast_op = rewriter.create( - fused_loc, new_cast_op_output_type, input_value); - auto new_broadcast_to_op = rewriter.create( - fused_loc, old_cast_op_output_type, new_cast_op.getOutput(), + auto new_cast_op = TFL::CastOp::create(rewriter, fused_loc, + new_cast_op_output_type, input_value); + auto new_broadcast_to_op = TFL::BroadcastToOp::create( + rewriter, fused_loc, old_cast_op_output_type, new_cast_op.getOutput(), broadcast_to_op.getShape()); rewriter.replaceOp(cast_op, new_broadcast_to_op.getOutput()); diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_pass.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_pass.cc index ec6e2b5902503f..062d9c1e712de2 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_pass.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_pass.cc @@ -502,8 +502,8 @@ Value GetBiasMultiplier(OpBuilder& builder, Value binary_op, (llvm::isa(binary_op.getDefiningOp()) ? 
1.0 : -1.0); Attribute constant_attr = FloatAttr::get(element_type, multiplier); - return builder.create( - binary_op.getLoc(), + return arith::ConstantOp::create( + builder, binary_op.getLoc(), DenseFPElementsAttr::get(RankedTensorType::get({}, element_type), constant_attr)); } @@ -677,10 +677,10 @@ Value ReshapeValueDroppingLastDim(OpBuilder& builder, Value value) { } else { new_shape.push_back(-1); } - return builder.create( - value.getLoc(), value, - builder.create( - value.getLoc(), + return ReshapeOp::create( + builder, value.getLoc(), value, + arith::ConstantOp::create( + builder, value.getLoc(), DenseIntElementsAttr::get( RankedTensorType::get(type.getRank() - 1, builder.getI32Type()), new_shape))); @@ -754,9 +754,8 @@ Value Get1DShapeValue(OpBuilder& builder, Value value) { } auto output_type = RankedTensorType::get({1}, builder.getI32Type()); const int num_elements = type.getNumElements(); - return builder.create( - value.getLoc(), output_type, - DenseIntElementsAttr::get(output_type, num_elements)); + return ConstOp::create(builder, value.getLoc(), output_type, + DenseIntElementsAttr::get(output_type, num_elements)); } Type GetEmbeddingLookupShape(Value lookup, Value value) { @@ -780,8 +779,8 @@ mlir::Value GetFcOutput(OpBuilder* builder, StringAttr fused_activation_function, StringAttr weights_format, BoolAttr keep_num_dims, BoolAttr asymmetric_quantize_inputs) { - auto fc_op = builder->create( - result[0].getLoc(), result.getTypes(), input, filter, bias, + auto fc_op = FullyConnectedOp::create( + *builder, result[0].getLoc(), result.getTypes(), input, filter, bias, fused_activation_function, weights_format, keep_num_dims, asymmetric_quantize_inputs); return fc_op->getResult(0); @@ -973,13 +972,13 @@ struct SqueezeReshapesAroundBroadcastOp .drop_back(num_trailing_broadcast_dims) .drop_front(num_leading_broadcast_dims)}; - Value new_reshape_shape_value = rewriter.create( - inner_reshape_op->getLoc(), + Value new_reshape_shape_value = arith::ConstantOp::create( + rewriter, inner_reshape_op->getLoc(), GetI32ElementsAttr(new_reshape_shape_i32, &rewriter)); - auto new_inner_reshape_op = rewriter.create( - inner_reshape_op->getLoc(), inner_reshape_input, - new_reshape_shape_value); + auto new_inner_reshape_op = + TFL::ReshapeOp::create(rewriter, inner_reshape_op->getLoc(), + inner_reshape_input, new_reshape_shape_value); // Create a new reshape_op to replace the old inner reshape_op. rewriter.replaceOp(inner_reshape_op, new_inner_reshape_op.getResult()); @@ -990,11 +989,12 @@ struct SqueezeReshapesAroundBroadcastOp .drop_back(num_trailing_broadcast_dims) .drop_front(num_leading_broadcast_dims)}; - Value new_broadcast_shape_value = rewriter.create( - loc, GetI64ElementsAttr(new_broadcast_shape, &rewriter)); + Value new_broadcast_shape_value = arith::ConstantOp::create( + rewriter, loc, GetI64ElementsAttr(new_broadcast_shape, &rewriter)); - auto new_broadcast_to_op = rewriter.create( - loc, RankedTensorType::get(new_broadcast_shape, rewriter.getF32Type()), + auto new_broadcast_to_op = TFL::BroadcastToOp::create( + rewriter, loc, + RankedTensorType::get(new_broadcast_shape, rewriter.getF32Type()), new_inner_reshape_op.getOutput(), new_broadcast_shape_value); // Create a new broadcast_op to replace the old broadcast_op. 
@@ -1055,18 +1055,19 @@ struct FuseAddAndStridedSlice : public OpRewritePattern { added_value.reshape(RankedTensorType::get( {num_dims}, mlir::cast(added_value.getType()).getElementType())); - ::mlir::arith::ConstantOp new_end = rewriter.create( - strided_slice_op.getEnd().getLoc(), new_added_value); + ::mlir::arith::ConstantOp new_end = arith::ConstantOp::create( + rewriter, strided_slice_op.getEnd().getLoc(), new_added_value); if (strided_slice_op.getBeginMask() != 0) return failure(); if (strided_slice_op.getEndMask() != 0) return failure(); if (strided_slice_op.getEllipsisMask() != 0) return failure(); mlir::TFL::StridedSliceOp new_strided_slice_op = - rewriter.create( - strided_slice_op.getLoc(), strided_slice_op.getOutput().getType(), - strided_slice_op.getInput(), strided_slice_op.getBegin(), new_end, - strided_slice_op.getStrides(), strided_slice_op.getBeginMask(), - strided_slice_op.getEndMask(), strided_slice_op.getEllipsisMask(), + TFL::StridedSliceOp::create( + rewriter, strided_slice_op.getLoc(), + strided_slice_op.getOutput().getType(), strided_slice_op.getInput(), + strided_slice_op.getBegin(), new_end, strided_slice_op.getStrides(), + strided_slice_op.getBeginMask(), strided_slice_op.getEndMask(), + strided_slice_op.getEllipsisMask(), strided_slice_op.getNewAxisMask(), strided_slice_op.getShrinkAxisMask(), /*offset=*/true); @@ -1186,24 +1187,26 @@ struct Convert2DUpscalingToResizeNearestNeighor SmallVector reshape_shape_in_int64( {1, image_size, image_size, feature_size}); - auto reshape_shape_const_op = rewriter.create( - gather_nd_first->getLoc(), - GetI32ElementsAttr(reshape_shape, &rewriter)); + auto reshape_shape_const_op = + TFL::ConstOp::create(rewriter, gather_nd_first->getLoc(), + GetI32ElementsAttr(reshape_shape, &rewriter)); - auto reshape_op = rewriter.create( - gather_nd_first->getLoc(), + auto reshape_op = TFL::ReshapeOp::create( + rewriter, gather_nd_first->getLoc(), tensorflow::GetTypeFromTFTensorShape(reshape_shape_in_int64, result_type.getElementType()), params_value, reshape_shape_const_op.getResult()); // Add TFL::resize_nearest_neighor op for 2x upscaling. 
SmallVector size_vec = {image_size * 2, image_size * 2}; - auto size_const_op = rewriter.create( - gather_nd_first->getLoc(), GetI32ElementsAttr(size_vec, &rewriter)); + auto size_const_op = + TFL::ConstOp::create(rewriter, gather_nd_first->getLoc(), + GetI32ElementsAttr(size_vec, &rewriter)); - auto resize = rewriter.create( - gather_nd_first->getLoc(), transpose_second.getResult().getType(), - reshape_op.getResult(), size_const_op.getResult(), false, false); + auto resize = TFL::ResizeNearestNeighborOp::create( + rewriter, gather_nd_first->getLoc(), + transpose_second.getResult().getType(), reshape_op.getResult(), + size_const_op.getResult(), false, false); rewriter.replaceOp(transpose_second, resize.getResult()); return success(); @@ -1233,13 +1236,13 @@ static std::optional GetAs1DValue(PatternRewriter& rewriter, Value value, RankedTensorType::get({num_channels}, type.getElementType()); auto splat_attr = DenseElementsAttr::get(splat_type, attr.getSplatValue()); - return rewriter.create(value.getLoc(), splat_attr); + return arith::ConstantOp::create(rewriter, value.getLoc(), splat_attr); } if (HasOneTailUnitDimension(attr) && attr.getNumElements() == num_channels) { auto flattened = FlattenTo1D(attr); - return rewriter.create(value.getLoc(), flattened); + return arith::ConstantOp::create(rewriter, value.getLoc(), flattened); } } @@ -1259,7 +1262,7 @@ static std::optional GetBiasIn1D(PatternRewriter& rewriter, Value bias, RankedTensorType type = RankedTensorType::get({num_channels}, fallback_element_type); auto attr = rewriter.getZeroAttr(type); - return rewriter.create(bias.getLoc(), type, attr); + return arith::ConstantOp::create(rewriter, bias.getLoc(), type, attr); } auto bias_type = mlir::dyn_cast(bias.getType()); @@ -1377,34 +1380,34 @@ struct FuseFullyConnectedAndAdd : public OpRewritePattern { } auto new_bias = - rewriter - .create(add_op.getLoc(), bias_1d.value(), add_rhs_1d.value(), - rewriter.getStringAttr("NONE")) + AddOp::create(rewriter, add_op.getLoc(), bias_1d.value(), + add_rhs_1d.value(), rewriter.getStringAttr("NONE")) .getOutput(); mlir::Value out = - rewriter - .create( - mlir::FusedLoc::get(fc_op.getContext(), - {fc_op.getLoc(), add_op.getLoc()}), - fc_output_type, - /*input=*/fc_op.getInput(), - /*filter=*/filter, - /*bias=*/new_bias, - /*fused_activation_function=*/ - rewriter.getStringAttr(add_op.getFusedActivationFunction()), - /*weights_format=*/ - rewriter.getStringAttr(fc_op.getWeightsFormat()), - /*keep_num_dims=*/rewriter.getBoolAttr(fc_op.getKeepNumDims()), - /*asymmetric_quantize_inputs=*/ - fc_op.getAsymmetricQuantizeInputsAttr()) + TFL::FullyConnectedOp::create( + rewriter, + mlir::FusedLoc::get(fc_op.getContext(), + {fc_op.getLoc(), add_op.getLoc()}), + fc_output_type, + /*input=*/fc_op.getInput(), + /*filter=*/filter, + /*bias=*/new_bias, + /*fused_activation_function=*/ + rewriter.getStringAttr(add_op.getFusedActivationFunction()), + /*weights_format=*/ + rewriter.getStringAttr(fc_op.getWeightsFormat()), + /*keep_num_dims=*/rewriter.getBoolAttr(fc_op.getKeepNumDims()), + /*asymmetric_quantize_inputs=*/ + fc_op.getAsymmetricQuantizeInputsAttr()) .getOutput()[0]; if (fc_output_type.getShape() != add_output_type.getShape()) { - auto target_shape = rewriter.create( - add_op.getLoc(), rewriter.getI32TensorAttr(llvm::SmallVector( - add_output_type.getShape()))); - out = rewriter.create(add_op.getLoc(), add_output_type, out, - target_shape); + auto target_shape = arith::ConstantOp::create( + rewriter, add_op.getLoc(), + rewriter.getI32TensorAttr( + 
llvm::SmallVector(add_output_type.getShape()))); + out = ReshapeOp::create(rewriter, add_op.getLoc(), add_output_type, out, + target_shape); } rewriter.replaceOp(add_op, out); @@ -1471,8 +1474,8 @@ struct FuseAddAndFullyConnected return failure(); } - auto new_bias = rewriter.create( - fc_op.getLoc(), old_bias.getType(), + auto new_bias = TFL::FullyConnectedOp::create( + rewriter, fc_op.getLoc(), old_bias.getType(), /*input=*/add_op.getRhs(), /*filter=*/fc_op.getFilter(), /*bias=*/old_bias, @@ -1482,7 +1485,8 @@ struct FuseAddAndFullyConnected /*asymmetric_quantize_inputs=*/fc_op.getAsymmetricQuantizeInputsAttr()); // Create the updated FC. - auto new_fc = rewriter.create( + auto new_fc = TFL::FullyConnectedOp::create( + rewriter, FusedLoc::get(add_op.getContext(), {add_op.getLoc(), fc_op.getLoc()}), fc_op.getOutput().getTypes(), /*input=*/add_op.getLhs(), @@ -1557,14 +1561,14 @@ struct FuseMulAndFullyConnected auto location = FusedLoc::get(mul_op.getContext(), {mul_op.getLoc(), fc_op.getLoc()}); - auto new_filter = rewriter.create( - location, + auto new_filter = TFL::MulOp::create( + rewriter, location, /*lhs=*/fc_op.getFilter(), /*rhs=*/mul_op.getRhs(), /*fused_activation_function=*/rewriter.getStringAttr("NONE")); // Create the updated FC. - auto new_fc = rewriter.create( - location, fc_op.getOutput().getTypes(), + auto new_fc = TFL::FullyConnectedOp::create( + rewriter, location, fc_op.getOutput().getTypes(), /*input=*/mul_op.getLhs(), /*filter=*/new_filter, /*bias=*/fc_op.getBias(), @@ -1597,7 +1601,8 @@ struct FuseFullyConnectedAndReluX : public OpRewritePattern { rewriter.getStringAttr(fully_connected_op.getWeightsFormat()); auto new_keep_num_dims = rewriter.getBoolAttr(fully_connected_op.getKeepNumDims()); - auto fc = rewriter.create( + auto fc = FullyConnectedOp::create( + rewriter, FusedLoc::get(relu_op.getContext(), {fully_connected_op.getLoc(), relu_op.getLoc()}), relu_op.getType(), /*input=*/fully_connected_op.getInput(), @@ -1674,7 +1679,7 @@ struct FuseFullyConnectedAndMul : public OpRewritePattern { } auto new_op = - rewriter.create(mul_op.getLoc(), new_type, new_cst); + arith::ConstantOp::create(rewriter, mul_op.getLoc(), new_type, new_cst); Value new_const_val = new_op.getResult(); // Rewrite. Since the folder of TFL::MulOp couldn't broadcast the operands, @@ -1689,15 +1694,16 @@ struct FuseFullyConnectedAndMul : public OpRewritePattern { if (size > (1 << 30)) return failure(); } auto new_filter = - rewriter.create(mul_op.getLoc(), filter, new_const_val) + TF::MulOp::create(rewriter, mul_op.getLoc(), filter, new_const_val) .getZ(); // If bias isn't None, it needs to be multiplied as well. if (!mlir::isa(bias.getType())) { - bias = rewriter.create(mul_op.getLoc(), bias, constant_val) + bias = TF::MulOp::create(rewriter, mul_op.getLoc(), bias, constant_val) .getZ(); } - auto fc = rewriter.create( + auto fc = TFL::FullyConnectedOp::create( + rewriter, FusedLoc::get(fc_op.getContext(), {fc_op.getLoc(), mul_op.getLoc()}), mul_op.getType(), /*input=*/fc_op.getInput(), @@ -1848,13 +1854,13 @@ struct FuseAffinOpAndMulWithQDQs : public OpRewritePattern { DenseElementsAttr broadcasted_gamma_attr = ExpandTo4DForConv(gamma_cst, filter_output_dim); auto broadcasted_gamma = - rewriter.create(loc, broadcasted_gamma_attr); + ConstOp::create(rewriter, loc, broadcasted_gamma_attr); // Inject a mul between the filter constant and the quantize op. 
- auto new_filter = rewriter - .create(loc, filter, broadcasted_gamma, - rewriter.getStringAttr("NONE")) - .getResult(); + auto new_filter = + TFL::MulOp::create(rewriter, loc, filter, broadcasted_gamma, + rewriter.getStringAttr("NONE")) + .getResult(); // Update the scale in the quantize op. auto new_qtype = RescaleQtype(q_op.getQtype(), gamma_cst); if (!new_qtype) { @@ -1869,11 +1875,11 @@ struct FuseAffinOpAndMulWithQDQs : public OpRewritePattern { auto squeezed_gamma = FlattenTo1D(gamma_cst); auto squeezed_gamma_type = squeezed_gamma.getType(); - auto squeezed_gamma_op = rewriter.create( - affine_op.getLoc(), squeezed_gamma_type, squeezed_gamma); + auto squeezed_gamma_op = arith::ConstantOp::create( + rewriter, affine_op.getLoc(), squeezed_gamma_type, squeezed_gamma); - auto new_bias = rewriter.create( - loc, bias, squeezed_gamma_op, rewriter.getStringAttr("NONE")); + auto new_bias = TFL::MulOp::create(rewriter, loc, bias, squeezed_gamma_op, + rewriter.getStringAttr("NONE")); affine_op.getOperation()->replaceUsesOfWith(bias, new_bias); } @@ -1977,7 +1983,7 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { } auto new_bias = DenseFPElementsAttr::get(new_bias_type, new_bias_values); auto new_bias_op = - rewriter.create(fc_op.getLoc(), new_bias_type, new_bias); + ConstOp::create(rewriter, fc_op.getLoc(), new_bias_type, new_bias); fc_op.setOperand(0, binary_op->getOperand(0)); fc_op.setOperand(2, new_bias_op); } else if (llvm::isa(binary_op)) { @@ -1992,8 +1998,8 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern { }); // We recreate the constant op in case it is shared by the other ops. This // might increase the model size. - auto new_filter_op = rewriter.create( - fc_op.getLoc(), filter.getType(), new_filter); + auto new_filter_op = ConstOp::create(rewriter, fc_op.getLoc(), + filter.getType(), new_filter); fc_op.setOperand(0, binary_op->getOperand(0)); if (fc_op.getFilter() != filter) { // This filter goes through quantize and dequantize ops. Then we just @@ -2186,8 +2192,9 @@ struct FuseUnpackAndConcatToReshape new_shape_array_i32.push_back( ShapedType::isDynamic(size) ? -1 : static_cast(size)); } - auto new_shape = rewriter.create( - concat_op.getLoc(), GetI32ElementsAttr(new_shape_array_i32, &rewriter)); + auto new_shape = TFL::ConstOp::create( + rewriter, concat_op.getLoc(), + GetI32ElementsAttr(new_shape_array_i32, &rewriter)); rewriter.replaceOpWithNewOp( concat_op, output_type, unpack_op.getInput(), new_shape); @@ -2273,8 +2280,8 @@ struct OptimizeTopK : public OpRewritePattern { auto k = !values.use_empty() ? k_values : k_indices; // Build scalar tensor k. auto k_ty = mlir::RankedTensorType::get({}, rewriter.getIntegerType(32)); - Value k_cst = rewriter.create( - op.getLoc(), DenseElementsAttr::get(k_ty, k)); + Value k_cst = TFL::ConstOp::create(rewriter, op.getLoc(), + DenseElementsAttr::get(k_ty, k)); // Compute new result types. auto values_ty = mlir::dyn_cast(values.getType()); auto indices_ty = mlir::dyn_cast(indices.getType()); @@ -2287,8 +2294,9 @@ struct OptimizeTopK : public OpRewritePattern { mlir::RankedTensorType::get(shape, values_ty.getElementType()); auto new_indices_ty = mlir::RankedTensorType::get(shape, indices_ty.getElementType()); - TFL::TopKV2Op top_k_op = rewriter.create( - op.getLoc(), new_values_ty, new_indices_ty, op->getOperand(0), k_cst); + TFL::TopKV2Op top_k_op = + TFL::TopKV2Op::create(rewriter, op.getLoc(), new_values_ty, + new_indices_ty, op->getOperand(0), k_cst); // Remove original ops (topk, Slice, Slice). 
if (!values.use_empty()) { @@ -2376,10 +2384,12 @@ struct FuseReshapeAndTransposeAroundBatchMatmul static_cast(std::accumulate( transpose_input.getType().getShape().begin() + 2, transpose_input.getType().getShape().end(), 1, std::multiplies()))}; - auto shape_constant = rewriter.create( - batch_matmul.getLoc(), GetI32ElementsAttr(new_shape, &rewriter)); - auto reshaped_input = rewriter.create( - batch_matmul.getLoc(), transpose_op.getInput(), shape_constant); + auto shape_constant = + ConstOp::create(rewriter, batch_matmul.getLoc(), + GetI32ElementsAttr(new_shape, &rewriter)); + auto reshaped_input = + ReshapeOp::create(rewriter, batch_matmul.getLoc(), + transpose_op.getInput(), shape_constant); rewriter.replaceOpWithNewOp( op, op.getType(), reshaped_input, batch_matmul.getX(), /*adj_x=*/false, /*adj_y=*/!batch_matmul.getAdjX(), @@ -2438,10 +2448,10 @@ struct FuseTransposeReshapeIntoBatchMatmul reshape_op.getType().getShape().drop_front().begin(), reshape_op.getType().getShape().drop_front().end()); new_shape.push_back(reshape_op.getType().getDimSize(0)); - auto shape_constant = rewriter.create( - op.getLoc(), GetI32ElementsAttr(new_shape, &rewriter)); - auto new_reshape = rewriter.create( - op.getLoc(), transpose_op.getInput(), shape_constant); + auto shape_constant = ConstOp::create( + rewriter, op.getLoc(), GetI32ElementsAttr(new_shape, &rewriter)); + auto new_reshape = ReshapeOp::create( + rewriter, op.getLoc(), transpose_op.getInput(), shape_constant); rewriter.replaceOpWithNewOp( op, op.getType(), op.getX(), new_reshape, op.getAdjX(), !op.getAdjY(), op.getAsymmetricQuantizeInputsAttr()); @@ -2647,8 +2657,8 @@ struct UndoBroadcastFullyConnectedBiasAddWithQDQs auto new_bias = FlattenTo1D(bias_op.getValueAttr()); auto new_bias_type = new_bias.getType(); - auto new_bias_op = rewriter.create( - bias_op.getLoc(), new_bias_type, new_bias); + auto new_bias_op = arith::ConstantOp::create(rewriter, bias_op.getLoc(), + new_bias_type, new_bias); // Update QuantizeOp with the new bias and its output shape q_op.setOperand(new_bias_op); @@ -2717,10 +2727,11 @@ struct MoveReshapeAfterFullyConnected new_input_shape.pop_back(); new_input_shape.push_back(input_ty.getShape().back()); - auto reshape_before = rewriter.create( - fc.getLoc(), fc.getInput(), - rewriter.create( - fc->getLoc(), GetI32ElementsAttr(new_input_shape, &rewriter))); + auto reshape_before = TFL::ReshapeOp::create( + rewriter, fc.getLoc(), fc.getInput(), + arith::ConstantOp::create( + rewriter, fc->getLoc(), + GetI32ElementsAttr(new_input_shape, &rewriter))); rewriter.replaceOpWithNewOp( reshape, @@ -2864,16 +2875,16 @@ struct PushTransposeThroughSqueeze : public RewritePattern { transpose.getInput().getType().getDimSize(i)); } } - auto new_squeeze = rewriter.create( - squeeze->getLoc(), + auto new_squeeze = TFL::SqueezeOp::create( + rewriter, squeeze->getLoc(), mlir::RankedTensorType::get(new_squeeze_shape, squeeze.getType().getElementType()), transpose.getInput(), rewriter.getI32ArrayAttr(new_squeeze_dims)); - auto new_transpose = rewriter.create( - squeeze->getLoc(), squeeze.getType(), new_squeeze, - rewriter.create( - squeeze->getLoc(), GetI32ElementsAttr(new_perm, &rewriter))); + auto new_transpose = TFL::TransposeOp::create( + rewriter, squeeze->getLoc(), squeeze.getType(), new_squeeze, + arith::ConstantOp::create(rewriter, squeeze->getLoc(), + GetI32ElementsAttr(new_perm, &rewriter))); rewriter.replaceOp(squeeze, new_transpose); return success(); @@ -3000,17 +3011,18 @@ struct ReorderTransposeReshapeTranspose 
mlir::dyn_cast_or_null(reshape.getType()); if (!reshape_type) return failure(); - auto new_reshape_shape_const = rewriter.create( - reshape.getLoc(), GetI32ElementsAttr(new_reshape_shape, &rewriter)); + auto new_reshape_shape_const = arith::ConstantOp::create( + rewriter, reshape.getLoc(), + GetI32ElementsAttr(new_reshape_shape, &rewriter)); - auto new_inner_reshape = rewriter.create( - reshape.getLoc(), + auto new_inner_reshape = TFL::ReshapeOp::create( + rewriter, reshape.getLoc(), RankedTensorType::get(new_reshape_shape, reshape_type.getElementType()), input, new_reshape_shape_const.getResult()); - auto new_inner_tpose = rewriter.create( - inner_tpose.getLoc(), reshape_type, new_inner_reshape, - rewriter.create( - inner_tpose.getLoc(), + auto new_inner_tpose = TFL::TransposeOp::create( + rewriter, inner_tpose.getLoc(), reshape_type, new_inner_reshape, + arith::ConstantOp::create( + rewriter, inner_tpose.getLoc(), GetI32ElementsAttr(new_inner_perm, &rewriter))); rewriter.replaceOp(reshape, new_inner_tpose); @@ -3079,8 +3091,8 @@ struct FullyConnectedSwapOperandsWhenLHSIsConst RankedTensorType intermediate_type = RankedTensorType::get({O, B}, element_type); - auto new_fc = rewriter.create( - loc, + auto new_fc = TFL::FullyConnectedOp::create( + rewriter, loc, /*resultTypes=*/intermediate_type, /*input=*/filter, // Original Filter V[O, I] /*filter=*/input, // Original Input C[B, I] @@ -3096,10 +3108,11 @@ struct FullyConnectedSwapOperandsWhenLHSIsConst RankedTensorType final_shape_type = RankedTensorType::get({B, O}, element_type); - Value transposed_result = rewriter.create( - loc, final_shape_type, new_fc.getResult(0), - rewriter.create( - loc, GetI32ElementsAttr(ArrayRef({1, 0}), &rewriter))); + Value transposed_result = TFL::TransposeOp::create( + rewriter, loc, final_shape_type, new_fc.getResult(0), + arith::ConstantOp::create( + rewriter, loc, + GetI32ElementsAttr(ArrayRef({1, 0}), &rewriter))); rewriter.replaceOp(fc, transposed_result); diff --git a/tensorflow/compiler/mlir/lite/transforms/pin_ops_with_side_effects.cc b/tensorflow/compiler/mlir/lite/transforms/pin_ops_with_side_effects.cc index 7baa0136f1c33c..b93422d3812f6c 100644 --- a/tensorflow/compiler/mlir/lite/transforms/pin_ops_with_side_effects.cc +++ b/tensorflow/compiler/mlir/lite/transforms/pin_ops_with_side_effects.cc @@ -140,15 +140,15 @@ void PinOpsWithSideEffectsPass::runOnOperation() { // Wrap all side-effect producing/dependent operations in a ControlNodeOp. builder.setInsertionPoint(op); Location loc = op->getLoc(); - auto outer_op = builder.create( - loc, op->getResultTypes(), ControlType::get(op->getContext()), - control_tokens); + auto outer_op = ControlNodeOp::create(builder, loc, op->getResultTypes(), + ControlType::get(op->getContext()), + control_tokens); Region region; Block *new_block = new Block; region.push_back(new_block); builder.setInsertionPointToEnd(®ion.front()); Operation *inner_op = builder.clone(*op); - builder.create(loc, inner_op->getResults()); + YieldOp::create(builder, loc, inner_op->getResults()); outer_op.getBody().takeBody(region); // Careful: We can't use outer_op.getResults(), because that also includes // the control token. 
diff --git a/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc index 2538cc423cdf1e..0cf34df94faf6c 100644 --- a/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/post_quantize.cc @@ -183,7 +183,7 @@ std::optional GetConstTensor(PatternRewriter& rewriter, auto const_attr = DenseElementsAttr::get(const_type, vec); auto const_op = - rewriter.create(loc, const_type, const_attr); + arith::ConstantOp::create(rewriter, loc, const_type, const_attr); return const_op.getResult(); } @@ -207,8 +207,8 @@ std::optional ConvertDequantizeOp( auto const_attr = DenseElementsAttr::get(const_type, static_cast(zeropoint[0])); - auto const_op = rewriter.create(op->getLoc(), const_type, - const_attr); + auto const_op = arith::ConstantOp::create(rewriter, op->getLoc(), + const_type, const_attr); zp_val = const_op.getResult(); } else { SmallVector shape; @@ -224,8 +224,8 @@ std::optional ConvertDequantizeOp( auto const_attr = DenseElementsAttr::get(const_type, static_cast(scale[0])); - auto const_op = rewriter.create(op->getLoc(), const_type, - const_attr); + auto const_op = arith::ConstantOp::create(rewriter, op->getLoc(), + const_type, const_attr); scale_val = const_op.getResult(); } else { SmallVector shape; @@ -237,16 +237,17 @@ std::optional ConvertDequantizeOp( if (!zp_val || !scale_val) return std::nullopt; auto op1_cast_in = - rewriter.create(op->getLoc(), output_type, input_value); + TFL::CastOp::create(rewriter, op->getLoc(), output_type, input_value); - auto op2_sub_op1 = rewriter.create( - op->getLoc(), output_type, op1_cast_in.getResult(), zp_val.value(), + auto op2_sub_op1 = TFL::SubOp::create( + rewriter, op->getLoc(), output_type, op1_cast_in.getResult(), + zp_val.value(), /*fused_activation_function=*/rewriter.getStringAttr("NONE")); - return rewriter - .create( - op->getLoc(), output_type, op2_sub_op1.getResult(), scale_val.value(), - /*fused_activation_function=*/rewriter.getStringAttr("NONE")) + return TFL::MulOp::create( + rewriter, op->getLoc(), output_type, op2_sub_op1.getResult(), + scale_val.value(), + /*fused_activation_function=*/rewriter.getStringAttr("NONE")) .getResult(); } @@ -313,8 +314,8 @@ struct RemoveVolatileOps : public OpRewritePattern { auto const_type = tensorflow::GetTypeFromTFTensorShape( output_type.getShape(), qtype.getStorageType()); - auto const_op = rewriter.create( - op->getLoc(), const_type, qconst_op.getValue()); + auto const_op = arith::ConstantOp::create( + rewriter, op->getLoc(), const_type, qconst_op.getValue()); auto new_value = ConvertDequantizeOp(rewriter, op, output_type, const_op.getResult(), diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_patterns.td b/tensorflow/compiler/mlir/lite/transforms/prepare_patterns.td index 235ec7d38615fc..d14ee12b7e55a3 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_patterns.td @@ -24,7 +24,7 @@ def DenseElementsAttr : ElementsAttrBase< "non-opaque constant tensor">; def CreateGatherNdOp : NativeCodeCall< - "$_builder.create($0.getLoc(), $0.getType(), $1, $2, $3)">; + "TF::GatherNdOp::create($_builder, $0.getLoc(), $0.getType(), $1, $2, $3)">; def CreateTFCastOpI32 : NativeCodeCall< "CreateTFCastOpI32(&$_builder, $_loc, $0, $1)">; diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 957d243e72774d..899e4e9e088312 100644 --- 
a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -102,7 +102,7 @@ static Value CreateTFCastOpI32(OpBuilder *builder, Location loc, Value x, auto x_type = mlir::dyn_cast_or_null(x.getType()); if (!x_type) llvm_unreachable("unsupported type"); Type type = x_type.clone(builder->getI32Type()); - return builder->create(loc, type, x, truncate); + return TF::CastOp::create(*builder, loc, type, x, truncate); } } // namespace @@ -253,7 +253,7 @@ class ConvertTFConvOp : public RewritePattern { tensorflow::GetTypeFromTFTensorShape({bias_dim}, elem_type); auto bias_attr = rewriter.getZeroAttr(bias_type); auto bias = - rewriter.create(op->getLoc(), bias_type, bias_attr); + TF::ConstOp::create(rewriter, op->getLoc(), bias_type, bias_attr); if (op->getAttrOfType("padding").getValue() == "EXPLICIT") { // Add Const op for padding value. @@ -276,12 +276,12 @@ class ConvertTFConvOp : public RewritePattern { mlir::DenseIntElementsAttr::get(padding_attr_type, padding_values); auto padding_const = - rewriter.create(op->getLoc(), padding_attr); + TF::ConstOp::create(rewriter, op->getLoc(), padding_attr); // Add Pad op. auto pad_output_type = UnrankedTensorType::get(elem_type); - input = rewriter.create(op->getLoc(), pad_output_type, input, - padding_const); + input = TF::PadOp::create(rewriter, op->getLoc(), pad_output_type, input, + padding_const); // Set Conv padding to `VALID` since padding has been handled by Pad op. state.padding = rewriter.getStringAttr("VALID"); @@ -315,8 +315,8 @@ class ConvertTFConv2D : public ConvertTFConvOp { Type result_type, Value input, Value filter, Value bias) const { filter = legalizeFilter(rewriter, loc, filter); - return rewriter.create( - loc, result_type, input, filter, bias, + return TFL::Conv2DOp::create( + rewriter, loc, result_type, input, filter, bias, /*dilation_h_factor=*/state->dilation_height_factor, /*dilation_w_factor=*/state->dilation_width_factor, /*fused_activation_function=*/rewriter.getStringAttr("NONE"), @@ -338,7 +338,7 @@ class ConvertTFConv2D : public ConvertTFConvOp { {static_cast(perm.size())}, rewriter.getIntegerType(32)); auto perm_attr = DenseElementsAttr::get(perm_type, llvm::ArrayRef(perm)); - auto perm_op = rewriter.create(loc, perm_type, perm_attr); + auto perm_op = TF::ConstOp::create(rewriter, loc, perm_type, perm_attr); // Create tensor type for the transpose result. 
auto filter_type = mlir::cast(filter.getType()); @@ -350,7 +350,7 @@ class ConvertTFConv2D : public ConvertTFConvOp { auto result_type = tensorflow::GetTypeFromTFTensorShape(result_shape, elem_type); - return rewriter.create(loc, result_type, filter, perm_op); + return TF::TransposeOp::create(rewriter, loc, result_type, filter, perm_op); } }; @@ -382,8 +382,8 @@ class ConvertTFDepthwiseConv2dNative mlir::cast(filter.getType()).getDimSize(3); filter = legalizeFilter(rewriter, loc, filter); - return rewriter.create( - loc, result_type, input, filter, bias, + return TFL::DepthwiseConv2DOp::create( + rewriter, loc, result_type, input, filter, bias, /*dilation_h_factor=*/state->dilation_height_factor, /*dilation_w_factor=*/state->dilation_width_factor, /*fused_activation_function=*/rewriter.getStringAttr("NONE"), @@ -420,9 +420,9 @@ class ConvertTFDepthwiseConv2dNative rewriter.getI32IntegerAttr(ConvertToTfliteSize(size)); } auto shape_attr = DenseElementsAttr::get(shape_type, result_shape_data); - auto shape = rewriter.create(loc, shape_type, shape_attr); + auto shape = TF::ConstOp::create(rewriter, loc, shape_type, shape_attr); - return rewriter.create(loc, result_type, filter, shape); + return TF::ReshapeOp::create(rewriter, loc, result_type, filter, shape); } }; @@ -495,11 +495,11 @@ struct ConvertTFStridedSlice : public RewritePattern { auto shape_attr = DenseElementsAttr::get(shape_type, result_shape_data); auto shape = - rewriter.create(loc, shape_type, shape_attr); + arith::ConstantOp::create(rewriter, loc, shape_type, shape_attr); auto revised_output_type = tensorflow::GetTypeFromTFTensorShape( revised_shape, original_input_type.getElementType()); - TF::ReshapeOp reshape = rewriter.create( - loc, revised_output_type, original_input, shape); + TF::ReshapeOp reshape = TF::ReshapeOp::create( + rewriter, loc, revised_output_type, original_input, shape); // Replace the original strided_slice. 
uint64_t revised_begin_mask = strided_slice_op.getBeginMask(); @@ -656,13 +656,13 @@ struct ConvertTFStridedSlice : public RewritePattern { auto begin_attr = DenseElementsAttr::get(type, padded_begin); auto begin_op = - rewriter.create(op->getLoc(), type, begin_attr); + arith::ConstantOp::create(rewriter, op->getLoc(), type, begin_attr); auto end_attr = DenseElementsAttr::get(type, padded_end); auto end_op = - rewriter.create(op->getLoc(), type, end_attr); + arith::ConstantOp::create(rewriter, op->getLoc(), type, end_attr); auto stride_attr = DenseElementsAttr::get(type, padded_stride); auto stride_op = - rewriter.create(op->getLoc(), type, stride_attr); + arith::ConstantOp::create(rewriter, op->getLoc(), type, stride_attr); rewriter.replaceOpWithNewOp( op, strided_slice_op.getType(), input, begin_op.getResult(), @@ -767,17 +767,17 @@ struct ConvertTFStridedSlice : public RewritePattern { auto begin_end_type = tensorflow::GetTypeFromTFTensorShape( {num_input_dims}, rewriter.getIntegerType(32)); - auto new_begin_attr = rewriter.create( - op->getLoc(), begin_end_type, + auto new_begin_attr = arith::ConstantOp::create( + rewriter, op->getLoc(), begin_end_type, DenseElementsAttr::get(begin_end_type, padded_begin)); - auto new_end_attr = rewriter.create( - op->getLoc(), begin_end_type, + auto new_end_attr = arith::ConstantOp::create( + rewriter, op->getLoc(), begin_end_type, DenseElementsAttr::get(begin_end_type, padded_end)); auto strides_type = tensorflow::GetTypeFromTFTensorShape( {static_cast(padded_strides.size())}, rewriter.getIntegerType(32)); - auto new_strides_attr = rewriter.create( - op->getLoc(), strides_type, + auto new_strides_attr = arith::ConstantOp::create( + rewriter, op->getLoc(), strides_type, DenseElementsAttr::get(strides_type, padded_strides)); auto attribute_type = rewriter.getIntegerType(64); @@ -1043,9 +1043,10 @@ struct FusedBatchNormV3Pat : public ::mlir::RewritePattern { auto reduce_dim_type = tensorflow::GetTypeFromTFTensorShape( {3}, rewriter.getIntegerType(32)); ::mlir::SmallVector reduce_dim_values = {0, 1, 2}; - reduce_dim_op = rewriter.create( - odsLoc, ::mlir::DenseIntElementsAttr::get(reduce_dim_type, - reduce_dim_values)); + reduce_dim_op = + TF::ConstOp::create(rewriter, odsLoc, + ::mlir::DenseIntElementsAttr::get( + reduce_dim_type, reduce_dim_values)); } auto new_mean_type = tensorflow::GetTypeFromTFTensorShape( @@ -1053,8 +1054,8 @@ struct FusedBatchNormV3Pat : public ::mlir::RewritePattern { ::mlir::TF::MeanOp mean_op_1; { ::mlir::Value x_value = (*x.begin()); - mean_op_1 = rewriter.create( - odsLoc, new_mean_type, x_value, reduce_dim_op, + mean_op_1 = TF::MeanOp::create( + rewriter, odsLoc, new_mean_type, x_value, reduce_dim_op, /*keep_dims=*/rewriter.getBoolAttr(false)); } @@ -1064,15 +1065,15 @@ struct FusedBatchNormV3Pat : public ::mlir::RewritePattern { ::mlir::Value tblgen_value_1 = (*mean_op_1.getODSResults(0).begin()); // If x has shape of [b, h, w, c], the result of mean_op_1 will have // shape of [c]. Therefore, their shapes are always compatible. 
- square_diff_op = rewriter.create<::mlir::TF::SquaredDifferenceOp>( - odsLoc, tblgen_value_0, tblgen_value_1); + square_diff_op = ::mlir::TF::SquaredDifferenceOp::create( + rewriter, odsLoc, tblgen_value_0, tblgen_value_1); } ::mlir::TF::MeanOp mean_op_2; { ::mlir::Value input_value = (*square_diff_op.getODSResults(0).begin()); - mean_op_2 = rewriter.create( - odsLoc, new_mean_type, input_value, reduce_dim_op, + mean_op_2 = TF::MeanOp::create( + rewriter, odsLoc, new_mean_type, input_value, reduce_dim_op, /*keep_dims=*/rewriter.getBoolAttr(false)); } @@ -1083,57 +1084,56 @@ struct FusedBatchNormV3Pat : public ::mlir::RewritePattern { ::llvm::SmallVector<::mlir::Value, 4> replace_values; ::mlir::TF::ConstOp epsilon_const_op; { - epsilon_const_op = - rewriter.create<::mlir::TF::ConstOp>(odsLoc, - /*value=*/epsilon); + epsilon_const_op = ::mlir::TF::ConstOp::create(rewriter, odsLoc, + /*value=*/epsilon); } ::mlir::TF::AddOp add_op_1; { ::mlir::Value epsilon_value = (*epsilon_const_op.getODSResults(0).begin()); // Multiplying with a constant, no need to check broadcastibility. - add_op_1 = rewriter.create<::mlir::TF::AddOp>(odsLoc, - /*x=*/variance_value, - /*y=*/epsilon_value); + add_op_1 = ::mlir::TF::AddOp::create(rewriter, odsLoc, + /*x=*/variance_value, + /*y=*/epsilon_value); } ::mlir::TF::RsqrtOp rsqrt_op; { ::mlir::SmallVector<::mlir::Value, 4> tblgen_values; ::mlir::SmallVector<::mlir::NamedAttribute, 4> tblgen_attrs; tblgen_values.push_back((*add_op_1.getODSResults(0).begin())); - rsqrt_op = rewriter.create<::mlir::TF::RsqrtOp>(odsLoc, tblgen_values, - tblgen_attrs); + rsqrt_op = ::mlir::TF::RsqrtOp::create(rewriter, odsLoc, tblgen_values, + tblgen_attrs); } ::mlir::TF::MulOp multiplier; { ::mlir::Value tblgen_value_0 = (*scale.begin()); ::mlir::Value tblgen_value_1 = (*rsqrt_op.getODSResults(0).begin()); - multiplier = rewriter.create<::mlir::TF::MulOp>(odsLoc, - /*x=*/tblgen_value_0, - /*y=*/tblgen_value_1); + multiplier = ::mlir::TF::MulOp::create(rewriter, odsLoc, + /*x=*/tblgen_value_0, + /*y=*/tblgen_value_1); } ::mlir::TF::MulOp mul_op_1; { ::mlir::Value tblgen_value_0 = (*x.begin()); ::mlir::Value tblgen_value_1 = (*multiplier.getODSResults(0).begin()); - mul_op_1 = rewriter.create<::mlir::TF::MulOp>(odsLoc, - /*x=*/tblgen_value_0, - /*y=*/tblgen_value_1); + mul_op_1 = ::mlir::TF::MulOp::create(rewriter, odsLoc, + /*x=*/tblgen_value_0, + /*y=*/tblgen_value_1); } ::mlir::TF::MulOp mul_op_2; { ::mlir::Value multiplier_value = (*multiplier.getODSResults(0).begin()); - mul_op_2 = rewriter.create<::mlir::TF::MulOp>(odsLoc, - /*x=*/mean_value, - /*y=*/multiplier_value); + mul_op_2 = ::mlir::TF::MulOp::create(rewriter, odsLoc, + /*x=*/mean_value, + /*y=*/multiplier_value); } ::mlir::TF::SubOp sub_op; { ::mlir::Value tblgen_value_0 = (*offset.begin()); ::mlir::Value tblgen_value_1 = (*mul_op_2.getODSResults(0).begin()); - sub_op = rewriter.create<::mlir::TF::SubOp>(odsLoc, - /*x=*/tblgen_value_0, - /*y=*/tblgen_value_1); + sub_op = ::mlir::TF::SubOp::create(rewriter, odsLoc, + /*x=*/tblgen_value_0, + /*y=*/tblgen_value_1); } ::mlir::TF::AddOp add_op_2; { @@ -1145,8 +1145,8 @@ struct FusedBatchNormV3Pat : public ::mlir::RewritePattern { for (auto v : fused_batch_norm_op.getODSResults(0)) { tblgen_types.push_back(v.getType()); } - add_op_2 = rewriter.create<::mlir::TF::AddOp>( - odsLoc, tblgen_types, tblgen_values, tblgen_attrs); + add_op_2 = ::mlir::TF::AddOp::create(rewriter, odsLoc, tblgen_types, + tblgen_values, tblgen_attrs); } for (auto v : ::llvm::SmallVector<::mlir::Value, 
4>{add_op_2.getODSResults(0)}) { @@ -1261,9 +1261,9 @@ struct ReorderFakeQuantPattern : public RewritePattern { ReorderOp &new_reorder_op) const { Value tensor_value = (*input.begin()); Value shape_value = (*shape.begin()); - new_reorder_op = rewriter.create(ods_loc, - /*tensor=*/tensor_value, - /*shape=*/shape_value); + new_reorder_op = ReorderOp::create(rewriter, ods_loc, + /*tensor=*/tensor_value, + /*shape=*/shape_value); return success(); } @@ -1289,8 +1289,8 @@ struct ReorderFakeQuantPattern : public RewritePattern { for (auto v : casted_op.getODSResults(0)) { target_types.push_back(v.getType()); } - fakequant_op = rewriter.create( - ods_loc, target_types, target_values, target_attrs); + fakequant_op = TF::FakeQuantWithMinMaxVarsOp::create( + rewriter, ods_loc, target_types, target_values, target_attrs); return success(); } @@ -1442,13 +1442,14 @@ struct ConvertRfftToRfft2d : public RewritePattern { auto expaned_input_type = tensorflow::GetTypeFromTFTensorShape( expanded_input_shape, input_type.getElementType()); - TF::ExpandDimsOp expanded_input = rewriter.create( - rfft_op.getLoc(), expaned_input_type, input, minus_two->getResult()); + TF::ExpandDimsOp expanded_input = + TF::ExpandDimsOp::create(rewriter, rfft_op.getLoc(), expaned_input_type, + input, minus_two->getResult()); // Expanded fft_len. auto one_attr = mlir::DenseIntElementsAttr::get(one_ele_type, {1}); - auto one = rewriter.create(rfft_op.getLoc(), one_attr); + auto one = TF::ConstOp::create(rewriter, rfft_op.getLoc(), one_attr); auto zero = CreateConstOpWithSingleValue(&rewriter, rfft_op.getLoc(), one_ele_type, 0); @@ -1456,21 +1457,22 @@ struct ConvertRfftToRfft2d : public RewritePattern { auto expanded_fft_len_type = tensorflow::GetTypeFromTFTensorShape( {2}, fft_len_type.getElementType()); - TF::ConcatV2Op expanded_fft_len = rewriter.create( - rfft_op.getLoc(), expanded_fft_len_type, + TF::ConcatV2Op expanded_fft_len = TF::ConcatV2Op::create( + rewriter, rfft_op.getLoc(), expanded_fft_len_type, SmallVector({one.getResult(), fft_len}), zero->getResult()); // Insert the rfft_2d. auto rfft2d_out_type = tensorflow::GetTypeFromTFTensorShape( expanded_output_shape, output_type.getElementType()); - TF::RFFT2DOp rfft2d = rewriter.create( - rfft_op.getLoc(), rfft2d_out_type, expanded_input.getResult(), + TF::RFFT2DOp rfft2d = TF::RFFT2DOp::create( + rewriter, rfft_op.getLoc(), rfft2d_out_type, expanded_input.getResult(), expanded_fft_len.getResult()); // Insert the squeeze op. 
auto squeeze_dim = rewriter.getI64ArrayAttr({-2}); - TF::SqueezeOp squeeze = rewriter.create( - rfft_op.getLoc(), output_type, rfft2d.getResult(), squeeze_dim); + TF::SqueezeOp squeeze = + TF::SqueezeOp::create(rewriter, rfft_op.getLoc(), output_type, + rfft2d.getResult(), squeeze_dim); rewriter.replaceOp(op, squeeze.getResult()); @@ -1614,8 +1616,8 @@ class QuantizeConcatResult : public OpRewritePattern { llvm::SmallVector inputs{concat_result, min_v, max_v}; rewriter.setInsertionPointAfter(concat.getOperation()); - auto new_fake_quant_op = rewriter.create( - concat.getLoc(), concat->getResultTypes(), inputs, + auto new_fake_quant_op = TF::FakeQuantWithMinMaxVarsOp::create( + rewriter, concat.getLoc(), concat->getResultTypes(), inputs, (*fake_quant_ops.begin())->getAttrs()); for (OpOperand *use : uses) { @@ -1673,8 +1675,9 @@ class QuantizeMeanResult : public OpRewritePattern { llvm::SmallVector inputs{mean_result, fq.getMin(), fq.getMax()}; rewriter.setInsertionPointAfter(mean.getOperation()); - auto new_fake_quant_op = rewriter.create( - mean.getLoc(), mean->getResultTypes(), inputs, fq->getAttrs()); + auto new_fake_quant_op = TF::FakeQuantWithMinMaxVarsOp::create( + rewriter, mean.getLoc(), mean->getResultTypes(), inputs, + fq->getAttrs()); for (OpOperand *use : uses) { use->assign(new_fake_quant_op); diff --git a/tensorflow/compiler/mlir/lite/transforms/push_transpose_through_ewise_pass.cc b/tensorflow/compiler/mlir/lite/transforms/push_transpose_through_ewise_pass.cc index 4dc0b4bf668043..82803f6de927cb 100644 --- a/tensorflow/compiler/mlir/lite/transforms/push_transpose_through_ewise_pass.cc +++ b/tensorflow/compiler/mlir/lite/transforms/push_transpose_through_ewise_pass.cc @@ -173,8 +173,8 @@ class CommuteBothInputsTransposedWithEwiseOps : public RewritePattern { new_out_type, op->getAttrs()); // Apply original tranpose to output of ewise op. - auto out_tpose_op = rewriter.create( - new_ewise_op->getLoc(), op->getResult(0).getType(), + auto out_tpose_op = TFL::TransposeOp::create( + rewriter, new_ewise_op->getLoc(), op->getResult(0).getType(), new_ewise_op->getResults()[0], perm1); rewriter.replaceOp(op, out_tpose_op.getOperation()); return success(); @@ -273,7 +273,7 @@ class CommuteTransposeWithEwiseOps : public RewritePattern { RankedTensorType::get(inverse_perm.size(), rewriter.getI32Type()), inverse_perm); auto inverse_perm_op = - rewriter.create(perm.getLoc(), inverse_perm_attr); + arith::ConstantOp::create(rewriter, perm.getLoc(), inverse_perm_attr); // Transpose the input constant. auto in_rtt = @@ -283,9 +283,9 @@ class CommuteTransposeWithEwiseOps : public RewritePattern { RankedTensorType::get(PermuteShape(in_rtt.getShape(), inverse_perm), in_rtt.getElementType()); - tposed_const = rewriter.create( - cst_arg->getLoc(), inverse_type, cst_arg->getResult(0), - inverse_perm_op); + tposed_const = + TFL::TransposeOp::create(rewriter, cst_arg->getLoc(), inverse_type, + cst_arg->getResult(0), inverse_perm_op); } auto current_out_type = @@ -301,8 +301,8 @@ class CommuteTransposeWithEwiseOps : public RewritePattern { new_out_type, op->getAttrs()); // Apply original tranpose to output of ewise op. 
- auto out_tpose_op = rewriter.create( - new_ewise_op->getLoc(), op->getResult(0).getType(), + auto out_tpose_op = TFL::TransposeOp::create( + rewriter, new_ewise_op->getLoc(), op->getResult(0).getType(), new_ewise_op->getResults()[0], perm); rewriter.replaceOp(op, out_tpose_op.getOperation()); return success(); diff --git a/tensorflow/compiler/mlir/lite/transforms/quantize.cc b/tensorflow/compiler/mlir/lite/transforms/quantize.cc index d0c143d73914c9..c50e0a26e71c48 100644 --- a/tensorflow/compiler/mlir/lite/transforms/quantize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/quantize.cc @@ -80,13 +80,13 @@ static LogicalResult IsDrqTensor(Value value, Value& fq_input) { // fake quant op. // This is to support the case such as: // %2077 = "vhlo.composite_v1"(%73, %69, %2070) : (tensor, tensor, - // tensor<1x?x512xf32>) -> tensor<1x?x512xf32> + // tensor<1x?x512xf32>) -> tensor<1x?x512xf32> // %2078 = "tfl.reshape"(%2077, %99) : (tensor<1x?x512xf32>, tensor<2xi32>) -> - // tensor + // tensor // %2079 = "tfl.pseudo_qconst"() <{qtype = tensor<64x512x!quant.uniform, tensor<64x512xf32>, none) -> - // tensor + // %2080 = "tfl.dequantize"(%2079) + // %2081 = "tfl.fully_connected"(%2078, %2080, %0) : (tensor, + // tensor<64x512xf32>, none) -> tensor // TODO - b/422588785: Have proper support for dynamic shaped models. auto v = value; if (auto reshape_op = llvm::dyn_cast_or_null(v.getDefiningOp())) { @@ -207,8 +207,9 @@ class PushForwardDrqFQ : public OpRewritePattern { Value float_input = drq_fq_op.getOperand(drq_fq_op.getNumOperands() - 1); // Create a new pad op. - auto new_pad_op = rewriter.create( - pad_op.getLoc(), pad_op.getType(), float_input, pad_op.getPadding()); + auto new_pad_op = + TFL::PadOp::create(rewriter, pad_op.getLoc(), pad_op.getType(), + float_input, pad_op.getPadding()); // Create a new drq fake quant op. // Operands are the same, except for the last one. @@ -218,8 +219,8 @@ class PushForwardDrqFQ : public OpRewritePattern { } new_drq_operands.push_back(new_pad_op.getResult()); - auto new_drq_fq_op = rewriter.create( - drq_fq_op.getLoc(), pad_op.getType(), new_drq_operands, + auto new_drq_fq_op = stablehlo::CompositeOp::create( + rewriter, drq_fq_op.getLoc(), pad_op.getType(), new_drq_operands, drq_fq_op->getAttrs()); rewriter.replaceOp(pad_op, new_drq_fq_op.getResult(0)); @@ -227,6 +228,40 @@ class PushForwardDrqFQ : public OpRewritePattern { } }; +// Fixes keep_num_dims option of FC if output dims is different from input dims +// though keep_num_dims is true. It happens when FC's input has changed after +// quantization, e.g. by IsDrqTensor(). +// Sets keep_num_dims to false if that's the case. Otherwise, it's not +// compatible with GPU. See CheckGpuDelegateCompatibility() in +// third_party/tensorflow/lite/tools/versioning/gpu_compatibility.cc. +// Note that if FC is followed by Reshape, the keep_num_dims will be set to true +// with a correct shape later by EnableFullyConnectedKeepNumDimsBeforeReshape() +// in optimize pass. 
+struct FixFullyConnectedKeepNumDims + : public OpRewritePattern { + explicit FixFullyConnectedKeepNumDims(MLIRContext* context) + : OpRewritePattern(context, /*benefit=*/0) {} + + LogicalResult matchAndRewrite(FullyConnectedOp fc, + PatternRewriter& rewriter) const override { + if (!fc.getKeepNumDims()) return failure(); + + auto input_ty = + mlir::dyn_cast_or_null(fc.getInput().getType()); + auto fc_ty = mlir::dyn_cast_or_null(fc.getType(0)); + if (!input_ty || !fc_ty) return failure(); + + auto input_shape = input_ty.getShape(); + auto fc_shape = fc_ty.getShape(); + if (input_shape.size() == fc_shape.size()) { + return failure(); + } + + fc.setKeepNumDims(false); + return success(); + } +}; + class StrictQuantizationPattern : public RewritePattern { public: using BaseType = StrictQuantizationPattern; @@ -763,7 +798,7 @@ void QuantizePass::runOnOperation() { patterns.add(ctx, quant_params); } - + patterns.add(ctx); (void)applyPatternsGreedily(func, std::move(patterns)); // Constant quantization is a lossy transformation, so they are applied only diff --git a/tensorflow/compiler/mlir/lite/transforms/quantize_variables.cc b/tensorflow/compiler/mlir/lite/transforms/quantize_variables.cc index 7453ed54975a5a..d6e18dc4158508 100644 --- a/tensorflow/compiler/mlir/lite/transforms/quantize_variables.cc +++ b/tensorflow/compiler/mlir/lite/transforms/quantize_variables.cc @@ -114,11 +114,12 @@ void QuantizeVariablesPass::QuantizeVariable( // Add dequantize. builder.setInsertionPointAfter(read_variable_op); auto new_read_variable_op = - builder.create(read_variable_op.getLoc(), ref_qtype, - read_variable_op.getResourceId()); - auto new_dq_op = builder.create( - read_variable_op.getLoc(), read_variable_op.getResult().getType(), - new_read_variable_op.getResult()); + ReadVariableOp::create(builder, read_variable_op.getLoc(), ref_qtype, + read_variable_op.getResourceId()); + auto new_dq_op = + DequantizeOp::create(builder, read_variable_op.getLoc(), + read_variable_op.getResult().getType(), + new_read_variable_op.getResult()); read_variable_op->replaceAllUsesWith(new_dq_op); read_variable_op.erase(); } @@ -135,19 +136,19 @@ void QuantizeVariablesPass::QuantizeVariable( if (qtype == quant::QuantizedType::getQuantizedElementType(ref_qtype)) { // Same quantization parameters, remove it. builder.setInsertionPoint(assign_variable_op); - auto new_assign_variable_op = builder.create( - assign_variable_op.getLoc(), assign_variable_op.getResourceId(), - dq_op.getInput()); + auto new_assign_variable_op = AssignVariableOp::create( + builder, assign_variable_op.getLoc(), + assign_variable_op.getResourceId(), dq_op.getInput()); assign_variable_op->replaceAllUsesWith(new_assign_variable_op); } else { // Otherwise, apply re-quantization. builder.setInsertionPoint(assign_variable_op); - auto new_q_op = builder.create( - assign_variable_op.getLoc(), ref_qtype, dq_op.getInput(), + auto new_q_op = QuantizeOp::create( + builder, assign_variable_op.getLoc(), ref_qtype, dq_op.getInput(), TypeAttr::get(ref_qtype)); - auto new_assign_variable_op = builder.create( - assign_variable_op.getLoc(), assign_variable_op.getResourceId(), - new_q_op.getResult()); + auto new_assign_variable_op = AssignVariableOp::create( + builder, assign_variable_op.getLoc(), + assign_variable_op.getResourceId(), new_q_op.getResult()); assign_variable_op->replaceAllUsesWith(new_assign_variable_op); } assign_variable_op.erase(); @@ -155,12 +156,12 @@ void QuantizeVariablesPass::QuantizeVariable( } else { // Add quantize op. 
builder.setInsertionPoint(assign_variable_op); - auto new_q_op = builder.create( - assign_variable_op.getLoc(), ref_qtype, + auto new_q_op = QuantizeOp::create( + builder, assign_variable_op.getLoc(), ref_qtype, assign_variable_op.getValue(), TypeAttr::get(ref_qtype)); - auto new_assign_variable_op = builder.create( - assign_variable_op.getLoc(), assign_variable_op.getResourceId(), - new_q_op.getResult()); + auto new_assign_variable_op = AssignVariableOp::create( + builder, assign_variable_op.getLoc(), + assign_variable_op.getResourceId(), new_q_op.getResult()); assign_variable_op->replaceAllUsesWith(new_assign_variable_op); assign_variable_op.erase(); } @@ -171,9 +172,9 @@ void QuantizeVariablesPass::QuantizeVariable( builder.setInsertionPoint(var_handle_op); auto output_type = UnrankedTensorType::get(TF::ResourceType::get( {mlir::cast(ref_qtype)}, builder.getContext())); - auto new_var_handle_op = builder.create( - var_handle_op.getLoc(), output_type, var_handle_op.getContainer(), - var_handle_op.getSharedName()); + auto new_var_handle_op = VarHandleOp::create( + builder, var_handle_op.getLoc(), output_type, + var_handle_op.getContainer(), var_handle_op.getSharedName()); var_handle_op->replaceAllUsesWith(new_var_handle_op); var_handle_op.erase(); } diff --git a/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc index 80e0986209e8d0..58fff203b9fb3e 100644 --- a/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc +++ b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc @@ -80,8 +80,8 @@ void RaiseCustomOpsPass::runOnOperation() { for (auto *op : custom_ops) { builder.setInsertionPoint(op); Location loc = op->getLoc(); - auto custom_op = builder.create(loc, op->getResultTypes(), - op->getOperands()); + auto custom_op = CustomTfOp::create(builder, loc, op->getResultTypes(), + op->getOperands()); Region region; Block *new_block = new Block; region.push_back(new_block); @@ -95,7 +95,7 @@ void RaiseCustomOpsPass::runOnOperation() { inner_op->setOperand(idx_args.index(), idx_args.value()); } custom_op->setAttrs(inner_op->getAttrs()); - builder.create(loc, inner_op->getResults()); + YieldOp::create(builder, loc, inner_op->getResults()); custom_op.getBody().takeBody(region); op->replaceAllUsesWith(custom_op); diff --git a/tensorflow/compiler/mlir/lite/transforms/reduce_type_precision.cc b/tensorflow/compiler/mlir/lite/transforms/reduce_type_precision.cc index e964f76b30efbe..a0a6df9cf4feef 100644 --- a/tensorflow/compiler/mlir/lite/transforms/reduce_type_precision.cc +++ b/tensorflow/compiler/mlir/lite/transforms/reduce_type_precision.cc @@ -106,12 +106,12 @@ class SanitizeGatherOpOutputToI4 : public OpRewritePattern { } Builder builder(op.getContext()); - auto new_gather_op = rewriter.create( - op.getLoc(), - /*result=*/ - mlir::cast(op.getResult().getType()) - .clone(builder.getI4Type()), - /*operand=*/op.getOperands(), op->getAttrs()); + auto new_gather_op = + TFL::GatherOp::create(rewriter, op.getLoc(), + /*result=*/ + mlir::cast(op.getResult().getType()) + .clone(builder.getI4Type()), + /*operand=*/op.getOperands(), op->getAttrs()); rewriter.replaceAllUsesWith(op.getResult(), new_gather_op.getResult()); return success(); diff --git a/tensorflow/compiler/mlir/lite/transforms/tflite_passes/unfold_large_splat_constants_pass.cc b/tensorflow/compiler/mlir/lite/transforms/tflite_passes/unfold_large_splat_constants_pass.cc index 2b03557121652f..6f476ded0a1a62 100644 --- 
a/tensorflow/compiler/mlir/lite/transforms/tflite_passes/unfold_large_splat_constants_pass.cc +++ b/tensorflow/compiler/mlir/lite/transforms/tflite_passes/unfold_large_splat_constants_pass.cc @@ -60,23 +60,21 @@ void MaybeUnfoldLargeSplatConstant(mlir::OpBuilder* op_builder, } op_builder->setInsertionPoint(const_op); - mlir::arith::ConstantOp fill_shape = - op_builder->create( - const_op->getLoc(), DenseIntElementsAttr::get( - tensorflow::GetTypeFromTFTensorShape( - {splat_elements_attr.getType().getRank()}, - op_builder->getI64Type()), - splat_elements_attr.getType().getShape())); - mlir::arith::ConstantOp fill_value = - op_builder->create( - const_op->getLoc(), - DenseElementsAttr::get( - tensorflow::GetTypeFromTFTensorShape( - {}, splat_elements_attr.getType().getElementType()), - splat_elements_attr.getSplatValue())); - TFL::FillOp fill = op_builder->create( - const_op->getLoc(), splat_elements_attr.getType(), fill_shape, - fill_value); + mlir::arith::ConstantOp fill_shape = mlir::arith::ConstantOp::create( + *op_builder, const_op->getLoc(), + DenseIntElementsAttr::get(tensorflow::GetTypeFromTFTensorShape( + {splat_elements_attr.getType().getRank()}, + op_builder->getI64Type()), + splat_elements_attr.getType().getShape())); + mlir::arith::ConstantOp fill_value = mlir::arith::ConstantOp::create( + *op_builder, const_op->getLoc(), + DenseElementsAttr::get( + tensorflow::GetTypeFromTFTensorShape( + {}, splat_elements_attr.getType().getElementType()), + splat_elements_attr.getSplatValue())); + TFL::FillOp fill = TFL::FillOp::create(*op_builder, const_op->getLoc(), + splat_elements_attr.getType(), + fill_shape, fill_value); const_op->replaceAllUsesWith(fill); const_op->erase(); } diff --git a/tensorflow/compiler/mlir/lite/utils/fake_quant_utils.h b/tensorflow/compiler/mlir/lite/utils/fake_quant_utils.h index 4e0fb068c8b9e8..d9cab52085ef5b 100644 --- a/tensorflow/compiler/mlir/lite/utils/fake_quant_utils.h +++ b/tensorflow/compiler/mlir/lite/utils/fake_quant_utils.h @@ -150,10 +150,10 @@ class InsertTFLQuantOpsAfterTFFakeQuantOp { // dequantize ops, and insert them between the tf.FakeQuantWithMinMaxVarsOp // and its users. 
Value value = tf_op.getOutputs(); - auto quantize = rewriter.create( - tf_op.getLoc(), qtype.getValue(), value, qtype); - auto dequantize = rewriter.create( - tf_op.getLoc(), res_type, quantize.getOutput()); + auto quantize = TFL::QuantizeOp::create(rewriter, tf_op.getLoc(), + qtype.getValue(), value, qtype); + auto dequantize = TFL::DequantizeOp::create(rewriter, tf_op.getLoc(), + res_type, quantize.getOutput()); value.replaceAllUsesWith(dequantize); quantize.getOperation()->replaceUsesOfWith(dequantize, value); diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc index a2023742140fce..f94cad6b5eabe7 100644 --- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc @@ -52,14 +52,14 @@ Value CreateI32SplatConst(OpBuilder* builder, ArrayRef shape, int32_t val, mlir::Location location) { auto type = RankedTensorType::get(shape, builder->getIntegerType(32)); auto attr = DenseElementsAttr::get(type, val); - return builder->create(location, type, attr); + return arith::ConstantOp::create(*builder, location, type, attr); } Value CreateF32SplatConst(OpBuilder* builder, ArrayRef shape, float val, mlir::Location location) { auto type = RankedTensorType::get(shape, builder->getF32Type()); auto attr = DenseElementsAttr::get(type, val); - return builder->create(location, type, attr); + return arith::ConstantOp::create(*builder, location, type, attr); } Value CreatTfF32ConstOp(OpBuilder* builder, ArrayRef shape, float val, @@ -67,7 +67,7 @@ Value CreatTfF32ConstOp(OpBuilder* builder, ArrayRef shape, float val, auto type = RankedTensorType::get(shape, builder->getF32Type()); auto ele_type = RankedTensorType::get({1}, builder->getF32Type()); auto attr = DenseElementsAttr::get(ele_type, val); - return builder->create(location, type, attr); + return TF::ConstOp::create(*builder, location, type, attr); } Value CreateI64DenseConst(OpBuilder* builder, ArrayRef shape, @@ -75,7 +75,7 @@ Value CreateI64DenseConst(OpBuilder* builder, ArrayRef shape, auto type = RankedTensorType::get(static_cast(shape.size()), builder->getIntegerType(64)); auto attr = DenseElementsAttr::get(type, values); - return builder->create(location, type, attr); + return arith::ConstantOp::create(*builder, location, type, attr); } Value CreateI32DenseConst(OpBuilder* builder, ArrayRef values, @@ -83,12 +83,12 @@ Value CreateI32DenseConst(OpBuilder* builder, ArrayRef values, auto type = RankedTensorType::get(static_cast(values.size()), builder->getIntegerType(32)); auto attr = DenseElementsAttr::get(type, values); - return builder->create(location, type, attr); + return arith::ConstantOp::create(*builder, location, type, attr); } Value CreateNoneValue(OpBuilder* builder, mlir::Location location) { - return builder->create(location, builder->getNoneType(), - builder->getUnitAttr()); + return TFL::NoValueOp::create(*builder, location, builder->getNoneType(), + builder->getUnitAttr()); } Value Transpose(OpBuilder* builder, Value value_to_transpose, @@ -106,8 +106,8 @@ Value Transpose(OpBuilder* builder, Value value_to_transpose, auto elem_type = transpose_type.getElementType(); auto result_type = RankedTensorType::get(transpose_shape, elem_type); - return builder->create(location, result_type, - value_to_transpose, perm_op); + return TF::TransposeOp::create(*builder, location, result_type, + value_to_transpose, perm_op); } Value Transpose2D(OpBuilder* builder, Value value_to_transpose, @@ -121,8 +121,8 @@ Value Reverse(OpBuilder* 
builder, Value value_to_reverse, int axis, RankedTensorType type, mlir::Location location) { auto axis_op = CreateI32SplatConst(builder, {1}, axis, location); // The result type will be the same as the input. - return builder->create(location, type, value_to_reverse, - axis_op); + return TF::ReverseV2Op::create(*builder, location, type, value_to_reverse, + axis_op); } ArrayRef GetRankedTensorShape(Value value) { @@ -154,8 +154,8 @@ Value SliceRankedTensor(OpBuilder* builder, Value input, auto slice_i2c_size = CreateI64DenseConst(builder, size_shape, size_values, location); - return builder->create( - location, + return TF::SliceOp::create( + *builder, location, RankedTensorType::get( size_values, mlir::cast(input.getType()).getElementType()), @@ -175,9 +175,9 @@ Value CreateStridedSliceOp(mlir::Location loc, ArrayRef output_shape, auto end_tensor = CreateI32DenseConst(builder, end, loc); auto strides_tensor = CreateI32DenseConst(builder, strides, loc); - return builder->create( - loc, output_type, input, begin_tensor, end_tensor, strides_tensor, - builder->getI64IntegerAttr(begin_mask), + return TF::StridedSliceOp::create( + *builder, loc, output_type, input, begin_tensor, end_tensor, + strides_tensor, builder->getI64IntegerAttr(begin_mask), builder->getI64IntegerAttr(end_mask), builder->getI64IntegerAttr(ellipsis_mask), builder->getI64IntegerAttr(new_axis_mask), @@ -590,21 +590,20 @@ TF::ConstOp Create1DConstantOp(const std::vector& value, Location loc, auto type = mlir::RankedTensorType::get(value.size(), builder->getIntegerType(32)); auto dense_values = mlir::DenseIntElementsAttr::get(type, value); - return builder->create(loc, dense_values); + return TF::ConstOp::create(*builder, loc, dense_values); } TF::ConstOp CreateScalarConstantOp(int value, Location loc, OpBuilder* builder) { - return builder->create(loc, builder->getI32IntegerAttr(value)); + return TF::ConstOp::create(*builder, loc, builder->getI32IntegerAttr(value)); } TF::ReshapeOp CreateFlattenOP(const Value& input, Location loc, OpBuilder* builder) { auto output_shape = Create1DConstantOp({-1}, loc, builder); - return builder->create( - loc, - /*tensor=*/input, - /*shape=*/output_shape.getResult()); + return mlir::TF::ReshapeOp::create(*builder, loc, + /*tensor=*/input, + /*shape=*/output_shape.getResult()); } LogicalResult CreateEqualSizeSplitVOp(Value input, int axis, int splits, @@ -637,9 +636,9 @@ LogicalResult CreateEqualSizeSplitVOp(Value input, int axis, int splits, builder); auto axis_op = CreateScalarConstantOp(axis, loc, builder); - *result = builder->create(loc, output_types, input, - size_of_splits_op.getResult(), - axis_op.getResult()); + *result = + TF::SplitVOp::create(*builder, loc, output_types, input, + size_of_splits_op.getResult(), axis_op.getResult()); return success(); } @@ -771,8 +770,8 @@ LogicalResult ConvertKerasLSTMLayer(mlir::func::FuncOp func_op, mlir::cast(final_inputs.getType()).getElementType()); Value none = CreateNoneValue(builder, func_op.getLoc()); - auto lstm = builder->create( - func_op.getLoc(), result_type, /*input=*/final_inputs, + auto lstm = mlir::TFL::UnidirectionalSequenceLSTMOp::create( + *builder, func_op.getLoc(), result_type, /*input=*/final_inputs, /*input_to_input_weights=*/weights_array->getResult(0), /*input_to_forget_weights=*/weights_array->getResult(1), /*input_to_cell_weights=*/weights_array->getResult(2), @@ -881,7 +880,7 @@ LogicalResult ConvertKerasLSTMLayer(mlir::func::FuncOp func_op, func_op.getFunctionType().getInputs(), output_types)); - 
builder->create(func_op.getLoc(), outputs); + mlir::func::ReturnOp::create(*builder, func_op.getLoc(), outputs); return success(); } diff --git a/tensorflow/compiler/mlir/lite/utils/nms_utils.cc b/tensorflow/compiler/mlir/lite/utils/nms_utils.cc index 211336de124075..59c3f883411221 100644 --- a/tensorflow/compiler/mlir/lite/utils/nms_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/nms_utils.cc @@ -57,11 +57,11 @@ void ConvertNMSPaddedFunc::RewriteFunc() { auto output_type1 = func_.getFunctionType().getResult(1); OpBuilder builder(func_.getBody()); - auto op = builder.create( - func_.getLoc(), output_type0, output_type1, boxes, scores, + auto op = mlir::TFL::NonMaxSuppressionV4Op::create( + builder, func_.getLoc(), output_type0, output_type1, boxes, scores, max_output_size, iou_threshold, score_threshold); - builder.create(func_.getLoc(), op.getResults()); + mlir::func::ReturnOp::create(builder, func_.getLoc(), op.getResults()); } LogicalResult ConvertNMSPaddedFunc::VerifySignature() { @@ -102,11 +102,11 @@ LogicalResult ConvertSSDPostProcessFunc::RewriteFunc() { custom_option_buffer))) { return failure(); } - auto op = builder.create( - func_.getLoc(), func_.getFunctionType().getResults(), - func_.getArguments(), kCustomSSDPostprocessing, - CustomOption(&builder, custom_option_buffer)); - builder.create(func_.getLoc(), op.getResults()); + auto op = CustomOp::create(builder, func_.getLoc(), + func_.getFunctionType().getResults(), + func_.getArguments(), kCustomSSDPostprocessing, + CustomOption(&builder, custom_option_buffer)); + func::ReturnOp::create(builder, func_.getLoc(), op.getResults()); return success(); } diff --git a/tensorflow/compiler/mlir/lite/utils/perception_ops_utils.cc b/tensorflow/compiler/mlir/lite/utils/perception_ops_utils.cc index 5f680c7db9be58..4bcf4b86e0ea17 100644 --- a/tensorflow/compiler/mlir/lite/utils/perception_ops_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/perception_ops_utils.cc @@ -104,11 +104,11 @@ LogicalResult ConvertMaxUnpoolingFunc::RewriteFunc() { if (failed(CreateCustomOptions(custom_option_buffer))) { return failure(); } - auto op = builder.create( - func_.getLoc(), func_.getFunctionType().getResults(), - func_.getArguments(), kMaxUnpooling, - CustomOption(&builder, custom_option_buffer)); - builder.create(func_.getLoc(), op.getResults()); + auto op = CustomOp::create(builder, func_.getLoc(), + func_.getFunctionType().getResults(), + func_.getArguments(), kMaxUnpooling, + CustomOption(&builder, custom_option_buffer)); + func::ReturnOp::create(builder, func_.getLoc(), op.getResults()); return success(); } @@ -205,11 +205,11 @@ LogicalResult ConvertDenseImageWarpFunc::RewriteFunc() { StringAttr::get(func_.getContext(), kImageWarping)); OpBuilder builder(func_.getBody()); - auto op = builder.create(func_.getLoc(), - func_.getFunctionType().getResults(), - func_.getArguments(), kImageWarping, - CustomOption(&builder, /*content=*/"")); - builder.create(func_.getLoc(), op.getResults()); + auto op = CustomOp::create(builder, func_.getLoc(), + func_.getFunctionType().getResults(), + func_.getArguments(), kImageWarping, + CustomOption(&builder, /*content=*/"")); + func::ReturnOp::create(builder, func_.getLoc(), op.getResults()); return success(); } diff --git a/tensorflow/compiler/mlir/lite/utils/region_isolation_test.cc b/tensorflow/compiler/mlir/lite/utils/region_isolation_test.cc index 3c136be98ef071..f3917e32d91126 100644 --- a/tensorflow/compiler/mlir/lite/utils/region_isolation_test.cc +++ 
b/tensorflow/compiler/mlir/lite/utils/region_isolation_test.cc @@ -85,7 +85,7 @@ TEST(RegionIsolationTest, CaseOp) { OpBuilder b(&ctx); - OwningOpRef root(b.create(b.getUnknownLoc())); + OwningOpRef root(ModuleOp::create(b, b.getUnknownLoc())); { auto& block = root->getBodyRegion().front(); diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc index fa191c6c69d984..a402deb4bc230e 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc @@ -136,10 +136,10 @@ LogicalResult ConvertWhitespaceTokenizer(func::FuncOp func, llvm::StringRef api, func->setAttr(kTFImplements, attr); OpBuilder builder(func.getBody()); std::string empty_option_buffer; - auto op = builder.create( - func.getLoc(), func.getFunctionType().getResults(), func.getArguments(), - api, CustomOption(&builder, empty_option_buffer)); - builder.create(func.getLoc(), op.getResults()); + auto op = CustomOp::create( + builder, func.getLoc(), func.getFunctionType().getResults(), + func.getArguments(), api, CustomOption(&builder, empty_option_buffer)); + func::ReturnOp::create(builder, func.getLoc(), op.getResults()); return success(); } @@ -267,10 +267,10 @@ LogicalResult ConvertNgrams(func::FuncOp func, llvm::StringRef api, custom_option_buffer))) { return failure(); } - auto op = builder.create( - func.getLoc(), func.getFunctionType().getResults(), func.getArguments(), - api, CustomOption(&builder, custom_option_buffer)); - builder.create(func.getLoc(), op.getResults()); + auto op = CustomOp::create( + builder, func.getLoc(), func.getFunctionType().getResults(), + func.getArguments(), api, CustomOption(&builder, custom_option_buffer)); + func::ReturnOp::create(builder, func.getLoc(), op.getResults()); return success(); } @@ -350,10 +350,10 @@ LogicalResult ConvertSgnnProjection(func::FuncOp func, llvm::StringRef api, custom_option_buffer))) { return failure(); } - auto op = builder.create( - func.getLoc(), func.getFunctionType().getResults(), func.getArguments(), - api, CustomOption(&builder, custom_option_buffer)); - builder.create(func.getLoc(), op.getResults()); + auto op = CustomOp::create( + builder, func.getLoc(), func.getFunctionType().getResults(), + func.getArguments(), api, CustomOption(&builder, custom_option_buffer)); + func::ReturnOp::create(builder, func.getLoc(), op.getResults()); return success(); } } // namespace diff --git a/tensorflow/compiler/mlir/lite/utils/utils.td b/tensorflow/compiler/mlir/lite/utils/utils.td index 7583d48618f4fc..d38cf411ea9f2c 100644 --- a/tensorflow/compiler/mlir/lite/utils/utils.td +++ b/tensorflow/compiler/mlir/lite/utils/utils.td @@ -136,7 +136,7 @@ def HasSameStaticShapes : Constraint< "have the same static shape">; def CreateNoneValue : NativeCodeCall< - "$_builder.create($0.getLoc(), $_builder.getUnitAttr())">; + "TFL::NoValueOp::create($_builder, $0.getLoc(), $_builder.getUnitAttr())">; // Returns shape of a ranked tensor. // if called without a ranked tensor it will fail. 
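Nearly every hunk in this patch applies the same mechanical migration of MLIR op construction: the OpBuilder member template builder.create<OpT>(loc, args...) is replaced by the static OpT::create(builder, loc, args...) form, and the TableGen NativeCodeCall strings in utils.td and legalize_tf_patterns.td receive the equivalent rewrite with $_builder. A minimal sketch of the two call styles, modeled on the CreateF32SplatConst helper touched in lstm_utils.cc above and assuming the static create() overloads available in current MLIR:

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"

// Builds a scalar f32 splat constant at the builder's current insertion
// point; mirrors CreateF32SplatConst from lstm_utils.cc.
mlir::Value MakeF32SplatConst(mlir::OpBuilder& builder, mlir::Location loc,
                              float val) {
  auto type = mlir::RankedTensorType::get({}, builder.getF32Type());
  auto attr = mlir::DenseElementsAttr::get(type, val);
  // Old form (removed by this patch): member template on OpBuilder.
  //   return builder.create<mlir::arith::ConstantOp>(loc, type, attr);
  // New form (introduced by this patch): static create() on the op class,
  // with the builder passed explicitly as the first argument.
  return mlir::arith::ConstantOp::create(builder, loc, type, attr);
}

The same one-line change repeats across the lite, quantization, stablehlo, and tensorflow passes; only the op class and argument list differ per call site.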
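A second recurring change, visible in the calibration_statistics_saver_op_test.cc and save_variables_test.cc hunks further down, replaces the TF_CHECK_OK macro (and the xla/tsl/platform:status dependency that provided it) with Abseil's CHECK_OK from absl/log/check.h, adding @com_google_absl//absl/log:check to the test deps. A standalone sketch of the replacement, using a hypothetical status-returning helper in place of the real calls:

#include "absl/log/check.h"
#include "absl/status/status.h"

// Hypothetical stand-in for the status-returning calls in the tests
// (NodeDefBuilder::Finalize, InitOp, RunOpKernel, ReadBinaryProto, ...).
absl::Status DoStep() { return absl::OkStatus(); }

int main() {
  // Previously: TF_CHECK_OK(DoStep());
  // CHECK_OK likewise aborts with the status message when the status is not
  // OK, so call sites change only in the macro name and the header behind it.
  CHECK_OK(DoStep());
  return 0;
}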
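The convert_tf_quant_to_mhlo_int_test.cc hunk below also drops the blocking PjRtBuffer::ToLiteralSync() convenience call in favor of the future-based ToLiteral() awaited explicitly. A sketch of such a call site, with the signature written out under the assumption that ToLiteral() returns a PjRtFuture whose Await() yields the same StatusOr the old call produced:

#include <memory>

#include "absl/status/statusor.h"
#include "xla/literal.h"
#include "xla/pjrt/pjrt_client.h"

// Copies a device buffer back to the host, as the test does after execution.
absl::StatusOr<std::shared_ptr<xla::Literal>> ReadBack(xla::PjRtBuffer& buffer) {
  // Previously: return buffer.ToLiteralSync();
  // ToLiteral() enqueues the transfer and returns a future; Await() blocks
  // until it completes and surfaces any failure as a non-OK status.
  return buffer.ToLiteral().Await();
}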
diff --git a/tensorflow/compiler/mlir/quantization/common/attrs_and_constraints.h b/tensorflow/compiler/mlir/quantization/common/attrs_and_constraints.h index e94f9359d6fad2..89896d69079c28 100644 --- a/tensorflow/compiler/mlir/quantization/common/attrs_and_constraints.h +++ b/tensorflow/compiler/mlir/quantization/common/attrs_and_constraints.h @@ -89,12 +89,12 @@ Value CreateConstValue(OpBuilder& builder, const Location loc, RankedTensorType::get(shape, builder.getIntegerType(sizeof(T) * 8)); const auto attr = DenseIntElementsAttr::get(shape_type, values); - return builder.create(loc, attr); + return TF::ConstOp::create(builder, loc, attr); } const auto type = RankedTensorType::get(shape, builder.getF32Type()); const auto value_attr = DenseFPElementsAttr::get(type, values); - return builder.create(loc, value_attr); + return TF::ConstOp::create(builder, loc, value_attr); } // Creates a 1D array with integer/float type. diff --git a/tensorflow/compiler/mlir/quantization/common/quantization_lib/quantization_utils.h b/tensorflow/compiler/mlir/quantization/common/quantization_lib/quantization_utils.h index 324b70c8fbe573..5f43083540831f 100644 --- a/tensorflow/compiler/mlir/quantization/common/quantization_lib/quantization_utils.h +++ b/tensorflow/compiler/mlir/quantization/common/quantization_lib/quantization_utils.h @@ -288,10 +288,10 @@ struct ConvertStatsToQDQs rewriter.setInsertionPointAfter(op.getOperation()); Type result_type = quant_type.castFromExpressedType(op.getType()); auto q = - rewriter.create(op.getLoc(), result_type, op.getArg()); + QuantizeOpT::create(rewriter, op.getLoc(), result_type, op.getArg()); q->setAttr(kVolatileOpAttrName, rewriter.getUnitAttr()); - auto dq = rewriter.create(op.getLoc(), op.getType(), q); + auto dq = DequantizeOpT::create(rewriter, op.getLoc(), op.getType(), q); op.getResult().replaceAllUsesWith(dq); q.getOperation()->replaceUsesOfWith(dq, op.getArg()); op.erase(); @@ -644,8 +644,8 @@ class QuantizationPattern : public RewritePattern { if (!matchPattern(q.getOperand(), m_Constant(&attr))) { continue; } - auto cst = rewriter.create( - quantized_op->getLoc(), attr); + auto cst = arith::ConstantOp::create(rewriter, + quantized_op->getLoc(), attr); quantizing_op->setOperand(i, cst.getResult()); } } diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_to_mhlo_int_test.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_to_mhlo_int_test.cc index 0818c8013e534e..4203d7824844f9 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_to_mhlo_int_test.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_to_mhlo_int_test.cc @@ -125,9 +125,9 @@ class ConvertTfQuantToMhloIntTest : public Test { // can't lower tf.Const. 
Value cst; if (use_mhlo_const) { - cst = builder.create(func_op->getLoc(), attrs); + cst = mhlo::ConstantOp::create(builder, func_op->getLoc(), attrs); } else { - cst = builder.create(func_op->getLoc(), attrs); + cst = TF::ConstOp::create(builder, func_op->getLoc(), attrs); } func_op.getArgument(i).replaceAllUsesWith(cst); } @@ -180,7 +180,7 @@ class ConvertTfQuantToMhloIntTest : public Test { /*byte_strides=*/std::nullopt, host_buffer_semantics, /*on_done_with_host_buffer=*/nullptr, *device_->default_memory_space(), /*device_layout=*/nullptr)); - return buffer->ToLiteralSync(); + return buffer->ToLiteral().Await(); } absl::StatusOr> CompileProgram( @@ -220,7 +220,7 @@ class ConvertTfQuantToMhloIntTest : public Test { TF_ASSIGN_OR_RETURN(auto result, executable->Execute({buffer_ptrs}, /*options=*/{})); CHECK(result.size() == 1 && result[0].size() == 1); - return result[0][0]->ToLiteralSync(); + return result[0][0]->ToLiteral().Await(); } void ExecuteAndCompareResultsWithTfKernel( diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types.cc index 46d408b06d05e4..cc63c246434934 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/bridge/convert_tf_quant_types.cc @@ -241,8 +241,8 @@ class TFUniformQuantizedOpsPattern : public ConversionPattern { Type orig_op_type = op->getOperandTypes()[i]; if (IsIllegalType(orig_op_type) && !IsQintValueDefinedByIntToQintCast(op->getOperand(i))) { - new_operands.push_back(rewriter.create( - op->getLoc(), orig_op_type, operands[i])); + new_operands.push_back(TF::CastOp::create(rewriter, op->getLoc(), + orig_op_type, operands[i])); } else { new_operands.push_back(operands[i]); } @@ -261,8 +261,8 @@ class TFUniformQuantizedOpsPattern : public ConversionPattern { Value &result = new_results[i]; if (IsIllegalType(result.getType()) && !IsQintValueQintToIntCast(op->getResult(i))) { - result = rewriter.create( - op->getLoc(), ToLegalType(result.getType()), result); + result = TF::CastOp::create(rewriter, op->getLoc(), + ToLegalType(result.getType()), result); } // If the result is already consumed by qint->int CastOp, manually replace // its use by the new UQ op. This is because such CastOp is already legal, diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/convert_func_to_bfloat16.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/convert_func_to_bfloat16.cc index b7903b433757b5..1dd93a9b2c165e 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/convert_func_to_bfloat16.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/convert_func_to_bfloat16.cc @@ -177,15 +177,17 @@ class BitcastConvertOpPattern return failure(); } else if (is_input_legal) { // output is f32, we bitcast_convert to f32 and then convert to bf16. - const Value output = rewriter.create( - op->getLoc(), op.getResult().getType(), adaptor.getOperand()); + const Value output = mlir::stablehlo::BitcastConvertOp::create( + rewriter, op->getLoc(), op.getResult().getType(), + adaptor.getOperand()); rewriter.replaceOpWithNewOp( op, getTypeConverter()->convertType(op.getResult().getType()), output); } else if (is_output_legal) { // input is f32, we convert from bf16 and then bitcast_convert. 
- const Value output = rewriter.create( - op->getLoc(), op.getOperand().getType(), adaptor.getOperand()); + const Value output = mlir::stablehlo::ConvertOp::create( + rewriter, op->getLoc(), op.getOperand().getType(), + adaptor.getOperand()); rewriter.replaceOpWithNewOp( op, op.getResult().getType(), output); } else { diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/convert_xla_call_module_op_to_bfloat16.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/convert_xla_call_module_op_to_bfloat16.cc index 7ee6bbd98f61e6..a63ffb1504bd85 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/convert_xla_call_module_op_to_bfloat16.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/convert_xla_call_module_op_to_bfloat16.cc @@ -123,8 +123,8 @@ void ConvertXlaCallModuleOpToBfloat16Pass::runOnOperation() { builder.setInsertionPoint(op); for (auto& op_operand : op->getOpOperands()) { if (quant::stablehlo::IsLargeFloatType(op_operand.get().getType())) { - op_operand.set(builder.create( - op->getLoc(), + op_operand.set(TF::CastOp::create( + builder, op->getLoc(), quant::stablehlo::ToBfloat16Type(op_operand.get().getType()), op_operand.get())); } @@ -135,7 +135,7 @@ void ConvertXlaCallModuleOpToBfloat16Pass::runOnOperation() { const Type original_type = op_result.getType(); op_result.setType(quant::stablehlo::ToBfloat16Type(original_type)); const Value cast = - builder.create(op->getLoc(), original_type, op_result); + TF::CastOp::create(builder, op->getLoc(), original_type, op_result); op_result.replaceAllUsesExcept(cast, cast.getDefiningOp()); } } diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/defer_activation_transpose.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/defer_activation_transpose.cc index 0fdefd7342624c..08befa7708297c 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/defer_activation_transpose.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/defer_activation_transpose.cc @@ -58,8 +58,8 @@ LogicalResult IsTransposeOpWithPermuation(Operation* absl_nullable op, // The Location is set as `input`'s loc. TransposeOp CreateTransposeOp(Value input, const ArrayRef permutation, PatternRewriter& rewriter) { - return rewriter.create( - input.getLoc(), input, rewriter.getDenseI64ArrayAttr(permutation)); + return TransposeOp::create(rewriter, input.getLoc(), input, + rewriter.getDenseI64ArrayAttr(permutation)); } // Defers the transpose of the left-hand side (LHS) to the right-hand side and @@ -77,7 +77,7 @@ void DeferRhsTransposeForBinaryOp(OpT op, PatternRewriter& rewriter) { /*input=*/rhs, kNchwToNhwcPermutation, rewriter); auto new_binary_op = - rewriter.create(op.getLoc(), lhs_pre_transpose, rhs_transpose_op); + OpT::create(rewriter, op.getLoc(), lhs_pre_transpose, rhs_transpose_op); // NHWC -> NCHW for the output, to match the shapes of `op`'s users. TransposeOp output_transpose_op = CreateTransposeOp( @@ -166,23 +166,22 @@ class DeferActivationTransposeForMaxPoolReduceWindowOp // Create a new `stablehlo.reduce_window` with all relevant attributes // permutated to match the new operand & result type. 
- auto new_reduce_window_op = - rewriter.create( - op.getLoc(), new_result_type, transpose_op.getOperand(), - /*init_value=*/op.getOperand(1), - /*window_dimensions=*/ - PermuteI64ArrayAttr(rewriter, op.getWindowDimensions(), - kNchwToNhwcPermutation), - /*window_strides=*/ - PermuteI64ArrayAttr(rewriter, op.getWindowStrides(), - kNchwToNhwcPermutation), - /*base_dilations=*/ - PermuteI64ArrayAttr(rewriter, op.getBaseDilations(), - kNchwToNhwcPermutation), - /*window_dilations=*/ - PermuteI64ArrayAttr(rewriter, op.getWindowDilations(), - kNchwToNhwcPermutation), - /*padding=*/DenseIntElementsAttr(nullptr)); + auto new_reduce_window_op = mlir::stablehlo::ReduceWindowOp::create( + rewriter, op.getLoc(), new_result_type, transpose_op.getOperand(), + /*init_value=*/op.getOperand(1), + /*window_dimensions=*/ + PermuteI64ArrayAttr(rewriter, op.getWindowDimensions(), + kNchwToNhwcPermutation), + /*window_strides=*/ + PermuteI64ArrayAttr(rewriter, op.getWindowStrides(), + kNchwToNhwcPermutation), + /*base_dilations=*/ + PermuteI64ArrayAttr(rewriter, op.getBaseDilations(), + kNchwToNhwcPermutation), + /*window_dilations=*/ + PermuteI64ArrayAttr(rewriter, op.getWindowDilations(), + kNchwToNhwcPermutation), + /*padding=*/DenseIntElementsAttr(nullptr)); // Clone the reduce body. It is not affected by the permutation. IRMapping mapping; diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/fold_constant_transpose.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/fold_constant_transpose.cc index 699f157e3d1b39..f4648f9a0a0362 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/fold_constant_transpose.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/fold_constant_transpose.cc @@ -160,8 +160,8 @@ class FoldTransposedConstantOp /*elementType=*/rewriter.getF32Type()); auto new_value_attr = DenseFPElementsAttr::get(new_value_type, std::move(transposed_values)); - auto new_const_op = rewriter.create( - combined_loc, new_value_attr); + auto new_const_op = mlir::stablehlo::ConstantOp::create( + rewriter, combined_loc, new_value_attr); rewriter.replaceAllUsesWith(op, new_const_op); return success(); diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/insert_calibration_statistics_saver.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/insert_calibration_statistics_saver.cc index e855c51749e6d5..05a826b14b010a 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/insert_calibration_statistics_saver.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/insert_calibration_statistics_saver.cc @@ -86,9 +86,9 @@ LogicalResult InsertCalibrationStatisticsSaverOp( ArrayAttr ids_attr = builder.getStrArrayAttr(ids); ArrayAttr calibration_methods_attr = builder.getI32ArrayAttr(calibration_methods); - builder.create( - region.getLoc(), statistics_outputs, output_file_path_attr, ids_attr, - calibration_methods_attr); + TF::CalibrationStatisticsSaverOp::create( + builder, region.getLoc(), statistics_outputs, output_file_path_attr, + ids_attr, calibration_methods_attr); return success(); } diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/merge_fusion_with_dequantize.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/merge_fusion_with_dequantize.cc index c72879c2e04a4d..71a5b35e351495 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/merge_fusion_with_dequantize.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/merge_fusion_with_dequantize.cc @@ -114,20 +114,21 @@ 
class MergeFusionWithUniformDequantizePattern // Modify the quantized fused function to do dequantize+relu(6). rewriter.setInsertionPoint(req_op); - Value new_result = rewriter.create( - req_op.getLoc(), func_op.getResultTypes()[0], req_op.getOperand()); + Value new_result = mlir::stablehlo::UniformDequantizeOp::create( + rewriter, req_op.getLoc(), func_op.getResultTypes()[0], + req_op.getOperand()); if (func_name.contains("_relu6_")) { - auto min = rewriter.create( - req_op.getLoc(), rewriter.getF32FloatAttr(0)); - auto max = rewriter.create( - req_op.getLoc(), rewriter.getF32FloatAttr(6)); - new_result = rewriter.create( - req_op.getLoc(), min, new_result, max); + auto min = mlir::stablehlo::ConstantOp::create( + rewriter, req_op.getLoc(), rewriter.getF32FloatAttr(0)); + auto max = mlir::stablehlo::ConstantOp::create( + rewriter, req_op.getLoc(), rewriter.getF32FloatAttr(6)); + new_result = mlir::stablehlo::ClampOp::create(rewriter, req_op.getLoc(), + min, new_result, max); } else if (func_name.contains("_relu_")) { - auto min = rewriter.create( - req_op.getLoc(), rewriter.getF32FloatAttr(0)); - new_result = rewriter.create( - req_op.getLoc(), min, new_result, nullptr); + auto min = mlir::stablehlo::ConstantOp::create( + rewriter, req_op.getLoc(), rewriter.getF32FloatAttr(0)); + new_result = mlir::chlo::BroadcastMaxOp::create(rewriter, req_op.getLoc(), + min, new_result, nullptr); } return_op->setOperand(0, new_result); rewriter.eraseOp(req_op); diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/nchw_convolution_to_nhwc.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/nchw_convolution_to_nhwc.cc index 51950c5513c5df..1c425487799962 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/nchw_convolution_to_nhwc.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/nchw_convolution_to_nhwc.cc @@ -76,8 +76,9 @@ class RewriteNchwConvolutionToNhwc const TensorType new_input_tensor_type = GetTransposedTensorType( mlir::cast(input.getType()), kNchwToNhwcPermutation); - auto input_transpose_op = rewriter.create( - op.getLoc(), /*resultType0=*/new_input_tensor_type, /*operand=*/input, + auto input_transpose_op = mlir::stablehlo::TransposeOp::create( + rewriter, op.getLoc(), /*resultType0=*/new_input_tensor_type, + /*operand=*/input, rewriter.getDenseI64ArrayAttr(kNchwToNhwcPermutation)); // Transpose the filter tensor: [o, i, 0, 1] => [0, 1, i, o] @@ -85,8 +86,9 @@ class RewriteNchwConvolutionToNhwc const TensorType new_filter_tensor_type = GetTransposedTensorType( mlir::cast(filter.getType()), kOihwToHwioPermutation); - auto filter_transpose_op = rewriter.create( - op.getLoc(), /*resultType0=*/new_filter_tensor_type, /*operand=*/filter, + auto filter_transpose_op = mlir::stablehlo::TransposeOp::create( + rewriter, op.getLoc(), /*resultType0=*/new_filter_tensor_type, + /*operand=*/filter, rewriter.getDenseI64ArrayAttr(kOihwToHwioPermutation)); // [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f] @@ -108,8 +110,8 @@ class RewriteNchwConvolutionToNhwc // reused without modification because the ordering of spatial dimensions // is not modified (i.e. before: [b, f, 0, 1], after: [b, 0, 1, f] => the // spatial dimension is still ordered as {0, 1}). 
- auto new_convolution_op = rewriter.create( - op.getLoc(), /*resultType0=*/new_conv_output_tensor_type, + auto new_convolution_op = mlir::stablehlo::ConvolutionOp::create( + rewriter, op.getLoc(), /*resultType0=*/new_conv_output_tensor_type, /*lhs=*/input_transpose_op, /*rhs=*/filter_transpose_op, /*window_strides=*/op.getWindowStridesAttr(), @@ -125,8 +127,9 @@ class RewriteNchwConvolutionToNhwc // Transpose the output of the `ConvolutionOp` back to the original op's // output shape so that users' shapes match. // [b, 0, 1, f] => [b, f, 0, 1] - auto output_transpose_op = rewriter.create( - new_convolution_op.getLoc(), /*resultType0=*/output_tensor_type, + auto output_transpose_op = mlir::stablehlo::TransposeOp::create( + rewriter, new_convolution_op.getLoc(), + /*resultType0=*/output_tensor_type, /*operand=*/new_convolution_op, rewriter.getDenseI64ArrayAttr(kNhwcToNchwPermutation)); diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/prepare_quantize.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/prepare_quantize.cc index 2102e64f223d55..4dff113b6427c9 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/prepare_quantize.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/prepare_quantize.cc @@ -95,8 +95,8 @@ class MergeConsecutiveQuantizeCast q_op.getArg().getDefiningOp(); if (!preceding_qcast) return failure(); - auto new_qcast = rewriter.create( - q_op.getLoc(), q_op.getType(), preceding_qcast.getArg()); + auto new_qcast = mlir::quant::ir::QuantizeCastOp::create( + rewriter, q_op.getLoc(), q_op.getType(), preceding_qcast.getArg()); new_qcast->setAttr(kVolatileOpAttrName, rewriter.getUnitAttr()); q_op->replaceAllUsesWith(new_qcast); return success(); diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/quantize_weight.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/quantize_weight.cc index 9d0a978bdb8efc..e65d5423458f50 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/quantize_weight.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/quantize_weight.cc @@ -163,8 +163,8 @@ class QuantizeWeight : public OpRewritePattern { } } rewriter.setInsertionPointAfter(op); - ConvertOp new_convert_op = rewriter.create( - op->getLoc(), new_result_type, op.getResult()); + ConvertOp new_convert_op = ConvertOp::create( + rewriter, op->getLoc(), new_result_type, op.getResult()); quantizable_op->setOperand(quantize_operand_num, new_convert_op.getResult()); } @@ -203,10 +203,10 @@ class QuantizeWeight : public OpRewritePattern { // of its number of users. rewriter.setInsertionPointAfter(op); // create new F16 constant op in that location - ConstantOp new_const = rewriter.create( - op->getLoc(), new_result_type, new_value_attr); + ConstantOp new_const = ConstantOp::create( + rewriter, op->getLoc(), new_result_type, new_value_attr); ConvertOp dcast = - rewriter.create(op->getLoc(), old_result_type, new_const); + ConvertOp::create(rewriter, op->getLoc(), old_result_type, new_const); // replace all convert ops with dq op. 
convert_op->replaceAllUsesWith(dcast); // Return without scanning for the next ConvertOp as only one ConvertOp is diff --git a/tensorflow/compiler/mlir/quantization/stablehlo/passes/unwrap_xla_call_module_op.cc b/tensorflow/compiler/mlir/quantization/stablehlo/passes/unwrap_xla_call_module_op.cc index ac1f5e8d705d49..46da2a3f25b82c 100644 --- a/tensorflow/compiler/mlir/quantization/stablehlo/passes/unwrap_xla_call_module_op.cc +++ b/tensorflow/compiler/mlir/quantization/stablehlo/passes/unwrap_xla_call_module_op.cc @@ -82,8 +82,8 @@ void UnwrapXlaCallModuleOp(TF::XlaCallModuleOp call_op, // TODO: b/310291615 - find a better way for multi-platform support. if (call_op_has_platform_index_arg) { arg_mapper.map(func_op.getArgument(0), - builder.create( - func_op.getLoc(), builder.getI16IntegerAttr(0))); + mhlo::ConstantOp::create(builder, func_op.getLoc(), + builder.getI16IntegerAttr(0))); } for (auto [func_arg, operand] : llvm::zip_equal( func_op.getArguments().take_back(call_op.getNumOperands()), diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/calibrator/BUILD b/tensorflow/compiler/mlir/quantization/tensorflow/calibrator/BUILD index 4ec998d18bcfa3..b06568589dadf2 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/calibrator/BUILD +++ b/tensorflow/compiler/mlir/quantization/tensorflow/calibrator/BUILD @@ -221,9 +221,9 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/status:status_matchers", "@com_google_googletest//:gtest", "@local_xla//xla/tsl/platform:errors", - "@local_xla//xla/tsl/platform:status", ], ) diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/calibrator/calibration_statistics_saver_op_test.cc b/tensorflow/compiler/mlir/quantization/tensorflow/calibrator/calibration_statistics_saver_op_test.cc index fd3acb188656a1..7f8f2623b7bfa0 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/calibrator/calibration_statistics_saver_op_test.cc +++ b/tensorflow/compiler/mlir/quantization/tensorflow/calibrator/calibration_statistics_saver_op_test.cc @@ -19,11 +19,11 @@ limitations under the License. 
#include #include +#include "absl/log/check.h" #include "absl/status/status_matchers.h" #include "tensorflow/compiler/mlir/quantization/stablehlo/quantization_config.pb.h" #include "tensorflow/compiler/mlir/quantization/tensorflow/calibrator/calibration_statistics.pb.h" #include "xla/tsl/platform/errors.h" -#include "xla/tsl/platform/status.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.pb.h" @@ -55,11 +55,11 @@ TEST_F(CalibrationStatisticsSaverTest, MissingOutputPath) { inputs.emplace_back("min", 0, DT_FLOAT); inputs.emplace_back("max", 0, DT_FLOAT); - TF_CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") - .Input(inputs) - .Attr("ids", ids) - .Attr("calibration_methods", calibration_methods) - .Finalize(node_def())); + CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") + .Input(inputs) + .Attr("ids", ids) + .Attr("calibration_methods", calibration_methods) + .Finalize(node_def())); ASSERT_THAT(InitOp(), absl_testing::StatusIs( tsl::error::INVALID_ARGUMENT, @@ -75,12 +75,12 @@ TEST_F(CalibrationStatisticsSaverTest, WrongNumInputs) { inputs.emplace_back("min", 0, DT_FLOAT); inputs.emplace_back("max", 0, DT_FLOAT); - TF_CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") - .Input(inputs) - .Attr("ids", ids) - .Attr("calibration_methods", calibration_methods) - .Attr("output_file_path", "/tmp/statistics.pbtxt") - .Finalize(node_def())); + CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") + .Input(inputs) + .Attr("ids", ids) + .Attr("calibration_methods", calibration_methods) + .Attr("output_file_path", "/tmp/statistics.pbtxt") + .Finalize(node_def())); ASSERT_THAT(InitOp(), absl_testing::StatusIs( tsl::error::ABORTED, @@ -98,12 +98,12 @@ TEST_F(CalibrationStatisticsSaverTest, WrongInputTypes) { inputs.emplace_back("max", 0, DT_FLOAT); inputs.emplace_back("histogram", 0, DT_FLOAT); - TF_CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") - .Input(inputs) - .Attr("ids", ids) - .Attr("calibration_methods", calibration_methods) - .Attr("output_file_path", "/tmp/statistics.pbtxt") - .Finalize(node_def())); + CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") + .Input(inputs) + .Attr("ids", ids) + .Attr("calibration_methods", calibration_methods) + .Attr("output_file_path", "/tmp/statistics.pbtxt") + .Finalize(node_def())); ASSERT_THAT(InitOp(), absl_testing::StatusIs( tsl::error::ABORTED, @@ -123,24 +123,23 @@ TEST_F(CalibrationStatisticsSaverTest, SimpleMinMax) { const std::string dir = testing::TmpDir(); const std::string output_file_path = io::JoinPath(dir, "statistics.pbtxt"); - TF_CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") - .Input(inputs) - .Attr("ids", ids) - .Attr("calibration_methods", calibration_methods) - .Attr("output_file_path", output_file_path) - .Finalize(node_def())); - TF_CHECK_OK(InitOp()); + CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") + .Input(inputs) + .Attr("ids", ids) + .Attr("calibration_methods", calibration_methods) + .Attr("output_file_path", output_file_path) + .Finalize(node_def())); + CHECK_OK(InitOp()); AddInputFromArray(TensorShape({}), {1.f}); AddInputFromArray(TensorShape({}), {5.f}); AddInputFromArray(TensorShape({0}), {}); - TF_CHECK_OK(RunOpKernel()); + CHECK_OK(RunOpKernel()); kernel_.reset(); CalibrationStatisticsMap statistics_map; - TF_CHECK_OK( - ReadBinaryProto(Env::Default(), output_file_path, &statistics_map)); + CHECK_OK(ReadBinaryProto(Env::Default(), 
output_file_path, &statistics_map)); ASSERT_THAT(statistics_map.statistics(), SizeIs(1)); ASSERT_THAT(statistics_map.statistics(), ElementsAre(Key("1"))); @@ -163,24 +162,23 @@ TEST_F(CalibrationStatisticsSaverTest, SimpleAverageMinMax) { const std::string dir = testing::TmpDir(); const std::string output_file_path = io::JoinPath(dir, "statistics.pbtxt"); - TF_CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") - .Input(inputs) - .Attr("ids", ids) - .Attr("calibration_methods", calibration_methods) - .Attr("output_file_path", output_file_path) - .Finalize(node_def())); - TF_CHECK_OK(InitOp()); + CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") + .Input(inputs) + .Attr("ids", ids) + .Attr("calibration_methods", calibration_methods) + .Attr("output_file_path", output_file_path) + .Finalize(node_def())); + CHECK_OK(InitOp()); AddInputFromArray(TensorShape({}), {1.f}); AddInputFromArray(TensorShape({}), {5.f}); AddInputFromArray(TensorShape({0}), {}); - TF_CHECK_OK(RunOpKernel()); + CHECK_OK(RunOpKernel()); kernel_.reset(); CalibrationStatisticsMap statistics_map; - TF_CHECK_OK( - ReadBinaryProto(Env::Default(), output_file_path, &statistics_map)); + CHECK_OK(ReadBinaryProto(Env::Default(), output_file_path, &statistics_map)); ASSERT_THAT(statistics_map.statistics(), SizeIs(1)); ASSERT_THAT(statistics_map.statistics(), ElementsAre(Key("1"))); @@ -204,24 +202,23 @@ TEST_F(CalibrationStatisticsSaverTest, SimpleHistogram) { const std::string dir = testing::TmpDir(); const std::string output_file_path = io::JoinPath(dir, "statistics.pbtxt"); - TF_CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") - .Input(inputs) - .Attr("ids", ids) - .Attr("calibration_methods", calibration_methods) - .Attr("output_file_path", output_file_path) - .Finalize(node_def())); - TF_CHECK_OK(InitOp()); + CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") + .Input(inputs) + .Attr("ids", ids) + .Attr("calibration_methods", calibration_methods) + .Attr("output_file_path", output_file_path) + .Finalize(node_def())); + CHECK_OK(InitOp()); AddInputFromArray(TensorShape({}), {1.f}); AddInputFromArray(TensorShape({}), {5.f}); AddInputFromArray(TensorShape({8}), {1, 4, 6, 7, 3, 2, 1, 0}); - TF_CHECK_OK(RunOpKernel()); + CHECK_OK(RunOpKernel()); kernel_.reset(); CalibrationStatisticsMap statistics_map; - TF_CHECK_OK( - ReadBinaryProto(Env::Default(), output_file_path, &statistics_map)); + CHECK_OK(ReadBinaryProto(Env::Default(), output_file_path, &statistics_map)); ASSERT_THAT(statistics_map.statistics(), SizeIs(1)); ASSERT_THAT(statistics_map.statistics(), ElementsAre(Key("1"))); @@ -250,13 +247,13 @@ TEST_F(CalibrationStatisticsSaverTest, MultipleStats) { const std::string dir = testing::TmpDir(); const std::string output_file_path = io::JoinPath(dir, "statistics.pbtxt"); - TF_CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") - .Input(inputs) - .Attr("ids", ids) - .Attr("calibration_methods", calibration_methods) - .Attr("output_file_path", output_file_path) - .Finalize(node_def())); - TF_CHECK_OK(InitOp()); + CHECK_OK(NodeDefBuilder("op", "CalibrationStatisticsSaver") + .Input(inputs) + .Attr("ids", ids) + .Attr("calibration_methods", calibration_methods) + .Attr("output_file_path", output_file_path) + .Finalize(node_def())); + CHECK_OK(InitOp()); AddInputFromArray(TensorShape({}), {1.f}); AddInputFromArray(TensorShape({}), {5.f}); @@ -265,12 +262,11 @@ TEST_F(CalibrationStatisticsSaverTest, MultipleStats) { AddInputFromArray(TensorShape({}), {5.f}); 
AddInputFromArray(TensorShape({8}), {1, 4, 6, 7, 3, 2, 1, 0}); - TF_CHECK_OK(RunOpKernel()); + CHECK_OK(RunOpKernel()); kernel_.reset(); CalibrationStatisticsMap statistics_map; - TF_CHECK_OK( - ReadBinaryProto(Env::Default(), output_file_path, &statistics_map)); + CHECK_OK(ReadBinaryProto(Env::Default(), output_file_path, &statistics_map)); ASSERT_THAT(statistics_map.statistics(), SizeIs(2)); ASSERT_THAT(statistics_map.statistics(), Contains(Key("1"))); ASSERT_THAT(statistics_map.statistics(), Contains(Key("2"))); diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/cc/BUILD b/tensorflow/compiler/mlir/quantization/tensorflow/cc/BUILD index 6fe5bd285f8f50..99f2d2a52e1a92 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/cc/BUILD +++ b/tensorflow/compiler/mlir/quantization/tensorflow/cc/BUILD @@ -54,6 +54,7 @@ tf_cc_test( "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/util/tensor_bundle", "@com_google_absl//absl/cleanup", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/status:status_matchers", "@com_google_absl//absl/status:statusor", @@ -62,7 +63,6 @@ tf_cc_test( "@llvm-project//mlir:FuncDialect", "@llvm-project//mlir:IR", "@llvm-project//mlir:Parser", - "@local_xla//xla/tsl/platform:status", ], ) diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/cc/constant_fold.cc b/tensorflow/compiler/mlir/quantization/tensorflow/cc/constant_fold.cc index 5cef40a8e77c0e..42bf32a27e7bee 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/cc/constant_fold.cc +++ b/tensorflow/compiler/mlir/quantization/tensorflow/cc/constant_fold.cc @@ -67,7 +67,7 @@ LogicalResult FoldOperation(OpBuilder& builder, Operation* op, results.clear(); builder.setInsertionPointAfter(op); for (const auto& result_value : result_values) { - results.push_back(builder.create(op->getLoc(), result_value)); + results.push_back(TF::ConstOp::create(builder, op->getLoc(), result_value)); } return success(); } diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/cc/save_variables_test.cc b/tensorflow/compiler/mlir/quantization/tensorflow/cc/save_variables_test.cc index 2fca9426c9d59f..3c5d085d7655aa 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/cc/save_variables_test.cc +++ b/tensorflow/compiler/mlir/quantization/tensorflow/cc/save_variables_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "absl/cleanup/cleanup.h" +#include "absl/log/check.h" #include "absl/status/status.h" #include "absl/status/status_matchers.h" #include "absl/status/statusor.h" @@ -31,7 +32,6 @@ limitations under the License. 
#include "mlir/Parser/Parser.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_dialect.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" -#include "xla/tsl/platform/status.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/framework/types.pb.h" @@ -63,7 +63,7 @@ class SaveVariablesToCheckpointTest : public ::testing::Test { return absl::InternalError("Failed to create temp file."); } - TF_CHECK_OK(env_->CreateDir(tmp_dir)); + CHECK_OK(env_->CreateDir(tmp_dir)); return tmp_dir; } @@ -103,8 +103,8 @@ TEST_F(SaveVariablesToCheckpointTest, VariableSavedToCheckpoint) { const absl::Cleanup checkpoint_prefix_cleanup = [this, &checkpoint_prefix]() { int64_t undeleted_files, undeleted_dirs; - TF_CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, - &undeleted_dirs)); + CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, + &undeleted_dirs)); }; const absl::StatusOr> variable_shared_names = @@ -149,8 +149,8 @@ TEST_F(SaveVariablesToCheckpointTest, MultipleVariablesSavedToCheckpoint) { const absl::Cleanup checkpoint_prefix_cleanup = [this, &checkpoint_prefix]() { int64_t undeleted_files, undeleted_dirs; - TF_CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, - &undeleted_dirs)); + CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, + &undeleted_dirs)); }; const absl::StatusOr> variable_shared_names = @@ -186,8 +186,8 @@ TEST_F(SaveVariablesToCheckpointTest, const absl::Cleanup checkpoint_prefix_cleanup = [this, &checkpoint_prefix]() { int64_t undeleted_files, undeleted_dirs; - TF_CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, - &undeleted_dirs)); + CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, + &undeleted_dirs)); }; const absl::StatusOr> variable_shared_names = @@ -218,8 +218,8 @@ TEST_F(SaveVariablesToCheckpointTest, const absl::Cleanup checkpoint_prefix_cleanup = [this, &checkpoint_prefix]() { int64_t undeleted_files, undeleted_dirs; - TF_CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, - &undeleted_dirs)); + CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, + &undeleted_dirs)); }; EXPECT_TRUE( @@ -253,8 +253,8 @@ TEST_F(SaveVariablesToCheckpointTest, const absl::Cleanup checkpoint_prefix_cleanup = [this, &checkpoint_prefix]() { int64_t undeleted_files, undeleted_dirs; - TF_CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, - &undeleted_dirs)); + CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, + &undeleted_dirs)); }; const absl::StatusOr> variable_shared_names = @@ -293,8 +293,8 @@ TEST_F(SaveVariablesToCheckpointTest, MutableVariablesNotSaved) { const absl::Cleanup checkpoint_prefix_cleanup = [this, &checkpoint_prefix]() { int64_t undeleted_files, undeleted_dirs; - TF_CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, - &undeleted_dirs)); + CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, + &undeleted_dirs)); }; const absl::StatusOr> variable_shared_names = @@ -330,8 +330,8 @@ TEST_F(SaveVariablesToCheckpointTest, const absl::Cleanup checkpoint_prefix_cleanup = [this, &checkpoint_prefix]() { int64_t undeleted_files, undeleted_dirs; - TF_CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, - &undeleted_dirs)); + CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, + &undeleted_dirs)); }; 
const absl::StatusOr> variable_shared_names = @@ -371,8 +371,8 @@ TEST_F(SaveVariablesToCheckpointTest, FailsWhenDuplicateSharedName) { const absl::Cleanup checkpoint_prefix_cleanup = [this, &checkpoint_prefix]() { int64_t undeleted_files, undeleted_dirs; - TF_CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, - &undeleted_dirs)); + CHECK_OK(env_->DeleteRecursively(*checkpoint_prefix, &undeleted_files, + &undeleted_dirs)); }; EXPECT_FALSE( diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/passes/cast_bf16_ops_to_f32.cc b/tensorflow/compiler/mlir/quantization/tensorflow/passes/cast_bf16_ops_to_f32.cc index 430056668af8ae..c2339fe044edd7 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/passes/cast_bf16_ops_to_f32.cc +++ b/tensorflow/compiler/mlir/quantization/tensorflow/passes/cast_bf16_ops_to_f32.cc @@ -91,8 +91,8 @@ class CastBf16OpsToF32 : public RewritePattern { for (int i = 0; i < op->getNumOperands(); i++) { Value input = op->getOperand(i); if (getElementTypeOrSelf(input).isBF16()) { - Value f32_cast = rewriter.create( - op->getLoc(), + Value f32_cast = TF::CastOp::create( + rewriter, op->getLoc(), CloneTypeWithNewElementType(input.getType(), rewriter.getF32Type()), input); op->setOperand(i, f32_cast); @@ -108,8 +108,8 @@ class CastBf16OpsToF32 : public RewritePattern { for (Operation* user : op->getUsers()) { for (int i = 0; i < user->getNumOperands(); i++) { if (user->getOperand(i) == value) { - Value bf16_cast = rewriter.create( - user->getLoc(), + Value bf16_cast = TF::CastOp::create( + rewriter, user->getLoc(), CloneTypeWithNewElementType(value.getType(), rewriter.getBF16Type()), value); diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/passes/prepare_quantize_drq.cc b/tensorflow/compiler/mlir/quantization/tensorflow/passes/prepare_quantize_drq.cc index bfbb8b45c2d80c..2ae814880fc2ff 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/passes/prepare_quantize_drq.cc +++ b/tensorflow/compiler/mlir/quantization/tensorflow/passes/prepare_quantize_drq.cc @@ -241,10 +241,10 @@ class PrepareDRQQuantizableOp : public OpRewritePattern { } } rewriter.setInsertionPointAfter(op); - auto q = rewriter.create( - op->getLoc(), cast_type, op.getResult()); - auto dq = rewriter.create( - op->getLoc(), expressed_type, q); + auto q = mlir::quant::ir::QuantizeCastOp::create(rewriter, op->getLoc(), + cast_type, op.getResult()); + auto dq = mlir::quant::ir::DequantizeCastOp::create(rewriter, op->getLoc(), + expressed_type, q); quantize_op->setOperand(quantize_operand_num, dq.getResult()); return true; } diff --git a/tensorflow/compiler/mlir/quantization/tensorflow/utils/tf_to_xla_attribute_utils.cc b/tensorflow/compiler/mlir/quantization/tensorflow/utils/tf_to_xla_attribute_utils.cc index 2ef9eeecc7bc2d..0c42b760557c51 100644 --- a/tensorflow/compiler/mlir/quantization/tensorflow/utils/tf_to_xla_attribute_utils.cc +++ b/tensorflow/compiler/mlir/quantization/tensorflow/utils/tf_to_xla_attribute_utils.cc @@ -32,8 +32,8 @@ namespace { Value GetDimValue(OpBuilder &builder, Location loc, Value shape_value, int32_t dim) { Type attribute_type = builder.getI64Type(); - return builder.create( - loc, + return TF::StridedSliceOp::create( + builder, loc, RankedTensorType::get( {}, mlir::cast(shape_value.getType()).getElementType()), /*input=*/shape_value, @@ -60,16 +60,16 @@ void GetSamePaddingValues(OpBuilder &builder, Location loc, Value input_size, Type int32_scalar_type = zero.getType(); auto scalar_add = [&](Value lhs, Value rhs) { - return 
builder.create(loc, int32_scalar_type, lhs, rhs); + return TF::AddOp::create(builder, loc, int32_scalar_type, lhs, rhs); }; auto scalar_mul = [&](Value lhs, Value rhs) { - return builder.create(loc, int32_scalar_type, lhs, rhs); + return TF::MulOp::create(builder, loc, int32_scalar_type, lhs, rhs); }; auto scalar_sub = [&](Value lhs, Value rhs) { - return builder.create(loc, int32_scalar_type, lhs, rhs); + return TF::SubOp::create(builder, loc, int32_scalar_type, lhs, rhs); }; auto scalar_div = [&](Value lhs, Value rhs) { - return builder.create(loc, int32_scalar_type, lhs, rhs); + return TF::DivOp::create(builder, loc, int32_scalar_type, lhs, rhs); }; // effective_filter_size = (filter_size - 1) * dilation_rate + 1 @@ -90,7 +90,7 @@ void GetSamePaddingValues(OpBuilder &builder, Location loc, Value input_size, scalar_add(effective_filter_size_op, scalar_mul(stride_value, scalar_sub(output_size, one))), input_size); - padding_needed = builder.create(loc, padding_needed, zero); + padding_needed = TF::MaximumOp::create(builder, loc, padding_needed, zero); padding_low = scalar_div(padding_needed, two); padding_high = scalar_sub(padding_needed, padding_low); } @@ -104,14 +104,15 @@ Value PadForDynamicShapedInputSamePadding( auto reshape_op = [&](Value value, const SmallVector &shape) { const int64_t rank = shape.size(); - return builder.create( - loc, RankedTensorType::get(shape, builder.getI32Type()), value, + return TF::ReshapeOp::create( + builder, loc, RankedTensorType::get(shape, builder.getI32Type()), value, CreateConstValue(builder, loc, {rank}, shape)); }; ShapedType filter_shape = mlir::cast(filter.getType()); - Value input_shape_value = builder.create( - loc, RankedTensorType::get({num_dims}, builder.getI32Type()), input); + Value input_shape_value = TF::ShapeOp::create( + builder, loc, RankedTensorType::get({num_dims}, builder.getI32Type()), + input); auto scalar_to_rank1 = [&](Value value) { return reshape_op(value, {1}); }; for (int i : llvm::seq(1, num_dims - 1)) { Value input_size_i = GetDimValue(builder, loc, input_shape_value, i); @@ -131,12 +132,12 @@ Value PadForDynamicShapedInputSamePadding( builder, loc, /*shape=*/{num_dims - 2, 2}, /*values=*/SmallVector(2 * (num_dims - 2), 0)); Value zero = CreateScalarConstValue(builder, loc, 0); - Value temp_padding_rank1 = builder.create( - loc, RankedTensorType::get({2 * num_dims}, builder.getI32Type()), zero, - temp_padding_values); + Value temp_padding_rank1 = TF::ConcatOp::create( + builder, loc, RankedTensorType::get({2 * num_dims}, builder.getI32Type()), + zero, temp_padding_values); Value temp_padding = reshape_op(temp_padding_rank1, {num_dims, 2}); - return builder.create( - loc, input.getType(), input, temp_padding, + return TF::PadV2Op::create( + builder, loc, input.getType(), input, temp_padding, CreateScalarConstValue(builder, loc, input_zp_value)); } @@ -224,9 +225,9 @@ Value CalculatePaddingAndPadIfNeeded(OpBuilder &builder, Location loc, output_shape[i] += padding_values[2 * i] + padding_values[2 * i + 1]; } - return builder.create( - loc, RankedTensorType::get(output_shape, builder.getI8Type()), input, - temp_padding, + return TF::PadV2Op::create( + builder, loc, RankedTensorType::get(output_shape, builder.getI8Type()), + input, temp_padding, CreateScalarConstValue(builder, loc, input_zp_value)); } @@ -254,7 +255,7 @@ Value PackOperand(OpBuilder &builder, Location loc, Value value, int pack_dim) { value_type.getShape().end()); RankedTensorType shape_type = RankedTensorType::get({rank}, builder.getI64Type()); - Value 
shape_value = builder.create(loc, shape_type, value); + Value shape_value = TF::ShapeOp::create(builder, loc, shape_type, value); // It is guaranteed that packed_shape[pack_dim] is known. if (packed_shape[pack_dim] % 2 != 0) { @@ -263,14 +264,14 @@ Value PackOperand(OpBuilder &builder, Location loc, Value value, int pack_dim) { padding[pack_dim * 2 + 1] = 1; Value padding_value = CreateConstValue(builder, loc, {rank, 2}, padding); - value = builder.create( - loc, RankedTensorType::get(packed_shape, builder.getI8Type()), value, - padding_value, CreateScalarConstValue(builder, loc, 0)); + value = TF::PadV2Op::create( + builder, loc, RankedTensorType::get(packed_shape, builder.getI8Type()), + value, padding_value, CreateScalarConstValue(builder, loc, 0)); SmallVector shape_add(rank, 0); shape_add[pack_dim] = 1; - shape_value = builder.create( - loc, shape_type, shape_value, + shape_value = TF::AddOp::create( + builder, loc, shape_type, shape_value, CreateConstValue(builder, loc, {rank}, shape_add)); } packed_shape[pack_dim] /= 2; @@ -279,17 +280,17 @@ Value PackOperand(OpBuilder &builder, Location loc, Value value, int pack_dim) { RankedTensorType packed_output_type = RankedTensorType::get(packed_shape, builder.getI8Type()); - Value packed_shape_value = builder.create( - loc, shape_type, shape_value, + Value packed_shape_value = TF::DivOp::create( + builder, loc, shape_type, shape_value, CreateConstValue(builder, loc, {rank}, divisor)); Value packed_low_begin_value = CreateConstValue( builder, loc, {rank}, SmallVector(rank, 0)); Value packed_low_value = - builder.create(loc, packed_output_type, value, - packed_low_begin_value, packed_shape_value); - packed_low_value = builder.create( - loc, packed_output_type, packed_low_value, + TF::SliceOp::create(builder, loc, packed_output_type, value, + packed_low_begin_value, packed_shape_value); + packed_low_value = TF::BitwiseAndOp::create( + builder, loc, packed_output_type, packed_low_value, CreateScalarConstValue(builder, loc, 0x0F)); SmallVector packed_high_begin(rank, 0); @@ -297,14 +298,14 @@ Value PackOperand(OpBuilder &builder, Location loc, Value value, int pack_dim) { Value packed_high_begin_value = CreateConstValue(builder, loc, {rank}, packed_high_begin); Value packed_high_value = - builder.create(loc, packed_output_type, value, - packed_high_begin_value, packed_shape_value); - packed_high_value = builder.create( - loc, packed_output_type, packed_high_value, + TF::SliceOp::create(builder, loc, packed_output_type, value, + packed_high_begin_value, packed_shape_value); + packed_high_value = TF::LeftShiftOp::create( + builder, loc, packed_output_type, packed_high_value, CreateScalarConstValue(builder, loc, 4)); - Operation *packed = builder.create( - loc, packed_output_type, packed_low_value, packed_high_value); + Operation* packed = TF::BitwiseOrOp::create( + builder, loc, packed_output_type, packed_low_value, packed_high_value); return ConstantFoldOpIfPossible(packed).front(); } diff --git a/tensorflow/compiler/mlir/stablehlo/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/stablehlo/transforms/legalize_tf_patterns.td index 24b1d05bce9735..ce91055db9c666 100644 --- a/tensorflow/compiler/mlir/stablehlo/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/stablehlo/transforms/legalize_tf_patterns.td @@ -40,7 +40,7 @@ def CastValueToI64: NativeCodeCall< "CastValueToI64($0.getLoc(), $1, &$_builder)">; def CastValueToElementType: NativeCodeCall< - "$_builder.create($0.getLoc(), $1, " + "ConvertOp::create($_builder, 
$0.getLoc(), $1, " "getElementTypeOrSelf($2.getType()))">; // Here, $0 is an ElementsAttr with exactly one element of type integer. $1 is diff --git a/tensorflow/compiler/mlir/stablehlo/transforms/utils.cc b/tensorflow/compiler/mlir/stablehlo/transforms/utils.cc index d440f20e6d9779..f963d7a9c8dcb1 100644 --- a/tensorflow/compiler/mlir/stablehlo/transforms/utils.cc +++ b/tensorflow/compiler/mlir/stablehlo/transforms/utils.cc @@ -27,14 +27,14 @@ namespace odml { mhlo::ConstantOp GetScalarConstOfType(Type ty, Location loc, int64_t raw_value, OpBuilder* builder) { - return builder->create(loc, - hlo::getScalarOfType(ty, raw_value)); + return mhlo::ConstantOp::create(*builder, loc, + hlo::getScalarOfType(ty, raw_value)); } mhlo::ConstantOp GetScalarNegZeroOfType(Type ty, Location loc, OpBuilder* builder) { - return builder->create(loc, - hlo::getScalarNegZeroOfType(ty)); + return mhlo::ConstantOp::create(*builder, loc, + hlo::getScalarNegZeroOfType(ty)); } DenseIntElementsAttr GetI64ElementsAttr(ArrayAttr attr) { diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index cbd6bc3b283504..494c23f1344e9b 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -298,9 +298,11 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow/transforms:tensorflow_canonicalize_inc_gen", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:string_view", "@llvm-project//llvm:Support", "@llvm-project//mlir:BytecodeOpInterface", "@llvm-project//mlir:CallOpInterfaces", @@ -350,6 +352,8 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow/transforms:tensorflow_canonicalize_inc_gen", "//tensorflow/core:framework", "//tensorflow/core:lib", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", "@llvm-project//mlir:BytecodeOpInterface", "@llvm-project//mlir:CallOpInterfaces", @@ -399,6 +403,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core/common_runtime:inline_function_utils", "//tensorflow/core/common_runtime:lower_function_call_inline_policy", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", "@llvm-project//mlir:BytecodeOpInterface", @@ -502,6 +507,9 @@ cc_library( "//tensorflow/core/ir:Dialect", "//tensorflow/core/ir/types:Dialect", "//tensorflow/core/platform:logging", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@llvm-project//llvm:Support", "@llvm-project//mlir:Analysis", "@llvm-project//mlir:ArithDialect", @@ -535,6 +543,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/platform:test", + "@com_google_googletest//:gtest_main", "@llvm-project//mlir:FuncDialect", "@llvm-project//mlir:IR", "@llvm-project//mlir:Parser", @@ -565,10 +574,11 @@ cc_library( "//tensorflow/core:framework_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:session_options", - "//tensorflow/core/common_runtime:threadpool_device", "//tensorflow/core/platform:errors", "//tensorflow/core/platform:status", "//tensorflow/core/platform:threadpool_options", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", ], @@ -905,7 +915,6 @@ cc_library( 
"//tensorflow/core/platform:status", "//tensorflow/core/util:managed_stack_trace", "@com_google_absl//absl/status", - "@com_google_absl//absl/strings:string_view", "@llvm-project//mlir:IR", "@llvm-project//mlir:Support", "@local_xla//xla/mlir/utils:error_util", @@ -949,10 +958,8 @@ tf_cc_test( "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", - "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@local_xla//xla/hlo/testlib:test", - "@local_xla//xla/mlir/utils:error_util", ], ) @@ -1305,8 +1312,10 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core/platform:test", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:IR", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:string_view", ], ) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_arith_ops_folder.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_arith_ops_folder.cc index 9a78a1a83ae214..a41e81b0bda21a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_arith_ops_folder.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_arith_ops_folder.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_arith_ops_folder.h" +#include + #include "llvm/Support/FormatVariadic.h" #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/BuiltinTypes.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc index 19a988827bdf42..f6ce8d327a8874 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "absl/log/check.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc index e8d0ea525943fd..db85471f6ed6aa 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include +#include #include #include "llvm/ADT/ArrayRef.h" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index ecd4f7560c359a..931fb51426257b 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -11497,6 +11497,26 @@ representation of that entry. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_PostProcessPredictionOp : TF_Op<"PostProcessPrediction", []> { + let summary = [{ +Performs post-processing on prediction inputs. This op has no tensor outputs. + }]; + + let description = [{ +Send an rpc to the external service that builds rpc payload based on prediction result. 
+ }]; + + let arguments = (ins + Arg:$steps, + Arg:$gaia_id, + Arg:$video_id, + + StrAttr:$op_config + ); + + let results = (outs); +} + def TF_PowOp : TF_Op<"Pow", [Pure, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Computes the power of one value to another."; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc index 2cc385794122a2..60a3ea3abdc10c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" +#include + namespace mlir { namespace TF { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index f432b6b1f612f8..160413009efb3a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -15,16 +15,10 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" -#include -#include #include -#include -#include -#include -#include -#include #include +#include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index 6382f325a47505..a0fefadca96559 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -29,6 +29,7 @@ limitations under the License. #include "absl/log/check.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_layout_helper.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_layout_helper.cc index b3ce501c1c08d1..02105ad8cfc210 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_layout_helper.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_layout_helper.cc @@ -15,6 +15,10 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_layout_helper.h" +#include +#include +#include + #include "mlir/Support/LLVM.h" // from @llvm-project namespace mlir { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 23683673fe189a..1d9a4fecfab4cf 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -30,6 +30,7 @@ limitations under the License. #include #include +#include "absl/algorithm/container.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -90,6 +91,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/utils/dynamic_shape_utils.h" #include "tensorflow/compiler/mlir/tensorflow/utils/side_effect_analysis_util.h" +#include "tensorflow/core/framework/types.pb.h" namespace mlir { namespace TF { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_tensor_helper.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_tensor_helper.cc index ca8f27a1489c06..0b13f1791c7717 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_tensor_helper.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_tensor_helper.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_tensor_helper.h" +#include + #include "mlir/Dialect/Traits.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc index 7419149074fb8a..1764aa1124059c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc @@ -15,16 +15,12 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h" -#include #include -#include -#include -#include #include #include -#include -#include +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc index 45717471e373a2..74af6e58fb2294 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc @@ -16,7 +16,10 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" #include +#include +#include "absl/algorithm/container.h" +#include "absl/log/log.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model_test.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model_test.cc index 48cfb26d6802b9..1c53c8296a4a17 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model_test.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" +#include #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project #include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.cc index d6d22098666ffe..adf055365a9c56 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.cc @@ -15,6 +15,8 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.h" +#include + #include "mlir/Support/LLVM.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tpu_embedding_ops_registry.cc b/tensorflow/compiler/mlir/tensorflow/ir/tpu_embedding_ops_registry.cc index 5921efa20969b2..93c33e9799a298 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tpu_embedding_ops_registry.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tpu_embedding_ops_registry.cc @@ -15,8 +15,6 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tpu_embedding_ops_registry.h" -#include - namespace mlir { namespace TF { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/BUILD b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/BUILD index e052268b6ede98..162a597ef7c40e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/BUILD @@ -294,7 +294,6 @@ glob_lit_tests( default_tags = [ "no_mac", # TODO(b/191167848) "no_oss", # TODO(b/190855110) - "cuda-only", ], driver = "@llvm-project//mlir:run_lit.sh", exclude = [ diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_formation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_formation.cc index 93d31b884732c1..2beec1bcd87944 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_formation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_formation.cc @@ -107,8 +107,8 @@ void BuildLaunchForCluster(const TF::Cluster& c, OpBuilder* builder) { // as operand. OpBuilder return_builder(builder->getContext()); return_builder.setInsertionPointToEnd(block); - return_builder.create(return_builder.getUnknownLoc(), - live_outs); + tf_device::ReturnOp::create(return_builder, return_builder.getUnknownLoc(), + live_outs); llvm::SmallVector live_out_types; live_out_types.reserve(live_outs.size()); @@ -116,8 +116,8 @@ void BuildLaunchForCluster(const TF::Cluster& c, OpBuilder* builder) { live_out_types.emplace_back(v.getType()); } - tf_device::LaunchOp launch_op = builder->create( - builder->getUnknownLoc(), builder->getStringAttr(c.target), + tf_device::LaunchOp launch_op = tf_device::LaunchOp::create( + *builder, builder->getUnknownLoc(), builder->getStringAttr(c.target), live_out_types); // Attach the region to launch_op. diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_ops_by_policy.cc b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_ops_by_policy.cc index beee1afb1a129e..9158ecc6f7fcd7 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_ops_by_policy.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_ops_by_policy.cc @@ -575,7 +575,7 @@ tf_device::ClusterOp CreateClusterOp(Cluster &cluster, StringAttr policy) { OpBuilder builder(back); auto cluster_op = - builder.create(loc, return_types, policy); + tf_device::ClusterOp::create(builder, loc, return_types, policy); // Create block in cluster_op's region and move 'cluster.operations' into // it. @@ -585,7 +585,7 @@ tf_device::ClusterOp CreateClusterOp(Cluster &cluster, StringAttr policy) { // Add 'tf_device::ReturnOp' at the end of the block. 
builder.setInsertionPointToEnd(block); - builder.create(loc, return_values.getArrayRef()); + tf_device::ReturnOp::create(builder, loc, return_values.getArrayRef()); // Set device attribute if (auto device = back->getAttr(kDeviceAttr)) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/convert_control_to_data_outputs.cc b/tensorflow/compiler/mlir/tensorflow/transforms/convert_control_to_data_outputs.cc index d63ace094451a6..ea7dce395d84d9 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/convert_control_to_data_outputs.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/convert_control_to_data_outputs.cc @@ -370,7 +370,7 @@ void AppendFunctionResults(func::FuncOp func, int num_resources, // function. OpBuilder builder(graph_op); auto new_graph_op = - builder.create(graph_op.getLoc(), new_result_types); + GraphOp::create(builder, graph_op.getLoc(), new_result_types); new_graph_op.getRegion().takeBody(graph_op.getRegion()); graph_op->replaceAllUsesWith( new_graph_op->getResults().drop_back(num_resources)); @@ -388,14 +388,15 @@ IslandOp CreateIsland(Operation* sub_op, ValueRange control_inputs, OpBuilder builder) { assert(sub_op); auto control_type = ControlType::get(builder.getContext()); - auto island = builder.create( - sub_op->getLoc(), sub_op->getResultTypes(), control_type, control_inputs); + auto island = + IslandOp::create(builder, sub_op->getLoc(), sub_op->getResultTypes(), + control_type, control_inputs); island.getBody().push_back(new Block); Block* block = &island.getBody().back(); builder.setInsertionPointToEnd(block); sub_op->replaceAllUsesWith(island.getOutputs()); sub_op->moveBefore(block, block->begin()); - builder.create(sub_op->getLoc(), sub_op->getResults()); + YieldOp::create(builder, sub_op->getLoc(), sub_op->getResults()); return island; } @@ -429,12 +430,12 @@ void ChainResourceOps( // Create chain source and sink identity islands for current equivalence // class. auto chain_arg = func.getArgument(chain_index++); - auto src_identity = builder_chain_src.create( - chain_arg.getLoc(), chain_arg.getType(), chain_arg); + auto src_identity = TF::IdentityOp::create( + builder_chain_src, chain_arg.getLoc(), chain_arg.getType(), chain_arg); auto chain_src_island = CreateIsland(src_identity, {}, builder_chain_src); - auto sink_identity = builder_chain_sink.create( - chain_arg.getLoc(), chain_arg.getType(), chain_arg); + auto sink_identity = TF::IdentityOp::create( + builder_chain_sink, chain_arg.getLoc(), chain_arg.getType(), chain_arg); auto chain_sink_island = CreateIsland(sink_identity, {}, builder_chain_sink); @@ -477,7 +478,7 @@ void ChainResourceOps( IslandOp GetDummyConstant(OpBuilder builder, ShapedType const_type, Location loc) { DenseIntElementsAttr val = DenseIntElementsAttr::get(const_type, 1); - auto const_op = builder.create(loc, val); + auto const_op = TF::ConstOp::create(builder, loc, val); auto const_island = CreateIsland(const_op, {}, builder); return const_island; } @@ -506,8 +507,9 @@ TF::WhileOp RewriteWhileOp(TF::WhileOp while_op, int num_resource_inputs, } // Replace old while op with new while op. 
- auto new_while_op = builder.create( - while_op.getLoc(), new_result_types, new_operands, while_op->getAttrs()); + auto new_while_op = + TF::WhileOp::create(builder, while_op.getLoc(), new_result_types, + new_operands, while_op->getAttrs()); auto new_while_wrapper = CreateIsland(new_while_op, while_wrapper.getControlInputs(), builder); for (auto result : while_wrapper.getOutputs()) { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc index 144bdb44018649..cda422d0d9938e 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc @@ -137,12 +137,12 @@ class DecomposeRngReadAndSkipOp : public RewritePattern { // Read the state value from the resource. Value state = - rewriter.create(loc, res_type, rng_op.getResource()); + ReadVariableOp::create(rewriter, loc, res_type, rng_op.getResource()); // Extract the key and counter from the state. RankedTensorType word_type = RankedTensorType::get({}, state_element_type); - auto unpacked = rewriter.create( - loc, SmallVector(state_size, word_type), state, 0); + auto unpacked = UnpackOp::create( + rewriter, loc, SmallVector(state_size, word_type), state, 0); Value key = unpacked.getResult(counter_size); SmallVector counter; @@ -153,39 +153,40 @@ class DecomposeRngReadAndSkipOp : public RewritePattern { // Set the increment to 256 * delta. Type u64 = rewriter.getIntegerType(64, /*isSigned=*/false); RankedTensorType u64_scalar = RankedTensorType::get({}, u64); - Value step_size = rewriter.create(loc, GetScalarOfType(u64, 256)); + Value step_size = ConstOp::create(rewriter, loc, GetScalarOfType(u64, 256)); Value increment = - rewriter.create(loc, u64_scalar, step_size, rng_op.getDelta()); + MulOp::create(rewriter, loc, u64_scalar, step_size, rng_op.getDelta()); // Increment the counter. SmallVector pack_args; RankedTensorType word_u64_type = RankedTensorType::get({}, u64); - Value zero_u64 = rewriter.create(loc, GetScalarOfType(u64, 0)); - Value one_u64 = rewriter.create(loc, GetScalarOfType(u64, 1)); + Value zero_u64 = ConstOp::create(rewriter, loc, GetScalarOfType(u64, 0)); + Value one_u64 = ConstOp::create(rewriter, loc, GetScalarOfType(u64, 1)); for (int i = 0; i < counter_size; ++i) { Value word = counter[i]; - Value word_u64 = rewriter.create(loc, word_u64_type, word); - Value new_word_u64 = rewriter.create(loc, word_u64, increment); - Value new_word = rewriter.create(loc, word_type, new_word_u64); + Value word_u64 = CastOp::create(rewriter, loc, word_u64_type, word); + Value new_word_u64 = AddV2Op::create(rewriter, loc, word_u64, increment); + Value new_word = CastOp::create(rewriter, loc, word_type, new_word_u64); pack_args.push_back(new_word); - Value overflow = rewriter.create(loc, new_word_u64, word_u64); - increment = rewriter.create(loc, overflow, one_u64, zero_u64); + Value overflow = LessOp::create(rewriter, loc, new_word_u64, word_u64); + increment = + SelectV2Op::create(rewriter, loc, overflow, one_u64, zero_u64); } // Save the new state value to the resource. pack_args.push_back(key); - Value new_state = rewriter.create(loc, res_type, pack_args); - rewriter.create(loc, rng_op.getResource(), new_state); + Value new_state = PackOp::create(rewriter, loc, res_type, pack_args); + AssignVariableOp::create(rewriter, loc, rng_op.getResource(), new_state); // Pad the original state as necessary to fill the output shape. 
int pad = tensorflow::RNG_MAX_COUNTER_SIZE - counter_size;
Type i64 = rewriter.getI64Type();
RankedTensorType paddings_ty = RankedTensorType::get({1, 2}, i64);
std::vector paddings_values = {0, pad};
- Value paddings = rewriter.create<ConstOp>(
- loc, DenseIntElementsAttr::get(paddings_ty, paddings_values));
- Value output = rewriter.create<PadOp>(loc, op_type, state, paddings);
+ Value paddings = ConstOp::create(
+ rewriter, loc, DenseIntElementsAttr::get(paddings_ty, paddings_values));
+ Value output = PadOp::create(rewriter, loc, op_type, state, paddings);
rewriter.replaceOp(op, output);
return success();
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td
index 1fc666da4a8d95..9130ae844bc6b9 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td
@@ -27,7 +27,7 @@ def EmptyList: NativeCodeCall<"llvm::SmallVector{}">;
// Creates a tf.ReadVariable op that reads a resource `$2` that has the same
// element type as `$1`. The op created will use location of `$0`.
def CreateTFReadVariableOp : NativeCodeCall<
- "$_builder.create<TF::ReadVariableOp>("
+ "TF::ReadVariableOp::create($_builder, "
" $0.getLoc(),"
" GetResourceSubtypeOrDefault("
" $2, llvm::cast($1.getType()).getElementType()),"
@@ -39,19 +39,19 @@ def CheckHasResourceSubtype : Constraint>;
def CreateConstBoolAttrFalse : NativeCodeCall<"$_builder.getBoolAttr(false)">;
def CreateTensorScatterAddOp : NativeCodeCall<
- "$_builder.create<TF::TensorScatterAddOp>("
+ "TF::TensorScatterAddOp::create($_builder, "
"$0.getLoc(), $0.getType(), $0, $1, $2, $_builder.getStringAttr(\"\"))">;
def CreateTensorScatterUpdateOp : NativeCodeCall<
- "$_builder.create<TF::TensorScatterUpdateOp>("
+ "TF::TensorScatterUpdateOp::create($_builder, "
"$0.getLoc(), $0.getType(), $0, $1, $2, $_builder.getStringAttr(\"\"))">;
def CreateTFReadVariableOpFromResourceHandle : NativeCodeCall<
- "$_builder.create<TF::ReadVariableOp>("
+ "TF::ReadVariableOp::create($_builder, "
"$0.getLoc(), GetResourceSubtype($1), $1)">;
def CreateTFSelectOp: NativeCodeCall<
- "$_builder.create<TF::SelectOp>("
+ "TF::SelectOp::create($_builder, "
"$0.getLoc(), $3.getType(), $1, $2, $3)">;
def ConstAttrIfThenElse: NativeCodeCall<
@@ -59,7 +59,7 @@ def ConstAttrIfThenElse: NativeCodeCall<
// Convert clamp(lo, x, hi) to clipbyvalue(x, lo, hi).
def Clamp: NativeCodeCall<
- "$_builder.create<TF::ClipByValueOp>("
+ "TF::ClipByValueOp::create($_builder, "
" $0.getLoc(),"
" $2.getType(), $2, $1, $3)">;
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc
index 954c318b416150..73dc7802c7d56d 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc
@@ -61,7 +61,7 @@ namespace {
ConstOp createI32ConstOp(int32_t value, Location loc, PatternRewriter* rewriter) {
auto int_attr = IntegerAttr::get(rewriter->getIntegerType(32), value);
- return rewriter->create<ConstOp>(loc, int_attr);
+ return ConstOp::create(*rewriter, loc, int_attr);
}
// Creates ConstantOp for array of int32_t.
@@ -70,7 +70,7 @@ arith::ConstantOp createI32ConstantOp(llvm::ArrayRef values, auto values_type = RankedTensorType::get( {static_cast(values.size())}, rewriter->getIntegerType(32)); auto constant_attr = rewriter->getI32TensorAttr(values); - return rewriter->create(loc, values_type, constant_attr); + return arith::ConstantOp::create(*rewriter, loc, values_type, constant_attr); } // Creates ConstantOp for array of int64_t. @@ -79,7 +79,7 @@ arith::ConstantOp createI64ConstantOp(llvm::ArrayRef values, auto values_type = RankedTensorType::get( {static_cast(values.size())}, rewriter->getIntegerType(64)); auto constant_attr = rewriter->getI64TensorAttr(values); - return rewriter->create(loc, values_type, constant_attr); + return arith::ConstantOp::create(*rewriter, loc, values_type, constant_attr); } // Function to create a tf.SumOp to sum the element in 'value' reduced along the @@ -98,8 +98,9 @@ TF::SumOp createSumOp(Value value, Location loc, sum_shape.push_back(shape[i]); } } - return rewriter->create( - loc, RankedTensorType::get(sum_shape, value_type.getElementType()), value, + return TF::SumOp::create( + *rewriter, loc, + RankedTensorType::get(sum_shape, value_type.getElementType()), value, redux_op); } @@ -115,8 +116,8 @@ TF::TransposeOp createTransposeOp(Value value, Location loc, } auto transposed_type = RankedTensorType::get(transposed_shape, value_type.getElementType()); - return rewriter->create(loc, transposed_type, value, - perm_op); + return TF::TransposeOp::create(*rewriter, loc, transposed_type, value, + perm_op); } TF::ReshapeOp createReshapeOp(Value value, ArrayRef shape, @@ -125,8 +126,8 @@ TF::ReshapeOp createReshapeOp(Value value, ArrayRef shape, auto shape_tensor = createI64ConstantOp( tensorflow::ConvertMlirShapeToTF(shape), loc, rewriter); Type resultType = RankedTensorType::get(shape, element_type); - return rewriter->create(loc, resultType, /*tensor=*/value, - /*shape=*/shape_tensor); + return TF::ReshapeOp::create(*rewriter, loc, resultType, /*tensor=*/value, + /*shape=*/shape_tensor); } // Creates ReshapeOp with runtime calcuation of required shape to support @@ -140,7 +141,7 @@ TF::ReshapeOp createReshapeOpForDynamic(Value value, ArrayRef shape, PatternRewriter* rewriter) { // Build ShapeOp auto input_shape = - rewriter->create(loc, value, rewriter->getBoolAttr(true)); + TF::ShapeOp::create(*rewriter, loc, value, rewriter->getBoolAttr(true)); // Build UnsortedSegmentProdOp Type segProdresultType = @@ -148,16 +149,16 @@ TF::ReshapeOp createReshapeOpForDynamic(Value value, ArrayRef shape, auto segids_tensor = createI32ConstantOp(reshape_segids, loc, rewriter); auto num_reshape_segids_tensor = createI32ConstOp(num_reshape_segids, loc, rewriter); - auto segprod = rewriter->create( - loc, segProdresultType, input_shape->getResults()[0], segids_tensor, - num_reshape_segids_tensor); + auto segprod = TF::UnsortedSegmentProdOp::create( + *rewriter, loc, segProdresultType, input_shape->getResults()[0], + segids_tensor, num_reshape_segids_tensor); // Build ReshapeOp with the result of UnsortedSegmentProdOp. 
Type out_tensor_type = RankedTensorType::get(shape, getElementTypeOrSelf(value.getType())); - return rewriter->create(loc, out_tensor_type, - /*tensor=*/value, - /*shape=*/segprod->getResults()[0]); + return TF::ReshapeOp::create(*rewriter, loc, out_tensor_type, + /*tensor=*/value, + /*shape=*/segprod->getResults()[0]); } struct EinsumDimensionNumbers { @@ -178,8 +179,8 @@ TF::ReshapeOp createOutputReshapeOpForDynamic( EinsumDimensionNumbers& dnums, Location loc, PatternRewriter* rewriter) { BoolAttr true_attr = rewriter->getBoolAttr(true); // Build ShapeOp - auto shape_lhs = rewriter->create(loc, org_lhs, true_attr); - auto shape_rhs = rewriter->create(loc, org_rhs, true_attr); + auto shape_lhs = TF::ShapeOp::create(*rewriter, loc, org_lhs, true_attr); + auto shape_rhs = TF::ShapeOp::create(*rewriter, loc, org_rhs, true_attr); std::vector bl_index; // Indexes of B0,...,Bn and L0,...,Ln bl_index.reserve(dnums.lhs_rhs_out.size() + dnums.lhs_out.size()); @@ -196,20 +197,20 @@ TF::ReshapeOp createOutputReshapeOpForDynamic( } auto lhs_index_tensor = createI32ConstantOp(bl_index, loc, rewriter); - auto gather_lhs = rewriter->create( - loc, + auto gather_lhs = TF::GatherOp::create( + *rewriter, loc, RankedTensorType::get({static_cast(bl_index.size())}, rewriter->getIntegerType(32)), shape_lhs->getResults()[0], lhs_index_tensor->getResults()[0], true_attr); auto rhs_index_tensor = createI32ConstantOp(r_index, loc, rewriter); - auto gather_rhs = rewriter->create( - loc, + auto gather_rhs = TF::GatherOp::create( + *rewriter, loc, RankedTensorType::get({static_cast(r_index.size())}, rewriter->getIntegerType(32)), shape_rhs->getResults()[0], rhs_index_tensor->getResults()[0], true_attr); Value zero_value = createI32ConstOp(0, loc, rewriter); - auto concat_out_shape = rewriter->create( - loc, + auto concat_out_shape = TF::ConcatOp::create( + *rewriter, loc, RankedTensorType::get({static_cast(bl_index.size()) + static_cast(r_index.size())}, rewriter->getIntegerType(32)), @@ -220,10 +221,9 @@ TF::ReshapeOp createOutputReshapeOpForDynamic( // Build ReshapeOp with the calculated output shape. 
Type out_type = RankedTensorType::get(shape, getElementTypeOrSelf(value.getType())); - return rewriter->create( - loc, out_type, - /*tensor=*/value, - /*shape=*/concat_out_shape->getResults()[0]); + return TF::ReshapeOp::create(*rewriter, loc, out_type, + /*tensor=*/value, + /*shape=*/concat_out_shape->getResults()[0]); } std::optional> EquationToMap( @@ -793,9 +793,9 @@ LogicalResult rewriteToBatchMatmul(TF::EinsumOp op, auto matmul_type = RankedTensorType::get(matmul_shape, original_type.getElementType()); - Value out = rewriter.create( - op.getLoc(), matmul_type, lhs, rhs, rewriter.getBoolAttr(false), - rewriter.getBoolAttr(false)); + Value out = TF::BatchMatMulV2Op::create(rewriter, op.getLoc(), matmul_type, + lhs, rhs, rewriter.getBoolAttr(false), + rewriter.getBoolAttr(false)); bool out_reshape_need = (reshape_shape.size() != matmul_shape.size() || original_type.getRank() != matmul_shape.size()); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/extract_tpu_copy_with_dynamic_shape_op.cc b/tensorflow/compiler/mlir/tensorflow/transforms/extract_tpu_copy_with_dynamic_shape_op.cc index 18480fbd772fa9..883da73f2fb378 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/extract_tpu_copy_with_dynamic_shape_op.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/extract_tpu_copy_with_dynamic_shape_op.cc @@ -127,8 +127,8 @@ tf_device::LaunchOp CreateNewHostLaunchOpWithNewResult( for (Value result : new_launch_op_results) new_launch_op_results_types.push_back(result.getType()); - auto new_launch_op = builder.create( - old_launch_op->getLoc(), old_launch_op->getDeviceAttr(), + auto new_launch_op = tf_device::LaunchOp::create( + builder, old_launch_op->getLoc(), old_launch_op->getDeviceAttr(), /*result_types=*/new_launch_op_results_types); new_launch_op.getBody().takeBody(old_launch_op->getBody()); @@ -154,17 +154,16 @@ LogicalResult CreateNewDeviceLaunchOp( return failure(); } - new_device_launch_op = builder.create( - tpu_copy_with_dynamic_shape_op->getLoc(), + new_device_launch_op = tf_device::LaunchOp::create( + builder, tpu_copy_with_dynamic_shape_op->getLoc(), builder.getStringAttr(device_str), /*result_types=*/tpu_copy_with_dynamic_shape_op->getResultTypes()); new_device_launch_op.getBody().push_back(new Block); builder.setInsertionPointToEnd(&new_device_launch_op.GetBody()); - auto* return_op = builder - .create( - tpu_copy_with_dynamic_shape_op->getLoc(), - tpu_copy_with_dynamic_shape_op->getResults()) + auto* return_op = tf_device::ReturnOp::create( + builder, tpu_copy_with_dynamic_shape_op->getLoc(), + tpu_copy_with_dynamic_shape_op->getResults()) .getOperation(); tpu_copy_with_dynamic_shape_op->moveBefore(return_op); return success(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/fused_kernel_matcher.cc b/tensorflow/compiler/mlir/tensorflow/transforms/fused_kernel_matcher.cc index e73d76fbc5907d..b2ab71fa5129cb 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/fused_kernel_matcher.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/fused_kernel_matcher.cc @@ -230,8 +230,8 @@ class FuseContractionWithBiasAdd : public OpRewritePattern { auto *bias_add_op = bias_add.getOperation(); if (bias_add_op) rewriter.setInsertionPoint(bias_add_op); - Value fused_op = rewriter.create(fused_loc, result_type, - ValueRange(operands), attrs); + Value fused_op = FusedOpT::create(rewriter, fused_loc, result_type, + ValueRange(operands), attrs); auto op_to_replace = fuse_activation ? 
activation : bias_add; rewriter.replaceOp(op_to_replace, ValueRange({fused_op})); return success(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/hoist_replicate_invariant_resource_writes.cc b/tensorflow/compiler/mlir/tensorflow/transforms/hoist_replicate_invariant_resource_writes.cc index 2c70a078fbb13a..18fc8fc1cb58cc 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/hoist_replicate_invariant_resource_writes.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/hoist_replicate_invariant_resource_writes.cc @@ -89,9 +89,9 @@ void MoveTailWritesAfterReplicate( OpBuilder builder(replicate_op); // Clone this old replicate op but with new result types. - auto new_replicate_op = builder.create( - replicate_op->getLoc(), new_result_types, replicate_op->getOperands(), - replicate_op->getAttrs()); + auto new_replicate_op = tf_device::ReplicateOp::create( + builder, replicate_op->getLoc(), new_result_types, + replicate_op->getOperands(), replicate_op->getAttrs()); // Move region to the new op. new_replicate_op.getRegion().takeBody(replicate_op.getRegion()); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/host_runtime/tpu_merge_variables_with_execute.cc b/tensorflow/compiler/mlir/tensorflow/transforms/host_runtime/tpu_merge_variables_with_execute.cc index 9492c007b07ca5..7806967d7dcfe9 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/host_runtime/tpu_merge_variables_with_execute.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/host_runtime/tpu_merge_variables_with_execute.cc @@ -413,8 +413,8 @@ void ReplaceParallelExecute( &output_types, parallel_execute, region_index + 1, num_regions); builder->setInsertionPoint(parallel_execute); - auto new_parallel_execute = builder->create( - parallel_execute.getLoc(), num_regions, output_types); + auto new_parallel_execute = tf_device::ParallelExecuteOp::create( + *builder, parallel_execute.getLoc(), num_regions, output_types); // Replace the uses of the original parallel_execute before region containing // merged execute. @@ -449,8 +449,8 @@ void ReplaceParallelExecute( // execute results. Operation* old_terminator = execute_region->front().getTerminator(); builder->setInsertionPointToEnd(&execute_region->front()); - builder->create(old_terminator->getLoc(), - merged_execute_launch.getResults()); + tf_device::ReturnOp::create(*builder, old_terminator->getLoc(), + merged_execute_launch.getResults()); old_terminator->erase(); // Remove the original TPUExecute op. @@ -532,8 +532,8 @@ LogicalResult MergeForOneTPUExecute( } // Create the merged execute and update variables op. - auto merged_execute = builder->create( - execute_launch.getLoc(), new_output_types, + auto merged_execute = TF::TPUExecuteAndUpdateVariablesOp::create( + *builder, execute_launch.getLoc(), new_output_types, var_access_info.new_operand_values, llvm::ArrayRef{ builder->getNamedAttr( @@ -544,14 +544,14 @@ LogicalResult MergeForOneTPUExecute( builder->getI64ArrayAttr(device_var_updates_indices))}); // Wrap in launch for device assignment. 
- auto merged_execute_launch = builder->create( - merged_execute.getLoc(), execute_launch.getDeviceAttr(), + auto merged_execute_launch = tf_device::LaunchOp::create( + *builder, merged_execute.getLoc(), execute_launch.getDeviceAttr(), merged_execute.getResultTypes()); merged_execute_launch.getBody().push_back(new Block); builder->setInsertionPointToEnd(&merged_execute_launch.GetBody()); - builder->create(merged_execute.getLoc(), - merged_execute.getResults()); + tf_device::ReturnOp::create(*builder, merged_execute.getLoc(), + merged_execute.getResults()); merged_execute.getOperation()->moveBefore( merged_execute_launch.GetBody().getTerminator()); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/host_runtime/tpu_metadata_utils.cc b/tensorflow/compiler/mlir/tensorflow/transforms/host_runtime/tpu_metadata_utils.cc index ac9c18602804d7..a5bd582b7c2b5f 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/host_runtime/tpu_metadata_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/host_runtime/tpu_metadata_utils.cc @@ -224,8 +224,8 @@ LogicalResult SetMetadataProtoFromClusterFuncOp( tensorflow::tpu::TPUCompileMetadataProto* metadata) { if (auto options_attr = op->getAttrOfType("tpu_compile_options_proto")) { - if (!metadata->mutable_compile_options()->ParseFromArray( - options_attr.data(), options_attr.size())) { + if (!metadata->mutable_compile_options()->ParseFromString( + absl::string_view(options_attr.data(), options_attr.size()))) { return failure(); } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index 85b61d16355077..0b5976b619ea26 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -90,7 +90,7 @@ static Value CreateTFCastOpF32(OpBuilder *builder, Location loc, Value x, auto x_type = mlir::dyn_cast_or_null(x.getType()); if (!x_type) llvm_unreachable("unsupported type"); Type type = x_type.clone(builder->getF32Type()); - return builder->create(loc, type, x, truncate); + return CastOp::create(*builder, loc, type, x, truncate); } // Returns a TF_CastOp to I32. This function is used for CastOps that are @@ -103,7 +103,7 @@ static Value CreateTFCastOpI32(OpBuilder *builder, Location loc, Value x, auto x_type = mlir::dyn_cast_or_null(x.getType()); if (!x_type) llvm_unreachable("unsupported type"); Type type = x_type.clone(builder->getI32Type()); - return builder->create(loc, type, x, truncate); + return CastOp::create(*builder, loc, type, x, truncate); } static APFloat ConvertToAPFloat(double val, Type type) { @@ -125,22 +125,24 @@ static Value GetDimensionSize(OpBuilder *builder, Location loc, Value input, } // Return a ConstOp if it's static dimension. if (!ranked_ty.isDynamicDim(idx)) { - return builder->create( - loc, GetScalarOfType( - builder->getIntegerType(use_32bit.getValue() ? 32 : 64), - ranked_ty.getDimSize(idx))); + return TF::ConstOp::create( + *builder, loc, + GetScalarOfType( + builder->getIntegerType(use_32bit.getValue() ? 
32 : 64), + ranked_ty.getDimSize(idx))); } } - auto shape = builder->create(loc, input, use_32bit); - return builder->create( - loc, mlir::RankedTensorType::get({}, getElementTypeOrSelf(shape)), shape, + auto shape = TF::ShapeOp::create(*builder, loc, input, use_32bit); + return TF::StridedSliceOp::create( + *builder, loc, + mlir::RankedTensorType::get({}, getElementTypeOrSelf(shape)), shape, /*begin=*/ - builder->create(loc, builder->getI32TensorAttr({idx})), + TF::ConstOp::create(*builder, loc, builder->getI32TensorAttr({idx})), /*end=*/ - builder->create(loc, builder->getI32TensorAttr({idx + 1})), + TF::ConstOp::create(*builder, loc, builder->getI32TensorAttr({idx + 1})), /*strides=*/ - builder->create(loc, builder->getI32TensorAttr({1})), + TF::ConstOp::create(*builder, loc, builder->getI32TensorAttr({1})), /*begin_mask=*/0, /*end_mask=*/0, /*ellipsis_mask=*/0, /*new_axis_mask=*/0, /*shrink_axis_mask=*/1); } @@ -211,9 +213,9 @@ Value ValuesToRank1(PatternRewriter &rewriter, Location loc, Type dtype, ArrayRef vals) { int64_t length = vals.size(); auto type = tensorflow::GetTypeFromTFTensorShape({length}, dtype); - auto axis = rewriter.create( - loc, GetScalarOfType(rewriter.getIntegerType(64), 0)); - return rewriter.create(loc, type, ValueRange(vals), axis); + auto axis = ConstOp::create(rewriter, loc, + GetScalarOfType(rewriter.getIntegerType(64), 0)); + return ConcatV2Op::create(rewriter, loc, type, ValueRange(vals), axis); } // Lowers AddN op to a sequence of AddV2 ops to accumulate operands. @@ -277,10 +279,10 @@ class LowerAddNOp : public RewritePattern { while (n > 1) { for (int64_t i = 0; i < n; i += 2) { // Add two adjacent operands if applicable. - operands[i / 2] = - (i + 1 < n) ? rewriter.create(addn_op.getLoc(), - operands[i], operands[i + 1]) - : operands[i]; + operands[i / 2] = (i + 1 < n) + ? AddV2Op::create(rewriter, addn_op.getLoc(), + operands[i], operands[i + 1]) + : operands[i]; } n = (n + 1) / 2; } @@ -363,8 +365,8 @@ class LowerDynamicStitchOp : public RewritePattern { packed_shape.push_back(-1); packed_shape.append(item_shape.begin(), item_shape.end()); Location loc = op.getLoc(); - auto packed_shape_val = rewriter.create( - loc, GetI64ElementsAttr(packed_shape, &rewriter)); + auto packed_shape_val = ConstOp::create( + rewriter, loc, GetI64ElementsAttr(packed_shape, &rewriter)); // Prepare each of the output item by unpacking data and then putting it to // the specified index. @@ -374,12 +376,13 @@ class LowerDynamicStitchOp : public RewritePattern { Value data = std::get<1>(it); auto reshaped_data = - rewriter.create(loc, data, packed_shape_val); + ReshapeOp::create(rewriter, loc, data, packed_shape_val); auto num_items = mlir::cast(reshaped_data.getType()).getShape()[0]; - auto items = rewriter.create( - loc, SmallVector(num_items, item_ty), reshaped_data, - /*axis=*/0); + auto items = UnpackOp::create(rewriter, loc, + SmallVector(num_items, item_ty), + reshaped_data, + /*axis=*/0); for (auto index_item : llvm::zip(index_attr, items.getResults())) { int64_t output_index = std::get<0>(index_item).getSExtValue(); Value item = std::get<1>(index_item); @@ -426,80 +429,84 @@ class ConvertFakeQuantWithMinMaxVarsOp : public RewritePattern { auto float_min = op.getMin(); auto float_max = op.getMax(); - auto float_diff = rewriter.create(op.getLoc(), float_max, float_min); + auto float_diff = + SubOp::create(rewriter, op.getLoc(), float_max, float_min); // Compute the range when quantized. 
- auto quant_min = rewriter.create( - op.getLoc(), DenseElementsAttr::get( - scalar_ty, ConvertToAPFloat(bits_min, element_ty))); - - auto quant_max = rewriter.create( - op.getLoc(), DenseElementsAttr::get( - scalar_ty, ConvertToAPFloat(bits_max, element_ty))); - - auto quant_diff = rewriter.create( - op.getLoc(), + auto quant_min = + ConstOp::create(rewriter, op.getLoc(), + DenseElementsAttr::get( + scalar_ty, ConvertToAPFloat(bits_min, element_ty))); + + auto quant_max = + ConstOp::create(rewriter, op.getLoc(), + DenseElementsAttr::get( + scalar_ty, ConvertToAPFloat(bits_max, element_ty))); + + auto quant_diff = ConstOp::create( + rewriter, op.getLoc(), DenseElementsAttr::get( scalar_ty, ConvertToAPFloat(bits_max - bits_min, element_ty))); auto quant_to_float = - rewriter.create(op.getLoc(), float_diff, quant_diff); + DivOp::create(rewriter, op.getLoc(), float_diff, quant_diff); auto float_to_quant = - rewriter.create(op.getLoc(), quant_diff, float_diff); + DivOp::create(rewriter, op.getLoc(), quant_diff, float_diff); // During quantization, the quantized min/max values may not line up // perfectly with the specified min/max. Nudge them into the right range. auto min_scaled = - rewriter.create(op.getLoc(), float_min, quant_to_float); + DivOp::create(rewriter, op.getLoc(), float_min, quant_to_float); auto min_scaled_sub = - rewriter.create(op.getLoc(), quant_min, min_scaled); + SubOp::create(rewriter, op.getLoc(), quant_min, min_scaled); auto mid_rounded = - rewriter.create(op.getLoc(), scalar_ty, min_scaled_sub); + RoundOp::create(rewriter, op.getLoc(), scalar_ty, min_scaled_sub); - auto nudged_zero_point_val = rewriter.create( - op.getLoc(), scalar_ty, mid_rounded, quant_min, quant_max); + auto nudged_zero_point_val = ClipByValueOp::create( + rewriter, op.getLoc(), scalar_ty, mid_rounded, quant_min, quant_max); auto quant_min_sub = - rewriter.create(op.getLoc(), quant_min, nudged_zero_point_val); + SubOp::create(rewriter, op.getLoc(), quant_min, nudged_zero_point_val); auto quant_max_sub = - rewriter.create(op.getLoc(), quant_max, nudged_zero_point_val); + SubOp::create(rewriter, op.getLoc(), quant_max, nudged_zero_point_val); auto nudged_float_min = - rewriter.create(op.getLoc(), quant_min_sub, quant_to_float); + MulOp::create(rewriter, op.getLoc(), quant_min_sub, quant_to_float); auto nudged_float_max = - rewriter.create(op.getLoc(), quant_max_sub, quant_to_float); + MulOp::create(rewriter, op.getLoc(), quant_max_sub, quant_to_float); // Now quantize the input value with the approximated min/max values. // Move the input value into quantized space - Value quantized_input = rewriter.create( - op.getLoc(), input_ty, input, nudged_float_min, nudged_float_max); + Value quantized_input = + ClipByValueOp::create(rewriter, op.getLoc(), input_ty, input, + nudged_float_min, nudged_float_max); - quantized_input = rewriter.create(op.getLoc(), input_ty, - quantized_input, nudged_float_min); + quantized_input = SubOp::create(rewriter, op.getLoc(), input_ty, + quantized_input, nudged_float_min); - quantized_input = rewriter.create(op.getLoc(), input_ty, - quantized_input, float_to_quant); + quantized_input = MulOp::create(rewriter, op.getLoc(), input_ty, + quantized_input, float_to_quant); // Round the quantized input always to the positive direction. 
- auto half_val = rewriter.create( - op.getLoc(), + auto half_val = ConstOp::create( + rewriter, op.getLoc(), DenseElementsAttr::get(scalar_ty, ConvertToAPFloat(0.5, element_ty))); - quantized_input = rewriter.create(op.getLoc(), input_ty, - quantized_input, half_val); + quantized_input = AddV2Op::create(rewriter, op.getLoc(), input_ty, + quantized_input, half_val); - quantized_input = rewriter.create(op.getLoc(), quantized_input); + quantized_input = FloorOp::create(rewriter, op.getLoc(), quantized_input); // Convert back into floating point spae. - Value output = rewriter.create(op.getLoc(), input_ty, - quantized_input, quant_to_float); + Value output = MulOp::create(rewriter, op.getLoc(), input_ty, + quantized_input, quant_to_float); - output = rewriter.create(op.getLoc(), input_ty, output, - nudged_float_min); + output = AddV2Op::create(rewriter, op.getLoc(), input_ty, output, + nudged_float_min); rewriter.replaceOp(op, {output}); return success(); @@ -549,20 +556,21 @@ class LowerInvertPermutationOp : public RewritePattern { Type int_type = x_type.getElementType(); // Could be i32 or i64. auto result_type = x_type; - auto start = rewriter.create(loc, GetScalarOfType(int_type, 0)); - Value limit = rewriter.create( - loc, GetScalarOfType(int_type, x_type.getShape()[0])); - auto delta = rewriter.create(loc, GetScalarOfType(int_type, 1)); + auto start = ConstOp::create(rewriter, loc, GetScalarOfType(int_type, 0)); + Value limit = ConstOp::create( + rewriter, loc, GetScalarOfType(int_type, x_type.getShape()[0])); + auto delta = ConstOp::create(rewriter, loc, GetScalarOfType(int_type, 1)); // Construct a sequence of numbers [0, 1, ... len(x)-1]. auto updates = - rewriter.create(loc, result_type, start, limit, delta); + RangeOp::create(rewriter, loc, result_type, start, limit, delta); auto shape_type = tensorflow::GetTypeFromTFTensorShape({2}, rewriter.getIntegerType(32)); - auto shape = rewriter.create( - loc, DenseElementsAttr::get( - shape_type, {static_cast(x_type.getDimSize(0)), 1})); - auto indices = rewriter.create(loc, op.getX(), shape); + auto shape = ConstOp::create( + rewriter, loc, + DenseElementsAttr::get(shape_type, + {static_cast(x_type.getDimSize(0)), 1})); + auto indices = ReshapeOp::create(rewriter, loc, op.getX(), shape); rewriter.replaceOpWithNewOp( op, result_type, op.getX(), indices, updates); @@ -641,16 +649,17 @@ class LowerLgammaOp : public RewritePattern { } else { tensor_type = UnrankedTensorType::get(float_type); } - input = rewriter.create(loc, tensor_type, input); + input = CastOp::create(rewriter, loc, tensor_type, input); } // Helper lambda function for creating a ConstOp for a tensor filled with // the given constant float value. 
auto create_const_op = [&rewriter, loc, tensor_type, float_type](double value) { - return rewriter.create( - loc, DenseElementsAttr::get(tensor_type, - FloatAttr::get(float_type, value))); + return ConstOp::create( + rewriter, loc, + DenseElementsAttr::get(tensor_type, + FloatAttr::get(float_type, value))); }; Value one_half = create_const_op(0.5); @@ -664,26 +673,26 @@ class LowerLgammaOp : public RewritePattern { create_const_op(std::log(kLanczosGamma + 0.5)); Value base_lanczos_coeff = create_const_op(kBaseLanczosCoeff); - Value minus_input = rewriter.create(loc, input); - Value input_minus_one = rewriter.create(loc, input, one); + Value minus_input = NegOp::create(rewriter, loc, input); + Value input_minus_one = SubOp::create(rewriter, loc, input, one); // If the input is less than 0.5 use Euler's reflection formula: // gamma(x) = pi / (sin(pi * x) * gamma(1 - x)) - Value need_to_reflect = rewriter.create(loc, input, one_half); + Value need_to_reflect = LessOp::create(rewriter, loc, input, one_half); Type tensor_bool_type = need_to_reflect.getType(); - Value z = rewriter.create(loc, need_to_reflect, minus_input, - input_minus_one); + Value z = SelectV2Op::create(rewriter, loc, need_to_reflect, minus_input, + input_minus_one); Value x = base_lanczos_coeff; for (int i = 0, end = kLanczosCoefficients.size(); i < end; ++i) { Value lanczos_coefficient = create_const_op(kLanczosCoefficients[i]); Value index = create_const_op(static_cast(i)); - Value z_plus_index = rewriter.create(loc, z, index); + Value z_plus_index = AddV2Op::create(rewriter, loc, z, index); Value z_plus_index_plus_one = - rewriter.create(loc, z_plus_index, one); - Value incr = rewriter.create(loc, lanczos_coefficient, - z_plus_index_plus_one); - x = rewriter.create(loc, x, incr); + AddV2Op::create(rewriter, loc, z_plus_index, one); + Value incr = DivOp::create(rewriter, loc, lanczos_coefficient, + z_plus_index_plus_one); + x = AddV2Op::create(rewriter, loc, x, incr); } // To improve accuracy on platforms with less-precise log implementations, @@ -691,14 +700,14 @@ class LowerLgammaOp : public RewritePattern { // the device. // log(t) = log(kLanczosGamma + 0.5 + z) // = log(kLanczosGamma + 0.5) + log1p(z / (kLanczosGamma + 0.5)) - Value t = rewriter.create(loc, lanczos_gamma_plus_one_half, z); + Value t = AddV2Op::create(rewriter, loc, lanczos_gamma_plus_one_half, z); Value z_div_lanczos_gamma_plus_one_half = - rewriter.create(loc, z, lanczos_gamma_plus_one_half); + DivOp::create(rewriter, loc, z, lanczos_gamma_plus_one_half); Value log1p_z_div_lanczos_gamma_plus_one_half = - rewriter.create(loc, z_div_lanczos_gamma_plus_one_half); + Log1pOp::create(rewriter, loc, z_div_lanczos_gamma_plus_one_half); Value log_t = - rewriter.create(loc, log_lanczos_gamma_plus_one_half, - log1p_z_div_lanczos_gamma_plus_one_half); + AddV2Op::create(rewriter, loc, log_lanczos_gamma_plus_one_half, + log1p_z_div_lanczos_gamma_plus_one_half); // Compute the final result (modulo reflection). t(z) may be large, and we // need to be careful not to overflow to infinity in the first term of @@ -710,17 +719,17 @@ class LowerLgammaOp : public RewritePattern { // (z + 1/2 - t(z) / log(t(z))) * log(t(z)). 
// // log_y = log_sqrt_two_pi + (z + one_half - t / log_t) * log_t + Log(x); - Value t_div_log_t = rewriter.create(loc, t, log_t); + Value t_div_log_t = DivOp::create(rewriter, loc, t, log_t); Value one_half_minus_t_div_log_t = - rewriter.create(loc, one_half, t_div_log_t); + SubOp::create(rewriter, loc, one_half, t_div_log_t); Value z_plus_one_half_minus_t_div_log_t = - rewriter.create(loc, z, one_half_minus_t_div_log_t); + AddV2Op::create(rewriter, loc, z, one_half_minus_t_div_log_t); Value z_plus_one_half_minus_t_div_log_t_mul_log_t = - rewriter.create(loc, z_plus_one_half_minus_t_div_log_t, log_t); - Value log_x = rewriter.create(loc, x); - Value log_y_rhs = rewriter.create( - loc, z_plus_one_half_minus_t_div_log_t_mul_log_t, log_x); - Value log_y = rewriter.create(loc, log_sqrt_two_pi, log_y_rhs); + MulOp::create(rewriter, loc, z_plus_one_half_minus_t_div_log_t, log_t); + Value log_x = LogOp::create(rewriter, loc, x); + Value log_y_rhs = AddV2Op::create( + rewriter, loc, z_plus_one_half_minus_t_div_log_t_mul_log_t, log_x); + Value log_y = AddV2Op::create(rewriter, loc, log_sqrt_two_pi, log_y_rhs); // Compute the reflected value, used when x < 0.5: // @@ -747,48 +756,48 @@ class LowerLgammaOp : public RewritePattern { // Furthermore, pi * abs(frac(x)) loses precision when abs(frac(x)) is close // to 1. To remedy this, we can use the fact that sin(pi * x) in the domain // [0, 1] is symmetric across the line Y=0.5. - Value abs_input = rewriter.create(loc, input); - Value abs_input_floor = rewriter.create(loc, abs_input); + Value abs_input = AbsOp::create(rewriter, loc, input); + Value abs_input_floor = FloorOp::create(rewriter, loc, abs_input); Value abs_frac_input = - rewriter.create(loc, abs_input, abs_input_floor); + SubOp::create(rewriter, loc, abs_input, abs_input_floor); // Convert values of abs_frac_input > 0.5 to (1 - frac_input) to improve // precision of pi * abs_frac_input for values of abs_frac_input close to 1. Value one_minus_abs_frac_input = - rewriter.create(loc, one, abs_frac_input); + SubOp::create(rewriter, loc, one, abs_frac_input); Value abs_frac_input_gt_one_half = - rewriter.create(loc, abs_frac_input, one_half); + GreaterOp::create(rewriter, loc, abs_frac_input, one_half); Value reduced_frac_input = - rewriter.create(loc, abs_frac_input_gt_one_half, - one_minus_abs_frac_input, abs_frac_input); + SelectV2Op::create(rewriter, loc, abs_frac_input_gt_one_half, + one_minus_abs_frac_input, abs_frac_input); Value pi_mul_reduced_frac_input = - rewriter.create(loc, pi, reduced_frac_input); + MulOp::create(rewriter, loc, pi, reduced_frac_input); Value sin_pi_mul_reduced_frac_input = - rewriter.create(loc, pi_mul_reduced_frac_input); + SinOp::create(rewriter, loc, pi_mul_reduced_frac_input); Value reflection_denom = - rewriter.create(loc, sin_pi_mul_reduced_frac_input); + LogOp::create(rewriter, loc, sin_pi_mul_reduced_frac_input); // Avoid computing -inf - inf, which is nan. If reflection_denom is +/-inf, // then it "wins" and the result is +/-inf. 
     Value is_finite =
-        rewriter.create<IsFiniteOp>(loc, tensor_bool_type, reflection_denom);
-    Value neg_reflection_denom = rewriter.create<NegOp>(loc, reflection_denom);
+        IsFiniteOp::create(rewriter, loc, tensor_bool_type, reflection_denom);
+    Value neg_reflection_denom = NegOp::create(rewriter, loc, reflection_denom);
     Value log_pi_minus_reflection_denom =
-        rewriter.create<SubOp>(loc, log_pi, reflection_denom);
+        SubOp::create(rewriter, loc, log_pi, reflection_denom);
     Value reflection_if_finite =
-        rewriter.create<SubOp>(loc, log_pi_minus_reflection_denom, log_y);
-    Value reflection = rewriter.create<SelectV2Op>(
-        loc, is_finite, reflection_if_finite, neg_reflection_denom);
+        SubOp::create(rewriter, loc, log_pi_minus_reflection_denom, log_y);
+    Value reflection = SelectV2Op::create(
+        rewriter, loc, is_finite, reflection_if_finite, neg_reflection_denom);
     Value result =
-        rewriter.create<SelectV2Op>(loc, need_to_reflect, reflection, log_y);
+        SelectV2Op::create(rewriter, loc, need_to_reflect, reflection, log_y);
 
     // lgamma(+/-inf) = +inf.
-    Value is_inf = rewriter.create<IsInfOp>(loc, tensor_bool_type, input);
-    result = rewriter.create<SelectV2Op>(loc, is_inf, infinity, result);
+    Value is_inf = IsInfOp::create(rewriter, loc, tensor_bool_type, input);
+    result = SelectV2Op::create(rewriter, loc, is_inf, infinity, result);
 
     if (needs_cast) {
-      result = rewriter.create<CastOp>(loc, original_tensor_type, result);
+      result = CastOp::create(rewriter, loc, original_tensor_type, result);
     }
 
     rewriter.replaceOp(op, result);
@@ -819,10 +828,11 @@ class LowerPackOp : public RewritePattern {
     auto op = cast<PackOp>(src_op);
     Location loc = op.getLoc();
 
-    auto axis_value = rewriter.create<ConstOp>(
-        loc, DenseElementsAttr::get(tensorflow::GetTypeFromTFTensorShape(
-                 {}, rewriter.getIntegerType(64)),
-             op.getAxis()));
+    auto axis_value = ConstOp::create(
+        rewriter, loc,
+        DenseElementsAttr::get(tensorflow::GetTypeFromTFTensorShape(
+                                   {}, rewriter.getIntegerType(64)),
+                               op.getAxis()));
     int64_t axis = op.getAxis();
 
     Type prev_input_ty, inferred_ty;
@@ -838,7 +848,7 @@ class LowerPackOp : public RewritePattern {
         prev_input_ty = input_ty;
       }
       expanded_inputs.push_back(
-          rewriter.create<ExpandDimsOp>(loc, inferred_ty, input, axis_value));
+          ExpandDimsOp::create(rewriter, loc, inferred_ty, input, axis_value));
     }
 
     rewriter.replaceOpWithNewOp<ConcatV2Op>(op, op.getType(), expanded_inputs,
@@ -922,28 +932,28 @@ class LowerSpaceToBatchNDOp : public RewritePattern {
     auto block_shape_i64_type = tensorflow::GetTypeFromTFTensorShape(
         block_shape_type.getShape(), rewriter.getIntegerType(64));
     auto block_shape_i64 =
-        rewriter.create<CastOp>(loc, block_shape_i64_type, op.getBlockShape());
+        CastOp::create(rewriter, loc, block_shape_i64_type, op.getBlockShape());
 
     auto paddings_i64_type = tensorflow::GetTypeFromTFTensorShape(
         paddings_type.getShape(), rewriter.getIntegerType(64));
     auto paddings_i64 =
-        rewriter.create<CastOp>(loc, paddings_i64_type, op.getPaddings());
+        CastOp::create(rewriter, loc, paddings_i64_type, op.getPaddings());
 
-    auto pad00 = rewriter.create<ConstOp>(
-        loc, DenseElementsAttr::get(
-                 tensorflow::GetTypeFromTFTensorShape(
-                     {1, 2}, rewriter.getIntegerType(64)),
-                 {0, 0}));
+    auto pad00 = ConstOp::create(rewriter, loc,
+                                 DenseElementsAttr::get(
+                                     tensorflow::GetTypeFromTFTensorShape(
+                                         {1, 2}, rewriter.getIntegerType(64)),
+                                     {0, 0}));
     SmallVector<Value, 4> full_paddings_list{pad00, paddings_i64};
     full_paddings_list.append(remaining_rank, pad00);
     auto full_paddings_type = tensorflow::GetTypeFromTFTensorShape(
         {input_rank, 2}, rewriter.getIntegerType(64));
-    auto zero_i64 = rewriter.create<ConstOp>(
-        loc, GetScalarOfType(rewriter.getIntegerType(64), 0));
+    auto zero_i64 = ConstOp::create(
+        rewriter, loc, GetScalarOfType(rewriter.getIntegerType(64), 0));
     // Extends paddings to all dimensions of input by adding 0s to non-block
     // dimensions.
-    auto full_paddings = rewriter.create<ConcatV2Op>(
-        loc, full_paddings_type, full_paddings_list, zero_i64);
+    auto full_paddings = ConcatV2Op::create(rewriter, loc, full_paddings_type,
+                                            full_paddings_list, zero_i64);
 
     // Compute the result type here instead of using shape inference because the
     // full_paddings won't be available as a constant for shape inference.
@@ -973,45 +983,44 @@ class LowerSpaceToBatchNDOp : public RewritePattern {
         tensorflow::GetTypeFromTFTensorShape(padded_shape, element_type);
     // padded = pad(input, full_paddings)
     auto padded =
-        rewriter.create<PadOp>(loc, padded_type, op.getInput(), full_paddings);
+        PadOp::create(rewriter, loc, padded_type, op.getInput(), full_paddings);
 
     auto paddings_sum_type = tensorflow::GetTypeFromTFTensorShape(
         {input_rank}, rewriter.getIntegerType(64));
     // paddings_sum = paddings[*,0] + paddings[*,1]
-    auto paddings_split = rewriter.create<UnpackOp>(
-        loc, TypeRange({paddings_sum_type, paddings_sum_type}), full_paddings,
-        rewriter.getI64IntegerAttr(1));
-    auto paddings_sum = rewriter.create<AddV2Op>(
-        loc, paddings_split.getResult(0), paddings_split.getResult(1));
-
-    auto input_shape_tensor = rewriter.create<ConstOp>(
-        loc,
+    auto paddings_split = UnpackOp::create(
+        rewriter, loc, TypeRange({paddings_sum_type, paddings_sum_type}),
+        full_paddings, rewriter.getI64IntegerAttr(1));
+    auto paddings_sum =
+        AddV2Op::create(rewriter, loc, paddings_split.getResult(0),
+                        paddings_split.getResult(1));
+
+    auto input_shape_tensor = ConstOp::create(
+        rewriter, loc,
         DenseElementsAttr::get(tensorflow::GetTypeFromTFTensorShape(
                                    {input_rank}, rewriter.getIntegerType(64)),
                                input_shape));
 
     // padded_shape_tensor is the shape of padded.
     auto padded_shape_tensor =
-        rewriter.create<AddV2Op>(loc, paddings_sum, input_shape_tensor);
+        AddV2Op::create(rewriter, loc, paddings_sum, input_shape_tensor);
 
-    auto zero_i32 = rewriter.create<ConstOp>(
-        loc, GetScalarOfType(rewriter.getIntegerType(32), 0));
+    auto zero_i32 = ConstOp::create(
+        rewriter, loc, GetScalarOfType(rewriter.getIntegerType(32), 0));
     SmallVector<Type> padded_shape_splits_types(
         input_rank,
         tensorflow::GetTypeFromTFTensorShape({1}, rewriter.getIntegerType(64)));
     SmallVector<Value> padded_shape_splits(
-        rewriter
-            .create<SplitOp>(loc, padded_shape_splits_types, zero_i32,
-                             padded_shape_tensor)
+        SplitOp::create(rewriter, loc, padded_shape_splits_types, zero_i32,
+                        padded_shape_tensor)
             .getOutput());
 
     SmallVector<Type> block_shape_splits_types(
         block_rank,
         tensorflow::GetTypeFromTFTensorShape({1}, rewriter.getIntegerType(64)));
     SmallVector<Value> block_shape_splits(
-        rewriter
-            .create<SplitOp>(loc, block_shape_splits_types, zero_i32,
-                             block_shape_i64)
+        SplitOp::create(rewriter, loc, block_shape_splits_types, zero_i32,
+                        block_shape_i64)
            .getOutput());
 
     SmallVector<int64_t> outer_shape_ints;
@@ -1019,8 +1028,8 @@ class LowerSpaceToBatchNDOp : public RewritePattern {
     for (int64_t i = 0; i < block_rank; ++i) {
       // TODO(b/157475606): Insert tf.Assert that the following division has
      // remainder 0.
-      outer_shape_vals.push_back(rewriter.create<DivOp>(
-          loc, padded_shape_splits[1 + i], block_shape_splits[i]));
+      outer_shape_vals.push_back(DivOp::create(
+          rewriter, loc, padded_shape_splits[1 + i], block_shape_splits[i]));
 
       auto padded_shape_i = padded_shape[1 + i];
       auto block_shape_ints_i = block_shape_ints[i];
@@ -1049,8 +1058,8 @@ class LowerSpaceToBatchNDOp : public RewritePattern {
     auto reshaped_shape = ValuesToRank1(
         rewriter, loc, rewriter.getIntegerType(64), reshaped_shape_vals);
 
-    auto reshaped = rewriter.create<ReshapeOp>(
-        loc,
+    auto reshaped = ReshapeOp::create(
+        rewriter, loc,
         tensorflow::GetTypeFromTFTensorShape(reshaped_shape_ints, element_type),
         padded, reshaped_shape);
 
@@ -1065,14 +1074,14 @@ class LowerSpaceToBatchNDOp : public RewritePattern {
     for (int64_t i = 1 + block_rank; i < input_rank; ++i) {
       permutation_vals.push_back(block_rank + i);
     }
-    auto permutation = rewriter.create<ConstOp>(
-        loc, GetI64ElementsAttr(permutation_vals, &rewriter));
+    auto permutation = ConstOp::create(
+        rewriter, loc, GetI64ElementsAttr(permutation_vals, &rewriter));
 
-    auto permuted = rewriter.create<TransposeOp>(loc, reshaped, permutation);
+    auto permuted = TransposeOp::create(rewriter, loc, reshaped, permutation);
 
     auto output_batch = padded_shape_splits[0];
     for (int64_t i = 0; i < block_rank; ++i) {
       output_batch =
-          rewriter.create<MulOp>(loc, output_batch, block_shape_splits[i]);
+          MulOp::create(rewriter, loc, output_batch, block_shape_splits[i]);
     }
 
     SmallVector<Value> output_shape_vals{output_batch};
     for (int64_t i = 0; i < block_rank; ++i) {
@@ -1163,11 +1172,11 @@ class LowerBatchToSpaceND : public RewritePattern {
     std::copy(input_shape.begin() + 1, input_shape.end(),
               reshaped_shape.begin() + block_rank + 1);
 
-    auto reshaped = rewriter.create<TF::ReshapeOp>(
-        op.getLoc(),
+    auto reshaped = TF::ReshapeOp::create(
+        rewriter, op.getLoc(),
         tensorflow::GetTypeFromTFTensorShape(reshaped_shape, element_ty), input,
-        rewriter.create<ConstOp>(op.getLoc(),
-                                 rewriter.getI64TensorAttr(reshaped_shape)));
+        ConstOp::create(rewriter, op.getLoc(),
+                        rewriter.getI64TensorAttr(reshaped_shape)));
 
     // 2. Permute dimensions of `reshaped` to produce `permuted` of shape
     //    [batch / prod(block_shape),
@@ -1191,12 +1200,12 @@ class LowerBatchToSpaceND : public RewritePattern {
       transpose_shape[it.index()] = reshaped_shape[it.value()];
     }
 
-    auto permuted = rewriter.create<TF::TransposeOp>(
-        op.getLoc(),
+    auto permuted = TF::TransposeOp::create(
+        rewriter, op.getLoc(),
         tensorflow::GetTypeFromTFTensorShape(transpose_shape, element_ty),
         reshaped,
-        rewriter.create<ConstOp>(op.getLoc(),
-                                 rewriter.getI64TensorAttr(permutation)));
+        ConstOp::create(rewriter, op.getLoc(),
+                        rewriter.getI64TensorAttr(permutation)));
 
     // 3. Reshape `permuted` to produce `reshaped_permuted` of shape
     //    [batch / prod(block_shape),
@@ -1219,13 +1228,13 @@ class LowerBatchToSpaceND : public RewritePattern {
     std::copy(remainder_shape.begin(), remainder_shape.end(),
               reshaped_permuted_shape.begin() + 1 + block_rank);
 
-    auto reshaped_permuted = rewriter.create<TF::ReshapeOp>(
-        op.getLoc(),
+    auto reshaped_permuted = TF::ReshapeOp::create(
+        rewriter, op.getLoc(),
         tensorflow::GetTypeFromTFTensorShape(reshaped_permuted_shape,
                                              element_ty),
         permuted,
-        rewriter.create<ConstOp>(
-            op.getLoc(), rewriter.getI64TensorAttr(reshaped_permuted_shape)));
+        ConstOp::create(rewriter, op.getLoc(),
+                        rewriter.getI64TensorAttr(reshaped_permuted_shape)));
 
     // 4. Crop the start and end of dimensions `[1, ..., M]` of
     //    `reshaped_permuted` according to `crops` to produce the output of
@@ -1263,10 +1272,10 @@ class LowerBatchToSpaceND : public RewritePattern {
     rewriter.replaceOpWithNewOp<SliceOp>(
         op, tensorflow::GetTypeFromTFTensorShape(slice_sizes, element_ty),
         reshaped_permuted,
-        rewriter.create<ConstOp>(op.getLoc(),
-                                 rewriter.getI64TensorAttr(start_indices)),
-        rewriter.create<ConstOp>(op.getLoc(),
-                                 rewriter.getI64TensorAttr(slice_sizes)));
+        ConstOp::create(rewriter, op.getLoc(),
+                        rewriter.getI64TensorAttr(start_indices)),
+        ConstOp::create(rewriter, op.getLoc(),
+                        rewriter.getI64TensorAttr(slice_sizes)));
     return success();
   }
 };
@@ -1310,11 +1319,11 @@ class LowerSparseMatMulOp : public RewritePattern {
         tensor_type_f32 = UnrankedTensorType::get(Float32Type::get(context));
       }
       // Add cast to f32 to conform with element type of result.
-      operand = rewriter.create<CastOp>(op.getLoc(), tensor_type_f32, operand);
+      operand = CastOp::create(rewriter, op.getLoc(), tensor_type_f32, operand);
     }
 
-    Value result = rewriter.create<MatMulOp>(
-        op.getLoc(), op.getProduct().getType(), operands[0], operands[1],
-        op.getTransposeA(), op.getTransposeB());
+    Value result = MatMulOp::create(
+        rewriter, op.getLoc(), op.getProduct().getType(), operands[0],
+        operands[1], op.getTransposeA(), op.getTransposeB());
 
     rewriter.replaceOp(op, {result});
     return success();
@@ -1441,20 +1450,22 @@ class LowerResizeNearestNeighbor : public RewritePattern {
     }
 
     auto one =
-        rewriter.create<ConstOp>(loc, GetScalarOfType(out_size_element_ty, 1));
+        ConstOp::create(rewriter, loc, GetScalarOfType(out_size_element_ty, 1));
 
     // Extract the image shape.
-    Value input_shape = rewriter.create<ShapeOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({4}, rewriter.getI64Type()),
-        input);
-    input_shape = rewriter.create<CastOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({4}, out_size_element_ty),
-        input_shape);
+    Value input_shape = ShapeOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({4}, rewriter.getI64Type()),
+        input);
+    input_shape = CastOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({4}, out_size_element_ty),
+        input_shape);
 
     auto scalar_dim_ty =
         tensorflow::GetTypeFromTFTensorShape({}, out_size_element_ty);
-    auto split_image_shape = rewriter.create<UnpackOp>(
-        loc,
+    auto split_image_shape = UnpackOp::create(
+        rewriter, loc,
         TypeRange({scalar_dim_ty, scalar_dim_ty, scalar_dim_ty, scalar_dim_ty}),
         input_shape);
 
@@ -1464,151 +1475,156 @@ class LowerResizeNearestNeighbor : public RewritePattern {
     auto in_x = split_image_shape.getResult(2);
     auto channels = split_image_shape.getResult(3);
 
-    auto in_count = rewriter.create<MulOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({}, out_size_element_ty),
-        in_y, in_x);
+    auto in_count = MulOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({}, out_size_element_ty), in_y,
+        in_x);
 
     // Unpack and separate the out width/height.
-    auto split_out_size = rewriter.create<UnpackOp>(
-        loc, TypeRange({scalar_dim_ty, scalar_dim_ty}), out_size);
+    auto split_out_size = UnpackOp::create(
+        rewriter, loc, TypeRange({scalar_dim_ty, scalar_dim_ty}), out_size);
 
     auto out_y = split_out_size.getResult(0);
     auto out_x = split_out_size.getResult(1);
 
-    auto out_count = rewriter.create<MulOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({}, out_size_element_ty),
-        out_y, out_x);
+    auto out_count = MulOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({}, out_size_element_ty), out_y,
+        out_x);
 
     // Generate what the final output shape will look like.
-    auto out_shape = rewriter.create<PackOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({4}, out_size_element_ty),
+    auto out_shape = PackOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({4}, out_size_element_ty),
         ValueRange({batch, out_y, out_x, channels}));
 
     // Compute the indices along the vertical dimension.
-    auto in_y_f32 = rewriter.create<CastOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()),
-        in_y);
-    auto out_w_f32 = rewriter.create<CastOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()),
-        out_y);
-
-    Value y_scale = rewriter.create<DivOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()),
+    auto in_y_f32 = CastOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()), in_y);
+    auto out_w_f32 = CastOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()), out_y);
+
+    Value y_scale = DivOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()),
         in_y_f32, out_w_f32);
 
-    Value zero_f32 = rewriter.create<ConstOp>(
-        loc, GetScalarOfType(rewriter.getF32Type(), 0.0));
-    Value one_f32 = rewriter.create<ConstOp>(
-        loc, GetScalarOfType(rewriter.getF32Type(), 1.0));
-
-    Value y_range = rewriter.create<RangeOp>(
-        loc,
-        tensorflow::GetTypeFromTFTensorShape({out_height_constant},
-                                             rewriter.getF32Type()),
-        zero_f32, out_w_f32, one_f32);
-
-    y_range = rewriter.create<MulOp>(
-        loc,
-        tensorflow::GetTypeFromTFTensorShape({out_height_constant},
-                                             rewriter.getF32Type()),
-        y_range, y_scale);
-
-    y_range =
-        rewriter.create<CastOp>(loc,
-                                tensorflow::GetTypeFromTFTensorShape(
-                                    {out_height_constant}, out_size_element_ty),
-                                y_range);
-
-    y_range = rewriter.create<ReshapeOp>(
-        loc,
+    Value zero_f32 = ConstOp::create(
+        rewriter, loc, GetScalarOfType(rewriter.getF32Type(), 0.0));
+    Value one_f32 = ConstOp::create(
+        rewriter, loc, GetScalarOfType(rewriter.getF32Type(), 1.0));
+
+    Value y_range =
+        RangeOp::create(rewriter, loc,
+                        tensorflow::GetTypeFromTFTensorShape(
+                            {out_height_constant}, rewriter.getF32Type()),
+                        zero_f32, out_w_f32, one_f32);
+
+    y_range = MulOp::create(rewriter, loc,
+                            tensorflow::GetTypeFromTFTensorShape(
+                                {out_height_constant}, rewriter.getF32Type()),
+                            y_range, y_scale);
+
+    y_range = CastOp::create(rewriter, loc,
+                             tensorflow::GetTypeFromTFTensorShape(
+                                 {out_height_constant}, out_size_element_ty),
+                             y_range);
+
+    y_range = ReshapeOp::create(
+        rewriter, loc,
         tensorflow::GetTypeFromTFTensorShape({out_height_constant, 1},
                                              out_size_element_ty),
         y_range,
-        rewriter.create<PackOp>(
-            loc, tensorflow::GetTypeFromTFTensorShape({2}, out_size_element_ty),
+        PackOp::create(
+            rewriter, loc,
+            tensorflow::GetTypeFromTFTensorShape({2}, out_size_element_ty),
             ValueRange({out_y, one})));
 
-    Value y_indices = rewriter.create<MulOp>(
-        loc,
-        tensorflow::GetTypeFromTFTensorShape({out_height_constant, 1},
-                                             out_size_element_ty),
-        y_range, in_x);
+    Value y_indices =
+        MulOp::create(rewriter, loc,
+                      tensorflow::GetTypeFromTFTensorShape(
+                          {out_height_constant, 1}, out_size_element_ty),
+                      y_range, in_x);
 
     // Compute the indices for the nearest neighbour lookup across the width
     // dim.
-    auto in_x_f32 = rewriter.create<CastOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()),
-        in_x);
-    auto out_h_f32 = rewriter.create<CastOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()),
-        out_x);
-
-    Value x_scale = rewriter.create<DivOp>(
-        loc, tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()),
+    auto in_x_f32 = CastOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()), in_x);
+    auto out_h_f32 = CastOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()), out_x);
+
+    Value x_scale = DivOp::create(
+        rewriter, loc,
+        tensorflow::GetTypeFromTFTensorShape({}, rewriter.getF32Type()),
         in_x_f32, out_h_f32);
 
-    Value x_range = rewriter.create<RangeOp>(
-        loc,
-        tensorflow::GetTypeFromTFTensorShape({out_width_constant},
-                                             rewriter.getF32Type()),
-        zero_f32, out_h_f32, one_f32);
-
-    x_range =
-        rewriter.create<MulOp>(loc,
-                               tensorflow::GetTypeFromTFTensorShape(
-                                   {out_width_constant}, rewriter.getF32Type()),
-                               x_range, x_scale);
-
-    x_range =
-        rewriter.create<CastOp>(loc,
-                                tensorflow::GetTypeFromTFTensorShape(
-                                    {out_width_constant}, out_size_element_ty),
-                                x_range);
-
-    Value x_indices = rewriter.create<ReshapeOp>(
-        loc,
+    Value x_range =
+        RangeOp::create(rewriter, loc,
+                        tensorflow::GetTypeFromTFTensorShape(
+                            {out_width_constant}, rewriter.getF32Type()),
+                        zero_f32, out_h_f32, one_f32);
+
+    x_range = MulOp::create(rewriter, loc,
+                            tensorflow::GetTypeFromTFTensorShape(
+                                {out_width_constant}, rewriter.getF32Type()),
+                            x_range, x_scale);
+
+    x_range = CastOp::create(rewriter, loc,
+                             tensorflow::GetTypeFromTFTensorShape(
+                                 {out_width_constant}, out_size_element_ty),
+                             x_range);
+
+    Value x_indices = ReshapeOp::create(
+        rewriter, loc,
         tensorflow::GetTypeFromTFTensorShape({1, out_width_constant},
                                              out_size_element_ty),
         x_range,
-        rewriter.create<PackOp>(
-            loc, tensorflow::GetTypeFromTFTensorShape({2}, out_size_element_ty),
+        PackOp::create(
+            rewriter, loc,
+            tensorflow::GetTypeFromTFTensorShape({2}, out_size_element_ty),
             ValueRange({one, out_x})));
 
     // Generate the combined index array, reshape to be 1-D.
-    Value indices = rewriter.create<AddV2Op>(
-        loc,
+    Value indices = AddV2Op::create(
+        rewriter, loc,
         tensorflow::GetTypeFromTFTensorShape(
             {out_height_constant, out_width_constant}, out_size_element_ty),
         y_indices, x_indices);
 
-    indices = rewriter.create<ReshapeOp>(
-        loc,
+    indices = ReshapeOp::create(
+        rewriter, loc,
         tensorflow::GetTypeFromTFTensorShape({out_spatial_cst},
                                              out_size_element_ty),
         indices,
-        rewriter.create<ReshapeOp>(
-            loc, tensorflow::GetTypeFromTFTensorShape({1}, out_size_element_ty),
+        ReshapeOp::create(
+            rewriter, loc,
+            tensorflow::GetTypeFromTFTensorShape({1}, out_size_element_ty),
             out_count,
-            rewriter.create<ConstOp>(loc, rewriter.getI64TensorAttr({1}))));
+            ConstOp::create(rewriter, loc, rewriter.getI64TensorAttr({1}))));
 
     // Group the spatial indices and gather along that combined index.
-    Value input_collapsed_spatial = rewriter.create<ReshapeOp>(
-        loc,
+    Value input_collapsed_spatial = ReshapeOp::create(
+        rewriter, loc,
         tensorflow::GetTypeFromTFTensorShape(
             {batch_cst, in_spatial_cst, channels_cst}, input_element_ty),
         input,
-        rewriter.create<PackOp>(
-            loc, tensorflow::GetTypeFromTFTensorShape({3}, out_size_element_ty),
+        PackOp::create(
+            rewriter, loc,
+            tensorflow::GetTypeFromTFTensorShape({3}, out_size_element_ty),
             ValueRange({batch, in_count, channels})));
 
-    Value gathered_values = rewriter.create<GatherV2Op>(
-        loc,
+    Value gathered_values = GatherV2Op::create(
+        rewriter, loc,
         tensorflow::GetTypeFromTFTensorShape(
             {batch_cst, out_spatial_cst, channels_cst}, input_element_ty),
         input_collapsed_spatial, indices, /*axis=*/one);
 
     gathered_values =
-        rewriter.create<ReshapeOp>(loc, result_ty, gathered_values, out_shape);
+        ReshapeOp::create(rewriter, loc, result_ty, gathered_values, out_shape);
 
     rewriter.replaceOp(op, gathered_values);
     return success();
@@ -1681,18 +1697,18 @@ struct LowerRollOp : public RewritePattern {
       begin_values[axis_i] = begin_i;
       auto begin_attr = DenseIntElementsAttr::get(axis_type, begin_values);
       auto begin =
-          rewriter.create<ConstOp>(op->getLoc(), axis_type, begin_attr);
+          ConstOp::create(rewriter, op->getLoc(), axis_type, begin_attr);
 
       SmallVector<int64_t> output_shape;
       output_shape.append(input_shape.begin(), input_shape.end());
       output_shape[axis_i] = size_i;
       auto size_attr = DenseIntElementsAttr::get(axis_type, output_shape);
-      auto size = rewriter.create<ConstOp>(op->getLoc(), axis_type, size_attr);
+      auto size = ConstOp::create(rewriter, op->getLoc(), axis_type, size_attr);
 
       auto slice_op_ty = tensorflow::GetTypeFromTFTensorShape(
           output_shape, input_ty.getElementType());
-      return rewriter.create<SliceOp>(op->getLoc(), slice_op_ty, input, begin,
-                                      size);
+      return SliceOp::create(rewriter, op->getLoc(), slice_op_ty, input, begin,
+                             size);
     };
 
     auto result = tf_roll_op.getInput();
@@ -1708,9 +1724,9 @@ struct LowerRollOp : public RewritePattern {
 
       auto dim_attr = DenseIntElementsAttr::get(scalar_type, {axis_i});
       auto concat_dim =
-          rewriter.create<ConstOp>(op->getLoc(), scalar_type, dim_attr);
-      auto concat_op = rewriter.create<ConcatV2Op>(
-          op->getLoc(), input_ty,
+          ConstOp::create(rewriter, op->getLoc(), scalar_type, dim_attr);
+      auto concat_op = ConcatV2Op::create(
+          rewriter, op->getLoc(), input_ty,
          ArrayRef<Value>({slice_op_1.getOutput(), slice_op_2.getOutput()}),
          concat_dim);
       result = concat_op.getResult();
@@ -1741,7 +1757,7 @@ class LowerSoftmaxOp : public OpRewritePattern {
     // Note that the TensorFlow Softmax op verifies that the input rank is
     // greater than or equal to one so the following sequence is valid.
     auto reduce_dim =
-        rewriter.create<TF::ConstOp>(loc, GetI64ElementsAttr({-1}, &rewriter));
+        TF::ConstOp::create(rewriter, loc, GetI64ElementsAttr({-1}, &rewriter));
 
     // Exponential of input values and then their sum can be very large here.
     // Division with large denominator is numerically unstable. To improve
@@ -1750,20 +1766,19 @@ class LowerSoftmaxOp : public OpRewritePattern {
     // after adding or subtracting all inputs in a batch using a common value
    // gives mathematically equivalent result.
    auto max_logits =
-        rewriter.create<TF::MaxOp>(loc, logits, reduce_dim,
-                                   /*keep_dims=*/rewriter.getBoolAttr(true));
-    auto shifted_logits = rewriter.create<TF::SubOp>(loc, logits, max_logits);
+        TF::MaxOp::create(rewriter, loc, logits, reduce_dim,
+                          /*keep_dims=*/rewriter.getBoolAttr(true));
+    auto shifted_logits = TF::SubOp::create(rewriter, loc, logits, max_logits);
 
     // Exponentiate the inputs.
-    Value exp = rewriter.create<TF::ExpOp>(loc, shifted_logits);
+    Value exp = TF::ExpOp::create(rewriter, loc, shifted_logits);
 
     // Compute summation of the exponentials.
-    Value sum =
-        rewriter.create<TF::SumOp>(loc, exp, reduce_dim,
-                                   /*keep_dims=*/rewriter.getBoolAttr(true));
+    Value sum = TF::SumOp::create(rewriter, loc, exp, reduce_dim,
+                                  /*keep_dims=*/rewriter.getBoolAttr(true));
 
     if (use_log) {
-      Value log = rewriter.create<TF::LogOp>(loc, sum);
+      Value log = TF::LogOp::create(rewriter, loc, sum);
       rewriter.replaceOpWithNewOp<SubOp>(op, shifted_logits, log);
     } else {
       rewriter.replaceOpWithNewOp<DivOp>(op, exp, sum);
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.td b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.td
index a9ff5a8f76268a..1061d564f51afc 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.td
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.td
@@ -37,7 +37,7 @@ class GetF32Scalar :
 def TrueBoolAttr : AttrConstraint<CPred<"llvm::cast<BoolAttr>($_self).getValue()">>;
 
 def CreateTFShapeOp : NativeCodeCall<
-    "$_builder.create<TF::ShapeOp>($0.getLoc(), $1, $2)">;
+    "TF::ShapeOp::create($_builder, $0.getLoc(), $1, $2)">;
 
 def IsI32 : NativeCodeCall<
     "$_builder.getBoolAttr(getElementTypeOrSelf($0.getType()).isInteger(32))">;
@@ -49,11 +49,11 @@ def CreateTFCastOpI32 : NativeCodeCall<
     "CreateTFCastOpI32(&$_builder, $0.getLoc(), $1, $2)">;
 
 def CreateTensorScatterNdOp : NativeCodeCall<
-    "$_builder.create<TF::ScatterNdOp>("
+    "TF::ScatterNdOp::create($_builder, "
     "$0.getLoc(), $0.getType(), $1, $2, $3, $4)">;
 
 def CreateTensorScatterUpdateOp : NativeCodeCall<
-    "$_builder.create<TF::TensorScatterUpdateOp>("
+    "TF::TensorScatterUpdateOp::create($_builder, "
     "$0.getLoc(), $0.getType(), $0, $1, $2, $3)">;
 
 //===----------------------------------------------------------------------===//
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/prepare_tpu_computation_for_tf_export.cc b/tensorflow/compiler/mlir/tensorflow/transforms/prepare_tpu_computation_for_tf_export.cc
index 4ddd3577957163..bd8ae6260ce259 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/prepare_tpu_computation_for_tf_export.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/prepare_tpu_computation_for_tf_export.cc
@@ -105,13 +105,13 @@ class RewriteXlaHostComputeMlir
     rewriter.setInsertionPointToStart(&cloned_func.getBody().front());
     auto result_type =
         RankedTensorType::get({3}, rewriter.getType<TF::StringType>());
-    auto dynamic_key =
-        rewriter.create<TF::_XlaCompileMlirPlaceholderProgramKeyOp>(
-            func.getLoc(), /*program=*/result_type, llvm::ArrayRef<Value>{});
+    auto dynamic_key = TF::_XlaCompileMlirPlaceholderProgramKeyOp::create(
+        rewriter, func.getLoc(), /*program=*/result_type,
+        llvm::ArrayRef<Value>{});
 
-    auto recv_at_host = rewriter.create<TF::_XlaRecvAtHostOp>(
-        func.getLoc(), op.getOperandTypes(), /*dynamic_key=*/dynamic_key,
-        op.getSendKeyAttr(),
+    auto recv_at_host = TF::_XlaRecvAtHostOp::create(
+        rewriter, func.getLoc(), op.getOperandTypes(),
+        /*dynamic_key=*/dynamic_key, op.getSendKeyAttr(),
         /*device_ordinal=*/rewriter.getI64IntegerAttr(0),
         rewriter.getStringAttr("TPU"));
     for (auto result :
@@ -120,8 +120,8 @@ class RewriteXlaHostComputeMlir
     }
 
     rewriter.setInsertionPoint(cloned_func.getBody().front().getTerminator());
-    rewriter.create<TF::_XlaSendFromHostOp>(
-        func.getLoc(),
+    TF::_XlaSendFromHostOp::create(
+        rewriter, func.getLoc(),
         cloned_func.getBody().front().getTerminator()->getOperands(),
         /*dynamic_key=*/dynamic_key, op.getRecvKeyAttr(),
         /*device_ordinal=*/rewriter.getI64IntegerAttr(0),
@@ -157,8 +157,8 @@ void UpdateArgAttributes(mlir::func::FuncOp func) {
       // 'sharding' attribute.
      // TODO(b/414807890): Not sure whether we need to pass a V2 sharding to
      // the _XlaShardingV2, do this when we actually have a use case.
-      auto updated_arg = builder.create<TF::XlaShardingOp>(
-          func.getLoc(), arg.getType(), arg, /*sharding=*/sharding,
+      auto updated_arg = TF::XlaShardingOp::create(
+          builder, func.getLoc(), arg.getType(), arg, /*sharding=*/sharding,
           /*_XlaSharding=*/sharding, /*_XlaShardingV2=*/mlir::StringAttr());
       func.getArgument(i).replaceAllUsesExcept(
           updated_arg, llvm::SmallPtrSet<Operation*, 1>({updated_arg}));
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_invariant_op_hoisting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_invariant_op_hoisting.cc
index 803f135af624d7..656f87deb0b79f 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_invariant_op_hoisting.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_invariant_op_hoisting.cc
@@ -118,8 +118,8 @@ void MakeShapeOpInvariant(tf_device::ReplicateOp replicate_op, int num_replicas,
   if (block_arg.getOwner() != replicate_block) return;
 
   OpBuilder builder(shape_op);
-  auto new_shape_op = builder.create<TF::VariableShapeOp>(
-      shape_op.getLoc(), shape_op.getType(),
+  auto new_shape_op = TF::VariableShapeOp::create(
+      builder, shape_op.getLoc(), shape_op.getType(),
       replicate_op.GetReplicaOperandForBlockArgument(block_arg,
                                                      /*replica=*/0));
   shape_op.replaceAllUsesWith(new_shape_op.getOperation());
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc b/tensorflow/compiler/mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc
index deef690b4d9636..1945aa6d811c19 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc
@@ -45,8 +45,8 @@ OpT AddOperandAndRewriteAs(Operation* op, Value operand, NamedAttrList attr,
   builder->setInsertionPoint(op);
   auto operands = llvm::to_vector<4>(op->getOperands());
   operands.push_back(operand);
-  auto new_op = builder->create<OpT>(op->getLoc(), op->getResultTypes(),
-                                     operands, attr.getAttrs());
+  auto new_op = OpT::create(*builder, op->getLoc(), op->getResultTypes(),
+                            operands, attr.getAttrs());
   op->replaceAllUsesWith(new_op.getOperation()->getResults());
   op->erase();
   return new_op;
@@ -82,8 +82,8 @@ LogicalResult RunOnRegion(Region* region) {
   OpBuilder builder(region);
   auto output_ty =
       RankedTensorType::get({}, VariantType::get(region->getContext()));
-  auto dedup_op = builder.create<XlaRecvTPUEmbeddingDeduplicationDataOp>(
-      loc, output_ty, config);
+  auto dedup_op = XlaRecvTPUEmbeddingDeduplicationDataOp::create(
+      builder, loc, output_ty, config);
 
   // Rewrite RecvTPUEmbeddingActivations op to the corresponding internal op.
   if (recv_op)
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/sparsecore/embedding_program_key.cc b/tensorflow/compiler/mlir/tensorflow/transforms/sparsecore/embedding_program_key.cc
index 1e7958660fd8c4..ce3b6bb5dd5070 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/sparsecore/embedding_program_key.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/sparsecore/embedding_program_key.cc
@@ -213,13 +213,13 @@ tf_device::LaunchOp CreateLaunchForBlock(OpBuilder* builder,
   }
 
   builder->setInsertionPointAfter(before_op);
-  auto launch = builder->create<tf_device::LaunchOp>(
-      before_op->getLoc(), builder->getStringAttr(host_device),
-      launch_result_types);
+  auto launch = tf_device::LaunchOp::create(*builder, before_op->getLoc(),
+                                            builder->getStringAttr(host_device),
+                                            launch_result_types);
   launch.getBody().push_back(launch_block);
 
   builder->setInsertionPointToEnd(&launch.GetBody());
-  builder->create<tf_device::ReturnOp>(before_op->getLoc(), launch_results);
+  tf_device::ReturnOp::create(*builder, before_op->getLoc(), launch_results);
 
   return launch;
 }
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc
index 72302903b37fa5..d57390cbc919ad 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc
@@ -37,16 +37,17 @@ struct FuseParallelMapAndBatch : public OpRewritePattern {
 
     // The type of the `num_parallel_calls` argument in ParallelMapDataset
    // and MapAndBatchDataset is different (int32 and int64 respectively)
-    auto num_parallel_calls_op = rewriter.create<CastOp>(
-        op.getLoc(), UnrankedTensorType::get(rewriter.getIntegerType(64)),
+    auto num_parallel_calls_op = CastOp::create(
+        rewriter, op.getLoc(),
+        UnrankedTensorType::get(rewriter.getIntegerType(64)),
         batchInputOp.getNumParallelCalls(), rewriter.getBoolAttr(false));
 
     if (op.getMetadata() != batchInputOp.getMetadata()) {
       return failure();
     }
 
-    auto fused_op = rewriter.create<MapAndBatchDatasetOp>(
-        op.getLoc(), op.getType(), batchInputOp.getInputDataset(),
+    auto fused_op = MapAndBatchDatasetOp::create(
+        rewriter, op.getLoc(), op.getType(), batchInputOp.getInputDataset(),
         batchInputOp.getOtherArguments(), op.getBatchSize(),
         num_parallel_calls_op.getY(), op.getDropRemainder(),
         batchInputOp.getF(), op.getOutputTypes(), op.getOutputShapes(),
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_parallel_execute_sink_resource_write.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_parallel_execute_sink_resource_write.cc
index bb4c951065f771..2ee19787c7552f 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_parallel_execute_sink_resource_write.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_parallel_execute_sink_resource_write.cc
@@ -131,8 +131,8 @@ void SinkResourceWritesIntoParallelExecute(
     new_result_types.push_back(old_result.getType());
 
   OpBuilder builder(parallel_execute);
-  auto new_parallel_execute = builder.create<tf_device::ParallelExecuteOp>(
-      parallel_execute.getLoc(), num_regions, new_result_types);
+  auto new_parallel_execute = tf_device::ParallelExecuteOp::create(
+      builder, parallel_execute.getLoc(), num_regions, new_result_types);
 
   for (auto region : llvm::zip(new_parallel_execute.getRegions(),
                                parallel_execute.getRegions()))
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_resource_read_for_write.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_resource_read_for_write.cc
index 5f708ce0ee1a74..8cd90d0a96e9e9 100644
---
a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_resource_read_for_write.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_resource_read_for_write.cc @@ -106,9 +106,9 @@ void TPUResourceReadForWritePass::runOnOperation() { if (!resource_and_type.resource) continue; if (ClusterFuncHasResourceRead(cluster_func, resource_and_type.resource)) continue; - auto new_read = builder.create( - resource_and_type.resource.getLoc(), resource_and_type.subtype, - resource_and_type.resource); + auto new_read = TF::ReadVariableOp::create( + builder, resource_and_type.resource.getLoc(), + resource_and_type.subtype, resource_and_type.resource); read_operands.push_back(new_read.getValue()); } @@ -119,8 +119,9 @@ void TPUResourceReadForWritePass::runOnOperation() { operands.append(read_operands.begin(), read_operands.end()); auto loc = cluster_func.getLoc(); - auto new_cluster_func = builder.create( - loc, cluster_func.getResultTypes(), operands, cluster_func->getAttrs()); + auto new_cluster_func = tf_device::ClusterFuncOp::create( + builder, loc, cluster_func.getResultTypes(), operands, + cluster_func->getAttrs()); cluster_func.replaceAllUsesWith(new_cluster_func); func::FuncOp func = cluster_func.getFuncOp(); Block& block = func.front(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc b/tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc index 03618d23464b0a..85db75ea51a543 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc @@ -89,9 +89,9 @@ TF::ReshapeOp ConvertTFBatchMatMulOp::createReshapeOp( Type resultType = RankedTensorType::get(shape, element_type); auto constant_attr = DenseElementsAttr::get(shape_spec_type, shape); auto shape_tensor = - rewriter.create(loc, shape_spec_type, constant_attr); - return rewriter.create(loc, resultType, /*tensor=*/value, - /*shape=*/shape_tensor); + TF::ConstOp::create(rewriter, loc, shape_spec_type, constant_attr); + return TF::ReshapeOp::create(rewriter, loc, resultType, /*tensor=*/value, + /*shape=*/shape_tensor); } template @@ -122,16 +122,16 @@ std::vector ConvertTFBatchMatMulOp::sliceInput( auto split_dimension_type = RankedTensorType::get({}, rewriter.getIntegerType(32)); auto split_dimension_attr = DenseElementsAttr::get(split_dimension_type, 0); - auto split_dimension_op = rewriter.create( - loc, split_dimension_type, split_dimension_attr); + auto split_dimension_op = TF::ConstOp::create( + rewriter, loc, split_dimension_type, split_dimension_attr); // Split along each batch. SmallVector slice_size = {1, num_rows, num_cols}; Type slice_result_type = RankedTensorType::get(slice_size, element_type); llvm::SmallVector output_types(batch_size, slice_result_type); - auto split_op = rewriter.create(loc, output_types, - split_dimension_op.getOutput(), - reshape_op.getOutput()); + auto split_op = TF::SplitOp::create(rewriter, loc, output_types, + split_dimension_op.getOutput(), + reshape_op.getOutput()); // Squeeze each batch, i.e. 
reshape // [1, num_rows, num_cols] -> [num_rows, num_cols] @@ -259,11 +259,11 @@ LogicalResult ConvertTFBatchMatMulOp::matchAndRewrite( lhs_batch_idx = batch_idx; rhs_batch_idx = batch_idx; } - auto matmul = rewriter.create(loc, matmul_type, - /*a=*/sliced_lhs[lhs_batch_idx], - /*b=*/sliced_rhs[rhs_batch_idx], - /*transpose_a=*/op.getAdjX(), - /*transpose_b=*/op.getAdjY()); + auto matmul = TF::MatMulOp::create(rewriter, loc, matmul_type, + /*a=*/sliced_lhs[lhs_batch_idx], + /*b=*/sliced_rhs[rhs_batch_idx], + /*transpose_a=*/op.getAdjX(), + /*transpose_b=*/op.getAdjY()); matmuls.emplace_back(matmul.getProduct()); } @@ -272,7 +272,7 @@ LogicalResult ConvertTFBatchMatMulOp::matchAndRewrite( {bcast.output_batch_size(), rows, cols}, element_type); const auto axis = rewriter.getI64IntegerAttr(0); auto pack_op = - rewriter.create(loc, packed_type, /*values=*/matmuls, axis); + TF::PackOp::create(rewriter, loc, packed_type, /*values=*/matmuls, axis); // Reshape the rank-3 tensor into the correct output shape. const auto& result_batch_shape = bcast.output_batch_shape().dim_sizes(); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/dump_graph_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/dump_graph_test.cc index 9d9780d231523f..3fea8e64e85ca3 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/dump_graph_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/dump_graph_test.cc @@ -15,11 +15,22 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/dump_graph.h" +#include +#include + +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "xla/tsl/lib/core/status_test_util.h" +#include "tensorflow/core/framework/op.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/node_builder.h" -#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/file_system.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/util/dump_graph.h" @@ -68,7 +79,7 @@ class StringWritableFile : public WritableFile { TEST(Dump, TextualIrToFileSuccess) { Graph graph(OpRegistry::Global()); Node* node; - TF_CHECK_OK(NodeBuilder("A", "NoOp").Finalize(&graph, &node)); + CHECK_OK(NodeBuilder("A", "NoOp").Finalize(&graph, &node)); setenv("TF_DUMP_GRAPH_PREFIX", testing::TmpDir().c_str(), 1); UseMlirForGraphDump(MlirDumpConfig()); @@ -98,7 +109,7 @@ TEST(Dump, TextualIrWithOptions) { TEST(Dump, DumpToTFG) { Graph graph(OpRegistry::Global()); Node* node; - TF_CHECK_OK(NodeBuilder("A", "NoOp").Finalize(&graph, &node)); + CHECK_OK(NodeBuilder("A", "NoOp").Finalize(&graph, &node)); std::string actual; StringWritableFile file(&actual); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/error_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/error_util_test.cc index dcd71dedc9790f..8634afe5fc1498 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/error_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/error_util_test.cc @@ -15,57 +15,71 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" -#include "llvm/ADT/Twine.h" #include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Diagnostics.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "xla/hlo/testlib/test.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status_test_util.h" +#include "xla/tsl/lib/core/status_test_util.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" namespace mlir { namespace { -using testing::HasSubstr; +using ::testing::HasSubstr; -TEST(ErrorUtilTest, StatusScopedDiagnosticHandler) { - MLIRContext context; - auto id = StringAttr::get(&context, "//tensorflow/python/test.py"); - auto loc = FileLineColLoc::get(&context, id, 0, 0); +class ErrorUtilTest : public ::testing::Test { + protected: + ErrorUtilTest() + : id_(StringAttr::get(&context_, "//tensorflow/python/test.py")), + loc_(FileLineColLoc::get(&context_, id_, 0, 0)) {} + + MLIRContext context_; + StringAttr id_; + FileLineColLoc loc_; +}; + +using StatusScopedDiagnosticHandlerTest = ErrorUtilTest; + +TEST_F(StatusScopedDiagnosticHandlerTest, + OkWithoutDiagnosticGetsPassedThrough) { + TF_ASSERT_OK( + StatusScopedDiagnosticHandler(&context_).Combine(tensorflow::OkStatus())); +} + +TEST_F(StatusScopedDiagnosticHandlerTest, + VerifyDiagnosticsAreCapturedAsUnknownStatus) { + StatusScopedDiagnosticHandler handler(&context_); + emitError(loc_) << "Diagnostic message"; + ASSERT_TRUE(absl::IsUnknown(handler.ConsumeStatus())); +} + +TEST_F(StatusScopedDiagnosticHandlerTest, VerifyPassedInErrorsArePropagated) { + const Status err = tensorflow::errors::Internal("Passed in error"); + ASSERT_TRUE( + absl::IsInternal(StatusScopedDiagnosticHandler(&context_).Combine(err))); +} + +TEST_F(StatusScopedDiagnosticHandlerTest, + VerifyThatReportedDiagnosticsAreAppendedToPassedInError) { + StatusScopedDiagnosticHandler ssdh(&context_); + emitError(loc_) << "Diagnostic message reported"; + emitError(loc_) << "Second diagnostic message reported"; + const Status s = + ssdh.Combine(tensorflow::errors::Internal("Passed in error")); + ASSERT_TRUE(absl::IsInternal(s)); + EXPECT_THAT(s.message(), HasSubstr("Passed in error")); + EXPECT_THAT(s.message(), HasSubstr("Diagnostic message reported")); + EXPECT_THAT(s.message(), HasSubstr("Second diagnostic message reported")); +} - // Test OK without diagnostic gets passed through. - { - TF_ASSERT_OK( - StatusScopedDiagnosticHandler(&context).Combine(absl::OkStatus())); - } - - // Verify diagnostics are captured as Unknown status. - { - StatusScopedDiagnosticHandler handler(&context); - emitError(loc) << "Diagnostic message"; - ASSERT_TRUE(absl::IsUnknown(handler.ConsumeStatus())); - } - - // Verify passed in errors are propagated. - { - Status err = tensorflow::errors::Internal("Passed in error"); - ASSERT_TRUE( - absl::IsInternal(StatusScopedDiagnosticHandler(&context).Combine(err))); - } - - // Verify diagnostic reported are append to passed in error. 
- { - auto function = [&]() { - emitError(loc) << "Diagnostic message reported"; - emitError(loc) << "Second diagnostic message reported"; - return tensorflow::errors::Internal("Passed in error"); - }; - StatusScopedDiagnosticHandler ssdh(&context); - Status s = ssdh.Combine(function()); - ASSERT_TRUE(absl::IsInternal(s)); - EXPECT_THAT(s.message(), HasSubstr("Passed in error")); - EXPECT_THAT(s.message(), HasSubstr("Diagnostic message reported")); - EXPECT_THAT(s.message(), HasSubstr("Second diagnostic message reported")); - } +TEST_F(StatusScopedDiagnosticHandlerTest, VerifyThatWarningsAreIgnored) { + // Note: this logic is actually implemented in BaseScopedDiagnosticHandler's + // handler() function, but only StatusScopedDiagnosticHandler uses it. + StatusScopedDiagnosticHandler handler(&context_); + emitWarning(loc_) << "Warning message"; + TF_EXPECT_OK(handler.ConsumeStatus()); } TEST(ErrorUtilTest, StatusScopedDiagnosticHandlerWithFilter) { diff --git a/tensorflow/compiler/mlir/tensorflow/utils/fake_session.cc b/tensorflow/compiler/mlir/tensorflow/utils/fake_session.cc index 2ee95c1337aa52..cb406a2d0e3fc9 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/fake_session.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/fake_session.cc @@ -19,17 +19,21 @@ limitations under the License. #include #include +#include "absl/log/check.h" +#include "absl/status/status.h" #include "absl/strings/match.h" #include "llvm/Support/CommandLine.h" #include "tensorflow/core/common_runtime/device_mgr.h" -#include "tensorflow/core/common_runtime/threadpool_device.h" -#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device_attributes.pb.h" #include "tensorflow/core/framework/device_factory.h" +#include "tensorflow/core/framework/resource_handle.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/resource_var.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/graph/types.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/threadpool_options.h" #include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/public/session_options.h" @@ -81,9 +85,9 @@ void FakeSession::InitVariables() { auto container = device->resource_manager()->default_container(); // Create 2 resources and initialize them with dummy values. 
- TF_CHECK_OK(device->resource_manager()->Create( + CHECK_OK(device->resource_manager()->Create( container, "var1", new tensorflow::Var(tensorflow::DataType::DT_FLOAT))); - TF_CHECK_OK(device->resource_manager()->Create( + CHECK_OK(device->resource_manager()->Create( container, "var2", new tensorflow::Var(tensorflow::DataType::DT_FLOAT))); } diff --git a/tensorflow/compiler/mlir/tensorflow/utils/parallel_execute_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/parallel_execute_util.cc index 4bca511ca252b5..52d1bfc8ffde3a 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/parallel_execute_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/parallel_execute_util.cc @@ -25,15 +25,15 @@ tf_device::ParallelExecuteOp BuildParallelExecuteOp( tf_device::ClusterFuncOp cluster_func, OpBuilder* builder) { const auto output_types = cluster_func.getResultTypes(); builder->setInsertionPoint(cluster_func); - auto parallel_execute = builder->create( - cluster_func.getLoc(), 1, output_types); + auto parallel_execute = tf_device::ParallelExecuteOp::create( + *builder, cluster_func.getLoc(), 1, output_types); cluster_func->remove(); auto& block = parallel_execute.GetRegionBlockWithIndex(0); builder->setInsertionPointToEnd(&block); builder->insert(cluster_func); cluster_func.replaceAllUsesWith(parallel_execute); - builder->create(block.getParent()->getLoc(), - cluster_func.getResults()); + tf_device::ReturnOp::create(*builder, block.getParent()->getLoc(), + cluster_func.getResults()); return parallel_execute; } diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc index 932c941c6f5f7a..a7b676d8541909 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util_test.cc @@ -793,8 +793,8 @@ TEST(TPURewriteDeviceUtilTest, TestHasModelParallelismFalse) { mlir::OpBuilder builder(module_ref->getBodyRegion()); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 1)); cluster->setAttr(kTopologyAttr, builder.getStringAttr("")); @@ -811,8 +811,8 @@ TEST(TPURewriteDeviceUtilTest, TestHasModelParallelismTrue) { mlir::OpBuilder builder(module_ref->getBodyRegion()); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 5)); cluster->setAttr(kTopologyAttr, builder.getStringAttr("")); @@ -830,8 +830,8 @@ TEST(TPURewriteDeviceUtilTest, mlir::OpBuilder builder(module_ref->getBodyRegion()); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 1)); cluster->setAttr(kTopologyAttr, builder.getStringAttr("")); @@ -848,8 +848,8 @@ TEST(TPURewriteDeviceUtilTest, mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); mlir::OpBuilder 
builder(module_ref->getBodyRegion()); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kDeviceAssignmentAttr, builder.getArrayAttr({})); mlir::TF::RuntimeDevices devices; @@ -865,8 +865,8 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostFailDeviceMissingAttributes) { mlir::ModuleOp::create(mlir::UnknownLoc::get(&context)); mlir::OpBuilder builder(module_ref->getBodyRegion()); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 1)); @@ -884,8 +884,8 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailMissingTopology) { mlir::OpBuilder builder(module_ref->getBodyRegion()); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 1)); cluster->setAttr(kDeviceAssignmentAttr, builder.getArrayAttr({})); @@ -904,8 +904,8 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailMissingDeviceAssignment) { mlir::OpBuilder builder(module_ref->getBodyRegion()); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 1)); cluster->setAttr(kTopologyAttr, builder.getStringAttr("")); @@ -924,8 +924,8 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailBadDeviceAssignment) { mlir::OpBuilder builder(module_ref->getBodyRegion()); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 1)); cluster->setAttr(kTopologyAttr, builder.getStringAttr("")); @@ -951,8 +951,8 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceFailBadDeviceName) { llvm::ArrayRef({"bad_device_name"}))); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 1)); cluster->setAttr(kTopologyAttr, builder.getStringAttr("")); @@ -974,16 +974,16 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceTPUReplicate) { llvm::SmallDenseMap> devices; - auto replicate = builder.create( - mlir::UnknownLoc::get(&context), /*num_replicas=*/2, devices, + auto replicate = mlir::tf_device::ReplicateOp::create( + builder, mlir::UnknownLoc::get(&context), /*num_replicas=*/2, devices, llvm::ArrayRef>{}, mlir::ValueRange{}, mlir::TypeRange{}); builder.setInsertionPoint(&replicate.getBody().front(), replicate.getBody().front().begin()); llvm::SmallVector result_types; - auto 
cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); mlir::TF::RuntimeDevices runtime_devices; std::string host_device; @@ -1007,8 +1007,8 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceNotReplicated) { "/job:worker/replica:0/task:0/device:CPU:0"}))); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); cluster->setAttr(kNumCoresPerReplicaAttr, builder.getIntegerAttr(builder.getIntegerType(64), 1)); cluster->setAttr(kTopologyAttr, builder.getStringAttr("")); @@ -1034,8 +1034,8 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceInGenericPipeline) { {"/job:localhost/replica:0/task:0/device:CPU:0"}))); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); mlir::TF::RuntimeDevices runtime_devices; (void)GetDevicesFromOp(*module_ref, &runtime_devices); @@ -1060,8 +1060,8 @@ TEST(TPURewriteDeviceUtilTest, TestGetHostDeviceInGenericPipelineMultiCPUs) { "/job:worker/replica:0/task:2/device:CPU:0"}))); llvm::SmallVector result_types; - auto cluster = builder.create( - mlir::UnknownLoc::get(&context), result_types); + auto cluster = mlir::tf_device::ClusterOp::create( + builder, mlir::UnknownLoc::get(&context), result_types); mlir::TF::RuntimeDevices runtime_devices; (void)GetDevicesFromOp(*module_ref, &runtime_devices); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/xla_rewrite_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/xla_rewrite_util.cc index ba4d1b71a857cd..82b7202d6d78e9 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/xla_rewrite_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/xla_rewrite_util.cc @@ -83,8 +83,8 @@ int MovePreservedParallelExecuteChildren( // `num_moved_children` is the number of children that will be preserved. const size_t num_moved_children = old_parallel_execute.getRegions().size() - 1; - *new_parallel_execute = builder->create( - old_parallel_execute->getLoc(), + *new_parallel_execute = mlir::tf_device::ParallelExecuteOp::create( + *builder, old_parallel_execute->getLoc(), num_moved_children + num_cores_per_replica, concatenated_output_types); // `cluster_idx` is the index of the child with the `ClusterFuncOp`, which @@ -118,12 +118,12 @@ mlir::tf_device::LaunchOp WrapOpInLaunch(mlir::OpBuilder* builder, llvm::StringRef device) { mlir::OpBuilder::InsertPoint insert_point = builder->saveInsertionPoint(); - auto launch = builder->create( - loc, builder->getStringAttr(device), op->getResultTypes()); + auto launch = mlir::tf_device::LaunchOp::create( + *builder, loc, builder->getStringAttr(device), op->getResultTypes()); launch.getBody().push_back(new mlir::Block); builder->setInsertionPointToEnd(&launch.GetBody()); - builder->create(loc, op->getResults()); + mlir::tf_device::ReturnOp::create(*builder, loc, op->getResults()); // Move op inside cluster. 
op->moveBefore(launch.GetBody().getTerminator()); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc index 89e00e9b4d628c..3bca701131151f 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc @@ -94,22 +94,23 @@ mlir::TF::SliceOp CreateSliceOp(mlir::OpBuilder* builder, auto start_position_type = mlir::RankedTensorType::get(shape.dims(), builder->getIntegerType(64)); - auto start_position_op = builder->create( - input.getLoc(), mlir::DenseIntElementsAttr::get(start_position_type, - slice_start_position)); - - auto slice_size_op = builder->create( - input.getLoc(), mlir::DenseIntElementsAttr::get( - mlir::RankedTensorType::get( - shape.dims(), builder->getIntegerType(64)), - slice_size)); + auto start_position_op = + mlir::TF::ConstOp::create(*builder, input.getLoc(), + mlir::DenseIntElementsAttr::get( + start_position_type, slice_start_position)); + + auto slice_size_op = mlir::TF::ConstOp::create( + *builder, input.getLoc(), + mlir::DenseIntElementsAttr::get( + mlir::RankedTensorType::get(shape.dims(), + builder->getIntegerType(64)), + slice_size)); auto slice_result_type = mlir::RankedTensorType::get(slice_size, getElementTypeOrSelf(input)); - return builder->create(input.getLoc(), slice_result_type, - input, start_position_op, - slice_size_op); + return mlir::TF::SliceOp::create(*builder, input.getLoc(), slice_result_type, + input, start_position_op, slice_size_op); } mlir::TF::PadOp CreatePadOp(mlir::OpBuilder* builder, @@ -135,15 +136,15 @@ mlir::TF::PadOp CreatePadOp(mlir::OpBuilder* builder, auto padding_type = mlir::RankedTensorType::get({num_dims, 2}, builder->getIntegerType(64)); auto paddings = mlir::DenseIntElementsAttr::get(padding_type, padding_values); - auto paddings_value = builder->create(location, paddings); + auto paddings_value = mlir::TF::ConstOp::create(*builder, location, paddings); mlir::SmallVector expand_shape(padded_shape.begin(), padded_shape.end()); auto expand_result_type = mlir::RankedTensorType::get(expand_shape, input_type.getElementType()); - return builder->create(location, expand_result_type, - src_input, paddings_value); + return mlir::TF::PadOp::create(*builder, location, expand_result_type, + src_input, paddings_value); } // Creates a tf::SplitOp that splits 'src_input' into 'num_splits' ways @@ -198,8 +199,8 @@ mlir::LogicalResult CreateSplitOp( output_type = input_type; } - auto split_dimension_op = builder->create( - location, split_dim_type, split_dimension_attr); + auto split_dimension_op = mlir::TF::ConstOp::create( + *builder, location, split_dim_type, split_dimension_attr); if (is_ici_weight_dist_spmd) { split_dimension_op->setAttr(kICIWeightDistributionMlirBridgeMarker, builder->getBoolAttr(true)); @@ -207,8 +208,9 @@ mlir::LogicalResult CreateSplitOp( // Creates a split op that splits |src_input| along |split_dimension|. 
llvm::SmallVector output_types(num_split, output_type); - *split_op = builder->create( - location, output_types, split_dimension_op.getOutput(), src_input); + *split_op = + mlir::TF::SplitOp::create(*builder, location, output_types, + split_dimension_op.getOutput(), src_input); (*split_op)->setAttr( kNumSplitAttr, builder->getIntegerAttr(builder->getIntegerType(32), num_split)); @@ -230,8 +232,8 @@ mlir::TF::ConcatOp CreateConcatOp(const int concat_dimension, mlir::RankedTensorType::get({}, builder->getIntegerType(32)); auto concat_dimension_attr = mlir::DenseElementsAttr::get(concat_dim_type, concat_dimension); - auto concat_dimension_op = builder->create( - location, concat_dim_type, concat_dimension_attr); + auto concat_dimension_op = mlir::TF::ConstOp::create( + *builder, location, concat_dim_type, concat_dimension_attr); // Correctly set output shapes of concat op output if output shape is // statically known. Since the shape of TPUExecute op must be the same @@ -253,8 +255,8 @@ mlir::TF::ConcatOp CreateConcatOp(const int concat_dimension, output_type = input_type; } - return builder->create( - location, output_type, concat_dimension_op.getOutput(), inputs); + return mlir::TF::ConcatOp::create(*builder, location, output_type, + concat_dimension_op.getOutput(), inputs); } mlir::TF::XlaConcatNDOp CreateXlaConcatNDOp( @@ -292,9 +294,9 @@ mlir::TF::XlaConcatNDOp CreateXlaConcatNDOp( output_type = input_slice_type; } - auto op = builder.create( - location, output_type, inputs, builder.getI64ArrayAttr(num_concats), - builder.getI64ArrayAttr(paddings)); + auto op = mlir::TF::XlaConcatNDOp::create( + builder, location, output_type, inputs, + builder.getI64ArrayAttr(num_concats), builder.getI64ArrayAttr(paddings)); return op; } @@ -338,9 +340,9 @@ mlir::LogicalResult CreateXlaSplitNDOp(const mlir::Location& location, << absl::StrJoin(input_shape, ",") << ", Padding: " << absl::StrJoin(paddings, ","); - *xla_split_op = builder->create( - location, output_types, src_input, builder->getI64ArrayAttr(num_splits), - builder->getI64ArrayAttr(paddings)); + *xla_split_op = mlir::TF::XlaSplitNDOp::create( + *builder, location, output_types, src_input, + builder->getI64ArrayAttr(num_splits), builder->getI64ArrayAttr(paddings)); if (is_ici_weight_dist_spmd) { (*xla_split_op) ->setAttr(kICIWeightDistributionMlirBridgeMarker, diff --git a/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc b/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc index b13e099fde3557..475bd79849e80e 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/api/v1/compile_mlir_util_test.cc @@ -85,7 +85,7 @@ TEST(LegalizeMlirTest, LegalizesModule) { /*shape_determination_fns=*/{}, &compilation_result); EXPECT_TRUE(status.ok()); - EXPECT_THAT(status.value(), HasSubstr("mhlo.const")); + EXPECT_THAT(status.value(), HasSubstr("stablehlo.constant")); } TEST(LegalizeMlirTest, FailsLegalizesModule) { diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD index 746bca0cdb79b7..da75a97030412d 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/BUILD @@ -323,6 +323,7 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ + ":mlir_roundtrip_flags", "//tensorflow/compiler/jit:shape_inference_helpers", "//tensorflow/compiler/mlir:op_or_arg_name_mapper", "//tensorflow/compiler/mlir/tensorflow", @@ -349,13 +350,13 @@ cc_library( "//tensorflow/core:lib", 
"//tensorflow/core:protos_all_cc", "//tensorflow/core/common_runtime:function_body", - "//tensorflow/core/platform:crash_analysis", "//tensorflow/core/platform:types", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", @@ -365,7 +366,6 @@ cc_library( "@llvm-project//mlir:Pass", "@llvm-project//mlir:Support", "@local_xla//xla:status_macros", - "@local_xla//xla/tsl/platform:status", ], ) diff --git a/tensorflow/compiler/mlir/tf2xla/api/v2/graph_to_tf_executor.cc b/tensorflow/compiler/mlir/tf2xla/api/v2/graph_to_tf_executor.cc index edf726134f66bd..cb48ff03def75d 100644 --- a/tensorflow/compiler/mlir/tf2xla/api/v2/graph_to_tf_executor.cc +++ b/tensorflow/compiler/mlir/tf2xla/api/v2/graph_to_tf_executor.cc @@ -34,6 +34,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" +#include "absl/log/check.h" #include "absl/log/log.h" #include "absl/status/status.h" #include "absl/strings/match.h" @@ -74,7 +75,6 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_dialect.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" -#include "tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_attr.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" @@ -83,14 +83,15 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" #include "tensorflow/compiler/mlir/tensorflow/utils/translate_utils.h" +#include "tensorflow/compiler/mlir/tf2xla/api/v2/mlir_roundtrip_flags.h" #include "tensorflow/compiler/mlir/tf2xla/internal/graph_to_tf_executor_util.h" #include "tensorflow/compiler/mlir/tf2xla/internal/node_order.h" #include "tensorflow/compiler/tf2xla/functionalize_control_flow.h" #include "tensorflow/compiler/tf2xla/functionalize_control_flow_util.h" #include "tensorflow/compiler/tf2xla/tf2xla_defs.h" #include "xla/status_macros.h" +#include "xla/tsl/platform/crash_analysis.h" #include "xla/tsl/platform/errors.h" -#include "xla/tsl/platform/status.h" #include "xla/tsl/platform/statusor.h" #include "tensorflow/core/common_runtime/function_body.h" #include "tensorflow/core/common_runtime/function_def_utils.h" @@ -120,11 +121,9 @@ limitations under the License. 
#include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/tensor_id.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/platform/crash_analysis.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stack_frame.h" -#include "tensorflow/core/platform/stringpiece.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/meta_graph.pb.h" #include "tensorflow/core/protobuf/saved_object_graph.pb.h" @@ -1889,7 +1888,7 @@ mlir::Operation* ImporterBase::CreateOperation( NameRangeMap input_ranges, output_ranges; // This will fail only if the OpDef is syntactically invalid. // TODO(jpienaar): Convert this CHECK into a properly propagated error. - TF_CHECK_OK( + CHECK_OK( NameRangesForNode(node, node.op_def(), &input_ranges, &output_ranges)); if (inner_op->hasTrait()) { // Add derived "operand_segment_sizes" attr to the created operation. diff --git a/tensorflow/compiler/mlir/tf2xla/internal/BUILD b/tensorflow/compiler/mlir/tf2xla/internal/BUILD index 4e7d058c3c6c6c..f292b270f855e8 100644 --- a/tensorflow/compiler/mlir/tf2xla/internal/BUILD +++ b/tensorflow/compiler/mlir/tf2xla/internal/BUILD @@ -419,10 +419,10 @@ tf_cc_test( "//tensorflow/core:protos_all_cc", "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/platform:enable_tf2_utils", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", "@local_xla//xla/tsl/lib/core:status_test_util", - "@local_xla//xla/tsl/platform:status", ], ) diff --git a/tensorflow/compiler/mlir/tf2xla/internal/graph_to_tf_executor_util_test.cc b/tensorflow/compiler/mlir/tf2xla/internal/graph_to_tf_executor_util_test.cc index 8ffe558029ad8b..cb332fe4fb997b 100644 --- a/tensorflow/compiler/mlir/tf2xla/internal/graph_to_tf_executor_util_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/internal/graph_to_tf_executor_util_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/log/check.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" @@ -31,7 +32,6 @@ limitations under the License. 
#include "tensorflow/cc/ops/tpu_functional_ops.h" #include "tensorflow/cc/ops/tpu_replication_ops.h" #include "xla/tsl/lib/core/status_test_util.h" -#include "xla/tsl/platform/status.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/node_def.pb.h" @@ -124,11 +124,11 @@ Node* FromNodeDef(absl::string_view name, absl::string_view node_type, } NodeDef node_def; - TF_CHECK_OK(builder.Finalize(&node_def)); + CHECK_OK(builder.Finalize(&node_def)); absl::Status s; Node* node = graph.AddNode(node_def, &s); - TF_CHECK_OK(s); + CHECK_OK(s); return node; } @@ -547,12 +547,12 @@ TEST(UnsupportedOpTest, builder.Attr("dtypes", DT_FLOAT); builder.Attr("shapes", 1); NodeDef node_def; - TF_CHECK_OK(builder.Finalize(&node_def)); + CHECK_OK(builder.Finalize(&node_def)); absl::Status s; Node* node_InfeedDequeueTuple = (*root.graph()).AddNode(node_def, &s); node_InfeedDequeueTuple->set_requested_device( "/device:TPU_REPLICATED_CORE:0"); - TF_CHECK_OK(s); + CHECK_OK(s); ASSERT_NE(node_InfeedDequeueTuple, nullptr); Graph graph(OpRegistry::Global()); diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/legalization_op_config_test.cc b/tensorflow/compiler/mlir/tf2xla/transforms/legalization_op_config_test.cc index 7d00bc41716979..f40ada575d2f4a 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/legalization_op_config_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/legalization_op_config_test.cc @@ -84,7 +84,7 @@ TEST(LegalizationOpConfigTest, CountLoweringsSet) { // a new op, we should expect these to change too. EXPECT_EQ(mlir_lowering_count, 67); EXPECT_EQ(tf2xla_fallback_count, 333); - EXPECT_EQ(non_categorized_count, 434); + EXPECT_EQ(non_categorized_count, 435); } // Just a counter test to see which ops have duplicate lowerings. This isn't a diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc index 2ab0c3c619b292..e4fe30755c2eb7 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf.cc @@ -229,8 +229,8 @@ static std::optional GetIntegerHLOAxisFromTFAxis(Value value, /// the shape of the input value. static stablehlo::ConvertOp CastValueToI64(Location loc, Value value, PatternRewriter *rewriter) { - return rewriter->create(loc, value, - rewriter->getIntegerType(64)); + return stablehlo::ConvertOp::create(*rewriter, loc, value, + rewriter->getIntegerType(64)); } // Creates an unpack op along the 0th dimension of the tensor. 
The `value` input @@ -242,9 +242,9 @@ static TF::UnpackOp UnpackTensorAlongZeroDim(Location loc, Value value, SmallVector unpacked_indices_type( num_outputs, tensorflow::GetTypeFromTFTensorShape({}, indices_type.getElementType())); - auto unpacked_indices = rewriter->create( - loc, unpacked_indices_type, value, - IntegerAttr::get(rewriter->getIntegerType(64), 0)); + auto unpacked_indices = + TF::UnpackOp::create(*rewriter, loc, unpacked_indices_type, value, + IntegerAttr::get(rewriter->getIntegerType(64), 0)); return unpacked_indices; } @@ -277,8 +277,8 @@ tensorflow::TensorShape ToTensorShape( static stablehlo::ConstantOp GetScalarLimitConstOfType(Type ty, Location loc, hlo::ScalarLimit limit, OpBuilder *builder) { - return builder->create( - loc, hlo::getScalarLimitOfType(ty, limit)); + return stablehlo::ConstantOp::create(*builder, loc, + hlo::getScalarLimitOfType(ty, limit)); } // Deprecated: This is maintained to aid in porting old code that is not yet @@ -396,12 +396,12 @@ static Value Broadcast1DToFeatureDim(Location loc, Value broadcast_to, OpBuilder &builder) { auto broadcast_dims = GetI64ArrayAttr({feature_dim}, &builder); auto to_type = mlir::cast(broadcast_to.getType()); - auto result_shape = builder.create(loc, broadcast_to); + auto result_shape = shape::ShapeOfOp::create(builder, loc, broadcast_to); auto result_extents_type = GetExtentsTensorTypeFor(to_type); - auto result_extents = builder.create( - loc, result_extents_type, result_shape); - return builder.create( - loc, to_type, broadcast_from, result_extents, broadcast_dims); + auto result_extents = shape::ToExtentTensorOp::create( + builder, loc, result_extents_type, result_shape); + return stablehlo::DynamicBroadcastInDimOp::create( + builder, loc, to_type, broadcast_from, result_extents, broadcast_dims); } // Broadcasts `input` to the shape of `broadcast_to` value following @@ -413,15 +413,15 @@ static Value Broadcast1DToFeatureDim(Location loc, Value broadcast_to, // supports unranked inputs in the lowering. static Value BroadcastToShapeOf(Location loc, Value input, Value broadcast_to, OpBuilder &builder) { - auto result_shape = builder.create(loc, broadcast_to); + auto result_shape = shape::ShapeOfOp::create(builder, loc, broadcast_to); auto to_type = mlir::cast(broadcast_to.getType()); auto result_extents_type = GetExtentsTensorTypeFor(to_type); - auto result_extents = builder.create( - loc, result_extents_type, result_shape); + auto result_extents = shape::ToExtentTensorOp::create( + builder, loc, result_extents_type, result_shape); int64_t rank = mlir::cast(input.getType()).getRank(); auto broadcast_dims = GetI64ArrayAttrForSeq(0, rank, &builder); - return builder.create( - loc, to_type, input, result_extents, broadcast_dims); + return stablehlo::DynamicBroadcastInDimOp::create( + builder, loc, to_type, input, result_extents, broadcast_dims); } // Builds a set of operations for applying reduction on the input value. 
A @@ -430,9 +430,9 @@ static Value ApplyReduction(Location loc, Value input, DenseIntElementsAttr reduce_dims, OpBuilder *builder) { auto reduce_dims_op = - builder->create(loc, reduce_dims); - return builder->create(loc, input, reduce_dims_op, - builder->getBoolAttr(false)); + stablehlo::ConstantOp::create(*builder, loc, reduce_dims); + return TF::SumOp::create(*builder, loc, input, reduce_dims_op, + builder->getBoolAttr(false)); } // Creates a stablehlo.rng_uniform op with `builder` to generate `num_elements` @@ -440,17 +440,16 @@ static Value ApplyReduction(Location loc, Value input, static stablehlo::RngOp CreateRngUniform32(Location loc, int num_elements, int lower_limit, int upper_limit, OpBuilder *builder) { - auto shape_tensor = builder->create( - loc, GetI64ElementsAttr({num_elements}, builder)); + auto shape_tensor = stablehlo::ConstantOp::create( + *builder, loc, GetI64ElementsAttr({num_elements}, builder)); - auto lower = builder->create( - loc, builder->getI32IntegerAttr(lower_limit)); - auto upper = builder->create( - loc, builder->getI32IntegerAttr(upper_limit)); + auto lower = stablehlo::ConstantOp::create( + *builder, loc, builder->getI32IntegerAttr(lower_limit)); + auto upper = stablehlo::ConstantOp::create( + *builder, loc, builder->getI32IntegerAttr(upper_limit)); - return builder->create( - loc, lower, upper, shape_tensor, - ::mlir::stablehlo::RngDistribution::UNIFORM); + return stablehlo::RngOp::create(*builder, loc, lower, upper, shape_tensor, + ::mlir::stablehlo::RngDistribution::UNIFORM); } using WhileBodyFnType = llvm::function_refcreate( - loc, builder->getI32IntegerAttr(0))); + init_values_with_loop_iv.push_back(stablehlo::ConstantOp::create( + *builder, loc, builder->getI32IntegerAttr(0))); init_values_with_loop_iv.append(init_values.begin(), init_values.end()); // Accumulate types of all the init values. @@ -498,8 +497,8 @@ static void CreateWhile32(Location loc, int num_iterations, init_types_with_loop_iv.push_back(init_value_with_loop_iv.getType()); // Create the while op. - auto while_op = builder->create( - loc, init_types_with_loop_iv, init_values_with_loop_iv); + auto while_op = stablehlo::WhileOp::create( + *builder, loc, init_types_with_loop_iv, init_values_with_loop_iv); auto ivs_count = init_types_with_loop_iv.size(); { @@ -513,12 +512,13 @@ static void CreateWhile32(Location loc, int num_iterations, // Get the loop induction variable and compare it against the upper limit. auto loop_iv = block->getArgument(0); - auto upper_limit = builder->create( - loc, builder->getI32IntegerAttr(num_iterations)); - Value compare = builder->create( - loc, loop_iv, upper_limit, stablehlo::ComparisonDirection::LT); + auto upper_limit = stablehlo::ConstantOp::create( + *builder, loc, builder->getI32IntegerAttr(num_iterations)); + Value compare = + stablehlo::CompareOp::create(*builder, loc, loop_iv, upper_limit, + stablehlo::ComparisonDirection::LT); - builder->create(loc, compare); + stablehlo::ReturnOp::create(*builder, loc, compare); } { @@ -540,15 +540,15 @@ static void CreateWhile32(Location loc, int num_iterations, &new_values, builder); // Increment the loop induction variable by one. 
- auto one = builder->create( - loc, builder->getI32IntegerAttr(1)); + auto one = stablehlo::ConstantOp::create(*builder, loc, + builder->getI32IntegerAttr(1)); auto scalar_broadcast_dims = builder->getDenseI64ArrayAttr({}); - auto plus_one = builder->create( - loc, block->getArgument(0), one, scalar_broadcast_dims); + auto plus_one = chlo::BroadcastAddOp::create( + *builder, loc, block->getArgument(0), one, scalar_broadcast_dims); // Prepend with the updated loop induction variable. new_values.insert(new_values.begin(), plus_one); - builder->create(loc, new_values); + stablehlo::ReturnOp::create(*builder, loc, new_values); } // TODO(jpienaar): Support multi-operand while op. @@ -748,20 +748,20 @@ static void BuildArgMinMaxReductionBody( ImplicitLocOpBuilder b(loc, *builder); Value compare_dt = - b.create(lhs_val, rhs_val, direction); + stablehlo::CompareOp::create(b, lhs_val, rhs_val, direction); Value selected_input = - b.create(input_type, compare_dt, lhs_val, rhs_val); + stablehlo::SelectOp::create(b, input_type, compare_dt, lhs_val, rhs_val); - Value compare_eq = b.create( - lhs_val, rhs_val, stablehlo::ComparisonDirection::EQ); - Value min_index = b.create(lhs_index, rhs_index); - Value min_val_index = b.create(index_type, compare_dt, - lhs_index, rhs_index); - Value selected_index = b.create( - index_type, compare_eq, min_index, min_val_index); + Value compare_eq = stablehlo::CompareOp::create( + b, lhs_val, rhs_val, stablehlo::ComparisonDirection::EQ); + Value min_index = stablehlo::MinOp::create(b, lhs_index, rhs_index); + Value min_val_index = stablehlo::SelectOp::create(b, index_type, compare_dt, + lhs_index, rhs_index); + Value selected_index = stablehlo::SelectOp::create(b, index_type, compare_eq, + min_index, min_val_index); Value return_values[] = {selected_input, selected_index}; - b.create(return_values); + stablehlo::ReturnOp::create(b, return_values); } //===----------------------------------------------------------------------===// @@ -898,9 +898,9 @@ static void BuildBodyWithCall(PatternRewriter &rewriter, const Location &loc, Block *block = rewriter.createBlock(body); auto inputs = func_ty.getInputs(); block->addArguments(inputs, SmallVector(inputs.size(), loc)); - mlir::func::CallOp call_op = rewriter.create( - loc, func, func_ty.getResults(), block->getArguments()); - rewriter.create(loc, call_op.getResults()); + mlir::func::CallOp call_op = mlir::func::CallOp::create( + rewriter, loc, func, func_ty.getResults(), block->getArguments()); + stablehlo::ReturnOp::create(rewriter, loc, call_op.getResults()); } //===----------------------------------------------------------------------===// @@ -955,9 +955,9 @@ class ConvertBiasAddOp : public OpRewritePattern { auto bias_broadcast = Broadcast1DToFeatureDim( loc, op.getValue(), op.getBias(), feature_dim, rewriter); Value add = - rewriter.create(loc, op.getValue(), bias_broadcast); + stablehlo::AddOp::create(rewriter, loc, op.getValue(), bias_broadcast); if (add.getType() != op.getType()) { - add = rewriter.create(loc, op.getType(), add); + add = tensor::CastOp::create(rewriter, loc, op.getType(), add); } rewriter.replaceOp(op, {add}); return success(); @@ -986,7 +986,7 @@ class ConvertConvDynamic : public OpRewritePattern { switch (padding_type) { case tensorflow::Padding::VALID: { auto zero = - rewriter.create(loc, shape_scalar_type, 0); + arith::ConstantIntOp::create(rewriter, loc, shape_scalar_type, 0); *padding_low = *padding_high = zero; break; } @@ -994,48 +994,49 @@ class ConvertConvDynamic : public OpRewritePattern 
{ break; case tensorflow::Padding::SAME: { auto zero = - rewriter.create(loc, shape_scalar_type, 0); + arith::ConstantIntOp::create(rewriter, loc, shape_scalar_type, 0); auto one = - rewriter.create(loc, shape_scalar_type, 1); + arith::ConstantIntOp::create(rewriter, loc, shape_scalar_type, 1); auto two = - rewriter.create(loc, shape_scalar_type, 2); + arith::ConstantIntOp::create(rewriter, loc, shape_scalar_type, 2); // See also the parallel implementation in // GetWindowedOutputSizeFromDimsV2. effective_filter_size = (filter_size // - 1) * dilation_rate + 1 - Value stride_value = rewriter.create( - loc, shape_scalar_type, stride); - Value dilation_rate_value = rewriter.create( - loc, shape_scalar_type, dilation_rate); - Value effective_filter_size_op = rewriter.create( - loc, one, - rewriter.create( - loc, dilation_rate_value, - rewriter.create(loc, filter_size, one))); + Value stride_value = arith::ConstantIntOp::create( + rewriter, loc, shape_scalar_type, stride); + Value dilation_rate_value = arith::ConstantIntOp::create( + rewriter, loc, shape_scalar_type, dilation_rate); + Value effective_filter_size_op = arith::AddIOp::create( + rewriter, loc, one, + arith::MulIOp::create( + rewriter, loc, dilation_rate_value, + arith::SubIOp::create(rewriter, loc, filter_size, one))); // output_size = (input_size + stride - 1) / stride; - Value output_size = rewriter.create( - loc, - rewriter.create( - loc, input_size, - rewriter.create(loc, stride_value, one)), + Value output_size = arith::DivUIOp::create( + rewriter, loc, + arith::AddIOp::create( + rewriter, loc, input_size, + arith::SubIOp::create(rewriter, loc, stride_value, one)), stride_value); // std::max(int64{0}, (output_size - 1) * stride + // effective_filter_size - input_size); - Value padding_needed = rewriter.create( - loc, - rewriter.create( - loc, effective_filter_size_op, - rewriter.create( - loc, stride_value, - rewriter.create(loc, output_size, one))), + Value padding_needed = arith::SubIOp::create( + rewriter, loc, + arith::AddIOp::create( + rewriter, loc, effective_filter_size_op, + arith::MulIOp::create( + rewriter, loc, stride_value, + arith::SubIOp::create(rewriter, loc, output_size, one))), input_size); - Value cond = rewriter.create( - loc, arith::CmpIPredicate::sge, padding_needed, zero); - padding_needed = rewriter.create( - loc, padding_needed.getType(), cond, padding_needed, zero); + Value cond = mlir::arith::CmpIOp::create( + rewriter, loc, arith::CmpIPredicate::sge, padding_needed, zero); + padding_needed = mlir::arith::SelectOp::create( + rewriter, loc, padding_needed.getType(), cond, padding_needed, + zero); *padding_low = - rewriter.create(loc, padding_needed, two); + arith::DivUIOp::create(rewriter, loc, padding_needed, two); *padding_high = - rewriter.create(loc, padding_needed, *padding_low); + arith::SubIOp::create(rewriter, loc, padding_needed, *padding_low); break; } } @@ -1086,13 +1087,13 @@ class ConvertConvDynamic : public OpRewritePattern { auto shape_scalar_type = rewriter.getIntegerType(32); auto get_const = [&](int64_t val) { - return rewriter.create(loc, shape_scalar_type, - val); + return mlir::arith::ConstantIntOp::create(rewriter, loc, + shape_scalar_type, val); }; auto get_dim_value = [&](Value val, int64_t dim) { - Value dim_value = rewriter.create(loc, val, dim); - return rewriter.create(loc, shape_scalar_type, - dim_value); + Value dim_value = tensor::DimOp::create(rewriter, loc, val, dim); + return arith::IndexCastOp::create(rewriter, loc, shape_scalar_type, + dim_value); }; for (auto i : 
llvm::seq(0, num_spatial_dims)) { @@ -1149,8 +1150,8 @@ class ConvertConvDynamic : public OpRewritePattern { auto precision_config_attr = rewriter.getNamedAttr( "precision_config", GetPrecisionConfig(&rewriter)); - Value paddings_op = rewriter.create( - op.getLoc(), + Value paddings_op = tensor::FromElementsOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape(2 * num_spatial_dims, rewriter.getI32Type()), paddings); @@ -1166,8 +1167,8 @@ class ConvertConvDynamic : public OpRewritePattern { new_shape.push_back(1); new_shape.push_back(filter_shape[num_spatial_dims] * filter_shape[num_spatial_dims + 1]); - operands[1] = rewriter.create( - op.getLoc(), + operands[1] = stablehlo::ReshapeOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape(new_shape, filter_ty.getElementType()), operands[1]); @@ -1324,8 +1325,8 @@ class ConvertConvOp : public OpRewritePattern { new_shape.push_back(1); new_shape.push_back(filter_shape[num_spatial_dims] * filter_shape[num_spatial_dims + 1]); - operands[1] = rewriter.create( - op.getLoc(), + operands[1] = stablehlo::ReshapeOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape(new_shape, filter_ty.getElementType()), operands[1]); @@ -1373,35 +1374,35 @@ class ConvertPadOpDynamic : public OpRewritePattern { auto interior_attr = GetI64ElementsAttr(interior_values, &rewriter); Value interior_padding_tensor = - rewriter.create(loc, interior_attr); + stablehlo::ConstantOp::create(rewriter, loc, interior_attr); Type paddings_elem_ty = paddings_type.getElementType(); if (!paddings_elem_ty.isInteger(64)) { - interior_padding_tensor = rewriter.create( - loc, interior_padding_tensor, paddings_elem_ty); + interior_padding_tensor = stablehlo::ConvertOp::create( + rewriter, loc, interior_padding_tensor, paddings_elem_ty); } llvm::SmallVector transposed_shape = {2, input_rank}; auto transpose_attr = GetI64ArrayAttr({1, 0}, &rewriter); Value transposed_paddings = - rewriter.create(loc, paddings, transpose_attr); - Value reshaped_paddings = rewriter.create( - loc, - tensorflow::GetTypeFromTFTensorShape({input_rank * 2}, - paddings_elem_ty), - transposed_paddings); + stablehlo::TransposeOp::create(rewriter, loc, paddings, transpose_attr); + Value reshaped_paddings = + stablehlo::ReshapeOp::create(rewriter, loc, + tensorflow::GetTypeFromTFTensorShape( + {input_rank * 2}, paddings_elem_ty), + transposed_paddings); auto left_padding_start_attr = GetI64ArrayAttr({0}, &rewriter); auto left_padding_limit_attr = GetI64ArrayAttr({input_rank}, &rewriter); auto left_padding_stride_attr = GetI64ArrayAttr({1}, &rewriter); - Value left_padding_tensor = rewriter.create( - loc, reshaped_paddings, left_padding_start_attr, + Value left_padding_tensor = stablehlo::SliceOp::create( + rewriter, loc, reshaped_paddings, left_padding_start_attr, left_padding_limit_attr, left_padding_stride_attr); auto right_padding_start_attr = GetI64ArrayAttr({input_rank}, &rewriter); auto right_padding_limit_attr = GetI64ArrayAttr({2 * input_rank}, &rewriter); auto right_padding_stride_attr = GetI64ArrayAttr({1}, &rewriter); - Value right_padding_tensor = rewriter.create( - loc, reshaped_paddings, right_padding_start_attr, + Value right_padding_tensor = stablehlo::SliceOp::create( + rewriter, loc, reshaped_paddings, right_padding_start_attr, right_padding_limit_attr, right_padding_stride_attr); rewriter.replaceOpWithNewOp( @@ -1450,23 +1451,24 @@ class ConvertGatherNdOpDynamic : public OpRewritePattern { Value slice_sizes_value = nullptr; for (int64_t i = 0; i 
< params_rank; ++i) { if (i < num_index_dims) { - slice_sizes_vals.push_back(rewriter.create( - loc, rewriter.getIntegerAttr(indices_ty.getElementType(), 1))); + slice_sizes_vals.push_back(arith::ConstantOp::create( + rewriter, loc, + rewriter.getIntegerAttr(indices_ty.getElementType(), 1))); } else { int64_t dim_size = params_ty.getDimSize(i); if (dim_size != ShapedType::kDynamic) { - slice_sizes_vals.push_back(rewriter.create( - loc, + slice_sizes_vals.push_back(arith::ConstantOp::create( + rewriter, loc, rewriter.getIntegerAttr(indices_ty.getElementType(), dim_size))); } else { - slice_sizes_vals.push_back(rewriter.create( - loc, indices_ty.getElementType(), - rewriter.create(loc, params, i))); + slice_sizes_vals.push_back(arith::IndexCastOp::create( + rewriter, loc, indices_ty.getElementType(), + tensor::DimOp::create(rewriter, loc, params, i))); } } } slice_sizes_value = - rewriter.create(loc, slice_sizes_vals); + tensor::FromElementsOp::create(rewriter, loc, slice_sizes_vals); // collapsed_slice_dims SmallVector collapsed_slice_dims; @@ -1535,18 +1537,18 @@ class ConvertBF16FloorDivOp : public OpRewritePattern { auto out_type = op.getZ().getType(); - l = rewriter.create(op.getLoc(), l, - rewriter.getF32Type()); - r = rewriter.create(op.getLoc(), r, - rewriter.getF32Type()); + l = stablehlo::ConvertOp::create(rewriter, op.getLoc(), l, + rewriter.getF32Type()); + r = stablehlo::ConvertOp::create(rewriter, op.getLoc(), r, + rewriter.getF32Type()); - auto intermediate = rewriter.create( - op.getLoc(), + auto intermediate = TF::FloorDivOp::create( + rewriter, op.getLoc(), ChangeTensorElementType(&rewriter, out_type, rewriter.getF32Type()), l, r); - auto floor_op = rewriter.create(op.getLoc(), out_type, - intermediate); + auto floor_op = stablehlo::ConvertOp::create(rewriter, op.getLoc(), + out_type, intermediate); rewriter.replaceOp(op, floor_op.getResult()); return success(); } @@ -1615,24 +1617,26 @@ class ConvertRollOp : public OpRewritePattern { // offset = ((offset % axis_size) + axis_size) % axis_size ImplicitLocOpBuilder b(op.getLoc(), rewriter); Value offset = op.getShift(); - auto axis_size = b.create(b.getIntegerAttr( - getElementTypeOrSelf(offset.getType()), input_shape[axis])); - offset = b.create( - b.create( - b.create(offset, axis_size), axis_size), + auto axis_size = stablehlo::ConstantOp::create( + b, b.getIntegerAttr(getElementTypeOrSelf(offset.getType()), + input_shape[axis])); + offset = stablehlo::RemOp::create( + b, + stablehlo::AddOp::create( + b, stablehlo::RemOp::create(b, offset, axis_size), axis_size), axis_size); // Stack two copies of the dimension, then slice from the calculated // offset. This also works if shift is not constant. // DynamicSliceOp requires the sizes being integer, and we can get the // information from input shape. 
- auto concat = b.create( - ValueRange{op.getInput(), op.getInput()}, b.getI64IntegerAttr(axis)); - Value zero = b.create( - b.getIntegerAttr(getElementTypeOrSelf(offset.getType()), 0)); + auto concat = stablehlo::ConcatenateOp::create( + b, ValueRange{op.getInput(), op.getInput()}, b.getI64IntegerAttr(axis)); + Value zero = stablehlo::ConstantOp::create( + b, b.getIntegerAttr(getElementTypeOrSelf(offset.getType()), 0)); SmallVector slice_begin_indices(input_rank, zero); slice_begin_indices[axis] = - b.create(axis_size, offset); + stablehlo::SubtractOp::create(b, axis_size, offset); rewriter.replaceOpWithNewOp( op, input_ty, concat, slice_begin_indices, GetI64ArrayAttr(input_shape, &rewriter)); @@ -1656,10 +1660,10 @@ class ConvertLeakyReluOp : public OpRewritePattern { Value zeroVal = chlo::getConstantLike(rewriter, loc, 0.0, features); Value leakyActivationVal = - rewriter.create(loc, features, alphaVal); + stablehlo::MulOp::create(rewriter, loc, features, alphaVal); - Value compareGtZero = rewriter.create( - loc, features, zeroVal, stablehlo::ComparisonDirection::GT); + Value compareGtZero = stablehlo::CompareOp::create( + rewriter, loc, features, zeroVal, stablehlo::ComparisonDirection::GT); rewriter.replaceOpWithNewOp( op, compareGtZero, features, leakyActivationVal); @@ -1686,10 +1690,10 @@ class ConvertLeakyReluGradOp : public OpRewritePattern { Value zeroVal = chlo::getConstantLike(rewriter, loc, 0.0, features); Value leakyGradientVal = - rewriter.create(loc, gradients, alphaVal); + stablehlo::MulOp::create(rewriter, loc, gradients, alphaVal); - Value compareGtZero = rewriter.create( - loc, features, zeroVal, stablehlo::ComparisonDirection::GT); + Value compareGtZero = stablehlo::CompareOp::create( + rewriter, loc, features, zeroVal, stablehlo::ComparisonDirection::GT); rewriter.replaceOpWithNewOp( op, featureType, compareGtZero, gradients, leakyGradientVal); @@ -1733,29 +1737,30 @@ class ConvertDiagPartOp : public OpRewritePattern { new_size *= input_type.getDimSize(i); new_dims.push_back(input_type.getDimSize(i)); } - Value reshaped_input = rewriter.create( - op.getLoc(), + Value reshaped_input = stablehlo::ReshapeOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape({new_size, new_size}, input_type.getElementType()), op.getInput()); auto iota_type = tensorflow::GetTypeFromTFTensorShape( {new_size, new_size}, rewriter.getIntegerType(32)); - auto iota0 = rewriter.create( - op.getLoc(), iota_type, rewriter.getI64IntegerAttr(0)); - auto iota1 = rewriter.create( - op.getLoc(), iota_type, rewriter.getI64IntegerAttr(1)); - Value compare = rewriter.create( - op.getLoc(), iota0, iota1, stablehlo::ComparisonDirection::EQ); + auto iota0 = stablehlo::IotaOp::create(rewriter, op.getLoc(), iota_type, + rewriter.getI64IntegerAttr(0)); + auto iota1 = stablehlo::IotaOp::create(rewriter, op.getLoc(), iota_type, + rewriter.getI64IntegerAttr(1)); + Value compare = + stablehlo::CompareOp::create(rewriter, op.getLoc(), iota0, iota1, + stablehlo::ComparisonDirection::EQ); Value zero = GetScalarConstOfType(input_type.getElementType(), op.getLoc(), 0, &rewriter); - Value zero_matrix = rewriter.create( - op.getLoc(), reshaped_input.getType(), zero, + Value zero_matrix = stablehlo::BroadcastOp::create( + rewriter, op.getLoc(), reshaped_input.getType(), zero, GetI64ArrayAttr({new_size, new_size}, &rewriter)); - Value masked = rewriter.create( - op.getLoc(), reshaped_input.getType(), compare, reshaped_input, - zero_matrix); - auto reduce = rewriter.create( - op.getLoc(), masked, zero, 
GetI64ArrayAttr({0}, &rewriter), + Value masked = stablehlo::SelectOp::create( + rewriter, op.getLoc(), reshaped_input.getType(), compare, + reshaped_input, zero_matrix); + auto reduce = stablehlo::ReduceOp::create( + rewriter, op.getLoc(), masked, zero, GetI64ArrayAttr({0}, &rewriter), input_type.getElementType()); assert(!input_type.getElementType().isInteger(1) && "data type should not be i1"); @@ -1802,8 +1807,8 @@ class ConvertMatrixDiagPartV3Op stablehlo::BroadcastOp BroadcastConstant(Location loc, Shape shape, int32_t constant, int int_size, PatternRewriter &rewriter) const { - return rewriter.create( - loc, + return stablehlo::BroadcastOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape(shape, rewriter.getIntegerType(int_size)), GetScalarConstOfType(rewriter.getIntegerType(int_size), loc, constant, @@ -1878,10 +1883,10 @@ class ConvertMatrixDiagPartV3Op RankedTensorType iota_type = tensorflow::GetTypeFromTFTensorShape( indices_shape, rewriter.getIntegerType(32)); - Value iotaM = rewriter.create( - loc, iota_type, rewriter.getI64IntegerAttr(1)); - Value iotaN = rewriter.create( - loc, iota_type, rewriter.getI64IntegerAttr(2)); + Value iotaM = stablehlo::IotaOp::create(rewriter, loc, iota_type, + rewriter.getI64IntegerAttr(1)); + Value iotaN = stablehlo::IotaOp::create(rewriter, loc, iota_type, + rewriter.getI64IntegerAttr(2)); // Boradcasted constants, of the same shape as iotaM and iotaN. Value b_zero = BroadcastConstant(loc, indices_shape, 0, 32, rewriter); @@ -1898,17 +1903,19 @@ class ConvertMatrixDiagPartV3Op // subtract m here. This means we start with the superdiagonals and // move downwards towards the subdiagonals. So the start indices will // be decreasing.) - Value d = rewriter.create(loc, b_k1, iotaM); - Value neg_d = rewriter.create(loc, d); + Value d = stablehlo::SubtractOp::create(rewriter, loc, b_k1, iotaM); + Value neg_d = stablehlo::NegOp::create(rewriter, loc, d); // diag_len_d = min(rows + min(d, 0), cols - max(d, 0)) // (Length of a diagonal for a given d. Same as max_diag_len for m = 0.) - Value diag_len_d = rewriter.create( - loc, - rewriter.create( - loc, b_rows, rewriter.create(loc, d, b_zero)), - rewriter.create( - loc, b_cols, rewriter.create(loc, d, b_zero))); + Value diag_len_d = stablehlo::MinOp::create( + rewriter, loc, + stablehlo::AddOp::create( + rewriter, loc, b_rows, + stablehlo::MinOp::create(rewriter, loc, d, b_zero)), + stablehlo::SubtractOp::create( + rewriter, loc, b_cols, + stablehlo::MaxOp::create(rewriter, loc, d, b_zero))); // offset is max_diag_len - diag_len_d if we're padding, 0 otherwise. Value cmp; @@ -1916,10 +1923,10 @@ class ConvertMatrixDiagPartV3Op cmp = b_true; } else if (superdiagonal_align == kRight) { // offset = d>=0 ? max_diag_len - diag_len_d : 0 - cmp = rewriter.create(loc, d, b_zero); + cmp = TF::GreaterEqualOp::create(rewriter, loc, d, b_zero); } else if (subdiagonal_align == kRight) { // offset = d<=0 ? max_diag_len - diag_len_d : 0 - cmp = rewriter.create(loc, d, b_zero); + cmp = TF::LessEqualOp::create(rewriter, loc, d, b_zero); } else { // offset = 0 cmp = b_false; @@ -1927,45 +1934,48 @@ class ConvertMatrixDiagPartV3Op // This offset shifts the diagonals to the "left" or "right", depending // on alignment. 
- Value offset = rewriter.create( - loc, b_zero.getType(), cmp, - rewriter.create(loc, b_max_diag_len, diag_len_d), + Value offset = stablehlo::SelectOp::create( + rewriter, loc, b_zero.getType(), cmp, + stablehlo::SubtractOp::create(rewriter, loc, b_max_diag_len, + diag_len_d), b_zero); // x = max(d, 0) - offset // y = max(-d, 0) - offset - Value x = rewriter.create( - loc, rewriter.create(loc, d, b_zero), offset); - Value y = rewriter.create( - loc, rewriter.create(loc, neg_d, b_zero), offset); + Value x = stablehlo::SubtractOp::create( + rewriter, loc, stablehlo::MaxOp::create(rewriter, loc, d, b_zero), + offset); + Value y = stablehlo::SubtractOp::create( + rewriter, loc, stablehlo::MaxOp::create(rewriter, loc, neg_d, b_zero), + offset); - Value n_plus_x = rewriter.create(loc, iotaN, x); - Value n_plus_y = rewriter.create(loc, iotaN, y); + Value n_plus_x = stablehlo::AddOp::create(rewriter, loc, iotaN, x); + Value n_plus_y = stablehlo::AddOp::create(rewriter, loc, iotaN, y); // GatherOp is happy about letting us index out of bounds values, but those // values will be undefined. So we mask them later. Set up the boolean // expression that tells us which entries, in the output shape, are out of // bounds and thus become the padding_value. - Value x_in_bounds = rewriter.create( - loc, - rewriter.create(loc, b_false.getType(), n_plus_x, - b_zero), - rewriter.create(loc, b_false.getType(), n_plus_x, b_cols)); - Value y_in_bounds = rewriter.create( - loc, - rewriter.create(loc, b_false.getType(), n_plus_y, - b_zero), - rewriter.create(loc, b_false.getType(), n_plus_y, b_rows)); - Value in_bounds = rewriter.create( - loc, + Value x_in_bounds = stablehlo::AndOp::create( + rewriter, loc, + TF::GreaterEqualOp::create(rewriter, loc, b_false.getType(), n_plus_x, + b_zero), + TF::LessOp::create(rewriter, loc, b_false.getType(), n_plus_x, b_cols)); + Value y_in_bounds = stablehlo::AndOp::create( + rewriter, loc, + TF::GreaterEqualOp::create(rewriter, loc, b_false.getType(), n_plus_y, + b_zero), + TF::LessOp::create(rewriter, loc, b_false.getType(), n_plus_y, b_rows)); + Value in_bounds = stablehlo::ReshapeOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape(Shape({num_diags, max_diag_len}), rewriter.getIntegerType(1)), - rewriter.create(loc, x_in_bounds, y_in_bounds)); + stablehlo::AndOp::create(rewriter, loc, x_in_bounds, y_in_bounds)); // Now combine x and y into the index data structure needed for gather. 
Shape concat_shape({2, num_diags, max_diag_len}); - Value start_indices = rewriter.create( - loc, + Value start_indices = stablehlo::ConcatenateOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape(concat_shape, rewriter.getIntegerType(32)), mlir::ValueRange({n_plus_y, n_plus_x}), @@ -2009,8 +2019,8 @@ class ConvertMatrixDiagPartV3Op /*operandBatchingDims=*/{}, /*startIndicesBatchingDims=*/{}, start_index_map, /*indexVectorDim=*/0); - Value gather = rewriter.create( - loc, op.getInput(), start_indices, dims_attr, + Value gather = stablehlo::GatherOp::create( + rewriter, loc, op.getInput(), start_indices, dims_attr, GetI64ArrayAttr(slice_sizes, &rewriter)); // We now need to broadcast the "in_bounds" boolean expression, as well as @@ -2019,22 +2029,24 @@ class ConvertMatrixDiagPartV3Op for (int i = 0; i < output_shape.size() - 2; i++) { broadcast_bounds.push_back(output_shape[i]); } - Value b_in_bounds = rewriter.create( - loc, + Value b_in_bounds = stablehlo::BroadcastOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape(output_shape, rewriter.getIntegerType(1)), in_bounds, GetI64ArrayAttr(broadcast_bounds, &rewriter)); - Value b_padding = rewriter.create( - loc, op.getPaddingValue(), GetI64ArrayAttr(output_shape, &rewriter)); + Value b_padding = stablehlo::BroadcastOp::create( + rewriter, loc, op.getPaddingValue(), + GetI64ArrayAttr(output_shape, &rewriter)); // Replace all out-of-bounds values in the result with padding_value. - Value result = rewriter.create(loc, b_in_bounds, - gather, b_padding); + Value result = stablehlo::SelectOp::create(rewriter, loc, b_in_bounds, + gather, b_padding); if (num_diags == 1) { // matrix_diag_part folds away the 1-sized band dimension if we only // extract a single diagonal. - result = rewriter.create(loc, op.getType(), result); + result = + stablehlo::ReshapeOp::create(rewriter, loc, op.getType(), result); } rewriter.replaceOp(op, result); @@ -2057,9 +2069,10 @@ class ConvertEinsumOp : public OpRewritePattern { // creates a scalar constant 1.0 for first operand. if (op.getN() == 1) { equation_str = "," + equation_str; - inputs.push_back(rewriter.create( - op.getLoc(), hlo::getScalarOfType( - mlir::getElementTypeOrSelf(op.getOperand(0)), 1))); + inputs.push_back(stablehlo::ConstantOp::create( + rewriter, op.getLoc(), + hlo::getScalarOfType(mlir::getElementTypeOrSelf(op.getOperand(0)), + 1))); } // Insert remaining operands into inputs, TF op verifier requires there be // 0 or 1 operands. @@ -2129,8 +2142,8 @@ class ConvertFFTOp : public OpRewritePattern { // Last dim larger than expected_dim, slice the input if (input_shape.back() > expected_dim) { - reshaped = rewriter.create( - op.getLoc(), + reshaped = stablehlo::SliceOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape(expected_shape, input_ty.getElementType()), op.getInput(), GetI64ArrayAttr(begin_indices, &rewriter), @@ -2144,8 +2157,8 @@ class ConvertFFTOp : public OpRewritePattern { padding.push_back(expected_dim - input_shape.back()); Value zero = GetScalarConstOfType(input_ty.getElementType(), loc, 0, &rewriter); - reshaped = rewriter.create( - loc, + reshaped = stablehlo::PadOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape(expected_shape, input_ty.getElementType()), op.getInput(), zero, GetI64ArrayAttr(no_padding, &rewriter), @@ -2193,8 +2206,8 @@ class ConvertFusedBatchNormGradBase // To support mixed precision, the statistics type, which maybe more // precise than the input types, are used for this op. 
Type kernel_type = mlir::cast(scale.getType()).getElementType(); - grad = rewriter.create(loc, grad, kernel_type); - act = rewriter.create(loc, act, kernel_type); + grad = stablehlo::ConvertOp::create(rewriter, loc, grad, kernel_type); + act = stablehlo::ConvertOp::create(rewriter, loc, act, kernel_type); tensorflow::TensorFormat data_format; if (!FormatFromString(op.getDataFormat().str(), &data_format)) @@ -2213,9 +2226,9 @@ class ConvertFusedBatchNormGradBase SmallVector operand_types = {act.getType(), feature_type, feature_type}; - auto training_op = rewriter.create( - loc, operand_types, act, scale, mean, var, grad, op.getEpsilon(), - feature_dim); + auto training_op = stablehlo::BatchNormGradOp::create( + rewriter, loc, operand_types, act, scale, mean, var, grad, + op.getEpsilon(), feature_dim); x_backprop = training_op.getResult(0); @@ -2234,52 +2247,55 @@ class ConvertFusedBatchNormGradBase // scratch1 = rsqrt(var + epsilon) RankedTensorType scalar_float = tensorflow::GetTypeFromTFTensorShape({}, kernel_type); - auto epsilon = rewriter.create( - loc, DenseFPElementsAttr::get(scalar_float, {op.getEpsilon()})); - auto add_op = rewriter.create( - loc, var, epsilon.getResult(), scalar_broadcast_dims); + auto epsilon = stablehlo::ConstantOp::create( + rewriter, loc, + DenseFPElementsAttr::get(scalar_float, {op.getEpsilon()})); + auto add_op = chlo::BroadcastAddOp::create( + rewriter, loc, var, epsilon.getResult(), scalar_broadcast_dims); - Value scratch1 = rewriter.create(loc, add_op); + Value scratch1 = stablehlo::RsqrtOp::create(rewriter, loc, add_op); // scratch2 = sum(y_backprop * (x - mean)) - auto sub_op = rewriter.create( - loc, act, + auto sub_op = stablehlo::SubtractOp::create( + rewriter, loc, act, Broadcast1DToFeatureDim(loc, act, mean, feature_dim, rewriter)); - auto weighted_grad = rewriter.create(loc, grad, sub_op); + auto weighted_grad = + stablehlo::MulOp::create(rewriter, loc, grad, sub_op); Value scratch2 = ApplyReduction(loc, weighted_grad, reduce_dims, &rewriter); // x_backprop = y_backprop * (scale * scratch1) auto scaled_grad = - rewriter.create(loc, op.getScale(), scratch1); - x_backprop = rewriter.create( - loc, grad, + stablehlo::MulOp::create(rewriter, loc, op.getScale(), scratch1); + x_backprop = stablehlo::MulOp::create( + rewriter, loc, grad, Broadcast1DToFeatureDim(loc, act, scaled_grad, feature_dim, rewriter)); // scale_backprop = scratch2 * scratch1 scale_backprop = - rewriter.create(loc, scratch1, scratch2); + stablehlo::MulOp::create(rewriter, loc, scratch1, scratch2); // offset_backprop = sum(y_backprop) offset_backprop = ApplyReduction(loc, grad, reduce_dims, &rewriter); } x_backprop = - rewriter.create(loc, x_backprop, act_ele_type); + stablehlo::ConvertOp::create(rewriter, loc, x_backprop, act_ele_type); Value last_val[2]; if (op.getResult(3).use_empty() && op.getResult(4).use_empty()) { // It doesn't matter what values we provide for the last 2 results. 
last_val[0] = last_val[1] = op.getX(); } else { - auto const_val = rewriter.create( - op.getLoc(), DenseElementsAttr::get( - tensorflow::GetTypeFromTFTensorShape( - {0}, getElementTypeOrSelf(op.getResult(3))), - 0.0)); + auto const_val = stablehlo::ConstantOp::create( + rewriter, op.getLoc(), + DenseElementsAttr::get( + tensorflow::GetTypeFromTFTensorShape( + {0}, getElementTypeOrSelf(op.getResult(3))), + 0.0)); auto maybe_cast = [&](Value val, Type t) -> Value { if (val.getType() == t) return val; - return rewriter.create(op.getLoc(), t, val); + return tensor::CastOp::create(rewriter, op.getLoc(), t, val); }; last_val[0] = maybe_cast(const_val, op.getResult(3).getType()); last_val[1] = maybe_cast(const_val, op.getResult(4).getType()); @@ -2333,8 +2349,8 @@ class ConvertFusedBatchNormBase : public OpRewritePattern { // TODO(b/69928690): Support mixed precision in the XLA batch // normalization operators. As a workaround, create a new x with the same // element type as scale (which may be more precise than the input type). - Value bn_train_input = rewriter.create( - op.getLoc(), op.getX(), scale_element_type); + Value bn_train_input = stablehlo::ConvertOp::create( + rewriter, op.getLoc(), op.getX(), scale_element_type); TensorType bn_train_input_type_tensor = mlir::cast(bn_train_input.getType()); @@ -2351,8 +2367,8 @@ class ConvertFusedBatchNormBase : public OpRewritePattern { // batch_mean, and batch_var. SmallVector operand_types = {bn_train_input_type_tensor, mean_var_type, mean_var_type}; - auto bn_train_op = rewriter.create( - op.getLoc(), operand_types, bn_train_input, op.getScale(), + auto bn_train_op = stablehlo::BatchNormTrainingOp::create( + rewriter, op.getLoc(), operand_types, bn_train_input, op.getScale(), op.getOffset(), op.getEpsilon(), feature_dim.getInt()); // HLO op outputs a tuple of tensors. Extract those results. Value y_out = bn_train_op.getResult(0); @@ -2368,48 +2384,53 @@ class ConvertFusedBatchNormBase : public OpRewritePattern { int sample_size_minus_one = std::max(1, sample_size - 1); double factor = static_cast(sample_size) / static_cast(sample_size_minus_one); - auto factor_const_op = rewriter.create( - op.getLoc(), rewriter.getFloatAttr(scale_element_type, factor)); + auto factor_const_op = stablehlo::ConstantOp::create( + rewriter, op.getLoc(), + rewriter.getFloatAttr(scale_element_type, factor)); - Value corrected_variance = rewriter.create( - op.getLoc(), batch_variance.getType(), batch_variance, + Value corrected_variance = chlo::BroadcastMulOp::create( + rewriter, op.getLoc(), batch_variance.getType(), batch_variance, factor_const_op, /*broadcast_dimensions=*/DenseI64ArrayAttr()); // Convert back to input type to stay aligned with expected output type // for TF op. - y_out = rewriter.create(op.getLoc(), y_out, - input_element_type); + y_out = stablehlo::ConvertOp::create(rewriter, op.getLoc(), y_out, + input_element_type); float exponential_avg_factor = op.getExponentialAvgFactor().convertToFloat(); if (exponential_avg_factor != 1.0f) { - auto alpha = rewriter.create( - op.getLoc(), rewriter.getFloatAttr(mean_element_type, - 1.0f - exponential_avg_factor)); - auto beta = rewriter.create( - op.getLoc(), + auto alpha = stablehlo::ConstantOp::create( + rewriter, op.getLoc(), + rewriter.getFloatAttr(mean_element_type, + 1.0f - exponential_avg_factor)); + auto beta = stablehlo::ConstantOp::create( + rewriter, op.getLoc(), rewriter.getFloatAttr(mean_element_type, exponential_avg_factor)); // new_running_mean = alpha * old_mean + beta * batch_mean. 
- auto alpha_mul_old_mean = rewriter.create( - op.getLoc(), op.getMean().getType(), alpha, op.getMean(), + auto alpha_mul_old_mean = chlo::BroadcastMulOp::create( + rewriter, op.getLoc(), op.getMean().getType(), alpha, op.getMean(), /*broadcast_dimensions=*/DenseI64ArrayAttr()); - auto beta_mul_batch_mean = rewriter.create( - op.getLoc(), batch_mean.getType(), beta, batch_mean, + auto beta_mul_batch_mean = chlo::BroadcastMulOp::create( + rewriter, op.getLoc(), batch_mean.getType(), beta, batch_mean, /*broadcast_dimensions=*/DenseI64ArrayAttr()); - batch_mean = rewriter.create( - op.getLoc(), alpha_mul_old_mean, beta_mul_batch_mean, + batch_mean = chlo::BroadcastAddOp::create( + rewriter, op.getLoc(), alpha_mul_old_mean, beta_mul_batch_mean, /*broadcast_dimensions=*/DenseI64ArrayAttr()); // new_running_variance = alpha * old_variance + beta * batch_variance. - auto alpha_mul_old_variance = rewriter.create( - op.getLoc(), op.getVariance().getType(), alpha, op.getVariance(), + auto alpha_mul_old_variance = chlo::BroadcastMulOp::create( + rewriter, op.getLoc(), op.getVariance().getType(), alpha, + op.getVariance(), /*broadcast_dimensions=*/DenseI64ArrayAttr()); - auto beta_mul_batch_variance = rewriter.create( - op.getLoc(), corrected_variance.getType(), beta, corrected_variance, + auto beta_mul_batch_variance = chlo::BroadcastMulOp::create( + rewriter, op.getLoc(), corrected_variance.getType(), beta, + corrected_variance, /*broadcast_dimensions=*/DenseI64ArrayAttr()); - corrected_variance = rewriter.create( - op.getLoc(), alpha_mul_old_variance, beta_mul_batch_variance, + corrected_variance = chlo::BroadcastAddOp::create( + rewriter, op.getLoc(), alpha_mul_old_variance, + beta_mul_batch_variance, /*broadcast_dimensions=*/DenseI64ArrayAttr()); } @@ -2433,11 +2454,12 @@ class ConvertFusedBatchNormBase : public OpRewritePattern { : 0; auto const_attr_type = tensorflow::GetTypeFromTFTensorShape( {num_elements}, getElementTypeOrSelf(reserve_space_3_type)); - Value dummy_const = rewriter.create( - op.getLoc(), DenseElementsAttr::get(const_attr_type, 0.0)); + Value dummy_const = stablehlo::ConstantOp::create( + rewriter, op.getLoc(), + DenseElementsAttr::get(const_attr_type, 0.0)); if (const_attr_type != reserve_space_3_type) - dummy_const = rewriter.create( - op.getLoc(), reserve_space_3_type, dummy_const); + dummy_const = tensor::CastOp::create( + rewriter, op.getLoc(), reserve_space_3_type, dummy_const); rewriter.replaceOp(op, {y_out, /*batch_mean=*/batch_mean, /*batch_variance=*/corrected_variance, /*reserve_space_1=*/reserve_space_1, @@ -2445,16 +2467,16 @@ class ConvertFusedBatchNormBase : public OpRewritePattern { /*reserve_space_3=*/dummy_const}); } } else { // Inference case. - auto bn_train_op = rewriter.create( - op.getLoc(), + auto bn_train_op = stablehlo::BatchNormInferenceOp::create( + rewriter, op.getLoc(), /*result_type=*/bn_train_input_type_tensor, bn_train_input, op.getScale(), op.getOffset(), op.getMean(), op.getVariance(), op.getEpsilon(), feature_dim.getInt()); // Convert back to input type to stay aligned with expected output type // for TF op. - auto y_out = rewriter.create( - op.getLoc(), bn_train_op, input_element_type); + auto y_out = stablehlo::ConvertOp::create( + rewriter, op.getLoc(), bn_train_op, input_element_type); // The mean, variance, and reserved space outputs of the batch norm op are // not used for inference. 
It doesn't matter what values we provide for @@ -2477,11 +2499,12 @@ class ConvertFusedBatchNormBase : public OpRewritePattern { : 0; auto const_attr_type = tensorflow::GetTypeFromTFTensorShape( {num_elements}, getElementTypeOrSelf(reserve_space_3_type)); - Value dummy_const = rewriter.create( - op.getLoc(), DenseElementsAttr::get(const_attr_type, 0.0)); + Value dummy_const = stablehlo::ConstantOp::create( + rewriter, op.getLoc(), + DenseElementsAttr::get(const_attr_type, 0.0)); if (const_attr_type != reserve_space_3_type) - dummy_const = rewriter.create( - op.getLoc(), reserve_space_3_type, dummy_const); + dummy_const = tensor::CastOp::create( + rewriter, op.getLoc(), reserve_space_3_type, dummy_const); rewriter.replaceOp(op, {/*y=*/y_out, /*batch_mean=*/op.getMean(), /*batch_variance=*/op.getVariance(), @@ -2580,8 +2603,8 @@ Operation *AvgPoolDivideByCount( Value divisor = GetScalarConstOfType(element_type, loc, window_count, &rewriter); auto scalar_broadcast_dims = rewriter.getDenseI64ArrayAttr({}); - result = rewriter.create( - loc, pooled_type, pooled, divisor, scalar_broadcast_dims); + result = chlo::BroadcastDivOp::create(rewriter, loc, pooled_type, pooled, + divisor, scalar_broadcast_dims); } else { assert(op.getPadding() == "SAME"); // For SAME padding, only original entries that contributed to a window @@ -2589,7 +2612,7 @@ Operation *AvgPoolDivideByCount( // Build all-ones tensor of same shape as the original input. ElementsAttr splat = hlo::getSplat(&rewriter, orig_input_type, 1); - auto all_ones_tensor = rewriter.create(loc, splat); + auto all_ones_tensor = stablehlo::ConstantOp::create(rewriter, loc, splat); // Get padding for the input. DenseIntElementsAttr input_padding_attr = @@ -2599,8 +2622,8 @@ Operation *AvgPoolDivideByCount( // Count the 1's in each window, using the same padding as for the input, // which gives us the window counts by which `pooled` needs to be divided. - auto divisor = rewriter.create( - loc, pooled_type, + auto divisor = stablehlo::ReduceWindowOp::create( + rewriter, loc, pooled_type, /*operand=*/all_ones_tensor, /*init_value=*/zero, /*window_dimensions=*/ @@ -2614,8 +2637,8 @@ Operation *AvgPoolDivideByCount( &rewriter); // Divide `pooled` by window counts. - result = rewriter.create(loc, pooled_type, pooled, - divisor.getResult(0)); + result = stablehlo::DivOp::create(rewriter, loc, pooled_type, pooled, + divisor.getResult(0)); } return result; } @@ -2651,8 +2674,8 @@ class ConvertAvgPoolOp : public OpRewritePattern { // Convert if we need enlarge the element type's bitwidth. if (input_element_type != sum_element_type) - input_value = rewriter.create( - op.getLoc(), input_value, sum_element_type); + input_value = stablehlo::ConvertOp::create(rewriter, op.getLoc(), + input_value, sum_element_type); // Create the ReduceWindow op. 
Value init = @@ -2660,8 +2683,8 @@ class ConvertAvgPoolOp : public OpRewritePattern { DenseIntElementsAttr paddings_attr = GetReduceWindowPaddingAsAttr( input_type.getShape(), op.getKsize(), op.getStrides(), op.getPadding(), &rewriter); - auto reduce = rewriter.create( - op.getLoc(), result_type, input_value, init, + auto reduce = stablehlo::ReduceWindowOp::create( + rewriter, op.getLoc(), result_type, input_value, init, ToDenseI64ArrayAttr(GetI64ElementsAttr(op.getKsize()), &rewriter), ToDenseI64ArrayAttr(GetI64ElementsAttr(op.getStrides()), &rewriter), /*base_dilations=*/DenseI64ArrayAttr(), @@ -2683,8 +2706,8 @@ class ConvertAvgPoolOp : public OpRewritePattern { // Convert back if we enlarged the element type's bitwidth. Value result = result_op->getOpResult(0); if (input_element_type != sum_element_type) - result = rewriter.create(op.getLoc(), result, - input_element_type); + result = stablehlo::ConvertOp::create(rewriter, op.getLoc(), result, + input_element_type); rewriter.replaceOp(op, result); return success(); @@ -2825,8 +2848,9 @@ class ConvertAvgPoolGradOp : public OpRewritePattern { out_grad_shape[dim] = low_padding[dim] + high_padding[dim] + (out_grad_shape[dim] - 1) * strides[dim] + 1; } - Value reduce_window_input = rewriter.create( - loc, tensorflow::GetTypeFromTFTensorShape(out_grad_shape, element_type), + Value reduce_window_input = stablehlo::PadOp::create( + rewriter, loc, + tensorflow::GetTypeFromTFTensorShape(out_grad_shape, element_type), /*operand=*/out_grad_divided->getOpResult(0), /*padding_value=*/zero, /*edge_padding_low=*/GetI64ArrayAttr(low_padding, &rewriter), @@ -2839,13 +2863,13 @@ class ConvertAvgPoolGradOp : public OpRewritePattern { Type sum_element_type = GetSumAccumulationType(element_type); if (element_type != sum_element_type) { // Convert to appropriate sum accumulation type to avoid precision loss. - reduce_window_input = rewriter.create( - loc, reduce_window_input, sum_element_type); + reduce_window_input = stablehlo::ConvertOp::create( + rewriter, loc, reduce_window_input, sum_element_type); zero = GetScalarConstOfType(sum_element_type, loc, 0, &rewriter); } auto ones = GetI64ArrayAttr(DimVector(num_dims, 1), &rewriter); - auto reduce_window_op = rewriter.create( - loc, + auto reduce_window_op = stablehlo::ReduceWindowOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape(orig_input_shape, sum_element_type), /*operand=*/reduce_window_input, @@ -2862,8 +2886,8 @@ class ConvertAvgPoolGradOp : public OpRewritePattern { if (element_type != sum_element_type) { // Convert back to original element type. 
- result = rewriter.create(op.getLoc(), result, - element_type); + result = stablehlo::ConvertOp::create(rewriter, op.getLoc(), result, + element_type); } rewriter.replaceOp(op, {result}); return success(); @@ -2909,8 +2933,8 @@ class ConvertMaxPoolOp : public OpRewritePattern { DenseIntElementsAttr paddings_attr = GetReduceWindowPaddingAsAttr( input_ty.getShape(), op.getKsize(), op.getStrides(), op.getPadding(), &rewriter); - auto reduce = rewriter.create( - loc, op.getType(), op.getInput(), init, + auto reduce = stablehlo::ReduceWindowOp::create( + rewriter, loc, op.getType(), op.getInput(), init, ToDenseI64ArrayAttr(GetI64ElementsAttr(op.getKsize()), &rewriter), ToDenseI64ArrayAttr(GetI64ElementsAttr(op.getStrides()), &rewriter), /*base_dilations=*/DenseI64ArrayAttr(), @@ -2958,7 +2982,7 @@ class ConvertSelectOp : public OpRewritePattern { bool needs_broadcast = cond_type.getRank() == 1 && then_type.getRank() != 1; Value then_shape_split = then_shape; if (needs_broadcast) { - Value const_one = b.create(1); + Value const_one = arith::ConstantIndexOp::create(b, 1); Type extent_first = shape::getExtentTensorType(b.getContext(), 1); Type extent_second = shape::getExtentTensorType(b.getContext(), then_type.getRank() - 1); @@ -2978,7 +3002,7 @@ class ConvertSelectOp : public OpRewritePattern { } auto result_type = mlir::cast(op.getResult().getType()); auto assuming_op = - b.create(ArrayRef{result_type}, assumption); + shape::AssumingOp::create(b, ArrayRef{result_type}, assumption); OpBuilder::InsertionGuard guard(b); b.createBlock(&assuming_op.getDoRegion()); @@ -2986,17 +3010,18 @@ class ConvertSelectOp : public OpRewritePattern { // Broadcast the cond if necessary. Value cond = op.getCondition(); if (needs_broadcast) { - Value result_extents = b.create( - GetExtentsTensorTypeFor(result_type), then_shape); - cond = b.create( + Value result_extents = shape::ToExtentTensorOp::create( + b, GetExtentsTensorTypeFor(result_type), then_shape); + cond = stablehlo::DynamicBroadcastInDimOp::create( + b, tensorflow::GetTypeFromTFTensorShape(result_type.getShape(), b.getI1Type()), cond, result_extents, GetI64ArrayAttrForSeq(0, cond_type.getRank(), &b)); } - Value select = b.create( - result_type, cond, op.getThenValue(), op.getElseValue()); - b.create(select); + Value select = stablehlo::SelectOp::create( + b, result_type, cond, op.getThenValue(), op.getElseValue()); + shape::AssumingYieldOp::create(b, select); rewriter.replaceOp(op, {assuming_op.getResult(0)}); return success(); } @@ -3034,57 +3059,58 @@ class ConvertSliceOpDynamic : public OpRewritePattern { int rank = begin_type.getDimSize(0); auto shape_scalar_type = begin_type.getElementType(); - Value one = rewriter.create(loc, 1); + Value one = arith::ConstantIndexOp::create(rewriter, loc, 1); SmallVector stride_values(rank, one); SmallVector end_values; SmallVector begin_values; end_values.reserve(rank); for (int i = 0; i < rank; ++i) { SmallVector indices; - indices.push_back(rewriter.create(loc, i)); + indices.push_back(arith::ConstantIndexOp::create(rewriter, loc, i)); auto begin_value = - rewriter.create(loc, begin_indices, indices); - auto size_value = rewriter.create(loc, sizes, indices); - Value minus_one = rewriter.create( - loc, shape_scalar_type, - rewriter.create(loc, -1)); - auto is_minus_one = rewriter.create( - loc, arith::CmpIPredicate::eq, size_value, minus_one); + tensor::ExtractOp::create(rewriter, loc, begin_indices, indices); + auto size_value = + tensor::ExtractOp::create(rewriter, loc, sizes, indices); + Value minus_one = 
arith::IndexCastOp::create( + rewriter, loc, shape_scalar_type, + arith::ConstantIndexOp::create(rewriter, loc, -1)); + auto is_minus_one = arith::CmpIOp::create( + rewriter, loc, arith::CmpIPredicate::eq, size_value, minus_one); Value end_value = - rewriter.create(loc, begin_value, size_value); - auto dim_value = rewriter.create( - loc, shape_scalar_type, - rewriter.create(loc, input, i)); - end_value = rewriter.create(loc, is_minus_one, - dim_value, end_value); - auto end_value_casted = rewriter.create( - loc, rewriter.getIndexType(), end_value); + arith::AddIOp::create(rewriter, loc, begin_value, size_value); + auto dim_value = arith::IndexCastOp::create( + rewriter, loc, shape_scalar_type, + tensor::DimOp::create(rewriter, loc, input, i)); + end_value = mlir::arith::SelectOp::create(rewriter, loc, is_minus_one, + dim_value, end_value); + auto end_value_casted = arith::IndexCastOp::create( + rewriter, loc, rewriter.getIndexType(), end_value); end_values.push_back(end_value_casted); - auto begin_value_casted = rewriter.create( - loc, rewriter.getIndexType(), begin_value); + auto begin_value_casted = arith::IndexCastOp::create( + rewriter, loc, rewriter.getIndexType(), begin_value); begin_values.push_back(begin_value_casted); } auto index_ty = rewriter.getIndexType(); - auto start_indices = rewriter.create( - loc, + auto start_indices = tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(begin_values.size())}, index_ty), begin_values); - auto end_indices = rewriter.create( - loc, + auto end_indices = tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(end_values.size())}, index_ty), end_values); - auto stride_indices = rewriter.create( - loc, + auto stride_indices = tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(stride_values.size())}, index_ty), stride_values); - auto d_slice = rewriter.create( - loc, op.getOperation()->getResult(0).getType(), input, start_indices, - end_indices, stride_indices); + auto d_slice = stablehlo::RealDynamicSliceOp::create( + rewriter, loc, op.getOperation()->getResult(0).getType(), input, + start_indices, end_indices, stride_indices); rewriter.replaceOp(op, d_slice.getOperation()->getResults()); return success(); } @@ -3110,15 +3136,15 @@ static void BroadcastBatchMatMulV2Operands(Value lhs, Value rhs, Location loc, // TODO(silvasean): Reduce duplication across reified shape calculations and // the static computation of output types needed to create ops. 
- Value lhs_shape = rewriter->create(loc, lhs); - Value rhs_shape = rewriter->create(loc, rhs); + Value lhs_shape = shape::ShapeOfOp::create(*rewriter, loc, lhs); + Value rhs_shape = shape::ShapeOfOp::create(*rewriter, loc, rhs); Value const_neg2 = - rewriter->create(loc, rewriter->getIndexAttr(-2)); + arith::ConstantOp::create(*rewriter, loc, rewriter->getIndexAttr(-2)); auto shape_type = shape::ShapeType::get(rewriter->getContext()); - auto lhs_splitted = rewriter->create( - loc, TypeRange{shape_type, shape_type}, lhs_shape, const_neg2); - auto rhs_splitted = rewriter->create( - loc, TypeRange{shape_type, shape_type}, rhs_shape, const_neg2); + auto lhs_splitted = shape::SplitAtOp::create( + *rewriter, loc, TypeRange{shape_type, shape_type}, lhs_shape, const_neg2); + auto rhs_splitted = shape::SplitAtOp::create( + *rewriter, loc, TypeRange{shape_type, shape_type}, rhs_shape, const_neg2); auto lhs_type = mlir::cast(lhs.getType()); auto rhs_type = mlir::cast(rhs.getType()); // The last two dimensions are the matrix row/col dimensions. Don't broadcast @@ -3127,9 +3153,10 @@ static void BroadcastBatchMatMulV2Operands(Value lhs, Value rhs, Location loc, mlir::OpTrait::util::getBroadcastedShape( lhs_type.getShape().drop_back(2), rhs_type.getShape().drop_back(2), result_batch_shape_compile_time_extents); - auto result_batch_shape = rewriter->create( - loc, shape_type, lhs_splitted.getHead(), rhs_splitted.getHead(), - /*error=*/nullptr); + auto result_batch_shape = + shape::BroadcastOp::create(*rewriter, loc, shape_type, + lhs_splitted.getHead(), rhs_splitted.getHead(), + /*error=*/nullptr); // Lambda which handles the broadcasting of one side to the common // leading-batch dimensions. auto broadcast_one_side = [&](Value side, RankedTensorType type, @@ -3139,16 +3166,16 @@ static void BroadcastBatchMatMulV2Operands(Value lhs, Value rhs, Location loc, result_shape.append(matrix_dims.begin(), matrix_dims.end()); auto result_type = tensorflow::GetTypeFromTFTensorShape( result_shape, type.getElementType()); - auto shape = rewriter->create( - loc, shape_type, result_batch_shape, tail_shape); - auto shape_tensor = rewriter->create( - loc, + auto shape = shape::ConcatOp::create(*rewriter, loc, shape_type, + result_batch_shape, tail_shape); + auto shape_tensor = shape::ToExtentTensorOp::create( + *rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(result_shape.size())}, rewriter->getIndexType()), shape); - *out_side = rewriter->create(loc, result_type, side, - shape_tensor); + *out_side = TF::BroadcastToOp::create(*rewriter, loc, result_type, side, + shape_tensor); }; broadcast_one_side(lhs, lhs_type, lhs_splitted.getTail(), out_lhs); broadcast_one_side(rhs, rhs_type, rhs_splitted.getTail(), out_rhs); @@ -3177,10 +3204,10 @@ class ConvertBatchMatMulV2Op : public OpRewritePattern { auto rhs_type = mlir::dyn_cast(rhs.getType()); if (!lhs_type || !rhs_type) return failure(); if (mlir::isa(lhs_type.getElementType()) && op.getAdjX()) { - lhs = rewriter.create(op.getLoc(), lhs_type, lhs); + lhs = TF::ConjOp::create(rewriter, op.getLoc(), lhs_type, lhs); } if (mlir::isa(rhs_type.getElementType()) && op.getAdjY()) { - rhs = rewriter.create(op.getLoc(), rhs_type, rhs); + rhs = TF::ConjOp::create(rewriter, op.getLoc(), rhs_type, rhs); } // Broadcast both operands. 
@@ -3288,8 +3315,8 @@ class ConvertSplitOp : public OpRewritePattern { for (int i = 0; i < num_splits; ++i) { begin_indices[dim_index] = i * slice_size; end_indices[dim_index] = (i + 1) * slice_size; - slices.push_back(rewriter.create( - op.getLoc(), slice_type, op.getValue(), + slices.push_back(stablehlo::SliceOp::create( + rewriter, op.getLoc(), slice_type, op.getValue(), GetI64ArrayAttr(begin_indices, &rewriter), GetI64ArrayAttr(end_indices, &rewriter), GetI64ArrayAttr(strides, &rewriter))); @@ -3332,23 +3359,23 @@ class ConvertSplitOpDynamic : public OpRewritePattern { if (dim_index < 0) dim_index += input_rank; Value input_dim_size = - rewriter.create(loc, input, dim_index); + tensor::DimOp::create(rewriter, loc, input, dim_index); // Calculate the dimension size for each slice along the split dimension. int num_splits = op.getNumResults(); - Value num_splits_value = rewriter.create( - loc, rewriter.getIndexAttr(num_splits)); + Value num_splits_value = arith::ConstantOp::create( + rewriter, loc, rewriter.getIndexAttr(num_splits)); Value slice_size = - rewriter.create(loc, input_dim_size, num_splits_value); + arith::DivSIOp::create(rewriter, loc, input_dim_size, num_splits_value); - Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); + Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0); + Value one = arith::ConstantIndexOp::create(rewriter, loc, 1); SmallVector begin_indices(input_rank, zero); SmallVector end_indices; end_indices.reserve(input_rank); SmallVector strides(input_rank, one); for (int i = 0; i < input_rank; ++i) { - end_indices.push_back(rewriter.create(loc, input, i)); + end_indices.push_back(tensor::DimOp::create(rewriter, loc, input, i)); } // All HLO d_slice results used to replace the original tf.Split op. 
@@ -3356,30 +3383,32 @@ class ConvertSplitOpDynamic : public OpRewritePattern { slices.reserve(num_splits); for (int i = 0; i < num_splits; ++i) { - begin_indices[dim_index] = rewriter.create( - loc, slice_size, rewriter.create(loc, i)); - end_indices[dim_index] = rewriter.create( - loc, slice_size, rewriter.create(loc, i + 1)); + begin_indices[dim_index] = arith::MulIOp::create( + rewriter, loc, slice_size, + arith::ConstantIndexOp::create(rewriter, loc, i)); + end_indices[dim_index] = arith::MulIOp::create( + rewriter, loc, slice_size, + arith::ConstantIndexOp::create(rewriter, loc, i + 1)); Type index_ty = rewriter.getIndexType(); - auto begin_value = rewriter.create( - loc, + auto begin_value = tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(begin_indices.size())}, index_ty), begin_indices); - auto end_value = rewriter.create( - loc, + auto end_value = tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(end_indices.size())}, index_ty), end_indices); - auto stride_value = rewriter.create( - loc, + auto stride_value = tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(strides.size())}, index_ty), strides); - slices.push_back(rewriter.create( - loc, op.getOperation()->getResult(i).getType(), input, begin_value, - end_value, stride_value)); + slices.push_back(stablehlo::RealDynamicSliceOp::create( + rewriter, loc, op.getOperation()->getResult(i).getType(), input, + begin_value, end_value, stride_value)); } rewriter.replaceOp(op, slices); @@ -3484,10 +3513,11 @@ class ConvertSplitVOp : public OpRewritePattern { for (int i = 0, end = op.getNumResults(); i < end; ++i) { end_indices[dim_index] = begin_indices[dim_index] + split_sizes[i]; - slices.push_back(rewriter.create( - op.getLoc(), op.getValue(), GetI64ArrayAttr(begin_indices, &rewriter), - GetI64ArrayAttr(end_indices, &rewriter), - GetI64ArrayAttr(strides, &rewriter))); + slices.push_back( + stablehlo::SliceOp::create(rewriter, op.getLoc(), op.getValue(), + GetI64ArrayAttr(begin_indices, &rewriter), + GetI64ArrayAttr(end_indices, &rewriter), + GetI64ArrayAttr(strides, &rewriter))); // Prepare the begin indice for the next slice. begin_indices[dim_index] = end_indices[dim_index]; } @@ -3568,11 +3598,11 @@ class ConvertStridedSliceOp : public OpRewritePattern { Location loc = op.getLoc(); Value input = op.getInput(); if (!dims_to_reverse.empty()) - input = rewriter.create( - loc, input_ty, op.getInput(), + input = stablehlo::ReverseOp::create( + rewriter, loc, input_ty, op.getInput(), GetI64ArrayAttr(dims_to_reverse, &rewriter)); - auto sliced = rewriter.create( - loc, input, GetI64ArrayAttr(hlo_begin_indices, &rewriter), + auto sliced = stablehlo::SliceOp::create( + rewriter, loc, input, GetI64ArrayAttr(hlo_begin_indices, &rewriter), GetI64ArrayAttr(hlo_end_indices, &rewriter), GetI64ArrayAttr(hlo_strides, &rewriter)); @@ -3663,21 +3693,21 @@ class ConvertStridedSliceOp : public OpRewritePattern { continue; } - auto index = rewriter.create( - loc, op.getBegin(), GetI64ArrayAttr({d}, &rewriter), + auto index = stablehlo::SliceOp::create( + rewriter, loc, op.getBegin(), GetI64ArrayAttr({d}, &rewriter), GetI64ArrayAttr({d + 1}, &rewriter), GetI64ArrayAttr({1}, &rewriter)); // Convert index to scalar. auto reshaped_index = - rewriter.create(loc, type, index); + stablehlo::ReshapeOp::create(rewriter, loc, type, index); // If the index is negative, wrap it around with dimension size. 
auto index_negative = - rewriter.create(loc, reshaped_index, zero); + TF::LessOp::create(rewriter, loc, reshaped_index, zero); auto input_val = GetScalarConstOfType(begin_element_ty, loc, input_shape[d], &rewriter); auto wrapped_index = - rewriter.create(loc, input_val, reshaped_index); - auto final_index = rewriter.create( - loc, type, index_negative, wrapped_index, reshaped_index); + TF::AddV2Op::create(rewriter, loc, input_val, reshaped_index); + auto final_index = stablehlo::SelectOp::create( + rewriter, loc, type, index_negative, wrapped_index, reshaped_index); slice_begin_indices.push_back(final_index); slice_sizes.push_back(1); } @@ -3687,8 +3717,9 @@ class ConvertStridedSliceOp : public OpRewritePattern { slice_sizes, op.getType().getElementType()); // This must be an xla DynamicSlice op due to the inputs that aren't // constant. - auto sliced = rewriter.create( - loc, sliced_type, op.getInput(), slice_begin_indices, slice_sizes_attr); + auto sliced = stablehlo::DynamicSliceOp::create( + rewriter, loc, sliced_type, op.getInput(), slice_begin_indices, + slice_sizes_attr); // Reshape slice result so that the shape is updated depending on // 'new_axis_mask' or 'shrink_axis_mask' attributes. @@ -3760,9 +3791,9 @@ class ConvertStridedSliceGradOp Type element_type = mlir::cast(grad.getType()).getElementType(); // Perform reshape to undo any new/shrink axes done by strided slice. - grad = rewriter.create( - op.getLoc(), tensorflow::GetTypeFromTFTensorShape(shape, element_type), - grad); + grad = stablehlo::ReshapeOp::create( + rewriter, op.getLoc(), + tensorflow::GetTypeFromTFTensorShape(shape, element_type), grad); SmallVector padding_low, padding_high, padding_interm; SmallVector dims_to_reverse; @@ -3797,8 +3828,8 @@ class ConvertStridedSliceGradOp } if (!dims_to_reverse.empty()) { - grad = rewriter.create( - op.getLoc(), grad.getType(), grad, + grad = stablehlo::ReverseOp::create( + rewriter, op.getLoc(), grad.getType(), grad, GetI64ArrayAttr(dims_to_reverse, &rewriter)); } @@ -3840,10 +3871,10 @@ class ConvertRangeOp : public OpRewritePattern { return failure(); } - auto iota = rewriter.create( - op.getLoc(), result_type, rewriter.getI64IntegerAttr(0)); - auto scaled = rewriter.create( - op.getLoc(), result_type, iota, op.getDelta(), + auto iota = stablehlo::IotaOp::create(rewriter, op.getLoc(), result_type, + rewriter.getI64IntegerAttr(0)); + auto scaled = chlo::BroadcastMulOp::create( + rewriter, op.getLoc(), result_type, iota, op.getDelta(), hlo::getBroadcastDimensionsAttr(&rewriter, iota, op.getDelta())); rewriter.replaceOpWithNewOp( op, result_type, scaled, op.getStart(), @@ -3893,25 +3924,25 @@ class ConvertDynamicRangeOp : public OpRewritePattern { // // %size = ceil(abs((%limit - %start) / %delta)) auto range = - rewriter.create(op.getLoc(), limit, start); - auto abs = rewriter.create(op.getLoc(), range); + stablehlo::SubtractOp::create(rewriter, op.getLoc(), limit, start); + auto abs = stablehlo::AbsOp::create(rewriter, op.getLoc(), range); // Delta is not necessarily the same type as start and limit. auto abs_cast = - rewriter.create(op.getLoc(), compute_type, abs); - auto delta_cast = - rewriter.create(op.getLoc(), compute_type, delta); + stablehlo::ConvertOp::create(rewriter, op.getLoc(), compute_type, abs); + auto delta_cast = stablehlo::ConvertOp::create(rewriter, op.getLoc(), + compute_type, delta); // Compute the total number of integer steps and convert to the HLO // dimension tensor. 
auto normalized = - rewriter.create(op.getLoc(), abs_cast, delta_cast); - auto ceil = rewriter.create(op.getLoc(), normalized); - auto steps = rewriter.create( - op.getLoc(), + stablehlo::DivOp::create(rewriter, op.getLoc(), abs_cast, delta_cast); + auto ceil = stablehlo::CeilOp::create(rewriter, op.getLoc(), normalized); + auto steps = stablehlo::ConvertOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape({}, rewriter.getI64Type()), ceil); - auto reshape = rewriter.create( - op.getLoc(), + auto reshape = stablehlo::ReshapeOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape({1}, rewriter.getI64Type()), steps); @@ -3920,15 +3951,16 @@ class ConvertDynamicRangeOp : public OpRewritePattern { // %range = %start + %delta * iota(%size) auto out_scalar_type = tensorflow::GetTypeFromTFTensorShape( {}, getElementTypeOrSelf(result_type)); - auto start_out_cast = rewriter.create( - op.getLoc(), out_scalar_type, start); - auto delta_out_cast = rewriter.create( - op.getLoc(), out_scalar_type, delta); - - auto iota = rewriter.create( - op.getLoc(), result_type, reshape, rewriter.getI64IntegerAttr(0)); - auto scaled = rewriter.create( - op.getLoc(), result_type, iota, delta_out_cast, + auto start_out_cast = stablehlo::ConvertOp::create(rewriter, op.getLoc(), + out_scalar_type, start); + auto delta_out_cast = stablehlo::ConvertOp::create(rewriter, op.getLoc(), + out_scalar_type, delta); + + auto iota = stablehlo::DynamicIotaOp::create(rewriter, op.getLoc(), + result_type, reshape, + rewriter.getI64IntegerAttr(0)); + auto scaled = chlo::BroadcastMulOp::create( + rewriter, op.getLoc(), result_type, iota, delta_out_cast, hlo::getBroadcastDimensionsAttr(&rewriter, iota, delta_cast)); rewriter.replaceOpWithNewOp( op, result_type, scaled, start_out_cast, @@ -3979,29 +4011,32 @@ class ConvertLinSpaceOp : public OpRewritePattern { int64_t num = (*num_attr.begin()).getSExtValue(); // Calculate the scaling that needs to be applied to the iota. - auto step_numerator = rewriter.create( - op.getLoc(), op.getStart().getType(), op.getStop(), op.getStart(), + auto step_numerator = chlo::BroadcastSubOp::create( + rewriter, op.getLoc(), op.getStart().getType(), op.getStop(), + op.getStart(), hlo::getBroadcastDimensionsAttr(&rewriter, op.getStop(), op.getStart())); - Value step_denominator = rewriter.create( - op.getLoc(), op.getNum(), result_type.getElementType()); + Value step_denominator = stablehlo::ConvertOp::create( + rewriter, op.getLoc(), op.getNum(), result_type.getElementType()); if (num > 1) { Value one = GetScalarConstOfType(result_type.getElementType(), op.getLoc(), 1, &rewriter); - step_denominator = rewriter.create( - op.getLoc(), step_denominator.getType(), step_denominator, one, + step_denominator = chlo::BroadcastSubOp::create( + rewriter, op.getLoc(), step_denominator.getType(), step_denominator, + one, hlo::getBroadcastDimensionsAttr(&rewriter, step_denominator, one)); } - auto step = rewriter.create( - op.getLoc(), step_numerator.getType(), step_numerator, step_denominator, + auto step = chlo::BroadcastDivOp::create( + rewriter, op.getLoc(), step_numerator.getType(), step_numerator, + step_denominator, hlo::getBroadcastDimensionsAttr(&rewriter, step_numerator, step_denominator)); // Scale the iota and add the offset. 
- auto iota = rewriter.create( - op.getLoc(), result_type, rewriter.getI64IntegerAttr(0)); - auto scaled = rewriter.create( - op.getLoc(), result_type, iota, step, + auto iota = stablehlo::IotaOp::create(rewriter, op.getLoc(), result_type, + rewriter.getI64IntegerAttr(0)); + auto scaled = chlo::BroadcastMulOp::create( + rewriter, op.getLoc(), result_type, iota, step, hlo::getBroadcastDimensionsAttr(&rewriter, iota, step)); rewriter.replaceOpWithNewOp( op, result_type, scaled, op.getStart(), @@ -4068,14 +4103,14 @@ class GenericConvertReductionOp : public OpRewritePattern { // repeated arithmetic operations. Type reduce_element_type = is_accumulation ? GetAccumulationType(element_type) : element_type; - auto casted_input = rewriter.create( - loc, op.getInput(), reduce_element_type); + auto casted_input = stablehlo::ConvertOp::create( + rewriter, loc, op.getInput(), reduce_element_type); // Each reduction op can have a different initial value. Value init = Derived::GetInitialValue(reduce_element_type, loc, &rewriter); - auto reduction = rewriter.create( - loc, casted_input.getResult(), init, + auto reduction = stablehlo::ReduceOp::create( + rewriter, loc, casted_input.getResult(), init, GetI64ArrayAttr(xla_dimensions, &rewriter), reduce_element_type); BuildReduceBody(reduce_element_type, &reduction.getBody(), &rewriter); @@ -4083,32 +4118,34 @@ class GenericConvertReductionOp : public OpRewritePattern { // The mean op needs to divide by the product of the reduced dimensions. if (std::is_same::value) { - Value in_shape = rewriter.create(loc, op.getInput()); - Value divisor_count = rewriter.create(loc, 1); + Value in_shape = shape::ShapeOfOp::create(rewriter, loc, op.getInput()); + Value divisor_count = arith::ConstantIndexOp::create(rewriter, loc, 1); for (size_t i = 0; i < input_shape.size(); ++i) { if (reduced_dimensions_bitmap[i]) { - Value index = rewriter.create(loc, i); - auto dim = rewriter.create(loc, in_shape, index); + Value index = arith::ConstantIndexOp::create(rewriter, loc, i); + auto dim = tensor::ExtractOp::create(rewriter, loc, in_shape, index); divisor_count = - rewriter.create(loc, divisor_count, dim); + arith::MulIOp::create(rewriter, loc, divisor_count, dim); } } // HLO ops are only defined on tensors, so we cast the divisor from // index -> i64 -> tensor<1xi64> -> tensor -> tensor - Value divisor_casted = rewriter.create( - loc, rewriter.getI64Type(), divisor_count); - Value divisor_tensor = rewriter.create( - loc, tensorflow::GetTypeFromTFTensorShape({}, rewriter.getI64Type()), + Value divisor_casted = arith::IndexCastOp::create( + rewriter, loc, rewriter.getI64Type(), divisor_count); + Value divisor_tensor = tensor::FromElementsOp::create( + rewriter, loc, + tensorflow::GetTypeFromTFTensorShape({}, rewriter.getI64Type()), divisor_casted); - Value divisor = rewriter.create( - loc, tensorflow::GetTypeFromTFTensorShape({}, reduce_element_type), + Value divisor = stablehlo::ConvertOp::create( + rewriter, loc, + tensorflow::GetTypeFromTFTensorShape({}, reduce_element_type), divisor_tensor); auto broadcast_dims = rewriter.getDenseI64ArrayAttr({}); - result = rewriter.create(loc, result, divisor, - broadcast_dims); + result = chlo::BroadcastDivOp::create(rewriter, loc, result, divisor, + broadcast_dims); } - result = rewriter.create(loc, result, element_type); + result = stablehlo::ConvertOp::create(rewriter, loc, result, element_type); // Need to reshape back after the reduction if we're keeping the reduced // dimensions. 
Note that we do this through successive (nominally 1) @@ -4122,8 +4159,8 @@ class GenericConvertReductionOp : public OpRewritePattern { if (dim_is_reduced.value()) { auto index_attr = GetI32ElementsAttr( {static_cast(dim_is_reduced.index())}, &rewriter); - Value index = rewriter.create(loc, index_attr); - result = rewriter.create(loc, result, index); + Value index = arith::ConstantOp::create(rewriter, loc, index_attr); + result = TF::ExpandDimsOp::create(rewriter, loc, result, index); } } } @@ -4300,15 +4337,15 @@ class ConvertArgMinMaxOp : public OpRewritePattern { IntegerAttr iota_dimension = IntegerAttr::get(rewriter.getIntegerType(64), axis); - Value input_shape = rewriter.create(loc, op.getInput()); - Value index_values = rewriter.create( - loc, index_type, input_shape, iota_dimension); + Value input_shape = shape::ShapeOfOp::create(rewriter, loc, op.getInput()); + Value index_values = stablehlo::DynamicIotaOp::create( + rewriter, loc, index_type, input_shape, iota_dimension); Value operands[] = {op.getInput(), index_values}; Value init_values[] = {init_value, index_init_value}; - auto reduction = rewriter.create( - loc, llvm::ArrayRef(operands), + auto reduction = stablehlo::ReduceOp::create( + rewriter, loc, llvm::ArrayRef(operands), llvm::ArrayRef(init_values), GetI64ArrayAttr({axis}, &rewriter), TypeRange({input_element_type, index_element_type})); auto direction = Derived::GetDirection(); @@ -4426,14 +4463,14 @@ class ConvertTensorScatterOp : public OpRewritePattern { auto const_attr = GetI64ElementsAttr(expected_update_shape, &rewriter); auto const_op = - rewriter.create(op->getLoc(), const_type, const_attr); + TF::ConstOp::create(rewriter, op->getLoc(), const_type, const_attr); auto broadcast_to_type = tensorflow::GetTypeFromTFTensorShape( llvm::ArrayRef(expected_update_shape), updates_ty.getElementType()); - updates = rewriter.create( - op->getLoc(), broadcast_to_type, op.getUpdates(), const_op); + updates = TF::BroadcastToOp::create( + rewriter, op->getLoc(), broadcast_to_type, op.getUpdates(), const_op); updates_ty = mlir::dyn_cast(updates.getType()); } @@ -4455,9 +4492,9 @@ class ConvertTensorScatterOp : public OpRewritePattern { indices_rank - 1); Location loc = op.getLoc(); - auto scatter = rewriter.create( - loc, op.getType(), ValueRange(Value(op.getTensor())), op.getIndices(), - updates, dims_attr); + auto scatter = stablehlo::ScatterOp::create( + rewriter, loc, op.getType(), ValueRange(Value(op.getTensor())), + op.getIndices(), updates, dims_attr); Derived::BuildScatterBody(tensor_ty.getElementType(), &scatter.getUpdateComputation(), loc, rewriter); @@ -4479,7 +4516,7 @@ class ConvertTensorScatterUpdateOp Type type = tensorflow::GetTypeFromTFTensorShape(/*shape=*/{}, element_type); block->addArguments({type, type}, SmallVector(2, loc)); - builder.create(loc, block->getArgument(1)); + stablehlo::ReturnOp::create(builder, loc, block->getArgument(1)); } }; @@ -4496,9 +4533,9 @@ class ConvertTensorScatterAddOp Type type = tensorflow::GetTypeFromTFTensorShape(/*shape=*/{}, element_type); block->addArguments({type, type}, SmallVector(2, loc)); - auto add_op = builder.create(loc, block->getArgument(0), - block->getArgument(1)); - builder.create(loc, add_op.getResult()); + auto add_op = stablehlo::AddOp::create(builder, loc, block->getArgument(0), + block->getArgument(1)); + stablehlo::ReturnOp::create(builder, loc, add_op.getResult()); } }; @@ -4515,9 +4552,9 @@ class ConvertTensorScatterSubOp Type type = tensorflow::GetTypeFromTFTensorShape(/*shape=*/{}, element_type); 
block->addArguments({type, type}, SmallVector(2, loc)); - auto sub_op = builder.create( - loc, block->getArgument(0), block->getArgument(1)); - builder.create(loc, sub_op.getResult()); + auto sub_op = stablehlo::SubtractOp::create( + builder, loc, block->getArgument(0), block->getArgument(1)); + stablehlo::ReturnOp::create(builder, loc, sub_op.getResult()); } }; @@ -4534,9 +4571,9 @@ class ConvertTensorScatterMinOp Type type = tensorflow::GetTypeFromTFTensorShape(/*shape=*/{}, element_type); block->addArguments({type, type}, SmallVector(2, loc)); - auto min_op = builder.create(loc, block->getArgument(0), - block->getArgument(1)); - builder.create(loc, min_op.getResult()); + auto min_op = stablehlo::MinOp::create(builder, loc, block->getArgument(0), + block->getArgument(1)); + stablehlo::ReturnOp::create(builder, loc, min_op.getResult()); } }; @@ -4553,9 +4590,9 @@ class ConvertTensorScatterMaxOp Type type = tensorflow::GetTypeFromTFTensorShape(/*shape=*/{}, element_type); block->addArguments({type, type}, SmallVector(2, loc)); - auto max_op = builder.create(loc, block->getArgument(0), - block->getArgument(1)); - builder.create(loc, max_op.getResult()); + auto max_op = stablehlo::MaxOp::create(builder, loc, block->getArgument(0), + block->getArgument(1)); + stablehlo::ReturnOp::create(builder, loc, max_op.getResult()); } }; @@ -4670,10 +4707,10 @@ class ConvertTileOpDynamic : public OpRewritePattern { auto dim_size = input_ty.getDimSize(i); if (dim_size == ShapedType::kDynamic) { input_shape_values.push_back( - rewriter.create(loc, input, i)); + tensor::DimOp::create(rewriter, loc, input, i)); } else { - input_shape_values.push_back(rewriter.create( - loc, rewriter.getIndexAttr(dim_size))); + input_shape_values.push_back(arith::ConstantOp::create( + rewriter, loc, rewriter.getIndexAttr(dim_size))); } } @@ -4691,12 +4728,12 @@ class ConvertTileOpDynamic : public OpRewritePattern { SmallVector out_dim_size; out_dim_size.reserve(input_rank * 2); for (int64_t dim_idx = 0; dim_idx < input_rank; ++dim_idx) { - Value index = rewriter.create( - loc, rewriter.getIndexAttr(dim_idx)); - Value multiples_size = - rewriter.create(loc, multiples, ValueRange{index}); + Value index = arith::ConstantOp::create(rewriter, loc, + rewriter.getIndexAttr(dim_idx)); + Value multiples_size = tensor::ExtractOp::create(rewriter, loc, multiples, + ValueRange{index}); Value multiples_size_casted = - rewriter.create(loc, index_ty, multiples_size); + arith::IndexCastOp::create(rewriter, loc, index_ty, multiples_size); out_dim_size.push_back(multiples_size_casted); out_dim_size.push_back(input_shape_values[dim_idx]); } @@ -4707,8 +4744,8 @@ class ConvertTileOpDynamic : public OpRewritePattern { } auto broadcast_dims_attr = GetI64ArrayAttr(broadcast_dimensions, &rewriter); - Value out_dim_size_tensor = rewriter.create( - loc, + Value out_dim_size_tensor = tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(out_dim_size.size())}, index_ty), out_dim_size); @@ -4716,19 +4753,21 @@ class ConvertTileOpDynamic : public OpRewritePattern { ShapedType::kDynamic); RankedTensorType broadcast_type = tensorflow::GetTypeFromTFTensorShape(broadcast_shape, element_type); - Value broadcast = rewriter.create( - loc, broadcast_type, input, out_dim_size_tensor, broadcast_dims_attr); + Value broadcast = stablehlo::DynamicBroadcastInDimOp::create( + rewriter, loc, broadcast_type, input, out_dim_size_tensor, + broadcast_dims_attr); // %shape = [MS1, MS2] SmallVector shape_values; 
shape_values.reserve(input_rank); for (int64_t i = 0; i < input_rank; ++i) { - Value dim_size_value = rewriter.create( - loc, out_dim_size[2 * i], out_dim_size[2 * i + 1]); + Value dim_size_value = mlir::arith::MulIOp::create( + rewriter, loc, out_dim_size[2 * i], out_dim_size[2 * i + 1]); shape_values.push_back(dim_size_value); } - Value shape = rewriter.create( - loc, tensorflow::GetTypeFromTFTensorShape({input_rank}, index_ty), + Value shape = tensor::FromElementsOp::create( + rewriter, loc, + tensorflow::GetTypeFromTFTensorShape({input_rank}, index_ty), shape_values); rewriter.replaceOpWithNewOp(op, op.getType(), broadcast, shape); @@ -4758,8 +4797,8 @@ class ConvertMaxPoolGradOp : public OpRewritePattern { input_ty.getShape(), op.getKsize(), op.getStrides(), op.getPadding(), &rewriter); - auto result = rewriter.create( - loc, op.getType(), op.getOrigInput(), op.getGrad(), + auto result = stablehlo::SelectAndScatterOp::create( + rewriter, loc, op.getType(), op.getOrigInput(), op.getGrad(), GetScalarConstOfType(element_type, loc, 0, &rewriter), ToDenseI64ArrayAttr(GetI64ElementsAttr(op.getKsize()), &rewriter), ToDenseI64ArrayAttr(GetI64ElementsAttr(op.getStrides()), &rewriter), @@ -4776,10 +4815,10 @@ class ConvertMaxPoolGradOp : public OpRewritePattern { tensorflow::GetTypeFromTFTensorShape(/*shape=*/{}, element_type); block->addArguments({type, type}, SmallVector(2, loc)); - auto reducer = rewriter.create( - loc, block->getArgument(0), block->getArgument(1), + auto reducer = stablehlo::CompareOp::create( + rewriter, loc, block->getArgument(0), block->getArgument(1), stablehlo::ComparisonDirection::GE); - rewriter.create(loc, reducer.getResult()); + stablehlo::ReturnOp::create(rewriter, loc, reducer.getResult()); } rewriter.replaceOp(op, result); @@ -4955,7 +4994,7 @@ class ConvertConvBackpropInputOp : public OpRewritePattern { Type filter_element_ty = filter_ty.getElementType(); auto ty = tensorflow::GetTypeFromTFTensorShape(new_shape, filter_element_ty); - filter = rewriter.create(op.getLoc(), ty, filter); + filter = stablehlo::ReshapeOp::create(rewriter, op.getLoc(), ty, filter); // 2. Transpose to [H, W, ..., G, filter_in_depth, out_depth / G]. llvm::SmallVector perm(num_dims + 1); @@ -4963,15 +5002,15 @@ class ConvertConvBackpropInputOp : public OpRewritePattern { std::swap(perm[num_spatial_dims], perm[num_spatial_dims + 1]); std::swap(new_shape[num_spatial_dims], new_shape[num_spatial_dims + 1]); ty = tensorflow::GetTypeFromTFTensorShape(new_shape, filter_element_ty); - filter = rewriter.create( - op.getLoc(), ty, filter, GetI64ArrayAttr(perm, &rewriter)); + filter = stablehlo::TransposeOp::create(rewriter, op.getLoc(), ty, filter, + GetI64ArrayAttr(perm, &rewriter)); // 3. Reshape to [H, W, ..., in_depth, out_depth / G]. new_shape[num_spatial_dims] *= new_shape[num_spatial_dims + 1]; new_shape[num_spatial_dims + 1] = new_shape.back(); new_shape.pop_back(); ty = tensorflow::GetTypeFromTFTensorShape(new_shape, filter_element_ty); - filter = rewriter.create(op.getLoc(), ty, filter); + filter = stablehlo::ReshapeOp::create(rewriter, op.getLoc(), ty, filter); } SmallVector kernel_spatial_dims; @@ -4979,13 +5018,14 @@ class ConvertConvBackpropInputOp : public OpRewritePattern { std::iota(kernel_spatial_dims.begin(), kernel_spatial_dims.end(), 0); // Mirror the filter in the spatial dimensions. 
- filter = rewriter.create( - op.getLoc(), filter, GetI64ArrayAttr(kernel_spatial_dims, &rewriter)); + filter = stablehlo::ReverseOp::create( + rewriter, op.getLoc(), filter, + GetI64ArrayAttr(kernel_spatial_dims, &rewriter)); // activation gradients // = gradients (with padding and dilation) mirrored_weights - Value result = rewriter.create( - op.getLoc(), op.getType(), op.getOutBackprop(), filter, + Value result = stablehlo::ConvolutionOp::create( + rewriter, op.getLoc(), op.getType(), op.getOutBackprop(), filter, /*window_strides=*/ GetI64ArrayAttrForValue(/*size=*/num_spatial_dims, /*val=*/1, &rewriter), @@ -5191,8 +5231,8 @@ class ConvertConvBackpropFilterOp : public OpRewritePattern { const int batch_dim = tensorflow::GetTensorBatchDimIndex(num_dims, data_format); - Value result = rewriter.create( - op.getLoc(), op.getType(), op.getInput(), op.getOutBackprop(), + Value result = stablehlo::ConvolutionOp::create( + rewriter, op.getLoc(), op.getType(), op.getInput(), op.getOutBackprop(), /*window_strides=*/GetI64ArrayAttr(window_strides, &rewriter), /*padding=*/paddings_attr, /*lhs_dilation=*/ GetI64ArrayAttrForValue(/*size=*/num_spatial_dims, /*val=*/1, @@ -5331,14 +5371,15 @@ class ConvertInfeedDequeueTupleOp // Infeed takes a single token operand. Generate the token using // create_token op to pass to the infeed op. - auto token = rewriter.create( - op.getLoc(), stablehlo::TokenType::get(rewriter.getContext())); + auto token = stablehlo::CreateTokenOp::create( + rewriter, op.getLoc(), + stablehlo::TokenType::get(rewriter.getContext())); result_types.push_back(token.getType()); ArrayAttr layout; // filled in during the xla-adjust-layout pass - auto data_and_token = rewriter.create( - op.getLoc(), result_types, token, + auto data_and_token = stablehlo::InfeedOp::create( + rewriter, op.getLoc(), result_types, token, /*infeed_config=*/rewriter.getStringAttr(""), /*layout=*/layout); @@ -5409,11 +5450,11 @@ class ConvertOutfeedEnqueueTupleOp PatternRewriter &rewriter) const override { auto token_type = stablehlo::TokenType::get(rewriter.getContext()); auto token = - rewriter.create(op.getLoc(), token_type); + stablehlo::CreateTokenOp::create(rewriter, op.getLoc(), token_type); - rewriter.create( - op.getLoc(), token_type, op.getInputs(), token, - /*outfeed_config=*/rewriter.getStringAttr("")); + stablehlo::OutfeedOp::create(rewriter, op.getLoc(), token_type, + op.getInputs(), token, + /*outfeed_config=*/rewriter.getStringAttr("")); rewriter.eraseOp(op); return success(); } @@ -5475,14 +5516,15 @@ class ConvertUnpackOp : public OpRewritePattern { begin_indices[axis] = i; end_indices[axis] = i + 1; - auto slice_op = rewriter.create( - op.getLoc(), op.getValue(), GetI64ArrayAttr(begin_indices, &rewriter), - GetI64ArrayAttr(end_indices, &rewriter), - GetI64ArrayAttr(strides, &rewriter)); + auto slice_op = + stablehlo::SliceOp::create(rewriter, op.getLoc(), op.getValue(), + GetI64ArrayAttr(begin_indices, &rewriter), + GetI64ArrayAttr(end_indices, &rewriter), + GetI64ArrayAttr(strides, &rewriter)); // Reshape to drop the axis dimension. 
- auto result = rewriter.create( - op.getLoc(), op.getType(i), slice_op, - rewriter.getI64ArrayAttr(op.getAxis())); + auto result = + TF::SqueezeOp::create(rewriter, op.getLoc(), op.getType(i), slice_op, + rewriter.getI64ArrayAttr(op.getAxis())); results.push_back(result); } @@ -5525,16 +5567,16 @@ class ConvertUnpackOpDynamic : public OpRewritePattern { for (int64_t dim_idx = 0; dim_idx < value_rank; ++dim_idx) { int64_t dim_size = value_type.getDimSize(dim_idx); if (dim_size == ShapedType::kDynamic) { - Value dim_i = rewriter.create( - loc, shape_scalar_type, - rewriter.create(loc, op.getOperand(), dim_idx)); + Value dim_i = arith::IndexCastOp::create( + rewriter, loc, shape_scalar_type, + tensor::DimOp::create(rewriter, loc, op.getOperand(), dim_idx)); end_indices.push_back(dim_i); if (dim_idx != axis) { shape_values.push_back(dim_i); } } else { - Value dim_i = rewriter.create( - loc, shape_scalar_type, + Value dim_i = arith::ConstantOp::create( + rewriter, loc, shape_scalar_type, rewriter.getIntegerAttr(shape_scalar_type, dim_size)); end_indices.push_back(dim_i); if (dim_idx != axis) { @@ -5545,44 +5587,45 @@ class ConvertUnpackOpDynamic : public OpRewritePattern { } } begin_indices.push_back( - rewriter.create(loc, 0, 32)); - strides.push_back(rewriter.create(loc, 1, 32)); + arith::ConstantIntOp::create(rewriter, loc, 0, 32)); + strides.push_back(arith::ConstantIntOp::create(rewriter, loc, 1, 32)); } SmallVector results; results.reserve(op.getNumResults()); Type i32_ty = rewriter.getI32Type(); for (int64_t i = 0; i < op.getNumResults(); ++i) { - begin_indices[axis] = rewriter.create(loc, i, 32); - end_indices[axis] = rewriter.create(loc, i + 1, 32); - Value slice_op = rewriter.create( - loc, + begin_indices[axis] = arith::ConstantIntOp::create(rewriter, loc, i, 32); + end_indices[axis] = + arith::ConstantIntOp::create(rewriter, loc, i + 1, 32); + Value slice_op = stablehlo::RealDynamicSliceOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape(slice_shape, value_type.getElementType()), op.getValue(), - rewriter.create( - loc, + tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(begin_indices.size())}, i32_ty), begin_indices), - rewriter.create( - loc, + tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(end_indices.size())}, i32_ty), end_indices), - rewriter.create( - loc, + tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(strides.size())}, i32_ty), strides)); // Reshape to drop the axis dimension. 
- Value new_shape = rewriter.create( - loc, + Value new_shape = tensor::FromElementsOp::create( + rewriter, loc, tensorflow::GetTypeFromTFTensorShape( {static_cast(shape_values.size())}, i32_ty), shape_values); - Value reshape_op = rewriter.create( - loc, op.getType(i), slice_op, new_shape); + Value reshape_op = stablehlo::DynamicReshapeOp::create( + rewriter, loc, op.getType(i), slice_op, new_shape); results.push_back(reshape_op); } @@ -5619,16 +5662,20 @@ class ConvertSigmoidGradOpDynamic : public OpRewritePattern { assert(mlir::isa(elem_tp)); attr = rewriter.getFloatAttr(elem_tp, 1); } - Value one = rewriter.create( - loc, DenseElementsAttr::get( - tensorflow::GetTypeFromTFTensorShape({}, elem_tp), attr)); - - auto v0 = rewriter.create( - loc, dy, y, hlo::getBroadcastDimensionsAttr(&rewriter, dy, y)); - auto v1 = rewriter.create( - loc, one, y, hlo::getBroadcastDimensionsAttr(&rewriter, one, y)); - auto result = rewriter.create( - loc, v0, v1, hlo::getBroadcastDimensionsAttr(&rewriter, v0, v1)); + Value one = stablehlo::ConstantOp::create( + rewriter, loc, + DenseElementsAttr::get( + tensorflow::GetTypeFromTFTensorShape({}, elem_tp), attr)); + + auto v0 = chlo::BroadcastMulOp::create( + rewriter, loc, dy, y, + hlo::getBroadcastDimensionsAttr(&rewriter, dy, y)); + auto v1 = chlo::BroadcastSubOp::create( + rewriter, loc, one, y, + hlo::getBroadcastDimensionsAttr(&rewriter, one, y)); + auto result = chlo::BroadcastMulOp::create( + rewriter, loc, v0, v1, + hlo::getBroadcastDimensionsAttr(&rewriter, v0, v1)); rewriter.replaceOp(op, result.getOperation()->getResults()); return success(); @@ -5684,8 +5731,8 @@ class GenericConvertUnsortedSegmentReductionOp : public OpRewritePattern { // 'operand' parameter to scatter to for the final scatter op. Value init = ConcreteClass::GetInitialValue(data_type.getElementType(), op.getLoc(), &rewriter); - auto broadcasted_init = rewriter.create( - op.getLoc(), output_type, init, + auto broadcasted_init = stablehlo::BroadcastOp::create( + rewriter, op.getLoc(), output_type, init, GetI64ArrayAttr(output_shape, &rewriter)); // Parameters for the generated scatter op. @@ -5702,9 +5749,10 @@ class GenericConvertUnsortedSegmentReductionOp : public OpRewritePattern { /*scatterIndicesBatchingDims=*/{}, scatter_dims_to_operand_dims, index_vector_dim); - auto scatter = rewriter.create( - op.getLoc(), op.getType(), ValueRange(Value(broadcasted_init)), - op.getSegmentIds(), op.getData(), dims_attr); + auto scatter = stablehlo::ScatterOp::create( + rewriter, op.getLoc(), op.getType(), + ValueRange(Value(broadcasted_init)), op.getSegmentIds(), op.getData(), + dims_attr); BuildReduceBody(data_type.getElementType(), &scatter.getUpdateComputation(), &rewriter); @@ -5868,8 +5916,8 @@ class ConvertRandomShuffleOp : public OpRewritePattern { // Generate range(n) as the initial value for the indices to be swapped. auto indices_type = tensorflow::GetTypeFromTFTensorShape( {first_dim_size}, rewriter.getIntegerType(32)); - Value indices = rewriter.create( - op.getLoc(), indices_type, rewriter.getI64IntegerAttr(0)); + Value indices = stablehlo::IotaOp::create( + rewriter, op.getLoc(), indices_type, rewriter.getI64IntegerAttr(0)); // Generate random numbers to be used as swaps for the indices. Value swaps = CreateRngUniform32(op.getLoc(), first_dim_size, 0, @@ -5889,22 +5937,23 @@ class ConvertRandomShuffleOp : public OpRewritePattern { // We need to swap the indices[i] with indices[swaps[i]]. First get // these index values. 
- Value source_index = builder->create( - loc, indices, i, scalar_one); - Value swap_index = builder->create( - loc, scalar_i32_type, - builder->create(loc, swaps, i, - scalar_one)); - Value target_index = builder->create( - loc, indices, swap_index, scalar_one); + Value source_index = stablehlo::DynamicSliceOp::create( + *builder, loc, indices, i, scalar_one); + Value swap_index = stablehlo::ReshapeOp::create( + *builder, loc, scalar_i32_type, + stablehlo::DynamicSliceOp::create(*builder, loc, swaps, i, + scalar_one)); + Value target_index = stablehlo::DynamicSliceOp::create( + *builder, loc, indices, swap_index, scalar_one); // Then perform the swap. // indices[i] <- indices[swaps[i]] - indices = builder->create( - loc, indices.getType(), indices, target_index, llvm::ArrayRef(i)); + indices = stablehlo::DynamicUpdateSliceOp::create( + *builder, loc, indices.getType(), indices, target_index, + llvm::ArrayRef(i)); // indices[swaps[i]] <- indices[i] - indices = builder->create( - loc, indices.getType(), indices, source_index, + indices = stablehlo::DynamicUpdateSliceOp::create( + *builder, loc, indices.getType(), indices, source_index, llvm::ArrayRef(swap_index)); // Update new values. @@ -5932,25 +5981,27 @@ class ConvertRandomShuffleOp : public OpRewritePattern { SmallVector slice_sizes_values; for (auto i = 0; i < slice_sizes.size(); ++i) { if (slice_sizes[i] == tensorflow::kTFDynamicSize) { - Value i_const = rewriter.create( - op.getLoc(), rewriter.getIndexAttr(i)); + Value i_const = arith::ConstantOp::create(rewriter, op.getLoc(), + rewriter.getIndexAttr(i)); Value slice_size_index = - rewriter.create(op.getLoc(), op.getValue(), i_const); - Value index_to_i64 = rewriter.create( - op.getLoc(), rewriter.getI64Type(), slice_size_index); - Value i64_to_tensor = rewriter.create( - op.getLoc(), + shape::DimOp::create(rewriter, op.getLoc(), op.getValue(), i_const); + Value index_to_i64 = arith::IndexCastOp::create( + rewriter, op.getLoc(), rewriter.getI64Type(), slice_size_index); + Value i64_to_tensor = tensor::FromElementsOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape({1}, rewriter.getI64Type()), index_to_i64); slice_sizes_values.push_back(i64_to_tensor); } else { - slice_sizes_values.push_back(rewriter.create( - op.getLoc(), GetI64ElementsAttr({slice_sizes[i]}, &rewriter))); + slice_sizes_values.push_back(stablehlo::ConstantOp::create( + rewriter, op.getLoc(), + GetI64ElementsAttr({slice_sizes[i]}, &rewriter))); } } - auto slice_sizes_concat = rewriter.create( - op.getLoc(), slice_sizes_values, rewriter.getI64IntegerAttr(0)); + auto slice_sizes_concat = stablehlo::ConcatenateOp::create( + rewriter, op.getLoc(), slice_sizes_values, + rewriter.getI64IntegerAttr(0)); rewriter.replaceOpWithNewOp( op, op.getType(), op.getValue(), swaped_indices, slice_sizes_concat, dims_attr); @@ -5981,8 +6032,8 @@ class ConvertXlaShardingOp : public OpRewritePattern { NamedAttribute call_target_name = rewriter.getNamedAttr( "call_target_name", rewriter.getStringAttr("Sharding")); - auto custom_call = rewriter.create( - op.getLoc(), op.getType(), op.getInput(), + auto custom_call = stablehlo::CustomCallOp::create( + rewriter, op.getLoc(), op.getType(), op.getInput(), ArrayRef{call_target_name}); custom_call->setAttr(kShardingAttr, *sharding); rewriter.replaceOp(op, custom_call.getResult(0)); @@ -6023,8 +6074,8 @@ class ConvertInplaceUpdateOp : public OpRewritePattern { // subsequent ones are constructed based on zero_attr. Thus the type // for zero_attr needs to be i32 as well. 
auto zero_attr = IntegerAttr::get(rewriter.getIntegerType(32), 0); - auto unpacked_indices = rewriter.create( - op.getLoc(), unpacked_indices_type, indices, zero_attr); + auto unpacked_indices = TF::UnpackOp::create( + rewriter, op.getLoc(), unpacked_indices_type, indices, zero_attr); SmallVector split_updates_shape; split_updates_shape.append(updates_type.getShape().begin(), @@ -6036,10 +6087,10 @@ class ConvertInplaceUpdateOp : public OpRewritePattern { tensorflow::GetTypeFromTFTensorShape(split_updates_shape, updates_type.getElementType())); - auto cst = rewriter.create(op.getLoc(), zero_attr) + auto cst = stablehlo::ConstantOp::create(rewriter, op.getLoc(), zero_attr) .getResult(); - auto split_updates = rewriter.create( - op.getLoc(), split_updates_type, cst, updates); + auto split_updates = TF::SplitOp::create(rewriter, op.getLoc(), + split_updates_type, cst, updates); SmallVector input_indices; input_indices.resize(input_type.getRank(), cst); @@ -6047,8 +6098,9 @@ class ConvertInplaceUpdateOp : public OpRewritePattern { for (auto pair : llvm::zip(unpacked_indices.getOutput(), split_updates.getOutput())) { input_indices.front() = std::get<0>(pair); - input = rewriter.create( - op.getLoc(), op.getType(), input, std::get<1>(pair), input_indices); + input = stablehlo::DynamicUpdateSliceOp::create( + rewriter, op.getLoc(), op.getType(), input, std::get<1>(pair), + input_indices); } rewriter.replaceOp(op, input); @@ -6073,8 +6125,8 @@ class ConvertXlaDynamicUpdateSliceOp SmallVector unpacked_indices_type( indices_type.getDimSize(0), tensorflow::GetTypeFromTFTensorShape( {}, indices_type.getElementType())); - auto unpacked_indices = rewriter.create( - op.getLoc(), unpacked_indices_type, op.getIndices(), + auto unpacked_indices = TF::UnpackOp::create( + rewriter, op.getLoc(), unpacked_indices_type, op.getIndices(), IntegerAttr::get(rewriter.getIntegerType(64), 0)); rewriter.replaceOpWithNewOp( op, op.getType(), op.getInput(), op.getUpdate(), @@ -6106,8 +6158,8 @@ class ConvertXlaReduceScatterOp Location loc = op.getLoc(); Type element_type = getElementTypeOrSelf(op.getInput().getType()); - auto reduce_scatter = rewriter.create( - loc, op.getType(), op.getInput(), + auto reduce_scatter = stablehlo::ReduceScatterOp::create( + rewriter, loc, op.getType(), op.getInput(), rewriter.getIntegerAttr(rewriter.getIntegerType(64), scatter_dimension.getSExtValue()), replica_groups, stablehlo::ChannelHandleAttr()); @@ -6140,8 +6192,8 @@ class ConvertXlaReduceScatterOp auto divisor = GetScalarConstOfType(element_type, loc, replica_group_size, &rewriter); auto broadcast_dims = rewriter.getDenseI64ArrayAttr({}); - result = rewriter.create( - loc, result, divisor.getResult(), broadcast_dims); + result = chlo::BroadcastDivOp::create( + rewriter, loc, result, divisor.getResult(), broadcast_dims); } rewriter.replaceOp(op, {result}); @@ -6171,8 +6223,8 @@ class ConvertXlaReduceWindowOp SmallVector result_types{op.getResult().getType()}; // Create the stablehlo.SelectAndScatter op. 
- auto reduce_window_op = rewriter.create( - loc, result_types, op.getInput(), op.getInitValue(), + auto reduce_window_op = stablehlo::ReduceWindowOp::create( + rewriter, loc, result_types, op.getInput(), op.getInitValue(), ToDenseI64ArrayAttr(window_dimensions, &rewriter), ToDenseI64ArrayAttr(window_strides, &rewriter), ToDenseI64ArrayAttr(base_dilations, &rewriter), @@ -6213,20 +6265,20 @@ class ConvertClipByValueOp : public OpRewritePattern { return failure(); } - auto shape = rewriter.create( - op.getLoc(), - tensorflow::GetTypeFromTFTensorShape({input_ty.getRank()}, - rewriter.getI32Type()), - input); + auto shape = + TF::ShapeOp::create(rewriter, op.getLoc(), + tensorflow::GetTypeFromTFTensorShape( + {input_ty.getRank()}, rewriter.getI32Type()), + input); if (min_ty != input_ty) { - min = - rewriter.create(op.getLoc(), input_ty, min, shape); + min = TF::BroadcastToOp::create(rewriter, op.getLoc(), input_ty, min, + shape); } if (max_ty != input_ty) { - max = - rewriter.create(op.getLoc(), input_ty, max, shape); + max = TF::BroadcastToOp::create(rewriter, op.getLoc(), input_ty, max, + shape); } rewriter.replaceOpWithNewOp(op, input_ty, min, input, @@ -6250,9 +6302,9 @@ class ConvertConstOp : public OpRewritePattern { return failure(); Location loc = op.getLoc(); - Value result = rewriter.create(loc, op.getValue()); + Value result = stablehlo::ConstantOp::create(rewriter, loc, op.getValue()); if (result.getType() != op.getType()) - result = rewriter.create(loc, op.getType(), result); + result = tensor::CastOp::create(rewriter, loc, op.getType(), result); rewriter.replaceOp(op, result); return success(); } @@ -6298,8 +6350,9 @@ class ConvertCumOp : public OpRewritePattern { // the input and then later reverse the output. if (op.getReverse()) { llvm::SmallVector dims_to_reverse({axis}); - input = rewriter.create( - op.getLoc(), input, GetI64ArrayAttr(dims_to_reverse, &rewriter)); + input = stablehlo::ReverseOp::create( + rewriter, op.getLoc(), input, + GetI64ArrayAttr(dims_to_reverse, &rewriter)); } // Convert if we need to enlarge the element type's bitwidth to avoid @@ -6313,8 +6366,8 @@ class ConvertCumOp : public OpRewritePattern { } Type sum_element_type = GetSumAccumulationType(input_element_type); - input = rewriter.create(op.getLoc(), input, - sum_element_type); + input = stablehlo::ConvertOp::create(rewriter, op.getLoc(), input, + sum_element_type); SmallVector window_dims(rank, 1); SmallVector window_strides(rank, 1); @@ -6333,8 +6386,8 @@ class ConvertCumOp : public OpRewritePattern { Value init = GetScalarConstOfType(sum_element_type, op.getLoc(), init_value, &rewriter); - auto reduce = rewriter.create( - op.getLoc(), input.getType(), input, init, + auto reduce = stablehlo::ReduceWindowOp::create( + rewriter, op.getLoc(), input.getType(), input, init, GetI64ArrayAttr(window_dims, &rewriter), GetI64ArrayAttr(window_strides, &rewriter), /*base_dilations=*/DenseI64ArrayAttr(), @@ -6353,20 +6406,22 @@ class ConvertCumOp : public OpRewritePattern { llvm::SmallVector interior_padding(rank, 0); low_padding[axis] = 1; high_padding[axis] = -1; - result = rewriter.create( - op.getLoc(), result, init, GetI64ArrayAttr(low_padding, &rewriter), + result = stablehlo::PadOp::create( + rewriter, op.getLoc(), result, init, + GetI64ArrayAttr(low_padding, &rewriter), GetI64ArrayAttr(high_padding, &rewriter), GetI64ArrayAttr(interior_padding, &rewriter)); } // Convert back if we enlarged the element type's bitwidth. 
- result = rewriter.create(op.getLoc(), result, - input_element_type); + result = stablehlo::ConvertOp::create(rewriter, op.getLoc(), result, + input_element_type); if (op.getReverse()) { llvm::SmallVector dims_to_reverse({axis}); - result = rewriter.create( - op.getLoc(), result, GetI64ArrayAttr(dims_to_reverse, &rewriter)); + result = stablehlo::ReverseOp::create( + rewriter, op.getLoc(), result, + GetI64ArrayAttr(dims_to_reverse, &rewriter)); } rewriter.replaceOp(op, result); @@ -6397,7 +6452,7 @@ class ConvertShapeOp : public OpRewritePattern { auto index_tensor = tensorflow::GetTypeFromTFTensorShape( result_ty.getShape(), rewriter.getIndexType()); auto shape_op = - rewriter.create(op.getLoc(), index_tensor, input); + shape::ShapeOfOp::create(rewriter, op.getLoc(), index_tensor, input); rewriter.replaceOpWithNewOp(op, result_ty, shape_op); return success(); } @@ -6422,8 +6477,8 @@ class ConvertDynamicExpandDimsOp : public OpRewritePattern { return failure(); } - auto shape = rewriter.create( - op.getLoc(), + auto shape = shape::ShapeOfOp::create( + rewriter, op.getLoc(), tensorflow::GetTypeFromTFTensorShape({input_ty.getRank()}, rewriter.getIndexType()), input); @@ -6444,17 +6499,18 @@ class ConvertDynamicExpandDimsOp : public OpRewritePattern { } dims[inserted_dim] = - rewriter.create(op.getLoc(), 1); + arith::ConstantIndexOp::create(rewriter, op.getLoc(), 1); for (int i = 0; i < dims.size() - 1; i++) { // Add the extracted dim. - Value index = rewriter.create(op.getLoc(), i); - Value dim = rewriter.create(op.getLoc(), shape, index); + Value index = arith::ConstantIndexOp::create(rewriter, op.getLoc(), i); + Value dim = + tensor::ExtractOp::create(rewriter, op.getLoc(), shape, index); dims[i >= inserted_dim ? i + 1 : i] = dim; } auto from_extents = - rewriter.create(op.getLoc(), dims); + tensor::FromElementsOp::create(rewriter, op.getLoc(), dims); rewriter.replaceOpWithNewOp( op, result_ty, input, from_extents); return success(); @@ -6497,11 +6553,11 @@ class ConvertDynamicSqueezeOp : public OpRewritePattern { llvm::SmallVector dims; for (int64_t i = 0; i != input_rank; ++i) { if (llvm::is_contained(squeeze_dims, i)) continue; - dims.push_back(rewriter.create(op.getLoc(), input, i)); + dims.push_back(tensor::DimOp::create(rewriter, op.getLoc(), input, i)); } auto from_extents = - rewriter.create(op.getLoc(), dims); + tensor::FromElementsOp::create(rewriter, op.getLoc(), dims); rewriter.replaceOpWithNewOp( op, result_ty, input, from_extents); return success(); @@ -6592,9 +6648,9 @@ class ConvertXlaSelectAndScatterOp SmallVector result_types{op.getResult().getType()}; // Create the stablehlo.SelectAndScatter op. 
- auto select_and_scatter_op = rewriter.create( - loc, result_types, op.getOperand(), op.getSource(), op.getInitValue(), - ToDenseI64ArrayAttr(window_dimensions, &rewriter), + auto select_and_scatter_op = stablehlo::SelectAndScatterOp::create( + rewriter, loc, result_types, op.getOperand(), op.getSource(), + op.getInitValue(), ToDenseI64ArrayAttr(window_dimensions, &rewriter), ToDenseI64ArrayAttr(window_strides, &rewriter), mlir::cast( hlo::convertElementsAttr(padding, rewriter.getIntegerType(64)))); @@ -6672,8 +6728,9 @@ class ConvertXlaRngBitGeneratorOp auto algorithm_attr = mlir::stablehlo::RngAlgorithmAttr::get( rewriter.getContext(), *mlir::stablehlo::symbolizeRngAlgorithm(xla_alg.value())); - auto rng_bit_generator_op = rewriter.create( - loc, op.getResultTypes(), algorithm_attr, op.getInitialState()); + auto rng_bit_generator_op = stablehlo::RngBitGeneratorOp::create( + rewriter, loc, op.getResultTypes(), algorithm_attr, + op.getInitialState()); rewriter.replaceOp(op, rng_bit_generator_op.getResults()); @@ -6700,8 +6757,8 @@ class ConvertXlaVariadicReduceV2Op [](Type ty) { return mlir::cast(ty).getElementType(); })}; // Create the stablehlo.reduce op. - auto reduce_op = rewriter.create( - loc, op.getInputs(), op.getInitValues(), + auto reduce_op = stablehlo::ReduceOp::create( + rewriter, loc, op.getInputs(), op.getInitValues(), ToDenseI64ArrayAttr(GetI64ElementsAttr(op.getDimensionsToReduce()), &rewriter), elementTypes); @@ -6727,9 +6784,9 @@ class ConvertXlaVariadicSortOp ElementsAttr dimension; matchPattern(op.getDimension(), m_Constant(&dimension)); // Create the stablehlo.sort op. - auto sort_op = rewriter.create( - loc, op.getInputs(), dimension.getValues()[0].getInt(), - op.getIsStable()); + auto sort_op = stablehlo::SortOp::create( + rewriter, loc, op.getInputs(), + dimension.getValues()[0].getInt(), op.getIsStable()); mlir::SymbolRefAttr func = op.getComparator(); auto func_op = cast(SymbolTable::lookupSymbolIn( op->getParentOfType(), func)); @@ -6816,9 +6873,9 @@ class LowerControlFlowOp : public OpConversionPattern { if constexpr (std::is_same::value) { // Explicitly handle the Case op because it has variadic regions and takes // the number of regions as an input along with the operands. 
-      stablehlo_op = rewriter.create(loc, op.getResultTypes(),
-                                     adaptor.getBranchIndex(),
-                                     op.getBranches().size());
+      stablehlo_op =
+          DstOpT::create(rewriter, loc, op.getResultTypes(),
+                         adaptor.getBranchIndex(), op.getBranches().size());
     } else if constexpr (std::is_same::value) {
       llvm::SmallVector while_result_types;
       while_result_types.reserve(num_results);
@@ -6827,11 +6884,11 @@ class LowerControlFlowOp : public OpConversionPattern {
         while_result_types.push_back(ty);
       }
-      stablehlo_op = rewriter.create(loc, TypeRange(while_result_types),
-                                     adaptor.getOperands());
+      stablehlo_op = DstOpT::create(
+          rewriter, loc, TypeRange(while_result_types), adaptor.getOperands());
     } else {
-      stablehlo_op = rewriter.create(loc, op.getResultTypes(),
-                                     adaptor.getOperands());
+      stablehlo_op = DstOpT::create(rewriter, loc, op.getResultTypes(),
+                                    adaptor.getOperands());
     }
     int64_t num_regions = op.getNumRegions();
diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_collective.cc b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_collective.cc
index 7061aaa4a5657b..abfcc0d26acc65 100644
--- a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_collective.cc
+++ b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_collective.cc
@@ -143,8 +143,9 @@ LogicalResult ConvertAllReduce(OpBuilder& builder, int64_t channel_id,
   ChannelHandleAttr channel_handle = ConvertChannel(builder, channel_id, mode);
   Location loc = op->getLoc();
   Type element_type = getElementTypeOrSelf(input.getType());
-  auto all_reduce = builder.create(
-      loc, result_type, input, replica_groups, channel_handle, nullptr);
+  auto all_reduce =
+      AllReduceOp::create(builder, loc, result_type, input, replica_groups,
+                          channel_handle, nullptr);
   if (all_reduce.getNumResults() != 1) {
     return op->emitOpError()
@@ -178,8 +179,8 @@ LogicalResult ConvertAllReduce(OpBuilder& builder, int64_t channel_id,
     auto divisor =
         GetScalarConstOfType(element_type, loc, replica_group_size, &builder);
     auto broadcast_dims = builder.getDenseI64ArrayAttr({});
-    result = builder.create(
-        loc, all_reduce.getResult(0), divisor.getResult(), broadcast_dims);
+    result = chlo::BroadcastDivOp::create(builder, loc, all_reduce.getResult(0),
+                                          divisor.getResult(), broadcast_dims);
   } else if (final_op != "Id") {
     return op->emitOpError()
            << "invalid final_op " << final_op << ", want one of [Id, Div]";
@@ -373,11 +374,12 @@ class ConvertCollectiveAssignGroupV2
     IntegerAttr group_size = rewriter.getI32IntegerAttr(replica_groups.size());
     IntegerAttr group_key = rewriter.getI32IntegerAttr(0);
-    auto const_group_size = rewriter.create(
-        assign_group->getLoc(), assign_group.getResult(0).getType(),
-        group_size);
-    auto const_group_key = rewriter.create(
-        assign_group->getLoc(), assign_group.getResult(1).getType(), group_key);
+    auto const_group_size =
+        TF::ConstOp::create(rewriter, assign_group->getLoc(),
+                            assign_group.getResult(0).getType(), group_size);
+    auto const_group_key =
+        TF::ConstOp::create(rewriter, assign_group->getLoc(),
+                            assign_group.getResult(1).getType(), group_key);
     rewriter.replaceAllUsesWith(assign_group.getResult(0), const_group_size);
     rewriter.replaceAllUsesWith(assign_group.getResult(1), const_group_key);
     rewriter.eraseOp(assign_group);
diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_communication.cc b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_communication.cc
index 7e653188857283..b1105d1a4e4000 100644
--- a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_communication.cc
+++ b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_communication.cc
@@ -289,10 +289,10 @@ Value CreateSendOp(OpBuilder& builder, Location loc, Value operand,
       /*handle=*/GetNextChannelId(), /*type=*/2);
   auto empty_source_target_pairs = builder.getI64TensorAttr({});
-  auto send = builder.create(
-      loc, token.getType(), operand, token, channel_handle,
-      /*is_host_transfer=*/builder.getBoolAttr(true),
-      /*source_target_pairs=*/empty_source_target_pairs);
+  auto send = SendOp::create(builder, loc, token.getType(), operand, token,
+                             channel_handle,
+                             /*is_host_transfer=*/builder.getBoolAttr(true),
+                             /*source_target_pairs=*/empty_source_target_pairs);
   SetFrontendAttributes(send, index, key, operand.getType(),
                         /*device_to_host=*/true, host_handler_name);
@@ -311,10 +311,10 @@ Value CreateRecvOp(OpBuilder& builder, Location loc, Value result,
       /*type=*/3);
   auto result_type = result.getType();
   SmallVector recv_result_type = {result_type, token.getType()};
-  auto recv = builder.create(
-      loc, recv_result_type, token, channel_handle,
-      /*is_host_transfer=*/builder.getBoolAttr(true),
-      /*source_target_pairs=*/builder.getI64TensorAttr({}));
+  auto recv =
+      RecvOp::create(builder, loc, recv_result_type, token, channel_handle,
+                     /*is_host_transfer=*/builder.getBoolAttr(true),
+                     /*source_target_pairs=*/builder.getI64TensorAttr({}));
   SetFrontendAttributes(recv, index, key, result_type,
                         /*device_to_host=*/false, host_handler_name);
@@ -336,7 +336,7 @@ Value CreateSinkToken(OpBuilder& builder, Location loc, ArrayRef tokens,
   } else if (llvm::hasSingleElement(tokens)) {
     return tokens[0];
   } else {
-    return builder.create(loc, original_token.getType(), tokens)
+    return AfterAllOp::create(builder, loc, original_token.getType(), tokens)
         .getResult();
   }
 }
@@ -413,8 +413,8 @@ Value RewriteCallOp(OpBuilder& builder, func::CallOp call,
   new_operands.push_back(token);
   auto new_result_types = llvm::to_vector(call.getResultTypes());
   new_result_types.push_back(token.getType());
-  auto new_call = builder.create(
-      call.getLoc(), new_result_types,
+  auto new_call = func::CallOp::create(
+      builder, call.getLoc(), new_result_types,
       new_symbol ? *new_symbol : call.getCallee(), new_operands);
   for (auto results : llvm::zip(call.getResults(), new_call.getResults()))
@@ -435,7 +435,7 @@ struct OpVisitorState {
 // Creates a tuple from a sequence of values.
 Value CreateTuple(OpBuilder& builder, Location loc, ArrayRef operands) {
-  return builder.create(loc, operands).getResult();
+  return TupleOp::create(builder, loc, operands).getResult();
 }
 // Extends `values` with the value `token` attached. If `flatten_tuple` is
@@ -480,7 +480,7 @@ SmallVector GetValueWithToken(
     SmallVector tuple_operands;
     for (auto idx : llvm::seq(0, tuple_type.getTypes().size()))
       tuple_operands.push_back(
-          builder.create(value.getLoc(), value, idx)
+          GetTupleElementOp::create(builder, value.getLoc(), value, idx)
              .getResult());
     tuple_operands.push_back(token);
@@ -518,7 +518,7 @@ Value CreateSubTuple(OpBuilder& builder, Value value, size_t end) {
   SmallVector tuple_operands;
   for (auto idx : llvm::seq(0, end))
     tuple_operands.push_back(
-        builder.create(value.getLoc(), value, idx)
+        GetTupleElementOp::create(builder, value.getLoc(), value, idx)
           .getResult());
   return CreateTuple(builder, value.getLoc(), tuple_operands);
@@ -543,8 +543,8 @@ void ReplaceWithTupleResult(OpBuilder& builder, ValueRange values,
   auto tuple_type = mlir::dyn_cast(value.getType());
   if (!tuple_type) {
     if (!value.use_empty()) {
-      auto new_element = builder.create(replacement.getLoc(),
-                                        replacement, 0);
+      auto new_element = GetTupleElementOp::create(
+          builder, replacement.getLoc(), replacement, 0);
       value.replaceAllUsesWith(new_element.getResult());
     }
     return;
@@ -620,8 +620,8 @@ void RewriteRegionIfOp(OpBuilder& builder, IfOp region_if,
                                                /*flatten_tuple=*/true);
   // Create new `mhlo.if` op with extra token operands and result.
-  auto new_if = builder.create(region_if.getLoc(), new_result_types,
-                               region_if.getPred());
+  auto new_if = IfOp::create(builder, region_if.getLoc(), new_result_types,
+                             region_if.getPred());
   // Move all regions from the old `mhlo.if` op to its replacement.
   new_if.getTrueBranch().takeBody(region_if.getTrueBranch());
@@ -745,8 +745,8 @@ void RewriteRegionWhileOp(OpBuilder& builder, WhileOp region_while,
                                                /*flatten_tuple*/ true);
   // Create new `mhlo.while` op with extra token operand and result.
-  auto new_while = builder.create(region_while.getLoc(),
-                                  new_result_types, new_val_operands);
+  auto new_while = WhileOp::create(builder, region_while.getLoc(),
+                                   new_result_types, new_val_operands);
   // Move all regions from the old `mhlo.while` op to its replacement.
   new_while.getCond().takeBody(region_while.getCond());
@@ -815,7 +815,7 @@ void RewriteFunctionTerminator(OpBuilder& builder,
   auto new_results = llvm::to_vector(terminator.getOperands());
   new_results.push_back(token);
   builder.setInsertionPoint(terminator);
-  builder.create(terminator.getLoc(), new_results);
+  mlir::func::ReturnOp::create(builder, terminator.getLoc(), new_results);
   terminator.erase();
 }
@@ -844,7 +844,7 @@ LogicalResult RewriteFunction(
   // a token will be created. Otherwise a token block argument is inserted.
   Value init_token = rewrite_block ?
func_body.addArgument(token_type, func.getLoc()) - : builder.create(func.getLoc(), token_type) + : CreateTokenOp::create(builder, func.getLoc(), token_type) .getResult(); // Stack to keep track of region based control flow op nesting and current diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_patterns.td index 5507c82bc6f479..957c4887366e16 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/tf2xla/transforms/legalize_tf_patterns.td @@ -41,7 +41,7 @@ def CastValueToI64: NativeCodeCall< "CastValueToI64($0.getLoc(), $1, &$_builder)">; def CastValueToElementType: NativeCodeCall< - "$_builder.create($0.getLoc(), $1, " + "stablehlo::ConvertOp::create($_builder, $0.getLoc(), $1, " "getElementTypeOrSelf($2.getType()))">; // Here, $0 is an ElementsAttr with exactly one element of type integer. $1 is diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/split_into_island_per_op_pass.cc b/tensorflow/compiler/mlir/tf2xla/transforms/split_into_island_per_op_pass.cc index ecf3aea5f65d48..0b0e68548032a9 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/split_into_island_per_op_pass.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/split_into_island_per_op_pass.cc @@ -108,15 +108,15 @@ void PopulateEmptyIsland(tf_executor::IslandOp island) { OpBuilder builder(&island.GetBody(), island.GetBody().begin()); tf_executor::YieldOp yield = island.GetYield(); if (yield.getNumOperands() == 0) { - builder.create(island.getLoc(), TypeRange{}, ValueRange{}); + TF::NoOp::create(builder, island.getLoc(), TypeRange{}, ValueRange{}); } else if (yield.getNumOperands() == 1) { Value operand = yield.getOperand(0); - auto identity = builder.create(island.getLoc(), - operand.getType(), operand); + auto identity = TF::IdentityOp::create(builder, island.getLoc(), + operand.getType(), operand); yield.setOperand(0, identity.getOutput()); } else { - auto identity_n = builder.create( - island.getLoc(), yield.getOperandTypes(), yield.getOperands()); + auto identity_n = TF::IdentityNOp::create( + builder, island.getLoc(), yield.getOperandTypes(), yield.getOperands()); for (const auto& it : llvm::enumerate(identity_n.getResults())) yield.setOperand(it.index(), it.value()); } @@ -128,15 +128,15 @@ tf_executor::IslandOp CreateIsland(TypeRange result_types, const Location& loc, Operation& sub_op, tf_executor::IslandOp original_island) { OpBuilder builder(original_island); - auto island = builder.create( - loc, result_types, control_type, mlir::ValueRange{}); + auto island = tf_executor::IslandOp::create(builder, loc, result_types, + control_type, mlir::ValueRange{}); island.getBody().push_back(new Block); Block* block = &island.getBody().back(); OpBuilder island_builder(original_island); island_builder.setInsertionPointToEnd(block); sub_op.replaceAllUsesWith(island.getOutputs()); sub_op.moveBefore(block, block->begin()); - island_builder.create(loc, sub_op.getResults()); + tf_executor::YieldOp::create(island_builder, loc, sub_op.getResults()); return island; } diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/split_into_island_per_op_pass_test.cc b/tensorflow/compiler/mlir/tf2xla/transforms/split_into_island_per_op_pass_test.cc index d888b0c12588c9..2e5e4764f63d34 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/split_into_island_per_op_pass_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/split_into_island_per_op_pass_test.cc @@ -53,12 +53,11 @@ class 
SplitIntoIslandPerOpPass : public ::testing::Test { llvm::SmallVector island_result_types; island_result_types.push_back(op_builder_.getF64Type()); - mlir::Operation* yield_op = op_builder_.create( - op_state.location, mlir::ValueRange{}); - mlir::tf_executor::IslandOp island_op = - op_builder_.create( - op_state.location, island_result_types, mlir::ValueRange{}, - mlir::ArrayRef{}); + mlir::Operation* yield_op = mlir::tf_executor::YieldOp::create( + op_builder_, op_state.location, mlir::ValueRange{}); + mlir::tf_executor::IslandOp island_op = mlir::tf_executor::IslandOp::create( + op_builder_, op_state.location, island_result_types, mlir::ValueRange{}, + mlir::ArrayRef{}); island_op.getBody().push_back(new mlir::Block); island_op.getBody().back().push_back(yield_op); return island_op; @@ -126,13 +125,13 @@ TEST_F(SplitIntoIslandPerOpPass, IslandOpTwoOpsSplitsIntoTwoIslands) { islandOp.getBody().back().push_front(inner_op_2); // Code relies on a parent with a fetch op containing the island op. mlir::tf_executor::GraphOp parent_graph_op = - op_builder_.create( - mlir::UnknownLoc::get(&context_), + mlir::tf_executor::GraphOp::create( + op_builder_, mlir::UnknownLoc::get(&context_), mlir::TypeRange{op_builder_.getF64Type()}); parent_graph_op.getRegion().push_back(new mlir::Block); parent_graph_op.push_back(islandOp); mlir::tf_executor::FetchOp fetch_op = - op_builder_.create(parent_graph_op.getLoc()); + mlir::tf_executor::FetchOp::create(op_builder_, parent_graph_op.getLoc()); parent_graph_op.GetBody().push_back(fetch_op); SplitIsland(islandOp, control_type); diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/tfxla_device_specific_transforms.cc b/tensorflow/compiler/mlir/tf2xla/transforms/tfxla_device_specific_transforms.cc index a7e9726e7575a3..2f7089edacbe31 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/tfxla_device_specific_transforms.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/tfxla_device_specific_transforms.cc @@ -57,8 +57,9 @@ LogicalResult TFXLADeviceSpecificTransforms::ConvertGetAlgOp( OpBuilder opbuilder(get_alg_op); - auto tf_const = opbuilder.create( - get_alg_op->getLoc(), opbuilder.getI32IntegerAttr((int)tensorflow_rng)); + auto tf_const = + TF::ConstOp::create(opbuilder, get_alg_op->getLoc(), + opbuilder.getI32IntegerAttr((int)tensorflow_rng)); get_alg_op->replaceAllUsesWith(tf_const); get_alg_op->erase(); diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/utils.cc b/tensorflow/compiler/mlir/tf2xla/transforms/utils.cc index 0152cd1d1a7363..61c8e8e161425d 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/utils.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/utils.cc @@ -24,11 +24,11 @@ namespace mhlo { ConstantOp GetScalarConstOfType(Type ty, Location loc, int64_t raw_value, OpBuilder* builder) { - return builder->create(loc, hlo::getScalarOfType(ty, raw_value)); + return ConstantOp::create(*builder, loc, hlo::getScalarOfType(ty, raw_value)); } ConstantOp GetScalarNegZeroOfType(Type ty, Location loc, OpBuilder* builder) { - return builder->create(loc, hlo::getScalarNegZeroOfType(ty)); + return ConstantOp::create(*builder, loc, hlo::getScalarNegZeroOfType(ty)); } DenseIntElementsAttr GetI64ElementsAttr(ArrayAttr attr) { diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/utils.h b/tensorflow/compiler/mlir/tf2xla/transforms/utils.h index 5dba4a4dcf894c..a6b848ae2fc27b 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/utils.h +++ b/tensorflow/compiler/mlir/tf2xla/transforms/utils.h @@ -42,8 +42,8 @@ void BuildReduceBody(Type 
element_type, Region* body, OpBuilder* builder) { block->addArguments({type, type}, SmallVector(2, loc)); auto reducer = - builder->create(loc, block->getArgument(0), block->getArgument(1)); - builder->create(loc, reducer.getResult()); + Op::create(*builder, loc, block->getArgument(0), block->getArgument(1)); + ReturnOp::create(*builder, loc, reducer.getResult()); } ConstantOp GetScalarConstOfType(Type ty, Location loc, int64_t raw_value, diff --git a/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_targets_test.cc b/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_targets_test.cc index 6572aef984b043..71dce38198c96a 100644 --- a/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_targets_test.cc +++ b/tensorflow/compiler/mlir/tf2xla/transforms/xla_legalize_targets_test.cc @@ -63,8 +63,8 @@ class XlaLegalizeTargetsTest : public testing::Test { }; TEST_F(XlaLegalizeTargetsTest, CreatesConversionTargets) { - auto const_int = builder_.create( - builder_.getUnknownLoc(), builder_.getI32Type(), /*value=*/10); + auto const_int = mlir::arith::ConstantIntOp::create( + builder_, builder_.getUnknownLoc(), builder_.getI32Type(), /*value=*/10); ConversionTarget target = GetDefaultLegalConversionTargets(context_, /*legalize_chlo=*/false); @@ -72,8 +72,8 @@ TEST_F(XlaLegalizeTargetsTest, CreatesConversionTargets) { } TEST_F(XlaLegalizeTargetsTest, AllowsCHLODialect) { - auto const_int = builder_.create( - builder_.getUnknownLoc(), builder_.getI32TensorAttr({42})); + auto const_int = chlo::ConstantOp::create(builder_, builder_.getUnknownLoc(), + builder_.getI32TensorAttr({42})); ConversionTarget target = GetDefaultLegalConversionTargets(context_, /*legalize_chlo=*/true); @@ -82,8 +82,8 @@ TEST_F(XlaLegalizeTargetsTest, AllowsCHLODialect) { } TEST_F(XlaLegalizeTargetsTest, DontAllowCHLODialect) { - auto const_int = builder_.create( - builder_.getUnknownLoc(), builder_.getI32TensorAttr({42})); + auto const_int = chlo::ConstantOp::create(builder_, builder_.getUnknownLoc(), + builder_.getI32TensorAttr({42})); ConversionTarget target = GetDefaultLegalConversionTargets(context_, /*legalize_chlo=*/false); diff --git a/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc b/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc index e2f1bdbfb0a0de..2672a90f93cdd3 100644 --- a/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc +++ b/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc @@ -81,8 +81,9 @@ absl::StatusOr> TFRDecomposeContext::Get( std::string tfr_lib_dir; TF_RETURN_IF_ERROR(ReadStringFromEnvVar( kTFRLibEnv, "tensorflow/compiler/mlir/tfr/resources", &tfr_lib_dir)); - string composite_mlir_dir = io::JoinPath(env->GetRunfilesDir(), tfr_lib_dir); - std::vector files; + std::string composite_mlir_dir = + io::JoinPath(env->GetRunfilesDir(), tfr_lib_dir); + std::vector files; TF_RETURN_IF_ERROR(env->GetChildren(composite_mlir_dir, &files)); if (files.empty()) { return errors::Internal(absl::StrCat( @@ -90,7 +91,7 @@ absl::StatusOr> TFRDecomposeContext::Get( } std::string tfr_raw_text; for (const auto& file : files) { - string fullpath = io::JoinPath(composite_mlir_dir, file); + std::string fullpath = io::JoinPath(composite_mlir_dir, file); if (env->MatchPath(fullpath, io::JoinPath(composite_mlir_dir, "*.mlir"))) { std::string text; TF_RETURN_IF_ERROR(ReadFileToString(env, fullpath, &text)); diff --git a/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc b/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc index d44e65f029ada3..66b5167839731b 100644 --- 
a/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc +++ b/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc @@ -118,10 +118,11 @@ class TFRInlinerInterface : public DialectInlinerInterface { auto result_itype = llvm::cast(result_type); if (input_itype.getWidth() == result_itype.getWidth()) return nullptr; if (input_itype.getWidth() > result_itype.getWidth()) { - return builder.create(conversion_loc, result_type, - input); + return arith::TruncIOp::create(builder, conversion_loc, result_type, + input); } else { - return builder.create(conversion_loc, result_type, input); + return arith::ExtSIOp::create(builder, conversion_loc, result_type, + input); } } }; @@ -148,11 +149,11 @@ TFRDialect::TFRDialect(MLIRContext *context) Operation *TFRDialect::materializeConstant(OpBuilder &builder, Attribute value, Type type, Location loc) { if (arith::ConstantOp::isBuildableWith(value, type)) - return builder.create(loc, type, - llvm::cast(value)); + return arith::ConstantOp::create(builder, loc, type, + llvm::cast(value)); if (func::ConstantOp::isBuildableWith(value, type)) - return builder.create( - loc, type, llvm::cast(value)); + return func::ConstantOp::create(builder, loc, type, + llvm::cast(value)); return nullptr; } @@ -421,9 +422,10 @@ class ConvertConstToTensorConst : public OpRewritePattern { {static_cast(array.size())}, *all_types.begin()); DenseElementsAttr attr = DenseElementsAttr::get(new_out_type, array.getValue()); - new_cst = rewriter.create(loc, new_out_type, attr); + new_cst = TF::ConstOp::create(rewriter, loc, new_out_type, attr); if (isa(out_type)) { - new_cst = rewriter.create(loc, out_type, new_cst->getResult(0)); + new_cst = + CastOp::create(rewriter, loc, out_type, new_cst->getResult(0)); } rewriter.replaceOp(cst_tensor_op, new_cst->getResult(0)); return success(); @@ -432,9 +434,10 @@ class ConvertConstToTensorConst : public OpRewritePattern { TypedAttr scalar; if (matchPattern(cst_tensor_op.getArg(), m_Constant(&scalar))) { Type new_out_type = RankedTensorType::get({}, scalar.getType()); - new_cst = rewriter.create(loc, new_out_type, scalar); + new_cst = TF::ConstOp::create(rewriter, loc, new_out_type, scalar); if (isa(out_type)) { - new_cst = rewriter.create(loc, out_type, new_cst->getResult(0)); + new_cst = + CastOp::create(rewriter, loc, out_type, new_cst->getResult(0)); } rewriter.replaceOp(cst_tensor_op, new_cst->getResult(0)); return success(); @@ -481,8 +484,8 @@ class RemoveRedundantCast : public OpRewritePattern { if ((input_tensor_type.getElementType() != output_tensor_type.getElementType()) && !isQuantizedType(input_type) && !isQuantizedType(output_type)) { - auto new_tfr_cast = rewriter.create( - cast_op.getLoc(), + auto new_tfr_cast = TFR::CastOp::create( + rewriter, cast_op.getLoc(), output_tensor_type.clone(input_tensor_type.getElementType()), cast_op.getArg()); rewriter.replaceOpWithNewOp(cast_op, output_type, @@ -652,8 +655,9 @@ class RemoveRawDataOp : public OpRewritePattern { new_list_values.push_back(redundant_cast.getArg()); } - auto new_list = rewriter.create( - raw_data_op.getLoc(), preceding_list.getType(), new_list_values); + auto new_list = + BuildListOp::create(rewriter, raw_data_op.getLoc(), + preceding_list.getType(), new_list_values); raw_data_op.getOutput().replaceAllUsesWith(new_list.getOut()); return success(); } @@ -679,11 +683,11 @@ class RemoveQParamsOp : public OpRewritePattern { rewriter.setInsertionPoint(qparams_op); Location loc = qparams_op->getLoc(); if (auto qtype = llvm::dyn_cast(cast_qtype)) { - scale_op = rewriter.create( - loc, 
RankedTensorType::get({}, rewriter.getF32Type()), + scale_op = TF::ConstOp::create( + rewriter, loc, RankedTensorType::get({}, rewriter.getF32Type()), rewriter.getF32FloatAttr(qtype.getScale())); - zp_op = rewriter.create( - loc, RankedTensorType::get({}, rewriter.getI32Type()), + zp_op = TF::ConstOp::create( + rewriter, loc, RankedTensorType::get({}, rewriter.getI32Type()), rewriter.getI32IntegerAttr(qtype.getZeroPoint())); } else if (auto qtype = llvm::dyn_cast( cast_qtype)) { @@ -697,20 +701,20 @@ class RemoveQParamsOp : public OpRewritePattern { {static_cast(num_channels)}, rewriter.getF32Type()); auto scales_attr = DenseElementsAttr::get(scales_type, llvm::ArrayRef(scales)); - scale_op = rewriter.create(loc, scales_attr); + scale_op = TF::ConstOp::create(rewriter, loc, scales_attr); auto zps_type = RankedTensorType::get( {static_cast(num_channels)}, rewriter.getI32Type()); auto zps_attr = DenseElementsAttr::get(zps_type, llvm::ArrayRef(zps)); - zp_op = rewriter.create(loc, zps_attr); + zp_op = TF::ConstOp::create(rewriter, loc, zps_attr); } if (!scale_op || !zp_op) { return failure(); } - auto scale_cast = rewriter.create( - loc, qparams_op.getScale().getType(), scale_op.getOutput()); - auto zp_cast = rewriter.create(loc, qparams_op.getZp().getType(), - zp_op.getOutput()); + auto scale_cast = CastOp::create( + rewriter, loc, qparams_op.getScale().getType(), scale_op.getOutput()); + auto zp_cast = CastOp::create(rewriter, loc, qparams_op.getZp().getType(), + zp_op.getOutput()); qparams_op.getScale().replaceAllUsesWith(scale_cast.getOut()); qparams_op.getZp().replaceAllUsesWith(zp_cast.getOut()); @@ -787,10 +791,11 @@ class RemoveScaleFactorOp : public OpRewritePattern { } rewriter.setInsertionPoint(scale_factor_op); const Location loc = scale_factor_op->getLoc(); - auto result_scale_op = rewriter.create( - loc, DenseElementsAttr::get(scale_type, llvm::ArrayRef(scale_factors))); - auto result_scale_cast_op = rewriter.create( - loc, scale_factor_op.getType(), result_scale_op.getOutput()); + auto result_scale_op = TF::ConstOp::create( + rewriter, loc, + DenseElementsAttr::get(scale_type, llvm::ArrayRef(scale_factors))); + auto result_scale_cast_op = CastOp::create( + rewriter, loc, scale_factor_op.getType(), result_scale_op.getOutput()); scale_factor_op.getScaleFactor().replaceAllUsesWith( result_scale_cast_op.getOut()); return success(); @@ -812,50 +817,55 @@ class RemoveRescaleOp : public OpRewritePattern { const Location loc = rescale_op->getLoc(); const auto result_types = rescale_op->getResultTypes(); auto c_false = - rewriter.create(loc, rewriter.getBoolAttr(false)); + arith::ConstantOp::create(rewriter, loc, rewriter.getBoolAttr(false)); TypeAttr f32_attr = TypeAttr::get(rewriter.getF32Type()); TFRAttrType output_type = TFRAttrType::get(rewriter.getContext()); - auto constant_f32_op = rewriter.create(loc, output_type, f32_attr); + auto constant_f32_op = + ConstOp::create(rewriter, loc, output_type, f32_attr); TypeAttr i32_attr = TypeAttr::get(rewriter.getI32Type()); - auto constant_i32_op = rewriter.create(loc, output_type, i32_attr); + auto constant_i32_op = + ConstOp::create(rewriter, loc, output_type, i32_attr); IntegerAttr zp_attr; if (!matchPattern(zp, m_Constant(&zp_attr))) { return failure(); } rewriter.setInsertionPoint(zp.getDefiningOp()); - auto zp_tensor = rewriter.create( - loc, RankedTensorType::get({}, zp.getType()), zp_attr); - auto zp_cast = rewriter.create( - loc, rewriter.getType(), zp_tensor.getOutput()); + auto zp_tensor = TF::ConstOp::create( + rewriter, loc, 
RankedTensorType::get({}, zp.getType()), zp_attr); + auto zp_cast = + CastOp::create(rewriter, loc, rewriter.getType(), + zp_tensor.getOutput()); rewriter.setInsertionPoint(rescale_op); - auto cast_input_to_float_op = rewriter.create( - loc, result_types, - SymbolRefAttr::get(rewriter.getContext(), "tf__cast"), - ArrayRef{input, constant_f32_op, c_false}, - /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); - auto input_x_scale_op = rewriter.create( - loc, result_types, SymbolRefAttr::get(rewriter.getContext(), "tf__mul"), + auto cast_input_to_float_op = + CallOp::create(rewriter, loc, result_types, + SymbolRefAttr::get(rewriter.getContext(), "tf__cast"), + ArrayRef{input, constant_f32_op, c_false}, + /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); + auto input_x_scale_op = CallOp::create( + rewriter, loc, result_types, + SymbolRefAttr::get(rewriter.getContext(), "tf__mul"), ArrayRef{cast_input_to_float_op.getResult(0), scale}, /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); - auto round_rescaled_op = rewriter.create( - loc, result_types, - SymbolRefAttr::get(rewriter.getContext(), "tf__round"), - ArrayRef{input_x_scale_op->getResult(0)}, - /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); - auto cast_zp_to_float_op = rewriter.create( - loc, result_types, - SymbolRefAttr::get(rewriter.getContext(), "tf__cast"), - ArrayRef{zp_cast, constant_f32_op, c_false}, - /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); - auto recentered_op = rewriter.create( - loc, result_types, SymbolRefAttr::get(rewriter.getContext(), "tf__add"), - ArrayRef{round_rescaled_op->getResult(0), - cast_zp_to_float_op->getResult(0)}, - /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); - auto cast_output_to_i32 = rewriter.create( - loc, result_types, + auto round_rescaled_op = + CallOp::create(rewriter, loc, result_types, + SymbolRefAttr::get(rewriter.getContext(), "tf__round"), + ArrayRef{input_x_scale_op->getResult(0)}, + /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); + auto cast_zp_to_float_op = + CallOp::create(rewriter, loc, result_types, + SymbolRefAttr::get(rewriter.getContext(), "tf__cast"), + ArrayRef{zp_cast, constant_f32_op, c_false}, + /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); + auto recentered_op = + CallOp::create(rewriter, loc, result_types, + SymbolRefAttr::get(rewriter.getContext(), "tf__add"), + ArrayRef{round_rescaled_op->getResult(0), + cast_zp_to_float_op->getResult(0)}, + /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); + auto cast_output_to_i32 = CallOp::create( + rewriter, loc, result_types, SymbolRefAttr::get(rewriter.getContext(), "tf__cast"), ArrayRef{recentered_op->getResult(0), constant_i32_op, c_false}, /*args_attrs=*/nullptr, /*res_attrs=*/nullptr); diff --git a/tensorflow/compiler/mlir/tfr/passes/canonicalize.cc b/tensorflow/compiler/mlir/tfr/passes/canonicalize.cc index fb0640536d4fe5..7a03a46972371c 100644 --- a/tensorflow/compiler/mlir/tfr/passes/canonicalize.cc +++ b/tensorflow/compiler/mlir/tfr/passes/canonicalize.cc @@ -75,12 +75,12 @@ class UnrollSCFForOp : public OpRewritePattern { for (auto i = 0; i < trip_count; ++i) { if (!iv.use_empty()) { // iv' = iv + step * i; - Value iter = rewriter.create(loc, i); + Value iter = arith::ConstantIndexOp::create(rewriter, loc, i); Value step_cst = - rewriter.create(loc, step.getSExtValue()); - Value stride = rewriter.create(loc, step_cst, iter); + arith::ConstantIndexOp::create(rewriter, loc, step.getSExtValue()); + Value stride = arith::MulIOp::create(rewriter, loc, step_cst, iter); Value iv_unroll = - rewriter.create(loc, mapping.lookup(iv), 
stride); + arith::AddIOp::create(rewriter, loc, mapping.lookup(iv), stride); mapping.map(iv, iv_unroll); } diff --git a/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc b/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc index 94a84cc3072ea6..5dd6a22f90c972 100644 --- a/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc +++ b/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc @@ -148,7 +148,7 @@ class RewriteTFRCallOp : public OpRewritePattern { mlir::cast(cast_op.getInputElementType()).getValue(); if (result_elt_type != original_input_type) { UnrankedTensorType result_type = UnrankedTensorType::get(result_elt_type); - return rewriter.create(loc, result_type, cast_op.getArg()); + return TF::CastOp::create(rewriter, loc, result_type, cast_op.getArg()); } return cast_op.getArg(); } @@ -167,7 +167,7 @@ class RewriteTFRCallOp : public OpRewritePattern { Type current_input_type = mlir::cast(input_types[i]).getValue(); if (current_input_type != target_input_type) { input_values[i] = - rewriter.create(loc, result_type, input_values[i]); + TF::CastOp::create(rewriter, loc, result_type, input_values[i]); } } } @@ -397,7 +397,7 @@ LogicalResult RewriteTFRCallOp::CreateAndReplaceOp( Type res_type = res.value(); if (mlir::dyn_cast(res_type)) { Value new_res = new_op->getResult(res.index()); - auto casted = rewriter.create(loc, res_type, new_res); + auto casted = CastOp::create(rewriter, loc, res_type, new_res); new_results.push_back(casted.getOut()); } else if (auto list_type = mlir::dyn_cast(res.value())) { @@ -405,10 +405,10 @@ LogicalResult RewriteTFRCallOp::CreateAndReplaceOp( for (int i = res.index(); i < new_op->getNumResults(); i++) { Value new_res = new_op->getResult(i); auto casted = - rewriter.create(loc, unconstrainted_type, new_res); + CastOp::create(rewriter, loc, unconstrainted_type, new_res); tensor_list.push_back(casted.getOut()); } - auto list_op = rewriter.create(loc, res_type, tensor_list); + auto list_op = BuildListOp::create(rewriter, loc, res_type, tensor_list); new_results.push_back(list_op.getOut()); } } diff --git a/tensorflow/compiler/mlir/tfrt/tests/xla_rewrite.mlir b/tensorflow/compiler/mlir/tfrt/tests/xla_rewrite.mlir index 2118183569e6ed..24e195dbc9dc42 100644 --- a/tensorflow/compiler/mlir/tfrt/tests/xla_rewrite.mlir +++ b/tensorflow/compiler/mlir/tfrt/tests/xla_rewrite.mlir @@ -27,7 +27,14 @@ func.func @xla_launch(%arg: tensor, %v0: tensor<*x!tf_type.resource>, %v1: device = "/device:GPU:0", executor_type = "", f = @callee} : (tensor, tensor, tensor<*x!tf_type.resource>, tensor, tensor<*x!tf_type.resource>) -> tensor - func.return %r2 : tensor + // CHECK: tf.XlaLaunchV2 + // CHECK-SAME: constants = [0, 3] + // CHECK-SAME: resources = [2, 4] + %r3 = "tf.PartitionedCall"(%c0, %r2, %v0, %c1, %v1) {_XlaMustCompile = true, config = "", config_proto = "", + device = "/device:CPU:0", executor_type = "", f = @callee} + : (tensor, tensor, tensor<*x!tf_type.resource>, tensor, tensor<*x!tf_type.resource>) -> tensor + + func.return %r3 : tensor } func.func @callee(%c0: tensor, %arg: tensor, %v0: tensor<*x!tf_type.resource>, %c1: tensor, %v1: tensor<*x!tf_type.resource>) -> (tensor) { diff --git a/tensorflow/compiler/mlir/tfrt/transforms/deduplicate_if_result_pass.cc b/tensorflow/compiler/mlir/tfrt/transforms/deduplicate_if_result_pass.cc index a42d2f5d2ad7d1..aafd3d958f826b 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/deduplicate_if_result_pass.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/deduplicate_if_result_pass.cc @@ -94,7 +94,7 @@ mlir::func::FuncOp 
CreateBranchFunctionWithDeduplicatedResults( auto new_func_type = mlir::FunctionType::get(builder.getContext(), arg_types, new_result_types); - auto new_func = builder.create(loc, name, new_func_type); + auto new_func = mlir::func::FuncOp::create(builder, loc, name, new_func_type); new_func.setVisibility(mlir::func::FuncOp::Visibility::Private); mlir::OpBuilder::InsertionGuard guard(builder); @@ -110,8 +110,8 @@ mlir::func::FuncOp CreateBranchFunctionWithDeduplicatedResults( // Create the call op to the original func. The arguments are simply // the arguments from the wrapper function. - auto call_op = builder.create( - loc, result_types, block->getArguments(), /*args_attrs=*/nullptr, + auto call_op = mlir::TF::PartitionedCallOp::create( + builder, loc, result_types, block->getArguments(), /*args_attrs=*/nullptr, /*res_attrs=*/nullptr, mlir::FlatSymbolRefAttr::get(func.getSymNameAttr()), empty_string_attr, empty_string_attr, empty_string_attr); @@ -120,7 +120,7 @@ mlir::func::FuncOp CreateBranchFunctionWithDeduplicatedResults( results.push_back(call_op.getResult(i)); } - builder.create(loc, results); + mlir::func::ReturnOp::create(builder, loc, results); return new_func; } @@ -183,8 +183,8 @@ void DeduplicateIfOps(mlir::ModuleOp module) { new_result_types.push_back(op->getResult(i).getType()); } - auto new_if_op = builder.create( - op.getLoc(), new_result_types, op.getCond(), op.getInput(), + auto new_if_op = mlir::TF::IfOp::create( + builder, op.getLoc(), new_result_types, op.getCond(), op.getInput(), new_then_func.getSymName(), new_else_func.getSymName(), op.getIsStateless()); diff --git a/tensorflow/compiler/mlir/tfrt/transforms/fuse_tpu_compile_and_execute_ops.cc b/tensorflow/compiler/mlir/tfrt/transforms/fuse_tpu_compile_and_execute_ops.cc index 77de1e0eb48669..73d5836fa895a6 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/fuse_tpu_compile_and_execute_ops.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/fuse_tpu_compile_and_execute_ops.cc @@ -159,12 +159,11 @@ void FuseCompileAndExecuteOps( auto producer_name = used_exec_op->getAttrOfType("_producer_name"); if (!producer_name) producer_name = mlir::StringAttr::get(context, "default"); - auto compile_and_execute_op = - builder.create( - used_exec_op.getLoc(), output_types, exec_op_args, - static_shape_tensors, - builder.getI32ArrayAttr(static_shaped_operand_indices_attr), - compile_op.getMlirModule(), compile_op.getMetadata(), producer_name); + auto compile_and_execute_op = mlir::TF::TPUCompileMlirAndExecuteOp::create( + builder, used_exec_op.getLoc(), output_types, exec_op_args, + static_shape_tensors, + builder.getI32ArrayAttr(static_shaped_operand_indices_attr), + compile_op.getMlirModule(), compile_op.getMetadata(), producer_name); for (auto exec_op : exec_op_in_group) { exec_op.replaceAllUsesWith(compile_and_execute_op.getResults()); diff --git a/tensorflow/compiler/mlir/tfrt/transforms/ifrt/rewrite_cluster_to_ifrt_call.cc b/tensorflow/compiler/mlir/tfrt/transforms/ifrt/rewrite_cluster_to_ifrt_call.cc index 2fc2c173fed8ba..1e2231f1c59584 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/ifrt/rewrite_cluster_to_ifrt_call.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/ifrt/rewrite_cluster_to_ifrt_call.cc @@ -151,8 +151,8 @@ class RewriteClusterToIfrtCallPass // ifrt program already exists builder.setInsertionPoint(cluster_func); - mlir::TF::IfrtCallOp ifrt_call_op = builder.create( - cluster_func->getLoc(), cluster_func.getResultTypes(), + mlir::TF::IfrtCallOp ifrt_call_op = mlir::TF::IfrtCallOp::create( + builder, 
cluster_func->getLoc(), cluster_func.getResultTypes(), cluster_func->getOperands()); int64_t program_id; @@ -189,8 +189,8 @@ class RewriteClusterToIfrtCallPass mlir::OpBuilder::InsertionGuard insertion_guard(builder); builder.setInsertionPoint(callee_func); - mlir::func::FuncOp cloned_ifrt_program = builder.create( - callee_func->getLoc(), ifrt_program_name, + mlir::func::FuncOp cloned_ifrt_program = mlir::func::FuncOp::create( + builder, callee_func->getLoc(), ifrt_program_name, callee_func.getFunctionType()); mlir::IRMapping mapper; callee_func.cloneInto(cloned_ifrt_program, mapper); @@ -226,8 +226,8 @@ class RewriteClusterToIfrtCallPass builder.setInsertionPoint(cluster_func); - mlir::TF::IfrtCallOp ifrt_call_op = builder.create( - cluster_func->getLoc(), cluster_func.getResultTypes(), + mlir::TF::IfrtCallOp ifrt_call_op = mlir::TF::IfrtCallOp::create( + builder, cluster_func->getLoc(), cluster_func.getResultTypes(), cluster_func->getOperands()); // TODO(b/304839793): populate variable names after adding a variable diff --git a/tensorflow/compiler/mlir/tfrt/transforms/ifrt/tf_restore_merging.cc b/tensorflow/compiler/mlir/tfrt/transforms/ifrt/tf_restore_merging.cc index 5220824d3f716a..d0c8f03bf7f9c2 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/ifrt/tf_restore_merging.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/ifrt/tf_restore_merging.cc @@ -130,15 +130,15 @@ class TfRestoreMergingPass // merged in order to keep the dominance property. mlir::OpBuilder builder(restores_to_merge.front()); - auto new_tensor_names = builder.create( - builder.getFusedLoc(tensor_names_locs), + auto new_tensor_names = mlir::TF::ConstOp::create( + builder, builder.getFusedLoc(tensor_names_locs), GetStringTensorAttr(merged_tensor_names)); - auto new_shape_and_slices = builder.create( - builder.getFusedLoc(shape_and_slices_locs), + auto new_shape_and_slices = mlir::TF::ConstOp::create( + builder, builder.getFusedLoc(shape_and_slices_locs), GetStringTensorAttr(merged_shape_and_slices)); - auto new_restore = builder.create( - builder.getFusedLoc(restore_locs), + auto new_restore = mlir::TF::RestoreV2Op::create( + builder, builder.getFusedLoc(restore_locs), mlir::TypeRange(mlir::ValueRange(values_to_replace)), prefix, new_tensor_names, new_shape_and_slices); for (auto [old_value, new_value] : diff --git a/tensorflow/compiler/mlir/tfrt/transforms/ifrt/tf_restore_splitting.cc b/tensorflow/compiler/mlir/tfrt/transforms/ifrt/tf_restore_splitting.cc index 130ca0a2e90b74..cb5b3e7afdcc13 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/ifrt/tf_restore_splitting.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/ifrt/tf_restore_splitting.cc @@ -93,15 +93,15 @@ class TfRestoreSplittingPass shape_and_slices.getValues(), restore.getTensors())) { auto new_tensor_names = - builder.create(restore.getTensorNames().getLoc(), - GetStringTensorAttr({tensor_name})); + mlir::TF::ConstOp::create(builder, restore.getTensorNames().getLoc(), + GetStringTensorAttr({tensor_name})); - auto new_shape_and_slices = builder.create( - restore.getShapeAndSlices().getLoc(), + auto new_shape_and_slices = mlir::TF::ConstOp::create( + builder, restore.getShapeAndSlices().getLoc(), GetStringTensorAttr({shape_and_slice})); - auto new_restore = builder.create( - restore.getLoc(), mlir::TypeRange({result.getType()}), + auto new_restore = mlir::TF::RestoreV2Op::create( + builder, restore.getLoc(), mlir::TypeRange({result.getType()}), restore.getPrefix(), new_tensor_names, new_shape_and_slices); 
result.replaceAllUsesWith(new_restore.getTensors()[0]); } diff --git a/tensorflow/compiler/mlir/tfrt/transforms/lower_saved_model.cc b/tensorflow/compiler/mlir/tfrt/transforms/lower_saved_model.cc index 34b37eeefe7843..916b41620ad33e 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/lower_saved_model.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/lower_saved_model.cc @@ -440,8 +440,8 @@ void LowerTFSavedModelPass::HoistInvariantOps(mlir::ModuleOp module) { // "_tfrt_resource_init" is the special function that executes all invariant // ops (eg. read-only variables) used in the model. This function should be // executed after user-specified initialization. - auto init_func_op = builder.create( - module.getLoc(), "_tfrt_resource_init", + auto init_func_op = mlir::func::FuncOp::create( + builder, module.getLoc(), "_tfrt_resource_init", mlir::FunctionType::get(module.getContext(), /*inputs=*/{}, /*results=*/{})); auto *block = init_func_op.addEntryBlock(); @@ -481,8 +481,8 @@ void LowerTFSavedModelPass::HoistInvariantOps(mlir::ModuleOp module) { auto *new_op = new_value.getDefiningOp(); assert(new_op); builder.setInsertionPointAfter(new_op); - auto set_resource_op = builder.create( - new_op->getLoc(), new_value, index); + auto set_resource_op = mlir::TF::_TfrtSetResourceOp::create( + builder, new_op->getLoc(), new_value, index); // Preserve the device attribute. llvm::StringRef device = kCpuDeviceName; @@ -494,7 +494,7 @@ void LowerTFSavedModelPass::HoistInvariantOps(mlir::ModuleOp module) { builder.setInsertionPointToEnd(block); // Finish building the init function by inserting an return op. - builder.create(init_func_op.getLoc()); + mlir::func::ReturnOp::create(builder, init_func_op.getLoc()); // Now that we have the index for each value that will be replaced, we can // create the tf._TfrtGetResource op in each function using these indices. @@ -568,8 +568,8 @@ void LowerTFSavedModelPass::ReplaceHoistedValues( llvm::SmallVector new_values; if (fuse_get_resource_ops_) { - auto get_resource_op = builder.create( - block->getParentOp()->getLoc(), old_values.getTypes(), + auto get_resource_op = mlir::TF::_TfrtGetResourceOp::create( + builder, block->getParentOp()->getLoc(), old_values.getTypes(), builder.getI64ArrayAttr(indices), builder.getStrArrayAttr(shared_name_arr), builder.getStrArrayAttr(container_arr)); @@ -577,8 +577,8 @@ void LowerTFSavedModelPass::ReplaceHoistedValues( new_values = get_resource_op.getResults(); } else { for (int i = 0; i < old_values.size(); ++i) { - auto get_resource_op = builder.create( - block->getParentOp()->getLoc(), + auto get_resource_op = mlir::TF::_TfrtGetResourceOp::create( + builder, block->getParentOp()->getLoc(), mlir::TypeRange(old_values[i].getType()), builder.getI64ArrayAttr(indices[i]), builder.getStrArrayAttr(shared_name_arr[i]), @@ -670,8 +670,8 @@ mlir::LogicalResult ConvertReferenceVariableToResourceVariable( mlir::OpBuilder builder(var_op); - auto var_handle_op = builder.create( - var_op.getLoc(), + auto var_handle_op = mlir::TF::VarHandleOp::create( + builder, var_op.getLoc(), mlir::RankedTensorType::get( {}, mlir::TF::ResourceType::get( llvm::ArrayRef{tensor_type}, @@ -682,8 +682,8 @@ mlir::LogicalResult ConvertReferenceVariableToResourceVariable( // Set insertion point to this identity_op so that the side-effect // visibility is preserved. 
builder.setInsertionPoint(op); - auto read_var_op = builder.create( - op.getLoc(), op.getType(), var_handle_op); + auto read_var_op = mlir::TF::ReadVariableOp::create( + builder, op.getLoc(), op.getType(), var_handle_op); op.replaceAllUsesWith(read_var_op.getValue()); op.erase(); } @@ -692,8 +692,8 @@ mlir::LogicalResult ConvertReferenceVariableToResourceVariable( // Set the insertion point after the assign op so that all operands are // dominating the newly created op. builder.setInsertionPoint(op); - builder.create(op.getLoc(), var_handle_op, - op.getValue()); + mlir::TF::AssignVariableOp::create(builder, op.getLoc(), var_handle_op, + op.getValue()); op.erase(); } @@ -704,8 +704,8 @@ mlir::LogicalResult ConvertReferenceVariableToResourceVariable( // the newly created op. builder.setInsertionPoint(op); // Create a new read variable op, so that the side-effects are preserved. - auto read_var_op = builder.create( - op->getLoc(), tensor_type, var_handle_op); + auto read_var_op = mlir::TF::ReadVariableOp::create( + builder, op->getLoc(), tensor_type, var_handle_op); op->setOperand(idx, read_var_op.getValue()); } diff --git a/tensorflow/compiler/mlir/tfrt/transforms/merge_tf_if_ops.cc b/tensorflow/compiler/mlir/tfrt/transforms/merge_tf_if_ops.cc index 59f602c0991faf..38737e22d1c588 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/merge_tf_if_ops.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/merge_tf_if_ops.cc @@ -225,8 +225,8 @@ class MergeTfIfOpsPass [](mlir::TF::IfOp op) { return op.getIsStateless(); }); // Create the merged tf.If op using the new branches. - auto new_if_op = builder.create( - loc, new_result_types, if_ops.front().getCond(), + auto new_if_op = mlir::TF::IfOp::create( + builder, loc, new_result_types, if_ops.front().getCond(), if_ops.front().getInput(), then_branch_name, else_branch_name, is_stateless); @@ -249,8 +249,8 @@ class MergeTfIfOpsPass llvm::ArrayRef if_ops, llvm::function_ref get_branch) { std::string branch_name = absl::StrCat(branch_prefix, branch_suffix); - auto branch = builder.create(loc, branch_name, - branch_function_type); + auto branch = mlir::func::FuncOp::create(builder, loc, branch_name, + branch_function_type); branch.setVisibility(mlir::func::FuncOp::Visibility::Private); mlir::OpBuilder::InsertionGuard guard(builder); @@ -267,8 +267,9 @@ class MergeTfIfOpsPass for (auto if_op : if_ops) { // Create the call op to the original branch. The arguments are simply // the arguments from the wrapper function. 
- auto call_op = builder.create( - if_op.getLoc(), if_op.getResultTypes(), block->getArguments(), + auto call_op = mlir::TF::PartitionedCallOp::create( + builder, if_op.getLoc(), if_op.getResultTypes(), + block->getArguments(), /*args_attrs=*/nullptr, /*res_attrs=*/nullptr, get_branch(if_op), empty_string_attr, empty_string_attr, empty_string_attr); @@ -276,7 +277,7 @@ class MergeTfIfOpsPass results.append(call_op.getOutput().begin(), call_op.getOutput().end()); } - builder.create(loc, results); + mlir::func::ReturnOp::create(builder, loc, results); return branch.getSymName(); } diff --git a/tensorflow/compiler/mlir/tfrt/transforms/xla_rewrite_pass.cc b/tensorflow/compiler/mlir/tfrt/transforms/xla_rewrite_pass.cc index 0ed5a6ac1b6a8a..fea7a988bd40d7 100644 --- a/tensorflow/compiler/mlir/tfrt/transforms/xla_rewrite_pass.cc +++ b/tensorflow/compiler/mlir/tfrt/transforms/xla_rewrite_pass.cc @@ -38,15 +38,16 @@ namespace tensorflow { namespace tfrt_compiler { namespace { -struct RewriteStatefulPartitionedCallToXlaLaunchOnCpu - : public mlir::OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +template +struct RewriteFunctionCallToXlaLaunchOnCpu + : public mlir::OpRewritePattern { + public: + using mlir::OpRewritePattern::OpRewritePattern; mlir::LogicalResult matchAndRewrite( - mlir::TF::StatefulPartitionedCallOp op, - mlir::PatternRewriter& rewriter) const override { + OpType op, mlir::PatternRewriter& rewriter) const override { if (auto xla_must_compile = - op->getAttrOfType("_XlaMustCompile"); + op->template getAttrOfType("_XlaMustCompile"); !xla_must_compile || !xla_must_compile.getValue()) { return mlir::failure(); } @@ -92,7 +93,11 @@ struct TfrtXlaRewritePass void runOnOperation() override { mlir::RewritePatternSet patterns(&getContext()); - patterns.add(&getContext()); + patterns + .add>( + &getContext()); + patterns.add>(&getContext()); if (mlir::failed( mlir::applyPatternsGreedily(getOperation(), std::move(patterns)))) { diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/broadcast_propagation_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/broadcast_propagation_pass.cc index 159e630fb8fb16..b0ad89b6b55d24 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/broadcast_propagation_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/broadcast_propagation_pass.cc @@ -280,8 +280,8 @@ DenseMap realizeBroadcastIntents( setInsertionPointToEarliestPointWithAllValuesAvailable( rewriter, parentBlock, ValueRange{it.targetValue, it.outputDimensions}); - realizations[it] = rewriter.create( - it.targetValue.getLoc(), it.resultType, it.targetValue, + realizations[it] = DynamicBroadcastInDimOp::create( + rewriter, it.targetValue.getLoc(), it.resultType, it.targetValue, it.outputDimensions, mlir::cast(it.broadcastDimensions)); continue; diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc index 200f09c33021b1..18459a9e4e13a8 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc @@ -95,10 +95,10 @@ struct AllocOpConverter : public OpConversionPattern { alloc, alloc.getType(), *ctx, adaptor.getOperands(), reuse_input_candidates, reuse_output_index); Location loc = buffer.getLoc(); - Value cond = rewriter.create( - loc, rewriter.getIntegerType(1), buffer); - rewriter.create(loc, *ctx, cond, 
ErrorCode::RESOURCE_EXHAUSTED, - "failed to allocate memory"); + Value cond = IsValidMemRefOp::create(rewriter, loc, + rewriter.getIntegerType(1), buffer); + TFAssertOp::create(rewriter, loc, *ctx, cond, ErrorCode::RESOURCE_EXHAUSTED, + "failed to allocate memory"); return success(); } }; diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/func_to_jit_invocations.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/func_to_jit_invocations.cc index 89d946516f6b9b..59792ae7297ce2 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/func_to_jit_invocations.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/func_to_jit_invocations.cc @@ -65,8 +65,8 @@ LogicalResult RewriteToFullJit(func::FuncOp op) { old_body->getArgumentTypes(), locs); // Create the JIT compile op. - auto jit_compile_op = rewriter.create( - loc, rewriter.getType(), + auto jit_compile_op = tf_framework::JITCompileOp::create( + rewriter, loc, rewriter.getType(), /*ctx=*/mlir::Value()); // Move the original functions operations into the body. @@ -80,18 +80,18 @@ LogicalResult RewriteToFullJit(func::FuncOp op) { Operation *terminator = jit_block->getTerminator(); rewriter.setInsertionPointAfter(terminator); - rewriter.create( - loc, terminator->getOperands().front()); + tf_framework::JITCompileYieldOp::create(rewriter, loc, + terminator->getOperands().front()); terminator->erase(); } // Create JIT execute op. - auto execute = rewriter.create( - loc, op.getResultTypes().front(), /*ctx=*/Value(), + auto execute = tf_framework::JITExecuteOp::create( + rewriter, loc, op.getResultTypes().front(), /*ctx=*/Value(), jit_compile_op.getResult(), new_body->getArguments()); // Create a return. - rewriter.create(loc, execute.getResult()); + func::ReturnOp::create(rewriter, loc, execute.getResult()); return success(); } @@ -111,28 +111,28 @@ LogicalResult RewriteToLargeSizeJit(FuncOp op) { // Create large argument condition. auto arg_1 = new_body->getArgument(0); - auto shape_1 = rewriter.create(loc, arg_1); - auto num_elems_1 = rewriter.create(loc, shape_1); - Value cst_i32_limit = rewriter.create(loc, i32Limit); - Value large_tensor_predicate = rewriter.create( - loc, arith::CmpIPredicate::sgt, num_elems_1, cst_i32_limit); + auto shape_1 = shape::ShapeOfOp::create(rewriter, loc, arg_1); + auto num_elems_1 = shape::NumElementsOp::create(rewriter, loc, shape_1); + Value cst_i32_limit = arith::ConstantIndexOp::create(rewriter, loc, i32Limit); + Value large_tensor_predicate = arith::CmpIOp::create( + rewriter, loc, arith::CmpIPredicate::sgt, num_elems_1, cst_i32_limit); if (new_body->getNumArguments() > 1) { auto arg_2 = new_body->getArgument(1); - auto shape_2 = rewriter.create(loc, arg_2); - auto num_elems_2 = rewriter.create(loc, shape_2); - large_tensor_predicate = rewriter.create( - loc, large_tensor_predicate, + auto shape_2 = shape::ShapeOfOp::create(rewriter, loc, arg_2); + auto num_elems_2 = shape::NumElementsOp::create(rewriter, loc, shape_2); + large_tensor_predicate = arith::OrIOp::create( + rewriter, loc, large_tensor_predicate, // Compare op to check size of the second op - rewriter.create(loc, arith::CmpIPredicate::sgt, - num_elems_2, cst_i32_limit)); + arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::sgt, + num_elems_2, cst_i32_limit)); } // Create dispatch code. auto jit_body_builder_fn = [&](OpBuilder &b, Location loc) { // Create JIT compile op. 
auto callable_ty = b.getType(); - auto jit_compile_op = - b.create(loc, callable_ty, /*ctx=*/Value()); + auto jit_compile_op = tf_framework::JITCompileOp::create( + b, loc, callable_ty, /*ctx=*/Value()); { OpBuilder::InsertionGuard g(b); Block *block = b.createBlock( @@ -144,15 +144,15 @@ LogicalResult RewriteToLargeSizeJit(FuncOp op) { for (auto &op : old_body->without_terminator()) { b.clone(op, bvm); } - b.create( - loc, block->back().getResults().front()); + tf_framework::JITCompileYieldOp::create( + b, loc, block->back().getResults().front()); } // Create JIT execute op. - auto jit_execute_op = b.create( - loc, op.getResultTypes().front(), /*ctx=*/Value(), + auto jit_execute_op = tf_framework::JITExecuteOp::create( + b, loc, op.getResultTypes().front(), /*ctx=*/Value(), jit_compile_op.getResult(), new_body->getArguments()); - b.create(loc, jit_execute_op.getResult()); + scf::YieldOp::create(b, loc, jit_execute_op.getResult()); }; auto aot_body_builder_fn = [&](OpBuilder &b, Location loc) { IRMapping bvm; @@ -161,13 +161,13 @@ LogicalResult RewriteToLargeSizeJit(FuncOp op) { for (auto &op : old_body->without_terminator()) { last_clone = b.clone(op, bvm); } - b.create(loc, last_clone->getResults().front()); + scf::YieldOp::create(b, loc, last_clone->getResults().front()); }; // Create the conditional and return operation. - auto ifOp = rewriter.create( - loc, large_tensor_predicate, jit_body_builder_fn, aot_body_builder_fn); - rewriter.create(loc, ifOp.getResults().front()); + auto ifOp = scf::IfOp::create(rewriter, loc, large_tensor_predicate, + jit_body_builder_fn, aot_body_builder_fn); + func::ReturnOp::create(rewriter, loc, ifOp.getResults().front()); // Remove the old body. rewriter.eraseBlock(old_body); @@ -186,19 +186,19 @@ void PackJITCompileOp(tf_framework::JITCompileOp op, // Temporarily, build the module that would be JIT-compiled. This is only to // obtain the serialized code attribute. auto loc = op->getLoc(); - auto jit_module = rewriter.create(loc); + auto jit_module = ModuleOp::create(rewriter, loc); { OpBuilder::InsertionGuard g(rewriter); rewriter.setInsertionPointToStart(jit_module.SingleBlock::getBody()); - auto jit_function = rewriter.create( - loc, tf_framework::JITCompileFromStrOp::kJITEntryFunctionName, + auto jit_function = func::FuncOp::create( + rewriter, loc, tf_framework::JITCompileFromStrOp::kJITEntryFunctionName, rewriter.getFunctionType(body->getArgumentTypes(), yield_op->getOperandTypes())); jit_function->setAttr(tf_framework::TFFrameworkDialect::kTFEntryAttrName, rewriter.getUnitAttr()); jit_function.getBody().takeBody(op.getBodyRegion()); rewriter.setInsertionPointToEnd(&jit_function.getBody().front()); - rewriter.create(loc, yield_op.getResult()); + func::ReturnOp::create(rewriter, loc, yield_op.getResult()); rewriter.eraseOp(yield_op); } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/merge_assuming_ops_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/merge_assuming_ops_pass.cc index 4b1d10ca8dd372..66a455ca71c745 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/merge_assuming_ops_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/merge_assuming_ops_pass.cc @@ -73,8 +73,8 @@ struct ShapeReificationPattern : public OpRewritePattern { // Insert cast if needed. 
if (reifiedShape.getType() != op.getType()) { - reifiedShape = rewriter.create(op.getLoc(), op.getType(), - reifiedShape); + reifiedShape = tensor::CastOp::create(rewriter, op.getLoc(), op.getType(), + reifiedShape); } rewriter.replaceOp(op, reifiedShape); @@ -148,9 +148,9 @@ LogicalResult moveUpIntoAssumingOpMatchAndRewrite(Operation *op, // Insert the rewritten assuming op right before the old one. OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(assumingOp); - auto newAssumingOp = rewriter.create( - assumingOp.getLoc(), assumingOp.getWitness(), - [&](OpBuilder &b, Location) { + auto newAssumingOp = shape::AssumingOp::create( + rewriter, assumingOp.getLoc(), assumingOp.getWitness(), + [&](OpBuilder& b, Location) { // Copy body. IRMapping mapping; for (auto &nested : body->without_terminator()) @@ -304,9 +304,9 @@ struct MoveUpOutOfAssumingOpPattern : public OpRewritePattern { // explicitly as they are assumed to be independent. The assuming op is // rewritten accordingly. SmallVector replacementValues; - auto newAssumingOp = rewriter.create( - assumingOp.getLoc(), assumingOp.getWitness(), - [&](OpBuilder &b, Location) { + auto newAssumingOp = shape::AssumingOp::create( + rewriter, assumingOp.getLoc(), assumingOp.getWitness(), + [&](OpBuilder& b, Location) { // Copy body. IRMapping mapping; for (Operation &nested : body->without_terminator()) { @@ -354,15 +354,16 @@ struct MergeAssumingOpsPattern : public OpRewritePattern { // Merge witnesses. OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(precedingOp); - Value newWitness = rewriter.create( - op.getWitness().getDefiningOp()->getLoc(), + Value newWitness = shape::AssumingAllOp::create( + rewriter, op.getWitness().getDefiningOp()->getLoc(), ValueRange{precedingOp.getWitness(), op.getWitness()}); // Merge assuming ops. Block *body_a = precedingOp.getBody(); Block *body_b = op.getBody(); - auto newAssumingOp = rewriter.create( - precedingOp.getLoc(), newWitness, [&](OpBuilder &b, Location) { + auto newAssumingOp = shape::AssumingOp::create( + rewriter, precedingOp.getLoc(), newWitness, + [&](OpBuilder& b, Location) { // Copy preceding op's body. 
IRMapping mapping; for (auto &nested : body_a->without_terminator()) { diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewrite_tf_framework_assert.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewrite_tf_framework_assert.cc index ceda47565bf999..959c56a87982ec 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewrite_tf_framework_assert.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewrite_tf_framework_assert.cc @@ -49,14 +49,14 @@ class TFAssertOpConverter : public OpConversionPattern { auto func = op->getParentOfType(); Block *error_reporting_block = rewriter.createBlock(&func.getRegion(), {}, {}); - rewriter.create(loc, adaptor.getCtx(), - adaptor.getErrorCode(), adaptor.getMsg()); + ReportErrorOp::create(rewriter, loc, adaptor.getCtx(), + adaptor.getErrorCode(), adaptor.getMsg()); SmallVector null_memrefs; for (auto type : func.getFunctionType().getResults()) { - null_memrefs.push_back(rewriter.create(loc, type)); + null_memrefs.push_back(NullMemRefOp::create(rewriter, loc, type)); } - rewriter.create(loc, null_memrefs); + func::ReturnOp::create(rewriter, loc, null_memrefs); rewriter.restoreInsertionPoint(ip); rewriter.replaceOpWithNewOp( diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc index a7d26813239571..2fd419972f4289 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc @@ -116,8 +116,8 @@ struct PropagateTfAbiKnowledgeToKernelsPass Value offset = kernel.getArgument(kernel_p + 2); Value &zero = constants[0]; if (!zero) { - zero = b.create(loc, offset.getType(), - b.getIndexAttr(0)); + zero = LLVM::ConstantOp::create(b, loc, offset.getType(), + b.getIndexAttr(0)); } offset.replaceAllUsesWith(zero); } @@ -128,9 +128,9 @@ struct PropagateTfAbiKnowledgeToKernelsPass kernel.getArgument(kernel_p + 2 + memref.getRank() * 2); Value &stride_val = constants[const_stride->second]; if (!stride_val) { - stride_val = b.create( - loc, inner_stride.getType(), - b.getIndexAttr(const_stride->second)); + stride_val = + LLVM::ConstantOp::create(b, loc, inner_stride.getType(), + b.getIndexAttr(const_stride->second)); } inner_stride.replaceAllUsesWith(stride_val); } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc index 4cbe21b73f62c3..21d477b30547c1 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc @@ -70,25 +70,27 @@ class ConvertToLLVMCallOpPattern : public ConvertOpToLLVMPattern { // If the attribute is missing or empty, set the element count to 0 and // return NULL. if (!attr.has_value() || attr.value().empty()) { - Value zero = rewriter->create( - loc, size_ty, rewriter->getIntegerAttr(size_ty, 0)); - Value null_ptr = rewriter->create(loc, ptr_ty); + Value zero = LLVM::ConstantOp::create( + *rewriter, loc, size_ty, rewriter->getIntegerAttr(size_ty, 0)); + Value null_ptr = LLVM::ZeroOp::create(*rewriter, loc, ptr_ty); return std::make_pair(zero, null_ptr); } // Allocate array to store the elements. 
auto &array_attr = attr.value(); - Value array_size = rewriter->create( - loc, size_ty, rewriter->getIntegerAttr(size_ty, array_attr.size())); - Value array_ptr = rewriter->create( - loc, ptr_ty, element_ty, array_size, /*alignment=*/0); + Value array_size = LLVM::ConstantOp::create( + *rewriter, loc, size_ty, + rewriter->getIntegerAttr(size_ty, array_attr.size())); + Value array_ptr = LLVM::AllocaOp::create(*rewriter, loc, ptr_ty, element_ty, + array_size, /*alignment=*/0); for (const auto &e : llvm::enumerate(array_attr)) { - Value index = rewriter->create( - loc, size_ty, rewriter->getIntegerAttr(size_ty, e.index())); - Value element_ptr = rewriter->create(loc, ptr_ty, element_ty, - array_ptr, index); + Value index = LLVM::ConstantOp::create( + *rewriter, loc, size_ty, + rewriter->getIntegerAttr(size_ty, e.index())); + Value element_ptr = LLVM::GEPOp::create(*rewriter, loc, ptr_ty, + element_ty, array_ptr, index); Value element = create_element(e.value()); - rewriter->create(loc, element, element_ptr); + LLVM::StoreOp::create(*rewriter, loc, element, element_ptr); } return std::make_pair(array_size, array_ptr); } @@ -101,8 +103,8 @@ class ConvertToLLVMCallOpPattern : public ConvertOpToLLVMPattern { assert(mlir::isa(element_ty) && "expect integer element type"); return ConvertArrayAttrToStackAllocatedArray( loc, size_ty, element_ty, attr, rewriter, [&](Attribute attr) { - return rewriter->create( - loc, element_ty, + return LLVM::ConstantOp::create( + *rewriter, loc, element_ty, rewriter->getIntegerAttr(element_ty, mlir::cast(attr).getInt())); }); @@ -136,8 +138,8 @@ class TFAllocOpConverter : public ConvertToLLVMCallOpPattern { // Convert `output_index` or set it to -1 if the attribute is missing. Type llvmInt32Type = IntegerType::get(rewriter.getContext(), 32); - Value output_index = rewriter.create( - loc, llvmInt32Type, + Value output_index = LLVM::ConstantOp::create( + rewriter, loc, llvmInt32Type, rewriter.getI32IntegerAttr(tf_alloc_op.getOutputIndex().has_value() ? 
tf_alloc_op.getOutputIndex().value() : -1)); @@ -152,12 +154,11 @@ class TFAllocOpConverter : public ConvertToLLVMCallOpPattern { FlatSymbolRefAttr tf_func_ref = GetOrInsertLLVMFunction(GetFuncName(), GetFuncType(), op, &rewriter); Value allocated_byte_ptr = - rewriter - .create( - loc, getVoidPtrType(), tf_func_ref, - llvm::ArrayRef({adaptor.getCtx(), num_elements, element_size, - output_index, candidates_count_and_ptr.first, - candidates_count_and_ptr.second})) + LLVM::CallOp::create( + rewriter, loc, getVoidPtrType(), tf_func_ref, + llvm::ArrayRef({adaptor.getCtx(), num_elements, element_size, + output_index, candidates_count_and_ptr.first, + candidates_count_and_ptr.second})) .getResult(); MemRefDescriptor memRefDescriptor = CreateMemRefDescriptor( @@ -213,7 +214,7 @@ class TFAllocOpConverter : public ConvertToLLVMCallOpPattern { // Update stride if (pos > 0) { stride_carried = - rewriter.create(loc, stride_carried, size); + LLVM::MulOp::create(rewriter, loc, stride_carried, size); } } return memref_desc; @@ -272,12 +273,12 @@ class JITCompileFromStrOpConverter ConvertIntegerArrayAttrToStackAllocatedArray( loc, rewriter.getI64Type(), rewriter.getI64Type(), op.getUnrollFactors(), &rewriter); - Value enable_ftz = rewriter.create( - loc, rewriter.getI1Type(), op.getEnableFtzAttr()); - Value index_64bit = rewriter.create( - loc, rewriter.getI1Type(), op.getIndex64BitAttr()); - Value cpu_codegen = rewriter.create( - loc, rewriter.getI1Type(), op.getCpuCodegenAttr()); + Value enable_ftz = LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI1Type(), op.getEnableFtzAttr()); + Value index_64bit = LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI1Type(), op.getIndex64BitAttr()); + Value cpu_codegen = LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI1Type(), op.getCpuCodegenAttr()); FlatSymbolRefAttr tf_func_ref = GetOrInsertLLVMFunction(GetFuncName(), GetFuncType(), op, &rewriter); rewriter.replaceOpWithNewOp( @@ -327,40 +328,39 @@ class JITExecuteOpConverter : public ConvertToLLVMCallOpPattern { getTypeConverter()->convertType(op->getResultTypes().front()); Type ptr_ty = LLVM::LLVMPointerType::get(getContext()); Type i64_ty = rewriter.getI64Type(); - Value one = rewriter.create( - loc, i64_ty, rewriter.getI64IntegerAttr(1)); + Value one = LLVM::ConstantOp::create(rewriter, loc, i64_ty, + rewriter.getI64IntegerAttr(1)); auto result_ptr = - rewriter.create(loc, ptr_ty, result_ty, one); + LLVM::AllocaOp::create(rewriter, loc, ptr_ty, result_ty, one); // Pass the buffer arguments as a stack-allocated array. 
Type args_elem_ty = adaptor.getInputs().front().getType(); - Value num_args = rewriter.create( - loc, i64_ty, + Value num_args = LLVM::ConstantOp::create( + rewriter, loc, i64_ty, rewriter.getI64IntegerAttr( static_cast(adaptor.getInputs().size()))); Value args_ptr = - rewriter.create(loc, ptr_ty, args_elem_ty, num_args, - /*alignment=*/0); + LLVM::AllocaOp::create(rewriter, loc, ptr_ty, args_elem_ty, num_args, + /*alignment=*/0); for (const auto &it : llvm::enumerate(adaptor.getInputs())) { - Value index = rewriter.create( - loc, i64_ty, rewriter.getI64IntegerAttr(it.index())); - Value element_ptr = rewriter.create( - loc, ptr_ty, args_elem_ty, args_ptr, index); - rewriter.create(loc, it.value(), element_ptr); + Value index = LLVM::ConstantOp::create( + rewriter, loc, i64_ty, rewriter.getI64IntegerAttr(it.index())); + Value element_ptr = LLVM::GEPOp::create(rewriter, loc, ptr_ty, + args_elem_ty, args_ptr, index); + LLVM::StoreOp::create(rewriter, loc, it.value(), element_ptr); } // Materialize runtime call. FlatSymbolRefAttr tf_func_ref = GetOrInsertLLVMFunction(GetFuncName(), GetFuncType(), op, &rewriter); - rewriter.create( - loc, mlir::TypeRange(), tf_func_ref, - ValueRange{adaptor.getCtx(), adaptor.getCallable(), result_ptr, - num_args, args_ptr}); + LLVM::CallOp::create(rewriter, loc, mlir::TypeRange(), tf_func_ref, + ValueRange{adaptor.getCtx(), adaptor.getCallable(), + result_ptr, num_args, args_ptr}); // Copy result (including the descriptor) to a stack-allocated buffer and // free the old descriptor. llvm::SmallVector final_result = { - rewriter.create(loc, result_ty, result_ptr)}; + LLVM::LoadOp::create(rewriter, loc, result_ty, result_ptr)}; if (failed(copyUnrankedDescriptors(rewriter, loc, op->getResultTypes(), final_result, /*toDynamic=*/false))) { @@ -402,8 +402,8 @@ class ReportErrorOpConverter // Insert function call. FlatSymbolRefAttr tf_func_ref = GetOrInsertLLVMFunction(GetFuncName(), GetFuncType(), op, &rewriter); - Value error_code = rewriter.create( - loc, typeConverter->convertType(rewriter.getI32Type()), + Value error_code = LLVM::ConstantOp::create( + rewriter, loc, typeConverter->convertType(rewriter.getI32Type()), adaptor.getErrorCodeAttr()); rewriter.replaceOpWithNewOp( op, mlir::TypeRange(), tf_func_ref, @@ -489,7 +489,7 @@ class NullMemRefOpConverter : public ConvertOpToLLVMPattern { // Prepare packed args [allocatedPtr, alignedPtr, offset, sizes, strides] // to create a memref descriptor. - Value null = rewriter.create(loc, llvm_ptr_type); + Value null = LLVM::ZeroOp::create(rewriter, loc, llvm_ptr_type); SmallVector packed_values{null, null, zero}; packed_values.append(sizes); packed_values.append(strides); @@ -518,11 +518,12 @@ class NullMemRefOpConverter : public ConvertOpToLLVMPattern { // setting its pointer to NULL. Value alloca_size = UnrankedMemRefDescriptor::computeSize( rewriter, loc, *getTypeConverter(), desc, addressSpace); - Value underlying_desc_ptr = rewriter.create( - loc, getVoidPtrType(), IntegerType::get(getContext(), 8), alloca_size); + Value underlying_desc_ptr = + LLVM::AllocaOp::create(rewriter, loc, getVoidPtrType(), + IntegerType::get(getContext(), 8), alloca_size); // Populate underlying ranked descriptor. 
- Value null = rewriter.create(loc, llvm_ptr_type); + Value null = LLVM::ZeroOp::create(rewriter, loc, llvm_ptr_type); UnrankedMemRefDescriptor::setAllocatedPtr( rewriter, loc, underlying_desc_ptr, llvm_ptr_type, null); UnrankedMemRefDescriptor::setAlignedPtr(rewriter, loc, *getTypeConverter(), @@ -551,21 +552,23 @@ class IsValidMemRefOpConverter // Compare every size in the descriptor to 0 to check num_elements == 0. int64_t rank = mlir::cast(op.getArg().getType()).getRank(); - Value is_empty_shape = rewriter.create( - loc, rewriter.getI1Type(), rewriter.getBoolAttr(false)); + Value is_empty_shape = LLVM::ConstantOp::create( + rewriter, loc, rewriter.getI1Type(), rewriter.getBoolAttr(false)); Value zero = createIndexAttrConstant(rewriter, loc, getIndexType(), 0); for (int i = 0; i < rank; ++i) { Value size = desc.size(rewriter, loc, i); - Value is_zero_size = rewriter.create( - loc, rewriter.getI1Type(), LLVM::ICmpPredicate::eq, size, zero); + Value is_zero_size = + LLVM::ICmpOp::create(rewriter, loc, rewriter.getI1Type(), + LLVM::ICmpPredicate::eq, size, zero); is_empty_shape = - rewriter.create(loc, is_empty_shape, is_zero_size); + LLVM::OrOp::create(rewriter, loc, is_empty_shape, is_zero_size); } Value ptr = desc.allocatedPtr(rewriter, loc); - Value null = rewriter.create(loc, getVoidPtrType()); - Value is_not_nullptr = rewriter.create( - loc, rewriter.getI1Type(), LLVM::ICmpPredicate::ne, ptr, null); + Value null = LLVM::ZeroOp::create(rewriter, loc, getVoidPtrType()); + Value is_not_nullptr = + LLVM::ICmpOp::create(rewriter, loc, rewriter.getI1Type(), + LLVM::ICmpPredicate::ne, ptr, null); // Valid memref = ptr != NULL || num_elements == 0; rewriter.replaceOpWithNewOp(op, is_not_nullptr, is_empty_shape); diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_kernel_to_llvm_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_kernel_to_llvm_pass.cc index ff19510805fe50..e51a397363e01e 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_kernel_to_llvm_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_kernel_to_llvm_pass.cc @@ -115,27 +115,28 @@ Value ConvertLaunchFuncOpToTfRuntimeCallPattern::generateParamsArray( for (auto argument : arguments) argument_types.push_back(argument.getType()); auto struct_type = LLVM::LLVMStructType::getNewIdentified( context_, StringRef(), argument_types); - auto one = builder.create(loc, llvm_int32_type_, - builder.getI32IntegerAttr(1)); - auto struct_ptr = builder.create( - loc, llvm_pointer_type_, struct_type, one, /*alignment=*/0); - auto array_size = builder.create( - loc, llvm_int32_type_, builder.getI32IntegerAttr(num_arguments)); - auto array_ptr = builder.create( - loc, llvm_pointer_type_, llvm_pointer_type_, array_size, /*alignment=*/0); - auto zero = builder.create(loc, llvm_int32_type_, - builder.getI32IntegerAttr(0)); + auto one = LLVM::ConstantOp::create(builder, loc, llvm_int32_type_, + builder.getI32IntegerAttr(1)); + auto struct_ptr = LLVM::AllocaOp::create(builder, loc, llvm_pointer_type_, + struct_type, one, /*alignment=*/0); + auto array_size = LLVM::ConstantOp::create( + builder, loc, llvm_int32_type_, builder.getI32IntegerAttr(num_arguments)); + auto array_ptr = + LLVM::AllocaOp::create(builder, loc, llvm_pointer_type_, + llvm_pointer_type_, array_size, /*alignment=*/0); + auto zero = LLVM::ConstantOp::create(builder, loc, llvm_int32_type_, + builder.getI32IntegerAttr(0)); for (auto en : llvm::enumerate(arguments)) { - auto index = builder.create( - loc, 
llvm_int32_type_, builder.getI32IntegerAttr(en.index())); - auto field_ptr = builder.create( - loc, llvm_pointer_type_, struct_type, struct_ptr, + auto index = LLVM::ConstantOp::create( + builder, loc, llvm_int32_type_, builder.getI32IntegerAttr(en.index())); + auto field_ptr = LLVM::GEPOp::create( + builder, loc, llvm_pointer_type_, struct_type, struct_ptr, ArrayRef{zero, index.getResult()}); - builder.create(loc, en.value(), field_ptr); + LLVM::StoreOp::create(builder, loc, en.value(), field_ptr); auto element_ptr = - builder.create(loc, llvm_pointer_type_, llvm_pointer_type_, - array_ptr, index.getResult()); - builder.create(loc, field_ptr, element_ptr); + LLVM::GEPOp::create(builder, loc, llvm_pointer_type_, + llvm_pointer_type_, array_ptr, index.getResult()); + LLVM::StoreOp::create(builder, loc, field_ptr, element_ptr); } return array_ptr; } @@ -220,11 +221,11 @@ LogicalResult ConvertLaunchFuncOpToTfRuntimeCallPattern::matchAndRewrite( }); rewriter.setInsertionPointToStart( launch_op->getParentOfType().getBody()); - function = rewriter.create( - loc, kTfWrapperLibaryLaunchHelperName, function_type); + function = LLVM::LLVMFuncOp::create( + rewriter, loc, kTfWrapperLibaryLaunchHelperName, function_type); } - rewriter.create( - loc, TypeRange(), mlir::SymbolRefAttr::get(function), + LLVM::CallOp::create( + rewriter, loc, TypeRange(), mlir::SymbolRefAttr::get(function), ArrayRef{context_arg, module_blob, kernel_name_global, adaptor.getGridSizeX(), adaptor.getGridSizeY(), diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/utils.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/utils.cc index b3cb73b78baf20..a6ee71bfed73b8 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/utils.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/utils.cc @@ -35,7 +35,7 @@ FlatSymbolRefAttr GetOrInsertLLVMFunction(StringRef func_name, Type func_type, if (!tf_func) { OpBuilder::InsertionGuard guard(*b); b->setInsertionPointToStart(module.getBody()); - tf_func = b->create(b->getUnknownLoc(), func_name, func_type); + tf_func = LLVMFuncOp::create(*b, b->getUnknownLoc(), func_name, func_type); } return SymbolRefAttr::get(b->getContext(), func_name); } @@ -55,11 +55,12 @@ Value CreateOrFindGlobalStringConstant(Location loc, StringRef global_name, StringRef symbol_name = global_op.getName(); Type symbol_type = global_op.getType(); Type ptr_type = LLVM::LLVMPointerType::get(b->getContext()); - Value global_ptr = b->create(loc, ptr_type, symbol_name); + Value global_ptr = + LLVM::AddressOfOp::create(*b, loc, ptr_type, symbol_name); Value c0 = - b->create(loc, b->getI64Type(), b->getIndexAttr(0)); - return b->create(loc, ptr_type, symbol_type, global_ptr, - ValueRange{c0, c0}); + LLVM::ConstantOp::create(*b, loc, b->getI64Type(), b->getIndexAttr(0)); + return LLVM::GEPOp::create(*b, loc, ptr_type, symbol_type, global_ptr, + ValueRange{c0, c0}); } return LLVM::createGlobalString(loc, *b, global_name, content, LLVM::Linkage::Internal); diff --git a/tensorflow/compiler/mlir/tools/optimize/BUILD b/tensorflow/compiler/mlir/tools/optimize/BUILD index 6a3cc301bc24ca..d7bece21567fdf 100644 --- a/tensorflow/compiler/mlir/tools/optimize/BUILD +++ b/tensorflow/compiler/mlir/tools/optimize/BUILD @@ -17,9 +17,7 @@ cc_library( deps = [ "//tensorflow/compiler/mlir/tools:safe_cast", "//tensorflow/core/framework:tensor_shape", - "//tensorflow/core/platform:logging", - "//tensorflow/core/platform:macros", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/types:span", - 
"@local_xla//xla/tsl/platform:status", ], ) diff --git a/tensorflow/compiler/mlir/tools/optimize/quantization_utils.cc b/tensorflow/compiler/mlir/tools/optimize/quantization_utils.cc index 09a5b928a2622f..5e0ec1ccac3b9f 100644 --- a/tensorflow/compiler/mlir/tools/optimize/quantization_utils.cc +++ b/tensorflow/compiler/mlir/tools/optimize/quantization_utils.cc @@ -21,9 +21,9 @@ limitations under the License. #include #include +#include "absl/log/check.h" #include "absl/types/span.h" #include "tensorflow/compiler/mlir/tools/safe_cast.h" -#include "xla/tsl/platform/status.h" #include "tensorflow/core/framework/tensor_shape.h" namespace tflite_migration { @@ -92,13 +92,13 @@ void SymmetricPerChannelQuantizeValues(const float* const input, // Quantize the values. int indices[kPerChannelMaxDim]; tensorflow::TensorShape unextended_shape; - TF_CHECK_OK(tensorflow::TensorShapeUtils::MakeShape(absl::MakeSpan(dimension), - &unextended_shape)); + CHECK_OK(tensorflow::TensorShapeUtils::MakeShape(absl::MakeSpan(dimension), + &unextended_shape)); tensorflow::TensorShape shape; for (int i = 0; i < kPerChannelMaxDim - unextended_shape.dims(); ++i) { - TF_CHECK_OK(shape.AddDimWithStatus(1)); + CHECK_OK(shape.AddDimWithStatus(1)); } - TF_CHECK_OK(shape.AppendShapeWithStatus(unextended_shape)); + CHECK_OK(shape.AppendShapeWithStatus(unextended_shape)); channel_dim_index += kPerChannelMaxDim - unextended_shape.dims(); for (indices[0] = 0; indices[0] < shape.dim_size(0); indices[0]++) { diff --git a/tensorflow/compiler/mlir/tosa/tests/convert-tfl-uint8.mlir b/tensorflow/compiler/mlir/tosa/tests/convert-tfl-uint8.mlir index 02e9c0649e3f78..cd9a2dcdf746fd 100644 --- a/tensorflow/compiler/mlir/tosa/tests/convert-tfl-uint8.mlir +++ b/tensorflow/compiler/mlir/tosa/tests/convert-tfl-uint8.mlir @@ -1,4 +1,4 @@ -// RUN: tf-tosa-opt --tosa-convert-tfl-uint8 --verify-each %s | FileCheck %s +// RUN: tf-tosa-opt --tosa-convert-tfl-uint8 --verify-diagnostics --verify-each %s | FileCheck %s // Operations for testing --tosa-convert-tfl-uint8 @@ -28,3 +28,20 @@ func.func @test_cast_ui8(%arg0: tensor<1x256x256x3x!quant.uniform>) -> tensor<1x256x256x3xf32> func.return %0 : tensor<1x256x256x3xf32> } + +// ---- + +// CHECK-LABEL: test_error_tosa_ops +func.func @test_error_tosa_ops(%arg0: tensor<5x10xi8>) -> (tensor<5x10xi8>, none) { + + // Dummy use to TFL dialect to load TFL dialect in MLIR context + %0 = "tfl.no_value"() <{value}> : () -> none + + // expected-error @+1 {{tosa operations are not expected in this pass. Run tosa-convert-tfl-uint8 before tosa-legalize-tfl}} + %cst1 = "tosa.const"() <{values = dense<1> : tensor<5x10xi8>}> : () -> tensor<5x10xi8> + // expected-error @+1 {{tosa operations are not expected in this pass. 
Run tosa-convert-tfl-uint8 before tosa-legalize-tfl}} + %1 = "tosa.add"(%arg0, %cst1) : (tensor<5x10xi8>, tensor<5x10xi8>) -> tensor<5x10xi8> + + + func.return %1, %0 : tensor<5x10xi8>, none +} \ No newline at end of file diff --git a/tensorflow/compiler/mlir/tosa/tests/tfl-to-tosa-pipeline.mlir b/tensorflow/compiler/mlir/tosa/tests/tfl-to-tosa-pipeline.mlir index 0b6dd410c57d9a..78e616d8967bb6 100644 --- a/tensorflow/compiler/mlir/tosa/tests/tfl-to-tosa-pipeline.mlir +++ b/tensorflow/compiler/mlir/tosa/tests/tfl-to-tosa-pipeline.mlir @@ -2930,6 +2930,21 @@ func.func @test_relu_qi8(%arg0: tensor<13x21x3x!quant.uniform>) -> tensor> { +// CHECK: %[[VAL_0:.*]] = "tosa.const"() <{values = dense<1073741824> : tensor<1xi32>}> : () -> tensor<1xi32> +// CHECK: %[[VAL_1:.*]] = "tosa.const"() <{values = dense<30> : tensor<1xi8>}> : () -> tensor<1xi8> +// CHECK: %[[VAL_2:.*]] = "tosa.const"() <{values = dense<0> : tensor<1xui16>}> : () -> tensor<1xui16> +// CHECK: %[[RESCALE_0:.*]] = tosa.rescale %[[ARG0]], %[[VAL_0]], %[[VAL_1]], %[[VAL_2]], %[[VAL_2]] {input_unsigned = true, output_unsigned = true, per_channel = false, rounding_mode = SINGLE_ROUND, scale32 = true} : (tensor>, tensor<1xi32>, tensor<1xi8>, tensor<1xui16>, tensor<1xui16>) -> tensor> +// CHECK: %[[CLAMP_0:.*]] = tosa.clamp %[[RESCALE_0]] {max_val = 65535 : ui16, min_val = 0 : ui16} : (tensor>) -> tensor> +// CHECK: return %[[CLAMP_0]] +func.func @test_relu_qu16(%arg0:tensor>) -> (tensor>) { + %0 = "tfl.relu"(%arg0) : (tensor>) -> tensor> + return %0 : tensor> +} + +// ----- + // CHECK-LABEL: test_relu0To1_qi8 // CHECK-DAG: %[[VAL_0:.*]]: tensor<13x21x3x!quant.uniform> // CHECK-DAG: %[[VAL_1:.*]] = "tosa.const"() <{values = dense<2147449478> : tensor<1xi32>}> @@ -3267,6 +3282,21 @@ func.func @test_fullyconnected_dynamic_output(%arg0: tensor<1x2048xf32>, %arg1: // ----- +// CHECK-LABEL: @test_fullyconnected_dynamic_batch +func.func @test_fullyconnected_dynamic_batch(%arg0: tensor, %arg1: tensor<256x512xf32>, %arg2: tensor<256xf32>) -> tensor { + // CHECK-DAG: %[[OUT_SHAPE:.*]] = tosa.const_shape {values = dense<[-1, 256]> : tensor<2xindex>} : () -> !tosa.shape<2> + // CHECK-DAG: %[[FILTER_SHAPE:.*]] = tosa.const_shape {values = dense<[256, 1, 1, 512]> : tensor<4xindex>} : () -> !tosa.shape<4> + // CHECK-DAG: %[[IN_SHAPE:.*]] = tosa.const_shape {values = dense<[-1, 1, 1, 512]> : tensor<4xindex>} : () -> !tosa.shape<4> + // CHECK: %[[RESHAPE_IN:.*]] = tosa.reshape %arg0, %[[IN_SHAPE]] + // CHECK: %[[RESHAPE_FILTER:.*]] = tosa.reshape %arg1, %[[FILTER_SHAPE]] + // CHECK: %[[CONV:.*]] = tosa.conv2d %[[RESHAPE_IN]], %[[RESHAPE_FILTER]], %arg2, {{.*}}, {{.*}} + // CHECK: tosa.reshape %[[CONV]], %[[OUT_SHAPE]] + %0 = "tfl.fully_connected"(%arg0, %arg1, %arg2) {fused_activation_function = "NONE", keep_num_dims = false, weights_format = "DEFAULT"} : (tensor, tensor<256x512xf32>, tensor<256xf32>) -> tensor + func.return %0 : tensor +} + +// ----- + // CHECK-LABEL: @test_fullyconnected_keep_dims func.func @test_fullyconnected_keep_dims(%arg0: tensor<1x64x64x768x!quant.uniform>, %arg1: tensor<3072x768x!quant.uniform:f32, 0.003333511995151639>>, %arg2: tensor<3072x!quant.uniform>) -> tensor<1x64x64x3072x!quant.uniform> { // CHECK-DAG: %[[CONST_SHAPE0:.*]] = tosa.const_shape {values = dense<[1, 64, 64, 3072]> : tensor<4xindex>} @@ -3684,6 +3714,24 @@ func.func @test_conv2d_int8_input_variable_bias(%input: tensor<1x32x32x8x!quant. 
// ----- +// CHECK-LABEL: func.func @test_conv2d_qu16( +// CHECK-SAME: %[[ARG0:.*]]: tensor<1x32x32x8x!quant.uniform>, +// CHECK-SAME: %[[ARG1:.*]]: tensor<3x3x8x16x!quant.uniform>) -> tensor<1x32x32x3x!quant.uniform> { +// CHECK: %[[VAL_0:.*]] = "tosa.const"() <{values = dense<14> : tensor<1xi8>}> : () -> tensor<1xi8> +// CHECK: %[[VAL_1:.*]] = "tosa.const"() <{values = dense<16384> : tensor<1xi16>}> : () -> tensor<1xi16> +// CHECK: %[[VAL_2:.*]] = "tosa.const"() <{values = dense<0> : tensor<1xi48>}> : () -> tensor<1xi48> +// CHECK: %[[VAL_3:.*]] = "tosa.const"() <{values = dense<0> : tensor<1xui16>}> : () -> tensor<1xui16> +// CHECK: %[[VAL_4:.*]] = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8> +// CHECK: %[[VAL_5:.*]] = tosa.conv2d %[[ARG0]], %[[ARG1]], %[[VAL_2]], %[[VAL_3]], %[[VAL_4]] {acc_type = i48, dilation = array, pad = array, stride = array} : (tensor<1x32x32x8x!quant.uniform>, tensor<3x3x8x16x!quant.uniform>, tensor<1xi48>, tensor<1xui16>, tensor<1xi8>) -> tensor<1x32x32x3xi48> +// CHECK: %[[RESCALE_0:.*]] = tosa.rescale %[[VAL_5]], %[[VAL_1]], %[[VAL_0]], %[[VAL_2]], %[[VAL_3]] {input_unsigned = true, output_unsigned = true, per_channel = false, rounding_mode = SINGLE_ROUND, scale32 = false} : (tensor<1x32x32x3xi48>, tensor<1xi16>, tensor<1xi8>, tensor<1xi48>, tensor<1xui16>) -> tensor<1x32x32x3x!quant.uniform> +// CHECK: return %[[RESCALE_0]] +func.func @test_conv2d_qu16(%input: tensor<1x32x32x8x!quant.uniform>, %filter: tensor<3x3x8x16x!quant.uniform>) -> tensor<1x32x32x3x!quant.uniform> { + %bias = "tfl.no_value"() {value} : () -> none + %0 = "tfl.conv_2d"(%input, %filter, %bias) {dilation_h_factor = 1 : i32, dilation_w_factor = 1 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 1 : i32, stride_w = 1 : i32} : (tensor<1x32x32x8x!quant.uniform>, tensor<3x3x8x16x!quant.uniform>, none) -> tensor<1x32x32x3x!quant.uniform> + return %0 : tensor<1x32x32x3x!quant.uniform> +} +// ----- + // CHECK-LABEL: @test_squeeze func.func @test_squeeze(%arg0: tensor<2x1x3x1xf32>) -> tensor<2x3x1xf32> { // CHECK: tosa.reshape diff --git a/tensorflow/compiler/mlir/tosa/tests/tfl-unequal-ranks.mlir b/tensorflow/compiler/mlir/tosa/tests/tfl-unequal-ranks.mlir index c4d07792549543..7805fdd9742f11 100644 --- a/tensorflow/compiler/mlir/tosa/tests/tfl-unequal-ranks.mlir +++ b/tensorflow/compiler/mlir/tosa/tests/tfl-unequal-ranks.mlir @@ -13,6 +13,15 @@ func.func @test_add(%arg0: tensor<192x192x3xf32>, %arg1: tensor<16x192x192x3xf32 // ----- +// CHECK-LABEL: test_add_dynamic +func.func @test_add_dynamic(%arg0: tensor, %arg1: tensor<5xf32>) -> tensor { + // CHECK: tosa.add + %1 = tfl.add(%arg0, %arg1) {fused_activation_function = "NONE"} : (tensor, tensor<5xf32>) -> tensor + func.return %1 : tensor +} + +// ----- + // CHECK-LABEL: test_add_qi8 func.func @test_add_qi8(%arg0: tensor<13x21x1x!quant.uniform>, %arg1: tensor<1x13x21x3x!quant.uniform>) -> tensor<1x13x21x3x!quant.uniform> { // CHECK: tosa.add diff --git a/tensorflow/compiler/mlir/tosa/transforms/convert_tfl_uint8.cc b/tensorflow/compiler/mlir/tosa/transforms/convert_tfl_uint8.cc index afd66102b8a29c..6edfc57ad8a89a 100644 --- a/tensorflow/compiler/mlir/tosa/transforms/convert_tfl_uint8.cc +++ b/tensorflow/compiler/mlir/tosa/transforms/convert_tfl_uint8.cc @@ -264,7 +264,8 @@ LogicalResult convert_graph_uint8_tensor(mlir::MLIRContext &context, // Convert intermediate tensor. 
for (auto &op : bb) { if (llvm::dyn_cast(&op)) { - continue; // Skip if the operation is a tosa::ConstOp + // Skip tosa const ops created during rescaling. + continue; } for (Value output_val : op.getResults()) { @@ -355,6 +356,13 @@ void ConvertUint8ToInt8::runOnOperation() { auto &ctx = getContext(); mlir::func::FuncOp func = getOperation(); + func.walk([&](Operation *op) { + if (isa(op)){ + // Run this before calling convert_graph_uint8_tensor as rescaling introduces tosa ops + op->emitError("tosa operations are not expected in this pass. Run tosa-convert-tfl-uint8 before tosa-legalize-tfl"); + } + }); + // Convert uint8 const tensor. const needs to be handled specifically. patterns.add(&ctx); (void)applyPatternsGreedily(func, std::move(patterns)); diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc b/tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc index 803061fe56adaf..a2aaa3b905f87f 100644 --- a/tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc +++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc @@ -4774,7 +4774,7 @@ std::optional convertOneHotOp(PatternRewriter& rewriter, Operation* op, tensorflow::GetTypeFromTFTensorShape({N, W, C}, on_value_type.getElementType()), op1_reshape_on_value.getResult(), - getTosaConstShape(rewriter, op, {N, W, C})); + getTosaConstShape(rewriter, op->getLoc(), {N, W, C})); // Reshape off_value to [1, 1, 1] auto op3_reshape_off_value = CreateOpAndInfer( @@ -4789,7 +4789,7 @@ std::optional convertOneHotOp(PatternRewriter& rewriter, Operation* op, tensorflow::GetTypeFromTFTensorShape({N, K, C}, on_value_type.getElementType()), op3_reshape_off_value.getResult(), - getTosaConstShape(rewriter, op, {N, K, C})); + getTosaConstShape(rewriter, op->getLoc(), {N, K, C})); // Reshape indices to [N, W] shape_value = diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/tosa/transforms/legalize_tf.cc index 9d227f75bad616..43a22266bcb0c6 100644 --- a/tensorflow/compiler/mlir/tosa/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_tf.cc @@ -1558,7 +1558,7 @@ LogicalResult ConvertTFTileOp::matchAndRewrite( multiples_vals.push_back( multiples_elems.getValues()[i].getInt()); - auto multiples = getTosaConstShape(rewriter, op, multiples_vals); + auto multiples = getTosaConstShape(rewriter, op->getLoc(), multiples_vals); CreateReplaceOpAndInfer(rewriter, op, output_type, tf_tile_op.getInput(), multiples); diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc b/tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc index 37a9f4234d992a..b5e19e35e9d40a 100644 --- a/tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc +++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc @@ -31,25 +31,26 @@ limitations under the License. 
#include #include "llvm/ADT/ArrayRef.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project -#include "mlir/Dialect/Quant/IR/QuantTypes.h" // from @llvm-project -#include "mlir/Dialect/Tosa/IR/TosaOps.h" // from @llvm-project +#include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project +#include "mlir/Dialect/Quant/IR/QuantTypes.h" // from @llvm-project +#include "mlir/Dialect/Tosa/IR/TosaOps.h" // from @llvm-project #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" // from @llvm-project -#include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/Dialect/Tosa/Utils/QuantUtils.h" +#include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/BuiltinAttributeInterfaces.h" // from @llvm-project -#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project -#include "mlir/IR/BuiltinTypeInterfaces.h" // from @llvm-project -#include "mlir/IR/BuiltinTypes.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Matchers.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/Region.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/IR/Value.h" // from @llvm-project -#include "mlir/IR/ValueRange.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/IR/BuiltinAttributes.h" // from @llvm-project +#include "mlir/IR/BuiltinTypeInterfaces.h" // from @llvm-project +#include "mlir/IR/BuiltinTypes.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/Region.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Types.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/IR/ValueRange.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/ir/QuantOps.h" #include "tensorflow/compiler/mlir/tensorflow/utils/dynamic_shape_utils.h" @@ -359,7 +360,8 @@ LogicalResult ConvertTFLReluOp::matchAndRewrite( auto element_type = input_type.getElementType(); if (auto quant_type = dyn_cast(element_type)) { - element_type = quant_type.getStorageType(); + element_type = + tosa::getStorageElementTypeFromQuantized(quant_type); } mlir::Attribute min_val, max_val; @@ -429,7 +431,7 @@ LogicalResult ConvertTFLRelu1Op::matchAndRewrite( auto element_type = input_type.getElementType(); if (auto quant_type = dyn_cast(element_type)) { - element_type = quant_type.getStorageType(); + element_type = tosa::getStorageElementTypeFromQuantized(quant_type); } mlir::Attribute min_val, max_val; @@ -496,7 +498,7 @@ LogicalResult ConvertTFLRelu0To1Op::matchAndRewrite( auto element_type = input_type.getElementType(); if (auto quant_type = dyn_cast(element_type)) { - element_type = quant_type.getStorageType(); + element_type = tosa::getStorageElementTypeFromQuantized(quant_type); } mlir::Attribute min_val, max_val; @@ -563,7 +565,7 @@ LogicalResult ConvertTFLRelu6Op::matchAndRewrite( auto element_type = input_type.getElementType(); if (auto quant_type = dyn_cast(element_type)) { - element_type = 
quant_type.getStorageType(); + element_type = tosa::getStorageElementTypeFromQuantized(quant_type); } mlir::Attribute min_val, max_val; @@ -1405,7 +1407,8 @@ RankedTensorType getTypeForSlice(RankedTensorType type, int64_t slice_dim, per_channel_qtype.getZeroPoints().begin() + offset, per_channel_qtype.getZeroPoints().begin() + offset + slice_size); auto output_per_channel_qtype = quant::UniformQuantizedPerAxisType::get( - per_channel_qtype.getFlags(), per_channel_qtype.getStorageType(), + per_channel_qtype.getFlags(), + tosa::getStorageElementTypeFromQuantized(per_channel_qtype), per_channel_qtype.getExpressedType(), output_scale_arr, output_zp_arr, per_channel_qtype.getQuantizedDimension(), per_channel_qtype.getStorageTypeMin(), @@ -2333,7 +2336,10 @@ LogicalResult ConvertTFLFullyConnectedOp::matchAndRewrite( // shape[1]. if (input_type.getRank() != 2) { int64_t num_elems = filter_type.getShape()[1]; - int64_t num_batch = input_type.getNumElements() / num_elems; + int64_t num_batch = ShapedType::kDynamic; + if (input_type.hasStaticShape()) { + num_batch = input_type.getNumElements() / num_elems; + } SmallVector shape_vals({num_batch, num_elems}); RankedTensorType reshape_type = @@ -3006,7 +3012,7 @@ LogicalResult ConvertTFLTileOp::matchAndRewrite( multiples_vals.push_back( multiples_elems.getValues()[i].getSExtValue()); - auto multiples = getTosaConstShape(rewriter, op, multiples_vals); + auto multiples = getTosaConstShape(rewriter, op->getLoc(), multiples_vals); CreateReplaceOpAndInfer(rewriter, op, output_type, tfl_tile_op.getInput(), multiples); diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc b/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc index d1f6772ae6c5fa..dcfff41af1f1d7 100644 --- a/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc +++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc @@ -54,11 +54,11 @@ mlir::TypeAttr getConvAccTypeAttr(PatternRewriter& rewriter, // in case of quantized types: get base element types if (auto qtype = llvm::dyn_cast(input_etype)) - input_etype = qtype.getStorageType(); + input_etype = tosa::getStorageElementTypeFromQuantized(qtype); if (auto qtype = llvm::dyn_cast(output_etype)) - output_etype = qtype.getStorageType(); + output_etype = tosa::getStorageElementTypeFromQuantized(qtype); // special cases: input_etype and output_etype are both f16 or bf16: use // acc_type=f32 @@ -355,8 +355,19 @@ Value buildRescale(PatternRewriter& rewriter, Operation* op, int32_t scale_multiplier, int32_t scale_shift, int64_t input_zp, int64_t output_zp, tosa::RoundingMode rounding_mode, bool scale32) { - bool input_unsigned = input_val.getType().isUnsignedInteger(); - bool output_unsigned = output_type.isUnsignedInteger(); + bool input_unsigned, output_unsigned; + if (auto qtype = dyn_cast( + cast(input_val.getType()).getElementType())) { + input_unsigned = !qtype.isSigned(); + } else { + input_unsigned = input_val.getType().isUnsignedInteger(); + } + if (auto qtype = + dyn_cast(output_type.getElementType())) { + output_unsigned = !qtype.isSigned(); + } else { + output_unsigned = output_type.isUnsignedInteger(); + } auto loc = op->getLoc(); Value multiplier_val = buildRescaleMultiplier(scale32, rewriter, loc, {scale_multiplier}); @@ -486,8 +497,8 @@ Value buildRescaleOpConvOutput(PatternRewriter& rewriter, Operation* op, const auto rounding_mode_attr = tosa::RoundingModeAttr::get( rewriter.getContext(), rounding_mode); - bool input_unsigned = input_qtype.isUnsignedInteger(); - bool output_unsigned = 
output_qtype.isUnsignedInteger(); + bool input_unsigned = !input_qtype.isSigned(); + bool output_unsigned = !output_qtype.isSigned(); auto loc = op->getLoc(); const Value empty_output_val = rewriter.create( @@ -664,7 +675,7 @@ Value getTosaConstHardSwish8bitTable(PatternRewriter& rewriter, Operation* op, rewriter.getF32Type(), 1.0f, 0, -128, 127); auto const_type = tensorflow::GetTypeFromTFTensorShape({256}, element_qtype); auto storage_type = tensorflow::GetTypeFromTFTensorShape( - {256}, element_qtype.getStorageType()); + {256}, getStorageElementTypeFromQuantized(element_qtype)); auto const_attr = DenseElementsAttr::get(storage_type, llvm::ArrayRef(table)); auto const_op = @@ -718,7 +729,8 @@ Value getTosaConstRsqrt8bitTable(PatternRewriter& rewriter, Operation* op, rewriter.getF32Type(), 1.0f, 0, -128, 127); auto const_type = tensorflow::GetTypeFromTFTensorShape({256}, element_qtype); auto storage_type = tensorflow::GetTypeFromTFTensorShape( - {256}, element_qtype.getStorageType()); + {256}, + tosa::getStorageElementTypeFromQuantized(element_qtype)); auto const_attr = DenseElementsAttr::get(storage_type, llvm::ArrayRef(table)); auto const_op = @@ -756,7 +768,7 @@ Value getTosaConst8bitTable(PatternRewriter& rewriter, Operation* op, rewriter.getF32Type(), 1.0f, 0, -128, 127); auto const_type = tensorflow::GetTypeFromTFTensorShape({256}, element_qtype); auto storage_type = tensorflow::GetTypeFromTFTensorShape( - {256}, element_qtype.getStorageType()); + {256}, tosa::getStorageElementTypeFromQuantized(element_qtype)); auto const_attr = DenseElementsAttr::get(storage_type, llvm::ArrayRef(table)); auto const_op = @@ -880,7 +892,7 @@ void getTosaConst32bitSoftmaxExpTable(PatternRewriter& rewriter, Operation* op, rewriter.getF32Type(), 1.0f, 0, -32768, 32767); auto const_type = tensorflow::GetTypeFromTFTensorShape({513}, element_qtype); auto storage_type = tensorflow::GetTypeFromTFTensorShape( - {513}, element_qtype.getStorageType()); + {513}, tosa::getStorageElementTypeFromQuantized(element_qtype)); auto first_const_attr = DenseElementsAttr::get(storage_type, llvm::ArrayRef(first_table)); @@ -979,15 +991,6 @@ Value getTosaConstTensorScalarInt(ImplicitLocOpBuilder& builder, Type type, return const_op.getResult(); } -Value getTosaConstShape(PatternRewriter& rewriter, Operation* op, - llvm::ArrayRef values) { - auto attr = rewriter.getIndexTensorAttr(values); - auto type = - tosa::shapeType::get(rewriter.getContext(), /* rank = */ values.size()); - return CreateOpAndInfer(rewriter, op->getLoc(), type, - attr); -} - // Create a vector from a 32-bit value tensor. Returns the size of // the new vector or -1 on error. 
// Populate a int32_t vector from a val tensor @@ -1409,7 +1412,7 @@ Value reshapeScalarTo1D(PatternRewriter& rewriter, Location loc, Value value) { auto element_qtype = dyn_cast(element_type); if (element_qtype) { storage_type = tensorflow::GetTypeFromTFTensorShape( - {1}, element_qtype.getStorageType()); + {1}, tosa::getStorageElementTypeFromQuantized(element_qtype)); } DenseElementsAttr const_attr; diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h b/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h index 20908312f40718..b22db1b0963278 100644 --- a/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h +++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h @@ -144,11 +144,6 @@ Value getTosaConstTensorSingleI32(PatternRewriter& rewriter, Operation* op, Value getTosaConstTensorScalarInt(ImplicitLocOpBuilder& builder, Type type, int64_t val, int rank); -// Create a tosa::ConstShape based on the specified values -Value getTosaConstShape(PatternRewriter& rewriter, Operation* op, - llvm::ArrayRef values); - - // Populate a int32_t vector from a val tensor // return failure if val is not a constant value // return success otherwise diff --git a/tensorflow/compiler/mlir/tosa/transforms/passes.h b/tensorflow/compiler/mlir/tosa/transforms/passes.h index 0475d46a37a091..bd170f61cb2fb8 100644 --- a/tensorflow/compiler/mlir/tosa/transforms/passes.h +++ b/tensorflow/compiler/mlir/tosa/transforms/passes.h @@ -70,7 +70,6 @@ std::unique_ptr> createVerifyFullyConvertedPass(); std::unique_ptr> createLegalizeTFLStatefulPass(); #define GEN_PASS_REGISTRATION -#define GEN_PASS_CLASSES #define GEN_PASS_DECL_TOSALEGALIZETFPASS #define GEN_PASS_DECL_TOSALEGALIZETFLPASS #define GEN_PASS_DECL_TOSALEGALIZETFTFLPASS diff --git a/tensorflow/compiler/mlir/tosa/transforms/strip_metadata.cc b/tensorflow/compiler/mlir/tosa/transforms/strip_metadata.cc index e4a6ca5a6e56a5..53d21de0195999 100644 --- a/tensorflow/compiler/mlir/tosa/transforms/strip_metadata.cc +++ b/tensorflow/compiler/mlir/tosa/transforms/strip_metadata.cc @@ -25,7 +25,8 @@ limitations under the License. 
namespace mlir::tosa { -#define GEN_PASS_DEF_STRIPM +#define GEN_PASS_DEF_STRIPFUNCTIONMETADATA +#define GEN_PASS_DEF_STRIPMODULEMETADATA #include "tensorflow/compiler/mlir/tosa/transforms/passes.h.inc" namespace { @@ -45,7 +46,7 @@ static bool isTFLAttr(NamedAttribute &namedAttr) { } class StripModuleMetadataPass - : public StripModuleMetadataBase { + : public impl::StripModuleMetadataBase { public: void runOnOperation() override { auto moduleOp = getOperation(); @@ -59,7 +60,7 @@ class StripModuleMetadataPass }; class StripFunctionMetadataPass - : public StripFunctionMetadataBase { + : public impl::StripFunctionMetadataBase { public: void runOnOperation() override { auto funcOp = getOperation(); diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 00cd5c6dd87c96..3989c361047566 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -1788,7 +1788,6 @@ tf_xla_py_strict_test( srcs = ["unary_ops_test.py"], shard_count = 20, tags = [ - "cuda-only", "no_aarch64", # TODO(b/348125886) "no_cuda_asan", # times out "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -2861,7 +2860,6 @@ tf_cuda_cc_test( tags = [ "config-cuda-only", "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip - "cuda-only", # ROCmSoftwarePlatform #958 "noasan", # TODO(b/201651800) "requires-gpu-nvidia", ] + tf_cuda_tests_tags(), @@ -2882,7 +2880,6 @@ tf_cuda_cc_test( tags = [ "config-cuda-only", "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip - "cuda-only", # ROCmSoftwarePlatform #958 "noasan", # TODO(b/201651800) "requires-gpu-nvidia", ] + tf_cuda_tests_tags(), @@ -2902,13 +2899,12 @@ tf_cuda_cc_test( "//tensorflow/compiler/jit:xla_kernel_creator", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/core:framework", - "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/kernels:ops_testutil", - "@local_xla//xla/tsl/platform:status", + "@com_google_absl//absl/log:check", ], ) diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc index fcd3aadbe10c9a..43d468a763f190 100644 --- a/tensorflow/compiler/tests/randomized_tests.cc +++ b/tensorflow/compiler/tests/randomized_tests.cc @@ -90,11 +90,9 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/bfloat16.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/test.h" -#include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/public/session.h" #include "tensorflow/core/public/session_options.h" @@ -4935,8 +4933,8 @@ int main(int argc, char** argv) { // XLA devices register kernels at construction time; create all known devices // to make sure the kernels are registered. 
std::vector> devices; - TF_CHECK_OK(tensorflow::DeviceFactory::AddDevices( - tensorflow::SessionOptions(), "", &devices)); + CHECK_OK(tensorflow::DeviceFactory::AddDevices(tensorflow::SessionOptions(), + "", &devices)); tensorflow::StaticDeviceMgr device_mgr(std::move(devices)); tensorflow::Device* ignored; diff --git a/tensorflow/compiler/tests/unary_ops_composition_test.cc b/tensorflow/compiler/tests/unary_ops_composition_test.cc index c27b8070bbb450..00fd0ea67041b9 100644 --- a/tensorflow/compiler/tests/unary_ops_composition_test.cc +++ b/tensorflow/compiler/tests/unary_ops_composition_test.cc @@ -16,12 +16,13 @@ limitations under the License. #include #include #include +#include #include +#include "absl/log/check.h" #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "xla/tsl/lib/core/status_test_util.h" -#include "xla/tsl/platform/status.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/device_factory.h" @@ -33,7 +34,6 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/util/port.h" @@ -84,8 +84,8 @@ class UnaryOpsCompositionTest : public OpsTestBase { DeviceContext* device_context = device_->tensorflow_accelerator_device_info()->default_context; - TF_CHECK_OK(device_context->CopyCPUTensorToDeviceSync(&input_on_host, - device_, input)); + CHECK_OK(device_context->CopyCPUTensorToDeviceSync(&input_on_host, device_, + input)); TF_ASSERT_OK(RunOpKernel()); @@ -95,7 +95,7 @@ class UnaryOpsCompositionTest : public OpsTestBase { Tensor* output = GetOutput(0); Tensor output_on_host(cpu_allocator, output->dtype(), output->shape()); - TF_CHECK_OK(device_context->CopyDeviceTensorToCPUSync( + CHECK_OK(device_context->CopyDeviceTensorToCPUSync( output, "output 0", device_, &output_on_host)); test::ExpectClose(expected_tensor, output_on_host, /*atol=*/1e-5, diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index e5545445817ec2..9c2dfc073afccb 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -490,7 +490,6 @@ cc_library( "@local_xla//xla/service/cpu:executable_proto_cc", "@local_xla//xla/tsl/concurrency:async_value", "@local_xla//xla/tsl/platform:env", - "@local_xla//xla/tsl/platform:status", ], ) @@ -721,12 +720,15 @@ cc_library( ":common", ":xla_expression", ":xla_helpers", + ":xla_resource", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/common_runtime:core_cpu_internal", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/types:span", - "@local_xla//xla:literal", "@local_xla//xla:shape_util", "@local_xla//xla:status_macros", "@local_xla//xla:xla_data_proto_cc", @@ -767,7 +769,6 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span", - "@local_tsl//tsl/platform:status", "@local_xla//xla:util", "@local_xla//xla/client:client_library", ], @@ -846,18 +847,25 @@ cc_library( "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/platform:refcount", + 
"@com_google_absl//absl/container:btree", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@local_xla//xla:executable_run_options", - "@local_xla//xla:types", - "@local_xla//xla/backends/gpu/collectives:gpu_clique_key", + "@local_xla//xla:literal", + "@local_xla//xla:literal_util", + "@local_xla//xla:shape_util", "@local_xla//xla/core/collectives:clique_id", "@local_xla//xla/core/collectives:clique_key", "@local_xla//xla/hlo/builder:xla_builder", "@local_xla//xla/hlo/builder:xla_computation", - "@local_xla//xla/hlo/builder/lib:arithmetic", - "@local_xla//xla/hlo/builder/lib:constants", "@local_xla//xla/hlo/ir:hlo", "@local_xla//xla/hlo/translate/mhlo_to_hlo:layout_util", "@local_xla//xla/service:computation_placer_hdr", @@ -1046,6 +1054,11 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@local_xla//xla:status_macros", @@ -1063,6 +1076,7 @@ tf_cc_test( "//tensorflow/cc:function_ops", "//tensorflow/cc:functional_ops", "//tensorflow/cc:ops", + "//tensorflow/cc:scope", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -1071,6 +1085,8 @@ tf_cc_test( "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", ], ) @@ -1253,10 +1269,9 @@ cc_library( "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/types:span", - "@local_tsl//tsl/platform:status", - "@local_tsl//tsl/platform:statusor", "@local_xla//xla/stream_executor:device_memory", "@local_xla//xla/stream_executor:stream", + "@local_xla//xla/tsl/platform:statusor", ], alwayslink = 1, ) @@ -1327,21 +1342,21 @@ cc_library( "functionalize_cond.h", ], deps = [ - ":frontend_attributes_util", ":functionalize_control_flow_util", - ":tf2xla_util", + ":tf2xla_defs", "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", - "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "@com_google_absl//absl/memory", + "//tensorflow/core/platform:hash", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", "@local_xla//xla:status_macros", - "@local_xla//xla:union_find", ], ) @@ -1607,7 +1622,14 @@ cc_library( visibility = [":friends"], deps = [ "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core/platform:errors", + "//tensorflow/core/platform:str_util", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@local_xla//xla/tsl/platform:errors", ], ) @@ -1673,12 +1695,18 @@ cc_library( 
deps = [ ":resource_operation_table", "//tensorflow/core:core_cpu", - "//tensorflow/core:graph", + "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/common_runtime:function_body", + "//tensorflow/core/common_runtime:function_utils", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/hash", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@local_xla//xla:status_macros", ], diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 2adc83512c6617..b5426bc35c58df 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -16,30 +16,49 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/functionalize_cond.h" #include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include -#include "absl/memory/memory.h" -#include "absl/strings/match.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" -#include "absl/types/optional.h" -#include "tensorflow/compiler/tf2xla/frontend_attributes_util.h" +#include "absl/strings/string_view.h" #include "tensorflow/compiler/tf2xla/functionalize_control_flow_util.h" -#include "tensorflow/compiler/tf2xla/tf2xla_util.h" -#include "xla/union_find.h" -#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/compiler/tf2xla/tf2xla_defs.h" +#include "xla/status_macros.h" +#include "xla/tsl/platform/errors.h" +#include "xla/tsl/platform/statusor.h" #include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/graph/algorithm.h" -#include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_node_util.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/hash/hash.h" -#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/hash.h" #include "tensorflow/core/util/dump_graph.h" namespace tensorflow { @@ -1138,7 +1157,7 @@ StateMap::CondId FunctionalizeCond::StateAlongEdge(const Edge* e) { StateMap::CondState state; if (id != nullptr) state = *id; OutputTensor predicate; - TF_CHECK_OK(GetSwitchPredicate(*src, &predicate)); + CHECK_OK(GetSwitchPredicate(*src, &predicate)); if (e->IsControlEdge()) { // In gradients of tf.cond(), in each branch, we have a NoOp node as // control pivot. 
These NoOp nodes have control dependency from Switch diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index bb50d530484b10..281da5c23c54e8 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -1893,18 +1893,17 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:xla_resource", "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/core:framework", - "//tensorflow/core:portable_gif_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/platform:errors", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@local_xla//xla:literal", "@local_xla//xla:shape_util", "@local_xla//xla:xla_data_proto_cc", "@local_xla//xla/hlo/builder:xla_builder", "@local_xla//xla/hlo/builder:xla_computation", - "@local_xla//xla/tsl/platform:status", ], ) @@ -3019,10 +3018,10 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/log:check", "@local_xla//xla:shape_util", "@local_xla//xla:xla_data_proto_cc", "@local_xla//xla/hlo/builder:xla_builder", - "@local_xla//xla/tsl/platform:status", ], ) diff --git a/tensorflow/compiler/tf2xla/kernels/light_outside_compilation.cc b/tensorflow/compiler/tf2xla/kernels/light_outside_compilation.cc index 899c0063035b82..b699358106e5c4 100644 --- a/tensorflow/compiler/tf2xla/kernels/light_outside_compilation.cc +++ b/tensorflow/compiler/tf2xla/kernels/light_outside_compilation.cc @@ -553,7 +553,8 @@ absl::Status PopulateMetadataBufferIfNeeded(OpKernelContext& ctx, callback_data.outputs(i).buffer_description().shape())); void* location = static_cast(allocated->data()) + xla::ShapeUtil::ByteSizeOf(xla_shape); - se::DeviceMemoryBase m{location, num_dimensions * sizeof(int32_t)}; + stream_executor::DeviceAddressBase m{location, + num_dimensions * sizeof(int32_t)}; TF_RETURN_IF_ERROR(stream->Memcpy(&m, shape_info.data(), num_dimensions * sizeof(int32_t))); } diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc b/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc index 3bfe9e384405b2..6e8417e2d25ff2 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc @@ -16,9 +16,11 @@ limitations under the License. // XLA-specific reduction Ops. #include +#include #include #include "absl/container/inlined_vector.h" +#include "absl/log/check.h" #include "absl/log/log.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" @@ -30,14 +32,12 @@ limitations under the License. #include "xla/hlo/builder/xla_computation.h" #include "xla/literal.h" #include "xla/shape_util.h" -#include "xla/tsl/platform/status.h" #include "xla/xla_data.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/errors.h" -#include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -125,7 +125,7 @@ void XlaReductionOp::Compile(XlaOpKernelContext* ctx) { // Construct the builder for the reduction lambda. 
xla::XlaBuilder r(absl::StrCat(desc, "-reduction")); xla::PrimitiveType type; - TF_CHECK_OK(DataTypeToPrimitiveType(reduction_type_, &type)); + CHECK_OK(DataTypeToPrimitiveType(reduction_type_, &type)); auto data = xla::ConvertElementType(ctx->Input(0), type); // Call virtual method to get the initial value. diff --git a/tensorflow/compiler/tf2xla/kernels/xla_call_module_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_call_module_op.cc index e06c0b09ba9938..c2aee328a9cd23 100644 --- a/tensorflow/compiler/tf2xla/kernels/xla_call_module_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/xla_call_module_op.cc @@ -519,7 +519,7 @@ class XlaCallModuleOp : public XlaOpKernel { } else if (options.add_token_input_output) { // Add a dummy token if the inner computation takes a token but the // custom call doesn't have a token argument. - args.push_back(builder.create(loc)); + args.push_back(mlir::stablehlo::CreateTokenOp::create(builder, loc)); } input_args.reserve(result.input_mapping.size()); @@ -530,7 +530,7 @@ class XlaCallModuleOp : public XlaOpKernel { // Call the lowered function. auto call = - builder.create(loc, main_func, input_args); + mlir::func::CallOp::create(builder, loc, main_func, input_args); // Unpack the result tuple (`options.always_return_tuple` is true). If // `has_tuple_input_output` is true, the first result is a token type. @@ -548,7 +548,7 @@ class XlaCallModuleOp : public XlaOpKernel { mlir::Value token = results.back(); if (!token.use_empty()) { token.replaceAllUsesWith( - builder.create(loc)); + mlir::stablehlo::CreateTokenOp::create(builder, loc)); } results.pop_back(); } diff --git a/tensorflow/compiler/tf2xla/kernels/xla_custom_call_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_custom_call_op.cc index 99a0ec6d9e38dd..e9d0314780ca54 100644 --- a/tensorflow/compiler/tf2xla/kernels/xla_custom_call_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/xla_custom_call_op.cc @@ -13,20 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include +#include "absl/log/check.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "xla/hlo/builder/xla_builder.h" #include "xla/shape.h" -#include "xla/tsl/platform/status.h" #include "xla/xla_data.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace { @@ -47,8 +47,7 @@ class XlaCustomCallOp : public XlaOpKernel { } xla::Shape output_shape; - TF_CHECK_OK( - TensorShapeToXLAShape(output_type_, output_shape_, &output_shape)); + CHECK_OK(TensorShapeToXLAShape(output_type_, output_shape_, &output_shape)); xla::XlaOp output = xla::CustomCall(ctx->builder(), target_name_, inputs, output_shape, backend_config_); ctx->SetOutput(0, output); diff --git a/tensorflow/compiler/tf2xla/light_outside_compilation_kernels_for_test.cc b/tensorflow/compiler/tf2xla/light_outside_compilation_kernels_for_test.cc index dd493a5606b597..93444bfeb49125 100644 --- a/tensorflow/compiler/tf2xla/light_outside_compilation_kernels_for_test.cc +++ b/tensorflow/compiler/tf2xla/light_outside_compilation_kernels_for_test.cc @@ -14,7 +14,9 @@ limitations under the License. 
==============================================================================*/ #include -#include +#include +#include +#include #include "absl/log/check.h" #include "absl/status/status.h" @@ -24,6 +26,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "xla/stream_executor/device_memory.h" #include "xla/stream_executor/stream.h" +#include "xla/tsl/platform/statusor.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_util.h" @@ -36,8 +39,6 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/types.h" -#include "tsl/platform/status.h" -#include "tsl/platform/statusor.h" // Sample kernels for the light outside compilation test. @@ -64,14 +65,15 @@ class TestStaticTfOp : public OpKernel { // Just pass the value through. uint64_t size = input.AllocatedBytes(); - se::DeviceMemoryBase gpu_dst{out_tensor->data(), size}; + stream_executor::DeviceAddressBase gpu_dst{out_tensor->data(), size}; se::Stream* stream = ctx->op_device_context()->stream(); - OP_REQUIRES_OK(ctx, - stream->MemcpyD2D( - /*gpu_dst=*/&gpu_dst, - /*gpu_src=*/se::DeviceMemoryBase{input.data(), size}, - /*size=*/input.AllocatedBytes())); + OP_REQUIRES_OK( + ctx, + stream->MemcpyD2D( + /*gpu_dst=*/&gpu_dst, + /*gpu_src=*/stream_executor::DeviceAddressBase{input.data(), size}, + /*size=*/input.AllocatedBytes())); } }; @@ -105,21 +107,23 @@ class TestStaticMultipleOutputTfOp : public OpKernel { // Just pass the value through. uint64_t size = input.AllocatedBytes(); - se::DeviceMemoryBase gpu_dst1{out_tensor1->data(), size}; - se::DeviceMemoryBase gpu_dst2{out_tensor2->data(), size}; + stream_executor::DeviceAddressBase gpu_dst1{out_tensor1->data(), size}; + stream_executor::DeviceAddressBase gpu_dst2{out_tensor2->data(), size}; se::Stream* stream = ctx->device()->tensorflow_accelerator_device_info()->stream; - OP_REQUIRES_OK(ctx, - stream->MemcpyD2D( - /*gpu_dst=*/&gpu_dst1, - /*gpu_src=*/se::DeviceMemoryBase{input.data(), size}, - /*size=*/input.AllocatedBytes())); - OP_REQUIRES_OK(ctx, - stream->MemcpyD2D( - /*gpu_dst=*/&gpu_dst2, - /*gpu_src=*/se::DeviceMemoryBase{input.data(), size}, - /*size=*/input.AllocatedBytes())); + OP_REQUIRES_OK( + ctx, + stream->MemcpyD2D( + /*gpu_dst=*/&gpu_dst1, + /*gpu_src=*/stream_executor::DeviceAddressBase{input.data(), size}, + /*size=*/input.AllocatedBytes())); + OP_REQUIRES_OK( + ctx, + stream->MemcpyD2D( + /*gpu_dst=*/&gpu_dst2, + /*gpu_src=*/stream_executor::DeviceAddressBase{input.data(), size}, + /*size=*/input.AllocatedBytes())); } }; @@ -165,12 +169,12 @@ class TestDynamicTfOp : public OpKernel { se::Stream* stream = ctx->device()->tensorflow_accelerator_device_info()->stream; - se::DeviceMemoryBase gpu_dst{out_tensor->data(), size_to_cpy}; + stream_executor::DeviceAddressBase gpu_dst{out_tensor->data(), size_to_cpy}; OP_REQUIRES_OK(ctx, stream->MemcpyD2D( /*gpu_dst=*/&gpu_dst, /*gpu_src=*/ - se::DeviceMemoryBase{input.data(), - static_cast(size)}, + stream_executor::DeviceAddressBase{ + input.data(), static_cast(size)}, /*size=*/size_to_cpy)); } @@ -211,7 +215,7 @@ class DynamicMultidimOp : public OpKernel { void Compute(OpKernelContext* ctx) override { TensorShape output_shape; - auto vec = ctx->input(0).flat(); + auto vec = ctx->input(0).flat(); for (int i = 0; i < vec.size(); i++) { OP_REQUIRES_OK(ctx, 
output_shape.AddDimWithStatus(vec(i))); } @@ -225,8 +229,8 @@ class DynamicMultidimOp : public OpKernel { for (int i = 0; i < output_shape.num_elements(); i++) { host_data[i] = 1.0; } - se::DeviceMemoryBase gpu_dst{out_tensor->data(), - static_cast(num_elements)}; + stream_executor::DeviceAddressBase gpu_dst{ + out_tensor->data(), static_cast(num_elements)}; se::Stream* stream = ctx->device()->tensorflow_accelerator_device_info()->stream; @@ -299,13 +303,13 @@ class TestTfMustBeConstantOp : public OpKernel { AllocatorAttributes pinned_alloc_attrs; pinned_alloc_attrs.set_on_host(true); pinned_alloc_attrs.set_gpu_compatible(true); - TF_CHECK_OK(ctx->allocate_temp(input.dtype(), input.shape(), &tmp, - pinned_alloc_attrs)); + CHECK_OK(ctx->allocate_temp(input.dtype(), input.shape(), &tmp, + pinned_alloc_attrs)); - OP_REQUIRES_OK( - ctx, stream->Memcpy(tmp.data(), - se::DeviceMemoryBase{input.data(), allocated_size}, - allocated_size)); + OP_REQUIRES_OK(ctx, stream->Memcpy(tmp.data(), + stream_executor::DeviceAddressBase{ + input.data(), allocated_size}, + allocated_size)); OP_REQUIRES_OK(ctx, stream->BlockHostUntilDone()); @@ -316,8 +320,8 @@ class TestTfMustBeConstantOp : public OpKernel { Tensor* out_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output("output", ctx->input(0).shape(), &out_tensor)); - se::DeviceMemoryBase gpu_dst{out_tensor->data(), - static_cast(allocated_size)}; + stream_executor::DeviceAddressBase gpu_dst{ + out_tensor->data(), static_cast(allocated_size)}; OP_REQUIRES_OK(ctx, stream->Memcpy(&gpu_dst, tmp.data(), allocated_size)); } }; @@ -361,11 +365,12 @@ class TestDynamicTfWithBoundOp : public OpKernel { se::Stream* stream = ctx->device()->tensorflow_accelerator_device_info()->stream; - se::DeviceMemoryBase gpu_dst{out_tensor->data(), size_to_cpy}; + stream_executor::DeviceAddressBase gpu_dst{out_tensor->data(), size_to_cpy}; OP_REQUIRES_OK( ctx, stream->MemcpyD2D( /*gpu_dst=*/&gpu_dst, - /*gpu_src=*/se::DeviceMemoryBase{input.data(), size_to_cpy}, + /*gpu_src=*/ + stream_executor::DeviceAddressBase{input.data(), size_to_cpy}, /*size=*/size_to_cpy)); } diff --git a/tensorflow/compiler/tf2xla/literal_util_test.cc b/tensorflow/compiler/tf2xla/literal_util_test.cc index b7c9b5fd7bbf13..e8c7dc1a579b6b 100644 --- a/tensorflow/compiler/tf2xla/literal_util_test.cc +++ b/tensorflow/compiler/tf2xla/literal_util_test.cc @@ -52,9 +52,9 @@ TEST(LiteralUtil, LiteralToHostTensor) { template using LiteralUtilTest = ::testing::Test; using Types = - ::testing::Types, std::pair, - std::pair, std::pair, - std::pair>; + ::testing::Types, std::pair, + std::pair, std::pair, + std::pair>; TYPED_TEST_SUITE(LiteralUtilTest, Types); diff --git a/tensorflow/compiler/tf2xla/mlir_tf2xla.cc b/tensorflow/compiler/tf2xla/mlir_tf2xla.cc index a9b2ead7b4d839..114905925cbf20 100644 --- a/tensorflow/compiler/tf2xla/mlir_tf2xla.cc +++ b/tensorflow/compiler/tf2xla/mlir_tf2xla.cc @@ -127,7 +127,7 @@ absl::Status ConvertGraphDefToXlaViaMlir( // with a placeholder node that contains a single output. 
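The light-outside-compilation test kernels above all follow the same copy pattern with the renamed device-pointer wrapper. A minimal sketch of a device-to-device copy in that style, assuming a valid stream and raw device pointers covering at least `bytes` bytes:

#include <cstdint>

#include "absl/status/status.h"
#include "xla/stream_executor/device_memory.h"
#include "xla/stream_executor/stream.h"

absl::Status CopyDeviceToDevice(stream_executor::Stream* stream, void* dst_ptr,
                                void* src_ptr, uint64_t bytes) {
  // DeviceAddressBase pairs an untyped device pointer with a byte size; it is
  // the spelling these hunks use in place of se::DeviceMemoryBase.
  stream_executor::DeviceAddressBase dst{dst_ptr, bytes};
  stream_executor::DeviceAddressBase src{src_ptr, bytes};
  return stream->MemcpyD2D(&dst, src, bytes);
}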
FunctionLibraryDefinition flib_def(OpRegistry::Global(), graph_def.library()); std::unique_ptr graph(new Graph(flib_def)); - std::unordered_map feed_name_remap; + std::unordered_map feed_name_remap; TF_RETURN_IF_ERROR(AddPlaceholdersForFeeds(config, graph->op_registry(), &feed_name_remap, &graph_def)); diff --git a/tensorflow/compiler/tf2xla/resource_util.cc b/tensorflow/compiler/tf2xla/resource_util.cc index e78828df4e13a4..50990e0bb2858d 100644 --- a/tensorflow/compiler/tf2xla/resource_util.cc +++ b/tensorflow/compiler/tf2xla/resource_util.cc @@ -15,13 +15,27 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/resource_util.h" +#include #include +#include +#include #include #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/container/inlined_vector.h" +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" #include "tensorflow/compiler/tf2xla/resource_operation_table.h" #include "xla/status_macros.h" +#include "xla/tsl/platform/errors.h" +#include "xla/tsl/platform/statusor.h" +#include "tensorflow/core/common_runtime/function_body.h" +#include "tensorflow/core/common_runtime/function_utils.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/errors.h" @@ -204,8 +218,8 @@ absl::Status PropagateThroughCallOp( // Instantiate associated function to get function body. FunctionLibraryRuntime::Handle handle; TF_RETURN_IF_ERROR(InstantiateFunctionCall(n.def(), lib_runtime, &handle)); - auto release_handle_on_return = gtl::MakeCleanup( - [&] { TF_CHECK_OK(lib_runtime->ReleaseHandle(handle)); }); + auto release_handle_on_return = + gtl::MakeCleanup([&] { CHECK_OK(lib_runtime->ReleaseHandle(handle)); }); const FunctionBody* fbody = lib_runtime->GetFunctionBody(handle); // Recursively analyze called function for resource sources and users. diff --git a/tensorflow/compiler/tf2xla/side_effect_util.cc b/tensorflow/compiler/tf2xla/side_effect_util.cc index e8b2a56cdf64d2..a28d6ac8b1554f 100644 --- a/tensorflow/compiler/tf2xla/side_effect_util.cc +++ b/tensorflow/compiler/tf2xla/side_effect_util.cc @@ -15,8 +15,21 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/side_effect_util.h" +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/status/status.h" #include "absl/strings/numbers.h" +#include "absl/types/span.h" +#include "xla/tsl/platform/errors.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/str_util.h" namespace tensorflow { @@ -98,9 +111,8 @@ std::set CalculateTokenInputsForOutputToken(const Graph& g) { first_side_effecting_node_on_path = n; std::string original_node_name; - TF_CHECK_OK(GetNodeAttr(n->def(), - kXlaOriginalOutsideCompilationNodeName, - &original_node_name)); + CHECK_OK(GetNodeAttr(n->def(), kXlaOriginalOutsideCompilationNodeName, + &original_node_name)); results.insert(original_node_name); }, [&](Node* n) { diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc index 042b572c234355..5884cddba62b3d 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc @@ -15,19 +15,36 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/tf2xla_util.h" -#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include #include +#include +#include #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "tensorflow/compiler/tf2xla/sharding_util.h" #include "tensorflow/compiler/tf2xla/tf2xla.pb.h" +#include "xla/tsl/platform/errors.h" +#include "xla/tsl/platform/statusor.h" #include "xla/xla_data.pb.h" -#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/function_body.h" +#include "tensorflow/core/common_runtime/function_def_utils.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/graph_def_util.h" @@ -35,13 +52,16 @@ limitations under the License. #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/node_def_util.h" -#include "tensorflow/core/framework/op_def_builder.h" +#include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/graph_debug_info_builder.h" +#include "tensorflow/core/graph/graph_node_util.h" #include "tensorflow/core/graph/tensor_id.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -1025,7 +1045,7 @@ absl::Status RewriteTensorListWithConstElement(Graph* g, // Look into forward While body function and check if TensorListPushBack op // has a Const input. 
NameAttrList fwd_body_attr; - TF_CHECK_OK(GetNodeAttr(fwd_while->def(), "body", &fwd_body_attr)); + CHECK_OK(GetNodeAttr(fwd_while->def(), "body", &fwd_body_attr)); const FunctionDef* fwd_body = fld->Find(fwd_body_attr.name()); if (!fwd_body) { return errors::InvalidArgument("Cannot find function ", @@ -1033,7 +1053,7 @@ absl::Status RewriteTensorListWithConstElement(Graph* g, fwd_while->DebugString()); } std::unique_ptr fwd_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper( + CHECK_OK(FunctionDefToBodyHelper( *fwd_body, AttrSlice(&fwd_body_attr.attr()), fld, &fwd_fbody)); // Find the TensorListPushBack node; it's one of fwd_arg's successors. @@ -1051,7 +1071,7 @@ absl::Status RewriteTensorListWithConstElement(Graph* g, // Get input for the TensorListPushBack node. Node* input_node; - TF_CHECK_OK(tl_push_nodes[0]->input_node(1, &input_node)); + CHECK_OK(tl_push_nodes[0]->input_node(1, &input_node)); if (input_node->type_string() != "Const") { // Input for the TensorList is not Const node. continue; @@ -1062,7 +1082,7 @@ absl::Status RewriteTensorListWithConstElement(Graph* g, // Rewrite backward While body function, replace usages of // TensorListPopBack with a Const node. NameAttrList bwd_body_attr; - TF_CHECK_OK(GetNodeAttr(bwd_while->def(), "body", &bwd_body_attr)); + CHECK_OK(GetNodeAttr(bwd_while->def(), "body", &bwd_body_attr)); const FunctionDef* bwd_body = fld->Find(bwd_body_attr.name()); if (!bwd_body) { return errors::InvalidArgument("Cannot find function ", @@ -1070,7 +1090,7 @@ absl::Status RewriteTensorListWithConstElement(Graph* g, bwd_while->DebugString()); } std::unique_ptr bwd_fbody; - TF_CHECK_OK(FunctionDefToBodyHelper( + CHECK_OK(FunctionDefToBodyHelper( *bwd_body, AttrSlice(&bwd_body_attr.attr()), fld, &bwd_fbody)); // Find the TensorListPopBack node; it's one of bwd_arg's successors. diff --git a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc index ef64b82f50e5be..1d81f778232523 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc @@ -15,25 +15,39 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include +#include +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/status/status.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "tensorflow/cc/framework/ops.h" -#include "tensorflow/cc/ops/data_flow_ops.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/functional_ops.h" #include "tensorflow/cc/ops/list_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/cc/ops/math_ops.h" +#include "tensorflow/cc/ops/no_op.h" #include "tensorflow/compiler/tf2xla/sharding_util.h" +#include "xla/tsl/lib/core/status_test_util.h" #include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/common_runtime/graph_optimizer.h" +#include "tensorflow/core/common_runtime/function_def_utils.h" #include "tensorflow/core/common_runtime/process_function_library_runtime.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/version.h" @@ -492,8 +506,7 @@ TEST(PropagateConstIntoFunctionalNodes, RewriteTensorListWithConstMember) { const FunctionDef* bwd_body = fld.Find("bwd_body_tl_rewrite_0"); ASSERT_NE(bwd_body, nullptr); std::unique_ptr bwd_fbody; - TF_CHECK_OK( - FunctionDefToBodyHelper(*bwd_body, AttrSlice(), &fld, &bwd_fbody)); + CHECK_OK(FunctionDefToBodyHelper(*bwd_body, AttrSlice(), &fld, &bwd_fbody)); auto node_name_index = bwd_fbody->graph->BuildNodeNameIndex(); const Node* identity = node_name_index.at("identity"); ASSERT_NE(identity, nullptr); diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.cc b/tensorflow/compiler/tf2xla/xla_compilation_device.cc index add79c369b69ef..e7925a011f9eb5 100644 --- a/tensorflow/compiler/tf2xla/xla_compilation_device.cc +++ b/tensorflow/compiler/tf2xla/xla_compilation_device.cc @@ -45,7 +45,8 @@ class XlaCompilationAllocator : public Allocator { // Regardless of the size requested, always allocates an XlaExpression. // Respects the alignment request because there is alignment checking even // for Tensors whose data is never accessed. - void* p = port::AlignedMalloc(sizeof(XlaExpression), alignment); + void* p = tsl::port::AlignedMalloc( + sizeof(XlaExpression), static_cast(alignment)); XlaExpression* expression = reinterpret_cast(p); new (expression) XlaExpression(); return expression; diff --git a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function_thunks.cc b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function_thunks.cc index c2b9cc26d5d461..f4b7ed44ff41d5 100644 --- a/tensorflow/compiler/tf2xla/xla_compiled_cpu_function_thunks.cc +++ b/tensorflow/compiler/tf2xla/xla_compiled_cpu_function_thunks.cc @@ -29,7 +29,6 @@ limitations under the License. 
#include "xla/backends/cpu/runtime/function_library.h" #include "xla/service/cpu/cpu_aot_compilation_result.h" #include "xla/tsl/concurrency/async_value_ref.h" -#include "xla/tsl/platform/status.h" namespace tensorflow { @@ -47,17 +46,17 @@ XlaCompiledCpuFunctionThunks::XlaCompiledCpuFunctionThunks( std::move(function_library)); // To load a CPU executable we don't need a compiler or a stream executor. - TF_CHECK_OK(aot_compilation_result.status()); + CHECK_OK(aot_compilation_result.status()); // NO_CDC: aot_compilation_result is checked to be OK above. auto cpu_executable = std::move(*aot_compilation_result.value()) - .LoadExecutable(nullptr, nullptr); + .LoadExecutable(/*stream_exec=*/nullptr); - TF_CHECK_OK(cpu_executable.status()); + CHECK_OK(cpu_executable.status()); auto executable_or_err = // NO_CDC: cpu_executable is checked to be OK above. xla::cpu::NanoRtExecutable::Create(std::move(cpu_executable.value())); - TF_CHECK_OK(executable_or_err.status()); + CHECK_OK(executable_or_err.status()); // NO_CDC: executable_or_err is checked to be OK above. executable_ = std::move(executable_or_err.value()); } diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index 216125f9cb153e..b9abd5006a958a 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -226,7 +226,7 @@ class XlaCompiler { // This must be a shared_ptr, as this is passed all the way down to the // cluster compilation. This allows asynchronous compilation to hold a // reference until the compilation is finished. - std::shared_ptr device_allocator; + std::shared_ptr device_allocator; // Alias input and output buffers for parameters that are passed-through XLA // modules without being changed. diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc index fad607b1ae1333..16289828892460 100644 --- a/tensorflow/compiler/tf2xla/xla_context.cc +++ b/tensorflow/compiler/tf2xla/xla_context.cc @@ -15,23 +15,29 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_context.h" +#include +#include #include +#include #include #include +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/types/span.h" -#include "tensorflow/compiler/tf2xla/literal_util.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/compiler/tf2xla/xla_expression.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" -#include "xla/client/client_library.h" +#include "tensorflow/compiler/tf2xla/xla_resource.h" #include "xla/hlo/builder/xla_builder.h" #include "xla/hlo/builder/xla_computation.h" -#include "xla/layout_util.h" -#include "xla/literal.h" -#include "tensorflow/core/common_runtime/dma_helper.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/statusor.h" +#include "xla/shape_util.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -44,8 +50,8 @@ const char XlaContext::kXlaContextResourceName[] = "_xla_context"; // per-step context is looked up in the resource manager. The // JIT will prepopulate the JITContext. 
XlaContext* context; - TF_CHECK_OK(ctx->step_container()->Lookup(ctx->resource_manager(), - kXlaContextResourceName, &context)); + CHECK_OK(ctx->step_container()->Lookup(ctx->resource_manager(), + kXlaContextResourceName, &context)); // The resource manager handed us a fresh reference to 'context', but retains // a reference itself so the context won't be freed. The resource manager will // outlive the JIT compilation. @@ -88,7 +94,7 @@ const xla::XlaComputation* XlaContext::GetOrCreateMax(const DataType type) { VLOG(1) << "Building Max() for " << type_string; xla::XlaBuilder b("max<" + type_string + ">"); xla::PrimitiveType xla_type; - TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); + CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = xla::Parameter(&b, 0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); auto y = @@ -104,7 +110,7 @@ const xla::XlaComputation* XlaContext::GetOrCreateMin(const DataType type) { VLOG(1) << "Building Min() for " << type_string; xla::XlaBuilder b("min<" + type_string + ">"); xla::PrimitiveType xla_type; - TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); + CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = xla::Parameter(&b, 0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); auto y = @@ -120,7 +126,7 @@ const xla::XlaComputation* XlaContext::GetOrCreateAdd(const DataType type) { VLOG(1) << "Building Add() for " << type_string; xla::XlaBuilder b("add<" + type_string + ">"); xla::PrimitiveType xla_type; - TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); + CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = xla::Parameter(&b, 0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); auto y = @@ -137,7 +143,7 @@ const xla::XlaComputation* XlaContext::GetOrCreateLogAddExp( VLOG(1) << "Building LogAddExp() for " << type_string; xla::XlaBuilder b("log_add_exp<" + type_string + ">"); xla::PrimitiveType xla_type; - TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); + CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = xla::Parameter(&b, 0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); auto y = @@ -158,7 +164,7 @@ const xla::XlaComputation* XlaContext::GetOrCreateMul(const DataType type) { VLOG(1) << "Building Mul() for " << type_string; xla::XlaBuilder b("mul<" + type_string + ">"); xla::PrimitiveType xla_type; - TF_CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); + CHECK_OK(DataTypeToPrimitiveType(type, &xla_type)); auto x = xla::Parameter(&b, 0, xla::ShapeUtil::MakeShape(xla_type, {}), "x"); auto y = diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 45814517342abc..0250d423296ede 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -17,58 +17,74 @@ limitations under the License. 
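The xla_context.cc hunks above touch the GetOrCreate* helpers that build one scalar reduction computation per data type. A sketch of that construction for Max, without the caching, assuming DataTypeToPrimitiveType and the XLA builder APIs shown in the surrounding context:

#include <utility>

#include "absl/log/check.h"
#include "tensorflow/compiler/tf2xla/type_util.h"
#include "tensorflow/core/framework/types.pb.h"
#include "xla/hlo/builder/xla_builder.h"
#include "xla/hlo/builder/xla_computation.h"
#include "xla/shape_util.h"

// Builds a two-parameter scalar max() computation of the given element type,
// mirroring the shape of XlaContext::GetOrCreateMax above.
xla::XlaComputation BuildScalarMax(tensorflow::DataType type) {
  xla::XlaBuilder b("max");
  xla::PrimitiveType xla_type;
  CHECK_OK(tensorflow::DataTypeToPrimitiveType(type, &xla_type));
  auto x = xla::Parameter(&b, 0, xla::ShapeUtil::MakeShape(xla_type, {}), "x");
  auto y = xla::Parameter(&b, 1, xla::ShapeUtil::MakeShape(xla_type, {}), "y");
  xla::Max(x, y);
  auto computation = b.Build();
  CHECK_OK(computation.status());
  return std::move(computation).value();
}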
#include "tensorflow/compiler/tf2xla/xla_helpers.h" -#include +#include +#include #include #include +#include +#include "absl/container/btree_map.h" +#include "absl/log/check.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" #include "absl/synchronization/notification.h" +#include "absl/time/time.h" #include "absl/types/span.h" #include "tensorflow/compiler/tf2xla/lib/util.h" -#include "tensorflow/compiler/tf2xla/literal_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" -#include "xla/backends/gpu/collectives/gpu_clique_key.h" #include "xla/core/collectives/clique_id.h" #include "xla/core/collectives/clique_key.h" -#include "xla/hlo/builder/lib/arithmetic.h" -#include "xla/hlo/builder/lib/constants.h" +#include "xla/executable_run_options.h" #include "xla/hlo/builder/xla_builder.h" #include "xla/hlo/builder/xla_computation.h" +#include "xla/literal.h" +#include "xla/literal_util.h" +#include "xla/service/computation_placer.h" #include "xla/service/gpu/gpu_executable_run_options.h" +#include "xla/shape.h" +#include "xla/shape_util.h" #include "xla/stream_executor/stream.h" #include "xla/stream_executor/stream_executor.h" -#include "xla/types.h" +#include "xla/tsl/platform/errors.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/framework/collective.h" #include "tensorflow/core/framework/device.h" +#include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/refcount.h" namespace tensorflow { xla::XlaOp XlaHelpers::Zero(xla::XlaBuilder* b, DataType data_type) { xla::PrimitiveType type; - TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); + CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return xla::ConstantLiteral(b, xla::LiteralUtil::Zero(type)); } xla::XlaOp XlaHelpers::One(xla::XlaBuilder* b, DataType data_type) { xla::PrimitiveType type; - TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); + CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return xla::ConstantLiteral(b, xla::LiteralUtil::One(type)); } xla::XlaOp XlaHelpers::IntegerLiteral(xla::XlaBuilder* b, DataType data_type, int64_t value) { xla::PrimitiveType type; - TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); + CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return ::tensorflow::IntegerLiteral(b, type, value); } xla::XlaOp XlaHelpers::FloatLiteral(xla::XlaBuilder* b, DataType data_type, double value) { xla::PrimitiveType type; - TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); + CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); return ::tensorflow::FloatLiteral(b, type, value); } @@ -139,7 +155,7 @@ DataType XlaHelpers::SumAccumulationType(const DataType& dtype) { xla::XlaOp XlaHelpers::ConvertElementType(const xla::XlaOp operand, const DataType new_element_type) { xla::PrimitiveType convert_to; - TF_CHECK_OK(DataTypeToPrimitiveType(new_element_type, &convert_to)); + CHECK_OK(DataTypeToPrimitiveType(new_element_type, &convert_to)); return xla::ConvertElementType(operand, convert_to); } diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc index c74db865769229..f8e85ba81f677a 100644 --- 
a/tensorflow/compiler/tf2xla/xla_op_registry.cc +++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc @@ -15,9 +15,14 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include +#include +#include #include +#include #include +#include +#include +#include #include "absl/algorithm/container.h" #include "absl/log/check.h" @@ -28,6 +33,7 @@ limitations under the License. #include "absl/types/span.h" #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" +#include "xla/tsl/platform/errors.h" #include "xla/util.h" #include "tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.h" #include "tensorflow/core/framework/device_base.h" @@ -42,11 +48,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/types.h" #include "tensorflow/core/tfrt/common/pjrt_util.h" -#include "tsl/platform/errors.h" -#include "tsl/platform/status.h" namespace tensorflow { @@ -265,7 +267,7 @@ void XlaOpRegistry::RegisterCompilationKernels() { "Ops registered: \n" + dynamic_cast(op_registry)->DebugString(true)); } - TF_CHECK_OK(lookup_status); + CHECK_OK(lookup_status); std::unordered_set type_attrs; for (const OpDef::AttrDef& attr_def : op_def->attr()) { @@ -475,7 +477,7 @@ XlaOpRegistry::CompileTimeConstantInputArgNames(const std::string& op) { } } else { int start, stop; - TF_CHECK_OK(op_kernel->InputRange(input, &start, &stop)); + CHECK_OK(op_kernel->InputRange(input, &start, &stop)); for (int i = start; i < stop; ++i) { result->push_back(i); } diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b76a4ffd8955b9..183b5289a1736c 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -180,23 +180,23 @@ tf_proto_library( srcs = [], create_go_proto = False, make_default_target_header_only = True, - protodeps = [ + visibility = ["//visibility:public"], + deps = [ "//tensorflow/core/example:protos_all", "//tensorflow/core/framework:protos_all", + "//tensorflow/core/grappler/costs:op_performance_data", "//tensorflow/core/lib/core:error_codes_proto", "//tensorflow/core/profiler:profiler_options_proto", "//tensorflow/core/protobuf:error_codes_proto_impl", "//tensorflow/core/protobuf:for_core_protos", "//tensorflow/core/util:protos_all", "//tensorflow/core/util:test_log_proto", - "//tensorflow/core/grappler/costs:op_performance_data", "@local_tsl//tsl/profiler/protobuf:profiler_options_proto", "@local_tsl//tsl/profiler/protobuf:xplane_proto", "@local_xla//xla/tsl/protobuf:coordination_config_proto", "@local_xla//xla/tsl/protobuf:distributed_runtime_payloads_proto", "@local_xla//xla/tsl/protobuf:status_proto", ], - visibility = ["//visibility:public"], ) tf_jspb_proto_library( diff --git a/tensorflow/core/common_runtime/device/device_event_mgr_test.cc b/tensorflow/core/common_runtime/device/device_event_mgr_test.cc index d252b541fcddfb..e485fb8c7d31b0 100644 --- a/tensorflow/core/common_runtime/device/device_event_mgr_test.cc +++ b/tensorflow/core/common_runtime/device/device_event_mgr_test.cc @@ -184,12 +184,12 @@ class EMBenchmarkHelper { // The rest of these are one per chain. 
NodeDef add_node_def_; NodeDef id_node_def_; - gtl::InlinedVector add_inputs_; + absl::InlinedVector add_inputs_; std::vector allocator_attrs_; - gtl::InlinedVector gpu_inputs_; - gtl::InlinedVector gpu_outputs_; - gtl::InlinedVector host_inputs_; - gtl::InlinedVector host_outputs_; + absl::InlinedVector gpu_inputs_; + absl::InlinedVector gpu_outputs_; + absl::InlinedVector host_inputs_; + absl::InlinedVector host_outputs_; public: // Length of tensors. TODO(tucker): make this a variable parameter. @@ -242,7 +242,7 @@ class EMBenchmarkHelper { } std::unique_ptr GetOpKernel(const NodeDef& node_def, - Status* status) { + absl::Status* status) { return CreateOpKernel("GPU", gpu_helper_->gpu(), gpu_helper_->gpu_allocator(), node_def, TF_GRAPH_DEF_VERSION, status); @@ -256,7 +256,7 @@ class EMBenchmarkHelper { .Device("/job:a/replica:0/task:0/GPU:0") .Finalize(&add_node_def_)); } - Status status; + absl::Status status; add_kernels_.emplace_back(GetOpKernel(add_node_def_, &status)); TF_ASSERT_OK(status); add_params_.push_back(new OpKernelContext::Params); @@ -385,12 +385,12 @@ class EMBenchmarkHelper { gpu_helper_->h2d_stream()->WaitFor(gpu_helper_->compute_stream())); // Begin by copying the input values from CPU to GPU. const int64_t src_bytes = host_inputs_[0].TotalBytes(); - se::DeviceMemoryBase gpu_dst_ptr0(DMAHelper::base(&gpu_inputs_[0]), - src_bytes); + stream_executor::DeviceAddressBase gpu_dst_ptr0( + DMAHelper::base(&gpu_inputs_[0]), src_bytes); TF_ASSERT_OK(gpu_helper_->h2d_stream()->Memcpy( &gpu_dst_ptr0, DMAHelper::base(&host_inputs_[0]), src_bytes)); - se::DeviceMemoryBase gpu_dst_ptr1(DMAHelper::base(&gpu_inputs_[1]), - src_bytes); + stream_executor::DeviceAddressBase gpu_dst_ptr1( + DMAHelper::base(&gpu_inputs_[1]), src_bytes); TF_ASSERT_OK(gpu_helper_->h2d_stream()->Memcpy( &gpu_dst_ptr1, DMAHelper::base(&host_inputs_[1]), src_bytes)); TF_ASSERT_OK( @@ -421,8 +421,8 @@ class EMBenchmarkHelper { TF_ASSERT_OK( gpu_helper_->d2h_stream()->WaitFor(gpu_helper_->compute_stream())); const int64_t return_bytes = ctx->mutable_output(0)->TotalBytes(); - se::DeviceMemoryBase gpu_src_ptr(DMAHelper::base(ctx->mutable_output(0)), - return_bytes); + stream_executor::DeviceAddressBase gpu_src_ptr( + DMAHelper::base(ctx->mutable_output(0)), return_bytes); TF_ASSERT_OK(gpu_helper_->d2h_stream()->Memcpy( DMAHelper::base(&host_outputs_[0]), gpu_src_ptr, return_bytes)); gpu_helper_->event_mgr()->ThenExecute(gpu_helper_->d2h_stream(), diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index d285bb2f8740d1..ed5ab0149ecbee 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -272,7 +272,7 @@ tf_cuda_library( "//tensorflow/core/framework:resource_base", "@local_xla//xla/pjrt/distributed:key_value_store_interface", "@local_xla//xla/pjrt:local_device_state", - "@local_xla//xla/pjrt/gpu:gpu_topology", + "@local_xla//xla/service:gpu_topology", "@local_xla//xla/pjrt:pjrt_client", "@local_xla//xla/pjrt:pjrt_compiler", "@local_xla//xla/service/gpu:gpu_executable_run_options", diff --git a/tensorflow/core/common_runtime/eager/attr_builder.cc b/tensorflow/core/common_runtime/eager/attr_builder.cc index 9852cce5ee3413..e7700d1076c132 100644 --- a/tensorflow/core/common_runtime/eager/attr_builder.cc +++ b/tensorflow/core/common_runtime/eager/attr_builder.cc @@ -35,13 +35,14 @@ namespace { mutex g_op_name_to_attr_type_map_lock(LINKER_INITIALIZED); -tensorflow::gtl::FlatMap* OpNameToAttrTypeMap() { 
+tensorflow::gtl::FlatMap* +OpNameToAttrTypeMap() { static auto* const m = - new tensorflow::gtl::FlatMap; + new tensorflow::gtl::FlatMap; return m; } -const uint32 kIsList = 1U << 31; +const uint32_t kIsList = 1U << 31; AttrTypeMap* DefaultFunctionAttrTypeMap() { AttrTypeMap* map = new AttrTypeMap(); @@ -57,7 +58,7 @@ const AttrTypeMap* GetDefaultFunctionAttrTypeMap() { } // namespace -absl::Status OpDefForOp(const string& op_name, const OpDef** op_def) { +absl::Status OpDefForOp(const std::string& op_name, const OpDef** op_def) { const OpRegistrationData* op_reg_data = nullptr; absl::Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); if (s.ok()) { @@ -102,12 +103,12 @@ absl::Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out, // TODO(agarwal): Avoid having to create this "registry" at runtime, // perhaps can be done at op registration time? for (const auto& attr : op_def->attr()) { - string type = attr.type(); + std::string type = attr.type(); const bool is_list = (type.length() > 6 && type.compare(0, 4, "list") == 0); if (is_list) { type = type.substr(5, type.length() - 6); } - uint32 t = is_list ? kIsList : 0; + uint32_t t = is_list ? kIsList : 0; if (type == "string") { t |= TF_ATTR_STRING; } else if (type == "int") { @@ -163,7 +164,7 @@ DEFINE_GET_ATTR(tensorflow::DataType, type, "type"); template <> absl::Status AttrBuilder::Get(absl::string_view attr_name, absl::InlinedVector* value) const { - auto it = encoded_attrs_.find(string(attr_name)); + auto it = encoded_attrs_.find(std::string(attr_name)); if (it == encoded_attrs_.end()) { return errors::NotFound("No attr named '", attr_name, "' found in AttrBuilder for ", op_name_); @@ -207,7 +208,7 @@ void AttrBuilder::FillAttrValueMap(AttrValueMap* m) const { namespace { -bool ValueMatchesDefault(const OpDef* op_def, const string& attr_name, +bool ValueMatchesDefault(const OpDef* op_def, const std::string& attr_name, const AttrValue& attr_value) { // TODO(iga): It might make sense to augment OpRegistrationData with a // {attr_name -> default_attr_value} FlatMap to avoid the loop here. 
@@ -238,7 +239,7 @@ void AttrBuilder::FillAttrValueMapWithoutDefaults(AttrValueMap* m) const { void AttrBuilder::AddAttrIfNotPresent(absl::string_view attr_name, const AttrValue& value) { - encoded_attrs_.emplace(string(attr_name), value.SerializeAsString()); + encoded_attrs_.emplace(std::string(attr_name), value.SerializeAsString()); } const NodeDef& AttrBuilder::BuildNodeDef() { @@ -260,7 +261,7 @@ void AttrBuilder::CopyAttributes(const AttrBuilder& other) { other.encoded_attrs_.end()); } -absl::Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, +absl::Status AttrTypeByName(const AttrTypeMap& m, const std::string& attr_name, TF_AttrType* out, unsigned char* is_list) { auto* t = gtl::FindOrNull(m, attr_name); if (t == nullptr) { @@ -290,7 +291,7 @@ inline tensorflow::Fprint128 CacheKeyHelper(absl::string_view s, return FingerprintCat128(a, b); } -inline tensorflow::Fprint128 CacheKeyHelper(absl::string_view s, uint64 b) { +inline tensorflow::Fprint128 CacheKeyHelper(absl::string_view s, uint64_t b) { return CacheKeyHelper(s, {b, b}); } @@ -299,7 +300,7 @@ inline tensorflow::Fprint128 CacheKeyHelper(absl::string_view s, uint64 b) { tensorflow::Fprint128 AttrBuilder::CacheKey(const absl::string_view device) { if (!cached_cache_key_ || device != device_for_cached_cache_key_) { cached_cache_key_ = BuildCacheKeyForDevice(device); - device_for_cached_cache_key_ = string(device); + device_for_cached_cache_key_ = std::string(device); } return *cached_cache_key_; diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h index 9dc480d8c8187a..bdd644a6331ca6 100644 --- a/tensorflow/core/common_runtime/eager/attr_builder.h +++ b/tensorflow/core/common_runtime/eager/attr_builder.h @@ -40,10 +40,10 @@ namespace tensorflow { // If the type is not a list type, the value is the same as the TF_AttrType type // of the value. Else, the highest order bit is on, and the rest of the bits // represent the TF_AttrType type of the values in the list. -typedef std::unordered_map AttrTypeMap; +typedef std::unordered_map AttrTypeMap; // Look up OpDef for `op_name`. -absl::Status OpDefForOp(const string& op_name, const OpDef** op_def); +absl::Status OpDefForOp(const std::string& op_name, const OpDef** op_def); // Returns the AttrTypeMap for the TensorFlow operation named op_name. // If op_name is not registered in global op registry, AttrTypeMapForOp assumes @@ -53,7 +53,7 @@ absl::Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out, bool* is_function); // Looks for 'attr_name' in 'm' and sets 'out' and 'is_list'. -absl::Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, +absl::Status AttrTypeByName(const AttrTypeMap& m, const std::string& attr_name, TF_AttrType* out, unsigned char* is_list); // KernelAndDevice::Init needs a NodeDef only to pass the attribute map through. @@ -111,8 +111,8 @@ class AttrBuilder : public AbstractOpAttrs { device_for_cached_cache_key_.clear(); } - const string& op_name() const { return op_name_; } - void set_op_name(const string& name) { op_name_ = name; } + const std::string& op_name() const { return op_name_; } + void set_op_name(const std::string& name) { op_name_ = name; } // Needed to work around call to ValidateNodeDef in CreateOpKernel. 
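The AttrTypeMap comment above describes how a list attr is encoded: the top bit is set and the low bits carry the element's TF_AttrType. A minimal, self-contained sketch of that encoding, with plain uint32_t standing in for TF_AttrType:

#include <cstdint>

// Same bit layout as kIsList in attr_builder.cc: the high bit flags a list
// attr, the remaining bits hold the element type.
constexpr uint32_t kIsListBit = 1U << 31;

uint32_t EncodeAttrType(uint32_t element_type, bool is_list) {
  return element_type | (is_list ? kIsListBit : 0U);
}

void DecodeAttrType(uint32_t encoded, uint32_t* element_type, bool* is_list) {
  *is_list = (encoded & kIsListBit) != 0;
  *element_type = encoded & ~kIsListBit;
}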
AttrBuilder& NumInputs(int n); @@ -186,7 +186,7 @@ class AttrBuilder : public AbstractOpAttrs { tensorflow::Fprint128 BuildCacheKeyForDevice(absl::string_view device) const; template - void SetInAttrValueMap(AttrValueMap* m, const string& attr_name, + void SetInAttrValueMap(AttrValueMap* m, const std::string& attr_name, T&& value) const { DCHECK(!node_def_finalized_) << "Calling SetInAttrValueMap after BuildNodeDef."; @@ -196,17 +196,17 @@ class AttrBuilder : public AbstractOpAttrs { void AddAttrIfNotPresent(absl::string_view attr_name, const AttrValue& value); - gtl::FlatMap encoded_attrs_; + gtl::FlatMap encoded_attrs_; mutable AttrValue attr_tmp_; // For encoding - string op_name_; + std::string op_name_; int num_inputs_; NodeDef node_def_; bool node_def_initialized_; bool node_def_finalized_; std::optional cached_cache_key_; - string device_for_cached_cache_key_; + std::string device_for_cached_cache_key_; }; template <> diff --git a/tensorflow/core/common_runtime/eager/attr_builder_test.cc b/tensorflow/core/common_runtime/eager/attr_builder_test.cc index 77462842f493a2..e0a35cfc59c524 100644 --- a/tensorflow/core/common_runtime/eager/attr_builder_test.cc +++ b/tensorflow/core/common_runtime/eager/attr_builder_test.cc @@ -85,8 +85,8 @@ TEST(AttrTypeMap, CacheKey) { ASSERT_FALSE(cache_key == a.CacheKey("cpu:0")); } -string ToString(const AttrValueMap& m) { - std::vector strs; +std::string ToString(const AttrValueMap& m) { + std::vector strs; for (const auto& e : m) { strs.push_back(absl::StrCat(e.first, " -> ", e.second.DebugString())); } diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc index fe649546530f3c..358c51f22c098e 100644 --- a/tensorflow/core/common_runtime/eager/context.cc +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -150,7 +150,8 @@ EagerContext::EagerContext( allow_soft_placement_(opts.config.allow_soft_placement()), num_active_steps_(0), step_container_(std::make_unique( - 0, [this](const string& name) { ClearResourceContainer(name); })), + 0, + [this](const std::string& name) { ClearResourceContainer(name); })), default_executor_(async, /*enable_streaming_enqueue=*/!opts.config.experimental() .disable_eager_executor_streaming_enqueue()), @@ -198,7 +199,7 @@ AbstractTensorInterface* EagerContext::CreateInt64Scalar(int64_t value) { return new TensorInterface(Tensor(value)); } -AbstractTensorInterface* EagerContext::CreateUint64Scalar(uint64 value) { +AbstractTensorInterface* EagerContext::CreateUint64Scalar(uint64_t value) { return new TensorInterface(Tensor(value)); } @@ -285,8 +286,9 @@ void EagerContext::InitPrioritizedDeviceTypeList() { namespace { // Using absl::StrJoin with lambda does not work in tf-lite builds. // TODO(b/148160441): Replace with absl::StrJoin once DeviceBase has operator<<. 
-std::vector DevicesToString(const PrioritizedDeviceVector& devices) { - std::vector v; +std::vector DevicesToString( + const PrioritizedDeviceVector& devices) { + std::vector v; v.reserve(devices.size()); for (const auto& p : devices) { v.push_back(p.first->name()); @@ -294,9 +296,9 @@ std::vector DevicesToString(const PrioritizedDeviceVector& devices) { return v; } -std::vector DeviceTypesToString( +std::vector DeviceTypesToString( const PrioritizedDeviceTypeVector& types) { - std::vector v; + std::vector v; v.reserve(types.size()); for (const auto& p : types) { v.push_back(p.first.type_string()); @@ -316,8 +318,8 @@ std::vector DeviceTypesToString( Device* SelectBestMatchingDevice(const DeviceNameUtils::ParsedName& pattern, const PrioritizedDeviceVector& existing, const PrioritizedDeviceTypeVector& supported) { - for (const std::pair& prioritized_type : supported) { - for (const std::pair& prioritized_device : existing) { + for (const std::pair& prioritized_type : supported) { + for (const std::pair& prioritized_device : existing) { Device* dev = prioritized_device.first; if (DeviceType(dev->attributes().device_type()) == prioritized_type.first && @@ -485,7 +487,7 @@ void EagerContext::ClearCachesAndDefaultExecutor() { { mutex_lock ml(metadata_mu_); step_container_ = std::make_unique( - 0, [this](const string& name) { ClearResourceContainer(name); }); + 0, [this](const std::string& name) { ClearResourceContainer(name); }); } } @@ -509,7 +511,7 @@ ContextDevicePlacementPolicy EagerContext::GetDevicePlacementPolicy() const { } #if !defined(IS_MOBILE_PLATFORM) -std::vector EagerContext::GetRemoteContexts() { +std::vector EagerContext::GetRemoteContexts() { tf_shared_lock l(remote_state_mu_); return remote_contexts_; } @@ -520,9 +522,9 @@ bool EagerContext::IsRemoteContextsEmpty() { } void EagerContext::CloseAndClearAllRemoteContexts() { - uint64 context_id; - uint64 context_view_id; - std::vector remote_contexts_copy; + uint64_t context_id; + uint64_t context_view_id; + std::vector remote_contexts_copy; { mutex_lock l(remote_state_mu_); if (!is_master_) return; @@ -541,8 +543,8 @@ void EagerContext::CloseAndClearAllRemoteContexts() { } void EagerContext::CloseRemoteContexts( - const std::vector& remote_contexts, uint64 context_id, - uint64 context_view_id) { + const std::vector& remote_contexts, uint64_t context_id, + uint64_t context_view_id) { // Close all remote contexts. eager::CloseContextRequest request; request.set_context_id(context_id); @@ -689,21 +691,22 @@ EagerContext::~EagerContext() { } } -bool EagerContext::FindFunctionByName(const string& name) const { +bool EagerContext::FindFunctionByName(const std::string& name) const { return func_lib_def_.Find(name) != nullptr; } absl::Status EagerContext::FindFunctionOpData( - const string& name, const tensorflow::OpRegistrationData** op_data) { + const std::string& name, const tensorflow::OpRegistrationData** op_data) { return func_lib_def_.LookUp(name, op_data); } -const FunctionDef* EagerContext::FindFunctionDef(const string& name) const { +const FunctionDef* EagerContext::FindFunctionDef( + const std::string& name) const { return func_lib_def_.Find(name); } core::RefCountPtr EagerContext::FindRecord( - const string& name) const { + const std::string& name) const { return func_lib_def_.FindRecord(name); } @@ -763,7 +766,7 @@ std::vector EagerContext::ListAllTfDevices() { // Since remote_device_mgr may also contain local devices, make sure no // duplicated device is returned. 
std::vector devices; - std::unordered_set dev_names; + std::unordered_set dev_names; if (local_device_mgr()) { for (const auto& dev : local_device_mgr()->ListDevices()) { @@ -832,7 +835,7 @@ void EagerContext::EndStep() { // TODO(b/139809335): This does not properly clean up remote resources // Clean up the previous step container and create a new one. step_container_ = std::make_unique( - 0, [this](const string& name) { ClearResourceContainer(name); }); + 0, [this](const std::string& name) { ClearResourceContainer(name); }); } } @@ -880,7 +883,7 @@ absl::Status EagerContext::MaybeRegisterFunctionRemotely( } absl::Status EagerContext::MaybeRemoveFunctionRemotely( - const string& function_name) { + const std::string& function_name) { // Only client context can remove function on remote worker context. if (!remote_device_manager_.Owned()) { return absl::OkStatus(); @@ -917,10 +920,10 @@ absl::Status EagerContext::MaybeRemoveFunctionRemotely( } absl::Status EagerContext::RegisterExistingFunctionsOnRemoteWorkers( - const std::vector& remote_workers) { + const std::vector& remote_workers) { #if !defined(IS_MOBILE_PLATFORM) // Register multiple functions on selected remote workers. - uint64 context_id = GetContextId(); + uint64_t context_id = GetContextId(); FunctionDefLibrary function_defs = func_lib_def_.ToProto(); std::vector> requests( function_defs.function_size()); @@ -1079,16 +1082,17 @@ absl::Status EagerContext::AddComponentFunction( return absl::OkStatus(); } -const FunctionDef* EagerContext::GetFunctionDef(const string& function_name) { +const FunctionDef* EagerContext::GetFunctionDef( + const std::string& function_name) { return func_lib_def_.Find(function_name); } -std::vector EagerContext::ListFunctionNames() { +std::vector EagerContext::ListFunctionNames() { return func_lib_def_.ListFunctionNames(); } absl::Status EagerContext::AddRemoveFunctionNotifier( - const string& func, std::function notifier) { + const std::string& func, std::function notifier) { mutex_lock l(remove_function_notifiers_mu_); auto iter = remove_function_notifiers_.find(func); if (iter != remove_function_notifiers_.end()) { @@ -1122,7 +1126,7 @@ EagerContext::GetCacheStats() { return stats; } -absl::Status EagerContext::RemoveFunction(const string& func) { +absl::Status EagerContext::RemoveFunction(const std::string& func) { // TODO(mdan): The context owns these functions. Why check refcount then? 
std::vector> notifiers; bool is_last_ref = false; @@ -1308,14 +1312,14 @@ absl::Status EagerContext::FindCompositeDeviceFromName( return errors::NotFound("Unknown composite device: ", device_name); } -bool EagerContext::IsCustomDevice(const string& device_name) { +bool EagerContext::IsCustomDevice(const std::string& device_name) { CustomDevice* device = nullptr; return custom_device_op_handler_.FindCustomDeviceFromName(device_name, &device); } absl::Status EagerContext::RegisterCustomDevice( - const string& device_name, std::unique_ptr device) { + const std::string& device_name, std::unique_ptr device) { Device* existing_physical_device = nullptr; if (FindDeviceFromName(device_name.c_str(), &existing_physical_device).ok()) { return errors::AlreadyExists(device_name, @@ -1326,14 +1330,15 @@ absl::Status EagerContext::RegisterCustomDevice( } absl::Status EagerContext::FindOrCreateCompositeDevice( - const std::vector& underlying_devices, const string& device_name, - CompositeDevice** composite_device) { + const std::vector& underlying_devices, + const std::string& device_name, CompositeDevice** composite_device) { if (!device_name.empty() && FindCompositeDeviceFromName(device_name, composite_device).ok()) { return absl::OkStatus(); } - const uint64 hash_key = Fingerprint64(absl::StrJoin(underlying_devices, ",")); + const uint64_t hash_key = + Fingerprint64(absl::StrJoin(underlying_devices, ",")); mutex_lock l(composite_devices_mu_); auto iter = composite_devices_.find(hash_key); @@ -1371,14 +1376,14 @@ bool EagerContext::OnSameTask(const Device* first, const Device* second) const { // Gets the CPU device on the task of device. absl::Status EagerContext::CPUDeviceOnTask(const Device* device, Device** cpu_device) const { - string cpu_device_name; + std::string cpu_device_name; TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( device->name(), &cpu_device_name)); return FindDeviceFromName(cpu_device_name.c_str(), cpu_device); } -void EagerContext::ClearResourceContainer(const string& name) { +void EagerContext::ClearResourceContainer(const std::string& name) { // TODO(b/139809335): This does not properly clean up remote resources auto local_devices = local_device_mgr()->ListDevices(); for (Device* device : local_devices) { @@ -1406,8 +1411,8 @@ void EagerContext::UpdateGlobalRendezvousDeviceManager( } namespace { -absl::Status GetTaskName(Device* d, string* task_name) { - string ignored; +absl::Status GetTaskName(Device* d, std::string* task_name) { + std::string ignored; if (!DeviceNameUtils::SplitDeviceName(d->name(), task_name, &ignored)) { return errors::InvalidArgument("Unable to parse device name: ", d->name()); } @@ -1425,7 +1430,7 @@ absl::Status EagerContext::GetClient( absl::Status EagerContext::GetClient( const DeviceNameUtils::ParsedName& device_name, core::RefCountPtr* client) { - string device_task_name; + std::string device_task_name; if (!DeviceNameUtils::GetTaskName(device_name, &device_task_name)) { return errors::InvalidArgument( "Task is not fully specified in device name: ", @@ -1457,7 +1462,8 @@ absl::Status EagerContext::GetClient( } absl::Status EagerContext::GetClient( - const string& remote_task, core::RefCountPtr* client) { + const std::string& remote_task, + core::RefCountPtr* client) { { tf_shared_lock l(remote_state_mu_); if (remote_eager_workers_ == nullptr) { @@ -1474,12 +1480,12 @@ absl::Status EagerContext::GetClient( return absl::OkStatus(); } -uint64 EagerContext::GetContextId() const { +uint64_t EagerContext::GetContextId() const { tf_shared_lock 
l(remote_state_mu_); return context_id_; } -uint64 EagerContext::GetContextViewId() const { +uint64_t EagerContext::GetContextViewId() const { tf_shared_lock l(remote_state_mu_); return context_view_id_; } @@ -1544,9 +1550,10 @@ absl::Status EagerContext::StoreCollectiveOpsServer( } absl::Status EagerContext::SetRemoteDeviceFilters( - const string& remote_worker, const std::vector& device_filters) { + const std::string& remote_worker, + const std::vector& device_filters) { // Get fully specified task name for remote worker - string remote_worker_task_name; + std::string remote_worker_task_name; DeviceNameUtils::ParsedName pw; if (!DeviceNameUtils::ParseFullName(remote_worker, &pw)) { return tensorflow::errors::InvalidArgument( @@ -1583,7 +1590,7 @@ absl::Status EagerContext::SetRemoteDeviceFilters( } void EagerContext::FilterDevicesForRemoteWorkers( - const string& remote_worker, + const std::string& remote_worker, const protobuf::RepeatedPtrField& device_attrs, std::vector* filtered_device_mask) { filtered_device_mask->resize(device_attrs.size()); @@ -1634,7 +1641,7 @@ absl::Status EagerContext::InitializeRemoteMaster( std::shared_ptr worker_session, std::unique_ptr remote_eager_workers, std::unique_ptr remote_device_manager, - const std::vector& remote_contexts, uint64 context_id, + const std::vector& remote_contexts, uint64_t context_id, tsl::core::RefCountPtr r, DeviceMgr* local_device_mgr, int keep_alive_secs, DistributedFunctionLibraryRuntime* cluster_flr, std::unique_ptr> @@ -1661,10 +1668,10 @@ absl::Status EagerContext::InitializeRemoteMaster( } absl::Status EagerContext::UpdateRemoteMaster( - uint64 context_id, + uint64_t context_id, std::unique_ptr remote_eager_workers, - const std::vector& add_remote_contexts, - const std::vector& remove_remote_contexts) { + const std::vector& add_remote_contexts, + const std::vector& remove_remote_contexts) { { tf_shared_lock l(remote_state_mu_); if (context_id != context_id_) { @@ -1682,7 +1689,7 @@ absl::Status EagerContext::UpdateRemoteMaster( // a larger view id and ignores this request. 
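The FindOrCreateCompositeDevice hunk further above keys the composite-device cache on a fingerprint of the comma-joined underlying device names. A small sketch of that key computation as it reads after this change:

#include <cstdint>
#include <string>
#include <vector>

#include "absl/strings/str_join.h"
#include "tensorflow/core/platform/fingerprint.h"

// One 64-bit key per ordered list of underlying device names; identical lists
// map to the same cached CompositeDevice.
uint64_t CompositeDeviceCacheKey(
    const std::vector<std::string>& underlying_devices) {
  return tensorflow::Fingerprint64(absl::StrJoin(underlying_devices, ","));
}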
CloseRemoteContexts(remove_remote_contexts, context_id, GetContextViewId()); mutex_lock l(remote_state_mu_); - for (const string& remote_context : remove_remote_contexts) { + for (const std::string& remote_context : remove_remote_contexts) { remote_contexts_.erase( std::remove(remote_contexts_.begin(), remote_contexts_.end(), remote_context), @@ -1731,10 +1738,10 @@ absl::Status EagerContext::SetMasterContextState( std::unique_ptr server, WorkerEnv* worker_env, std::shared_ptr worker_session, std::unique_ptr remote_eager_workers, - std::unique_ptr remote_device_manager, uint64 context_id, - uint64 context_view_id, tsl::core::RefCountPtr r, - DeviceMgr* local_device_mgr, int keep_alive_secs, - DistributedFunctionLibraryRuntime* cluster_flr, + std::unique_ptr remote_device_manager, + uint64_t context_id, uint64_t context_view_id, + tsl::core::RefCountPtr r, DeviceMgr* local_device_mgr, + int keep_alive_secs, DistributedFunctionLibraryRuntime* cluster_flr, std::unique_ptr> remote_mgr) { mutex_lock l(remote_state_mu_); @@ -1852,8 +1859,8 @@ absl::Status EagerContext::SetMasterContextState( absl::Status EagerContext::InitializeRemoteWorker( std::unique_ptr remote_eager_workers, DynamicDeviceMgr* remote_device_mgr, - const std::vector& remote_contexts, uint64 context_id, - uint64 context_view_id, + const std::vector& remote_contexts, uint64_t context_id, + uint64_t context_view_id, std::function(const int64_t)> rendezvous_creator, DistributedFunctionLibraryRuntime* cluster_flr, @@ -1908,7 +1915,7 @@ absl::Status EagerContext::InitializeRemoteWorker( absl::Status EagerContext::UpdateRemoteWorker( std::unique_ptr remote_eager_workers, - const std::vector& remote_contexts, uint64 context_id) { + const std::vector& remote_contexts, uint64_t context_id) { { mutex_lock l(remote_state_mu_); if (context_id != context_id_) { diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h index 98fa2e7e31b9a7..1013cc17bf95fe 100644 --- a/tensorflow/core/common_runtime/eager/context.h +++ b/tensorflow/core/common_runtime/eager/context.h @@ -86,10 +86,10 @@ bool SkipRemoteHandleWaitReady(); class EagerContext : public ImmediateExecutionContext, public core::RefCounted { public: - static constexpr uint64 kInvalidContextId = 0; + static constexpr uint64_t kInvalidContextId = 0; - static uint64 NewContextId() { - uint64 context_id = random::New64(); + static uint64_t NewContextId() { + uint64_t context_id = random::New64(); while (context_id == kInvalidContextId) { context_id = random::New64(); } @@ -108,7 +108,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { void Release() override { Unref(); } AbstractTensorInterface* CreateInt64Scalar(int64_t value) override; - AbstractTensorInterface* CreateUint64Scalar(uint64 value) override; + AbstractTensorInterface* CreateUint64Scalar(uint64_t value) override; AbstractTensorInterface* CreateInt32Scalar(int32_t value) override; AbstractTensorInterface* CreateFloatScalar(float value) override; AbstractTensorInterface* CreateDoubleScalar(double value) override; @@ -208,14 +208,14 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { const NodeDef& ndef, Device** out) const; // TODO(mdan): Rename to ContainsFunction. 
- bool FindFunctionByName(const string& name) const; + bool FindFunctionByName(const std::string& name) const; absl::Status FindFunctionOpData( - const string& name, const tensorflow::OpRegistrationData** op_data); + const std::string& name, const tensorflow::OpRegistrationData** op_data); - const FunctionDef* FindFunctionDef(const string& name) const override; + const FunctionDef* FindFunctionDef(const std::string& name) const override; core::RefCountPtr FindRecord( - const string& name) const override; + const std::string& name) const override; Device* HostCPU() const { return host_cpu_device_; } Device* CanonicalDevice(Device* d) const { @@ -225,7 +225,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { return HostCPU()->parsed_name(); } - const string& HostCPUName() const override { return HostCPU()->name(); } + const std::string& HostCPUName() const override { return HostCPU()->name(); } GraphCollector* GetGraphCollector() { return &graph_collector_; } @@ -263,14 +263,14 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { absl::Status AddComponentFunction(const FunctionDef& fdef, const FunctionDefLibrary& library); - const FunctionDef* GetFunctionDef(const string& function_name); + const FunctionDef* GetFunctionDef(const std::string& function_name); - std::vector ListFunctionNames() override; + std::vector ListFunctionNames() override; tensorflow::ImmediateExecutionContext::CacheStats GetCacheStats() override; - absl::Status RemoveFunction(const string& func) override; + absl::Status RemoveFunction(const std::string& func) override; absl::Status AddRemoveFunctionNotifier( - const string& func, std::function notifier) override; + const std::string& func, std::function notifier) override; // Wait for pending nodes to be finished in local executors (including context // default executor and thread executors) and executors on remote workers. 
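The context-id hunks above keep the retry-until-nonzero pattern while moving to uint64_t. A minimal standalone sketch of that pattern, using <random> in place of TensorFlow's random::New64() (the generator choice is an assumption, not the real implementation):

#include <cstdint>
#include <random>

constexpr std::uint64_t kInvalidContextId = 0;  // 0 is reserved for "no context".

std::uint64_t NewContextId() {
  static std::mt19937_64 rng{std::random_device{}()};
  std::uint64_t id = rng();
  while (id == kInvalidContextId) {  // Re-draw until we get a usable id.
    id = rng();
  }
  return id;
}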
@@ -401,7 +401,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { const FunctionLibraryDefinition* FuncLibDef() const { return &func_lib_def_; } FunctionLibraryDefinition* GetComponentFunctionFunctionLibraryDefinition( - const string& function_name) { + const std::string& function_name) { tf_shared_lock lock(cache_mu_); auto iter = component_function_libraries_.find(function_name); if (iter != component_function_libraries_.end()) { @@ -421,11 +421,11 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { core::RefCountPtr* client); absl::Status GetClient(const DeviceNameUtils::ParsedName& device_name, core::RefCountPtr* client); - absl::Status GetClient(const string& remote_task, + absl::Status GetClient(const std::string& remote_task, core::RefCountPtr* client); - uint64 GetContextId() const; - uint64 GetContextViewId() const; + uint64_t GetContextId() const; + uint64_t GetContextViewId() const; void IncrementContextViewId(); absl::Status EnableCollectiveOps(const ServerDef& server_def) override; @@ -450,7 +450,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { std::shared_ptr worker_session, std::unique_ptr remote_eager_workers, std::unique_ptr remote_device_manager, - const std::vector& remote_contexts, uint64 context_id, + const std::vector& remote_contexts, uint64_t context_id, tsl::core::RefCountPtr r, /*const*/ DeviceMgr* local_device_mgr, int keep_alive_secs, DistributedFunctionLibraryRuntime* cluster_flr, @@ -464,18 +464,18 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { // can still be accessed, and will automatically register existing functions // if there are newly added hosts. absl::Status UpdateRemoteMaster( - uint64 context_id, + uint64_t context_id, std::unique_ptr remote_eager_workers, - const std::vector& add_remote_contexts, - const std::vector& remove_remote_contexts); + const std::vector& add_remote_contexts, + const std::vector& remove_remote_contexts); // Similar with InitializeRemoteMaster but this context will not kill remote // contexts in shutdown. absl::Status InitializeRemoteWorker( std::unique_ptr remote_eager_workers, DynamicDeviceMgr* remote_device_mgr, - const std::vector& remote_contexts, uint64 context_id, - uint64 context_view_id, + const std::vector& remote_contexts, uint64_t context_id, + uint64_t context_view_id, std::function(const int64_t)> rendezvous_creator, DistributedFunctionLibraryRuntime* cluster_flr, @@ -487,7 +487,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { // increment context_view_id. absl::Status UpdateRemoteWorker( std::unique_ptr remote_eager_workers, - const std::vector& remote_contexts, uint64 context_id); + const std::vector& remote_contexts, uint64_t context_id); absl::Status StoreCollectiveOpsServer( std::unique_ptr new_server, DeviceMgr* device_mgr, @@ -495,7 +495,8 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { // For the specified remote worker, preprocess and set its device filters. 
absl::Status SetRemoteDeviceFilters( - const string& remote_worker, const std::vector& device_filters); + const std::string& remote_worker, + const std::vector& device_filters); // For the specified remote worker, apply the stored device filters to the // list of device attributes following these rules: @@ -507,7 +508,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { // filtered_device_mask) indicating whether each of the devices is visible to // the remote worker. void FilterDevicesForRemoteWorkers( - const string& remote_worker, + const std::string& remote_worker, const protobuf::RepeatedPtrField& device_attrs, std::vector* filtered_device_mask); @@ -567,10 +568,10 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { absl::Status FindCompositeDeviceFromName(absl::string_view device_name, CompositeDevice** device) const; - bool IsCustomDevice(const string& device_name) override; + bool IsCustomDevice(const std::string& device_name) override; absl::Status RegisterCustomDevice( - const string& name, std::unique_ptr device) override; + const std::string& name, std::unique_ptr device) override; CustomDeviceOpHandler& GetCustomDeviceOpHandler() override { return custom_device_op_handler_; @@ -579,8 +580,8 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { // Find or create a composite device with the given `underlying_devices` and // `device_name` (if not empty). absl::Status FindOrCreateCompositeDevice( - const std::vector& underlying_devices, const string& device_name, - CompositeDevice** composite_device); + const std::vector& underlying_devices, + const std::string& device_name, CompositeDevice** composite_device); bool OnSameTask(const Device* first, const Device* second) const; // Gets the CPU device on the task of device. @@ -667,9 +668,9 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { ~EagerContext() override; absl::Status MaybeRegisterFunctionRemotely(const FunctionDef& fdef); - absl::Status MaybeRemoveFunctionRemotely(const string& function_name); + absl::Status MaybeRemoveFunctionRemotely(const std::string& function_name); absl::Status RegisterExistingFunctionsOnRemoteWorkers( - const std::vector& remote_workers); + const std::vector& remote_workers); void ResetPFLR(const DeviceMgr* device_mgr, Env* env, const ConfigProto* config, int graph_def_version, @@ -681,7 +682,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { void ResetClusterFLR(DistributedFunctionLibraryRuntime* cluster_flr); void UpdateGlobalRendezvousDeviceManager(tensorflow::DeviceMgr* device_mgr); - void ClearResourceContainer(const string& name); + void ClearResourceContainer(const std::string& name); template struct OwnedOrUnownedHelper { @@ -750,7 +751,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { // Maps from the fingerprint of a set of device names to a virtual // CompositeDevice. // TODO(b/145922293): Consider taking device names as keys. 
- absl::flat_hash_map> + absl::flat_hash_map> composite_devices_ ABSL_GUARDED_BY(composite_devices_mu_); FunctionLibraryDefinition func_lib_def_{OpRegistry::Global(), @@ -780,10 +781,10 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { std::unordered_map, Fprint128Hasher> kernel_cache_ TF_GUARDED_BY(cache_mu_); - std::unordered_map registered_functions_ + std::unordered_map registered_functions_ TF_GUARDED_BY(cache_mu_); - std::unordered_map> + std::unordered_map> component_function_libraries_ TF_GUARDED_BY(cache_mu_); absl::flat_hash_map device_cache_ TF_GUARDED_BY(device_cache_mu_); @@ -830,11 +831,12 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { OwnedOrUnownedHelper collective_executor_mgr_; #if !defined(IS_MOBILE_PLATFORM) - std::vector GetRemoteContexts() TF_LOCKS_EXCLUDED(remote_state_mu_); + std::vector GetRemoteContexts() + TF_LOCKS_EXCLUDED(remote_state_mu_); bool IsRemoteContextsEmpty() TF_LOCKS_EXCLUDED(remote_state_mu_); void CloseAndClearAllRemoteContexts(); - void CloseRemoteContexts(const std::vector& remote_contexts, - uint64 context_id, uint64 context_view_id); + void CloseRemoteContexts(const std::vector& remote_contexts, + uint64_t context_id, uint64_t context_view_id); // TODO(b/184375824): clean up parameter order for better readability. absl::Status SetMasterContextState( @@ -842,7 +844,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { std::shared_ptr worker_session, std::unique_ptr remote_eager_workers, std::unique_ptr remote_device_manager, - uint64 context_id, uint64 context_view_id, + uint64_t context_id, uint64_t context_view_id, tsl::core::RefCountPtr r, /*const*/ DeviceMgr* local_device_mgr, int keep_alive_secs, DistributedFunctionLibraryRuntime* cluster_flr, @@ -858,12 +860,12 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { mutable mutex remote_state_mu_; - uint64 context_id_ TF_GUARDED_BY(remote_state_mu_); + uint64_t context_id_ TF_GUARDED_BY(remote_state_mu_); // The view id of an eager context should be set to 0 when context is created, // and continuously incremented when context with the same context_id gets // updated. The view id should be consistent between master and workers. - uint64 context_view_id_ TF_GUARDED_BY(remote_state_mu_); - std::vector remote_contexts_ TF_GUARDED_BY(remote_state_mu_); + uint64_t context_view_id_ TF_GUARDED_BY(remote_state_mu_); + std::vector remote_contexts_ TF_GUARDED_BY(remote_state_mu_); std::unique_ptr remote_eager_workers_ TF_GUARDED_BY(remote_state_mu_); @@ -880,7 +882,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { bool is_master_ TF_GUARDED_BY(remote_state_mu_); // Maps from a remote worker to a list of parsed device filters. - std::unordered_map> + std::unordered_map> cluster_device_filters_ TF_GUARDED_BY(remote_state_mu_); // A distributed manager that helps setup, update, and check liveness of diff --git a/tensorflow/core/common_runtime/eager/context_distributed_manager.cc b/tensorflow/core/common_runtime/eager/context_distributed_manager.cc index d51031b78b7387..8725479fb891da 100644 --- a/tensorflow/core/common_runtime/eager/context_distributed_manager.cc +++ b/tensorflow/core/common_runtime/eager/context_distributed_manager.cc @@ -80,11 +80,11 @@ limitations under the License. 
#if (defined(PLATFORM_GOOGLE) && defined(TF_PLATFORM_LINUX_X86_64)) #define TF_GPU_USE_PJRT #include "xla/pjrt/distributed/key_value_store_interface.h" -#include "xla/pjrt/gpu/gpu_topology.h" #include "xla/pjrt/gpu/se_gpu_pjrt_client.h" #include "xla/pjrt/local_device_state.h" #include "xla/pjrt/pjrt_compiler.h" #include "xla/service/gpu/gpu_executable_run_options.h" +#include "xla/service/gpu_topology.h" #include "tensorflow/core/framework/resource_base.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/tfrt/common/global_state.h" @@ -362,7 +362,7 @@ bool AreLocalDevicesCompatible(const EagerContext* context, } absl::Status AddRemoteDevicesToMgr( - const std::vector& added_remote_workers, + const std::vector& added_remote_workers, WorkerCacheInterface* worker_cache, DynamicDeviceMgr* remote_device_mgr) { std::vector> remote_devices; mutex remote_devices_mu; @@ -394,7 +394,7 @@ absl::Status AddRemoteDevicesToMgr( } absl::Status GetAllRemoteDevices( - const std::vector& remote_workers, + const std::vector& remote_workers, WorkerCacheInterface* worker_cache, std::unique_ptr* device_mgr) { auto remote_device_mgr = std::make_unique(); @@ -405,13 +405,13 @@ absl::Status GetAllRemoteDevices( } absl::Status RemoveRemoteDevicesFromMgr( - const std::vector& removed_remote_workers, + const std::vector& removed_remote_workers, DynamicDeviceMgr* remote_device_mgr) { const std::vector remote_devices = (remote_device_mgr->ListDevices()); std::vector devices_to_remove; for (Device* d : remote_devices) { - for (const string& remote_worker : removed_remote_workers) { + for (const std::string& remote_worker : removed_remote_workers) { if (DeviceNameUtils::IsSameAddressSpace(remote_worker, d->name())) { devices_to_remove.emplace_back(d); break; @@ -423,8 +423,8 @@ absl::Status RemoveRemoteDevicesFromMgr( } absl::Status ListRemoteWorkers(ServerInterface* server, - const string& local_worker, - std::vector* remote_workers) { + const std::string& local_worker, + std::vector* remote_workers) { server->master_env()->worker_cache->ListWorkers(remote_workers); remote_workers->erase( std::remove(remote_workers->begin(), remote_workers->end(), local_worker), @@ -432,22 +432,22 @@ absl::Status ListRemoteWorkers(ServerInterface* server, return absl::OkStatus(); } -void DifferentiateWorkerLists(const std::vector* current_list, - const std::vector* new_list, - std::vector* added, - std::vector* removed, - std::vector* existing) { +void DifferentiateWorkerLists(const std::vector* current_list, + const std::vector* new_list, + std::vector* added, + std::vector* removed, + std::vector* existing) { // Get STL set_difference and set_intersection with one list traversal. // Similar to the set_difference library function, the input lists // (`current_list` and `new_list`) must be sorted before calling the function. 
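A self-contained sketch of that single-pass merge over two sorted name lists (plain std::string vectors, no TensorFlow types): elements present only in the current list come out as removed, elements only in the new list as added, and common elements as existing.

#include <string>
#include <vector>

void DiffSortedLists(const std::vector<std::string>& current,
                     const std::vector<std::string>& next,
                     std::vector<std::string>* added,
                     std::vector<std::string>* removed,
                     std::vector<std::string>* existing) {
  auto c = current.begin();
  auto n = next.begin();
  while (c != current.end() && n != next.end()) {
    if (*c < *n) {
      removed->push_back(*c++);   // only in the old list
    } else if (*n < *c) {
      added->push_back(*n++);     // only in the new list
    } else {
      existing->push_back(*c++);  // in both lists
      ++n;
    }
  }
  removed->insert(removed->end(), c, current.end());
  added->insert(added->end(), n, next.end());
}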
added->resize(new_list->size()); removed->resize(current_list->size()); existing->resize(current_list->size()); - std::vector::const_iterator curr_it = current_list->begin(); - std::vector::const_iterator new_it = new_list->begin(); - std::vector::iterator added_it = added->begin(); - std::vector::iterator removed_it = removed->begin(); - std::vector::iterator existing_it = existing->begin(); + std::vector::const_iterator curr_it = current_list->begin(); + std::vector::const_iterator new_it = new_list->begin(); + std::vector::iterator added_it = added->begin(); + std::vector::iterator removed_it = removed->begin(); + std::vector::iterator existing_it = existing->begin(); while (curr_it != current_list->end() && new_it != new_list->end()) { if (*curr_it < *new_it) { *removed_it++ = *curr_it++; @@ -466,10 +466,10 @@ void DifferentiateWorkerLists(const std::vector* current_list, } absl::Status GetReplacedFromExistingWorkers( - const std::vector* existing_workers, uint64 context_id, - uint64 context_view_id, const ServerDef& server_def, + const std::vector* existing_workers, uint64_t context_id, + uint64_t context_view_id, const ServerDef& server_def, eager::EagerClientCache* client_cache, - std::vector* replaced_workers) { + std::vector* replaced_workers) { BlockingCounter counter(existing_workers->size()); std::vector statuses(existing_workers->size()); eager::KeepAliveRequest request; @@ -505,8 +505,8 @@ absl::Status GetReplacedFromExistingWorkers( } absl::Status CreateRemoteContexts( - EagerContext* context, const std::vector& remote_workers, - uint64 context_id, uint64 context_view_id, int keep_alive_secs, + EagerContext* context, const std::vector& remote_workers, + uint64_t context_id, uint64_t context_view_id, int keep_alive_secs, const ServerDef& server_def, eager::EagerClientCache* remote_eager_workers, bool async, const eager::CreateContextRequest& base_request, int64_t init_timeout_in_ms, int retries, bool clear_existing_contexts) { @@ -514,7 +514,7 @@ absl::Status CreateRemoteContexts( BlockingCounter counter(num_remote_workers); std::vector statuses(num_remote_workers); for (int i = 0; i < num_remote_workers; i++) { - const string& remote_worker = remote_workers[i]; + const std::string& remote_worker = remote_workers[i]; DeviceNameUtils::ParsedName parsed_name; if (!DeviceNameUtils::ParseFullName(remote_worker, &parsed_name)) { statuses[i] = errors::InvalidArgument("Unable to parse ", remote_worker, @@ -583,10 +583,10 @@ absl::Status CreateRemoteContexts( } absl::Status UpdateRemoteContexts( - EagerContext* context, const std::vector& remote_workers, - const std::vector& added_workers, - const std::vector& removed_workers, uint64 context_id, - uint64 context_view_id, const ServerDef& server_def, + EagerContext* context, const std::vector& remote_workers, + const std::vector& added_workers, + const std::vector& removed_workers, uint64_t context_id, + uint64_t context_view_id, const ServerDef& server_def, eager::EagerClientCache* remote_eager_workers, const eager::CreateContextRequest& base_request) { int num_remote_workers = remote_workers.size(); @@ -594,8 +594,8 @@ absl::Status UpdateRemoteContexts( std::vector statuses(num_remote_workers); int cluster_device_count = base_request.cluster_device_attributes_size(); - std::unordered_set added_or_removed(added_workers.begin(), - added_workers.end()); + std::unordered_set added_or_removed(added_workers.begin(), + added_workers.end()); std::copy(removed_workers.begin(), removed_workers.end(), std::inserter(added_or_removed, 
added_or_removed.end())); // Whether each device is in the updated (added or removed) workers @@ -604,7 +604,7 @@ absl::Status UpdateRemoteContexts( const auto& da = base_request.cluster_device_attributes().at(i); DeviceNameUtils::ParsedName pn; DeviceNameUtils::ParseFullName(da.name(), &pn); - string task_name; + std::string task_name; DeviceNameUtils::GetTaskName(pn, &task_name); if (added_or_removed.find(task_name) != added_or_removed.end()) { device_added_or_removed[i] = true; @@ -612,7 +612,7 @@ absl::Status UpdateRemoteContexts( } for (int i = 0; i < num_remote_workers; i++) { - const string& remote_worker = remote_workers[i]; + const std::string& remote_worker = remote_workers[i]; DeviceNameUtils::ParsedName parsed_name; if (!DeviceNameUtils::ParseFullName(remote_worker, &parsed_name)) { statuses[i] = errors::InvalidArgument("Unable to parse ", remote_worker, @@ -689,15 +689,15 @@ absl::Status UpdateContextWithServerDef(EagerContext* context, bool reset_context, int keep_alive_secs, int64_t init_timeout_in_ms, int retries, bool clear_existing_contexts = false) { - string worker_name = - strings::StrCat("/job:", server_def.job_name(), - "/replica:0/task:", server_def.task_index()); + std::string worker_name = + absl::StrCat("/job:", server_def.job_name(), + "/replica:0/task:", server_def.task_index()); // List of current remote workers before updating server_def. Unused if // resetting the server_def. - std::vector curr_remote_workers; + std::vector curr_remote_workers; // List of updated remote workers. - std::vector remote_workers; + std::vector remote_workers; // New server created for new server_def. Unused if updating server_def. std::unique_ptr new_server; @@ -722,10 +722,10 @@ absl::Status UpdateContextWithServerDef(EagerContext* context, ListRemoteWorkers(server, worker_name, &remote_workers)); } - uint64 context_id = context->GetContextId(); + uint64_t context_id = context->GetContextId(); // TODO(b/291142876) Check for invalid context id here (instead of in the C // API). - uint64 context_view_id = context->GetContextViewId(); + uint64_t context_view_id = context->GetContextViewId(); if (reset_context) { context_id = EagerContext::NewContextId(); context_view_id = 0; @@ -757,10 +757,10 @@ absl::Status UpdateContextWithServerDef(EagerContext* context, // * existing_workers: set(curr_remote_workers) intersect set(remote_workers) // * replaced_workers: workers with the same task names and potentially the // same `hostname:port`s, but replaced by different processes - std::vector added_workers; - std::vector removed_workers; - std::vector existing_workers; - std::vector replaced_workers; + std::vector added_workers; + std::vector removed_workers; + std::vector existing_workers; + std::vector replaced_workers; // New remote device manager created for new server_def. Unused if updating // server_def. 
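The migration from strings::StrCat to absl::StrCat in the hunks below keeps the same task-name pattern; a hedged usage sketch with a placeholder job name and task index:

#include <string>
#include "absl/strings/str_cat.h"

std::string WorkerName(const std::string& job_name, int task_index) {
  // absl::StrCat accepts integral arguments directly.
  return absl::StrCat("/job:", job_name, "/replica:0/task:", task_index);
}
// WorkerName("worker", 3) -> "/job:worker/replica:0/task:3"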
@@ -791,10 +791,11 @@ absl::Status UpdateContextWithServerDef(EagerContext* context, remote_eager_workers.get(), &replaced_workers)); if (VLOG_IS_ON(1)) { VLOG(1) << "Updating cluster with following changes"; - for (const string& w : added_workers) VLOG(1) << " Added worker " << w; - for (const string& w : removed_workers) + for (const std::string& w : added_workers) + VLOG(1) << " Added worker " << w; + for (const std::string& w : removed_workers) VLOG(1) << " Removed worker " << w; - for (const string& w : replaced_workers) + for (const std::string& w : replaced_workers) VLOG(1) << " Replaced worker " << w; } if (!replaced_workers.empty()) { @@ -804,7 +805,7 @@ absl::Status UpdateContextWithServerDef(EagerContext* context, replaced_workers.end()); added_workers.insert(added_workers.end(), replaced_workers.begin(), replaced_workers.end()); - for (const string& w : replaced_workers) { + for (const std::string& w : replaced_workers) { existing_workers.erase( std::remove(existing_workers.begin(), existing_workers.end(), w), existing_workers.end()); @@ -868,7 +869,7 @@ absl::Status UpdateContextWithServerDef(EagerContext* context, } if (!existing_workers.empty()) { if (VLOG_IS_ON(1)) { - for (const string& w : existing_workers) { + for (const std::string& w : existing_workers) { VLOG(1) << "Updating cluster with existing worker " << w; } } @@ -883,7 +884,7 @@ absl::Status UpdateContextWithServerDef(EagerContext* context, } } - auto session_name = strings::StrCat("eager_", context_id); + auto session_name = absl::StrCat("eager_", context_id); auto* session_mgr = server->worker_env()->session_mgr; if (reset_context) { tsl::core::RefCountPtr r = @@ -937,15 +938,16 @@ absl::Status EagerContextDistributedManager::SetOrUpdateServerDef( if (reset_context) { const auto& cdf = server_def.cluster_device_filters(); for (const auto& jdf : cdf.jobs()) { - const string remote_prefix = "/job:" + jdf.name() + "/task:"; + const std::string remote_prefix = "/job:" + jdf.name() + "/task:"; for (const auto& tdf : jdf.tasks()) { const int32_t task_index = tdf.first; - std::vector device_filters(tdf.second.device_filters_size()); + std::vector device_filters( + tdf.second.device_filters_size()); for (int i = 0; i < tdf.second.device_filters_size(); i++) { device_filters[i] = tdf.second.device_filters(i); } - const string remote_worker = - strings::StrCat(remote_prefix, task_index); + const std::string remote_worker = + absl::StrCat(remote_prefix, task_index); TF_RETURN_IF_ERROR( context_->SetRemoteDeviceFilters(remote_worker, device_filters)); } @@ -973,9 +975,9 @@ absl::Status EagerContextDistributedManager::SetOrUpdateServerDef( absl::Status EagerContextDistributedManager::InitializeLocalOnlyContext( const ServerDef& server_def, int keep_alive_secs) { - string worker_name = - strings::StrCat("/job:", server_def.job_name(), - "/replica:0/task:", server_def.task_index()); + std::string worker_name = + absl::StrCat("/job:", server_def.job_name(), + "/replica:0/task:", server_def.task_index()); // New server created for new server_def. Unused if updating server_def. 
std::unique_ptr new_server; ServerInterface* server; @@ -985,7 +987,7 @@ absl::Status EagerContextDistributedManager::InitializeLocalOnlyContext( LOG_AND_RETURN_IF_ERROR( NewServerWithOptions(server_def, {device_mgr}, &new_server)); server = new_server.get(); - uint64 context_id = EagerContext::NewContextId(); + uint64_t context_id = EagerContext::NewContextId(); // Make master eager context accessible by local eager service, which might // receive send tensor requests from remote workers. LOG_AND_RETURN_IF_ERROR( @@ -995,7 +997,7 @@ absl::Status EagerContextDistributedManager::InitializeLocalOnlyContext( server->worker_env()->device_mgr->ListDeviceAttributes( &local_device_attributes); - auto session_name = strings::StrCat("eager_", context_id); + auto session_name = absl::StrCat("eager_", context_id); auto* session_mgr = server->worker_env()->session_mgr; tsl::core::RefCountPtr r = server->worker_env()->rendezvous_mgr->Find(context_id); @@ -1054,7 +1056,7 @@ absl::Status EagerContextDistributedManager::EnableCollectiveOps( const bool enable_coordination = !config.experimental().coordination_config().service_type().empty(); if (enable_coordination) { - auto session_name = strings::StrCat("eager_", context_->GetContextId()); + auto session_name = absl::StrCat("eager_", context_->GetContextId()); std::shared_ptr worker_session; auto* session_mgr = server->worker_env()->session_mgr; // Start coordination service within session if this is the leader. diff --git a/tensorflow/core/common_runtime/eager/context_test.cc b/tensorflow/core/common_runtime/eager/context_test.cc index 56cdcf5c5fa746..590abf83871f67 100644 --- a/tensorflow/core/common_runtime/eager/context_test.cc +++ b/tensorflow/core/common_runtime/eager/context_test.cc @@ -50,7 +50,7 @@ using ::testing::HasSubstr; typedef FunctionDefHelper FDH; // Return a fake device. -static Device* CreateDevice(const string& type, int n) { +static Device* CreateDevice(const std::string& type, int n) { class FakeDevice : public Device { public: explicit FakeDevice(const DeviceAttributes& attr) : Device(nullptr, attr) {} @@ -99,7 +99,7 @@ class EagerContextTest : public ::testing::Test { TEST_F(EagerContextTest, CompositeDevice) { InitContext(SessionOptions(), DEVICE_PLACEMENT_EXPLICIT); - std::vector underlying_devices = { + std::vector underlying_devices = { "/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:CPU:1"}; CompositeDevice* composite_device_0 = nullptr; @@ -134,10 +134,10 @@ TEST_F(EagerContextTest, CompositeDevice) { TEST_F(EagerContextTest, CompositeDeviceWithGivenName) { InitContext(SessionOptions(), DEVICE_PLACEMENT_EXPLICIT); - const std::vector underlying_devices_0 = { + const std::vector underlying_devices_0 = { "/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:CPU:1"}; - const string composite_device_name = + const std::string composite_device_name = "/job:worker1/replica:0/task:0/device:COMPOSITE:5"; // Create a CompositeDevice with the given name. CompositeDevice* composite_device_0 = nullptr; @@ -150,7 +150,7 @@ TEST_F(EagerContextTest, CompositeDeviceWithGivenName) { context()->FindCompositeDeviceFromName(composite_device_name, &device)); EXPECT_EQ(device, composite_device_0); - std::vector underlying_devices_1 = { + std::vector underlying_devices_1 = { "/job:worker/replica:0/task:0/device:CPU:1", "/job:worker/replica:0/task:0/device:CPU:2"}; // Find a CompositeDevice with the given name. 
diff --git a/tensorflow/core/common_runtime/eager/copy_to_device_node.h b/tensorflow/core/common_runtime/eager/copy_to_device_node.h index 34fe7f2b122de0..d12f4965e1fded 100644 --- a/tensorflow/core/common_runtime/eager/copy_to_device_node.h +++ b/tensorflow/core/common_runtime/eager/copy_to_device_node.h @@ -71,8 +71,8 @@ class CopyToDeviceNode : public EagerNode { void Abort(absl::Status status) override { dst_->Poison(status, dstd_); } - string DebugString() const override { - string out = "[CopyToDeviceNode]"; + std::string DebugString() const override { + std::string out = "[CopyToDeviceNode]"; absl::StrAppend(&out, " src_tensor: ", src_->DebugString()); absl::StrAppend(&out, ", dst_tensor: ", dst_->DebugString()); absl::StrAppend(&out, ", dst_device: ", dstd_ ? dstd_->name() : "[]"); diff --git a/tensorflow/core/common_runtime/eager/custom_device.h b/tensorflow/core/common_runtime/eager/custom_device.h index 2f4f5acc95549f..f72f76b0f5a7ca 100644 --- a/tensorflow/core/common_runtime/eager/custom_device.h +++ b/tensorflow/core/common_runtime/eager/custom_device.h @@ -37,13 +37,14 @@ class CustomDeviceTensorHandle; class CustomDevice { public: virtual ~CustomDevice() = default; - virtual const string& name() = 0; + virtual const std::string& name() = 0; virtual absl::Status CopyTensorToDevice( ImmediateExecutionTensorHandle* tensor, ImmediateExecutionTensorHandle** result) = 0; virtual absl::Status CopyTensorFromDevice( - ImmediateExecutionTensorHandle* tensor, const string& target_device_name, + ImmediateExecutionTensorHandle* tensor, + const std::string& target_device_name, ImmediateExecutionTensorHandle** result) = 0; virtual absl::Status Execute(const ImmediateExecutionOperation* op, diff --git a/tensorflow/core/common_runtime/eager/custom_device_op_handler.cc b/tensorflow/core/common_runtime/eager/custom_device_op_handler.cc index 426930f04b8cda..2a736e67bae789 100644 --- a/tensorflow/core/common_runtime/eager/custom_device_op_handler.cc +++ b/tensorflow/core/common_runtime/eager/custom_device_op_handler.cc @@ -26,7 +26,7 @@ namespace tensorflow { void CustomDeviceOpHandler::Clear() { custom_devices_.clear(); } absl::Status CustomDeviceOpHandler::RegisterCustomDevice( - const string& device_name, std::unique_ptr device) { + const std::string& device_name, std::unique_ptr device) { DeviceNameUtils::ParsedName parsed; if (!DeviceNameUtils::ParseFullName(device_name, &parsed) || !parsed.has_job || !parsed.has_replica || !parsed.has_task || @@ -46,7 +46,7 @@ absl::Status CustomDeviceOpHandler::RegisterCustomDevice( } bool CustomDeviceOpHandler::FindCustomDeviceFromName( - const string& name, CustomDevice** device) const { + const std::string& name, CustomDevice** device) const { auto dev_it = custom_devices_.find(name); if (dev_it == custom_devices_.end()) { return false; diff --git a/tensorflow/core/common_runtime/eager/custom_device_op_handler.h b/tensorflow/core/common_runtime/eager/custom_device_op_handler.h index 6c38e50d458dcd..66d186014b2176 100644 --- a/tensorflow/core/common_runtime/eager/custom_device_op_handler.h +++ b/tensorflow/core/common_runtime/eager/custom_device_op_handler.h @@ -29,11 +29,11 @@ class CustomDeviceOpHandler { public: ~CustomDeviceOpHandler() = default; // Register a new custom device. - absl::Status RegisterCustomDevice(const string& device_name, + absl::Status RegisterCustomDevice(const std::string& device_name, std::unique_ptr device); // Find the custom device from given name. Return true if it finds one. 
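RegisterCustomDevice above rejects malformed names and duplicate registrations; a minimal sketch of the duplicate guard using a plain map of owned pointers (CustomDevice here is a placeholder struct, not the real interface):

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

struct CustomDevice {};  // placeholder for the real abstract interface

using DeviceMap =
    std::unordered_map<std::string, std::unique_ptr<CustomDevice>>;

bool RegisterDevice(DeviceMap& devices, const std::string& name,
                    std::unique_ptr<CustomDevice> device) {
  // emplace only inserts when `name` is not already registered.
  return devices.emplace(name, std::move(device)).second;
}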
- bool FindCustomDeviceFromName(const string& name, + bool FindCustomDeviceFromName(const std::string& name, CustomDevice** device) const; absl::Status Execute(ImmediateExecutionOperation* op, @@ -53,7 +53,8 @@ class CustomDeviceOpHandler { void Clear(); private: - std::unordered_map> custom_devices_; + std::unordered_map> + custom_devices_; }; } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/eager_executor.cc b/tensorflow/core/common_runtime/eager/eager_executor.cc index fc552f3127576d..02f8eae99fb80a 100644 --- a/tensorflow/core/common_runtime/eager/eager_executor.cc +++ b/tensorflow/core/common_runtime/eager/eager_executor.cc @@ -117,7 +117,7 @@ absl::Status EagerExecutor::SyncExecute(EagerNode* node) { } // NOTE: SyncExecute runs every node regardless of error status in executor. - uint64 id = next_node_id_++; + uint64_t id = next_node_id_++; absl::Status s = node->Prepare(); if (!s.ok()) { @@ -312,9 +312,9 @@ void EagerExecutor::NodeDone(const core::RefCountPtr& item, // a deadlock. } -void EagerExecutor::NotifyWaiters(uint64 id) { +void EagerExecutor::NotifyWaiters(uint64_t id) { if (!node_done_notifications_.empty()) { - uint64 upperbound_id = 0; + uint64_t upperbound_id = 0; if (!unfinished_nodes_.empty()) { upperbound_id = unfinished_nodes_.begin()->first - 1; } else if (!node_queue_.empty()) { diff --git a/tensorflow/core/common_runtime/eager/eager_executor.h b/tensorflow/core/common_runtime/eager/eager_executor.h index 7826b271ec98f1..ff8ce9cbc7322c 100644 --- a/tensorflow/core/common_runtime/eager/eager_executor.h +++ b/tensorflow/core/common_runtime/eager/eager_executor.h @@ -76,7 +76,7 @@ class EagerNode { virtual AsyncEagerNode* AsAsync() { return nullptr; } virtual AsyncRemoteExecuteNode* AsAsyncRemoteExecuteNode() { return nullptr; } - virtual string DebugString() const = 0; + virtual std::string DebugString() const = 0; // Indicates whether a node failure should make the executor unusable. virtual bool Fatal() const { return true; } @@ -193,7 +193,7 @@ class EagerExecutor { struct NodeItem : core::RefCounted { // Unique id generated in EagerExecutor::Add(). If item1.id < item2.id, it // means item1.node is added before item2.node. - uint64 id; + uint64_t id; std::unique_ptr node; NodeState state; }; @@ -203,7 +203,8 @@ class EagerExecutor { void NodeDone(const core::RefCountPtr& item, const absl::Status& status, bool from_queue); - void NotifyWaiters(uint64 id) TF_EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_); + void NotifyWaiters(uint64_t id) + TF_EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_); // Starts execution of pending EagerNodes. This function loops till executor // state_ is set to kShutDown. If any errors are encountered, these are set @@ -220,9 +221,9 @@ class EagerExecutor { absl::Status WaitForAllPendingNodesLocked(mutex_lock* lock) TF_EXCLUSIVE_LOCKS_REQUIRED(node_queue_mutex_); - absl::Status WaitImpl(bool wait_all, uint64 node_id); + absl::Status WaitImpl(bool wait_all, uint64_t node_id); - std::atomic next_node_id_; + std::atomic next_node_id_; mutable mutex node_queue_mutex_; @@ -236,7 +237,7 @@ class EagerExecutor { TF_GUARDED_BY(node_queue_mutex_); // Ordered by NodeItem::id. - std::map, std::less> + std::map, std::less> unfinished_nodes_ TF_GUARDED_BY(node_queue_mutex_); // `status_` is set based on any errors raised during execution of a @@ -248,7 +249,7 @@ class EagerExecutor { // These condition_variables are notified and removed when that EagerNode is // done executing, or if an error is found in execution of any EagerNode. 
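NotifyWaiters walks the id-ordered notification map up to an upper-bound node id; a standalone sketch of that lookup with std::multimap and condition variables (the executor locking the real code requires is omitted):

#include <condition_variable>
#include <cstdint>
#include <map>

using Waiters = std::multimap<std::uint64_t, std::condition_variable*>;

void NotifyUpTo(Waiters& waiters, std::uint64_t upper_bound_id) {
  // Every entry keyed at or below upper_bound_id belongs to a finished node.
  const auto end = waiters.upper_bound(upper_bound_id);
  for (auto it = waiters.begin(); it != end; ++it) {
    it->second->notify_all();
  }
  waiters.erase(waiters.begin(), end);
}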
// The map is ordered by id. - std::multimap> + std::multimap> node_done_notifications_ TF_GUARDED_BY(node_queue_mutex_); // thread_exited_notification_ is notified by the `thread_` right before it diff --git a/tensorflow/core/common_runtime/eager/eager_executor_test.cc b/tensorflow/core/common_runtime/eager/eager_executor_test.cc index 3fc6f3860085f0..acaba8320ed871 100644 --- a/tensorflow/core/common_runtime/eager/eager_executor_test.cc +++ b/tensorflow/core/common_runtime/eager/eager_executor_test.cc @@ -63,7 +63,7 @@ class TestEagerNode : public EagerNode { }; void Abort(absl::Status status) override {} - string DebugString() const override { return "testEagerNode"; } + std::string DebugString() const override { return "testEagerNode"; } private: TestState* state_; @@ -94,7 +94,7 @@ class TestAsyncEagerNode : public AsyncEagerNode { }; void Abort(absl::Status status) override {} - string DebugString() const override { return "testAsyncEagerNode"; } + std::string DebugString() const override { return "testAsyncEagerNode"; } private: TestState* state_; diff --git a/tensorflow/core/common_runtime/eager/eager_op_rewrite_registry.h b/tensorflow/core/common_runtime/eager/eager_op_rewrite_registry.h index bd7098473d7532..221d30d98518f6 100644 --- a/tensorflow/core/common_runtime/eager/eager_op_rewrite_registry.h +++ b/tensorflow/core/common_runtime/eager/eager_op_rewrite_registry.h @@ -28,7 +28,7 @@ namespace tensorflow { // implement the Run method. class EagerOpRewrite { public: - EagerOpRewrite(string name, string file, string line) { + EagerOpRewrite(std::string name, std::string file, std::string line) { debug_info_.name = name; debug_info_.file = file; debug_info_.line = line; @@ -43,7 +43,7 @@ class EagerOpRewrite { // Holds information about the rewrite registration. struct DebugInfo { - string name, file, line; + std::string name, file, line; }; // Returns information about the registered Eager op rewrite. @@ -75,7 +75,7 @@ class EagerOpRewriteRegistry { private: static constexpr int32_t kNumPhases = 2; // Holds all the registered Eager op rewrites and their ordinal numbers. - std::array, int32>>, + std::array, int32_t>>, kNumPhases> rewrites_; }; diff --git a/tensorflow/core/common_runtime/eager/eager_op_rewrite_registry_test.cc b/tensorflow/core/common_runtime/eager/eager_op_rewrite_registry_test.cc index d50f3e0a4ec411..e76627a3680daf 100644 --- a/tensorflow/core/common_runtime/eager/eager_op_rewrite_registry_test.cc +++ b/tensorflow/core/common_runtime/eager/eager_op_rewrite_registry_test.cc @@ -23,7 +23,7 @@ namespace tensorflow { class TestEagerOpRewrite : public EagerOpRewrite { public: - TestEagerOpRewrite(string name, string file, string line) + TestEagerOpRewrite(std::string name, std::string file, std::string line) : EagerOpRewrite(name, file, line), executor_(/*async=*/false, /*enable_streaming_enqueue=*/true) {} static int count_; diff --git a/tensorflow/core/common_runtime/eager/eager_operation.cc b/tensorflow/core/common_runtime/eager/eager_operation.cc index b14cbeeba9bb81..d730df6b608b06 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.cc +++ b/tensorflow/core/common_runtime/eager/eager_operation.cc @@ -473,7 +473,7 @@ absl::Status EagerOperation::MutableTensorHandleInputs( } absl::Status EagerOperation::SetDeviceName(const char* c_name) { - string name(c_name != nullptr ? c_name : ""); + std::string name(c_name != nullptr ? 
c_name : ""); if (name != last_set_device_name_) { if (!DeviceNameUtils::ParseFullName(name, &device_parsed_name_)) { return errors::InvalidArgument("Malformed device specification '", name, @@ -498,7 +498,7 @@ bool EagerOperation::IsLocal() const { device_parsed_name_.task == host_cpu_name.task; } -string VariantDeviceDebugString(VariantDevice device) { +std::string VariantDeviceDebugString(VariantDevice device) { if (device == kVariantDeviceNull) { return "[]"; } else if (std::holds_alternative(device)) { @@ -513,8 +513,8 @@ void EagerOperation::AddAttrs(const AbstractOpAttrs* op_attrs) { attrs_.CopyAttributes(*(down_cast(op_attrs))); } -string EagerOperation::DebugString() const { - string out; +std::string EagerOperation::DebugString() const { + std::string out; VLOG(1) << "EagerOperation::DebugString() over " << this; absl::StrAppend(&out, "Name: ", Name(), "\n"); diff --git a/tensorflow/core/common_runtime/eager/eager_operation.h b/tensorflow/core/common_runtime/eager/eager_operation.h index 04cefa00861198..b51e098413685d 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.h +++ b/tensorflow/core/common_runtime/eager/eager_operation.h @@ -57,9 +57,9 @@ class EagerOperation : public ImmediateExecutionOperation { return Reset(op, raw_device_name, false, nullptr); } - const string& Name() const override { return attrs_.op_name(); } + const std::string& Name() const override { return attrs_.op_name(); } - const string& DeviceName() const override { return device_name_; } + const std::string& DeviceName() const override { return device_name_; } ImmediateExecutionContext* GetContext() const override { return &ctx_; } @@ -196,7 +196,7 @@ class EagerOperation : public ImmediateExecutionOperation { // This is useful if we want the EagerOperation to point to a different // function. - void UpdateName(const string& name) { + void UpdateName(const std::string& name) { attrs_.set_op_name(name); op_name_ = attrs_.op_name(); } @@ -242,7 +242,7 @@ class EagerOperation : public ImmediateExecutionOperation { EagerExecutor& Executor() { return *executor_; } - string DebugString() const; + std::string DebugString() const; const absl::optional& eager_func_params() const { return eager_func_params_; @@ -289,12 +289,12 @@ class EagerOperation : public ImmediateExecutionOperation { // The last device name given to SetDeviceName. // This is used to avoid having to re-process the same device in repeated // calls to SetDeviceName. - string last_set_device_name_; + std::string last_set_device_name_; // The operation's device name. // This contains the named passed to SetDeviceName until device_ is set, // at which point it contains the device_ name. - string device_name_; + std::string device_name_; // The parsed device name. 
// This will always contain the result of diff --git a/tensorflow/core/common_runtime/eager/eager_operation_test.cc b/tensorflow/core/common_runtime/eager/eager_operation_test.cc index 499d2ef110bfd9..2ff6952eb0d17e 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation_test.cc +++ b/tensorflow/core/common_runtime/eager/eager_operation_test.cc @@ -68,7 +68,7 @@ TEST(EagerOperationTest, EagerFunctionParamsAndStepId) { auto op = new EagerOperation(ctx); EXPECT_FALSE(op->eager_func_params().has_value()); - string device_name = "/job:localhost/replica:0/task:0/device:CPU:0"; + std::string device_name = "/job:localhost/replica:0/task:0/device:CPU:0"; TF_ASSERT_OK(op->SetDeviceName(device_name.c_str())); TF_ASSERT_OK(op->Reset("DummyFunction", device_name.c_str())); diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index d12d51db3907f9..547336cdeb6d76 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -132,8 +132,8 @@ bool SendAsProtosWhenPossible() { return send_as_protos_when_possible; } -const string& DeviceNameOrUnspecified(Device* device) { - static string* unspecified_string = new string(""); +const std::string& DeviceNameOrUnspecified(Device* device) { + static std::string* unspecified_string = new std::string(""); return (device == nullptr) ? *unspecified_string : device->name(); } @@ -158,7 +158,7 @@ absl::Status CopyInputToExpectedDevice(EagerContext* ctx, EagerOperation* op, // Should only be called when these don't match DCHECK(expected_input_device != handle_device); *result = nullptr; - const string& op_device_name = DeviceNameOrUnspecified(op_device); + const std::string& op_device_name = DeviceNameOrUnspecified(op_device); switch (ctx->GetDevicePlacementPolicy()) { case DEVICE_PLACEMENT_SILENT_FOR_INT32: @@ -314,7 +314,7 @@ absl::Status GetDeviceForInput(const EagerOperation& op, const bool is_host_memory_arg, TensorHandle* tensor_handle, Device** result) { Device* cpu_device = ctx.HostCPU(); - string device_name; + std::string device_name; if (tensor_handle->Type() != TensorHandle::LOCAL) { Device* device = tensor_handle->device(); device_name = device != nullptr ? device->name() : cpu_device->name(); @@ -473,7 +473,7 @@ absl::Status MustCompileWithXLA(const EagerOperation* op, // `has_jit_compile` and `device`. 
absl::Status HasNestedJitCompile(const EagerOperation& op, const EagerContext& ctx, bool* has_jit_compile, - string* device) { + std::string* device) { *has_jit_compile = false; const std::string kStatefulPartitionedCallOp = "StatefulPartitionedCall"; @@ -488,7 +488,7 @@ absl::Status HasNestedJitCompile(const EagerOperation& op, const FunctionLibraryDefinition* func_lib_def = op.FuncLibDef(); while (!function_names.empty()) { - const string& function_name = function_names.front(); + const std::string& function_name = function_names.front(); const FunctionDef* function_def = func_lib_def->Find(function_name); if (function_def == nullptr) { @@ -518,8 +518,8 @@ absl::Status HasNestedJitCompile(const EagerOperation& op, return absl::OkStatus(); } -string CanonicalizeDeviceType(std::string_view device_type) { - string canonical_device_type = "Unknown"; +std::string CanonicalizeDeviceType(std::string_view device_type) { + std::string canonical_device_type = "Unknown"; if (device_type == "XLA_CPU" || device_type == tensorflow::DEVICE_CPU) { canonical_device_type = tensorflow::DEVICE_CPU; } @@ -542,11 +542,12 @@ absl::Status UpdateCompileCounter(const EagerOperation* op, return absl::OkStatus(); } - string device_type = CanonicalizeDeviceType(op->GetDeviceParsedName().type); - string compilation_option = kDisabled; + std::string device_type = + CanonicalizeDeviceType(op->GetDeviceParsedName().type); + std::string compilation_option = kDisabled; if (!compile_with_xla) { bool nested_jit_compile = false; - string device; + std::string device; if (!ctx.FuncLibDef()->HasOptimizedFunctionGraph(op->Name())) { TF_RETURN_IF_ERROR( HasNestedJitCompile(*op, ctx, &nested_jit_compile, &device)); @@ -586,14 +587,14 @@ absl::Status UpdateCompileCounter(const EagerOperation* op, using ProtoArgListType = protobuf::RepeatedPtrField; -string EscapeOrigName(const string& orig_name) { +std::string EscapeOrigName(const std::string& orig_name) { // Replace _ with __ in the original name to avoid name conflicts. return absl::StrReplaceAll(orig_name, {{"_", "__"}}); } // Variadic args are flattened during wrapping. This utility returns the name // of a flattened arg/attr. -string GetFlatName(const string orig_name, int index) { +std::string GetFlatName(const std::string orig_name, int index) { return absl::StrCat(EscapeOrigName(orig_name), "_", index); } @@ -607,13 +608,14 @@ string GetFlatName(const string orig_name, int index) { // IdentityN[T:[DT_FLOAT, DT_INT64]] -> __wrapped__IdentityN_T_2 // Concat[N:2, T:DT_FLOAT] -> __wrapped__Concat_N_2 absl::Status BuildWrappedOpName(EagerOperation* op, const OpDef& opdef, - const AbstractOpAttrs* op_attrs, string* name) { - string fname = absl::StrCat("__wrapped__", EscapeOrigName(op->Name())); + const AbstractOpAttrs* op_attrs, + std::string* name) { + std::string fname = absl::StrCat("__wrapped__", EscapeOrigName(op->Name())); // For every variadic arg in `args`, populates `attr_to_len` with // (attr_name, len(arg)). 
auto FillAttrToLen = [op_attrs, op]( const ProtoArgListType& args, - absl::btree_map* attr_to_len) { + absl::btree_map* attr_to_len) { for (const auto& arg : args) { if (!arg.type_list_attr().empty()) { absl::InlinedVector type_list; @@ -631,7 +633,7 @@ absl::Status BuildWrappedOpName(EagerOperation* op, const OpDef& opdef, } return absl::OkStatus(); }; - absl::btree_map attr_to_len; + absl::btree_map attr_to_len; TF_RETURN_IF_ERROR(FillAttrToLen(opdef.input_arg(), &attr_to_len)); TF_RETURN_IF_ERROR(FillAttrToLen(opdef.output_arg(), &attr_to_len)); for (auto& name_len : attr_to_len) { @@ -768,7 +770,8 @@ absl::Status BuildWrappedOpName(EagerOperation* op, const OpDef& opdef, // Note that the N attr is preserved so that it can get copied to the // inner op via a placeholder. This allows additional verification. absl::Status BuildWrappedOpSignature(EagerOperation* op, const OpDef& opdef, - const string& fname, OpDef& signature) { + const std::string& fname, + OpDef& signature) { signature = opdef; signature.clear_input_arg(); signature.clear_output_arg(); @@ -777,7 +780,7 @@ absl::Status BuildWrappedOpSignature(EagerOperation* op, const OpDef& opdef, auto FillSignatureArgs = [op_attrs, op]( const ProtoArgListType& opdef_args, ProtoArgListType* sig_args, - absl::flat_hash_set& new_attrs) { + absl::flat_hash_set& new_attrs) { for (const auto& arg : opdef_args) { if (!arg.type_list_attr().empty()) { absl::InlinedVector type_list; @@ -817,7 +820,7 @@ absl::Status BuildWrappedOpSignature(EagerOperation* op, const OpDef& opdef, } return absl::OkStatus(); }; - absl::flat_hash_set new_attrs; + absl::flat_hash_set new_attrs; TF_RETURN_IF_ERROR(FillSignatureArgs( opdef.input_arg(), signature.mutable_input_arg(), new_attrs)); TF_RETURN_IF_ERROR(FillSignatureArgs( @@ -838,7 +841,7 @@ absl::Status AddMixedTypeListAttrs(EagerOperation* wrapped_op, const OpDef& opdef) { auto FillAttrsToAdd = [op_attrs](const ProtoArgListType& opdef_args, - absl::flat_hash_map* attrs_to_add) { + absl::flat_hash_map* attrs_to_add) { for (const auto& arg : opdef_args) { if (!arg.type_list_attr().empty()) { absl::InlinedVector type_list; @@ -852,7 +855,7 @@ absl::Status AddMixedTypeListAttrs(EagerOperation* wrapped_op, } return absl::OkStatus(); }; - absl::flat_hash_map attrs_to_add; + absl::flat_hash_map attrs_to_add; TF_RETURN_IF_ERROR(FillAttrsToAdd(opdef.input_arg(), &attrs_to_add)); TF_RETURN_IF_ERROR(FillAttrsToAdd(opdef.output_arg(), &attrs_to_add)); for (auto& name_type : attrs_to_add) { @@ -867,7 +870,8 @@ absl::Status AddMixedTypeListAttrs(EagerOperation* wrapped_op, // outputs which need to be flattened. absl::Status PopulateRetMap(FunctionDef* fdef, const AbstractOpAttrs* op_attrs, const EagerOperation* op, const OpDef& opdef, - const OpDef& signature, const string& node_name) { + const OpDef& signature, + const std::string& node_name) { int next_sig_output = 0; for (size_t i = 0; i < opdef.output_arg_size(); i++) { const auto& output_arg = opdef.output_arg(i); @@ -916,7 +920,7 @@ absl::Status WrapInCallOp(EagerOperation* op, EagerOperation** wrapped_op) { // TODO(srbs): Support list inputs/outputs. 
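The wrapped-op name examples in the comments above (e.g. IdentityN with a two-element type list becoming __wrapped__IdentityN_T_2) follow from EscapeOrigName plus the appended attr lengths; a small sketch using the same absl string helpers, where the helper name WrappedName is illustrative only:

#include <string>
#include "absl/strings/str_cat.h"
#include "absl/strings/str_replace.h"

std::string WrappedName(const std::string& op_name,
                        const std::string& attr_name, int len) {
  // Double every "_" so the generated name cannot collide with an op name
  // that already contains the separator.
  std::string escaped = absl::StrReplaceAll(op_name, {{"_", "__"}});
  return absl::StrCat("__wrapped__", escaped, "_", attr_name, "_", len);
}
// WrappedName("IdentityN", "T", 2) -> "__wrapped__IdentityN_T_2"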
auto verify_wrappable_in_call_op = [](const OpDef& opdef, EagerOperation* op) -> absl::Status { - absl::flat_hash_set opdef_attrs; + absl::flat_hash_set opdef_attrs; for (const auto& attr : opdef.attr()) { opdef_attrs.insert(attr.name()); } @@ -941,7 +945,7 @@ absl::Status WrapInCallOp(EagerOperation* op, EagerOperation** wrapped_op) { // This can be avoided by introducing a dict in EagerContext that stores a // mapping from the eager op's name to its unique FunctionDef name. auto op_attrs = op->GetOpAttrs(); - string fname; + std::string fname; TF_RETURN_IF_ERROR(BuildWrappedOpName(op, opdef, op_attrs, &fname)); if (!op->EagerContext().GetFunctionDef(fname)) { FunctionDef fdef; @@ -1168,7 +1172,8 @@ absl::StatusOr GetKernelCacheKey( absl::Status ExtractFunctionInputInfo( EagerOperation* op, const KernelDef* kernel_def, std::vector& input_device_ptrs, - absl::flat_hash_map*>& composite_devices, + absl::flat_hash_map*>& + composite_devices, std::unordered_map& input_resource_variable_dtypes_and_shapes) { tsl::profiler::TraceMe activity("EagerCopyToDevice", @@ -1268,7 +1273,7 @@ absl::Status GetOrCreateKernelAndDevice( if (is_small_constant_optimization_enabled(*op)) { TF_ASSIGN_OR_RETURN(BoolTensorInputs bool_inputs, GetBoolInputs(op, /*delete_inputs=*/false)); - string folded_name = op->Name(); + std::string folded_name = op->Name(); for (const auto& [input_name, input_value] : bool_inputs) { folded_name = small_constants_optimizer::FoldedFunctionName( folded_name, input_name, input_value); @@ -1320,7 +1325,8 @@ absl::Status GetOrCreateKernelAndDevice( (ctx.RunEagerOpAsFunction() && !op->is_function()); std::vector input_device_ptrs; - absl::flat_hash_map*> composite_devices; + absl::flat_hash_map*> + composite_devices; std::unordered_map input_resource_variable_dtypes_and_shapes; const KernelDef* kernel_def = nullptr; @@ -1380,7 +1386,7 @@ absl::Status GetOrCreateKernelAndDevice( bool run_function_with_flr = false; bool function_runs_at_most_once = FunctionRunsAtMostOnce(op, ctx); - std::optional xla_compile_device_type; + std::optional xla_compile_device_type; if (op->is_function()) { bool compile_with_xla; // By default we should run functions with FunctionLibraryRuntime. @@ -1474,7 +1480,8 @@ absl::Status GetOrCreateKernelAndDevice( // Check if any of the Op's output_arg(s) are pinned to Host. 
if (kernel_def == nullptr) return false; const OpDef& op_def = OpRegistry::Global()->LookUp(op->Name())->op_def; - for (const string& host_memory_arg : kernel_def->host_memory_arg()) { + for (const std::string& host_memory_arg : + kernel_def->host_memory_arg()) { for (const auto& output_arg : op_def.output_arg()) { if (output_arg.name() == host_memory_arg) { return false; @@ -1613,7 +1620,7 @@ absl::Status CreateUnshapedOutput( return errors::InvalidArgument( "Unable to find a remote op id for a remote output of ", kernel.name()); } - string remote_task; + std::string remote_task; if (!DeviceNameUtils::GetTaskName(output_device->parsed_name(), &remote_task)) { return errors::InvalidArgument( @@ -1762,8 +1769,8 @@ absl::Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, TF_RETURN_IF_ERROR(ValidateInputTypeAndPlacement(&ctx, op, kernel)); if (ctx.LogDevicePlacement() || VLOG_IS_ON(1)) { - string msg = absl::StrCat("Executing op ", op->Name(), " in device ", - kernel->device()->name()); + std::string msg = absl::StrCat("Executing op ", op->Name(), " in device ", + kernel->device()->name()); if (!logging::LogToListeners(msg)) { LOG(INFO) << msg; } @@ -1828,15 +1835,15 @@ absl::Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, // TODO(fishx): Remove following code when lazy tensor copy is ready. if (op->Device() == kVariantDeviceNull) { tensorflow::Device* device = nullptr; - string device_name = op->DeviceName(); + std::string device_name = op->DeviceName(); TF_RETURN_IF_ERROR(ctx.FindDeviceFromName(device_name.c_str(), &device)); op->SetDevice(device); } core::RefCountPtr eager_client; - uint64 context_id = ctx.GetContextId(); + uint64_t context_id = ctx.GetContextId(); TF_RETURN_IF_ERROR(ctx.GetClient(op->GetDeviceParsedName(), &eager_client)); - string remote_task; + std::string remote_task; if (!DeviceNameUtils::GetTaskName(op->GetDeviceParsedName(), &remote_task)) { return errors::InvalidArgument( "Unable to find remote task corresponding to device ", @@ -1859,7 +1866,7 @@ absl::Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, tensorflow::TensorHandle* input = (*inputs)[i]; tensorflow::Device* input_device = input->device(); tensorflow::Device* input_device_or_cpu = input->DeviceOrHostCPU(ctx); - const string* input_device_name = &input_device_or_cpu->name(); + const std::string* input_device_name = &input_device_or_cpu->name(); bool serialize_resource_dtype_and_shape = false; if (op_device != input_device && // If the expected and actual devices are on the same task, don't @@ -1986,7 +1993,7 @@ absl::Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, } *num_retvals = num_outputs; - const tensorflow::uint64 id = remote_op->id(); + const uint64_t id = remote_op->id(); for (size_t i = 0; i < num_outputs; ++i) { // TODO(nareshmodi): Change the callback to instead add the decref to a // list of pending decrefs that we can send as a batch with the next @@ -2048,7 +2055,7 @@ absl::Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, {retvals, num_outputs})); if (op->EagerContext().LogDevicePlacement() || VLOG_IS_ON(1)) { - string msg = absl::StrCat( + std::string msg = absl::StrCat( "Executing op ", op->Name(), " on task ", DeviceNameUtils::ParsedNameToString(op->GetDeviceParsedName())); if (!logging::LogToListeners(msg)) { @@ -2362,7 +2369,7 @@ absl::Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx, return errors::Unimplemented( "Eager's remote execution is not available on mobile 
devices."); #else // !IS_MOBILE_PLATFORM - uint64 recv_op_id = 0; + uint64_t recv_op_id = 0; if (receiver_is_local) { Device* d = ctx->CanonicalDevice(device); // TODO(gjn): Need to add support for async execution. Note if receiver @@ -2403,7 +2410,7 @@ absl::Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx, return absl::OkStatus(); } } - string remote_task; + std::string remote_task; if (!DeviceNameUtils::GetTaskName(device->parsed_name(), &remote_task)) { return errors::InvalidArgument( "Unable to find remote task corresponding to device ", @@ -2523,8 +2530,8 @@ void EagerLocalExecuteAsync(EagerOperation* op, TensorHandle** retvals, } if (ctx.LogDevicePlacement() || VLOG_IS_ON(1)) { - string msg = absl::StrCat("Executing op ", op->Name(), " in device ", - kernel->device()->name()); + std::string msg = absl::StrCat("Executing op ", op->Name(), " in device ", + kernel->device()->name()); if (!logging::LogToListeners(msg)) { LOG(INFO) << msg; } diff --git a/tensorflow/core/common_runtime/eager/execute_node.cc b/tensorflow/core/common_runtime/eager/execute_node.cc index 09bebd3e1f7cf2..a8fb4fc308affe 100644 --- a/tensorflow/core/common_runtime/eager/execute_node.cc +++ b/tensorflow/core/common_runtime/eager/execute_node.cc @@ -23,7 +23,7 @@ namespace tensorflow { #if !defined(IS_MOBILE_PLATFORM) bool ExecuteNodeArgs::IsRemote(EagerContext* ctx, Device* input_device, TensorHandle* handle) { - uint64 context_view_id = ctx->GetContextViewId(); + uint64_t context_view_id = ctx->GetContextViewId(); if (handle->Type() == TensorHandle::REMOTE || handle->HasRemoteMirror(input_device, context_view_id)) { if (!has_remote_inputs_) { diff --git a/tensorflow/core/common_runtime/eager/execute_test.cc b/tensorflow/core/common_runtime/eager/execute_test.cc index ea174fd22f76a2..5427851f3d8b3f 100644 --- a/tensorflow/core/common_runtime/eager/execute_test.cc +++ b/tensorflow/core/common_runtime/eager/execute_test.cc @@ -70,7 +70,7 @@ TEST(ExecuteTest, SimpleFunction) { false, &device_mgr, false, nullptr, nullptr); const Tensor kTwo = test::AsScalar(2); - const string function_name = "XTimesTwo"; + const std::string function_name = "XTimesTwo"; const FunctionDef x_times_two = FunctionDefHelper::Define( // Name function_name, @@ -125,7 +125,7 @@ TEST(ExecuteTest, SimpleFunctionInt32BadFullType) { /*run_eager_op_as_function=*/true); const Tensor kTwo = test::AsScalar(2); - const string function_name = "XTimesTwo"; + const std::string function_name = "XTimesTwo"; const FunctionDef x_times_two = FunctionDefHelper::Define( // Name function_name, @@ -188,7 +188,7 @@ TEST(ExecuteTest, CompiledFunction) { false, &device_mgr, false, nullptr, nullptr); const Tensor kTwo = test::AsScalar(2); - const string function_name = "XTimesTwo"; + const std::string function_name = "XTimesTwo"; const FunctionDef x_times_two = FunctionDefHelper::Define( // Name function_name, @@ -245,7 +245,7 @@ TEST(ExecuteTest, NestedCompiledFunction) { false, &device_mgr, false, nullptr, nullptr); const Tensor kTwo = test::AsScalar(2); - const string function_name = "XTimesTwo"; + const std::string function_name = "XTimesTwo"; const FunctionDef x_times_two = FunctionDefHelper::Define( // Name function_name, @@ -266,7 +266,7 @@ TEST(ExecuteTest, NestedCompiledFunction) { }); TF_ASSERT_OK(ctx->AddFunctionDef(x_times_two)); - const string call_function_name = "FunctionCall"; + const std::string call_function_name = "FunctionCall"; const FunctionDef function_call = FunctionDefHelper::Define( // Name call_function_name, @@ -325,7 +325,7 @@ 
TEST(ExecuteTest, MultipleNestedCompiledFunction) { false, &device_mgr, false, nullptr, nullptr); const Tensor kTwo = test::AsScalar(2); - const string function_name = "XTimesTwo"; + const std::string function_name = "XTimesTwo"; const FunctionDef x_times_two = FunctionDefHelper::Define( // Name function_name, @@ -346,7 +346,7 @@ TEST(ExecuteTest, MultipleNestedCompiledFunction) { }); TF_ASSERT_OK(ctx->AddFunctionDef(x_times_two)); - const string call_function_name = "FunctionCall"; + const std::string call_function_name = "FunctionCall"; FunctionDef function_call = FunctionDefHelper::Define( // Name call_function_name, @@ -379,7 +379,7 @@ TEST(ExecuteTest, MultipleNestedCompiledFunction) { TF_ASSERT_OK(ctx->AddFunctionDef(function_call)); - const string call_function_name2 = "FunctionCall2"; + const std::string call_function_name2 = "FunctionCall2"; const FunctionDef function_call2 = FunctionDefHelper::Define( // Name call_function_name2, diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.h b/tensorflow/core/common_runtime/eager/kernel_and_device.h index 790151d92129a4..ba437b5df5e37d 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.h +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.h @@ -193,7 +193,7 @@ class KernelAndDevice : public core::RefCounted { virtual int num_inputs() const = 0; virtual int num_outputs() const = 0; - virtual const string& name() const = 0; + virtual const std::string& name() const = 0; protected: std::function)>* get_runner() const; @@ -262,7 +262,7 @@ class KernelAndDeviceOp final : public KernelAndDevice { } int num_inputs() const override { return kernel_->num_inputs(); } int num_outputs() const override { return kernel_->num_outputs(); } - const string& name() const override { return kernel_->name(); } + const std::string& name() const override { return kernel_->name(); } private: std::unique_ptr kernel_; @@ -286,19 +286,20 @@ class KernelAndDeviceFunc : public KernelAndDevice { KernelAndDeviceFunc( FunctionLibraryRuntime* flr, ProcessFunctionLibraryRuntime* pflr, std::vector input_devices, - absl::flat_hash_map*> composite_devices, + absl::flat_hash_map*> + composite_devices, std::unordered_map input_resource_dtypes_and_shapes, std::function)>* runner, std::unique_ptr collective_executor, - Device* host_cpu_device, const string& name, + Device* host_cpu_device, const std::string& name, const bool outputs_on_op_device, const bool allow_small_function_optimizations, const bool allow_control_flow_sync_execution, const bool shape_inference_on_tfe_dialect_import, const bool int_args_and_retvals_on_device, const bool function_runs_at_most_once, - std::optional xla_compile_device_type, + std::optional xla_compile_device_type, const bool allow_soft_placement, Rendezvous::Factory rendezvous_factory, std::function get_op_id) : KernelAndDevice(flr, runner, std::move(collective_executor), @@ -366,7 +367,7 @@ class KernelAndDeviceFunc : public KernelAndDevice { } int num_inputs() const override { return input_dtypes_.size(); } int num_outputs() const override { return output_dtypes_.size(); } - const string& name() const override { return name_; }; + const std::string& name() const override { return name_; }; private: std::shared_ptr PrepareForRun( @@ -402,7 +403,7 @@ class KernelAndDeviceFunc : public KernelAndDevice { const bool function_runs_at_most_once_; - const absl::optional xla_compile_device_type_; + const absl::optional xla_compile_device_type_; const bool allow_soft_placement_; @@ -413,13 +414,14 @@ class 
KernelAndDeviceFunc : public KernelAndDevice { // devices. std::vector input_devices_; // Maps from a CompositeDevice name to a list of physical device names. - absl::flat_hash_map*> composite_devices_; + absl::flat_hash_map*> + composite_devices_; std::unordered_map input_resource_dtypes_and_shapes_; DataTypeVector input_dtypes_; DataTypeVector output_dtypes_; - string name_; + std::string name_; Rendezvous::Factory rendezvous_factory_; std::function get_op_id_; diff --git a/tensorflow/core/common_runtime/eager/placement_utils.cc b/tensorflow/core/common_runtime/eager/placement_utils.cc index e6d547d1e9832b..9b6e0e66a72a64 100644 --- a/tensorflow/core/common_runtime/eager/placement_utils.cc +++ b/tensorflow/core/common_runtime/eager/placement_utils.cc @@ -34,19 +34,20 @@ namespace eager { // generate and then copy the data instead of just generating the data on the // device directly. static bool IsPinnableOp(absl::string_view op_name) { - static const gtl::FlatSet* unpinnable_ops = new gtl::FlatSet({ - "RandomUniform", - "RandomUniformInt", - "RandomStandardNormal", - "StatelessRandomUniform", - "StatelessRandomUniformInt", - "StatelessRandomUniformFullInt", - "StatelessRandomNormal", - }); + static const gtl::FlatSet* unpinnable_ops = + new gtl::FlatSet({ + "RandomUniform", + "RandomUniformInt", + "RandomStandardNormal", + "StatelessRandomUniform", + "StatelessRandomUniformInt", + "StatelessRandomUniformFullInt", + "StatelessRandomNormal", + }); // XRT ops refer to per-device handles that are not safe to move between // devices. - return unpinnable_ops->find(string(op_name)) == unpinnable_ops->end() && + return unpinnable_ops->find(std::string(op_name)) == unpinnable_ops->end() && !absl::StartsWith(op_name, "XRT"); } // Validate if the remote device with the given incarnation is valid in the @@ -64,12 +65,12 @@ static absl::Status ValidateTensorHandleRemoteDevice( bool IsColocationExempt(absl::string_view op_name) { const auto& exempt_ops = InputColocationExemptionRegistry::Global()->Get(); - return exempt_ops.find(string(op_name)) != exempt_ops.end(); + return exempt_ops.find(std::string(op_name)) != exempt_ops.end(); } bool IsFunction(absl::string_view op_name) { const OpDef* op_def = nullptr; - absl::Status s = OpDefForOp(string(op_name), &op_def); + absl::Status s = OpDefForOp(std::string(op_name), &op_def); if (!s.ok()) { if (!absl::IsNotFound(s)) { LOG(WARNING) << "Looking up OpDef failed with error: " << s; diff --git a/tensorflow/core/common_runtime/eager/placement_utils_test.cc b/tensorflow/core/common_runtime/eager/placement_utils_test.cc index c543b9475a072c..aadec6deab8eb8 100644 --- a/tensorflow/core/common_runtime/eager/placement_utils_test.cc +++ b/tensorflow/core/common_runtime/eager/placement_utils_test.cc @@ -94,7 +94,7 @@ struct MaybePinSmallOpsToCpuTestCase { std::string test_name; DataType dtype; TensorShape shape; - string op_name; + std::string op_name; const char* device; bool expect; }; @@ -152,7 +152,7 @@ INSTANTIATE_TEST_SUITE_P( struct MaybePinToResourceDeviceTestCase { std::string test_name; DataType dtype; - string op_name; + std::string op_name; const char* device; bool expect; }; diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc index d4faba6415579f..583a8f15a657f4 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -60,7 +60,7 @@ int64_t GetRemoteDeviceIncarnation(Device* device) { return 
device->attributes().incarnation(); } -string SafeDeviceDebugString(Device* device) { +std::string SafeDeviceDebugString(Device* device) { if (device == nullptr) { return "[]"; } else { @@ -150,8 +150,8 @@ void TensorHandle::PackedTensorHandleData::Poison(absl::Status status) { is_poisoned_ = status; } -string TensorHandle::PackedTensorHandleData::DebugString() const { - string debug_str = "PackedTensorHandleData: "; +std::string TensorHandle::PackedTensorHandleData::DebugString() const { + std::string debug_str = "PackedTensorHandleData: "; for (const auto* handle : handles_) { debug_str.append( absl::StrCat(std::visit([](auto& data) { return data.DebugString(); }, @@ -308,7 +308,7 @@ TensorHandle::TensorHandle(Device* d, Device* op_device, absl::Status TensorHandle::CreatePackedHandle( std::vector&& handles, const tensorflow::DataType dtype, - const tensorflow::TensorShape& shape, const string& device_name, + const tensorflow::TensorShape& shape, const std::string& device_name, EagerContext* ctx, TensorHandle** packed_handle) { if (handles.empty()) { return errors::InvalidArgument("Handles should not be empty."); @@ -319,7 +319,7 @@ absl::Status TensorHandle::CreatePackedHandle( TF_RETURN_IF_ERROR( handles.at(0)->GetResourceHandleDtypesAndShapes(&dtypes_and_shapes)); } - std::vector devices; + std::vector devices; devices.reserve(handles.size()); for (auto* handle : handles) { devices.push_back(handle->op_device() ? handle->op_device()->name() @@ -372,7 +372,7 @@ TensorHandle::TensorHandle(std::vector&& handles, Device* device, #if !defined(IS_MOBILE_PLATFORM) TensorHandle* TensorHandle::CreateUnshapedRemoteHandle( - int64_t op_id, int32_t output_num, const string& remote_task, + int64_t op_id, int32_t output_num, const std::string& remote_task, tensorflow::DataType dtype, Device* d, EagerContext* ctx, const bool unknown_device) { return new TensorHandle(op_id, output_num, remote_task, dtype, d, ctx, @@ -380,7 +380,7 @@ TensorHandle* TensorHandle::CreateUnshapedRemoteHandle( } TensorHandle::TensorHandle(int64_t op_id, int32_t output_num, - const string& remote_task, + const std::string& remote_task, tensorflow::DataType dtype, Device* d, EagerContext* ctx, const bool unknown_device) : ImmediateExecutionTensorHandle(kEager), @@ -450,7 +450,7 @@ TensorHandle::HandleType TensorHandle::Type() const { } } -string TensorHandle::TypeString() const { +std::string TensorHandle::TypeString() const { if (data_.index() == 0) { return "LOCAL"; } else if (data_.index() == 1) { @@ -713,7 +713,7 @@ absl::Status TensorHandle::AddEmptyLocalMirror(const Device* d) { absl::Status TensorHandle::RemoteAddress(const Device* d, const bool wait_until_ready, int64_t* op_id, - int32* output_num) const { + int32_t* output_num) const { DVLOG(3) << "RemoteAddress on TensorHandle: " << this << " device: " << d << " " << d->name(); @@ -759,7 +759,7 @@ absl::Status TensorHandle::RemoteAddress(const Device* d, } bool TensorHandle::HasRemoteMirror(const Device* d, - uint64 context_view_id) const { + uint64_t context_view_id) const { DVLOG(3) << "HasRemoteMirror on TensorHandle: " << this << " device: " << d << " " << d->name(); @@ -777,7 +777,7 @@ bool TensorHandle::HasRemoteMirror(const Device* d, } bool TensorHandle::HasResourceShapeMirror(const Device* d, - uint64 context_view_id) const { + uint64_t context_view_id) const { DVLOG(3) << "HasResourceShapeMirror on TensorHandle: " << this << " device: " << d << " " << d->name(); @@ -793,11 +793,9 @@ bool TensorHandle::HasResourceShapeMirror(const Device* d, return false; 
} -absl::Status TensorHandle::AddUnshapedRemoteMirror(const Device* d, - int64_t op_id, - int output_num, - const string& remote_task, - EagerContext* ctx) { +absl::Status TensorHandle::AddUnshapedRemoteMirror( + const Device* d, int64_t op_id, int output_num, + const std::string& remote_task, EagerContext* ctx) { DVLOG(3) << "AddUnshapedRemoteMirror on TensorHandle: " << this << " device: " << d << " " << d->name() << " op_id: " << op_id << " output_num: " << output_num; @@ -856,14 +854,14 @@ absl::Status TensorHandle::AddResourceShapeMirror(const Device* d, absl::Status TensorHandle::SetRemoteShape(const TensorShape& shape, const Device* d, - uint64 context_view_id) { + uint64_t context_view_id) { return SetRemoteShapeAndDevice(shape, d, context_view_id, /*op_device=*/""); } absl::Status TensorHandle::SetRemoteShapeAndDevice(const TensorShape& shape, const Device* d, - uint64 context_view_id, - string op_device) { + uint64_t context_view_id, + std::string op_device) { DVLOG(3) << "SetRemoteShape on TensorHandle: " << this << " device: " << d << " " << d->name(); @@ -930,7 +928,7 @@ absl::Status TensorHandle::SetRemoteShapeAndDevice(const TensorShape& shape, resource_device_ = dtype == DT_RESOURCE ? device : nullptr; resource_remote_device_incarnation_ = GetRemoteDeviceIncarnation(resource_device_); - string remote_task; + std::string remote_task; if (!DeviceNameUtils::GetTaskName(device->parsed_name(), &remote_task)) { return errors::InvalidArgument( "Unable to find remote task corresponding to device ", @@ -948,7 +946,7 @@ absl::Status TensorHandle::SetRemoteShapeAndDevice(const TensorShape& shape, } void TensorHandle::PoisonRemote(absl::Status status, const Device* d, - uint64 context_view_id) { + uint64_t context_view_id) { DVLOG(3) << "PoisonRemote on TensorHandle: " << this << " device: " << d << " " << d->name(); diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index ca60815d76ec9e..e2fdb872c317a2 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -66,9 +66,9 @@ class TensorHandle : public ImmediateExecutionTensorHandle { tensorflow::DataType dtype, EagerContext* ctx); #if !defined(IS_MOBILE_PLATFORM) - TensorHandle(int64_t op_id, int32_t output_num, const string& remote_task, - tensorflow::DataType dtype, Device* device, EagerContext* ctx, - bool unknown_device); + TensorHandle(int64_t op_id, int32_t output_num, + const std::string& remote_task, tensorflow::DataType dtype, + Device* device, EagerContext* ctx, bool unknown_device); TensorHandle(int64_t op_id, int32_t output_num, tensorflow::DataType dtype, Device* device, bool is_ready, EagerContext* ctx); #endif // IS_MOBILE_PLATFORM @@ -97,7 +97,7 @@ class TensorHandle : public ImmediateExecutionTensorHandle { static absl::Status CreatePackedHandle(std::vector&& handles, tensorflow::DataType dtype, const tensorflow::TensorShape& shape, - const string& device_name, + const std::string& device_name, EagerContext* ctx, TensorHandle** packed_handle); static absl::Status CreatePackedHandle(std::vector&& handles, @@ -108,12 +108,10 @@ class TensorHandle : public ImmediateExecutionTensorHandle { // An unshaped remote handle refers to a tensor on a remote worker. It's not // ready until the shape is set. It controls the lifetime of the remote // tensor. 
- static TensorHandle* CreateUnshapedRemoteHandle(int64_t op_id, - int32_t output_num, - const string& remote_task, - tensorflow::DataType dtype, - Device* d, EagerContext* ctx, - bool unknown_device = false); + static TensorHandle* CreateUnshapedRemoteHandle( + int64_t op_id, int32_t output_num, const std::string& remote_task, + tensorflow::DataType dtype, Device* d, EagerContext* ctx, + bool unknown_device = false); // A lazy remote handle refers to a tensor on a remote worker. The lifetime of // the remote tensor is controlled by the remote worker, but not by the lazy // remote handle. Lazy handles are normally created on a default function @@ -189,12 +187,12 @@ class TensorHandle : public ImmediateExecutionTensorHandle { absl::Status AddLocalMirror(tensorflow::Tensor&& tensor, const Device* d); #if !defined(IS_MOBILE_PLATFORM) - bool HasRemoteMirror(const Device* d, uint64 context_view_id) const; - bool HasResourceShapeMirror(const Device* d, uint64 context_view_id) const; + bool HasRemoteMirror(const Device* d, uint64_t context_view_id) const; + bool HasResourceShapeMirror(const Device* d, uint64_t context_view_id) const; absl::Status AddUnshapedRemoteMirror(const Device* d, int64_t op_id, int output_num, - const string& remote_task, + const std::string& remote_task, EagerContext* ctx); absl::Status AddResourceShapeMirror(const Device* d, int64_t op_id, int output_num, EagerContext* ctx); @@ -203,7 +201,7 @@ class TensorHandle : public ImmediateExecutionTensorHandle { // If wait_until_ready is true, block until the remote tensor is ready on the // given remote worker. absl::Status RemoteAddress(const Device* d, bool wait_until_ready, - int64_t* op_id, int32* output_num) const; + int64_t* op_id, int32_t* output_num) const; // Called on an async remote tensor once it's shape has been determined. This // transitions the tensor handle from a non-ready to a ready state by @@ -213,12 +211,13 @@ class TensorHandle : public ImmediateExecutionTensorHandle { // This method or Poison must be called exactly once for remote tensors that // were created without a known shape. absl::Status SetRemoteShape(const TensorShape& shape, const Device* d, - uint64 context_view_id); + uint64_t context_view_id); // If op_device is not empty, reset the devices of a remote tensor which is // created without known devices (e.g. function outputs). absl::Status SetRemoteShapeAndDevice(const TensorShape& shape, - const Device* d, uint64 context_view_id, - string op_device); + const Device* d, + uint64_t context_view_id, + std::string op_device); // Poisons either this handle or a remote mirror with error `status`. // Poisoning means that the handle will become ready and methods trying @@ -226,7 +225,7 @@ class TensorHandle : public ImmediateExecutionTensorHandle { // Exactly one of SetRemoteShape or PoisonRemote methods must be called on a // unshaped handle on a remote device. void PoisonRemote(absl::Status status, const Device* d, - uint64 context_view_id); + uint64_t context_view_id); #endif // Sets the `tensor` for this async non-ready handle making it ready. 
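// -----------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the recurring pattern in the
// tensor_handle changes above is to spell out the standard types
// (std::string, uint64_t, int32_t) in signatures instead of TensorFlow's
// legacy aliases (string, uint64, int32). The struct and names below are
// hypothetical and only demonstrate the before/after spelling; they are not
// TensorFlow APIs.
#include <cstdint>
#include <string>

namespace remote_handle_sketch {

// Before: int64_t op_id; int32 output_num; uint64 context_view_id;
//         string remote_task;
// After, using the spellings adopted in the signatures touched above:
struct RemoteAddress {
  int64_t op_id = 0;
  int32_t output_num = 0;
  uint64_t context_view_id = 0;
  std::string remote_task;  // e.g. "/job:worker/replica:0/task:1"
};

}  // namespace remote_handle_sketch
// -----------------------------------------------------------------------------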
@@ -260,7 +259,7 @@ class TensorHandle : public ImmediateExecutionTensorHandle { enum HandleType { LOCAL = 0, PACKED = 1, REMOTE = 2 }; HandleType Type() const; - string TypeString() const; + std::string TypeString() const; void SetResourceHandleDtypeAndShape( std::vector dtypes_and_shapes); @@ -330,9 +329,9 @@ class TensorHandle : public ImmediateExecutionTensorHandle { // TODO(yujingzhang): Remove resource_shape_mirrors_ once scalable per-replica // variable is ready, since we could get the shape locally without remote copy // then. - std::unordered_map resource_shape_mirrors_ - TF_GUARDED_BY(mu_); - std::unordered_map remote_mirrors_ + std::unordered_map + resource_shape_mirrors_ TF_GUARDED_BY(mu_); + std::unordered_map remote_mirrors_ TF_GUARDED_BY(mu_); #endif @@ -371,7 +370,7 @@ class TensorHandle : public ImmediateExecutionTensorHandle { bool IsReady() const; absl::Status WaitReady(const char* caller) const; void Poison(absl::Status status); - string DebugString() const; + std::string DebugString() const; // Number of packed handles. int NumPackedHandles() const; diff --git a/tensorflow/core/common_runtime/eager/tensor_handle_data.cc b/tensorflow/core/common_runtime/eager/tensor_handle_data.cc index 2212b19db9c683..b0a089874dd744 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle_data.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle_data.cc @@ -96,7 +96,7 @@ absl::Status LocalTensorHandleData::SetTensor(tensorflow::Tensor&& t) { return absl::OkStatus(); } -string LocalTensorHandleData::DebugString() const { +std::string LocalTensorHandleData::DebugString() const { if (IsReady()) { return tensor_.DeviceSafeDebugString(); } else { diff --git a/tensorflow/core/common_runtime/eager/tensor_handle_data.h b/tensorflow/core/common_runtime/eager/tensor_handle_data.h index ed58e83a183bfe..73a20425871156 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle_data.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle_data.h @@ -60,7 +60,7 @@ class LocalTensorHandleData { absl::Status SetTensor(tensorflow::Tensor&& t); - string DebugString() const; + std::string DebugString() const; private: tensorflow::Tensor tensor_; diff --git a/tensorflow/core/common_runtime/eager/tensor_handle_test.cc b/tensorflow/core/common_runtime/eager/tensor_handle_test.cc index 209222d33f1185..0bd94f635f0f00 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle_test.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle_test.cc @@ -44,7 +44,7 @@ TEST(TensorHandle_ShapeTest, AsyncShape) { EXPECT_TRUE(t.shape().IsSameSize(TensorShape({2, 2}))); for (int64_t a = 0; a < t.shape().dim_size(0); a++) { for (int64_t b = 0; b < t.shape().dim_size(1); b++) { - t.matrix()(a, b) = uint16(a * b); + t.matrix()(a, b) = uint16_t(a * b); } } @@ -181,7 +181,7 @@ TEST_F(PackedTensorHandleTest, PackedHandle) { handles.push_back(h1); // Create 2 remote TensorHandles (not ready). 
- const string remote_task = "/job:worker/replica:0/task:1"; + const std::string remote_task = "/job:worker/replica:0/task:1"; Device* d2 = ListGPUDevices().at(2); TensorHandle* h2 = TensorHandle::CreateUnshapedRemoteHandle( /*op_id=*/0, /*output_num=*/0, remote_task, dtype, d2, context()); @@ -439,7 +439,7 @@ TEST_F(RemoteTensorHandleTest, UnknownRemoteDevice) { tensorflow::DataType dtype = DT_FLOAT; TensorShape shape = {}; - const string remote_task = "/job:worker/replica:0/task:1"; + const std::string remote_task = "/job:worker/replica:0/task:1"; Device* d1 = device_mgr.ListDevices().at(1); TensorHandle* h = TensorHandle::CreateUnshapedRemoteHandle( /*op_id=*/0, /*output_num=*/0, remote_task, dtype, d1, context, @@ -478,7 +478,7 @@ TEST_F(RemoteTensorHandleTest, PoisonRemote) { tensorflow::DataType dtype = DT_FLOAT; TensorShape shape = {}; - const string remote_task = "/job:worker/replica:0/task:1"; + const std::string remote_task = "/job:worker/replica:0/task:1"; Device* d1 = device_mgr.ListDevices().at(1); TensorHandle* h = TensorHandle::CreateUnshapedRemoteHandle( /*op_id=*/0, /*output_num=*/0, remote_task, dtype, d1, context, @@ -519,7 +519,7 @@ TEST_F(RemoteTensorHandleTest, PoisonRemoteMirror) { tensorflow::DataType dtype = DT_FLOAT; TensorShape shape = {}; - const string remote_task = "/job:worker/replica:0/task:1"; + const std::string remote_task = "/job:worker/replica:0/task:1"; Device* d1 = device_mgr.ListDevices().at(1); TensorHandle* h = TensorHandle::CreateUnshapedRemoteHandle( /*op_id=*/0, /*output_num=*/0, remote_task, dtype, d1, context, @@ -565,7 +565,7 @@ TEST_F(RemoteTensorHandleTest, SetRemoteTensorHandleShapeTwice) { tensorflow::DataType dtype = DT_FLOAT; TensorShape shape = {}; - const string remote_task = "/job:worker/replica:0/task:1"; + const std::string remote_task = "/job:worker/replica:0/task:1"; Device* d1 = device_mgr.ListDevices().at(1); TensorHandle* h = TensorHandle::CreateUnshapedRemoteHandle( /*op_id=*/0, /*output_num=*/0, remote_task, dtype, d1, context, @@ -623,7 +623,7 @@ TEST_F(RemoteTensorHandleTest, SetRemoteMirrorShapeTwice) { tensorflow::DataType dtype = DT_FLOAT; TensorShape shape = {}; - const string remote_task = "/job:worker/replica:0/task:1"; + const std::string remote_task = "/job:worker/replica:0/task:1"; Device* d1 = device_mgr.ListDevices().at(1); TensorHandle* h = TensorHandle::CreateUnshapedRemoteHandle( /*op_id=*/0, /*output_num=*/0, remote_task, dtype, d1, context, diff --git a/tensorflow/core/common_runtime/gpu/BUILD b/tensorflow/core/common_runtime/gpu/BUILD index c7a06f7e5c600b..87b1101bbab536 100644 --- a/tensorflow/core/common_runtime/gpu/BUILD +++ b/tensorflow/core/common_runtime/gpu/BUILD @@ -205,6 +205,7 @@ tf_cuda_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@local_xla//xla:shape_util", + "@local_xla//xla/pjrt:host_memory_allocator", "@local_xla//xla/stream_executor/gpu:gpu_init_impl", "@local_xla//xla/stream_executor/integrations:stream_executor_allocator", "@local_xla//xla/tsl/framework:device_id_utils", diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc index 8fd3dc450c98a6..80ba5156327af4 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc @@ -434,7 +434,7 @@ class GPUBFCAllocatorPrivateMethodsTest std::array bin_infos; { - absl::MutexLock l(&a.mutex_); + absl::MutexLock l(a.mutex_); bin_infos = 
a.get_bin_debug_info(); } @@ -486,7 +486,7 @@ class GPUBFCAllocatorPrivateMethodsTest initial_ptrs[i] = nullptr; } { - absl::MutexLock l(&a.mutex_); + absl::MutexLock l(a.mutex_); bin_infos = a.get_bin_debug_info(); } for (int i = 0; i < BFCAllocator::kNumBins; i++) { @@ -610,7 +610,7 @@ class GPUBFCAllocatorPrivateMethodsTest_SubAllocatorSpecific } { - absl::MutexLock l(&a.mutex_); + absl::MutexLock l(a.mutex_); // Make sure there are more than 1 regions in preparation for the test. EXPECT_LT(1, a.region_manager_.regions().size()); } @@ -623,7 +623,7 @@ class GPUBFCAllocatorPrivateMethodsTest_SubAllocatorSpecific // Deallocate free regions and there shall be only one region left. EXPECT_EQ(true, a.DeallocateFreeRegions(/*rounded_bytes=*/0)); { - absl::MutexLock l(&a.mutex_); + absl::MutexLock l(a.mutex_); EXPECT_EQ(1, a.region_manager_.regions().size()); } diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index 0a2d98e840a849..1bebcc31d45c76 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -44,7 +44,8 @@ int64_t* before_mask = NewMask(0xabababababababab); int64_t* after_mask = NewMask(0xcdcdcdcdcdcdcdcd); bool CheckMask(se::StreamExecutor* exec, void* ptr, int64_t* mask) { - se::DeviceMemory gpu_ptr{se::DeviceMemoryBase{ptr, MASK_BYTES}}; + stream_executor::DeviceAddress gpu_ptr{ + stream_executor::DeviceAddressBase{ptr, MASK_BYTES}}; int64_t tmp[MASK_WORDS]; absl::Status result = exec->SynchronousMemcpyD2H(gpu_ptr, MASK_BYTES, tmp); @@ -66,7 +67,8 @@ bool CheckMask(se::StreamExecutor* exec, void* ptr, int64_t* mask) { } void InitMask(se::StreamExecutor* exec, void* ptr, int64_t* mask) { - se::DeviceMemory gpu_ptr{se::DeviceMemoryBase{ptr, MASK_BYTES}}; + stream_executor::DeviceAddress gpu_ptr{ + stream_executor::DeviceAddressBase{ptr, MASK_BYTES}}; absl::Status result = exec->SynchronousMemcpyH2D(mask, MASK_BYTES, &gpu_ptr); if (!result.ok()) { LOG(FATAL) << "Could not copy debug mask, " << result; @@ -175,8 +177,9 @@ void* GPUNanResetAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { size_t req_size = base_allocator_->RequestedSize(allocated_ptr); std::vector nans((req_size + sizeof(float) - 1) / sizeof(float), std::nanf("")); - se::DeviceMemory nan_ptr{ - se::DeviceMemoryBase{static_cast(allocated_ptr), req_size}}; + stream_executor::DeviceAddress nan_ptr{ + stream_executor::DeviceAddressBase{static_cast(allocated_ptr), + req_size}}; absl::Status result = stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, &nan_ptr); @@ -192,8 +195,8 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) { size_t req_size = base_allocator_->RequestedSize(ptr); std::vector nans((req_size + sizeof(float) - 1) / sizeof(float), std::nanf("")); - se::DeviceMemory nan_ptr{ - se::DeviceMemoryBase{static_cast(ptr), req_size}}; + stream_executor::DeviceAddress nan_ptr{ + stream_executor::DeviceAddressBase{static_cast(ptr), req_size}}; absl::Status result = stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, &nan_ptr); if (!result.ok()) { diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc index 573e42fea61860..1d252f549d3803 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc @@ -57,7 +57,8 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_None) { 
memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64_t)); int64_t* gpu_array = TypedAllocator::Allocate(&a, cpu_array.size(), {}); - se::DeviceMemory gpu_array_ptr{se::DeviceMemoryBase{gpu_array}}; + stream_executor::DeviceAddress gpu_array_ptr{ + stream_executor::DeviceAddressBase{gpu_array}}; TF_CHECK_OK(stream_exec->SynchronousMemcpyH2D( &cpu_array[0], s * sizeof(int64_t), &gpu_array_ptr)); EXPECT_TRUE(a.CheckHeader(gpu_array)); @@ -85,14 +86,14 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) { int64_t* gpu_array = TypedAllocator::Allocate(&a, cpu_array.size(), {}); - se::DeviceMemory gpu_array_ptr{ - se::DeviceMemoryBase{gpu_array}}; + stream_executor::DeviceAddress gpu_array_ptr{ + stream_executor::DeviceAddressBase{gpu_array}}; TF_CHECK_OK(stream_exec->SynchronousMemcpyH2D( &cpu_array[0], cpu_array.size() * sizeof(int64_t), &gpu_array_ptr)); - se::DeviceMemory gpu_hdr_ptr{ - se::DeviceMemoryBase{gpu_array - 1}}; + stream_executor::DeviceAddress gpu_hdr_ptr{ + stream_executor::DeviceAddressBase{gpu_array - 1}}; // Clobber first word of the header. float pi = 3.1417; TF_CHECK_OK(stream_exec->SynchronousMemcpyH2D(&pi, sizeof(float), @@ -122,15 +123,15 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { int64_t* gpu_array = TypedAllocator::Allocate(&a, cpu_array.size(), {}); - se::DeviceMemory gpu_array_ptr{ - se::DeviceMemoryBase{gpu_array}}; + stream_executor::DeviceAddress gpu_array_ptr{ + stream_executor::DeviceAddressBase{gpu_array}}; TF_CHECK_OK(stream_exec->SynchronousMemcpyH2D( &cpu_array[0], cpu_array.size() * sizeof(int64_t), &gpu_array_ptr)); // Clobber word of the footer. - se::DeviceMemory gpu_ftr_ptr{ - se::DeviceMemoryBase{gpu_array + s}}; + stream_executor::DeviceAddress gpu_ftr_ptr{ + stream_executor::DeviceAddressBase{gpu_array + s}}; float pi = 3.1417; TF_CHECK_OK(stream_exec->SynchronousMemcpyH2D(&pi, sizeof(float), &gpu_ftr_ptr)); @@ -156,7 +157,8 @@ TEST(GPUDebugAllocatorTest, ResetToNan) { // Allocate 1024 floats float* gpu_array = TypedAllocator::Allocate(&a, cpu_array.size(), {}); - se::DeviceMemory gpu_array_ptr{se::DeviceMemoryBase{gpu_array}}; + stream_executor::DeviceAddress gpu_array_ptr{ + stream_executor::DeviceAddressBase{gpu_array}}; TF_CHECK_OK(stream_exec->SynchronousMemcpyD2H( gpu_array_ptr, cpu_array.size() * sizeof(float), &cpu_array[0])); for (float f : cpu_array) { @@ -200,7 +202,8 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { // Allocate 1024 floats float* gpu_array = TypedAllocator::Allocate(&a, cpu_array.size(), {}); - se::DeviceMemory gpu_array_ptr{se::DeviceMemoryBase{gpu_array}}; + stream_executor::DeviceAddress gpu_array_ptr{ + stream_executor::DeviceAddressBase{gpu_array}}; TF_CHECK_OK(stream_exec->SynchronousMemcpyD2H( gpu_array_ptr, cpu_array.size() * sizeof(float), &cpu_array[0])); for (float f : cpu_array) { diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 22eecde5ba7d8a..f40fd04472700c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -15,6 +15,7 @@ limitations under the License. // TODO(opensource): Use a more generic sounding preprocessor name than // GOOGLE_CUDA +#include "xla/pjrt/host_memory_allocator.h" #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) @@ -1880,8 +1881,10 @@ Status BaseGPUDeviceFactory::CreateDevices( // TODO(chuanhao): Use the correct NUMA_NODE. 
const int64_t numa_node = 0; - std::unique_ptr pjrt_gpu_host_allocator( - process_state->GetGpuHostAllocator(/*options=*/{}, numa_node)); + auto pjrt_gpu_host_allocator = + std::make_unique( + std::unique_ptr( + process_state->GetGpuHostAllocator(/*options=*/{}, numa_node))); if (populate_pjrt_gpu_client_creation_info && !should_create_new_pjrt_client) { diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index d09cdc2fb2c0f4..441715bd2d22cb 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -105,28 +105,28 @@ class BaseGPUDevice : public LocalDevice { #endif se::Stream* host_to_device = nullptr; se::Stream* device_to_host = nullptr; - gtl::InlinedVector device_to_device; + absl::InlinedVector device_to_device; int priority = 0; }; // Initialize the device and return the status of initialization. #ifdef TF_GPU_USE_PJRT - Status Init(const SessionOptions& options, - xla::LocalDeviceState* xla_local_device_state); + absl::Status Init(const SessionOptions& options, + xla::LocalDeviceState* xla_local_device_state); #else - Status Init(const SessionOptions& options); + absl::Status Init(const SessionOptions& options); #endif // TF_GPU_USE_PJRT void Compute(OpKernel* op_kernel, OpKernelContext* context) override; - Status Sync() override; + absl::Status Sync() override; void ComputeAsync(AsyncOpKernel* op_kernel, OpKernelContext* context, AsyncOpKernel::DoneCallback done) override; - Status MakeTensorFromProto(const TensorProto& tensor_proto, - AllocatorAttributes alloc_attrs, - Tensor* tensor) override; + absl::Status MakeTensorFromProto(const TensorProto& tensor_proto, + AllocatorAttributes alloc_attrs, + Tensor* tensor) override; void CopyTensorInSameDevice(const Tensor* input_tensor, Tensor* output_tensor, const DeviceContext* device_context, @@ -135,9 +135,9 @@ class BaseGPUDevice : public LocalDevice { // The caller owns the returned device. PerOpGpuDevice* MakeGpuDevice() override; - Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, - DeviceContext* dc, - Allocator* allocator) override; + absl::Status ReinitializeGpuDevice(OpKernelContext* context, + PerOpGpuDevice* device, DeviceContext* dc, + Allocator* allocator) override; // Returns the platform GPU id of this device within the native driver system; // e.g., for CUDA and ROCm this is the ordinal of the GPU within the system. @@ -164,7 +164,7 @@ class BaseGPUDevice : public LocalDevice { // If returned value is > 0 then GPU Memory chunks freed before this count // are guaranteed not to be in use by any kernel pending on this device. - uint64 SafeAllocFrontier(uint64 old_value) override; + uint64_t SafeAllocFrontier(uint64_t old_value) override; // Returns the number of kernels that have been queued for execution on // the compute stream and are not yet known to have completed. @@ -216,13 +216,13 @@ class BaseGPUDevice : public LocalDevice { EventMgr* em_ = nullptr; std::unique_ptr thread_pool_; std::unique_ptr kernel_tracker_; - int32 pending_cap_ = 0; + int32_t pending_cap_ = 0; bool timestamped_allocator_ = false; NodeFileWriter* node_file_writer_ = nullptr; // not owned const GPUOptions::Experimental::StreamMergeOptions stream_merge_options_; // Initialize scratch buffers used by Eigen. 
- Status InitScratchBuffers(); + absl::Status InitScratchBuffers(); void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device, int stream_id, Allocator* allocator); @@ -235,9 +235,9 @@ class BaseGPUDevice : public LocalDevice { // allocate memory or if the tensor "from" is not DMA-copyable. // If there is no error prior to enqueueing the copy, an OK status // is returned. - Status MaybeCopyTensorToGPU(const AllocatorAttributes& alloc_attrs, - const Tensor& from, Tensor* to, - StatusCallback done); + absl::Status MaybeCopyTensorToGPU(const AllocatorAttributes& alloc_attrs, + const Tensor& from, Tensor* to, + StatusCallback done); Tensor CopyGpuTensorToHostDebugOnly(const Tensor& gpu_tensor); void LogInputs(OpKernel* op_kernel, OpKernelContext* context); @@ -293,25 +293,25 @@ class GPUKernelTracker { // Determine whether a GPU kernel should have a recording event queued // immediately afterwards. If so, advance the counter and return the new // counter value after enqueuing. - uint64 MaybeQueue(OpKernelContext* ctx); + uint64_t MaybeQueue(OpKernelContext* ctx); // Record that a GPU kernel has just been enqueued on the compute stream. // Inserts the supplied counter value in a new PendingKernel record appended // to the end of the ring buffer then returns that same count. // Caller is responsible for ensuring that RecordTerminate() is eventually // called with the same counter value. - void RecordQueued(uint64 queued_count, int weight) + void RecordQueued(uint64_t queued_count, int weight) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Takes a count value returned by RecordQueued and finds the corresponding // PendingKernel record in the ring buffer. Marks the kernel as completed and // advances the completion frontier accordingly. - void RecordTerminated(uint64 queued_count); + void RecordTerminated(uint64_t queued_count); // Returns the largest timing count such that all kernels queued no // later than that count are known to have terminated. - inline uint64 LastTerminatedCount(uint64 old_value) { - uint64 new_value = last_terminated_count_.load(std::memory_order_relaxed); + inline uint64_t LastTerminatedCount(uint64_t old_value) { + uint64_t new_value = last_terminated_count_.load(std::memory_order_relaxed); if (new_value == old_value) { MaybeQueueProgressEvent(); } @@ -344,22 +344,22 @@ class GPUKernelTracker { std::unique_ptr owned_counter_; Allocator* allocator_ = nullptr; EventMgr* em_ = nullptr; - std::atomic last_terminated_count_ = {1}; + std::atomic last_terminated_count_ = {1}; void MaybeQueueProgressEvent(); // Records when a kernel was queued for execution. Kernel launches are // identified by a unique count value from a per-GPU device timing counter. struct PendingKernel { - uint64 queued_count; + uint64_t queued_count; int weight; bool terminated; PendingKernel(const PendingKernel& pk) = default; PendingKernel() : queued_count(0), weight(0), terminated(false) {} }; mutex mu_; - int32 mem_since_last_ TF_GUARDED_BY(mu_); - int32 ops_since_last_ TF_GUARDED_BY(mu_); + int32_t mem_since_last_ TF_GUARDED_BY(mu_); + int32_t ops_since_last_ TF_GUARDED_BY(mu_); // Ring buffer of PendingKernel records. std::vector pending_kernels_ TF_GUARDED_BY(mu_); // Next unused slot in pending_kernels_. 
@@ -376,12 +376,13 @@ class GPUKernelTracker { class BaseGPUDeviceFactory : public DeviceFactory { public: - Status ListPhysicalDevices(std::vector* devices) override; - Status CreateDevices(const SessionOptions& options, - const std::string& name_prefix, - std::vector>* devices) override; - Status GetDeviceDetails(int device_index, - std::unordered_map* details) override; + absl::Status ListPhysicalDevices(std::vector* devices) override; + absl::Status CreateDevices( + const SessionOptions& options, const std::string& name_prefix, + std::vector>* devices) override; + absl::Status GetDeviceDetails( + int device_index, + std::unordered_map* details) override; struct InterconnectMap { // Name of interconnect technology, if known. @@ -390,7 +391,7 @@ class BaseGPUDeviceFactory : public DeviceFactory { // Where architecture-specific subclassing is not done that won't // always be possible. The minimum expectation is that // faster links should have a higher value than slower links. - int32 strength; + int32_t strength; static const int kSameDeviceStrength; static const int kStreamExecutorStrength; std::set> @@ -400,7 +401,7 @@ class BaseGPUDeviceFactory : public DeviceFactory { protected: // Populates *maps with interconnect maps for all local direct access // pathways between GPUs. - virtual Status GetInterconnectMaps( + virtual absl::Status GetInterconnectMaps( const std::vector& visible_gpu_order, se::Platform* gpu_manager, std::vector* maps); @@ -413,7 +414,7 @@ class BaseGPUDeviceFactory : public DeviceFactory { LocalityMap; // Populates *localities with the DeviceLocality descriptor for // every TfDeviceId. - virtual Status GetDeviceLocalities( + virtual absl::Status GetDeviceLocalities( int num_tf_gpus, const std::vector& interconnects, LocalityMap* localities); @@ -422,29 +423,29 @@ class BaseGPUDeviceFactory : public DeviceFactory { // 'devices' vector. The 'gpu_allocator' is created by the caller and usually // preallocates a set amount of GPU memory. 
#ifdef TF_GPU_USE_PJRT - Status CreateGPUDevice(const SessionOptions& options, - const std::string& name_prefix, - tsl::TfDeviceId tf_device_id, - const DeviceLocality& dev_locality, - xla::LocalDeviceState* xla_local_device_state, - Allocator* gpu_allocator, - std::vector>* devices); + absl::Status CreateGPUDevice(const SessionOptions& options, + const std::string& name_prefix, + tsl::TfDeviceId tf_device_id, + const DeviceLocality& dev_locality, + xla::LocalDeviceState* xla_local_device_state, + Allocator* gpu_allocator, + std::vector>* devices); #else - Status CreateGPUDevice(const SessionOptions& options, - const std::string& name_prefix, - tsl::TfDeviceId tf_device_id, - const DeviceLocality& dev_locality, - Allocator* gpu_allocator, - std::vector>* devices); + absl::Status CreateGPUDevice(const SessionOptions& options, + const std::string& name_prefix, + tsl::TfDeviceId tf_device_id, + const DeviceLocality& dev_locality, + Allocator* gpu_allocator, + std::vector>* devices); #endif // TF_GPU_USE_PJRT virtual std::unique_ptr CreateGPUDevice( - const SessionOptions& options, const string& name, Bytes memory_limit, - const DeviceLocality& dev_locality, tsl::TfDeviceId tf_device_id, - const string& physical_device_desc, Allocator* gpu_allocator, - Allocator* cpu_allocator) = 0; + const SessionOptions& options, const std::string& name, + Bytes memory_limit, const DeviceLocality& dev_locality, + tsl::TfDeviceId tf_device_id, const std::string& physical_device_desc, + Allocator* gpu_allocator, Allocator* cpu_allocator) = 0; - Status EnablePeerAccess( + absl::Status EnablePeerAccess( const std::vector& visible_gpu_order); // Returns into 'ids' the list of valid platform GPU ids, in the order that @@ -452,7 +453,7 @@ class BaseGPUDeviceFactory : public DeviceFactory { // based upon 'visible_gpu_order' which was generated by parsing // GPUOptions::visible_device_list which is a comma-separated list of CUDA or // ROCm GPU ids. - Status GetValidDeviceIds( + absl::Status GetValidDeviceIds( const std::vector& visible_gpu_order, std::vector* ids); @@ -460,7 +461,7 @@ class BaseGPUDeviceFactory : public DeviceFactory { // field cached_device_ids_. Passes {0, 1, ..., num_devices-1} to // GetValidDeviceIds, so this should only be used in functions where all // devices should be treated as visible, like ListPhysicalDevices. - Status CacheDeviceIds(); + absl::Status CacheDeviceIds(); // visible_gpu_initialized_[platform_device_id] is true if visible GPU // platform_device_id has been initialized by the process. 
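// -----------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the gpu_device.h changes above
// replace the unqualified Status and gtl::InlinedVector spellings with
// absl::Status and absl::InlinedVector. A minimal, self-contained example of
// the same pattern, with hypothetical names, assuming only the Abseil status
// and container headers:
#include <cstdint>

#include "absl/container/inlined_vector.h"
#include "absl/status/status.h"

namespace gpu_device_sketch {

// Before: gtl::InlinedVector<..., 4> device_to_device;
// After:  absl::InlinedVector with the same inline capacity.
using DeviceToDeviceIds = absl::InlinedVector<int, 4>;  // placeholder element type

// Before: Status Init(const SessionOptions& options);
// After:  absl::Status, returning absl::OkStatus() on success.
inline absl::Status InitSketch(int32_t pending_cap) {
  if (pending_cap < 0) {
    return absl::InvalidArgumentError("pending_cap must be non-negative");
  }
  return absl::OkStatus();
}

}  // namespace gpu_device_sketch
// -----------------------------------------------------------------------------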
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc index d5b9c127351a36..2848cf5d16d91d 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc @@ -28,10 +28,11 @@ namespace tensorflow { class GPUDevice : public BaseGPUDevice { public: - GPUDevice(const SessionOptions& options, const string& name, + GPUDevice(const SessionOptions& options, const std::string& name, Bytes memory_limit, const DeviceLocality& locality, - tsl::TfDeviceId tf_device_id, const string& physical_device_desc, - Allocator* gpu_allocator, Allocator* cpu_allocator) + tsl::TfDeviceId tf_device_id, + const std::string& physical_device_desc, Allocator* gpu_allocator, + Allocator* cpu_allocator) : BaseGPUDevice(options, name, memory_limit, locality, tf_device_id, physical_device_desc, gpu_allocator, cpu_allocator, false /* sync every op */), @@ -64,10 +65,10 @@ class GPUDevice : public BaseGPUDevice { class GPUDeviceFactory : public BaseGPUDeviceFactory { private: std::unique_ptr CreateGPUDevice( - const SessionOptions& options, const string& name, Bytes memory_limit, - const DeviceLocality& locality, tsl::TfDeviceId tf_device_id, - const string& physical_device_desc, Allocator* gpu_allocator, - Allocator* cpu_allocator) override { + const SessionOptions& options, const std::string& name, + Bytes memory_limit, const DeviceLocality& locality, + tsl::TfDeviceId tf_device_id, const std::string& physical_device_desc, + Allocator* gpu_allocator, Allocator* cpu_allocator) override { return absl::make_unique(options, name, memory_limit, locality, tf_device_id, physical_device_desc, gpu_allocator, cpu_allocator); @@ -82,7 +83,7 @@ REGISTER_LOCAL_DEVICE_FACTORY("GPU", GPUDeviceFactory, 210); // ----------------------------------------------------------------------------- class GPUCompatibleCPUDevice : public ThreadPoolDevice { public: - GPUCompatibleCPUDevice(const SessionOptions& options, const string& name, + GPUCompatibleCPUDevice(const SessionOptions& options, const std::string& name, Bytes memory_limit, const DeviceLocality& locality, Allocator* allocator) : ThreadPoolDevice(options, name, memory_limit, locality, allocator), @@ -114,14 +115,15 @@ class GPUCompatibleCPUDevice : public ThreadPoolDevice { // The associated factory. class GPUCompatibleCPUDeviceFactory : public DeviceFactory { public: - Status ListPhysicalDevices(std::vector* devices) override { + absl::Status ListPhysicalDevices(std::vector* devices) override { devices->push_back("/physical_device:CPU:0"); - return OkStatus(); + return absl::OkStatus(); } - Status CreateDevices(const SessionOptions& options, const string& name_prefix, - std::vector>* devices) override { + absl::Status CreateDevices( + const SessionOptions& options, const std::string& name_prefix, + std::vector>* devices) override { int n = 1; auto iter = options.config.device_count().find("CPU"); if (iter != options.config.device_count().end()) { @@ -131,7 +133,7 @@ class GPUCompatibleCPUDeviceFactory : public DeviceFactory { ? 
port::NUMANumNodes() : 1; for (int i = 0; i < n; i++) { - string name = strings::StrCat(name_prefix, "/device:CPU:", i); + std::string name = absl::StrCat(name_prefix, "/device:CPU:", i); int numa_node = i % num_numa_nodes; DeviceLocality locality; locality.set_numa_node(numa_node); @@ -140,7 +142,7 @@ class GPUCompatibleCPUDeviceFactory : public DeviceFactory { ProcessState::singleton()->GetCPUAllocator(numa_node))); } - return OkStatus(); + return absl::OkStatus(); } }; REGISTER_LOCAL_DEVICE_FACTORY("CPU", GPUCompatibleCPUDeviceFactory, 70); diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc index ad854582ff8116..f3c7681fa26b30 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc @@ -80,7 +80,7 @@ bool IsRocm() { .IsRocm(); } -void ExpectErrorMessageSubstr(const Status& s, StringPiece substr) { +void ExpectErrorMessageSubstr(const absl::Status& s, absl::string_view substr) { EXPECT_TRUE(absl::StrContains(s.ToString(), substr)) << s << ", expected substring " << substr; } @@ -96,12 +96,12 @@ class GPUDeviceTest : public ::testing::Test { protected: static SessionOptions MakeSessionOptions( - const string& visible_device_list = "", + const std::string& visible_device_list = "", double per_process_gpu_memory_fraction = 0, int gpu_device_count = 1, const std::vector>& memory_limit_mb = {}, - const std::vector>& priority = {}, - const std::vector>& device_ordinal = {}, - const int32 num_virtual_devices = 0, + const std::vector>& priority = {}, + const std::vector>& device_ordinal = {}, + const int32_t num_virtual_devices = 0, const bool use_cuda_malloc_async = false) { SessionOptions options; ConfigProto* config = &options.config; @@ -182,7 +182,7 @@ TEST_F(GPUDeviceTest, CudaMallocAsync) { SessionOptions opts = MakeSessionOptions("0", 0, 1, {}, {}, {}, 0, /*use_cuda_malloc_async=*/true); std::vector> devices; - Status status; + absl::Status status; int number_instantiated = se::GpuCudaMallocAsyncAllocator::GetInstantiatedCountTestOnly(); { // The new scope is to trigger the destruction of the object. 
@@ -213,7 +213,7 @@ TEST_F(GPUDeviceTest, CudaMallocAsyncPreallocate) { /*use_cuda_malloc_async=*/true); setenv("TF_CUDA_MALLOC_ASYNC_SUPPORTED_PREALLOC", "2048", 1); std::vector> devices; - Status status; + absl::Status status; int number_instantiated = se::GpuCudaMallocAsyncAllocator::GetInstantiatedCountTestOnly(); @@ -244,7 +244,7 @@ TEST_F(GPUDeviceTest, CudaMallocAsyncPreallocate) { TEST_F(GPUDeviceTest, FailedToParseVisibleDeviceList) { SessionOptions opts = MakeSessionOptions("0,abc"); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); ExpectErrorMessageSubstr(status, "Could not parse entry"); @@ -253,7 +253,7 @@ TEST_F(GPUDeviceTest, FailedToParseVisibleDeviceList) { TEST_F(GPUDeviceTest, InvalidGpuId) { SessionOptions opts = MakeSessionOptions("100"); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); ExpectErrorMessageSubstr(status, @@ -263,7 +263,7 @@ TEST_F(GPUDeviceTest, InvalidGpuId) { TEST_F(GPUDeviceTest, DuplicateEntryInVisibleDeviceList) { SessionOptions opts = MakeSessionOptions("0,0"); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); ExpectErrorMessageSubstr(status, @@ -273,7 +273,7 @@ TEST_F(GPUDeviceTest, DuplicateEntryInVisibleDeviceList) { TEST_F(GPUDeviceTest, VirtualDeviceConfigConflictsWithMemoryFractionSettings) { SessionOptions opts = MakeSessionOptions("0", 0.1, 1, {{}}); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); ExpectErrorMessageSubstr( @@ -285,7 +285,7 @@ TEST_F(GPUDeviceTest, GpuDeviceCountTooSmall) { // (empty) VirtualDevices messages. SessionOptions opts = MakeSessionOptions("0", 0, 0, {{}}); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::UNKNOWN); ExpectErrorMessageSubstr(status, @@ -297,7 +297,7 @@ TEST_F(GPUDeviceTest, NotEnoughGpuInVisibleDeviceList) { // messages. SessionOptions opts = MakeSessionOptions("0", 0, 8, {{}, {}}); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::UNKNOWN); ExpectErrorMessageSubstr(status, @@ -311,7 +311,7 @@ TEST_F(GPUDeviceTest, VirtualDeviceConfigConflictsWithVisibleDeviceList) { // messages. 
SessionOptions opts = MakeSessionOptions("0,1", 0, 8, {{}}); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); ExpectErrorMessageSubstr( @@ -380,7 +380,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-9999, 0}}); #endif std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); #if TENSORFLOW_USE_ROCM @@ -403,7 +403,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}}); #endif std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); #if TENSORFLOW_USE_ROCM @@ -461,7 +461,7 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) { // 0 is a valid priority value for both AMD and NVidia GPUs SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0}}); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); ExpectErrorMessageSubstr( @@ -550,7 +550,7 @@ TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) { ->mutable_experimental() ->set_use_unified_memory(true); std::vector> devices; - Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( + absl::Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INTERNAL); ExpectErrorMessageSubstr(status, "does not support oversubscription."); @@ -615,7 +615,7 @@ TEST_F(GPUDeviceTest, CopyTensorInSameDevice) { CopyCPUToGPU(&cpu_tensor, &input_tensor, device, device_context); absl::Notification note; device->CopyTensorInSameDevice(&input_tensor, &output_tensor, device_context, - [¬e](const Status& s) { + [¬e](const absl::Status& s) { TF_ASSERT_OK(s); note.Notify(); }); @@ -633,11 +633,11 @@ TEST_F(GPUDeviceTest, CopyTensorInSameDevice) { TEST_F(GPUDeviceTest, DeviceDetails) { DeviceFactory* factory = DeviceFactory::GetFactory("GPU"); - std::vector devices; + std::vector devices; TF_ASSERT_OK(factory->ListPhysicalDevices(&devices)); EXPECT_GE(devices.size(), 1); for (int i = 0; i < devices.size(); i++) { - std::unordered_map details; + std::unordered_map details; TF_ASSERT_OK(factory->GetDeviceDetails(i, &details)); #if TENSORFLOW_USE_ROCM EXPECT_EQ(details.count("compute_capability"), 0); @@ -673,7 +673,7 @@ class GPUKernelTrackerTest : public ::testing::Test { nullptr)); } - void RecordQueued(uint64 v) { + void RecordQueued(uint64_t v) { mutex_lock l(kernel_tracker_->mu_); kernel_tracker_->RecordQueued(v, 1); } @@ -690,7 +690,7 @@ TEST_F(GPUKernelTrackerTest, CappingOnly) { std::deque queued_counts; for (int i = 0; i < 32; ++i) { - uint64 queued_count = timing_counter_->next(); + uint64_t queued_count = timing_counter_->next(); queued_counts.push_back(queued_count); RecordQueued(queued_count); } @@ -712,7 +712,7 @@ 
TEST_F(GPUKernelTrackerTest, CappingOnly) { // to introduce gaps between last_completed_ and first_available_. int64_t lower_bound = timing_counter_->get(); for (int i = 0; i < 1111; ++i) { - uint64 queued_count = timing_counter_->next(); + uint64_t queued_count = timing_counter_->next(); queued_counts.push_back(queued_count); RecordQueued(queued_count); int64_t upper_bound = timing_counter_->get(); diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc index 3179d8858ad154..15fd92a873bea0 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc @@ -122,11 +122,11 @@ static std::unique_ptr CreateSubAllocator( options.experimental().use_unified_memory()); if (use_unified_memory) { auto unified_memory_allocator = - executor->CreateMemoryAllocator(stream_executor::MemoryType::kUnified) + executor->CreateMemoryAllocator(stream_executor::MemorySpace::kUnified) .value(); return std::make_unique( std::move(unified_memory_allocator), - stream_executor::MemoryType::kUnified, platform_device_id.value(), + stream_executor::MemorySpace::kUnified, platform_device_id.value(), alloc_visitors); } else { return std::make_unique( @@ -140,7 +140,7 @@ Allocator* GPUProcessState::GetGPUAllocator( CHECK(process_state_); #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) - const string& allocator_type = options.allocator_type(); + const std::string& allocator_type = options.allocator_type(); mutex_lock lock(mu_); tsl::CheckValidTfDeviceId( DEVICE_GPU, se::GPUMachineManager()->VisibleDeviceCount(), tf_device_id); @@ -172,7 +172,7 @@ Allocator* GPUProcessState::GetGPUAllocator( auto gpu_bfc_allocator = std::make_unique( std::move(sub_allocator), total_bytes, - strings::StrCat("GPU_", tf_device_id.value(), "_bfc"), [&] { + absl::StrCat("GPU_", tf_device_id.value(), "_bfc"), [&] { GPUBFCAllocator::Options o; o.allow_growth = options.allow_growth(); o.allow_retry_on_failure = @@ -366,9 +366,9 @@ Allocator* GPUProcessState::GetGpuHostAllocator(const GPUOptions& options, gpu_host_free_visitors_.push_back({}); } auto host_memory_allocator = - se->CreateMemoryAllocator(stream_executor::MemoryType::kHost).value(); + se->CreateMemoryAllocator(stream_executor::MemorySpace::kHost).value(); SubAllocator* sub_allocator = new se::StreamExecutorAllocator( - std::move(host_memory_allocator), stream_executor::MemoryType::kHost, + std::move(host_memory_allocator), stream_executor::MemorySpace::kHost, numa_node, gpu_host_alloc_visitors_[numa_node], gpu_host_free_visitors_[numa_node]); diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc index 4d192d8af9fab4..6fb3a800d0ab60 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_util.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_util.cc @@ -548,34 +548,35 @@ absl::Status GPUUtil::SyncAll(Device* gpu_device) { return absl::OkStatus(); } -string GPUUtil::MemoryDebugString(const Device* device, Tensor* tensor) { - string ret; +std::string GPUUtil::MemoryDebugString(const Device* device, Tensor* tensor) { + std::string ret; CHECK(tensor); const int64_t num_bytes = std::min( FLAGS_brain_gpu_util_debug_string_maxlen, tensor->TotalBytes()); void* ptr = (num_bytes > 0) ? 
GetBase(tensor) : nullptr; - strings::Appendf(&ret, "%p:", ptr); + void* arg1 = ptr; + absl::StrAppendFormat(&ret, "%p:", arg1); if (num_bytes > 0) { auto* dev_info = device->tensorflow_accelerator_device_info(); if (!dev_info) { - strings::StrAppend( + absl::StrAppend( &ret, PrintMemory(reinterpret_cast(ptr), num_bytes)); } else { - string buf; + std::string buf; buf.resize(num_bytes); DeviceMemoryBase gpu_ptr(ptr, num_bytes); auto s = dev_info->stream->parent()->SynchronousMemcpyD2H( gpu_ptr, num_bytes, &*buf.begin()); - strings::StrAppend(&ret, PrintMemory(&*buf.begin(), num_bytes)); + absl::StrAppend(&ret, PrintMemory(&*buf.begin(), num_bytes)); } } return ret; } // TODO(pbar) Checksum is called from places without a valid device context. -uint64 GPUUtil::Checksum(Device* gpu_device, - const DeviceContext* device_context, - const Tensor& tensor) { +uint64_t GPUUtil::Checksum(Device* gpu_device, + const DeviceContext* device_context, + const Tensor& tensor) { Tensor copy(tensor.dtype(), tensor.shape()); absl::Status s; absl::Notification n; @@ -589,7 +590,7 @@ uint64 GPUUtil::Checksum(Device* gpu_device, return Checksum(copy); } -uint64 GPUUtil::Checksum(const Tensor& tensor) { +uint64_t GPUUtil::Checksum(const Tensor& tensor) { const float* fptr = reinterpret_cast(GetBase(&tensor)); size_t num_bytes = tensor.TotalBytes(); size_t num_floats = num_bytes / sizeof(float); diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.h b/tensorflow/core/common_runtime/gpu/gpu_util.h index 0b650ad9804343..6675aa3802c081 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_util.h +++ b/tensorflow/core/common_runtime/gpu/gpu_util.h @@ -62,7 +62,7 @@ class GPUUtil { // For debugging purpose, given a "device" and a "tensor" allocated // on the device, return a string printing each byte in the tensor // (up to a limit). "device" can be either a CPU or a GPU device. - static string MemoryDebugString(const Device* device, Tensor* tensor); + static std::string MemoryDebugString(const Device* device, Tensor* tensor); // Map a Tensor as a DeviceMemory object wrapping the given typed // buffer. @@ -72,18 +72,19 @@ class GPUUtil { template static se::DeviceMemory AsDeviceMemory(const Tensor& t) { T* ptr = reinterpret_cast(const_cast(DMAHelper::base(&t))); - return se::DeviceMemory(se::DeviceMemoryBase(ptr, t.TotalBytes())); + return se::DeviceMemory( + stream_executor::DeviceAddressBase(ptr, t.TotalBytes())); } // Computes a checksum over the contents of "tensor", which is allocated // on "gpu_device". - static uint64 Checksum(Device* gpu_device, - const DeviceContext* device_context, - const Tensor& tensor); + static uint64_t Checksum(Device* gpu_device, + const DeviceContext* device_context, + const Tensor& tensor); // Computes a checksum over the contents of "tensor", which is allocated // in local CPU RAM. 
- static uint64 Checksum(const Tensor& tensor); + static uint64_t Checksum(const Tensor& tensor); static void CopyCPUTensorToGPU(const Tensor* cpu_tensor, const DeviceContext* device_context, diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc index fbc733ce4b85d4..954658e1111a4c 100644 --- a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc @@ -30,9 +30,9 @@ TEST(PoolAllocatorTest, ZeroSizeBuffers) { se::PlatformManager::PlatformWithName(se::GpuPlatformName()).value(); se::StreamExecutor* se = platform->ExecutorForDevice(/*ordinal=*/0).value(); auto host_memory_allocator = - se->CreateMemoryAllocator(stream_executor::MemoryType::kHost).value(); + se->CreateMemoryAllocator(stream_executor::MemorySpace::kHost).value(); SubAllocator* sub_allocator = new se::StreamExecutorAllocator( - std::move(host_memory_allocator), stream_executor::MemoryType::kHost, 0); + std::move(host_memory_allocator), stream_executor::MemorySpace::kHost, 0); PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/, sub_allocator, new NoopRounder, "pool"); @@ -49,9 +49,9 @@ TEST(PoolAllocatorTest, ZeroSizePool) { se::PlatformManager::PlatformWithName(se::GpuPlatformName()).value(); se::StreamExecutor* se = platform->ExecutorForDevice(/*ordinal=*/0).value(); auto host_memory_allocator = - se->CreateMemoryAllocator(stream_executor::MemoryType::kHost).value(); + se->CreateMemoryAllocator(stream_executor::MemorySpace::kHost).value(); SubAllocator* sub_allocator = new se::StreamExecutorAllocator( - std::move(host_memory_allocator), stream_executor::MemoryType::kHost, 0); + std::move(host_memory_allocator), stream_executor::MemorySpace::kHost, 0); PoolAllocator pool(0 /*pool_size_limit*/, false /*auto_resize*/, sub_allocator, new NoopRounder, "pool"); @@ -83,9 +83,9 @@ TEST(PoolAllocatorTest, Alignment) { se::PlatformManager::PlatformWithName(se::GpuPlatformName()).value(); se::StreamExecutor* se = platform->ExecutorForDevice(/*ordinal=*/0).value(); auto host_memory_allocator = - se->CreateMemoryAllocator(stream_executor::MemoryType::kHost).value(); + se->CreateMemoryAllocator(stream_executor::MemorySpace::kHost).value(); SubAllocator* sub_allocator = new se::StreamExecutorAllocator( - std::move(host_memory_allocator), stream_executor::MemoryType::kHost, 0); + std::move(host_memory_allocator), stream_executor::MemorySpace::kHost, 0); PoolAllocator pool(0 /*pool_size_limit*/, false /*auto_resize*/, sub_allocator, new NoopRounder, "pool"); for (int i = 0; i < 16; ++i) { @@ -145,9 +145,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { se::PlatformManager::PlatformWithName(se::GpuPlatformName()).value(); se::StreamExecutor* se = platform->ExecutorForDevice(/*ordinal=*/0).value(); auto host_memory_allocator = - se->CreateMemoryAllocator(stream_executor::MemoryType::kHost).value(); + se->CreateMemoryAllocator(stream_executor::MemorySpace::kHost).value(); SubAllocator* sub_allocator = new se::StreamExecutorAllocator( - std::move(host_memory_allocator), stream_executor::MemoryType::kHost, 0, + std::move(host_memory_allocator), stream_executor::MemorySpace::kHost, 0, {alloc_visitor}, {free_visitor}); PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/, sub_allocator, new NoopRounder, "pool"); @@ -250,9 +250,9 @@ TEST(PoolAllocatorTest, Name) { se::PlatformManager::PlatformWithName(se::GpuPlatformName()).value(); se::StreamExecutor* se = 
platform->ExecutorForDevice(/*ordinal=*/0).value(); auto host_memory_allocator = - se->CreateMemoryAllocator(stream_executor::MemoryType::kHost).value(); + se->CreateMemoryAllocator(stream_executor::MemorySpace::kHost).value(); SubAllocator* sub_allocator = new se::StreamExecutorAllocator( - std::move(host_memory_allocator), stream_executor::MemoryType::kHost, 0); + std::move(host_memory_allocator), stream_executor::MemorySpace::kHost, 0); PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/, sub_allocator, new NoopRounder, "pool"); EXPECT_EQ("pool", pool.Name()); diff --git a/tensorflow/core/common_runtime/gradients.cc b/tensorflow/core/common_runtime/gradients.cc index 466977ecf772d6..ede2bed5eced15 100644 --- a/tensorflow/core/common_runtime/gradients.cc +++ b/tensorflow/core/common_runtime/gradients.cc @@ -40,18 +40,18 @@ namespace tensorflow { static const char* const kGradientOp = "SymbolicGradient"; static const char* const kNodeLabel = "Func"; -string NodeOut::name() const { +std::string NodeOut::name() const { if (index == 0) { return node->name(); } else { - return strings::StrCat(node->name(), ":", index); + return absl::StrCat(node->name(), ":", index); } } DataType NodeOut::dtype() const { return node->output_type(index); } struct NodeOutHash { - uint64 operator()(const NodeOut& x) const { + uint64_t operator()(const NodeOut& x) const { return Hash64(reinterpret_cast(&x.node), sizeof(Node*), x.index); } @@ -334,7 +334,7 @@ NodeOut SymbolicGradientBuilder::SumGradients(const NodeOut& src) { return {add, 0}; } -static bool IsPrimitiveOpWithNoGrad(const string& func) { +static bool IsPrimitiveOpWithNoGrad(const std::string& func) { gradient::Creator creator; absl::Status s = gradient::GetOpGradientCreator(func, &creator); return s.ok() && (creator == nullptr); diff --git a/tensorflow/core/common_runtime/gradients.h b/tensorflow/core/common_runtime/gradients.h index aaa9cad80ad691..6eb32e450e1dcf 100644 --- a/tensorflow/core/common_runtime/gradients.h +++ b/tensorflow/core/common_runtime/gradients.h @@ -28,7 +28,7 @@ struct NodeOut { int index; // Returns the string name that represents the output of this node. - string name() const; + std::string name() const; // Returns the data type of the output of this node. DataType dtype() const; }; diff --git a/tensorflow/core/common_runtime/graph_constructor.cc b/tensorflow/core/common_runtime/graph_constructor.cc index cbbbee60ee7c6f..5fb43daa1c0b8d 100644 --- a/tensorflow/core/common_runtime/graph_constructor.cc +++ b/tensorflow/core/common_runtime/graph_constructor.cc @@ -137,14 +137,14 @@ class GraphConstructor { bool expect_device_spec; bool propagate_device_spec; - string prefix; + std::string prefix; bool uniquify_names; bool uniquify_prefix; std::map input_map; bool skip_mapped_nodes; - std::vector control_dependencies; + std::vector control_dependencies; std::vector return_tensors; - std::vector return_nodes; + std::vector return_nodes; // TODO(ashankar): This bool exists to separate out functionality required // to make ImportGraphDef a close equivalent of Python's import_graph_def @@ -166,7 +166,7 @@ class GraphConstructor { // value to the Node when they are missing from the NodeDef. bool add_default_attributes = true; - string default_device; + std::string default_device; }; typedef absl::Span NodeDefSlice; @@ -288,7 +288,7 @@ class GraphConstructor { // Returns a unique version of `original_name`, or `original_name` if it's // already unique in the graph. 
- string FindUniqueName(absl::string_view original_name); + std::string FindUniqueName(absl::string_view original_name); // Decrement pending count for users of `processed` and add the ones that now // have all of their pending inputs satisfied to `ready_`. @@ -321,7 +321,7 @@ class GraphConstructor { const VersionDef original_versions_; // A copy of opts_.prefix, possibly uniquified. - string prefix_; + std::string prefix_; StackTracesMap traces_; @@ -364,7 +364,7 @@ class GraphConstructor { // Imported node names that have been uniquified. The key is the original // name, the value is the new unique name. - gtl::FlatMap uniquified_names_; + gtl::FlatMap uniquified_names_; // Index of NodeDefs in node_defs_ with all inputs already converted. We use a // (sorted) set so nodes are created in the order defined in the GraphDef. @@ -381,10 +381,10 @@ class GraphConstructor { // Used in the conversion from node_defs_ to g_ to represent the ith input // of a node. struct InputInfo { - explicit InputInfo(const string& node_name, Node* n, int i) + explicit InputInfo(const std::string& node_name, Node* n, int i) : name(node_name), node(n), index(i) {} // Use string instead of StringPiece so we don't have to manage lifetime - string name; + std::string name; Node* node; int index; @@ -402,10 +402,10 @@ class GraphConstructor { // Used in the conversion from node_defs_ to g_ to represent an edge from // the node named 'name' to node 'n'. struct EdgeInfo { - explicit EdgeInfo(const string& name, int i1, Node* n, int i2) + explicit EdgeInfo(const std::string& name, int i1, Node* n, int i2) : src_name(name), src_index(i1), dst_node(n), dst_index(i2) {} // Use string instead of StringPiece so we don't have to manage lifetime - string src_name; + std::string src_name; int src_index; Node* dst_node; int dst_index; @@ -594,7 +594,7 @@ bool NodeNameInValues(const std::map& input_map, return false; } -bool NodeNameInValues(const std::vector& control_dependencies, +bool NodeNameInValues(const std::vector& control_dependencies, const absl::string_view& node_name) { return std::find(control_dependencies.begin(), control_dependencies.end(), node_name) != control_dependencies.end(); @@ -632,7 +632,7 @@ absl::Status GraphConstructor::EnsureNoNameCollisions() { } if (prefix_.empty() && opts_.importing && !opts_.uniquify_names) { for (size_t i = 0; i < node_def_count(); ++i) { - const string& name = get_node_def(i).name(); + const std::string& name = get_node_def(i).name(); if (NameExistsInGraph(name)) { return errors::InvalidArgument("Node name '", name, "' already exists in the Graph"); @@ -646,7 +646,7 @@ absl::Status GraphConstructor::EnsureNoNameCollisions() { "' would lead to invalid node names"); } if (NameExistsInGraph(prefix_no_slash) && opts_.uniquify_prefix) { - prefix_ = strings::StrCat(FindUniqueName(prefix_no_slash), "/"); + prefix_ = absl::StrCat(FindUniqueName(prefix_no_slash), "/"); } } return absl::OkStatus(); @@ -668,7 +668,7 @@ absl::Status GraphConstructor::ValidateInputMapAndControlDependencies() { "control edge and non-control edge"); } } - for (const string& node : opts_.control_dependencies) { + for (const std::string& node : opts_.control_dependencies) { if (existing_nodes_.count(node) == 0) { return errors::InvalidArgument( "node '", node, @@ -727,7 +727,7 @@ absl::Status GraphConstructor::InitFromEdges() { const int num_nodes = node_def_count(); pending_count_.reserve(num_nodes); outputs_.resize(num_nodes); - gtl::FlatSet next_iteration_nodes; + gtl::FlatSet next_iteration_nodes; for (int 
n = 0; n < node_def_count(); ++n) { const NodeDef& node_def = get_node_def(n); if (IsNextIteration(node_def)) { @@ -752,7 +752,7 @@ absl::Status GraphConstructor::InitFromEdges() { num_control_edges++; } else { TensorId id(ParseTensorName(input_name)); - if (next_iteration_nodes.find(string(id.first)) != + if (next_iteration_nodes.find(std::string(id.first)) != next_iteration_nodes.end()) { has_loop_back_edge = true; } @@ -796,7 +796,7 @@ absl::Status GraphConstructor::ValidateColocationConstraints( return absl::OkStatus(); const auto iter = node_def.attr().find(kColocationAttrName); if (iter == node_def.attr().end()) return absl::OkStatus(); - for (const string& c : iter->second.list().s()) { + for (const std::string& c : iter->second.list().s()) { absl::string_view s(c); if (absl::ConsumePrefix(&s, kColocationGroupPrefix) && gdef_nodes_.find(s) == gdef_nodes_.end()) { @@ -957,11 +957,11 @@ void GraphConstructor::AddControlDependencies( // node_def either has no inputs or all remapped inputs, add the control // dependencies - for (const string& control_dep : opts_.control_dependencies) { - string input = TensorId(control_dep, Graph::kControlSlot).ToString(); + for (const std::string& control_dep : opts_.control_dependencies) { + std::string input = TensorId(control_dep, Graph::kControlSlot).ToString(); bool found = false; for (int i = node_def->input_size() - 1; i >= 0; --i) { - const string& node_input = node_def->input(i); + const std::string& node_input = node_def->input(i); if (node_input[0] != '^') { // Control inputs are at the end. Break when we reach the non-control // inputs. @@ -984,7 +984,7 @@ void GraphConstructor::AddControlDependencies( void GraphConstructor::AddPrefixToNodeDef( const std::vector& input_already_exists, NodeDef* node_def) { if (prefix_.empty()) return; - node_def->set_name(strings::StrCat(prefix_, node_def->name())); + node_def->set_name(absl::StrCat(prefix_, node_def->name())); // Update names of input nodes for (int i = 0; i < node_def->input_size(); ++i) { // Skip remapped inputs (which already exist in g_ and are not being @@ -992,9 +992,9 @@ void GraphConstructor::AddPrefixToNodeDef( if (input_already_exists[i]) continue; absl::string_view input(node_def->input(i)); if (absl::ConsumePrefix(&input, "^")) { - node_def->set_input(i, strings::StrCat("^", prefix_, input)); + node_def->set_input(i, absl::StrCat("^", prefix_, input)); } else { - node_def->set_input(i, strings::StrCat(prefix_, input)); + node_def->set_input(i, absl::StrCat(prefix_, input)); } } // Update names of colocation groups @@ -1004,7 +1004,7 @@ void GraphConstructor::AddPrefixToNodeDef( for (int i = 0; i < list->s_size(); ++i) { absl::string_view v(list->s(i)); if (absl::ConsumePrefix(&v, kColocationGroupPrefix)) { - list->set_s(i, strings::StrCat(kColocationGroupPrefix, prefix_, v)); + list->set_s(i, absl::StrCat(kColocationGroupPrefix, prefix_, v)); } } } @@ -1013,7 +1013,7 @@ void GraphConstructor::AddPrefixToNodeDef( void GraphConstructor::UniquifyNames( const std::vector& input_already_exists, NodeDef* node_def) { if (NameExistsInGraph(node_def->name())) { - string old_name = node_def->name(); + std::string old_name = node_def->name(); node_def->set_name(FindUniqueName(node_def->name())); uniquified_names_[old_name] = node_def->name(); // Note that we don't have to update gdef_nodes_ or gdef_prefixes_ with @@ -1028,7 +1028,7 @@ void GraphConstructor::UniquifyNames( // We require that UniquifyNames() is called on all NodeDefs in topological // order. 
This guarantees that node_def's inputs will already be uniquified // if necessary. - auto iter = uniquified_names_.find(string(id.first)); + auto iter = uniquified_names_.find(std::string(id.first)); if (iter == uniquified_names_.end()) continue; id.first = iter->second; node_def->set_input(i, id.ToString()); @@ -1039,18 +1039,18 @@ void GraphConstructor::UpdateUniquifiedColocationNames() { for (const auto& pair : gdef_nodes_) { Node* node = pair.second.node; if (node == nullptr) continue; - std::vector coloc_values; + std::vector coloc_values; if (!TryGetNodeAttr(node->attrs(), kColocationAttrName, &coloc_values)) continue; bool updated = false; for (size_t i = 0; i < coloc_values.size(); ++i) { absl::string_view val(coloc_values[i]); if (absl::ConsumePrefix(&val, kColocationGroupPrefix)) { - auto name_pair = uniquified_names_.find(string(val)); + auto name_pair = uniquified_names_.find(std::string(val)); if (name_pair == uniquified_names_.end()) continue; updated = true; coloc_values[i] = - strings::StrCat(kColocationGroupPrefix, name_pair->second); + absl::StrCat(kColocationGroupPrefix, name_pair->second); } } if (updated) { @@ -1071,13 +1071,13 @@ bool GraphConstructor::NameExistsInGraphDef(absl::string_view name) { return false; } -string GraphConstructor::FindUniqueName(absl::string_view original_name) { - string name(original_name); +std::string GraphConstructor::FindUniqueName(absl::string_view original_name) { + std::string name(original_name); int count = 0; // Check that any generated names don't collide with imported NodeDefs (as // well as nodes in g_). while (NameExistsInGraph(name) || (count > 0 && NameExistsInGraphDef(name))) { - name = strings::StrCat(original_name, "_", ++count); + name = absl::StrCat(original_name, "_", ++count); } return name; } @@ -1280,7 +1280,7 @@ absl::Status GraphConstructor::Convert() { return errors::InvalidArgument(out.str()); } - inputs.emplace_back(string(tensor_id.node()), src_node, src_index); + inputs.emplace_back(std::string(tensor_id.node()), src_node, src_index); } if (has_data_back_edge && !IsMerge(node_def)) { diff --git a/tensorflow/core/common_runtime/graph_constructor.h b/tensorflow/core/common_runtime/graph_constructor.h index d0764903eb0931..e527801ea9f426 100644 --- a/tensorflow/core/common_runtime/graph_constructor.h +++ b/tensorflow/core/common_runtime/graph_constructor.h @@ -89,7 +89,7 @@ struct ImportGraphDefOptions { // prefix="animals" and GraphDef contains a node "bunny" then the node will be // named "animals/bunny" in *g. Must not be already used as a node name or // prefix in the graph. - string prefix; + std::string prefix; // If true, imported node names will be modified if their name already exists // in the graph. If false, conflicting names will be treated as an error. Note @@ -125,7 +125,7 @@ struct ImportGraphDefOptions { // Note that to avoid creating many redundant control edges, ImportGraphDef() // won't add control edges to nodes that will inherit the dependencies from // other nodes in `gdef`. - std::vector control_dependencies; + std::vector control_dependencies; // Tensors in `gdef` that will be returned via the ImportGraphDefResults // output parameter of `ImportGraphDef()`. If this list is non-empty, the @@ -151,7 +151,7 @@ struct ImportGraphDefOptions { // Unlike `return_tensors`, `input_map` has no effect on the nodes // returned. `return_nodes` must be empty if `skip_mapped_nodes` is true. // TODO(skyewm): make this work with `skip_mapped_nodes` if there's a need. 
- std::vector return_nodes; + std::vector return_nodes; // If true, checks that all colocation constraints are nodes in the GraphDef. bool validate_colocation_constraints = true; @@ -165,7 +165,7 @@ struct ImportGraphDefOptions { // python API. // Try to set default execution device for this grapth. - string default_device; + std::string default_device; // If true, propagates a node's assigned device. By default the runtime // will recompute the assigned device every time. diff --git a/tensorflow/core/common_runtime/graph_constructor_fuzz.cc b/tensorflow/core/common_runtime/graph_constructor_fuzz.cc index fa92230d4dcdc2..df0c63473b849d 100644 --- a/tensorflow/core/common_runtime/graph_constructor_fuzz.cc +++ b/tensorflow/core/common_runtime/graph_constructor_fuzz.cc @@ -63,10 +63,10 @@ void FuzzGraphEndToEndSimpleFixedInput(const GraphDef& graph_def) { p1.scalar()() = 1.0; Tensor p2(DT_FLOAT, TensorShape({1})); p2.scalar()() = 2.0; - std::vector> inputs = {{"Placeholder", p1}, - {"Placeholder_1", p2}}; - std::vector output_names = {"O_FUZZ"}; - std::vector target_names; + std::vector> inputs = {{"Placeholder", p1}, + {"Placeholder_1", p2}}; + std::vector output_names = {"O_FUZZ"}; + std::vector target_names; std::vector outputs; status = sess->Run(inputs, output_names, target_names, &outputs); } @@ -93,22 +93,22 @@ void FuzzGraphEndToEndAllStatic(const GraphDef& graph_def) { return; } - std::vector> inputs = {}; - std::vector output_names = {}; - std::vector target_names = {}; + std::vector> inputs = {}; + std::vector output_names = {}; + std::vector target_names = {}; std::vector outputs = {}; status = sess->Run(inputs, output_names, target_names, &outputs); } FUZZ_TEST(GraphDefFuzz, FuzzGraphEndToEndAllStatic); -Node* FindNode(const string& name, Graph* graph) { +Node* FindNode(const std::string& name, Graph* graph) { for (Node* n : graph->nodes()) { if (n->name() == name) return n; } return nullptr; } -bool HasNode(const string& name, Graph* graph) { +bool HasNode(const std::string& name, Graph* graph) { return FindNode(name, graph) != nullptr; } @@ -399,10 +399,10 @@ void FuzzGraphEndToEndFDP(std::vector data) { input_tensors.push_back(input_tensor); } - std::vector> inputs = {{"N0", input_tensors[0]}, - {"N1", input_tensors[1]}}; - std::vector output_names = {last_node}; - std::vector target_names; + std::vector> inputs = { + {"N0", input_tensors[0]}, {"N1", input_tensors[1]}}; + std::vector output_names = {last_node}; + std::vector target_names; std::vector outputs; s = sess->Run(inputs, output_names, target_names, &outputs); if (!s.ok()) { diff --git a/tensorflow/core/common_runtime/graph_constructor_test.cc b/tensorflow/core/common_runtime/graph_constructor_test.cc index 9494bf48f9a74f..036ee63a354f89 100644 --- a/tensorflow/core/common_runtime/graph_constructor_test.cc +++ b/tensorflow/core/common_runtime/graph_constructor_test.cc @@ -53,22 +53,22 @@ class GraphConstructorTest : public ::testing::Test { protected: GraphConstructorTest() : graph_(OpRegistry::Global()) {} - void Convert(const string& gdef_ascii) { + void Convert(const std::string& gdef_ascii) { CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &gdef_)); } - void ExpectError(const string& gdef_ascii, - const std::vector& expected_error_strs, - string not_expected_error_str = "") { + void ExpectError(const std::string& gdef_ascii, + const std::vector& expected_error_strs, + std::string not_expected_error_str = "") { // Used to verify that errors don't change graph - const string original_graph_description = 
GraphDebugString(); + const std::string original_graph_description = GraphDebugString(); Convert(gdef_ascii); GraphConstructorOptions opts; absl::Status status = ConvertGraphDefToGraph(opts, gdef_, &graph_); EXPECT_FALSE(status.ok()); - for (const string& error : expected_error_strs) { + for (const std::string& error : expected_error_strs) { EXPECT_TRUE(absl::StrContains(status.message(), error)) << "Expected to find '" << error << "' in " << status; } @@ -82,19 +82,20 @@ class GraphConstructorTest : public ::testing::Test { EXPECT_EQ(original_graph_description, GraphDebugString()); } - void ExpectError(const string& gdef_ascii, const ImportGraphDefOptions& opts, - const std::vector& expected_error_strs, + void ExpectError(const std::string& gdef_ascii, + const ImportGraphDefOptions& opts, + const std::vector& expected_error_strs, ShapeRefiner* refiner = nullptr, ImportGraphDefResults* results = nullptr) { // Used to verify that errors don't change graph - const string original_graph_description = GraphDebugString(); + const std::string original_graph_description = GraphDebugString(); Convert(gdef_ascii); absl::Status status = ImportGraphDef(opts, gdef_, &graph_, refiner, results); EXPECT_FALSE(status.ok()); - for (const string& error : expected_error_strs) { + for (const std::string& error : expected_error_strs) { EXPECT_TRUE(absl::StrContains(status.message(), error)) << "Expected to find '" << error << "' in " << status; } @@ -102,13 +103,14 @@ class GraphConstructorTest : public ::testing::Test { EXPECT_EQ(original_graph_description, GraphDebugString()); } - void ExpectOK(const string& gdef_ascii) { + void ExpectOK(const std::string& gdef_ascii) { Convert(gdef_ascii); GraphConstructorOptions opts; TF_CHECK_OK(ConvertGraphDefToGraph(opts, gdef_, &graph_)); } - void ExpectOK(const string& gdef_ascii, const ImportGraphDefOptions& opts, + void ExpectOK(const std::string& gdef_ascii, + const ImportGraphDefOptions& opts, ShapeRefiner* refiner = nullptr, ImportGraphDefResults* results = nullptr) { Convert(gdef_ascii); @@ -125,16 +127,17 @@ class GraphConstructorTest : public ::testing::Test { << graph_.versions().producer(); } - Node* FindNode(const string& name) { + Node* FindNode(const std::string& name) { for (Node* n : graph_.nodes()) { if (n->name() == name) return n; } return nullptr; } - bool HasNode(const string& name) { return FindNode(name) != nullptr; } + bool HasNode(const std::string& name) { return FindNode(name) != nullptr; } - bool HasEdge(const string& src, int src_out, const string& dst, int dst_in) { + bool HasEdge(const std::string& src, int src_out, const std::string& dst, + int dst_in) { for (const Edge* e : graph_.edges()) { if (e->src()->name() == src && e->src_output() == src_out && e->dst()->name() == dst && e->dst_input() == dst_in) { @@ -144,11 +147,11 @@ class GraphConstructorTest : public ::testing::Test { return false; } - bool HasControlEdge(const string& src, const string& dst) { + bool HasControlEdge(const std::string& src, const std::string& dst) { return HasEdge(src, Graph::kControlSlot, dst, Graph::kControlSlot); } - string ColocationGroup(const string& node) { + std::string ColocationGroup(const std::string& node) { Node* n = nullptr; for (Node* ni : graph_.nodes()) { if (ni->name() == node) { @@ -159,7 +162,7 @@ class GraphConstructorTest : public ::testing::Test { if (n == nullptr) { return ""; } - std::vector value; + std::vector value; absl::Status s = GetNodeAttr(n->attrs(), kColocationAttrName, &value); if (!s.ok()) { return ""; @@ -171,10 +174,11 
@@ class GraphConstructorTest : public ::testing::Test { return ""; } absl::string_view loc(value[0]); - return absl::ConsumePrefix(&loc, kColocationGroupPrefix) ? string(loc) : ""; + return absl::ConsumePrefix(&loc, kColocationGroupPrefix) ? std::string(loc) + : ""; } - string GraphDebugString() const { + std::string GraphDebugString() const { return graph_.ToGraphDefDebug().DebugString(); } @@ -232,7 +236,7 @@ REGISTER_OP("RequiresCurrentGraphVersion") TEST_F(GraphConstructorTest, InvalidNodeName) { auto expect_invalid_name = [this](const char* name) { - ExpectError(strings::StrCat("node { name: '", name, "' op: 'ABC' }"), + ExpectError(absl::StrCat("node { name: '", name, "' op: 'ABC' }"), {"Node name contains invalid characters"}); }; @@ -504,7 +508,7 @@ TEST_F(GraphConstructorTest, ImportGraphThatUsesConstantValueFromInsideLoop) { f.write(str(tf.get_default_graph().as_graph_def())) */ - const string pb_ascii = R"EOF( + const std::string pb_ascii = R"EOF( node { name: "Const" op: "Const" @@ -862,7 +866,7 @@ TEST_F(GraphConstructorTest, NoForwardCompatError) { } TEST_F(GraphConstructorTest, LowVersion) { - ExpectError(strings::StrCat("versions { producer: ", -1, " }"), + ExpectError(absl::StrCat("versions { producer: ", -1, " }"), {strings::StrCat("GraphDef producer version -1 below min " "producer ", TF_GRAPH_DEF_VERSION_MIN_PRODUCER, @@ -872,7 +876,7 @@ TEST_F(GraphConstructorTest, LowVersion) { TEST_F(GraphConstructorTest, HighVersion) { const int version = TF_GRAPH_DEF_VERSION + 1; - ExpectError(strings::StrCat("versions { min_consumer: ", version, " }"), + ExpectError(absl::StrCat("versions { min_consumer: ", version, " }"), {strings::StrCat("GraphDef min consumer version ", version, " above current version ", TF_GRAPH_DEF_VERSION, " for TensorFlow ", TF_VERSION_STRING, @@ -885,7 +889,7 @@ TEST_F(GraphConstructorTest, BadVersion) { ExpectError( strings::StrCat("versions { producer: ", version, " bad_consumers: ", bad, " }"), - {strings::StrCat( + {absl::StrCat( "GraphDef disallows consumer version ", bad, ". Please upgrade TensorFlow: this version is likely buggy.")}); } @@ -932,8 +936,8 @@ TEST_F(GraphConstructorTest, Error_ControlEdgeBeforeRealInput) { TEST_F(GraphConstructorTest, ImportGraphDef) { GraphDef def; ImportGraphDefOptions opts; - const string& source = graph_.FindNodeId(Graph::kSourceId)->name(); - const string& sink = graph_.FindNodeId(Graph::kSinkId)->name(); + const std::string& source = graph_.FindNodeId(Graph::kSourceId)->name(); + const std::string& sink = graph_.FindNodeId(Graph::kSinkId)->name(); // Importing an empty graph is fine. 
absl::Status s = ImportGraphDef(opts, def, &graph_, nullptr); @@ -2447,8 +2451,8 @@ TEST_F(GraphConstructorTest, ImportGraphDef_ErrorsDoNoChangeTheGraph) { TF_EXPECT_OK( NodeDefBuilder("scope/A", "TestParams").Finalize(def.add_node())); ImportGraphDefOptions opts; - const string& source = graph_.FindNodeId(Graph::kSourceId)->name(); - const string& sink = graph_.FindNodeId(Graph::kSinkId)->name(); + const std::string& source = graph_.FindNodeId(Graph::kSourceId)->name(); + const std::string& sink = graph_.FindNodeId(Graph::kSinkId)->name(); absl::Status s = ImportGraphDef(opts, def, &graph_, nullptr); ASSERT_EQ(absl::OkStatus(), s) << s; @@ -2457,7 +2461,7 @@ TEST_F(GraphConstructorTest, ImportGraphDef_ErrorsDoNoChangeTheGraph) { EXPECT_TRUE(HasControlEdge(source, "scope/A")); EXPECT_TRUE(HasControlEdge("scope/A", sink)); EXPECT_EQ(3, graph_.num_edges()); - const string original_graph_description = GraphDebugString(); + const std::string original_graph_description = GraphDebugString(); #define EXPECT_IMPORT_FAILURE(graph_def, options, expected_err) \ do { \ @@ -2663,10 +2667,10 @@ TEST_F(GraphConstructorTest, ImportGraphDef_FunctionDefs) { p1.scalar()() = 1.0; Tensor p2(DT_FLOAT, TensorShape({1})); p2.scalar()() = 2.0; - std::vector> inputs = {{"Placeholder", p1}, - {"Placeholder_1", p2}}; - std::vector output_names = {"Foo_d03c39a3"}; - std::vector target_names; + std::vector> inputs = {{"Placeholder", p1}, + {"Placeholder_1", p2}}; + std::vector output_names = {"Foo_d03c39a3"}; + std::vector target_names; std::vector outputs; TF_ASSERT_OK(sess->Run(inputs, output_names, target_names, &outputs)); @@ -2756,10 +2760,10 @@ TEST_F(GraphConstructorTest, ImportGraphDef_NestedFunctionDefs) { p1.scalar()() = 1.0; Tensor p2(DT_FLOAT, TensorShape({1})); p2.scalar()() = 2.0; - std::vector> inputs = {{"Placeholder", p1}, - {"Placeholder_1", p2}}; - std::vector output_names = {"Outer_966fa13d"}; - std::vector target_names; + std::vector> inputs = {{"Placeholder", p1}, + {"Placeholder_1", p2}}; + std::vector output_names = {"Outer_966fa13d"}; + std::vector target_names; std::vector outputs; s = sess->Run(inputs, output_names, target_names, &outputs); ASSERT_TRUE(s.ok()) << s.message(); @@ -2835,16 +2839,16 @@ TEST_F(GraphConstructorTest, CopyGraph) { // Confirms that graph def version in the graph reaches the shape inference // function. TEST_F(GraphConstructorTest, GraphDefVersionUsedForShapeInference) { - string gdef_ascii = strings::StrCat(R"EOF( + std::string gdef_ascii = absl::StrCat(R"EOF( node{ name:"A" op:"RequiresCurrentGraphVersion" } versions { producer: )EOF", - TF_GRAPH_DEF_VERSION - 1, "}"); + TF_GRAPH_DEF_VERSION - 1, "}"); ImportGraphDefOptions opts; ExpectError(gdef_ascii, opts, {"Wrong graph version for shape"}); - gdef_ascii = strings::StrCat(R"EOF( + gdef_ascii = absl::StrCat(R"EOF( node{ name:"A" op:"RequiresCurrentGraphVersion" } versions { producer: )EOF", - TF_GRAPH_DEF_VERSION, "}"); + TF_GRAPH_DEF_VERSION, "}"); ExpectOK(gdef_ascii, opts); } @@ -2887,7 +2891,7 @@ TEST_F(GraphConstructorTest, ImportGraphDefProvidedShapeRefinerVersions) { ImportGraphDefOptions opts; // A valid graph at producer version 20, but one // that would not import if the graph_def_version were 21. 
- string gdef_ascii; + std::string gdef_ascii; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ gdef_ascii = strings::StrCat(R"EOF( node { @@ -2973,7 +2977,7 @@ versions { })EOF"); #else - gdef_ascii = strings::StrCat(R"EOF( + gdef_ascii = R"EOF( node { name: "Sum/input" op: "Const" @@ -3054,7 +3058,7 @@ node { } versions { producer: 20 -})EOF"); +})EOF"; #endif // Create a shape refiner with the latest TF_GRAPH_DEF_VERSION. // Importing the graphdef with an existing refiner should @@ -3098,7 +3102,7 @@ versions { })EOF"); #else - gdef_ascii = strings::StrCat(R"EOF( + gdef_ascii = R"EOF( node { name: "RandomConst" op: "Const" @@ -3128,7 +3132,7 @@ node { } versions { producer: 21 -})EOF"); +})EOF"; #endif ExpectOK(gdef_ascii, opts, &refiner); @@ -3171,7 +3175,7 @@ versions { })EOF"); #else - gdef_ascii = strings::StrCat(R"EOF( + gdef_ascii = R"EOF( node { name: "RandomConst2" op: "Const" @@ -3201,7 +3205,7 @@ node { } versions { producer: 17 -})EOF"); +})EOF"; #endif ExpectOK(gdef_ascii, opts, &refiner); @@ -3242,7 +3246,7 @@ TEST_F(GraphConstructorTest, ImportGraphDef_ValidateDefaultDevice) { ImportGraphDefResults res; TF_ASSERT_OK(ImportGraphDef(options, gdef, &graph_, nullptr, &res)); - std::map node2dev; + std::map node2dev; for (Node* n : graph_.nodes()) { node2dev[n->name()] = n->requested_device(); } @@ -3253,7 +3257,8 @@ TEST_F(GraphConstructorTest, ImportGraphDef_ValidateDefaultDevice) { } TEST_F(GraphConstructorTest, ImportGraphDef_UnknownOps) { - const string pb_ascii = "node { name: 'op_from_contrib' op: 'OpFromContrib'}"; + const std::string pb_ascii = + "node { name: 'op_from_contrib' op: 'OpFromContrib'}"; // Try load twice to check for two parts of the error message. We cannot check // for the whole thing in one go because the message includes the hostname. ExpectError(pb_ascii, {"Op type not registered 'OpFromContrib'"}); diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc index d7a9462e387d2d..a3c1d024babae0 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.cc +++ b/tensorflow/core/common_runtime/graph_execution_state.cc @@ -64,7 +64,7 @@ limitations under the License. namespace tensorflow { namespace { -bool IsCollectiveV2(const string& op) { +bool IsCollectiveV2(const std::string& op) { return op == "CollectiveReduceV2" || op == "CollectiveGatherV2" || op == "CollectiveBcastRecvV2" || op == "CollectiveBcastSendV2" || op == "ColectiveReduceScatterV2" || op == "ColectiveAllToAllV2"; @@ -199,7 +199,7 @@ absl::Status GraphExecutionState::Extend( *gdef.mutable_library() = flib_def_->ToProto(); // 2. Build an index of the new node names. 
- std::unordered_set new_names; + std::unordered_set new_names; for (const NodeDef& node : extension_def.node()) { new_names.insert(node.name()); } @@ -315,7 +315,7 @@ namespace { class TensorConnectionPruneRewrite : public subgraph::PruneRewrite { public: - TensorConnectionPruneRewrite(const string* endpoint_name, + TensorConnectionPruneRewrite(const std::string* endpoint_name, NodeBuilder::NodeOut from_tensor) : subgraph::PruneRewrite(endpoint_name, nullptr /* device_info */), from_tensor_(std::move(from_tensor)) {} @@ -336,8 +336,8 @@ class TensorConnectionPruneRewrite : public subgraph::PruneRewrite { TF_RETURN_IF_ERROR(s); TF_RETURN_IF_ERROR( - NodeBuilder(strings::StrCat("_identity_", feed_tensor.node->name(), "_", - feed_tensor.index), + NodeBuilder(absl::StrCat("_identity_", feed_tensor.node->name(), "_", + feed_tensor.index), "Identity") .Input(from_tensor_) .Attr("T", @@ -355,7 +355,7 @@ class TensorConnectionPruneRewrite : public subgraph::PruneRewrite { template absl::Status LookupDevice( - const DeviceSet& device_set, const string& tensor_name, + const DeviceSet& device_set, const std::string& tensor_name, const Map& tensor2device, const tensorflow::DeviceAttributes** out_device_attrs) { *out_device_attrs = nullptr; @@ -394,7 +394,7 @@ struct TensorAndDevice { // Tensors of some DataTypes cannot placed in device memory as feeds or // fetches. Validate against a allowlist of those known to work. -bool IsFeedAndFetchSupported(DataType dtype, const string& device_type) { +bool IsFeedAndFetchSupported(DataType dtype, const std::string& device_type) { // The mechanism for supporting feeds of device-backed Tensors requires // the _Arg kernel to be registered for the corresponding type (and that // the input to the kernel be in device and not host memory). @@ -474,8 +474,8 @@ absl::Status ValidateFeedAndFetchDevices( absl::Status GetFeedShapeAndTypeFromAttribute(const NodeDef& node, PartialTensorShape* shape, DataType* type) { - static const gtl::FlatSet* const kHasExplicitShapeAttribute = - CHECK_NOTNULL((new gtl::FlatSet{ + static const gtl::FlatSet* const kHasExplicitShapeAttribute = + CHECK_NOTNULL((new gtl::FlatSet{ "Placeholder", "PlaceholderV2", "PlaceholderWithDefault", "ParallelConcat", "ImmutableConst", "_ParallelConcatStart", "InfeedDequeue", "OutfeedDequeue", "CollectiveBcastSend", @@ -520,7 +520,7 @@ absl::Status GraphExecutionState::PruneGraph( for (int i = 0; i < options.callable_options.feed_size(); ++i) { // WARNING: feed MUST be a reference, since ArgFeedRewrite and // tensors_and_devices holds on to its address. - const string& feed = options.callable_options.feed(i); + const std::string& feed = options.callable_options.feed(i); const DeviceAttributes* device_info; TF_RETURN_IF_ERROR(LookupDevice(*device_set_, feed, options.callable_options.feed_devices(), @@ -540,7 +540,7 @@ absl::Status GraphExecutionState::PruneGraph( for (int i = 0; i < options.callable_options.fetch_size(); ++i) { // WARNING: fetch MUST be a reference, since RetvalFetchRewrite and // tensors_and_devices holds on to its address. 
- const string& fetch = options.callable_options.fetch(i); + const std::string& fetch = options.callable_options.fetch(i); const DeviceAttributes* device_info; TF_RETURN_IF_ERROR(LookupDevice(*device_set_, fetch, options.callable_options.fetch_devices(), @@ -561,11 +561,11 @@ absl::Status GraphExecutionState::PruneGraph( } const DeviceAttributes* device_info = &device_set_->client_device()->attributes(); - for (const string& feed : options.callable_options.feed()) { + for (const std::string& feed : options.callable_options.feed()) { feed_rewrites.emplace_back( new subgraph::RecvFeedRewrite(&feed, device_info)); } - for (const string& fetch : options.callable_options.fetch()) { + for (const std::string& fetch : options.callable_options.fetch()) { fetch_rewrites.emplace_back( new subgraph::SendFetchRewrite(&fetch, device_info)); } @@ -598,7 +598,7 @@ absl::Status GraphExecutionState::PruneGraph( &tensor_connection.to_tensor(), {from_node, from_id.second})); } - std::vector target_node_names( + std::vector target_node_names( options.callable_options.target().begin(), options.callable_options.target().end()); TF_RETURN_IF_ERROR(subgraph::RewriteGraphForExecution( @@ -699,7 +699,7 @@ absl::Status GraphExecutionState::OptimizeGraph( options.callable_options.tensor_connection().empty())) { std::vector feeds; - for (const string& feed : options.callable_options.feed()) { + for (const std::string& feed : options.callable_options.feed()) { feeds.emplace_back(ParseTensorName(feed)); } for (const TensorConnection& tensor_connection : @@ -830,7 +830,7 @@ absl::Status GraphExecutionState::OptimizeGraph( *optimized_flib = std::make_unique(*flib_def); for (const FunctionDef& fdef : new_graph.library().function()) { - const string& func_name = fdef.signature().name(); + const std::string& func_name = fdef.signature().name(); if ((*optimized_flib)->Contains(func_name)) { VLOG(3) << "Replace function: name=" << func_name; @@ -864,7 +864,7 @@ absl::Status GraphExecutionState::OptimizeGraph( absl::Status GraphExecutionState::BuildGraph( const BuildGraphOptions& options, std::unique_ptr* out) { VLOG(1) << "BuildGraph"; - const uint64 start_time_usecs = Env::Default()->NowMicros(); + const uint64_t start_time_usecs = Env::Default()->NowMicros(); if (!graph_) { // It is only valid to call this method directly when the original graph // was created with the option `place_pruned_graph == false`. @@ -922,7 +922,7 @@ absl::Status GraphExecutionState::BuildGraph( // nodes in the Graph and FunctionLibraryDefinition for collective ops and // if found, initialize a collective_graph_key as a hash of the ordered set // of instance keys. - std::set instance_key_set; + std::set instance_key_set; bool has_collective_v2 = false; for (Node* node : optimized_graph->nodes()) { if (node->IsCollective()) { @@ -952,7 +952,7 @@ absl::Status GraphExecutionState::BuildGraph( } } if (!instance_key_set.empty()) { - uint64 hash = 0x8774aa605c729c72ULL; + uint64_t hash = 0x8774aa605c729c72ULL; for (int32_t instance_key : instance_key_set) { hash = Hash64Combine(instance_key, hash); } diff --git a/tensorflow/core/common_runtime/graph_execution_state.h b/tensorflow/core/common_runtime/graph_execution_state.h index 4f713ae922f12d..a718b57063f10d 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.h +++ b/tensorflow/core/common_runtime/graph_execution_state.h @@ -43,10 +43,10 @@ struct GraphExecutionStateOptions { const DeviceSet* device_set = nullptr; const SessionOptions* session_options = nullptr; // Unique session identifier. 
Can be empty. - string session_handle; + std::string session_handle; // A map from node name to device name, representing the unchangeable // placement of stateful nodes. - std::unordered_map stateful_placements; + std::unordered_map stateful_placements; // Whether to run Placer on the graph. bool run_placer = true; @@ -166,7 +166,7 @@ class GraphExecutionState { const FunctionLibraryDefinition& flib_def() const { return *flib_def_; } // Returns the node with the given name, or null if it does not exist. - const Node* get_node_by_name(const string& name) const { + const Node* get_node_by_name(const std::string& name) const { NodeNameToCostIdMap::const_iterator iter = node_name_to_cost_id_map_.find(name); if (iter != node_name_to_cost_id_map_.end()) { @@ -178,7 +178,7 @@ class GraphExecutionState { // Returns the map of stateful placements as a map of // node name to placement string. - std::unordered_map GetStatefulPlacements() const { + std::unordered_map GetStatefulPlacements() const { return stateful_placements_; } @@ -194,8 +194,9 @@ class GraphExecutionState { // is true, such as "params" and "queue" nodes. Once placed these // nodes can not be moved to a different device. Maps node names to // device names. - std::unordered_map stateful_placements_; // Immutable after - // ctor. + std::unordered_map + stateful_placements_; // Immutable after + // ctor. void SaveStatefulNodes(Graph* graph); void RestoreStatefulNodes(Graph* graph); @@ -215,7 +216,7 @@ class GraphExecutionState { const DeviceSet* device_set_; // Not owned const SessionOptions* session_options_; // Not owned // Unique session identifier. Can be empty. - string session_handle_; + std::string session_handle_; // Map from name to Node for the full graph in placed_. NodeNameToCostIdMap node_name_to_cost_id_map_; diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h index f8322cfe7213a2..746c080e4d3f66 100644 --- a/tensorflow/core/common_runtime/graph_optimizer.h +++ b/tensorflow/core/common_runtime/graph_optimizer.h @@ -36,8 +36,8 @@ class GraphOptimizer { // pass may replace a node with a different node of the same name that has a // different number of outputs, or outputs with different known shapes. // TODO(b/65453533) introduce a unique way to name nodes in a graph. - std::unordered_map>* shape_map = - nullptr; + std::unordered_map>* + shape_map = nullptr; // If not null then only nodes for which cse_consider_fn returns true will // be considered for CSE. 
diff --git a/tensorflow/core/common_runtime/graph_runner.cc b/tensorflow/core/common_runtime/graph_runner.cc index 90052d68873c6a..8379c126e22711 100644 --- a/tensorflow/core/common_runtime/graph_runner.cc +++ b/tensorflow/core/common_runtime/graph_runner.cc @@ -58,7 +58,7 @@ class SimpleRendezvous : public RendezvousInterface { } mutex_lock l(mu_); - string edge_name(parsed.edge_name); + std::string edge_name(parsed.edge_name); if (table_.count(edge_name) > 0) { return errors::Internal("Send of an already sent tensor"); } @@ -71,7 +71,7 @@ class SimpleRendezvous : public RendezvousInterface { Tensor tensor; absl::Status status = absl::OkStatus(); { - string key(parsed.edge_name); + std::string key(parsed.edge_name); mutex_lock l(mu_); if (table_.count(key) <= 0) { status = errors::Internal("Did not find key ", key); @@ -85,7 +85,7 @@ class SimpleRendezvous : public RendezvousInterface { void StartAbort(const absl::Status& status) override {} private: - typedef std::unordered_map Table; + typedef std::unordered_map Table; mutex mu_; Table table_ TF_GUARDED_BY(mu_); @@ -103,7 +103,7 @@ GraphRunner::~GraphRunner() {} absl::Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, const NamedTensorList& inputs, - const std::vector& output_names, + const std::vector& output_names, std::vector* outputs) { if (device_ == nullptr) { return errors::NotFound("Cannot find a device for GraphRunner."); @@ -130,12 +130,12 @@ absl::Status GraphRunner::Run(Graph* graph, SimpleRendezvous rendez; // Extract the input names and keys, and feed in the inputs. - std::vector input_names; + std::vector input_names; for (const auto& in : inputs) { - const string& tensor_name = in.first; + const std::string& tensor_name = in.first; input_names.emplace_back(tensor_name); - string full_key = Rendezvous::CreateKey("/device:CPU:0", 1, "/device:CPU:1", - tensor_name, FrameAndIter(0, 0)); + std::string full_key = Rendezvous::CreateKey( + "/device:CPU:0", 1, "/device:CPU:1", tensor_name, FrameAndIter(0, 0)); Rendezvous::ParsedKey parsed; TF_RETURN_IF_ERROR(Rendezvous::ParseKey(full_key, &parsed)); TF_RETURN_IF_ERROR(rendez.Send(parsed, Rendezvous::Args(), in.second, @@ -194,7 +194,7 @@ absl::Status GraphRunner::Run(Graph* graph, outputs->resize(output_names.size()); for (size_t i = 0; i < output_names.size(); ++i) { - const string& output_key = + const std::string& output_key = Rendezvous::CreateKey("/device:CPU:0", 1, "/device:CPU:1", output_names[i], FrameAndIter(0, 0)); Rendezvous::ParsedKey parsed; diff --git a/tensorflow/core/common_runtime/graph_runner.h b/tensorflow/core/common_runtime/graph_runner.h index a40d17b862b0af..3f651727db5923 100644 --- a/tensorflow/core/common_runtime/graph_runner.h +++ b/tensorflow/core/common_runtime/graph_runner.h @@ -58,10 +58,10 @@ class GraphRunner { // // REQUIRES: `graph`, `env`, and `outputs` are not nullptr. // `function_library` may be nullptr. 
- typedef std::vector> NamedTensorList; + typedef std::vector> NamedTensorList; absl::Status Run(Graph* graph, FunctionLibraryRuntime* function_library, const NamedTensorList& inputs, - const std::vector& output_names, + const std::vector& output_names, std::vector* outputs); private: diff --git a/tensorflow/core/common_runtime/graph_runner_test.cc b/tensorflow/core/common_runtime/graph_runner_test.cc index fa9798b929f79e..2d41bc455d5322 100644 --- a/tensorflow/core/common_runtime/graph_runner_test.cc +++ b/tensorflow/core/common_runtime/graph_runner_test.cc @@ -64,8 +64,8 @@ TEST(GraphRunnerTest, DeepCopy) { Tensor p2_data(DT_FLOAT, TensorShape({})); p1_data.scalar()() = 1.0f; p2_data.scalar()() = 2.0f; - std::vector> inputs = {{"p1:0", p1_data}, - {"p2:0", p2_data}}; + std::vector> inputs = {{"p1:0", p1_data}, + {"p2:0", p2_data}}; // Create and destroy the GraphRunner, and ensure that the outputs are // consumable beyond the lifetime of GraphRunner. @@ -102,8 +102,8 @@ TEST(GraphRunnerTest, FeedAndFetch) { Tensor p2_data(DT_FLOAT, TensorShape({})); p1_data.scalar()() = 1.0f; p2_data.scalar()() = 2.0f; - std::vector> inputs = {{"p1:0", p1_data}, - {"p2:0", p2_data}}; + std::vector> inputs = {{"p1:0", p1_data}, + {"p2:0", p2_data}}; GraphRunner graph_runner(Env::Default()); std::vector outputs; diff --git a/tensorflow/core/common_runtime/graph_view.cc b/tensorflow/core/common_runtime/graph_view.cc index f84dbfac0d3f6d..65359febf97937 100644 --- a/tensorflow/core/common_runtime/graph_view.cc +++ b/tensorflow/core/common_runtime/graph_view.cc @@ -40,12 +40,12 @@ limitations under the License. namespace tensorflow { -string NodeItem::DebugString() const { - string ret = strings::StrCat("{name:'", kernel->name(), "' id:", node_id); +std::string NodeItem::DebugString() const { + std::string ret = absl::StrCat("{name:'", kernel->name(), "' id:", node_id); if (is_source) { - strings::StrAppend(&ret, " source}"); + absl::StrAppend(&ret, " source}"); } else { - strings::StrAppend(&ret, " def:{", SummarizeNodeDef(kernel->def()), "}}"); + absl::StrAppend(&ret, " def:{", SummarizeNodeDef(kernel->def()), "}}"); } return ret; } @@ -67,7 +67,7 @@ GraphView::~GraphView() { } namespace { -typedef std::tuple OutputAndControlEdges; +typedef std::tuple OutputAndControlEdges; OutputAndControlEdges CountOutputEdges(const Node* n) { DCHECK_LE(n->out_edges().size(), std::numeric_limits::max()); @@ -102,8 +102,8 @@ size_t GraphView::NodeItemBytes(const Node* n) { sizeof(ControlEdgeInfo) // output_control_edges[...] + num_outputs * sizeof(AllocatorAttributes) // output_attr[...] + num_outputs * sizeof(int) // forward_from[num_outputs] - + num_inputs * sizeof(uint8) // input_type[num_inputs] - + num_outputs * sizeof(uint8); // output_type[num_outputs] + + num_inputs * sizeof(uint8_t) // input_type[num_inputs] + + num_outputs * sizeof(uint8_t); // output_type[num_outputs] static constexpr size_t kItemAlignment = sizeof(NodeItem*); static_assert(kItemAlignment % alignof(NodeItem) == 0, "NodeItem must be aligned with kItemAlignment"); @@ -141,7 +141,7 @@ char* GraphView::InitializeNode(char* ptr, const Node* n) { // values as "int" vs "size_t" in CHECK_LE. 
CHECK_LE(static_cast(ptr - space_), std::numeric_limits::max()); - const uint32 offset = static_cast(ptr - space_); + const uint32_t offset = static_cast(ptr - space_); node_offsets_[id] = offset; ptr += bytes; @@ -197,10 +197,10 @@ char* GraphView::InitializeNode(char* ptr, const Node* n) { } DCHECK_LT(DataType_MAX, 255); // Must fit in uint8 - uint8* input_types = item->input_type_base(); + uint8_t* input_types = item->input_type_base(); item->is_any_input_ref_typed = false; for (int i = 0; i < num_inputs; i++) { - input_types[i] = static_cast(n->input_type(i)); + input_types[i] = static_cast(n->input_type(i)); DCHECK_EQ(item->input_type(i), n->input_type(i)); item->is_any_input_ref_typed |= IsRefType(n->input_type(i)); } @@ -215,9 +215,9 @@ char* GraphView::InitializeNode(char* ptr, const Node* n) { GetNodeAttr(n->attrs(), "_scoped_allocator", &scoped_allocator_attrs); int* forward_from = item->forward_from_base(); - uint8* output_types = item->output_type_base(); + uint8_t* output_types = item->output_type_base(); for (int i = 0; i < num_outputs; ++i) { - output_types[i] = static_cast(n->output_type(i)); + output_types[i] = static_cast(n->output_type(i)); DCHECK_EQ(item->output_type(i), n->output_type(i)); forward_from[i] = OpKernelContext::Params::kNoReservation; @@ -264,7 +264,7 @@ absl::Status GraphView::Initialize(const Graph* g) { total_bytes += NodeItemBytes(n); } - node_offsets_ = new uint32[num_nodes]; + node_offsets_ = new uint32_t[num_nodes]; for (int i = 0; i < num_nodes; i++) { node_offsets_[i] = std::numeric_limits::max(); } @@ -363,7 +363,7 @@ absl::Status InferAllocAttr(const Node* n, const Node* dst, // Note that it's possible for *n to be a Recv and *dst to be a Send, // so these two cases are not mutually exclusive. if (IsRecv(n)) { - string src_name; + std::string src_name; s = GetNodeAttr(n->attrs(), "send_device", &src_name); if (!s.ok()) return s; DeviceNameUtils::ParsedName parsed_src_name; @@ -388,7 +388,7 @@ absl::Status InferAllocAttr(const Node* n, const Node* dst, } } if (IsSend(dst)) { - string dst_name; + std::string dst_name; s = GetNodeAttr(dst->attrs(), "recv_device", &dst_name); if (!s.ok()) return s; DeviceNameUtils::ParsedName parsed_dst_name; diff --git a/tensorflow/core/common_runtime/graph_view.h b/tensorflow/core/common_runtime/graph_view.h index 3864df8a6ce165..32df420842d657 100644 --- a/tensorflow/core/common_runtime/graph_view.h +++ b/tensorflow/core/common_runtime/graph_view.h @@ -102,10 +102,10 @@ struct NodeItem { int input_start = 0; // Number of output edges, excluding control edges. - int32 num_output_edges; + int32_t num_output_edges; // Number of output control edges. - int32 num_output_control_edges; + int32_t num_output_control_edges; // If non-null, contains an array of num_outputs bools, where the ith bool // is true if and only if the ith output is consumed by another node. @@ -143,7 +143,7 @@ struct NodeItem { // 0... for forward from that input. 
const int* forward_from() const { return forward_from_base(); } - string DebugString() const; + std::string DebugString() const; private: friend class GraphView; @@ -185,18 +185,18 @@ struct NodeItem { num_output_control_edges + sizeof(AllocatorAttributes) * num_outputs); } - uint8* input_type_base() const { - return reinterpret_cast( + uint8_t* input_type_base() const { + return reinterpret_cast( var() + sizeof(EdgeInfo) * num_output_edges + sizeof(ControlEdgeInfo) * num_output_control_edges + sizeof(AllocatorAttributes) * num_outputs + sizeof(int) * num_outputs); } - uint8* output_type_base() const { - return reinterpret_cast( + uint8_t* output_type_base() const { + return reinterpret_cast( var() + sizeof(EdgeInfo) * num_output_edges + sizeof(ControlEdgeInfo) * num_output_control_edges + sizeof(AllocatorAttributes) * num_outputs + sizeof(int) * num_outputs + - sizeof(uint8) * num_inputs); + sizeof(uint8_t) * num_inputs); } NodeItem(const NodeItem&) = delete; @@ -220,7 +220,7 @@ class GraphView { NodeItem* node(int32_t id) const { DCHECK_GE(id, 0); DCHECK_LT(id, num_nodes_); - uint32 offset = node_offsets_[id]; + uint32_t offset = node_offsets_[id]; return ((offset == std::numeric_limits::max()) ? nullptr : reinterpret_cast(space_ + node_offsets_[id])); @@ -232,19 +232,19 @@ class GraphView { const NodeItem& node_ref(int32_t id) const { DCHECK_GE(id, 0); DCHECK_LT(id, num_nodes_); - uint32 offset = node_offsets_[id]; + uint32_t offset = node_offsets_[id]; DCHECK_NE(offset, std::numeric_limits::max()); return *reinterpret_cast(space_ + node_offsets_[id]); } - int32 num_nodes() const { return num_nodes_; } + int32_t num_nodes() const { return num_nodes_; } private: char* InitializeNode(char* ptr, const Node* n); size_t NodeItemBytes(const Node* n); - int32 num_nodes_ = 0; - uint32* node_offsets_ = nullptr; // array of size "num_nodes_" + int32_t num_nodes_ = 0; + uint32_t* node_offsets_ = nullptr; // array of size "num_nodes_" // node_offsets_[id] holds the byte offset for node w/ "id" in space_ char* space_; // NodeItem objects are allocated here diff --git a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc index eccea063ad5abf..ebbdfde177da79 100644 --- a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc +++ b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc @@ -43,8 +43,8 @@ namespace tensorflow { namespace { // Key to be used for BufRendezvous by Broadcaster. -string BroadcastBufKey(const string& exec_key, int subdiv, int src_rank, - int dst_rank) { +std::string BroadcastBufKey(const std::string& exec_key, int subdiv, + int src_rank, int dst_rank) { if (READABLE_KEYS) { return strings::StrCat("broadcast(", exec_key, "):subdiv(", subdiv, "):src(", src_rank, "):dst(", dst_rank, ")"); @@ -81,13 +81,13 @@ absl::Status HierarchicalTreeBroadcaster::InitializeCollectiveParams( CHECK_EQ(col_params->instance.type, BROADCAST_COLLECTIVE); CHECK_EQ(col_params->instance.impl_details.collective_name, "HierarchicalTreeBroadcast"); - const string& device_name = + const std::string& device_name = col_params->group.members[col_params->default_rank].device.name(); // Start by counting the devices in each task. // Precondition: device_names must be sorted so that all devices in // the same task are adjacent. 
std::vector dev_per_task; - const string* prior_task_name = &col_params->group.members[0].task; + const std::string* prior_task_name = &col_params->group.members[0].task; int dev_count = 1; for (int di = 1; di < col_params->group.group_size; ++di) { if (col_params->group.members[di].task != *prior_task_name) { @@ -102,8 +102,8 @@ absl::Status HierarchicalTreeBroadcaster::InitializeCollectiveParams( CHECK_EQ(col_params->group.num_tasks, dev_per_task.size()); if (VLOG_IS_ON(2)) { - string dpt_buf; - for (int dpt : dev_per_task) strings::StrAppend(&dpt_buf, dpt, ";"); + std::string dpt_buf; + for (int dpt : dev_per_task) absl::StrAppend(&dpt_buf, dpt, ";"); VLOG(2) << "HierarchicalTreeBroadcaster::InitializeCollectiveParams device=" << device_name << " source_rank=" << col_params->source_rank << " dev_per_task=" << dpt_buf; @@ -302,9 +302,9 @@ void HierarchicalTreeBroadcaster::RunTree() { if (-1 == my_rank) continue; int source_rank = col_params_->instance.impl_details.subdiv_source_rank[si]; if (VLOG_IS_ON(1)) { - string subdiv_buf; + std::string subdiv_buf; for (int r : col_params_->instance.impl_details.subdiv_permutations[si]) { - strings::StrAppend(&subdiv_buf, r, ","); + absl::StrAppend(&subdiv_buf, r, ","); } VLOG(1) << "Running Broadcast tree device=" << col_ctx_->device_name << " subdiv=" << si << " perm=" << subdiv_buf @@ -318,7 +318,7 @@ void HierarchicalTreeBroadcaster::RunTree() { if (my_rank >= 0 && my_rank != source_rank) { // Begin by receiving the value. tsl::profiler::TraceMe activity( - [&] { return strings::StrCat("ReceiveValue:", si); }, + [&] { return absl::StrCat("ReceiveValue:", si); }, tsl::profiler::TraceMeLevel::kInfo); int recv_from_rank = TreeRecvFrom(*col_params_, si); absl::Notification note; @@ -334,7 +334,7 @@ void HierarchicalTreeBroadcaster::RunTree() { // Then forward value to all descendent devices. 
{ tsl::profiler::TraceMe activity( - [&] { return strings::StrCat("ForwardValue:", si); }, + [&] { return absl::StrCat("ForwardValue:", si); }, tsl::profiler::TraceMeLevel::kInfo); if (my_rank >= 0 && status_.ok()) { std::vector send_to_ranks; @@ -413,7 +413,7 @@ void HierarchicalTreeBroadcaster::DispatchSend(int subdiv, int dst_rank, tsl::profiler::ScopedMemoryDebugAnnotation op_annotation( col_params_->name, col_ctx_->step_id, "dynamic", src_tensor->dtype(), [src_tensor]() { return src_tensor->shape().DebugString(); }); - string send_buf_key = + std::string send_buf_key = BroadcastBufKey(col_ctx_->exec_key, subdiv, src_rank, dst_rank); int dst_idx = col_params_->instance.impl_details.subdiv_permutations[subdiv][dst_rank]; @@ -434,7 +434,7 @@ void HierarchicalTreeBroadcaster::DispatchSend(int subdiv, int dst_rank, void HierarchicalTreeBroadcaster::DispatchRecv(int subdiv, int src_rank, int dst_rank, Tensor* dst_tensor, const StatusCallback& done) { - string recv_buf_key = + std::string recv_buf_key = BroadcastBufKey(col_ctx_->exec_key, subdiv, src_rank, dst_rank); int src_idx = col_params_->instance.impl_details.subdiv_permutations[subdiv][src_rank]; diff --git a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc index ba419077d2774e..408d8cb65b3682 100644 --- a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc +++ b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc @@ -191,7 +191,7 @@ class HierarchicalTreeBroadcasterTest : public ::testing::Test { if (!instances_[di]->status_.ok()) { ASSERT_GT(fail_after, 0); ASSERT_NE(instances_[di]->status_.message().find("Deliberate failure"), - string::npos); + std::string::npos); ++failure_count_; continue; } @@ -221,7 +221,7 @@ class HierarchicalTreeBroadcasterTest : public ::testing::Test { // In the test we always broadcast from rank 0. 
col_params_->is_source = (rank == 0); col_params_->source_rank = 0; - string dev_name = col_params_->group.members[rank].device.name(); + std::string dev_name = col_params_->group.members[rank].device.name(); TF_CHECK_OK(test_env_->device_mgr->LookupDevice(dev_name, &device_)) << "Couldn't find device " << dev_name << " existing devices: " << test_env_->device_mgr->DebugString(); @@ -356,10 +356,10 @@ TEST_F(HierarchicalTreeBroadcasterInitParamsTest, cp->instance.impl_details.collective_name = "HierarchicalTreeBroadcast"; std::vector dev_per_task = {4, 4, 6, 8}; for (int ti = 0; ti < cp->group.num_tasks; ti++) { - string task_name = strings::StrCat("/job:worker/replica:0/task:", ti); + std::string task_name = absl::StrCat("/job:worker/replica:0/task:", ti); for (int di = 0; di < dev_per_task[ti]; di++) { CollGroupMember member; - member.device.set_name(strings::StrCat(task_name, "/device:GPU:", di)); + member.device.set_name(absl::StrCat(task_name, "/device:GPU:", di)); member.task = task_name; cp->group.members.push_back(member); cp->group.group_size++; diff --git a/tensorflow/core/common_runtime/immutable_executor_state.cc b/tensorflow/core/common_runtime/immutable_executor_state.cc index 6eef9e802d862e..64ded72c5e0d4e 100644 --- a/tensorflow/core/common_runtime/immutable_executor_state.cc +++ b/tensorflow/core/common_runtime/immutable_executor_state.cc @@ -68,7 +68,7 @@ void GetMaxPendingCounts(const Node* n, size_t* max_pending, } // namespace ImmutableExecutorState::FrameInfo* ImmutableExecutorState::EnsureFrameInfo( - const string& fname) { + const std::string& fname) { auto iter = frame_info_.find(fname); if (iter != frame_info_.end()) { return iter->second.get(); @@ -110,8 +110,8 @@ absl::Status ImmutableExecutorState::Initialize(const Graph& graph) { // TODO(mrry): Track whether control flow was present in the // pre-partitioned graph, and enable the caller (e.g. // `DirectSession`) to relax this constraint. - string send_device; - string recv_device; + std::string send_device; + std::string recv_device; TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "send_device", &send_device)); TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "recv_device", &recv_device)); if (send_device != recv_device) { @@ -120,7 +120,7 @@ absl::Status ImmutableExecutorState::Initialize(const Graph& graph) { } const int id = n->id(); - const string& frame_name = cf_info.frame_names[id]; + const std::string& frame_name = cf_info.frame_names[id]; FrameInfo* frame_info = EnsureFrameInfo(frame_name); NodeItem* item = gview_.node(id); @@ -162,7 +162,7 @@ absl::Status ImmutableExecutorState::Initialize(const Graph& graph) { GetNodeAttr(n->attrs(), "is_constant", &is_constant_enter)); item->is_constant_enter = is_constant_enter; - string frame_name; + std::string frame_name; TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "frame_name", &frame_name)); FrameInfo* frame_info = frame_info_[frame_name].get(); @@ -214,7 +214,7 @@ absl::Status ImmutableExecutorState::Initialize(const Graph& graph) { // Initialize static information about the frames in the graph. frame_info->nodes->push_back(item); if (item->is_enter) { - string enter_name; + std::string enter_name; TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "frame_name", &enter_name)); EnsureFrameInfo(enter_name)->input_count++; } @@ -291,7 +291,7 @@ absl::Status ImmutableExecutorState::BuildControlFlowInfo( std::vector visited; visited.resize(num_nodes); - string frame_name; + std::string frame_name; std::deque ready; // Initialize with the root nodes. 
@@ -360,7 +360,7 @@ void ImmutableExecutorState::InitializePending(const Graph* graph, } if (!requires_control_flow_) { - atomic_pending_counts_.reset(new std::atomic<int32>[gview_.num_nodes()]); + atomic_pending_counts_.reset(new std::atomic<int32_t>[gview_.num_nodes()]); std::fill(atomic_pending_counts_.get(), atomic_pending_counts_.get() + gview_.num_nodes(), 0); } @@ -368,7 +368,7 @@ void ImmutableExecutorState::InitializePending(const Graph* graph, for (const Node* n : graph->nodes()) { if (IsSink(n)) continue; const int id = n->id(); - const string& name = cf_info.frame_names[id]; + const std::string& name = cf_info.frame_names[id]; size_t max_pending, max_dead; GetMaxPendingCounts(n, &max_pending, &max_dead); auto& counts = EnsureFrameInfo(name)->pending_counts; diff --git a/tensorflow/core/common_runtime/immutable_executor_state.h b/tensorflow/core/common_runtime/immutable_executor_state.h index 6a12bc1fb0b0c0..7e7437c5311d20 100644 --- a/tensorflow/core/common_runtime/immutable_executor_state.h +++ b/tensorflow/core/common_runtime/immutable_executor_state.h @@ -42,7 +42,7 @@ class Graph; class ImmutableExecutorState { public: struct FrameInfo { - explicit FrameInfo(string name) + explicit FrameInfo(std::string name) : name(std::move(name)), input_count(0), total_inputs(0), @@ -51,7 +51,7 @@ class ImmutableExecutorState { parallel_iterations(-1) {} // The name of the frame. - string name; + std::string name; // The total number of inputs to a frame. int input_count; @@ -71,7 +71,7 @@ class ImmutableExecutorState { std::unique_ptr> nodes; // The number of iterations of this frame that can execute concurrently. - int32 parallel_iterations; + int32_t parallel_iterations; }; explicit ImmutableExecutorState(const LocalExecutorParams& p) @@ -109,24 +109,24 @@ class ImmutableExecutorState { // // REQUIRES: `!requires_control_flow_support && len(dest) == // graph_view().num_nodes()`. - void copy_pending_counts(std::atomic<int32>* dest) const { + void copy_pending_counts(std::atomic<int32_t>* dest) const { DCHECK(!requires_control_flow_); memcpy(dest, atomic_pending_counts_.get(), - graph_view().num_nodes() * sizeof(std::atomic<int32>)); + graph_view().num_nodes() * sizeof(std::atomic<int32_t>)); std::atomic_thread_fence(std::memory_order_release); } private: struct ControlFlowInfo { - gtl::FlatSet<string> unique_frame_names; - std::vector<string> frame_names; + gtl::FlatSet<std::string> unique_frame_names; + std::vector<std::string> frame_names; }; static absl::Status BuildControlFlowInfo(const Graph* graph, ControlFlowInfo* cf_info); void InitializePending(const Graph* graph, const ControlFlowInfo& cf_info); - FrameInfo* EnsureFrameInfo(const string& fname); + FrameInfo* EnsureFrameInfo(const std::string& fname); // Owned. LocalExecutorParams params_; @@ -150,7 +150,7 @@ class ImmutableExecutorState { // If `requires_control_flow_` is false, this points to an array of initial // pending counts for the nodes in the graph, indexed by node ID. - std::unique_ptr<std::atomic<int32>[]> atomic_pending_counts_; + std::unique_ptr<std::atomic<int32_t>[]> atomic_pending_counts_; // Shallow copies of the constant tensors used in the graph. std::vector<Tensor> const_tensors_; diff --git a/tensorflow/core/common_runtime/inline_function_utils.cc b/tensorflow/core/common_runtime/inline_function_utils.cc index 1e8a85207fa0b1..a627e9e8aff9c9 100644 --- a/tensorflow/core/common_runtime/inline_function_utils.cc +++ b/tensorflow/core/common_runtime/inline_function_utils.cc @@ -70,11 +70,11 @@ struct Endpoint { int index; // Returns the string name represents this endpoint.
- string name() const { + std::string name() const { if (index == 0) { return node->name(); } else { - return strings::StrCat(node->name(), ":", index); + return absl::StrCat(node->name(), ":", index); } } @@ -82,7 +82,7 @@ struct Endpoint { }; struct EndpointHash { - uint64 operator()(const Endpoint& x) const { + uint64_t operator()(const Endpoint& x) const { return Hash64(reinterpret_cast<const char*>(&x.node), sizeof(Node*), x.index); } @@ -120,15 +120,15 @@ static Node* AddIdentity(absl::string_view name, Graph* g, Endpoint input) { return ret; } -std::vector<string> InputDevices(const Node& caller) { - std::vector<string> input_devices(caller.in_edges().size()); - std::vector<string> input_tensors(caller.in_edges().size()); +std::vector<std::string> InputDevices(const Node& caller) { + std::vector<std::string> input_devices(caller.in_edges().size()); + std::vector<std::string> input_tensors(caller.in_edges().size()); for (const Edge* edge : caller.in_edges()) { if (edge->IsControlEdge()) continue; - const string& input_device = edge->src()->has_assigned_device_name() - ? edge->src()->assigned_device_name() - : edge->src()->requested_device(); + const std::string& input_device = edge->src()->has_assigned_device_name() + ? edge->src()->assigned_device_name() + : edge->src()->requested_device(); input_devices[edge->dst_input()] = input_device; input_tensors[edge->dst_input()] = absl::StrCat(edge->src()->name(), ":", edge->src_output()); @@ -154,22 +154,24 @@ class DefaultFunctionBodyPlacer : public InlinedFunctionBodyPlacer { explicit DefaultFunctionBodyPlacer(const Node& caller) : input_devices_(InputDevices(caller)) {} - absl::optional<string> InputNodeDevice(int input_index) const override { + absl::optional<std::string> InputNodeDevice(int input_index) const override { return input_devices_[input_index]; } - absl::optional<string> OutputNodeDevice(int output_index) const override { + absl::optional<std::string> OutputNodeDevice( + int output_index) const override { return absl::nullopt; } bool ColocateInputOutputIdentities() const override { return false; } - absl::optional<string> ControlNodeDevice() const override { + absl::optional<std::string> ControlNodeDevice() const override { return absl::nullopt; } - absl::optional<string> BodyNodeDevice(const NodeDef& ndef) const override { + absl::optional<std::string> BodyNodeDevice( + const NodeDef& ndef) const override { return absl::nullopt; } private: - const std::vector<string> input_devices_; + const std::vector<std::string> input_devices_; }; // Place all nodes on the same device as caller node.
@@ -178,22 +180,24 @@ class SingleDeviceFunctionBodyPlacer : public InlinedFunctionBodyPlacer { explicit SingleDeviceFunctionBodyPlacer(const Node& caller) : caller_device_(caller.def().device()) {} - absl::optional InputNodeDevice(int input_index) const override { + absl::optional InputNodeDevice(int input_index) const override { return caller_device_; } - absl::optional OutputNodeDevice(int output_index) const override { + absl::optional OutputNodeDevice( + int output_index) const override { return caller_device_; } bool ColocateInputOutputIdentities() const override { return false; } - absl::optional ControlNodeDevice() const override { + absl::optional ControlNodeDevice() const override { return caller_device_; } - absl::optional BodyNodeDevice(const NodeDef& ndef) const override { + absl::optional BodyNodeDevice( + const NodeDef& ndef) const override { return caller_device_; } private: - const string caller_device_; + const std::string caller_device_; }; // Place input nodes on the same device as the corresponding caller input @@ -209,17 +213,19 @@ class MultiDeviceFunctionBodyPlacer : public InlinedFunctionBodyPlacer { DeviceNameUtils::ParseFullName(caller_device_, &caller_parsed_device_); } - absl::optional InputNodeDevice(int input_index) const override { + absl::optional InputNodeDevice(int input_index) const override { return input_devices_[input_index]; } - absl::optional OutputNodeDevice(int output_index) const override { + absl::optional OutputNodeDevice( + int output_index) const override { return absl::nullopt; } bool ColocateInputOutputIdentities() const override { return true; } - absl::optional ControlNodeDevice() const override { + absl::optional ControlNodeDevice() const override { return caller_device_; } - absl::optional BodyNodeDevice(const NodeDef& ndef) const override { + absl::optional BodyNodeDevice( + const NodeDef& ndef) const override { // LINT.IfChange // TODO(ezhulenev): If function would have been instantiated as a // multi-device function and executed via FunctionLibraryRuntime, it could @@ -240,10 +246,10 @@ class MultiDeviceFunctionBodyPlacer : public InlinedFunctionBodyPlacer { } private: - string caller_device_; + std::string caller_device_; bool has_parsed_caller_device_; DeviceNameUtils::ParsedName caller_parsed_device_; - std::vector input_devices_; + std::vector input_devices_; }; } // namespace @@ -286,7 +292,7 @@ using OutputControlSrc = InlineFunctionBodyOptions::OutputControlSource; // Propagate the debug info of `nodes` in function `func` to the `target` node. // If the debug info of any node is missing, its node name and function name // is used. -void PropagateDebugInfoToNode(const string& func, +void PropagateDebugInfoToNode(const std::string& func, const std::vector& nodes, NodeDef* target) { if (nodes.empty() || target->has_experimental_debug_info()) { @@ -306,10 +312,10 @@ void PropagateDebugInfoToNode(const string& func, } } // namespace -string InlineFunctionBodyOptions::DebugString() const { +std::string InlineFunctionBodyOptions::DebugString() const { const auto true_false = [](bool b) { return b ? "true" : "false"; }; - const auto keep_caller_node_str = [this]() -> string { + const auto keep_caller_node_str = [this]() -> std::string { switch (keep_caller_node) { case KeepCallerNode::kDoNotKeep: return "DoNotKeep"; @@ -508,7 +514,7 @@ absl::Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, // Add a NoOp node for function control inputs/outputs. 
const auto no_op = [&](absl::string_view name) -> Node* { Node* node = AddNoOp(absl::StrCat(caller->name(), "/", name), g); - const absl::optional device = placer->ControlNodeDevice(); + const absl::optional device = placer->ControlNodeDevice(); if (device.has_value()) node->set_requested_device(*device); return node; }; @@ -517,13 +523,13 @@ absl::Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, const auto input_identity = [&](absl::string_view name, Endpoint input, int index) -> Node* { Node* node = AddIdentity(absl::StrCat(caller->name(), "/", name), g, input); - const absl::optional device = placer->InputNodeDevice(index); + const absl::optional device = placer->InputNodeDevice(index); if (device.has_value()) node->set_requested_device(*device); bool colocate_identity = placer->ColocateInputOutputIdentities(); if (colocate_identity) { node->AddAttr(kColocationAttrName, - std::vector{absl::StrCat(kColocationGroupPrefix, - input.node->name())}); + std::vector{absl::StrCat( + kColocationGroupPrefix, input.node->name())}); } return node; }; @@ -532,13 +538,13 @@ absl::Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, const auto output_identity = [&](absl::string_view name, Endpoint input, int index) -> Node* { Node* node = AddIdentity(absl::StrCat(caller->name(), "/", name), g, input); - const absl::optional device = placer->OutputNodeDevice(index); + const absl::optional device = placer->OutputNodeDevice(index); if (device.has_value()) node->set_requested_device(*device); bool colocate_identity = placer->ColocateInputOutputIdentities(); if (colocate_identity) { node->AddAttr(kColocationAttrName, - std::vector{absl::StrCat(kColocationGroupPrefix, - input.node->name())}); + std::vector{absl::StrCat( + kColocationGroupPrefix, input.node->name())}); } return node; }; @@ -597,7 +603,7 @@ absl::Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, // // If 'x' is a node in fbody->graph and its copy in 'g' is 'y', we // remember 'y' in node_map[x->id()]. - std::unordered_set fn_nodes; + std::unordered_set fn_nodes; for (Node* n : fbody->graph->op_nodes()) { fn_nodes.insert(n->name()); } @@ -606,7 +612,7 @@ absl::Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, NodeDef ndef = n->def(); // Maybe override requested node device assignment. - const absl::optional device = placer->BodyNodeDevice(ndef); + const absl::optional device = placer->BodyNodeDevice(ndef); if (device.has_value()) ndef.set_device(*device); // Add inlined function name to inlined node debug information. 
@@ -617,7 +623,7 @@ absl::Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, // 1) to node name to avoid collisions // 2) to frame name to avoid multiple LoopCond nodes in one frame // 3) to colocation attribute - const string prefix = strings::StrCat(caller->name(), "/"); + const std::string prefix = absl::StrCat(caller->name(), "/"); TF_RETURN_IF_ERROR(AddPrefixAndSuffixToNode(prefix, /*suffix=*/"", &ndef, options.uniquify_frame_names)); diff --git a/tensorflow/core/common_runtime/inline_function_utils.h b/tensorflow/core/common_runtime/inline_function_utils.h index 94c118fe882a20..7ffafe13e5df03 100644 --- a/tensorflow/core/common_runtime/inline_function_utils.h +++ b/tensorflow/core/common_runtime/inline_function_utils.h @@ -41,13 +41,16 @@ class InlinedFunctionBodyPlacer { public: virtual ~InlinedFunctionBodyPlacer() = default; - virtual absl::optional InputNodeDevice(int input_index) const = 0; - virtual absl::optional OutputNodeDevice(int output_index) const = 0; + virtual absl::optional InputNodeDevice( + int input_index) const = 0; + virtual absl::optional OutputNodeDevice( + int output_index) const = 0; // Returns true if the added input/output identity nodes should be colocated // with the corresponding input/output from the function body. virtual bool ColocateInputOutputIdentities() const = 0; - virtual absl::optional ControlNodeDevice() const = 0; - virtual absl::optional BodyNodeDevice(const NodeDef& ndef) const = 0; + virtual absl::optional ControlNodeDevice() const = 0; + virtual absl::optional BodyNodeDevice( + const NodeDef& ndef) const = 0; // LINT.IfChange // Place input nodes on the same device as the corresponding caller input @@ -72,7 +75,7 @@ class InlinedFunctionBodyPlacer { const Graph&, const Node&)>; struct Config { - string name; + std::string name; Factory get; }; @@ -147,7 +150,7 @@ struct InlineFunctionBodyOptions { bool uniquify_frame_names = true; // A human-readable debug string for this options. - string DebugString() const; + std::string DebugString() const; }; // Returns 'OkStatus()' iff the function '*fbody' can be inlined at 'node' diff --git a/tensorflow/core/common_runtime/inline_function_utils_test.cc b/tensorflow/core/common_runtime/inline_function_utils_test.cc index 0d726ade656f21..1e20e6da535a16 100644 --- a/tensorflow/core/common_runtime/inline_function_utils_test.cc +++ b/tensorflow/core/common_runtime/inline_function_utils_test.cc @@ -50,7 +50,7 @@ TEST(InlineFunctionBody, ColocationConstraintPropagation) { {{"z"}, "AddV2", {"x", "y"}, - {{"T", DT_FLOAT}, {"_class", std::vector({"loc:@x"})}}}, + {{"T", DT_FLOAT}, {"_class", std::vector({"loc:@x"})}}}, }); TF_ASSERT_OK(flib_def.AddFunctionDef(fdef)); @@ -98,7 +98,8 @@ TEST(InlineFunctionBody, ColocationConstraintPropagation) { // Func/call/input/_0. 
NDef("call/z", "AddV2", {"Func/call/input/_0", "Func/call/input/_1"}, {{"T", DT_FLOAT}, - {"_class", std::vector({"loc:@Func/call/input/_0"})}}), + {"_class", + std::vector({"loc:@Func/call/input/_0"})}}), NDef("Func/call/output/_2", "Identity", {"call/z"}, {{"T", DT_FLOAT}}), }, diff --git a/tensorflow/core/common_runtime/input_colocation_exemption_registry.cc b/tensorflow/core/common_runtime/input_colocation_exemption_registry.cc index 7b0fa4af464fe9..4edf42ff812b8d 100644 --- a/tensorflow/core/common_runtime/input_colocation_exemption_registry.cc +++ b/tensorflow/core/common_runtime/input_colocation_exemption_registry.cc @@ -27,7 +27,7 @@ InputColocationExemptionRegistry* InputColocationExemptionRegistry::Global() { return registry; } -void InputColocationExemptionRegistry::Register(const string& op) { +void InputColocationExemptionRegistry::Register(const std::string& op) { auto it = ops_.find(op); if (it != ops_.end()) { LOG(WARNING) << "Input colocation exemption for op: " << op diff --git a/tensorflow/core/common_runtime/input_colocation_exemption_registry.h b/tensorflow/core/common_runtime/input_colocation_exemption_registry.h index c393fe7498b696..9e4bbc9e77f4af 100644 --- a/tensorflow/core/common_runtime/input_colocation_exemption_registry.h +++ b/tensorflow/core/common_runtime/input_colocation_exemption_registry.h @@ -40,20 +40,20 @@ class InputColocationExemptionRegistry { static InputColocationExemptionRegistry* Global(); // Returns the set of ops exempt from the input colocation constraints. - const gtl::FlatSet& Get() { return ops_; } + const gtl::FlatSet& Get() { return ops_; } // Registers an op to be excluded from the input colocation constraints. - void Register(const string& op); + void Register(const std::string& op); private: - gtl::FlatSet ops_; + gtl::FlatSet ops_; }; namespace input_colocation_exemption_registration { class InputColocationExemptionRegistration { public: - explicit InputColocationExemptionRegistration(const string& op) { + explicit InputColocationExemptionRegistration(const std::string& op) { InputColocationExemptionRegistry::Global()->Register(op); } }; diff --git a/tensorflow/core/common_runtime/inspecting_placer.cc b/tensorflow/core/common_runtime/inspecting_placer.cc index 96799bcf1e4be8..816d3dcae487a9 100644 --- a/tensorflow/core/common_runtime/inspecting_placer.cc +++ b/tensorflow/core/common_runtime/inspecting_placer.cc @@ -34,21 +34,21 @@ limitations under the License. 
namespace tensorflow { -string IOColocationGroups::DebugString() const { - std::unordered_map<int, std::vector<string>> group_members; +std::string IOColocationGroups::DebugString() const { + std::unordered_map<int, std::vector<std::string>> group_members; for (int arg_index = 0; arg_index < input_groups.size(); ++arg_index) { int group_id = input_groups[arg_index]; - group_members[group_id].push_back(strings::StrCat("i:", arg_index)); + group_members[group_id].push_back(absl::StrCat("i:", arg_index)); } for (int ret_index = 0; ret_index < output_groups.size(); ++ret_index) { int group_id = output_groups[ret_index]; - group_members[group_id].push_back(strings::StrCat("o:", ret_index)); + group_members[group_id].push_back(absl::StrCat("o:", ret_index)); } - std::vector<string> group_strings; + std::vector<std::string> group_strings; for (const auto& it : group_members) { int group_id = it.first; - const std::vector<string>& members = it.second; + const std::vector<std::string>& members = it.second; const PossibleDevices& devices = group_devices[group_id]; group_strings.push_back(strings::StrCat( "Group(", group_id, " members = [", absl::StrJoin(members, ", "), @@ -57,11 +57,11 @@ string IOColocationGroups::DebugString() const { "\" resource_device_name = \"", DeviceNameUtils::ParsedNameToString(devices.resource_device_name), "\" device_types = [", - absl::StrJoin( - devices.device_types, ", ", - [](string* out, const std::pair<DeviceType, int32>& type_and_pref) { - out->append(DeviceTypeString(type_and_pref.first)); - }), + absl::StrJoin(devices.device_types, ", ", + [](std::string* out, + const std::pair<DeviceType, int32_t>& type_and_pref) { + out->append(DeviceTypeString(type_and_pref.first)); + }), "])")); } diff --git a/tensorflow/core/common_runtime/inspecting_placer.h b/tensorflow/core/common_runtime/inspecting_placer.h index 90df36c58139fd..27e45dacadad8b 100644 --- a/tensorflow/core/common_runtime/inspecting_placer.h +++ b/tensorflow/core/common_runtime/inspecting_placer.h @@ -59,7 +59,7 @@ struct IOColocationGroups { // group_devices[i] contains possible devices for group with id i. std::vector<PossibleDevices> group_devices; - string DebugString() const; + std::string DebugString() const; }; class InspectingPlacer { diff --git a/tensorflow/core/common_runtime/int32_fulltype.h b/tensorflow/core/common_runtime/int32_fulltype.h index 1a55e0bc6a1e7c..8e89b0bec2f6d9 100644 --- a/tensorflow/core/common_runtime/int32_fulltype.h +++ b/tensorflow/core/common_runtime/int32_fulltype.h @@ -29,7 +29,7 @@ namespace tensorflow { class Int32FulltypePass { public: Int32FulltypePass() = default; - explicit Int32FulltypePass(string debug_location) + explicit Int32FulltypePass(std::string debug_location) : debug_location_(debug_location) {} // For each node in this graph that outputs int32 tensors, set full @@ -57,7 +57,7 @@ class Int32FulltypePass { private: // Location of where annotations were added for debug messages. - string debug_location_; + std::string debug_location_; }; } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/int32_fulltype_test.cc b/tensorflow/core/common_runtime/int32_fulltype_test.cc index 8cfb991cdacd38..ed8587667e9bcc 100644 --- a/tensorflow/core/common_runtime/int32_fulltype_test.cc +++ b/tensorflow/core/common_runtime/int32_fulltype_test.cc @@ -96,14 +96,14 @@ class Int32FulltypeTest : public ::testing::Test { // Returns the node in "graph" with the given name. // // REQUIRES: "graph" was produced by the most recent call to BuildGraph.
- Node* GetNodeByName(const Graph& graph, const string& name) { + Node* GetNodeByName(const Graph& graph, const std::string& name) { const auto search = nodes_by_name_.find(name); CHECK(search != nodes_by_name_.end()) << "Unknown node name: " << name; return graph.FindNodeId(search->second); } protected: - std::unordered_map nodes_by_name_; + std::unordered_map nodes_by_name_; private: void RebuildNodeNameMap(const Graph& graph) { diff --git a/tensorflow/core/common_runtime/isolate_placer_inspection_required_ops_pass_test.cc b/tensorflow/core/common_runtime/isolate_placer_inspection_required_ops_pass_test.cc index 5afdc072fcc1ae..be10cd744f35f1 100644 --- a/tensorflow/core/common_runtime/isolate_placer_inspection_required_ops_pass_test.cc +++ b/tensorflow/core/common_runtime/isolate_placer_inspection_required_ops_pass_test.cc @@ -67,11 +67,11 @@ void RunPassAndCompare(const GraphDef& original, GraphDef rewritten; RunPass(original, &rewritten); - std::vector errors; + std::vector errors; errors.push_back(absl::StrCat("Graphs did not match.\n Rewritten graph:\n", SummarizeGraphDef(rewritten))); for (const GraphDef& alternative : expected_alternatives) { - string diff; + std::string diff; bool graphs_equal = EqualGraphDef(rewritten, alternative, &diff); if (graphs_equal) { return; diff --git a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc index 1e17e24df37677..78f2d219505341 100644 --- a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc +++ b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc @@ -45,7 +45,7 @@ namespace tensorflow { namespace test { // TODO(hongm): Convert `g` and `init` to using std::unique_ptr. -Benchmark::Benchmark(const string& device, Graph* g, +Benchmark::Benchmark(const std::string& device, Graph* g, const SessionOptions* options, Graph* init, Rendezvous* rendez, const char* executor_type, bool old_benchmark_api) { @@ -61,7 +61,7 @@ Benchmark::Benchmark(const string& device, Graph* g, CHECK(!old_benchmark_api) << "Expected new API only"; - string t = absl::AsciiStrToUpper(device); + std::string t = absl::AsciiStrToUpper(device); // Allow NewDevice to allocate a new threadpool with different number of // threads for each new benchmark. 
LocalDevice::set_use_global_threadpool(false); @@ -121,7 +121,8 @@ Benchmark::Benchmark(const string& device, Graph* g, TF_CHECK_OK(NewExecutor(executor_type, params, *g, &exec_)); } -Benchmark::Benchmark(const string& device, Graph* g, bool old_benchmark_api) +Benchmark::Benchmark(const std::string& device, Graph* g, + bool old_benchmark_api) : Benchmark(device, g, nullptr, nullptr, nullptr, "", old_benchmark_api) {} Benchmark::~Benchmark() { @@ -141,14 +142,14 @@ void Benchmark::Run(benchmark::State& state) { RunWithRendezvousArgs({}, {}, state); } -string GetRendezvousKey(const Node* node) { - string send_device; +std::string GetRendezvousKey(const Node* node) { + std::string send_device; TF_CHECK_OK(GetNodeAttr(node->attrs(), "send_device", &send_device)); - string recv_device; + std::string recv_device; TF_CHECK_OK(GetNodeAttr(node->attrs(), "recv_device", &recv_device)); - string tensor_name; + std::string tensor_name; TF_CHECK_OK(GetNodeAttr(node->attrs(), "tensor_name", &tensor_name)); - uint64 send_device_incarnation; + uint64_t send_device_incarnation; TF_CHECK_OK( GetNodeAttr(node->attrs(), "send_device_incarnation", reinterpret_cast(&send_device_incarnation))); @@ -157,8 +158,8 @@ string GetRendezvousKey(const Node* node) { } void Benchmark::RunWithRendezvousArgs( - const std::vector>& inputs, - const std::vector& outputs, benchmark::State& state) { + const std::vector>& inputs, + const std::vector& outputs, benchmark::State& state) { if (!device_ || state.max_iterations == 0) { return; } @@ -179,7 +180,7 @@ void Benchmark::RunWithRendezvousArgs( TF_CHECK_OK(rendez_->Send(parsed, Rendezvous::Args(), p.second, false)); } TF_CHECK_OK(exec_->Run(args)); - for (const string& key : outputs) { + for (const std::string& key : outputs) { Rendezvous::ParsedKey parsed; TF_CHECK_OK(Rendezvous::ParseKey(key, &parsed)); TF_CHECK_OK(rendez_->Recv(parsed, Rendezvous::Args(), &unused, &is_dead)); @@ -197,7 +198,7 @@ void Benchmark::RunWithRendezvousArgs( TF_CHECK_OK(rendez_->Send(parsed, Rendezvous::Args(), p.second, false)); } TF_CHECK_OK(exec_->Run(args)); - for (const string& key : outputs) { + for (const std::string& key : outputs) { Rendezvous::ParsedKey parsed; TF_CHECK_OK(Rendezvous::ParseKey(key, &parsed)); TF_CHECK_OK(rendez_->Recv(parsed, Rendezvous::Args(), &unused, &is_dead)); diff --git a/tensorflow/core/common_runtime/kernel_benchmark_testlib.h b/tensorflow/core/common_runtime/kernel_benchmark_testlib.h index fcab9a65bc586a..a0e5486b96c120 100644 --- a/tensorflow/core/common_runtime/kernel_benchmark_testlib.h +++ b/tensorflow/core/common_runtime/kernel_benchmark_testlib.h @@ -48,20 +48,20 @@ class Benchmark { // * In the new API, the timer starts automatically at the first // iteration of the loop and stops after the last iteration. // TODO(vyng) Remove this once we have migrated all code to newer API. 
- Benchmark(const string& device, Graph* g, + Benchmark(const std::string& device, Graph* g, const SessionOptions* options = nullptr, Graph* init = nullptr, Rendezvous* rendez = nullptr, const char* executor_type = "", bool old_benchmark_api = false); - Benchmark(const string& device, Graph* g, bool old_benchmark_api); + Benchmark(const std::string& device, Graph* g, bool old_benchmark_api); ~Benchmark(); void Run(benchmark::State& state); void RunWithRendezvousArgs( - const std::vector>& inputs, - const std::vector& outputs, benchmark::State& state); + const std::vector>& inputs, + const std::vector& outputs, benchmark::State& state); private: thread::ThreadPool* pool_ = nullptr; // Not owned. @@ -78,7 +78,7 @@ class Benchmark { }; // Returns the rendezvous key associated with the given Send/Recv node. -string GetRendezvousKey(const Node* node); +std::string GetRendezvousKey(const Node* node); } // end namespace test } // end namespace tensorflow diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc index 63fd2f1b59c223..9997ff2a30c008 100644 --- a/tensorflow/core/common_runtime/local_device.cc +++ b/tensorflow/core/common_runtime/local_device.cc @@ -85,7 +85,7 @@ struct LocalDevice::EigenThreadPoolInfo { thread_opts.numa_node = numa_node; eigen_worker_threads_.num_threads = intra_op_parallelism_threads; eigen_worker_threads_.workers = new thread::ThreadPool( - options.env, thread_opts, strings::StrCat("numa_", numa_node, "_Eigen"), + options.env, thread_opts, absl::StrCat("numa_", numa_node, "_Eigen"), intra_op_parallelism_threads, !options.config.experimental().disable_thread_spinning(), /*allocator=*/nullptr); diff --git a/tensorflow/core/common_runtime/lower_case_op.cc b/tensorflow/core/common_runtime/lower_case_op.cc index 39d1d150fa8a1b..88c169bc4a80d3 100644 --- a/tensorflow/core/common_runtime/lower_case_op.cc +++ b/tensorflow/core/common_runtime/lower_case_op.cc @@ -38,7 +38,7 @@ class CaseBuilder { public: // Create a CaseBuilder to create the lowered form of `case` with branch // functions identified by `branch_fn_names` in the `graph`. - CaseBuilder(Node* case_op, const std::vector& branch_fn_names, + CaseBuilder(Node* case_op, const std::vector& branch_fn_names, bool keep_node_fetchable, Graph* graph); // Constructs the basic conditional control flow using switch and merge nodes. @@ -58,7 +58,7 @@ class CaseBuilder { private: // Returns unique name containing the name of the Case op being rewritten // (name_), infix and a suffix to ensure it is unique within the graph. - string NewName(const string& infix); + std::string NewName(const std::string& infix); // Adds input to both the then and else nodes from src:src_output. absl::Status AddInput(Node* src, int src_output); @@ -88,7 +88,7 @@ class CaseBuilder { // for the side effects. 
Node* branch_executed_node_; Graph* graph_; - string name_; + std::string name_; bool keep_node_fetchable_; NodeDebugInfo debug_info_; @@ -96,7 +96,7 @@ class CaseBuilder { }; CaseBuilder::CaseBuilder(Node* case_op, - const std::vector& branch_fn_names, + const std::vector& branch_fn_names, bool keep_node_fetchable, Graph* graph) : case_op_(case_op), num_branches_(branch_fn_names.size()), @@ -106,7 +106,7 @@ CaseBuilder::CaseBuilder(Node* case_op, debug_info_(*case_op_) { branch_call_builders_.reserve(num_branches_); for (int b = 0; b < num_branches_; b++) { - branch_call_builders_.emplace_back(NewName(strings::StrCat("branch", b)), + branch_call_builders_.emplace_back(NewName(absl::StrCat("branch", b)), branch_fn_names[b], graph->op_registry(), &debug_info_); branch_call_builders_[b].Device(case_op_->requested_device()); @@ -129,7 +129,7 @@ absl::Status CaseBuilder::CreatePivotNodes() { control_predecessor_ = branch_index; pivots_.resize(num_branches_, nullptr); for (int b = 0; b < num_branches_; b++) { - TF_RETURN_IF_ERROR(NodeBuilder(NewName(strings::StrCat("pivot_", b)), + TF_RETURN_IF_ERROR(NodeBuilder(NewName(absl::StrCat("pivot_", b)), "Identity", graph_->op_registry(), &debug_info_) .Input(branch_index, b) @@ -139,8 +139,8 @@ absl::Status CaseBuilder::CreatePivotNodes() { return absl::OkStatus(); } -string CaseBuilder::NewName(const string& infix) { - return graph_->NewName(strings::StrCat(name_, "/", infix)); +std::string CaseBuilder::NewName(const std::string& infix) { + return graph_->NewName(absl::StrCat(name_, "/", infix)); } absl::Status CaseBuilder::AddInput(Node* src, int src_output) { @@ -276,7 +276,7 @@ absl::Status RewriteCaseNode(Node* n, Graph* g, bool keep_node_fetchable) { } int num_branches = branches_attr->list().func_size(); - std::vector branch_fn_names; + std::vector branch_fn_names; branch_fn_names.reserve(num_branches); for (int b = 0; b < num_branches; b++) { branch_fn_names.emplace_back(branches_attr->list().func(b).name()); diff --git a/tensorflow/core/common_runtime/lower_case_op_test.cc b/tensorflow/core/common_runtime/lower_case_op_test.cc index eb5033cd75b000..d460d761fc646d 100644 --- a/tensorflow/core/common_runtime/lower_case_op_test.cc +++ b/tensorflow/core/common_runtime/lower_case_op_test.cc @@ -184,8 +184,8 @@ TEST(LowerCaseOpTest, BranchFunctionsWithoutOutputs) { using FDH = ::tensorflow::FunctionDefHelper; // Wrap AssignAddVariable + Const into a function. - const auto assign_add = [](const string& fn_name, int v) { - const Tensor tensor = test::AsScalar(v); + const auto assign_add = [](const std::string& fn_name, int v) { + const Tensor tensor = test::AsScalar(v); return FDH::Create( fn_name, {"v: resource"}, {}, {}, { diff --git a/tensorflow/core/common_runtime/lower_function_call_op_test.cc b/tensorflow/core/common_runtime/lower_function_call_op_test.cc index d276c7c43abbb7..3a2de9036df433 100644 --- a/tensorflow/core/common_runtime/lower_function_call_op_test.cc +++ b/tensorflow/core/common_runtime/lower_function_call_op_test.cc @@ -36,13 +36,13 @@ limitations under the License. 
namespace tensorflow { namespace { -AttrValue FuncAttr(const string& name) { +AttrValue FuncAttr(const std::string& name) { AttrValue attr; attr.mutable_func()->set_name(name); return attr; } -AttrValue FuncAttr(const string& name, const DataType type) { +AttrValue FuncAttr(const std::string& name, const DataType type) { AttrValue attr; attr.mutable_func()->set_name(name); (*attr.mutable_func()->mutable_attr())["T"].set_type(type); diff --git a/tensorflow/core/common_runtime/lower_functional_ops.cc b/tensorflow/core/common_runtime/lower_functional_ops.cc index 49885ba8129e8e..a2c2b6986a5e8b 100644 --- a/tensorflow/core/common_runtime/lower_functional_ops.cc +++ b/tensorflow/core/common_runtime/lower_functional_ops.cc @@ -52,7 +52,7 @@ bool CheckBoolAttr(const Node* n, absl::string_view attr_name) { // Checks if string attribute is defined and it's not empty. bool CheckStringAttr(const Node* n, absl::string_view attr_name) { - string match; + std::string match; bool found = TryGetNodeAttr(n->attrs(), attr_name, &match); return found && !match.empty(); } diff --git a/tensorflow/core/common_runtime/lower_functional_ops_test.cc b/tensorflow/core/common_runtime/lower_functional_ops_test.cc index 2f16c6fef7e308..2d47ac5d70bd3c 100644 --- a/tensorflow/core/common_runtime/lower_functional_ops_test.cc +++ b/tensorflow/core/common_runtime/lower_functional_ops_test.cc @@ -66,7 +66,7 @@ absl::Status Rewrite(std::unique_ptr* graph) { // (counter:int32, pred:bool, x:int32) -> counter < N FunctionDef WhileWithIfCond(int32_t N) { - const Tensor kN = test::AsScalar(N); + const Tensor kN = test::AsScalar(N); return FDH::Define( // Name "WhileWithIfCond", @@ -90,7 +90,7 @@ FunctionDef WhileWithIfBody() { then_func.set_name("XTimesTwo"); NameAttrList else_func; else_func.set_name("XTimesFour"); - const Tensor kOne = test::AsScalar(1); + const Tensor kOne = test::AsScalar(1); std::vector input_types = {DT_INT32}; std::vector output_types = {DT_INT32}; return FDH::Define( diff --git a/tensorflow/core/common_runtime/lower_if_op.cc b/tensorflow/core/common_runtime/lower_if_op.cc index e46ef4ff3de543..01beef8fc2328d 100644 --- a/tensorflow/core/common_runtime/lower_if_op.cc +++ b/tensorflow/core/common_runtime/lower_if_op.cc @@ -60,7 +60,7 @@ class CondBuilder { private: // Returns unique name containing the name of the If op being rewritten // (name_), infix and a suffix to ensure it is unique within the graph. - string NewName(const string& infix); + std::string NewName(const std::string& infix); // Adds input to both the then and else nodes from src:src_output. absl::Status AddInput(Node* src, int src_output); @@ -102,7 +102,7 @@ class CondBuilder { // executed for the side effects. 
Node* branch_executed_node_; Graph* graph_; - string name_; + std::string name_; bool keep_node_fetchable_; NodeDebugInfo debug_info_; @@ -172,8 +172,8 @@ absl::Status CondBuilder::CreatePivotNodes() { return absl::OkStatus(); } -string CondBuilder::NewName(const string& infix) { - return graph_->NewName(strings::StrCat(name_, "/", infix)); +std::string CondBuilder::NewName(const std::string& infix) { + return graph_->NewName(absl::StrCat(name_, "/", infix)); } absl::Status CondBuilder::AddInput(Node* src, int src_output) { diff --git a/tensorflow/core/common_runtime/lower_if_op_test.cc b/tensorflow/core/common_runtime/lower_if_op_test.cc index 91bddb27b452be..68c55d27d16433 100644 --- a/tensorflow/core/common_runtime/lower_if_op_test.cc +++ b/tensorflow/core/common_runtime/lower_if_op_test.cc @@ -35,7 +35,7 @@ limitations under the License. namespace tensorflow { namespace { -AttrValue FuncAttr(const string& name) { +AttrValue FuncAttr(const std::string& name) { AttrValue attr; attr.mutable_func()->set_name(name); return attr; @@ -153,8 +153,8 @@ TEST(LowerIfOpTest, BranchFunctionsWithoutOutputs) { using FDH = ::tensorflow::FunctionDefHelper; // Wrap AssignAddVariable + Const into a function. - const auto assign_add = [](const string& fn_name, int v) { - const Tensor tensor = test::AsScalar(v); + const auto assign_add = [](const std::string& fn_name, int v) { + const Tensor tensor = test::AsScalar(v); return FDH::Create( fn_name, {"v: resource"}, {}, {}, { diff --git a/tensorflow/core/common_runtime/lower_while_op.cc b/tensorflow/core/common_runtime/lower_while_op.cc index 8a8c3c075dd235..84f03444a93972 100644 --- a/tensorflow/core/common_runtime/lower_while_op.cc +++ b/tensorflow/core/common_runtime/lower_while_op.cc @@ -132,7 +132,7 @@ class LowerWhileHelper { // Returns unique name containing the name of the While op being rewritten // (name_), infix and a suffix to ensure it is unique within the graph. - string NewName(const string& infix); + std::string NewName(const std::string& infix); // Returns true if the input at index is a resource and the same resource is // returned as an output. @@ -156,7 +156,7 @@ class LowerWhileHelper { Graph* graph_; const FunctionLibraryDefinition* flib_def_; // Name of the `while_op_`. - string name_; + std::string name_; // Max number of parallel_iterations for the while loop. const int parallel_iterations_; bool keep_node_fetchable_; @@ -363,15 +363,15 @@ absl::Status LowerWhileHelper::CreateSwitchNodes() { if (IsLoopCarriedResource(i)) { continue; } - string op_name; + std::string op_name; { const Node* input_node; TF_RETURN_IF_ERROR(while_op_->input_node(i, &input_node)); - op_name = strings::StrCat(input_node->name(), "_switch"); + op_name = absl::StrCat(input_node->name(), "_switch"); } Node* merge_node = merge_nodes_[op_input_output_to_lowered_node_[i]]; Node* switch_node; - string op_type = "Switch"; + std::string op_type = "Switch"; if (IsRefType(merge_node->output_type(0))) { op_type = "RefSwitch"; } @@ -413,7 +413,7 @@ absl::Status LowerWhileHelper::CreateBodyFuncCallNode() { // node is not the first one to be ready? Can we speed that case up using some // sort of multi-input Merge? 
Node* body_control_node_; - string op_type = "Identity"; + std::string op_type = "Identity"; if (IsRefType(switch_nodes_[0]->output_type(1))) { op_type = "RefIdentity"; } @@ -569,8 +569,8 @@ absl::Status LowerWhileHelper::UpdateConsumers() { return absl::OkStatus(); } -string LowerWhileHelper::NewName(const string& infix) { - return graph_->NewName(strings::StrCat(name_, "/", infix)); +std::string LowerWhileHelper::NewName(const std::string& infix) { + return graph_->NewName(absl::StrCat(name_, "/", infix)); } bool LowerWhileHelper::IsLoopCarriedResource(int index) { diff --git a/tensorflow/core/common_runtime/lower_while_op_test.cc b/tensorflow/core/common_runtime/lower_while_op_test.cc index 4fe9337c942766..eb19c84c04dd44 100644 --- a/tensorflow/core/common_runtime/lower_while_op_test.cc +++ b/tensorflow/core/common_runtime/lower_while_op_test.cc @@ -253,7 +253,8 @@ TEST(LowerWhileOpTest, ForwardAssignedInputDevice) { TF_CHECK_OK(NodeBuilder("placed_node", "Placeholder") .Attr("dtype", type) .Finalize(graph.get(), &placeholder)); - const string assigned_device_name = "/job:localhost/replica:0/task:0/gpu:0"; + const std::string assigned_device_name = + "/job:localhost/replica:0/task:0/gpu:0"; placeholder->set_assigned_device_name(assigned_device_name); Node* while_node; std::vector inputs({NodeBuilder::NodeOut(placeholder)}); @@ -343,11 +344,11 @@ TEST(LowerWhileOpTest, ForwardRequestedInputDevice) { TF_ASSERT_OK(graph->AddFunctionLibrary(f_lib_proto)); auto type = DT_FLOAT; // We will place the loop var on the gpu:0. - const string gpu_0_device = "/job:localhost/replica:0/task:0/gpu:0"; + const std::string gpu_0_device = "/job:localhost/replica:0/task:0/gpu:0"; // We will place loop's control input on the gpu:1. - const string gpu_1_device = "/job:localhost/replica:0/task:0/gpu:1"; + const std::string gpu_1_device = "/job:localhost/replica:0/task:0/gpu:1"; // We will place While op on gpu:2. - const string gpu_2_device = "/job:localhost/replica:0/task:0/gpu:2"; + const std::string gpu_2_device = "/job:localhost/replica:0/task:0/gpu:2"; Node* gpu_0_ph; TF_CHECK_OK(NodeBuilder("placed_node", "Placeholder") .Attr("dtype", type) @@ -483,11 +484,11 @@ TEST(LowerWhileOpTest, ForwardColocationKeyAttribute) { TF_ASSERT_OK(graph->AddFunctionLibrary(f_lib_proto)); auto type = DT_FLOAT; // We will place the loop var on the gpu:0. - const string gpu_0_device = "/job:localhost/replica:0/task:0/gpu:0"; + const std::string gpu_0_device = "/job:localhost/replica:0/task:0/gpu:0"; // We will place loop's control input on the gpu:1. - const string gpu_1_device = "/job:localhost/replica:0/task:0/gpu:1"; + const std::string gpu_1_device = "/job:localhost/replica:0/task:0/gpu:1"; // We will place While op on gpu:2. 
- const string gpu_2_device = "/job:localhost/replica:0/task:0/gpu:2"; + const std::string gpu_2_device = "/job:localhost/replica:0/task:0/gpu:2"; Node* gpu_0_ph; AttrValue gpu_0_colocation_attr; gpu_0_colocation_attr.mutable_list()->add_s("loc@:some_op_on_gpu_0_device"); diff --git a/tensorflow/core/common_runtime/memory_types.cc b/tensorflow/core/common_runtime/memory_types.cc index d22d72f1a57019..216fdfd6d239c4 100644 --- a/tensorflow/core/common_runtime/memory_types.cc +++ b/tensorflow/core/common_runtime/memory_types.cc @@ -34,14 +34,14 @@ struct Endpoint { }; struct EndpointHash { - uint32 operator()(const Endpoint& x) const { + uint32_t operator()(const Endpoint& x) const { return Hash32(reinterpret_cast(&x.node_id), sizeof(int), x.output_index); } }; struct EndpointEq { - uint32 operator()(const Endpoint& x, const Endpoint& y) const { + uint32_t operator()(const Endpoint& x, const Endpoint& y) const { return (x.node_id == y.node_id) && (x.output_index == y.output_index); } }; @@ -116,14 +116,14 @@ absl::Status ValidateMemoryTypes(const DeviceType& device_type, // within this process. That is sufficient because EnsureMemoryTypes // is only used on a TensorFlow graph that is gonna to be executed in // a single tf device (hence within a single process). -static string GetTensorName(const Edge* edge) { +static std::string GetTensorName(const Edge* edge) { static std::atomic counter(0); - return strings::StrCat("memtype_", counter.fetch_add(1), "_", - edge->src()->name()); + return absl::StrCat("memtype_", counter.fetch_add(1), "_", + edge->src()->name()); } -static Node* Send(Graph* g, const string& tensor_name, - const string& device_name, bool host, const Edge* edge) { +static Node* Send(Graph* g, const std::string& tensor_name, + const std::string& device_name, bool host, const Edge* edge) { Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("n"), host ? "_HostSend" : "_Send") .Input(edge->src(), edge->src_output()) @@ -138,8 +138,8 @@ static Node* Send(Graph* g, const string& tensor_name, return ret; } -static Node* Recv(Graph* g, const string& tensor_name, - const string& device_name, bool host, const Edge* edge) { +static Node* Recv(Graph* g, const std::string& tensor_name, + const std::string& device_name, bool host, const Edge* edge) { Node* ret; TF_CHECK_OK( NodeBuilder(g->NewName("n"), host ? "_HostRecv" : "_Recv") @@ -156,7 +156,7 @@ static Node* Recv(Graph* g, const string& tensor_name, } absl::Status EnsureMemoryTypes(const DeviceType& device_type, - const string& device_name, Graph* g) { + const std::string& device_name, Graph* g) { struct Item { const Edge* edge; MemoryType sm; @@ -191,7 +191,7 @@ absl::Status EnsureMemoryTypes(const DeviceType& device_type, Endpoint key{e->src()->id(), e->src_output()}; auto iter = recv_nodes.find(key); if (iter == recv_nodes.end()) { - const string tensor_name = GetTensorName(e); + const std::string tensor_name = GetTensorName(e); Node* send = Send(g, tensor_name, device_name, (item.sm == HOST_MEMORY), e); recv = Recv(g, tensor_name, device_name, (item.dm == HOST_MEMORY), e); diff --git a/tensorflow/core/common_runtime/memory_types.h b/tensorflow/core/common_runtime/memory_types.h index 46a943c0a3836e..bbadfe24e156c8 100644 --- a/tensorflow/core/common_runtime/memory_types.h +++ b/tensorflow/core/common_runtime/memory_types.h @@ -36,7 +36,7 @@ absl::Status ValidateMemoryTypes(const DeviceType& device_type, const Graph* g); // be OK). Otherwise, returns an error and '*g' may be in an // invalidate state and the caller should discard it. 
absl::Status EnsureMemoryTypes(const DeviceType& device_type, - const string& device_name, Graph* g); + const std::string& device_name, Graph* g); // Get the memory type for 'index'th output of node 'n' in graph 'g', when // running on 'device_type'. diff --git a/tensorflow/core/common_runtime/memory_types_test.cc b/tensorflow/core/common_runtime/memory_types_test.cc index 26f414c14204ce..0be98557679406 100644 --- a/tensorflow/core/common_runtime/memory_types_test.cc +++ b/tensorflow/core/common_runtime/memory_types_test.cc @@ -30,7 +30,7 @@ namespace tensorflow { TEST(MemoryTypeChecker, Int32OK) { Graph* g = new Graph(OpRegistry::Global()); Tensor v(DT_INT32, {}); - v.scalar<int32>().setZero(); + v.scalar<int32_t>().setZero(); auto in0 = test::graph::Constant(g, v); auto in1 = test::graph::Constant(g, v); test::graph::Add(g, in0, in1); @@ -45,7 +45,7 @@ TEST(MemoryTypeChecker, Int32OK) { TEST(MemoryTypeChecker, Int32NotOk) { Graph* g = new Graph(OpRegistry::Global()); Tensor v(DT_INT32, {}); - v.scalar<int32>().setZero(); + v.scalar<int32_t>().setZero(); auto x = test::graph::Constant(g, v); test::graph::Cast(g, x, DT_FLOAT); TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_CPU, g)); diff --git a/tensorflow/core/common_runtime/next_pluggable_device/c/tf_rendezvous_c_api_test.cc b/tensorflow/core/common_runtime/next_pluggable_device/c/tf_rendezvous_c_api_test.cc index 28f029350da1a2..9570b9407e1574 100644 --- a/tensorflow/core/common_runtime/next_pluggable_device/c/tf_rendezvous_c_api_test.cc +++ b/tensorflow/core/common_runtime/next_pluggable_device/c/tf_rendezvous_c_api_test.cc @@ -58,7 +58,7 @@ Tensor CreateTestTensor() { Tensor t(DT_INT8, TensorShape({10, 20})); for (int64_t a = 0; a < t.shape().dim_size(0); a++) { for (int64_t b = 0; b < t.shape().dim_size(1); b++) { - t.matrix<int8>()(a, b) = static_cast<int8>((a + 1) * (b + 1)); + t.matrix<int8_t>()(a, b) = static_cast<int8_t>((a + 1) * (b + 1)); } } return t; @@ -68,7 +68,8 @@ class FakeAllocator : public Allocator { public: std::string Name() override { return "fake"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override { - return port::AlignedMalloc(num_bytes, alignment); + return tsl::port::AlignedMalloc(num_bytes, + static_cast<int>(alignment)); } void DeallocateRaw(void* ptr) override { return port::AlignedFree(ptr); } }; @@ -112,8 +113,9 @@ class FakeDeviceManager : public DeviceMgr { bool ContainsDevice(int64_t device_incarnation) const override { return false; } - void ClearContainers(absl::Span<const string> containers) const override {} - int NumDeviceType(const string& type) const override { return 0; } + void ClearContainers( + absl::Span<const std::string> containers) const override {} + int NumDeviceType(const std::string& type) const override { return 0; } int NumDevices() const override { return 0; } Device* HostCPU() const override { return nullptr; } @@ -127,7 +129,7 @@ class TestDeviceContext : public DeviceContext { Tensor* device_tensor, StatusCallback done, bool sync_dst_compute) const override { Tensor test_tensor = CreateTestTensor(); - test::ExpectTensorEqual<int8>(test_tensor, *cpu_tensor); + test::ExpectTensorEqual<int8_t>(test_tensor, *cpu_tensor); done(absl::OkStatus()); } @@ -191,7 +193,7 @@ TEST(RendezvousCAPI, DeviceToHost) { }); callback_done.WaitForNotification(); Tensor test_tensor = CreateTestTensor(); - test::ExpectTensorEqual<int8>(test_tensor, result); + test::ExpectTensorEqual<int8_t>(test_tensor, result); Destroy(thunk); delete thunk; diff --git a/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.cc b/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.cc index
79b1eebbb3c6c7..29c45068316914 100644 --- a/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.cc +++ b/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.cc @@ -44,7 +44,8 @@ namespace tensorflow { // TODO(chuanhao): implement an API to query device memory, and make // memory_limit a parameter instead of hard coding. static DeviceAttributes BuildNextPluggableDeviceAttributes( - const string& name_prefix, const string& device_name, int device_ordinal) { + const std::string& name_prefix, const std::string& device_name, + int device_ordinal) { return Device::BuildDeviceAttributes( absl::StrCat(name_prefix, "/device:", device_name, ":", device_ordinal), DeviceType(device_name), Bytes(16ULL << 30), DeviceLocality(), diff --git a/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.h b/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.h index cb8ecf514101b0..8ad6c2051a87ac 100644 --- a/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.h +++ b/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.h @@ -36,13 +36,13 @@ class NextPluggableDevice : public PjRtBaseDevice { public: struct Options { // The device name's prefix (e.g., "/task:7") - string device_name_prefix; + std::string device_name_prefix; // The name of the device (e.g., "GPU") - string device_name; + std::string device_name; // The name of the compilation device (e.g., "XLA_TPU_JIT"); - string compilation_device_name; + std::string compilation_device_name; // The TfDeviceId. int device_ordinal = -1; diff --git a/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.cc b/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.cc index 857d7f56a43355..f915ecdf47ce24 100644 --- a/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.cc +++ b/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.cc @@ -64,14 +64,14 @@ absl::StatusOr DeviceShapeRepresentation( } // namespace absl::Status NextPluggableDeviceFactory::ListPhysicalDevices( - std::vector* devices) { + std::vector* devices) { TF_Status* c_status = TF_NewStatus(); int32_t device_count = api_->TFNPD_GetDeviceCount(c_status); TF_RETURN_IF_ERROR(StatusFromTF_Status(c_status)); TF_DeleteStatus(c_status); for (int i = 0; i < device_count; ++i) { - const string device_name = + const std::string device_name = absl::StrCat("/physical_device:", device_type_, ":", i); devices->push_back(device_name); } diff --git a/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.h b/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.h index 5ccfb6dd336848..f23e5cd00cd76d 100644 --- a/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.h +++ b/tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device_factory.h @@ -36,7 +36,7 @@ class NextPluggableDeviceFactory : public DeviceFactory { device_type_(device_type), compilation_device_name_(compilation_device_name) {} - absl::Status ListPhysicalDevices(std::vector* devices) override; + absl::Status ListPhysicalDevices(std::vector* devices) override; absl::Status CreateDevices( const SessionOptions& session_options, const std::string& name_prefix, diff --git a/tensorflow/core/config/BUILD b/tensorflow/core/config/BUILD index f41dc9f2d94a79..52217b6f7891a9 100644 --- a/tensorflow/core/config/BUILD 
+++ b/tensorflow/core/config/BUILD @@ -21,7 +21,10 @@ cc_library( "flags.h", ], visibility = ["//tensorflow:internal"], - deps = ["//tensorflow/core/platform:stringpiece"], + deps = [ + "//tensorflow/core/platform:stringpiece", + "@com_google_absl//absl/strings:string_view", + ], ) filegroup( @@ -63,6 +66,7 @@ cc_library( "//tensorflow/core/platform:stringpiece", "//tensorflow/core/util:env_var", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:string_view", ], ) diff --git a/tensorflow/core/config/flags.cc b/tensorflow/core/config/flags.cc index d2d1ea502dfe9e..faf53293eb82d2 100644 --- a/tensorflow/core/config/flags.cc +++ b/tensorflow/core/config/flags.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/config/flags.h" #include "absl/strings/ascii.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/stringpiece.h" #include "tensorflow/core/util/env_var.h" diff --git a/tensorflow/core/config/flags.h b/tensorflow/core/config/flags.h index c882cd3939f4af..df4379e6ddb4b9 100644 --- a/tensorflow/core/config/flags.h +++ b/tensorflow/core/config/flags.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_CONFIG_FLAGS_H_ #define TENSORFLOW_CORE_CONFIG_FLAGS_H_ +#include "absl/strings/string_view.h" #include "tensorflow/core/platform/stringpiece.h" namespace tensorflow { diff --git a/tensorflow/core/data/service/client/data_service_client.cc b/tensorflow/core/data/service/client/data_service_client.cc index 1a79089fbccc0f..e99277b79f8752 100644 --- a/tensorflow/core/data/service/client/data_service_client.cc +++ b/tensorflow/core/data/service/client/data_service_client.cc @@ -226,16 +226,16 @@ TraceMeMetadata DataServiceClient::GetTraceMeMetadata() const { "num_tasks", num_tasks == -1 ? 
kTraceInfoUnavailable - : strings::Printf("%lld", static_cast(num_tasks)))); + : absl::StrFormat("%lld", static_cast(num_tasks)))); result.push_back(std::make_pair("job_name", params_.job_name)); result.push_back(std::make_pair( "max_outstanding_requests", - strings::Printf( + absl::StrFormat( "%lld", static_cast(params_.max_outstanding_requests)))); if (params_.max_outstanding_requests == model::kAutotune) { result.push_back(std::make_pair( "autotuned_max_outstanding_requests", - strings::Printf("%lld", static_cast( + absl::StrFormat("%lld", static_cast( autotuned_max_outstanding_requests)))); } return result; @@ -295,7 +295,7 @@ void DataServiceClient::TaskThreadManager() TF_LOCKS_EXCLUDED(mu_) { auto cleanup = gtl::MakeCleanup([] { VLOG(1) << "Task thread manager exiting"; }); VLOG(1) << "Starting task thread manager"; - uint64 next_check = Env::Default()->NowMicros(); + uint64_t next_check = Env::Default()->NowMicros(); while (true) { { mutex_lock l(mu_); diff --git a/tensorflow/core/data/service/client/data_service_client.h b/tensorflow/core/data/service/client/data_service_client.h index 7c211d5551c46e..ecaecc841573e5 100644 --- a/tensorflow/core/data/service/client/data_service_client.h +++ b/tensorflow/core/data/service/client/data_service_client.h @@ -48,7 +48,7 @@ namespace data { class DataServiceContext { public: virtual ~DataServiceContext() = default; - virtual std::unique_ptr StartThread(const string& name, + virtual std::unique_ptr StartThread(const std::string& name, std::function fn) = 0; virtual void RecordBufferEnqueue(const std::vector& element) = 0; virtual void RecordBufferDequeue(const std::vector& element) = 0; diff --git a/tensorflow/core/data/service/client/data_service_client_test.cc b/tensorflow/core/data/service/client/data_service_client_test.cc index 9af455d11a0201..0baca60d285a8f 100644 --- a/tensorflow/core/data/service/client/data_service_client_test.cc +++ b/tensorflow/core/data/service/client/data_service_client_test.cc @@ -82,7 +82,7 @@ class TestDataServiceContext : public DataServiceContext { TestDataServiceContext() = default; ~TestDataServiceContext() override = default; - std::unique_ptr StartThread(const string& name, + std::unique_ptr StartThread(const std::string& name, std::function fn) override { return absl::WrapUnique( Env::Default()->StartThread({}, name, std::move(fn))); diff --git a/tensorflow/core/data/service/credentials_factory.cc b/tensorflow/core/data/service/credentials_factory.cc index 721ce5b806e7af..4362800c525137 100644 --- a/tensorflow/core/data/service/credentials_factory.cc +++ b/tensorflow/core/data/service/credentials_factory.cc @@ -58,7 +58,7 @@ absl::Status CredentialsFactory::Get(absl::string_view protocol, return absl::OkStatus(); } - std::vector available_types; + std::vector available_types; for (const auto& factory : credentials_factories()) { available_types.push_back(factory.first); } diff --git a/tensorflow/core/data/service/data_transfer.cc b/tensorflow/core/data/service/data_transfer.cc index 4f45b11d313e31..ee6a0b1c4d3daa 100644 --- a/tensorflow/core/data/service/data_transfer.cc +++ b/tensorflow/core/data/service/data_transfer.cc @@ -128,7 +128,7 @@ absl::Status DataTransferClient::Build( return it->second(config, out); } - std::vector available_names; + std::vector available_names; for (const auto& factory : transfer_client_factories()) { available_names.push_back(factory.first); } diff --git a/tensorflow/core/data/service/dispatcher_client.cc b/tensorflow/core/data/service/dispatcher_client.cc index 
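// --- Illustrative sketch, not part of the patch ---
// The DataServiceClient::GetTraceMeMetadata hunk above replaces the legacy
// strings::Printf helper with absl::StrFormat. The sketch shows the Abseil
// call on its own; FormatTaskCount and num_tasks are placeholder names.
#include <cstdint>
#include <string>
#include "absl/strings/str_format.h"

std::string FormatTaskCount(int64_t num_tasks) {
  // absl::StrFormat checks the format string against the argument types, so
  // the cast keeps the argument matched to %lld, as in the patch.
  return absl::StrFormat("%lld", static_cast<long long>(num_tasks));
}
// --- end sketch ---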
c06acb3e332ddf..4a3c8a12a31057 100644 --- a/tensorflow/core/data/service/dispatcher_client.cc +++ b/tensorflow/core/data/service/dispatcher_client.cc @@ -55,7 +55,7 @@ absl::Status DataServiceDispatcherClient::Initialize() { TF_RETURN_IF_ERROR( CredentialsFactory::CreateClientCredentials(protocol_, &credentials)); grpc::ChannelArguments args; - args.SetMaxReceiveMessageSize(std::numeric_limits::max()); + args.SetMaxReceiveMessageSize(std::numeric_limits::max()); args.SetInt(GRPC_ARG_USE_LOCAL_SUBCHANNEL_POOL, true); auto channel = grpc::CreateCustomChannel(address_, credentials, args); stub_ = DispatcherService::NewStub(channel); diff --git a/tensorflow/core/data/service/grpc_dispatcher_impl_test.cc b/tensorflow/core/data/service/grpc_dispatcher_impl_test.cc index c04cdf7a718456..6882a6b23e09e3 100644 --- a/tensorflow/core/data/service/grpc_dispatcher_impl_test.cc +++ b/tensorflow/core/data/service/grpc_dispatcher_impl_test.cc @@ -72,7 +72,7 @@ class GrpcDispatcherImplTest : public ::testing::Test { TF_RETURN_IF_ERROR( CredentialsFactory::CreateClientCredentials(kProtocol, &credentials)); ChannelArguments args; - args.SetMaxReceiveMessageSize(std::numeric_limits::max()); + args.SetMaxReceiveMessageSize(std::numeric_limits::max()); args.SetInt(GRPC_ARG_USE_LOCAL_SUBCHANNEL_POOL, true); std::shared_ptr channel = ::grpc::CreateCustomChannel(GetDispatcherAddress(), credentials, args); diff --git a/tensorflow/core/data/service/grpc_worker_impl_test.cc b/tensorflow/core/data/service/grpc_worker_impl_test.cc index 23eb6989c8cb1a..2d7563274bc295 100644 --- a/tensorflow/core/data/service/grpc_worker_impl_test.cc +++ b/tensorflow/core/data/service/grpc_worker_impl_test.cc @@ -83,7 +83,7 @@ class GrpcWorkerImplTest : public ::testing::Test { TF_RETURN_IF_ERROR( CredentialsFactory::CreateClientCredentials(kProtocol, &credentials)); ChannelArguments args; - args.SetMaxReceiveMessageSize(std::numeric_limits::max()); + args.SetMaxReceiveMessageSize(std::numeric_limits::max()); args.SetInt(GRPC_ARG_USE_LOCAL_SUBCHANNEL_POOL, true); std::shared_ptr channel = ::grpc::CreateCustomChannel(GetWorkerAddress(), credentials, args); diff --git a/tensorflow/core/data/service/snapshot/snapshot_manager.h b/tensorflow/core/data/service/snapshot/snapshot_manager.h index 98861523405206..4b2f19b9ca31ac 100644 --- a/tensorflow/core/data/service/snapshot/snapshot_manager.h +++ b/tensorflow/core/data/service/snapshot/snapshot_manager.h @@ -330,7 +330,7 @@ class SnapshotManager { absl::StatusOr> CreateSources( const DatasetDef& dataset_def) const; // Returns the total number of splits. - absl::StatusOr GetSplitsCardinality(); + absl::StatusOr GetSplitsCardinality(); // Resets a source when it runs out of splits, to support repetitions. 
absl::Status ResetSource(Source& source, int64_t source_index); int64_t num_sources() const TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { diff --git a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc index 9f0ef2cf3c2886..495743c8d64c0c 100644 --- a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc +++ b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc @@ -257,9 +257,10 @@ void BaseRemoteRendezvous::SameWorkerRecvDone( recv_args.alloc_attrs.gpu_compatible()); Allocator* out_allocator = dst_device->GetAllocator(attr); AllocationAttributes allocation_attr; - uint64 safe_alloc_frontier = dst_device->SafeAllocFrontier(0); + uint64_t safe_alloc_frontier = dst_device->SafeAllocFrontier(0); bool sync_dst_compute = (safe_alloc_frontier == 0); - std::function freed_by_func = [dst_device, &safe_alloc_frontier]() { + std::function freed_by_func = [dst_device, + &safe_alloc_frontier]() { safe_alloc_frontier = dst_device->SafeAllocFrontier(safe_alloc_frontier); return safe_alloc_frontier; }; diff --git a/tensorflow/core/distributed_runtime/cancellable_call.h b/tensorflow/core/distributed_runtime/cancellable_call.h index 7311c8e3a44f42..3a2691b7cff22f 100644 --- a/tensorflow/core/distributed_runtime/cancellable_call.h +++ b/tensorflow/core/distributed_runtime/cancellable_call.h @@ -27,8 +27,8 @@ namespace tensorflow { // registration with a CancellationManager. class CancellableCall { public: - CancellableCall(CancellationManager* cancel_mgr, const string& remote_worker, - WorkerCacheInterface* wc) + CancellableCall(CancellationManager* cancel_mgr, + const std::string& remote_worker, WorkerCacheInterface* wc) : is_cancelled_(false), cancel_mgr_(cancel_mgr), remote_worker_(remote_worker), @@ -51,7 +51,7 @@ class CancellableCall { mutex mu_; bool is_cancelled_; CancellationManager* const cancel_mgr_; // Not owned - const string remote_worker_; + const std::string remote_worker_; WorkerCacheInterface* const wc_; // Not owned WorkerInterface* const wi_; // Owned by wc_, must be released. CallOptions opts_; diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc index 966a281c1d2b66..c974bb4c520655 100644 --- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc +++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc @@ -39,9 +39,9 @@ absl::Status ClusterFunctionLibraryRuntime::ConstructFunctionGraph( const OpDef& sig, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options, const FunctionLibraryDefinition& flib_def, GraphDef* gdef, - std::vector* send_keys, std::vector* recv_keys) { - const string& target = options.target; - const string& func_name = sig.name(); + std::vector* send_keys, std::vector* recv_keys) { + const std::string& target = options.target; + const std::string& func_name = sig.name(); const FunctionDef* func_def = flib_def.Find(sig.name()); if (func_def == nullptr) { return errors::InvalidArgument("Function ", func_name, @@ -90,7 +90,7 @@ absl::Status ClusterFunctionLibraryRuntime::ConstructFunctionGraph( // src_incarnation = 1 works because the transfer is across the same device. // TODO(rohanj): Find the src_incarnation for the remote device and set it. 
- const string& key = Rendezvous::CreateKey( + const std::string& key = Rendezvous::CreateKey( target, 1 /* src_incarnation */, target, in.name(), FrameAndIter(0, 0)); send_keys->push_back(key); ++i; @@ -140,7 +140,7 @@ absl::Status ClusterFunctionLibraryRuntime::ConstructFunctionGraph( g.AddEdge(function_node, i, output_node, 0); - const string& key = + const std::string& key = Rendezvous::CreateKey(target, 1 /* src_incarnation */, target, out.name(), FrameAndIter(0, 0)); recv_keys->push_back(key); @@ -180,7 +180,7 @@ ClusterFunctionLibraryRuntime::~ClusterFunctionLibraryRuntime() { } void ClusterFunctionLibraryRuntime::Instantiate( - const string& function_name, const FunctionLibraryDefinition& lib_def, + const std::string& function_name, const FunctionLibraryDefinition& lib_def, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options, FunctionLibraryRuntime::LocalHandle* handle, FunctionLibraryRuntime::DoneCallback done) { @@ -192,7 +192,7 @@ void ClusterFunctionLibraryRuntime::Instantiate( WorkerInterface* wi = worker_cache->GetOrCreateWorker(target); if (wi == nullptr) { - std::vector workers; + std::vector workers; worker_session_->worker_cache()->ListWorkers(&workers); done(errors::InvalidArgument( "Could not find worker with target: ", target, @@ -202,8 +202,8 @@ void ClusterFunctionLibraryRuntime::Instantiate( // Make RPC and obtain a graph handle. GraphDef gdef; - auto* send_keys = new std::vector; - auto* recv_keys = new std::vector; + auto* send_keys = new std::vector; + auto* recv_keys = new std::vector; auto construct_graph_fn = [&](const FunctionLibraryDefinition* lib_def) { const FunctionDef* fdef = lib_def->Find(function_name); const OpDef& sig = fdef->signature(); @@ -285,7 +285,7 @@ void ClusterFunctionLibraryRuntime::Run( args[i].AsProtoTensorContent(send->mutable_tensor()); i++; } - const std::vector& recv_keys = function_data->recv_keys; + const std::vector& recv_keys = function_data->recv_keys; for (const auto& recv_key : recv_keys) { req->add_recv_key(recv_key); } @@ -308,7 +308,7 @@ void ClusterFunctionLibraryRuntime::Run( if (!local_status->ok()) { return; } - std::map mapped_recvs; + std::map mapped_recvs; for (auto& recv : *resp->mutable_recv()) { mapped_recvs[recv.name()] = recv.mutable_tensor(); } @@ -363,7 +363,7 @@ void ClusterFunctionLibraryRuntime::Run( } void ClusterFunctionLibraryRuntime::CleanUp( - uint64 step_id, FunctionLibraryRuntime::LocalHandle handle, + uint64_t step_id, FunctionLibraryRuntime::LocalHandle handle, FunctionLibraryRuntime::DoneCallback done) { FunctionData* function_data = nullptr; { diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h index a016a5eea418df..2d66854ec8c2ca 100644 --- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h +++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h @@ -41,7 +41,7 @@ class ClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime { ~ClusterFunctionLibraryRuntime() override; - void Instantiate(const string& function_name, + void Instantiate(const std::string& function_name, const FunctionLibraryDefinition& lib_def, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options, FunctionLibraryRuntime::LocalHandle* handle, @@ -57,7 +57,7 @@ class ClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime { absl::Span args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) override; 
- void CleanUp(uint64 step_id, FunctionLibraryRuntime::LocalHandle handle, + void CleanUp(uint64_t step_id, FunctionLibraryRuntime::LocalHandle handle, FunctionLibraryRuntime::DoneCallback done) override; DeviceMgr* remote_device_mgr() const override { return remote_device_mgr_; } @@ -67,7 +67,7 @@ class ClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime { const OpDef& sig, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options, const FunctionLibraryDefinition& flib_def, GraphDef* g, - std::vector* send_keys, std::vector* recv_keys); + std::vector* send_keys, std::vector* recv_keys); friend class ClusterFunctionLibraryRuntimeTest; mutable mutex mu_; @@ -77,19 +77,19 @@ class ClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime { DeviceMgr* remote_device_mgr_; // not owned. struct FunctionData { - const string graph_handle; - const string target; + const std::string graph_handle; + const std::string target; // Hold a shared pointer to the underlying worker cache to avoid it being // deleted in potential cluster update. const std::shared_ptr worker_cache; WorkerInterface* wi = nullptr; - const std::vector send_keys; - const std::vector recv_keys; + const std::vector send_keys; + const std::vector recv_keys; - FunctionData(const string& graph_handle, const string& target, + FunctionData(const std::string& graph_handle, const std::string& target, std::shared_ptr worker_cache, - WorkerInterface* wi, const std::vector& send_keys, - const std::vector& recv_keys) + WorkerInterface* wi, const std::vector& send_keys, + const std::vector& recv_keys) : graph_handle(graph_handle), target(target), worker_cache(std::move(worker_cache)), diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc index 40290ef3e4f54e..9be587fb48880c 100644 --- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc +++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime_test.cc @@ -42,7 +42,7 @@ class ClusterFunctionLibraryRuntimeTest : public ::testing::Test { &cluster_)); GrpcChannelSpec spec; - std::map host_ports; + std::map host_ports; int i = 0; for (const auto& target : cluster_->targets("localhost")) { host_ports[i++] = target; @@ -72,12 +72,13 @@ class ClusterFunctionLibraryRuntimeTest : public ::testing::Test { const OpDef& sig, test::function::Attrs attrs, const FunctionLibraryRuntime::InstantiateOptions& options, const FunctionLibraryDefinition& lib_def, GraphDef* g, - std::vector* send_keys, std::vector* recv_keys) { + std::vector* send_keys, + std::vector* recv_keys) { return ClusterFunctionLibraryRuntime::ConstructFunctionGraph( sig, attrs, options, lib_def, g, send_keys, recv_keys); } - void Instantiate(const string& function_name, + void Instantiate(const std::string& function_name, const FunctionLibraryDefinition& lib_def, test::function::Attrs attrs, const FunctionLibraryRuntime::InstantiateOptions& options, @@ -88,8 +89,8 @@ class ClusterFunctionLibraryRuntimeTest : public ::testing::Test { } absl::Status InstantiateAndRun( - const string& function_name, const FunctionLibraryDefinition& lib_def, - test::function::Attrs attrs, + const std::string& function_name, + const FunctionLibraryDefinition& lib_def, test::function::Attrs attrs, const FunctionLibraryRuntime::InstantiateOptions& options, const std::vector& args, std::vector rets) { FunctionLibraryRuntime::LocalHandle handle; @@ -135,7 
+136,7 @@ class ClusterFunctionLibraryRuntimeTest : public ::testing::Test { TEST_F(ClusterFunctionLibraryRuntimeTest, ConstructFunctionGraph) { GraphDef actual; - std::vector send_keys, recv_keys; + std::vector send_keys, recv_keys; FunctionDefLibrary proto; *(proto.add_function()) = test::function::Swap(); FunctionLibraryDefinition lib_def(OpRegistry::Global(), proto); @@ -402,10 +403,10 @@ TEST_F(ClusterFunctionLibraryRuntimeTest, DISABLED_InstantiateAndRun) { instantiate_opts.target = "/job:localhost/replica:0/task:1/cpu:0"; Tensor y; - auto x = test::AsTensor({1, 2, 3, 4}); + auto x = test::AsTensor({1, 2, 3, 4}); TF_EXPECT_OK(InstantiateAndRun("XTimesTwoInt32", lib_def, {}, instantiate_opts, {x}, {&y})); - test::ExpectTensorEqual(y, test::AsTensor({2, 4, 6, 8})); + test::ExpectTensorEqual(y, test::AsTensor({2, 4, 6, 8})); } TEST_F(ClusterFunctionLibraryRuntimeTest, diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc index ab13146b73bbbd..5acf12ccea0f69 100644 --- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc +++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc @@ -34,7 +34,7 @@ class CompleteGroupCall : public CancellableCall { CompleteGroupCall(const CollGroupParams& group, const DeviceAttributes& device, CancellationManager* cancel_mgr, - const string& remote_worker, WorkerCacheInterface* wc) + const std::string& remote_worker, WorkerCacheInterface* wc) : CancellableCall(cancel_mgr, remote_worker, wc) { req_.set_group_key(group.group_key); req_.set_group_size(group.group_size); @@ -55,9 +55,11 @@ class CompleteInstanceCall : public CancellableCall { public: CompleteInstanceCall(const CollGroupParams& group, const CollInstanceParams& instance, - const string& node_name, const string& device_name, - bool is_source, CancellationManager* cancel_mgr, - const string& remote_worker, WorkerCacheInterface* wc) + const std::string& node_name, + const std::string& device_name, bool is_source, + CancellationManager* cancel_mgr, + const std::string& remote_worker, + WorkerCacheInterface* wc) : CancellableCall(cancel_mgr, remote_worker, wc) { req_.set_name(node_name); req_.set_type(instance.type); @@ -91,7 +93,7 @@ CollectiveParamResolverDistributed::CollectiveParamResolverDistributed( const ConfigProto& config, const DeviceMgr* dev_mgr, DeviceResolverDistributed* dev_resolver, NcclCommunicatorInterface* nccl_communicator, - WorkerCacheInterface* worker_cache, const string& task_name) + WorkerCacheInterface* worker_cache, const std::string& task_name) : CollectiveParamResolverLocal(config, dev_mgr, dev_resolver, nccl_communicator, task_name), worker_cache_(worker_cache), @@ -364,8 +366,8 @@ absl::Status CollectiveParamResolverDistributed::UpdateInstanceCache( } void CollectiveParamResolverDistributed::CompleteInstanceDistributed( - const string& device, CollectiveParams* cp, CancellationManager* cancel_mgr, - const StatusCallback& done) { + const std::string& device, CollectiveParams* cp, + CancellationManager* cancel_mgr, const StatusCallback& done) { if (group_leader_.empty()) { // This is the group leader so resolution is local. 
return CompleteInstanceLocal(device, cp, done); diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h index 63006c1253547e..d885fe0bb81a0e 100644 --- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h +++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h @@ -32,7 +32,7 @@ class CollectiveParamResolverDistributed : public CollectiveParamResolverLocal { const ConfigProto& config, const DeviceMgr* dev_mgr, DeviceResolverDistributed* dev_resolver, NcclCommunicatorInterface* nccl_communicator, - WorkerCacheInterface* worker_cache, const string& task_name); + WorkerCacheInterface* worker_cache, const std::string& task_name); void CompleteParamsAsync(const DeviceAttributes& device, CollectiveParams* cp, CancellationManager* cancel_mgr, @@ -82,13 +82,14 @@ class CollectiveParamResolverDistributed : public CollectiveParamResolverLocal { // Finish populating *cp. Semantics are like those of // CompleteInstanceLocal but will make a remote call to the group // leader if necessary. - void CompleteInstanceDistributed(const string& device, CollectiveParams* cp, + void CompleteInstanceDistributed(const std::string& device, + CollectiveParams* cp, CancellationManager* cancel_mgr, const StatusCallback& done) TF_LOCKS_EXCLUDED(instance_mu_, group_mu_); WorkerCacheInterface* worker_cache_; // Not owned - const string group_leader_; + const std::string group_leader_; CancellationManager abortion_cancel_mgr_; }; diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc index 31140bf0755740..2880d722f0efbf 100644 --- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc +++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc @@ -34,8 +34,8 @@ limitations under the License. namespace tensorflow { namespace { -static std::unique_ptr NewDevice(const string& type, - const string& name) { +static std::unique_ptr NewDevice(const std::string& type, + const std::string& name) { class FakeDevice : public Device { public: explicit FakeDevice(const DeviceAttributes& attr) : Device(nullptr, attr) {} @@ -54,15 +54,16 @@ class FakeCache : public TestWorkerCache { public: // Override the Locality methods to actually pass through to the // worker. - bool GetDeviceLocalityNonBlocking(const string& device, + bool GetDeviceLocalityNonBlocking(const std::string& device, DeviceLocality* locality) override { return false; } - void GetDeviceLocalityAsync(const string& device, DeviceLocality* locality, + void GetDeviceLocalityAsync(const std::string& device, + DeviceLocality* locality, StatusCallback done) override { - string task_name; - string dev_part; + std::string task_name; + std::string dev_part; if (!DeviceNameUtils::SplitDeviceName(device, &task_name, &dev_part)) { done(errors::Internal("failed to parse device name")); return; @@ -94,7 +95,9 @@ class FakeCache : public TestWorkerCache { class FakeNcclCommunicator : public NcclCommunicatorInterface { public: // We only need to define GenerateCommunicatorKey(). 
- string GenerateCommunicatorKey() override { return "mock-communicator-key"; } + std::string GenerateCommunicatorKey() override { + return "mock-communicator-key"; + } void Enqueue(std::shared_ptr col_ctx, StatusCallback done) override { @@ -114,15 +117,16 @@ class DeviceResDistTest : public ::testing::Test { protected: void DefineWorkers(int num_workers, int num_devices, - const string& device_type, bool nccl) { + const std::string& device_type, bool nccl) { for (int w = 0; w < num_workers; ++w) { - string name = absl::StrCat("/job:worker/replica:0/task:", w); + std::string name = absl::StrCat("/job:worker/replica:0/task:", w); DefineWorker(name, device_type, num_devices, nccl); } } - void DefineWorker(const string& worker_name, const string& device_type, - int num_devices, bool nccl) { + void DefineWorker(const std::string& worker_name, + const std::string& device_type, int num_devices, + bool nccl) { ConfigProto config; config.mutable_experimental()->set_collective_group_leader( "/job:worker/replica:0/task:0"); @@ -136,7 +140,7 @@ class DeviceResDistTest : public ::testing::Test { } device_mgrs_[worker_name] = std::make_unique(std::move(devices)); - std::vector* dv = &dev_by_task_[worker_name]; + std::vector* dv = &dev_by_task_[worker_name]; dv->clear(); for (auto* d : device_mgrs_[worker_name]->ListDevices()) { dv->push_back(d->name()); @@ -160,14 +164,14 @@ class DeviceResDistTest : public ::testing::Test { } void DefineCollectiveParams(int num_workers, int num_devices, - const string& device_type, + const std::string& device_type, CollectiveType coll_type = REDUCTION_COLLECTIVE, int source_rank = 0) { for (int wi = 0; wi < num_workers; ++wi) { - string task_name = absl::StrCat("/job:worker/replica:0/task:", wi); + std::string task_name = absl::StrCat("/job:worker/replica:0/task:", wi); for (int di = 0; di < num_devices; ++di) { int idx = wi * num_devices + di; - string device_name = + std::string device_name = strings::StrCat(task_name, "/device:", device_type, ":", di); cp_[device_name] = CreateCollectiveParams(num_workers, num_devices, device_type, @@ -177,7 +181,7 @@ class DeviceResDistTest : public ::testing::Test { } CollectiveParams* CreateCollectiveParams(int num_workers, int num_devices, - const string& device_type, + const std::string& device_type, CollectiveType coll_type, bool is_source) { const int kGroupKey = 5; @@ -203,16 +207,16 @@ class DeviceResDistTest : public ::testing::Test { } int group_size = num_workers * num_devices; for (int wi = 0; wi < num_workers; ++wi) { - string task_name = absl::StrCat("/job:worker/replica:0/task:", wi); + std::string task_name = absl::StrCat("/job:worker/replica:0/task:", wi); for (int di = 0; di < num_devices; ++di) { - string device_name = absl::StrCat(task_name, "/device:CPU:", di); + std::string device_name = absl::StrCat(task_name, "/device:CPU:", di); IssueRequest(task_name, device_name, group_size); } } } - void IssueRequest(const string& task_name, const string& device_name, - int group_size) { + void IssueRequest(const std::string& task_name, + const std::string& device_name, int group_size) { Device* device = nullptr; TF_CHECK_OK(device_mgrs_[task_name]->LookupDevice(device_name, &device)); CollectiveParams* cp = cp_[device_name]; @@ -243,11 +247,11 @@ class DeviceResDistTest : public ::testing::Test { // Verify that all cp_ values get the same set of task and device // names, with unique default_rank in the expected order. 
const int dev_count = num_workers * num_devices; - string dev0 = "/job:worker/replica:0/task:0/device:CPU:0"; + std::string dev0 = "/job:worker/replica:0/task:0/device:CPU:0"; for (int wi = 0; wi < num_workers; ++wi) { - string task_name = absl::StrCat("/job:worker/replica:0/task:", wi); + std::string task_name = absl::StrCat("/job:worker/replica:0/task:", wi); for (int di = 0; di < num_devices; ++di) { - string device_name = absl::StrCat(task_name, "/device:CPU:", di); + std::string device_name = absl::StrCat(task_name, "/device:CPU:", di); int idx = wi * num_devices + di; TF_ASSERT_OK(status_[device_name]); EXPECT_EQ(cp_[device_name]->default_rank, idx); @@ -270,7 +274,8 @@ class DeviceResDistTest : public ::testing::Test { } } - void ValidateDeviceResolver(const CollectiveParams& cp, const string& task) { + void ValidateDeviceResolver(const CollectiveParams& cp, + const std::string& task) { for (const CollGroupMember& member : cp.group.members) { DeviceAttributes attributes; TF_ASSERT_OK(dev_resolvers_[task]->GetDeviceAttributes( @@ -279,14 +284,14 @@ class DeviceResDistTest : public ::testing::Test { } void RestartWorker(int worker_idx, int num_workers, int num_devices, - const string& device_type, bool nccl, + const std::string& device_type, bool nccl, CollectiveType coll_type = REDUCTION_COLLECTIVE, bool is_source = false) { - string worker_name = + std::string worker_name = absl::StrCat("/job:worker/replica:0/task:", worker_idx); DefineWorker(worker_name, device_type, num_devices, nccl); for (int i = 0; i < num_devices; ++i) { - string device_name = + std::string device_name = strings::StrCat(worker_name, "/device:", device_type, ":", i); if (cp_.find(device_name) != cp_.end()) { cp_[device_name]->Unref(); @@ -301,18 +306,18 @@ class DeviceResDistTest : public ::testing::Test { FakeNcclCommunicator nccl_communicator_; CancellationManager cm_; // Below are keyed by task names. 
- absl::flat_hash_map> device_mgrs_; - absl::flat_hash_map> + absl::flat_hash_map> device_mgrs_; + absl::flat_hash_map> dev_resolvers_; - absl::flat_hash_map> cp_resolvers_; - absl::flat_hash_map> dev_by_task_; - absl::flat_hash_map> worker_envs_; - absl::flat_hash_map> workers_; + absl::flat_hash_map> dev_by_task_; + absl::flat_hash_map> worker_envs_; + absl::flat_hash_map> workers_; // Below are keyed by device names; - absl::flat_hash_map cp_; - absl::flat_hash_map status_; + absl::flat_hash_map cp_; + absl::flat_hash_map status_; mutex mu_; int num_done_ TF_GUARDED_BY(mu_); condition_variable done_; @@ -343,8 +348,8 @@ TEST_F(DeviceResDistTest, DifferentIncarnation) { DefineCollectiveParams(num_workers, num_devices, "CPU"); IssueRequests(num_workers, num_devices); RestartWorker(1, num_workers, num_devices, "CPU", /*nccl*/ false); - const string task_name = "/job:worker/replica:0/task:1"; - const string device_name = absl::StrCat(task_name, "/device:CPU:0"); + const std::string task_name = "/job:worker/replica:0/task:1"; + const std::string device_name = absl::StrCat(task_name, "/device:CPU:0"); IssueRequest(task_name, device_name, num_workers * num_devices); EXPECT_TRUE(absl::IsFailedPrecondition(status_[device_name])); } diff --git a/tensorflow/core/distributed_runtime/collective_rma_distributed.cc b/tensorflow/core/distributed_runtime/collective_rma_distributed.cc index 1b4ba6296f4978..afab5707e58e4e 100644 --- a/tensorflow/core/distributed_runtime/collective_rma_distributed.cc +++ b/tensorflow/core/distributed_runtime/collective_rma_distributed.cc @@ -39,9 +39,9 @@ namespace { class RecvBufCall : public CancellableCall { public: - RecvBufCall(int64_t step_id, const string& peer_device, - const string& peer_task, const string& key, Device* to_device, - DeviceContext* to_device_ctx, + RecvBufCall(int64_t step_id, const std::string& peer_device, + const std::string& peer_task, const std::string& key, + Device* to_device, DeviceContext* to_device_ctx, const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor, const DeviceLocality& client_locality, const DeviceAttributes& server_attributes, @@ -107,11 +107,12 @@ absl::Status PopulateTensorFromResponse(const RecvBufResponse& response, } // namespace void CollectiveRemoteAccessDistributed::RecvFromPeer( - const string& peer_device, const string& peer_task, bool peer_is_local, - const string& key, Device* to_device, DeviceContext* to_device_ctx, - const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor, - const DeviceLocality& client_locality, int dev_to_dev_stream_index, - CancellationManager* cancellation_manager, const StatusCallback& done) { + const std::string& peer_device, const std::string& peer_task, + bool peer_is_local, const std::string& key, Device* to_device, + DeviceContext* to_device_ctx, const AllocatorAttributes& to_alloc_attr, + Tensor* to_tensor, const DeviceLocality& client_locality, + int dev_to_dev_stream_index, CancellationManager* cancellation_manager, + const StatusCallback& done) { if (peer_is_local) { CollectiveRemoteAccessLocal::RecvFromPeer( peer_device, peer_task, peer_is_local, key, to_device, to_device_ctx, @@ -232,7 +233,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( } void CollectiveRemoteAccessDistributed::CheckPeerHealth( - const string& peer_task, int64_t timeout_in_ms, + const std::string& peer_task, int64_t timeout_in_ms, const StatusCallback& done) { if (peer_task == task_name_) { // Fast path if the peer is the worker itself. 
@@ -265,7 +266,7 @@ void CollectiveRemoteAccessDistributed::CheckPeerHealth( s = dev_resolver_->GetAllDeviceAttributes(peer_task, &cached_attrs); } if (s.ok()) { - absl::flat_hash_set remote_incarnations; + absl::flat_hash_set remote_incarnations; for (const DeviceAttributes& da : resp->device_attributes()) { remote_incarnations.insert(da.incarnation()); } diff --git a/tensorflow/core/distributed_runtime/collective_rma_distributed.h b/tensorflow/core/distributed_runtime/collective_rma_distributed.h index 22d4d6f5a119e6..4557e9b36ac206 100644 --- a/tensorflow/core/distributed_runtime/collective_rma_distributed.h +++ b/tensorflow/core/distributed_runtime/collective_rma_distributed.h @@ -29,7 +29,8 @@ class CollectiveRemoteAccessDistributed : public CollectiveRemoteAccessLocal { CollectiveRemoteAccessDistributed( const DeviceMgr* dev_mgr, DeviceResolverInterface* dev_resolver, std::shared_ptr work_queue, - WorkerCacheInterface* worker_cache, int64_t step_id, string task_name) + WorkerCacheInterface* worker_cache, int64_t step_id, + std::string task_name) : CollectiveRemoteAccessLocal(dev_mgr, dev_resolver, step_id), worker_cache_(worker_cache), work_queue_(std::move(work_queue)), @@ -37,8 +38,9 @@ class CollectiveRemoteAccessDistributed : public CollectiveRemoteAccessLocal { ~CollectiveRemoteAccessDistributed() override {} - void RecvFromPeer(const string& peer_device, const string& peer_task, - bool peer_is_local, const string& key, Device* to_device, + void RecvFromPeer(const std::string& peer_device, + const std::string& peer_task, bool peer_is_local, + const std::string& key, Device* to_device, DeviceContext* to_device_ctx, const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor, const DeviceLocality& client_locality, @@ -46,7 +48,7 @@ class CollectiveRemoteAccessDistributed : public CollectiveRemoteAccessLocal { CancellationManager* cancellation_manager, const StatusCallback& done) override; - void CheckPeerHealth(const string& peer_task, int64_t timeout_in_ms, + void CheckPeerHealth(const std::string& peer_task, int64_t timeout_in_ms, const StatusCallback& done) override; void StartAbort(const absl::Status& s) override; @@ -57,7 +59,7 @@ class CollectiveRemoteAccessDistributed : public CollectiveRemoteAccessLocal { // `CollectiveExecutorMgr`. 
std::shared_ptr work_queue_; CancellationManager abortion_cancel_mgr_; - string task_name_; + std::string task_name_; }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/collective_rma_distributed_test.cc b/tensorflow/core/distributed_runtime/collective_rma_distributed_test.cc index a2ec3b1aff2834..4d626cb9f49a9c 100644 --- a/tensorflow/core/distributed_runtime/collective_rma_distributed_test.cc +++ b/tensorflow/core/distributed_runtime/collective_rma_distributed_test.cc @@ -50,14 +50,16 @@ namespace { class FakeAllocator : public Allocator { public: - string Name() override { return "fake"; } + std::string Name() override { return "fake"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override { - return port::AlignedMalloc(num_bytes, alignment); + return tsl::port::AlignedMalloc(num_bytes, + static_cast(alignment)); } void DeallocateRaw(void* ptr) override { return port::AlignedFree(ptr); } }; -static std::unique_ptr NewDevice(const string& type, const string& name, +static std::unique_ptr NewDevice(const std::string& type, + const std::string& name, Allocator* allocator) { class FakeDevice : public Device { public: @@ -81,7 +83,7 @@ static int64_t kStepId = 123; class FakeWorker : public TestWorkerInterface { public: - FakeWorker(const string& name, DeviceMgr* dev_mgr, + FakeWorker(const std::string& name, DeviceMgr* dev_mgr, DeviceResolverDistributed* dres, bool is_failed, bool set_tensor_in_extra) : name_(name), @@ -144,7 +146,7 @@ class FakeWorker : public TestWorkerInterface { // Since this is not really RDMA into pre-allocated memory send // the bytes in the response. RecvBufRespExtra extra; - extra.add_tensor_content(string( + extra.add_tensor_content(std::string( reinterpret_cast(DMAHelper::base(h->prod_value)), num_bytes)); response->mutable_transport_options()->PackFrom(extra); @@ -164,7 +166,7 @@ class FakeWorker : public TestWorkerInterface { } private: - string name_; + std::string name_; DeviceMgr* device_mgr_; DeviceResolverDistributed* device_resolver_; BufRendezvous buf_rendezvous_; @@ -176,15 +178,16 @@ class FakeCache : public TestWorkerCache { public: // Override the Locality methods to actually pass through to the // worker. 
- bool GetDeviceLocalityNonBlocking(const string& device, + bool GetDeviceLocalityNonBlocking(const std::string& device, DeviceLocality* locality) override { return false; } - void GetDeviceLocalityAsync(const string& device, DeviceLocality* locality, + void GetDeviceLocalityAsync(const std::string& device, + DeviceLocality* locality, StatusCallback done) override { - string task_name; - string dev_part; + std::string task_name; + std::string dev_part; if (!DeviceNameUtils::SplitDeviceName(device, &task_name, &dev_part)) { done(errors::Internal("failed to parse device name")); return; @@ -246,10 +249,10 @@ class CollRMADistTest void SetUp() override { const int num_workers = 2; const int num_devices = 1; - string device_type = "CPU"; - string dev0_worker_name; + std::string device_type = "CPU"; + std::string dev0_worker_name; for (int w = 0; w < num_workers; ++w) { - string name = absl::StrCat("/job:worker/replica:0/task:", w); + std::string name = absl::StrCat("/job:worker/replica:0/task:", w); if (w == 0) { dev0_worker_name = name; } @@ -288,8 +291,9 @@ class CollRMADistTest } } - void DefineWorker(const string& worker_name, const string& device_type, - int num_devices, bool is_failed = false) { + void DefineWorker(const std::string& worker_name, + const std::string& device_type, int num_devices, + bool is_failed = false) { std::vector> devices; for (int i = 0; i < num_devices; ++i) { devices.push_back(NewDevice( @@ -316,8 +320,9 @@ class CollRMADistTest wc_.AddWorker(worker_name, fw); } - void RestartWorker(const string& worker_name, const string& device_type, - int num_devices, bool is_failed = false) { + void RestartWorker(const std::string& worker_name, + const std::string& device_type, int num_devices, + bool is_failed = false) { auto it = dev_resolvers_.find(worker_name); if (it != dev_resolvers_.end()) { delete it->second; @@ -354,8 +359,8 @@ class CollRMADistTest FakeCache wc_; CancellationManager cm_; std::vector device_mgrs_; - std::unordered_map dev_resolvers_; - std::unordered_map> dev_by_task_; + std::unordered_map dev_resolvers_; + std::unordered_map> dev_by_task_; std::shared_ptr work_queue_; std::vector workers_; std::unique_ptr rma_; @@ -379,7 +384,7 @@ TEST_P(CollRMADistTest, ProdFirstOK) { absl::Status consumer_status; absl::Status producer_status; FakeWorker* wi = workers_[1]; - const string kBufKey = "fake_buf_key"; + const std::string kBufKey = "fake_buf_key"; wi->buf_rendezvous()->ProvideBuf( kBufKey, nullptr /*device*/, nullptr /*dev_ctx*/, &expected_value_, AllocatorAttributes(), @@ -389,7 +394,7 @@ TEST_P(CollRMADistTest, ProdFirstOK) { }, nullptr /*cancellation_manager*/); Device* dst_device = nullptr; - string dev_name = "CPU:0"; + std::string dev_name = "CPU:0"; TF_EXPECT_OK(device_mgrs_[0]->LookupDevice(dev_name, &dst_device)); DeviceContext* to_device_ctx = nullptr; MaybeSetGPUDevice(dst_device); @@ -418,9 +423,9 @@ TEST_P(CollRMADistTest, ConsFirstOK) { absl::Status consumer_status; absl::Status producer_status; FakeWorker* wi = workers_[1]; - const string kBufKey = "fake_buf_key"; + const std::string kBufKey = "fake_buf_key"; Device* dst_device = nullptr; - string dev_name = "CPU:0"; + std::string dev_name = "CPU:0"; TF_EXPECT_OK(device_mgrs_[0]->LookupDevice(dev_name, &dst_device)); MaybeSetGPUDevice(dst_device); DeviceContext* to_device_ctx = nullptr; @@ -454,9 +459,9 @@ TEST_P(CollRMADistTest, ConsFirstAbort) { ResolveDeviceAttributes(); absl::Notification consumer_note; absl::Status consumer_status; - const string kBufKey = "fake_buf_key"; + const 
std::string kBufKey = "fake_buf_key"; Device* dst_device = nullptr; - string dev_name = "CPU:0"; + std::string dev_name = "CPU:0"; TF_EXPECT_OK(device_mgrs_[0]->LookupDevice(dev_name, &dst_device)); MaybeSetGPUDevice(dst_device); DeviceContext* to_device_ctx = nullptr; @@ -483,7 +488,7 @@ TEST_P(CollRMADistTest, ResponseTooLarge) { absl::Status consumer_status; absl::Status producer_status; FakeWorker* wi = workers_[1]; - const string kBufKey = "fake_buf_key"; + const std::string kBufKey = "fake_buf_key"; wi->buf_rendezvous()->ProvideBuf( kBufKey, nullptr /*device*/, nullptr /*dev_ctx*/, &large_response_, AllocatorAttributes(), @@ -493,7 +498,7 @@ TEST_P(CollRMADistTest, ResponseTooLarge) { }, nullptr /*cancellation_manager*/); Device* dst_device = nullptr; - string dev_name = "CPU:0"; + std::string dev_name = "CPU:0"; TF_EXPECT_OK(device_mgrs_[0]->LookupDevice(dev_name, &dst_device)); DeviceContext* to_device_ctx = nullptr; MaybeSetGPUDevice(dst_device); @@ -523,9 +528,9 @@ TEST_P(CollRMADistTest, WorkerRestart) { absl::Status consumer_status; absl::Status producer_status; FakeWorker* wi = workers_[1]; - const string buf_key = "fake_buf_key"; + const std::string buf_key = "fake_buf_key"; Device* dst_device = nullptr; - string dev_name = "CPU:0"; + std::string dev_name = "CPU:0"; TF_EXPECT_OK(device_mgrs_[0]->LookupDevice(dev_name, &dst_device)); MaybeSetGPUDevice(dst_device); DeviceContext* to_device_ctx = nullptr; diff --git a/tensorflow/core/distributed_runtime/device_resolver_distributed.cc b/tensorflow/core/distributed_runtime/device_resolver_distributed.cc index f0f8c50b2fd50a..3de97cc08726ff 100644 --- a/tensorflow/core/distributed_runtime/device_resolver_distributed.cc +++ b/tensorflow/core/distributed_runtime/device_resolver_distributed.cc @@ -28,7 +28,7 @@ DeviceResolverDistributed::DeviceResolverDistributed(const DeviceMgr* dev_mgr) { } absl::Status DeviceResolverDistributed::GetDeviceAttributes( - const string& device, DeviceAttributes* attributes) { + const std::string& device, DeviceAttributes* attributes) { mutex_lock l(mu_); auto it = attr_table_.find(device); if (it == attr_table_.end()) { @@ -39,11 +39,11 @@ absl::Status DeviceResolverDistributed::GetDeviceAttributes( } absl::Status DeviceResolverDistributed::GetAllDeviceAttributes( - const string& task, std::vector* attributes) { + const std::string& task, std::vector* attributes) { mutex_lock l(mu_); attributes->clear(); for (const auto& it : attr_table_) { - const string& device_name = it.first; + const std::string& device_name = it.first; if (DeviceNameUtils::IsSameAddressSpace(task, device_name)) { attributes->push_back(it.second); } diff --git a/tensorflow/core/distributed_runtime/device_resolver_distributed.h b/tensorflow/core/distributed_runtime/device_resolver_distributed.h index b46c288cb3a456..3bf6cfa813fe2f 100644 --- a/tensorflow/core/distributed_runtime/device_resolver_distributed.h +++ b/tensorflow/core/distributed_runtime/device_resolver_distributed.h @@ -31,19 +31,21 @@ class DeviceResolverDistributed : public DeviceResolverInterface { public: explicit DeviceResolverDistributed(const DeviceMgr* dev_mgr); - absl::Status GetDeviceAttributes(const string& device, + absl::Status GetDeviceAttributes(const std::string& device, DeviceAttributes* attributes) override; absl::Status GetAllDeviceAttributes( - const string& task, std::vector* attributes) override; + const std::string& task, + std::vector* attributes) override; absl::Status UpdateDeviceAttributes( const std::vector& attributes) override; protected: - 
const string task_name_; + const std::string task_name_; mutex mu_; - absl::flat_hash_map attr_table_ TF_GUARDED_BY(mu_); + absl::flat_hash_map attr_table_ + TF_GUARDED_BY(mu_); }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/device_resolver_distributed_test.cc b/tensorflow/core/distributed_runtime/device_resolver_distributed_test.cc index 0c2bdba1da59d4..8a3245ce2ee3e5 100644 --- a/tensorflow/core/distributed_runtime/device_resolver_distributed_test.cc +++ b/tensorflow/core/distributed_runtime/device_resolver_distributed_test.cc @@ -34,7 +34,8 @@ using ::testing::UnorderedElementsAre; // Create a fake 'Device' whose only interesting attribute is a non-default // DeviceLocality and incarnation. -std::unique_ptr NewDevice(const string& type, const string& name) { +std::unique_ptr NewDevice(const std::string& type, + const std::string& name) { class FakeDevice : public Device { public: explicit FakeDevice(const DeviceAttributes& attr) : Device(nullptr, attr) {} diff --git a/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.cc index 5688c30275eb2e..f62268f3a40d3f 100644 --- a/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.cc +++ b/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.cc @@ -54,7 +54,7 @@ void StripDefaultAttributesInRegisterFunctionOp( } // namespace void EagerClusterFunctionLibraryRuntime::Instantiate( - const string& function_name, const FunctionLibraryDefinition& lib_def, + const std::string& function_name, const FunctionLibraryDefinition& lib_def, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options, FunctionLibraryRuntime::LocalHandle* handle, FunctionLibraryRuntime::DoneCallback done) { @@ -281,7 +281,7 @@ void EagerClusterFunctionLibraryRuntime::Run( } void EagerClusterFunctionLibraryRuntime::CleanUp( - uint64 step_id, FunctionLibraryRuntime::LocalHandle handle, + uint64_t step_id, FunctionLibraryRuntime::LocalHandle handle, FunctionLibraryRuntime::DoneCallback done) { FunctionData* function_data = nullptr; { @@ -312,7 +312,8 @@ void EagerClusterFunctionLibraryRuntime::CleanUp( } DistributedFunctionLibraryRuntime* CreateClusterFLR( - const uint64 context_id, EagerContext* ctx, WorkerSession* worker_session) { + const uint64_t context_id, EagerContext* ctx, + WorkerSession* worker_session) { return new EagerClusterFunctionLibraryRuntime( context_id, ctx, worker_session->remote_device_mgr()); } diff --git a/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.h b/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.h index 58af5ed93ae8ac..6fb1fc280f0638 100644 --- a/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.h +++ b/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.h @@ -37,7 +37,8 @@ namespace eager { class EagerClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime { public: - EagerClusterFunctionLibraryRuntime(const uint64 context_id, EagerContext* ctx, + EagerClusterFunctionLibraryRuntime(const uint64_t context_id, + EagerContext* ctx, DeviceMgr* remote_device_mgr) : context_id_(context_id), ctx_(ctx), @@ -49,7 +50,7 @@ class EagerClusterFunctionLibraryRuntime // on the remote target specified in `options.target`. This should be // triggered as part of instantiating a multi-device function in // ProcessFunctionLibraryRuntime. 
- void Instantiate(const string& function_name, + void Instantiate(const std::string& function_name, const FunctionLibraryDefinition& lib_def, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options, FunctionLibraryRuntime::LocalHandle* handle, @@ -75,23 +76,23 @@ class EagerClusterFunctionLibraryRuntime absl::Span args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) override; - void CleanUp(uint64 step_id, FunctionLibraryRuntime::LocalHandle handle, + void CleanUp(uint64_t step_id, FunctionLibraryRuntime::LocalHandle handle, FunctionLibraryRuntime::DoneCallback done) override; DeviceMgr* remote_device_mgr() const override { return remote_device_mgr_; } private: - const uint64 context_id_; + const uint64_t context_id_; EagerContext* ctx_; DeviceMgr* remote_device_mgr_; // not owned. struct FunctionData { - const string target; + const std::string target; const absl::optional> ret_indices; core::RefCountPtr eager_client; std::unique_ptr op; - FunctionData(const string& target, + FunctionData(const std::string& target, const absl::optional>& ret_indices, EagerClient* eager_client, std::unique_ptr op) : target(target), @@ -107,7 +108,8 @@ class EagerClusterFunctionLibraryRuntime }; DistributedFunctionLibraryRuntime* CreateClusterFLR( - const uint64 context_id, EagerContext* ctx, WorkerSession* worker_session); + const uint64_t context_id, EagerContext* ctx, + WorkerSession* worker_session); } // namespace eager } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/eager/destroy_tensor_handle_node.h b/tensorflow/core/distributed_runtime/eager/destroy_tensor_handle_node.h index ade7260cc9fb74..a0991dc601be4e 100644 --- a/tensorflow/core/distributed_runtime/eager/destroy_tensor_handle_node.h +++ b/tensorflow/core/distributed_runtime/eager/destroy_tensor_handle_node.h @@ -71,8 +71,8 @@ class DestroyTensorHandleNode : public tensorflow::AsyncEagerNode { // Remote node deletions are best effort bool Fatal() const override { return false; } - string DebugString() const override { - string out = "[DestroyTensorHandleNode]"; + std::string DebugString() const override { + std::string out = "[DestroyTensorHandleNode]"; absl::StrAppend(&out, " request: ", request_->DebugString()); return out; } @@ -80,7 +80,7 @@ class DestroyTensorHandleNode : public tensorflow::AsyncEagerNode { private: std::unique_ptr request_; core::RefCountPtr eager_client_; - const string remote_task_; + const std::string remote_task_; bool ready_; }; diff --git a/tensorflow/core/distributed_runtime/eager/eager_client.h b/tensorflow/core/distributed_runtime/eager/eager_client.h index 6fc956014ab666..a2a3d596bff10a 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_client.h +++ b/tensorflow/core/distributed_runtime/eager/eager_client.h @@ -92,7 +92,7 @@ class EagerClientCache { // increment the refcount of the client. The reference ownership is // transferred to the caller, and the unref should automatically happen when // destructing the RefCountPtr object from the caller's side. 
- virtual absl::Status GetClient(const string& target, + virtual absl::Status GetClient(const std::string& target, core::RefCountPtr* client) = 0; }; diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc index 2bb45a8ed53d67..abae4bdce1d23a 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc @@ -59,8 +59,9 @@ namespace eager { namespace { absl::Status GetNumRetvals( - FunctionLibraryDefinition* func_lib_def, const string& op_name, - const google::protobuf::Map& attrs, int* num_retvals) { + FunctionLibraryDefinition* func_lib_def, const std::string& op_name, + const google::protobuf::Map& attrs, + int* num_retvals) { const tensorflow::OpRegistrationData* op_reg_data = nullptr; auto status = tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data); if (absl::IsNotFound(status)) { @@ -189,10 +190,10 @@ absl::Status TensorHandleShape(TensorHandle* handle, TensorShapeProto* proto) { absl::Status AddOpRetvalsToResponse( EagerContext* eager_context, int op_id, int num_retvals, - const std::vector& output_nums, TensorHandle** retvals, + const std::vector& output_nums, TensorHandle** retvals, std::function add_tensor_proto_fn, std::function add_shape_proto_fn, - std::function add_device_fn = nullptr) { + std::function add_device_fn = nullptr) { // retvals hold references to the allocated output tensor handles. If errors // happen with adding some results to the response, aggregate the status in sg // instead of directly returning the error, to make sure unref or ownership @@ -291,7 +292,7 @@ absl::Status EagerServiceImpl::CreateContext( TF_RETURN_IF_ERROR(env_->session_mgr->DeleteAllSessions()); // Cleanup existing contexts if any. - std::unordered_map tmp_contexts; + std::unordered_map tmp_contexts; { mutex_lock l(contexts_mu_); if (!contexts_.empty()) { @@ -372,7 +373,7 @@ absl::Status EagerServiceImpl::CreateContext( // case ctx will be deleted by this unref. 
core::ScopedUnref unref_ctx(ctx); - std::vector remote_workers; + std::vector remote_workers; worker_session->worker_cache()->ListWorkers(&remote_workers); remote_workers.erase(std::remove(remote_workers.begin(), remote_workers.end(), worker_session->worker_name()), @@ -500,7 +501,7 @@ absl::Status EagerServiceImpl::UpdateContext( const tensorflow::DeviceMgr* device_mgr = worker_session->device_mgr(); - std::vector remote_workers; + std::vector remote_workers; worker_session->worker_cache()->ListWorkers(&remote_workers); remote_workers.erase(std::remove(remote_workers.begin(), remote_workers.end(), worker_session->worker_name()), @@ -508,7 +509,7 @@ absl::Status EagerServiceImpl::UpdateContext( VLOG(1) << "On existing server " << worker_session->worker_name() << " updating remote workers"; if (VLOG_IS_ON(2)) { - for (const string& rw : remote_workers) { + for (const std::string& rw : remote_workers) { VLOG(2) << "Remote worker " << rw; } } @@ -546,8 +547,8 @@ absl::Status EagerServiceImpl::UpdateContext( return absl::OkStatus(); } -absl::Status EagerServiceImpl::CreateMasterContext( - const tensorflow::uint64 context_id, EagerContext* context) { +absl::Status EagerServiceImpl::CreateMasterContext(const uint64_t context_id, + EagerContext* context) { { mutex_lock l(contexts_mu_); auto iter = contexts_.find(context_id); @@ -616,7 +617,7 @@ void EagerServiceImpl::RunComponentFunction( auto* retvals = new absl::FixedArray(*num_retvals); VLOG(3) << "ServerContext: Calling EagerLocalExecuteAsync for op " << operation.id(); - std::vector output_nums; + std::vector output_nums; for (const int32_t output_num : request->output_num()) { output_nums.push_back(output_num); } @@ -676,7 +677,7 @@ absl::Status EagerServiceImpl::ExecuteOp(CallOptions* call_opts, num_retvals), &num_retvals)); - std::function add_device_fn = nullptr; + std::function add_device_fn = nullptr; // Send the output devices of a function back to let a client know where the // outputs are. For a primitive op, an output devics is the op device which is // known on a client. @@ -694,7 +695,7 @@ absl::Status EagerServiceImpl::ExecuteOp(CallOptions* call_opts, absl::Status EagerServiceImpl::Enqueue(CallOptions* call_opts, const EnqueueRequest* request, EnqueueResponse* response, - uint64 stream_id) { + uint64_t stream_id) { tsl::profiler::TraceMe activity( [&] { return absl::StrCat( @@ -901,12 +902,12 @@ absl::Status EagerServiceImpl::SendPackedHandle( } absl::Status EagerServiceImpl::GetServerContext( - uint64 context_id, ServerContext** server_context) { + uint64_t context_id, ServerContext** server_context) { tf_shared_lock l(contexts_mu_); auto iter = contexts_.find(context_id); if (iter == contexts_.end()) { *server_context = nullptr; - return errors::Aborted(strings::Printf( + return errors::Aborted(absl::StrFormat( "Unable to find a context_id matching the specified one " "(%llu). Perhaps the worker was restarted, or the context was GC'd?", static_cast(context_id))); diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl.h b/tensorflow/core/distributed_runtime/eager/eager_service_impl.h index 329f60cf583ef7..90d49cc7a64e19 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl.h +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl.h @@ -83,15 +83,15 @@ class EagerServiceImpl { UpdateContextResponse* response); // Create a ServerContext for master eager context. 
- absl::Status CreateMasterContext(const tensorflow::uint64 context_id, + absl::Status CreateMasterContext(const uint64_t context_id, EagerContext* context); - static constexpr uint64 kInvalidStreamId = 0; + static constexpr uint64_t kInvalidStreamId = 0; // Used by both Enqueue and StreamingEnqueue RPCs. absl::Status Enqueue(CallOptions* call_opts, const EnqueueRequest* request, EnqueueResponse* response, - uint64 stream_id = kInvalidStreamId); + uint64_t stream_id = kInvalidStreamId); absl::Status WaitQueueDone(const WaitQueueDoneRequest* request, WaitQueueDoneResponse* response); @@ -166,7 +166,7 @@ class EagerServiceImpl { const bool is_master_; }; // The returned ServerContext will need to be Unrefed. - absl::Status GetServerContext(uint64, ServerContext**); + absl::Status GetServerContext(uint64_t, ServerContext**); class ClientTensorHandleDeleteNode : public EagerNode { public: @@ -194,8 +194,8 @@ class EagerServiceImpl { // Remote node deletions are best effort bool Fatal() const override { return false; } - string DebugString() const override { - string out = "[ClientTensorHandleDeleteNode]"; + std::string DebugString() const override { + std::string out = "[ClientTensorHandleDeleteNode]"; absl::StrAppend(&out, " op_id: ", handle_to_delete_->op_id); absl::StrAppend(&out, ", output_num: ", handle_to_delete_->output_num); return out; @@ -225,7 +225,7 @@ class EagerServiceImpl { WorkerEnv* const env_; // Not owned. mutex contexts_mu_; - std::unordered_map contexts_ + std::unordered_map contexts_ TF_GUARDED_BY(contexts_mu_); std::unique_ptr gc_thread_; diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc index a4b1f6552b4b33..e9be274d4fea19 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc @@ -56,14 +56,14 @@ namespace { class TestEagerServiceImpl : public EagerServiceImpl { public: explicit TestEagerServiceImpl(WorkerEnv* env) : EagerServiceImpl(env) {} - absl::Status GetEagerContext(const uint64 context_id, EagerContext** ctx) { + absl::Status GetEagerContext(const uint64_t context_id, EagerContext** ctx) { ServerContext* context = nullptr; TF_RETURN_IF_ERROR(GetServerContext(context_id, &context)); core::ScopedUnref context_unref(context); *ctx = context->Context(); return absl::OkStatus(); } - absl::Status GetTensorHandle(const uint64 context_id, + absl::Status GetTensorHandle(const uint64_t context_id, const RemoteTensorHandleInternal& remote_handle, tensorflow::TensorHandle** handle) { ServerContext* context = nullptr; @@ -136,7 +136,7 @@ class FakeEagerClient : public EagerClient { class DummyEagerClientCache : public EagerClientCache { public: DummyEagerClientCache() : client_(new FakeEagerClient) {} - absl::Status GetClient(const string& target, + absl::Status GetClient(const std::string& target, core::RefCountPtr* client) override { client->reset(client_.get()); client_->Ref(); @@ -154,7 +154,7 @@ class FakeCache : public TestWorkerCache { return absl::OkStatus(); } - void ListWorkers(std::vector* workers) const override { + void ListWorkers(std::vector* workers) const override { workers->push_back("/job:localhost/replica:0/task:0"); } }; @@ -202,10 +202,11 @@ void SetTensorProto(TensorProto* tensor_proto) { } void BuildOperation( - Operation* operation, int64_t id, const string& name, - const std::vector>>& + Operation* operation, int64_t id, const std::string& name, + 
const std::vector>>& inputs, - const std::unordered_map& attrs, const string& device) { + const std::unordered_map& attrs, + const std::string& device) { operation->set_id(id); operation->set_name(name); operation->set_device(device); @@ -216,7 +217,7 @@ void BuildOperation( std::get(input); } else { const auto& tensor_handle_pair = - std::get>(input); + std::get>(input); auto* input = operation->add_op_inputs()->mutable_remote_handle(); input->set_op_id(tensor_handle_pair.first); input->set_output_num(tensor_handle_pair.second); @@ -231,21 +232,22 @@ void BuildOperation( } void AddOperationToEnqueueRequest( - int64_t id, const string& name, - const std::vector>>& + int64_t id, const std::string& name, + const std::vector>>& inputs, - const std::unordered_map& attrs, const string& device, - EnqueueRequest* request) { + const std::unordered_map& attrs, + const std::string& device, EnqueueRequest* request) { auto* operation = request->add_queue()->mutable_operation(); BuildOperation(operation, id, name, inputs, attrs, device); } void AddOperationToRunComponentFunctionRequest( - int64_t id, const string& name, - const std::vector>>& + int64_t id, const std::string& name, + const std::vector>>& inputs, - const std::unordered_map& attrs, const string& device, - const int output_num, RunComponentFunctionRequest* request) { + const std::unordered_map& attrs, + const std::string& device, const int output_num, + RunComponentFunctionRequest* request) { auto* operation = request->mutable_operation(); operation->set_is_function(true); operation->set_is_component_function(true); @@ -450,7 +452,7 @@ tensorflow::FunctionDef SingleRecvNodeFunction() { TEST_F(EagerServiceImplTest, BasicTest) { TestEagerServiceImpl eager_service_impl(&worker_env_); - uint64 context_id = random::New64(); + uint64_t context_id = random::New64(); CreateContextRequest request; request.mutable_server_def()->set_job_name("localhost"); @@ -464,7 +466,7 @@ TEST_F(EagerServiceImplTest, BasicTest) { remote_enqueue_request.set_context_id(context_id); EnqueueResponse remote_enqueue_response; - std::unordered_map const_attrs; + std::unordered_map const_attrs; AttrValue val; val.set_type(tensorflow::DataType::DT_FLOAT); const_attrs.insert({"dtype", val}); @@ -476,7 +478,7 @@ TEST_F(EagerServiceImplTest, BasicTest) { "/job:localhost/replica:0/task:0/device:CPU:0", &remote_enqueue_request); - std::unordered_map attrs; + std::unordered_map attrs; val.Clear(); val.set_type(tensorflow::DataType::DT_FLOAT); attrs.insert({"T", val}); @@ -529,12 +531,12 @@ class EagerServiceImplFunctionTest : public EagerServiceImplTest { // Creates a context and attempts to execute a function. 
void TestFunction(const RegisterFunctionOp& register_op, - const string& function_name, + const std::string& function_name, const bool local_inputs = false, const bool test_cancel = false) { TestEagerServiceImpl eager_service_impl(&worker_env_); - uint64 context_id = random::New64(); + uint64_t context_id = random::New64(); CreateContextRequest request; request.mutable_server_def()->set_job_name("localhost"); @@ -561,12 +563,12 @@ class EagerServiceImplFunctionTest : public EagerServiceImplTest { SetTensorProto(&tensor_proto); AddOperationToEnqueueRequest( 2, function_name, {tensor_proto}, - std::unordered_map(), + std::unordered_map(), "/job:localhost/replica:0/task:0/device:CPU:0", &remote_enqueue_request); } else { - std::unordered_map const_attrs; + std::unordered_map const_attrs; AttrValue val; val.set_type(tensorflow::DataType::DT_FLOAT); const_attrs.insert({"dtype", val}); @@ -581,7 +583,7 @@ class EagerServiceImplFunctionTest : public EagerServiceImplTest { &remote_enqueue_request); AddOperationToEnqueueRequest( 2, function_name, {std::make_pair(1, 0)}, - std::unordered_map(), + std::unordered_map(), "/job:localhost/replica:0/task:0/device:CPU:0", &remote_enqueue_request); } @@ -629,10 +631,10 @@ class EagerServiceImplFunctionTest : public EagerServiceImplTest { // Creates a context and attempts to execute a component function. void TestComponentFunction(const RegisterFunctionOp& register_op, - const string& function_name, + const std::string& function_name, const bool test_cancel) { TestEagerServiceImpl eager_service_impl(&worker_env_); - uint64 context_id = random::New64(); + uint64_t context_id = random::New64(); // Create context. CreateContextRequest request; @@ -655,7 +657,7 @@ class EagerServiceImplFunctionTest : public EagerServiceImplTest { remote_enqueue_request.set_context_id(context_id); EnqueueResponse remote_enqueue_response; - std::unordered_map const_attrs; + std::unordered_map const_attrs; AttrValue val; val.set_type(tensorflow::DataType::DT_FLOAT); const_attrs.insert({"dtype", val}); @@ -675,7 +677,7 @@ class EagerServiceImplFunctionTest : public EagerServiceImplTest { const int output_num = 5; AddOperationToRunComponentFunctionRequest( 2, function_name, {std::make_pair(1, 0)}, - std::unordered_map(), + std::unordered_map(), "/job:localhost/replica:0/task:0/device:CPU:0", output_num, &run_comp_func_request); @@ -772,7 +774,7 @@ TEST_F(EagerServiceImplFunctionTest, ComponentNestedFunctionTest) { TEST_F(EagerServiceImplFunctionTest, ComponentNestedFunctionWithNameClashTest) { TestEagerServiceImpl eager_service_impl(&worker_env_); - uint64 context_id = random::New64(); + uint64_t context_id = random::New64(); // Create context. 
CreateContextRequest request; @@ -820,7 +822,7 @@ TEST_F(EagerServiceImplFunctionTest, ComponentNestedFunctionWithNameClashTest) { remote_enqueue_request.set_context_id(context_id); EnqueueResponse remote_enqueue_response; - std::unordered_map const_attrs; + std::unordered_map const_attrs; AttrValue val; val.set_type(tensorflow::DataType::DT_FLOAT); const_attrs.insert({"dtype", val}); @@ -841,7 +843,7 @@ TEST_F(EagerServiceImplFunctionTest, ComponentNestedFunctionWithNameClashTest) { const int output_num = 5; AddOperationToRunComponentFunctionRequest( 2, "MatMulNestedFunction", {std::make_pair(1, 0)}, - std::unordered_map(), + std::unordered_map(), "/job:localhost/replica:0/task:0/device:CPU:0", output_num, &run_comp_func_request); @@ -883,7 +885,7 @@ TEST_F(EagerServiceImplFunctionTest, ComponentNestedFunctionWithNameClashTest) { const int output_num = 5; AddOperationToRunComponentFunctionRequest( 3, "MatMulNestedTransposeFunction", {std::make_pair(1, 0)}, - std::unordered_map(), + std::unordered_map(), "/job:localhost/replica:0/task:0/device:CPU:0", output_num, &run_comp_func_request); @@ -984,7 +986,7 @@ class FunctionWithRemoteInputsTest : public EagerServiceImplTest { EnqueueRequest remote_enqueue_request; remote_enqueue_request.set_context_id(context_id_); EnqueueResponse remote_enqueue_response; - std::unordered_map const_attrs; + std::unordered_map const_attrs; AttrValue val; val.set_type(tensorflow::DataType::DT_FLOAT); const_attrs.insert({"dtype", val}); @@ -1045,11 +1047,13 @@ class FunctionWithRemoteInputsTest : public EagerServiceImplTest { } protected: - const string local_device_ = "/job:localhost/replica:0/task:0/device:CPU:0"; - const string remote_device_ = "/job:localhost/replica:0/task:1/device:CPU:0"; + const std::string local_device_ = + "/job:localhost/replica:0/task:0/device:CPU:0"; + const std::string remote_device_ = + "/job:localhost/replica:0/task:1/device:CPU:0"; TestEagerServiceImpl eager_service_impl_; std::unique_ptr remote_device_mgr_; - uint64 context_id_; + uint64_t context_id_; tensorflow::FunctionDef fdef_; std::unique_ptr eager_pflr_; std::unique_ptr eager_cluster_flr_; @@ -1072,7 +1076,7 @@ TEST_F(FunctionWithRemoteInputsTest, EagerPFLRTest) { fdef_.signature().name(), AttrSlice(&fdef_.attr()), options, &handle)); EagerContext* ctx = nullptr; TF_ASSERT_OK(eager_service_impl_.GetEagerContext(context_id_, &ctx)); - for (const string& func_name : ctx->FuncLibDef()->ListFunctionNames()) { + for (const std::string& func_name : ctx->FuncLibDef()->ListFunctionNames()) { const FunctionDef* fdef = ctx->FuncLibDef()->Find(func_name); EXPECT_TRUE(fdef != nullptr); if (absl::StartsWith(func_name, "MatMulFunction")) { @@ -1085,7 +1089,7 @@ TEST_F(FunctionWithRemoteInputsTest, EagerPFLRTest) { // Run MatMulFunction on remote_device. 
FunctionLibraryRuntime::Options opts; - const uint64 op_id = 2; + const uint64_t op_id = 2; opts.op_id = op_id; absl::Notification done; absl::Status status; @@ -1133,7 +1137,7 @@ TEST_F(FunctionWithRemoteInputsTest, TF_ASSERT_OK(status); EagerContext* ctx = nullptr; TF_ASSERT_OK(eager_service_impl_.GetEagerContext(context_id_, &ctx)); - for (const string& func_name : ctx->FuncLibDef()->ListFunctionNames()) { + for (const std::string& func_name : ctx->FuncLibDef()->ListFunctionNames()) { const FunctionDef* fdef = ctx->FuncLibDef()->Find(func_name); EXPECT_TRUE(fdef != nullptr); if (absl::StartsWith(func_name, "MatMulFunction")) { @@ -1288,7 +1292,7 @@ TEST_F(FunctionWithRemoteInputsTest, KernelAndDeviceFuncAsyncTest) { TEST_F(EagerServiceImplTest, SendTensorTest) { TestEagerServiceImpl eager_service_impl(&worker_env_); - uint64 context_id = random::New64(); + uint64_t context_id = random::New64(); CreateContextRequest request; request.mutable_server_def()->set_job_name("localhost"); @@ -1306,7 +1310,7 @@ TEST_F(EagerServiceImplTest, SendTensorTest) { send_tensor->set_op_id(1); SetTensorProto(send_tensor->add_tensors()); - std::unordered_map attrs; + std::unordered_map attrs; AttrValue val; val.Clear(); val.set_type(tensorflow::DataType::DT_FLOAT); @@ -1351,13 +1355,13 @@ TEST_F(EagerServiceImplTest, SendTensorTest) { TEST_F(EagerServiceImplTest, SendPackedHandleTest) { TestEagerServiceImpl eager_service_impl(&worker_env_); - const string device0 = "/job:localhost/replica:0/task:0/device:CPU:0"; - const string device1 = "/job:localhost/replica:0/task:1/device:CPU:0"; - const string device2 = "/job:localhost/replica:0/task:2/device:CPU:0"; - const string composite_device = + const std::string device0 = "/job:localhost/replica:0/task:0/device:CPU:0"; + const std::string device1 = "/job:localhost/replica:0/task:1/device:CPU:0"; + const std::string device2 = "/job:localhost/replica:0/task:2/device:CPU:0"; + const std::string composite_device = "/job:localhost/replica:0/task:0/device:COMPOSITE:0"; - uint64 context_id = random::New64(); + uint64_t context_id = random::New64(); CreateContextRequest request; auto* server_def = request.mutable_server_def(); server_def->set_job_name("localhost"); @@ -1465,7 +1469,7 @@ TEST_F(EagerServiceImplTest, RequestsToMasterTest) { /*async=*/false, device_mgr_.get(), false, std::move(rendezvous), nullptr, nullptr, /*run_eager_op_as_function=*/true); - const uint64 context_id = random::New64(); + const uint64_t context_id = random::New64(); // Set RemoteMgr to ctx. auto remote_mgr = @@ -1506,7 +1510,7 @@ TEST_F(EagerServiceImplTest, RequestsToMasterTest) { TEST_F(EagerServiceImplTest, KeepAliveTest) { TestEagerServiceImpl eager_service_impl(&worker_env_); - uint64 context_id = random::New64(); + uint64_t context_id = random::New64(); CreateContextRequest request; request.mutable_server_def()->set_job_name("localhost"); request.mutable_server_def()->set_task_index(0); @@ -1531,7 +1535,7 @@ TEST_F(EagerServiceImplTest, KeepAliveTest) { EXPECT_PRED_FORMAT2(::testing::IsSubstring, "Unable to find a context_id", std::string(status.message())); - uint64 new_context_id = random::New64(); + uint64_t new_context_id = random::New64(); // Create a new context. 
request.set_context_id(new_context_id); TF_ASSERT_OK(eager_service_impl.CreateContext(&request, &response)); diff --git a/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc b/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc index 8066664cd0e456..e532bdff5e657a 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc @@ -66,8 +66,8 @@ absl::Status CreateUncachedKernelAndDeviceOp( // This gets a unique wire ID. We add a random identifier so that if the // worker has other clients that it is servicing, we don't have any collision. -string GetUniqueWireID() { - static tensorflow::uint64 random_seed = random::New64(); +std::string GetUniqueWireID() { + static uint64_t random_seed = random::New64(); static tensorflow::mutex wireid_mutex(tensorflow::LINKER_INITIALIZED); static std::atomic wire_id; return absl::StrCat(random_seed, "_", wire_id++); @@ -77,7 +77,7 @@ string GetUniqueWireID() { RemoteCopyNode::RemoteCopyNode(EagerContext* ctx, EagerExecutor* executor, TensorHandle* src, TensorHandle* dst, - Device* recv_device, uint64 recv_op_id) + Device* recv_device, uint64_t recv_op_id) : AsyncEagerNode(), src_(src), ctx_(ctx), @@ -220,12 +220,12 @@ absl::Status RemoteCopyNode::RunLocalRecv(EagerOperation* op, void RemoteCopyNode::RunRemoteRecv(EagerOperation* op, StatusCallback done) { EnqueueRequest request; - uint64 context_id = ctx_->GetContextId(); + uint64_t context_id = ctx_->GetContextId(); request.set_context_id(context_id); auto* remote_op = request.add_queue()->mutable_operation(); PrepareRemoteOp(remote_op, op); remote_op->set_id(recv_op_id_); - uint64 context_view_id = ctx_->GetContextViewId(); + uint64_t context_view_id = ctx_->GetContextViewId(); core::RefCountPtr eager_client; absl::Status status = ctx_->GetClient(recv_device_, &eager_client); @@ -316,7 +316,7 @@ void RemoteCopyNode::StartRecv(StatusCallback done) { } } -absl::Status SerializePackedHandle(const uint64 op_id, +absl::Status SerializePackedHandle(const uint64_t op_id, TensorHandle* packed_handle, const Device* target_device, EagerContext* ctx, SendPackedHandleOp* op) { @@ -362,7 +362,7 @@ absl::Status SerializePackedHandle(const uint64 op_id, void RemoteCopyNode::StartSendPackedHandle(StatusCallback done) { absl::Status s; - const uint64 context_view_id = ctx_->GetContextViewId(); + const uint64_t context_view_id = ctx_->GetContextViewId(); if (!send_device_->IsLocal()) { s = errors::InvalidArgument( "Copy a packed handle from a remote device is not supported"); @@ -372,7 +372,7 @@ void RemoteCopyNode::StartSendPackedHandle(StatusCallback done) { } EnqueueRequest request; - uint64 context_id = ctx_->GetContextId(); + uint64_t context_id = ctx_->GetContextId(); request.set_context_id(context_id); s = SerializePackedHandle(recv_op_id_, src_, recv_device_, ctx_, request.add_queue()->mutable_send_packed_handle()); @@ -426,12 +426,12 @@ void RemoteCopyNode::StartSendPackedHandle(StatusCallback done) { void RemoteCopyNode::StartRemoteSendTensor(StatusCallback done) { absl::Status s; EnqueueRequest request; - uint64 context_id = ctx_->GetContextId(); + uint64_t context_id = ctx_->GetContextId(); request.set_context_id(context_id); auto* send_tensor = request.add_queue()->mutable_send_tensor(); send_tensor->set_op_id(recv_op_id_); send_tensor->set_device_name(recv_device_->name()); - uint64 context_view_id = ctx_->GetContextViewId(); + uint64_t context_view_id = ctx_->GetContextViewId(); // AsProtoTensorContent 
doesn't work when the tensor is on the GPU, hence // copy it to the CPU before copying it out. @@ -515,7 +515,7 @@ void RemoteCopyNode::RunAsync(StatusCallback done) { void RemoteCopyNode::Abort(absl::Status status) { if (!started_) { - uint64 context_view_id = ctx_->GetContextViewId(); + uint64_t context_view_id = ctx_->GetContextViewId(); captured_state_->dst()->PoisonRemote(status, recv_device_, context_view_id); } } diff --git a/tensorflow/core/distributed_runtime/eager/remote_copy_node.h b/tensorflow/core/distributed_runtime/eager/remote_copy_node.h index 572b650651b0c3..a8dc387d9a7dbf 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_copy_node.h +++ b/tensorflow/core/distributed_runtime/eager/remote_copy_node.h @@ -63,7 +63,7 @@ namespace eager { class RemoteCopyNode : public AsyncEagerNode { public: RemoteCopyNode(EagerContext* ctx, EagerExecutor* executor, TensorHandle* src, - TensorHandle* dst, Device* recv_device, uint64 recv_op_id); + TensorHandle* dst, Device* recv_device, uint64_t recv_op_id); ~RemoteCopyNode() override; @@ -73,8 +73,8 @@ class RemoteCopyNode : public AsyncEagerNode { void Abort(absl::Status status) override; - string DebugString() const override { - string out = "[RemoteCopyNode]"; + std::string DebugString() const override { + std::string out = "[RemoteCopyNode]"; absl::StrAppend(&out, " send_device: ", send_device_->name()); absl::StrAppend(&out, ", recv_device: ", recv_device_->name()); absl::StrAppend(&out, ", send_tensor: ", src_->DebugString()); @@ -167,8 +167,8 @@ class RemoteCopyNode : public AsyncEagerNode { EagerExecutor* const executor_; Device* const send_device_; Device* const recv_device_; - const string wire_id_; - const uint64 recv_op_id_; + const std::string wire_id_; + const uint64_t recv_op_id_; std::shared_ptr captured_state_; bool started_; diff --git a/tensorflow/core/distributed_runtime/eager/remote_execute_node.cc b/tensorflow/core/distributed_runtime/eager/remote_execute_node.cc index f118ecaeb2bbad..3c526f2904d34c 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_execute_node.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_execute_node.cc @@ -32,9 +32,9 @@ void RemoteExecuteNode::RunAsync(StatusCallback done) { Device* device = device_; // Filled and used only when VLOG(3) is on. - string rpc_description; + std::string rpc_description; if (VLOG_IS_ON(3)) { - std::vector ops; + std::vector ops; ops.reserve(request_->queue_size()); for (const QueueItem& item : request_->queue()) { if (item.has_operation()) { @@ -96,7 +96,7 @@ void RemoteExecuteNode::RunAsync(StatusCallback done) { } for (size_t i = 0; i < retvals.size(); ++i) { if (status.ok()) { - const string output_device = + const std::string output_device = response->queue_response(0).device().empty() ? 
"" : response->queue_response(0).device(i); diff --git a/tensorflow/core/distributed_runtime/eager/remote_execute_node.h b/tensorflow/core/distributed_runtime/eager/remote_execute_node.h index e29d8d1c187f31..8cc9501efb06d4 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_execute_node.h +++ b/tensorflow/core/distributed_runtime/eager/remote_execute_node.h @@ -40,7 +40,7 @@ class RemoteExecuteNode : public AsyncRemoteExecuteNode { public: RemoteExecuteNode(EagerContext* eager_context, std::unique_ptr request, Device* device, - uint64 context_view_id, EagerClient* eager_client, + uint64_t context_view_id, EagerClient* eager_client, CancellationManager* cancellation_manager, const NodeDef& ndef, const FunctionLibraryDefinition* lib_def, @@ -118,8 +118,8 @@ class RemoteExecuteNode : public AsyncRemoteExecuteNode { return eager_client_->allow_multiple_pending_requests(); } - string DebugString() const override { - string out = "[RemoteExecuteNode]"; + std::string DebugString() const override { + std::string out = "[RemoteExecuteNode]"; absl::StrAppend(&out, " request: ", request_->DebugString()); absl::StrAppend(&out, ", target_device: ", device_->name()); return out; @@ -129,7 +129,7 @@ class RemoteExecuteNode : public AsyncRemoteExecuteNode { EagerContext* eager_context_; // Not owned, and must outlive this node. std::unique_ptr request_; Device* device_; // Not owned - uint64 context_view_id_; + uint64_t context_view_id_; bool needs_remote_inputs_; EagerClient* eager_client_; // Not owned, and must outlive this node. CancellationManager* cancellation_manager_; diff --git a/tensorflow/core/distributed_runtime/eager/remote_mgr.cc b/tensorflow/core/distributed_runtime/eager/remote_mgr.cc index acd34fd9ccbc86..5cec8424c2e14d 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_mgr.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_mgr.cc @@ -127,7 +127,7 @@ absl::Status RemoteMgr::GetMirroredResourceShape( absl::Status RemoteMgr::GetRemoteTensorHandle( const tensorflow::TensorHandle* handle, const bool wait_until_ready, - int64_t* op_id, int32* output_num) { + int64_t* op_id, int32_t* output_num) { TF_RETURN_IF_ERROR(handle->RemoteAddress(handle->device(), wait_until_ready, op_id, output_num)); tensorflow::TensorHandle* h; @@ -213,7 +213,7 @@ absl::Status RemoteMgr::DeserializeRemoteTensorHandle( } else { // Create a remote TensorHandle for remote tensors which have not been // copied to the local worker yet (e.g. remote function inputs). - const string& device_name = + const std::string& device_name = in.op_device().empty() ? 
in.device() : in.op_device(); TF_RETURN_IF_ERROR( parent_->FindDeviceFromName(device_name.c_str(), &device)); @@ -241,7 +241,7 @@ absl::Status RemoteMgr::DeserializeRemoteTensorHandle( return absl::OkStatus(); } -EagerExecutor& RemoteMgr::GetOrCreateExecutorForStream(uint64 stream_id) { +EagerExecutor& RemoteMgr::GetOrCreateExecutorForStream(uint64_t stream_id) { mutex_lock l(executor_map_mu_); auto it = executor_map_.find(stream_id); if (it == executor_map_.end()) { @@ -254,7 +254,7 @@ EagerExecutor& RemoteMgr::GetOrCreateExecutorForStream(uint64 stream_id) { return it->second; } -void RemoteMgr::DeleteExecutorForStream(uint64 stream_id) { +void RemoteMgr::DeleteExecutorForStream(uint64_t stream_id) { mutex_lock l(executor_map_mu_); auto it = executor_map_.find(stream_id); if (it == executor_map_.end()) { diff --git a/tensorflow/core/distributed_runtime/eager/remote_mgr.h b/tensorflow/core/distributed_runtime/eager/remote_mgr.h index b62134cd6e5860..975cfa13e45ef7 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_mgr.h +++ b/tensorflow/core/distributed_runtime/eager/remote_mgr.h @@ -58,7 +58,7 @@ class RemoteMgr { // Helper function to create monotonically increasing ids unique to this // context. - uint64 NextOpId() { + uint64_t NextOpId() { DCHECK(is_master_); mutex_lock l(next_id_mutex_); return next_op_id_++; @@ -77,20 +77,20 @@ class RemoteMgr { absl::Status DeserializeRemoteTensorHandle(const RemoteTensorHandle& in, TensorHandle** out); - EagerExecutor& GetOrCreateExecutorForStream(uint64 stream_id); + EagerExecutor& GetOrCreateExecutorForStream(uint64_t stream_id); - void DeleteExecutorForStream(uint64 stream_id); + void DeleteExecutorForStream(uint64_t stream_id); protected: mutex next_id_mutex_; - uint64 next_op_id_ TF_GUARDED_BY(next_id_mutex_) = 1; + uint64_t next_op_id_ TF_GUARDED_BY(next_id_mutex_) = 1; private: // Returns the op_id and output_num if the given local TensorHandle exists in // remote_tensor_handle_map_. absl::Status GetRemoteTensorHandle(const tensorflow::TensorHandle* handle, const bool wait_until_ready, - int64_t* op_id, int32* output_num) + int64_t* op_id, int32_t* output_num) TF_SHARED_LOCKS_REQUIRED(remote_tensor_handle_mu_); absl::Status GetTensorHandleImpl( @@ -129,7 +129,7 @@ class RemoteMgr { EagerContext* parent_; // not owned. 
mutex executor_map_mu_; - std::unordered_map executor_map_ + std::unordered_map executor_map_ TF_GUARDED_BY(executor_map_mu_); }; diff --git a/tensorflow/core/distributed_runtime/eager/remote_mgr_test.cc b/tensorflow/core/distributed_runtime/eager/remote_mgr_test.cc index ae05ce640cf0dc..89901367b49b2d 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_mgr_test.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_mgr_test.cc @@ -35,7 +35,7 @@ class TestRemoteMgr : public RemoteMgr { TestRemoteMgr(bool is_master, EagerContext* ctx) : RemoteMgr(is_master, ctx) {} - uint64 OpId() { + uint64_t OpId() { tf_shared_lock l(next_id_mutex_); return next_op_id_; } @@ -75,7 +75,7 @@ TEST_F(RemoteMgrTest, SerializeLocalTensorHandleWithRemoteMirror) { TensorHandle* handle = TensorHandle::CreateLocalHandle( std::move(t), local_device_, local_device_, ctx_); - const uint64 op_id = 2; + const uint64_t op_id = 2; const int output_num = 3; TF_ASSERT_OK(handle->AddUnshapedRemoteMirror(remote_device_, op_id, output_num, "", ctx_)); @@ -94,7 +94,7 @@ TEST_F(RemoteMgrTest, SerializeLocalTensorHandleWithRemoteMirror) { TEST_F(RemoteMgrTest, SerializeRemoteTensorHandle) { RemoteMgr remote_mgr(false, ctx_); - const uint64 op_id = 3; + const uint64_t op_id = 3; const int output_num = 1; TensorHandle* handle = TensorHandle::CreateLazyRemoteHandle( op_id, output_num, DT_FLOAT, remote_device_, /*is_ready=*/true, ctx_); @@ -113,7 +113,7 @@ TEST_F(RemoteMgrTest, InvalidateRemoteMirrorWithClusterUpdate) { TensorHandle* handle = TensorHandle::CreateLocalHandle( std::move(t), local_device_, local_device_, ctx_); - const uint64 op_id = 2; + const uint64_t op_id = 2; const int output_num = 3; TF_ASSERT_OK(handle->AddUnshapedRemoteMirror(remote_device_, op_id, output_num, "", ctx_)); @@ -134,7 +134,7 @@ TEST_F(RemoteMgrTest, InvalidateRemoteMirrorWithClusterUpdate) { TEST_F(RemoteMgrTest, SetRemoteShapeWithClusterUpdate) { RemoteMgr remote_mgr(false, ctx_); - const uint64 op_id = 3; + const uint64_t op_id = 3; const int output_num = 1; TensorHandle* handle = TensorHandle::CreateUnshapedRemoteHandle( op_id, output_num, @@ -157,7 +157,7 @@ TEST_F(RemoteMgrTest, SetRemoteShapeWithClusterUpdate) { TEST_F(RemoteMgrTest, ErrorSourcesShouldExist) { RemoteMgr remote_mgr(false, ctx_); - const uint64 op_id = 3; + const uint64_t op_id = 3; const int output_num = 1; TensorHandle* handle = TensorHandle::CreateLazyRemoteHandle( op_id, output_num, DT_FLOAT, remote_device_, /*is_ready=*/true, ctx_); diff --git a/tensorflow/core/distributed_runtime/eager/remote_tensor_handle.h b/tensorflow/core/distributed_runtime/eager/remote_tensor_handle.h index 903d019172a457..51f8d97e6ce6f8 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_tensor_handle.h +++ b/tensorflow/core/distributed_runtime/eager/remote_tensor_handle.h @@ -28,7 +28,7 @@ struct RemoteTensorHandleInternal { RemoteTensorHandleInternal(int64_t op_id, int32_t output_num) : op_id(op_id), output_num(output_num) {} int64_t op_id; - int32 output_num; + int32_t output_num; }; struct RemoteTensorHandleInternalHash { diff --git a/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.cc b/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.cc index 73427ed1372ed8..32ec58774d99cb 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.cc @@ -29,9 +29,10 @@ namespace tensorflow { namespace { -void DestroyRemoteTensorHandle(EagerContext* 
ctx, const string& remote_task, - uint64 context_id, uint64 op_id, int output_num, - bool ready) { +void DestroyRemoteTensorHandle(EagerContext* ctx, + const std::string& remote_task, + uint64_t context_id, uint64_t op_id, + int output_num, bool ready) { if (ctx->GetContextId() != context_id) { // This means that this tensor was pointing to a remote device, which // has been changed out from under us. Simply return since there is @@ -89,7 +90,7 @@ void DestroyRemoteTensorHandle(EagerContext* ctx, const string& remote_task, } // namespace RemoteTensorHandleData::RemoteTensorHandleData(int64_t op_id, int output_num, - uint64 context_view_id, + uint64_t context_view_id, bool is_ready) : is_ready_(is_ready), op_id_(op_id), @@ -102,7 +103,7 @@ RemoteTensorHandleData::RemoteTensorHandleData(int64_t op_id, int output_num, } RemoteTensorHandleData::RemoteTensorHandleData(int64_t op_id, int output_num, - const string& remote_task, + const std::string& remote_task, EagerContext* ctx) : is_ready_(false), op_id_(op_id), @@ -182,7 +183,7 @@ absl::Status RemoteTensorHandleData::SetShape(const TensorShape& shape) { } absl::Status RemoteTensorHandleData::SetShapeAndRemoteTask( - const TensorShape& shape, const string& remote_task) { + const TensorShape& shape, const std::string& remote_task) { // If `is_ready_` is set previously due to poisoning, return the original // error that poisoned this tensor. TF_RETURN_IF_ERROR(IsPoisoned()); @@ -216,13 +217,13 @@ absl::Status RemoteTensorHandleData::SetShapeAndRemoteTask( return absl::OkStatus(); } -string RemoteTensorHandleData::DebugString() const { +std::string RemoteTensorHandleData::DebugString() const { return absl::StrCat("RemoteTensorHandleData:", " op_id: ", op_id_, " output_num: ", output_num_); } absl::Status RemoteTensorHandleData::OpIdAndOutputNum( - const bool wait_until_ready, int64_t* op_id, int32* output_num) const { + const bool wait_until_ready, int64_t* op_id, int32_t* output_num) const { if (wait_until_ready) { TF_RETURN_IF_ERROR(WaitReady("OpIdAndOutputNumUntilReady")); } diff --git a/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.h b/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.h index 892d82bd5f7efe..1c7099cc66b1a4 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.h +++ b/tensorflow/core/distributed_runtime/eager/remote_tensor_handle_data.h @@ -31,12 +31,12 @@ class RemoteTensorHandleData { // the corresponding remote tensor is ready. So the remote tensor should be // ready when we create a lazy remote handle. If it refers to a remote output, // it's not ready until the shape is set. - RemoteTensorHandleData(int64_t op_id, int output_num, uint64 context_view_id, - bool is_ready); + RemoteTensorHandleData(int64_t op_id, int output_num, + uint64_t context_view_id, bool is_ready); // Constructor for unshaped remote handles. It controls the lifetime of a // remote handle that it refers to. 
RemoteTensorHandleData(int64_t op_id, int output_num, - const string& remote_task, EagerContext* ctx); + const std::string& remote_task, EagerContext* ctx); ~RemoteTensorHandleData(); // A remote tensor handle does not have a Tensor object, hence it can only @@ -51,18 +51,18 @@ class RemoteTensorHandleData { absl::Status WaitReady(const char* caller) const; absl::Status SetShape(const TensorShape& shape); absl::Status SetShapeAndRemoteTask(const TensorShape& shape, - const string& remote_task); + const std::string& remote_task); void Poison(absl::Status status); absl::Status IsPoisoned() const; - string DebugString() const; + std::string DebugString() const; // Return the op id and output num. If wait_until_ready is true, block until // the remote tensor is ready on a remote worker. absl::Status OpIdAndOutputNum(bool wait_until_ready, int64_t* op_id, - int32* output_num) const; + int32_t* output_num) const; - uint64 context_view_id() const { return context_view_id_; } + uint64_t context_view_id() const { return context_view_id_; } private: mutable mutex mu_; @@ -72,10 +72,10 @@ class RemoteTensorHandleData { // IDs required when this class is representing a remote tensor handle. const int64_t op_id_; - const int32 output_num_; - string remote_task_ TF_GUARDED_BY(mu_); - uint64 context_id_; - uint64 context_view_id_; + const int32_t output_num_; + std::string remote_task_ TF_GUARDED_BY(mu_); + uint64_t context_id_; + uint64_t context_view_id_; EagerContext* ctx_; }; diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index 13d130d289418c..507915a74152be 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -337,7 +337,7 @@ absl::Status GraphMgr::Register(const std::string& handle, const GraphDef& gdef, { mutex_lock l(mu_); *graph_handle = - strings::Printf("%016llx", static_cast(++next_id_)); + absl::StrFormat("%016llx", static_cast(++next_id_)); item->handle = *graph_handle; CHECK(table_.insert({*graph_handle, item}).second); } diff --git a/tensorflow/core/distributed_runtime/integration_test/c_api_coordination_test.cc b/tensorflow/core/distributed_runtime/integration_test/c_api_coordination_test.cc index d781cb254fa9a9..66e39b5a15ce61 100644 --- a/tensorflow/core/distributed_runtime/integration_test/c_api_coordination_test.cc +++ b/tensorflow/core/distributed_runtime/integration_test/c_api_coordination_test.cc @@ -60,7 +60,7 @@ void ConfigCoordinationService(tensorflow::ServerDef* server_def, coord_config->set_enable_health_check(enable_health_check); } -string SetConfigKeyValueFn() { +std::string SetConfigKeyValueFn() { FunctionDef fdef; tensorflow::protobuf::TextFormat::ParseFromString( " signature {" @@ -86,7 +86,7 @@ string SetConfigKeyValueFn() { return fdef.SerializeAsString(); } -string GetConfigKeyValueFn() { +std::string GetConfigKeyValueFn() { FunctionDef fdef; tensorflow::protobuf::TextFormat::ParseFromString( " signature {" @@ -521,7 +521,7 @@ TEST_P(SingleClientCoordinationServiceTest, TestSetGetConfigInOp) { TF_DeleteTensor(t); TFE_DeleteOp(get_op2); - const string& set_fdef = SetConfigKeyValueFn(); + const std::string& set_fdef = SetConfigKeyValueFn(); TFE_ContextAddFunctionDef(ctx, set_fdef.data(), set_fdef.size(), status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Op* set_fn = TFE_NewOp(ctx, "SetConfigKeyValueFn", status); @@ -542,7 +542,7 @@ TEST_P(SingleClientCoordinationServiceTest, TestSetGetConfigInOp) { 
TFE_DeleteTensorHandle(set_val); TFE_DeleteOp(set_fn); - const string& get_fdef = GetConfigKeyValueFn(); + const std::string& get_fdef = GetConfigKeyValueFn(); TFE_ContextAddFunctionDef(ctx, get_fdef.data(), get_fdef.size(), status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Op* get_fn = TFE_NewOp(ctx, "GetConfigKeyValueFn", status); diff --git a/tensorflow/core/distributed_runtime/integration_test/c_api_multi_client_function_test.cc b/tensorflow/core/distributed_runtime/integration_test/c_api_multi_client_function_test.cc index 7d767e9a8ce42a..73db4a0bb22cee 100644 --- a/tensorflow/core/distributed_runtime/integration_test/c_api_multi_client_function_test.cc +++ b/tensorflow/core/distributed_runtime/integration_test/c_api_multi_client_function_test.cc @@ -39,7 +39,7 @@ namespace { std::string SendFunction(const std::string& send_device, const std::string& recv_device, - const tensorflow::int64 send_device_incarnation) { + const int64_t send_device_incarnation) { tensorflow::FunctionDef def; CHECK(tensorflow::protobuf::TextFormat::ParseFromString( absl::StrCat(" signature {" @@ -100,7 +100,7 @@ std::string SendFunction(const std::string& send_device, std::string RecvFunction(const std::string& send_device, const std::string& recv_device, - const tensorflow::int64 send_device_incarnation) { + const int64_t send_device_incarnation) { tensorflow::FunctionDef def; CHECK(tensorflow::protobuf::TextFormat::ParseFromString( absl::StrCat(" signature {" @@ -239,7 +239,7 @@ TEST_P(MultiClientSendRecvTest, TestMultiClientSendRecv) { std::vector device_attrs; tensorflow::unwrap(ctx)->ListDevices(&device_attrs); - tensorflow::uint64 send_device_incarnation = 0; + uint64_t send_device_incarnation = 0; for (const auto& device_attr : device_attrs) { if (device_attr.name() == send_device) { send_device_incarnation = device_attr.incarnation(); diff --git a/tensorflow/core/distributed_runtime/integration_test/c_api_multi_client_test.cc b/tensorflow/core/distributed_runtime/integration_test/c_api_multi_client_test.cc index a4a1476edaab93..640dbb2a334050 100644 --- a/tensorflow/core/distributed_runtime/integration_test/c_api_multi_client_test.cc +++ b/tensorflow/core/distributed_runtime/integration_test/c_api_multi_client_test.cc @@ -174,7 +174,7 @@ TEST(CAPI, MultiClientSendRecv) { tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); context->ListDevices(&device_attrs); - tensorflow::uint64 send_device_incarnation = 0; + uint64_t send_device_incarnation = 0; for (const auto& device_attr : device_attrs) { if (device_attr.name() == send_device) { send_device_incarnation = device_attr.incarnation(); diff --git a/tensorflow/core/distributed_runtime/integration_test/coordination_test_opkernel_registration.cc b/tensorflow/core/distributed_runtime/integration_test/coordination_test_opkernel_registration.cc index 893ee615659298..5c1864ec2bff3d 100644 --- a/tensorflow/core/distributed_runtime/integration_test/coordination_test_opkernel_registration.cc +++ b/tensorflow/core/distributed_runtime/integration_test/coordination_test_opkernel_registration.cc @@ -45,12 +45,12 @@ class TestSetConfigKeyValueOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->input("key", &key_tensor)); OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(key_tensor->shape()), errors::InvalidArgument("Key must be scalar.")); - const string& config_key = key_tensor->scalar()(); + const std::string& config_key = key_tensor->scalar()(); const Tensor* val_tensor; OP_REQUIRES_OK(ctx, ctx->input("value", &val_tensor)); OP_REQUIRES(ctx, 
TensorShapeUtils::IsScalar(key_tensor->shape()), errors::InvalidArgument("Value must be scalar.")); - const string& config_value = val_tensor->scalar()(); + const std::string& config_value = val_tensor->scalar()(); LOG(INFO) << "TestSetConfigKeyValueOp key=" << config_key << "value=" << config_value; auto* coord_agent = ctx->coordination_service_agent(); @@ -90,7 +90,7 @@ class TestGetConfigKeyValueOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->input("key", &key_tensor)); OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(key_tensor->shape()), errors::InvalidArgument("Key must be scalar.")); - const string& config_key = key_tensor->scalar()(); + const std::string& config_key = key_tensor->scalar()(); LOG(INFO) << "TestGetConfigKeyValueOp key=" << config_key; auto* coord_agent = ctx->coordination_service_agent(); @@ -142,7 +142,8 @@ class TestReportErrorToClusterOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->input("error_message", &error_message_tensor)); OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(error_message_tensor->shape()), errors::InvalidArgument("Error message must be scalar.")); - const string& error_message = error_message_tensor->scalar()(); + const std::string& error_message = + error_message_tensor->scalar()(); LOG(INFO) << "TestReportErrorToClusterOp error_code=" << error_code << " error_message=" << error_message; auto* coord_agent = ctx->coordination_service_agent(); diff --git a/tensorflow/core/distributed_runtime/local_master.cc b/tensorflow/core/distributed_runtime/local_master.cc index 54a50da2ace799..af41d4ad1d4b49 100644 --- a/tensorflow/core/distributed_runtime/local_master.cc +++ b/tensorflow/core/distributed_runtime/local_master.cc @@ -223,7 +223,7 @@ struct MasterInfo { : master(master), default_timeout_in_ms(default_timeout_in_ms) {} }; -typedef std::unordered_map LocalMasterRegistry; +typedef std::unordered_map LocalMasterRegistry; LocalMasterRegistry* local_master_registry() { static LocalMasterRegistry* local_master_registry_ = new LocalMasterRegistry; return local_master_registry_; @@ -231,7 +231,7 @@ LocalMasterRegistry* local_master_registry() { } // namespace /* static */ -void LocalMaster::Register(const string& target, Master* master, +void LocalMaster::Register(const std::string& target, Master* master, int64_t default_timeout_in_ms) { mutex_lock l(*get_local_master_registry_lock()); local_master_registry()->insert( @@ -239,7 +239,7 @@ void LocalMaster::Register(const string& target, Master* master, } /* static */ -std::unique_ptr LocalMaster::Lookup(const string& target) { +std::unique_ptr LocalMaster::Lookup(const std::string& target) { std::unique_ptr ret; mutex_lock l(*get_local_master_registry_lock()); auto iter = local_master_registry()->find(target); diff --git a/tensorflow/core/distributed_runtime/local_master.h b/tensorflow/core/distributed_runtime/local_master.h index e4fc37e4f60f50..b9fe78e8591f17 100644 --- a/tensorflow/core/distributed_runtime/local_master.h +++ b/tensorflow/core/distributed_runtime/local_master.h @@ -89,12 +89,12 @@ class LocalMaster : public MasterInterface { // any LocalMaster objects that may wrap this master. There is no // corresponding deregister method, since clean server shutdown is // not currently implemented for any server type. - static void Register(const string& target, Master* master, + static void Register(const std::string& target, Master* master, int64_t default_timeout_in_ms); // Returns a pointer to the local master associated with the given // `target`, or nullptr if none exists. 
- static std::unique_ptr Lookup(const string& target); + static std::unique_ptr Lookup(const std::string& target); private: Master* master_impl_; // Not owned. diff --git a/tensorflow/core/distributed_runtime/master.cc b/tensorflow/core/distributed_runtime/master.cc index 9a2c553f841faf..bc7fa3c80bb678 100644 --- a/tensorflow/core/distributed_runtime/master.cc +++ b/tensorflow/core/distributed_runtime/master.cc @@ -102,7 +102,7 @@ void Master::GC() { if (shutdown_) { break; } - std::vector handles; + std::vector handles; const int64_t num_micros = static_cast(session_gc_seconds_ * 1000000); for (const auto& entry : sessions_) { @@ -124,7 +124,7 @@ void Master::GC() { } } -MasterSession* Master::FindMasterSession(const string& handle) { +MasterSession* Master::FindMasterSession(const std::string& handle) { MasterSession* session = nullptr; { mutex_lock l(mu_); @@ -139,8 +139,8 @@ MasterSession* Master::FindMasterSession(const string& handle) { class DeviceFinder { public: static absl::Status GetRemoteDevices( - const protobuf::RepeatedPtrField& device_filters, MasterEnv* env, - WorkerCacheInterface* worker_cache, + const protobuf::RepeatedPtrField& device_filters, + MasterEnv* env, WorkerCacheInterface* worker_cache, std::vector>* out_remote) { DeviceFinder finder(device_filters, env, worker_cache); finder.Start(); @@ -150,19 +150,20 @@ class DeviceFinder { } static void GetRemoteWorkers( - const protobuf::RepeatedPtrField& device_filters, MasterEnv* env, - WorkerCacheInterface* worker_cache, std::vector* workers) { + const protobuf::RepeatedPtrField& device_filters, + MasterEnv* env, WorkerCacheInterface* worker_cache, + std::vector* workers) { DeviceFinder finder(device_filters, env, worker_cache); *workers = finder.targets_; } private: explicit DeviceFinder( - const protobuf::RepeatedPtrField& device_filters, MasterEnv* env, - WorkerCacheInterface* worker_cache) + const protobuf::RepeatedPtrField& device_filters, + MasterEnv* env, WorkerCacheInterface* worker_cache) : env_(env), worker_cache_(worker_cache) { CHECK(worker_cache) << "Worker cache was null!"; - auto process_filter = [this](const string& filter) { + auto process_filter = [this](const std::string& filter) { DeviceNameUtils::ParsedName parsed; if (DeviceNameUtils::ParseFullName(filter, &parsed)) { filters_.push_back(parsed); @@ -170,7 +171,7 @@ class DeviceFinder { LOG(FATAL) << "Skipping invalid filter: " << filter; } }; - for (const string& filter : device_filters) { + for (const std::string& filter : device_filters) { process_filter(filter); } // Enumerates all known workers' target. A target name is a @@ -178,19 +179,19 @@ class DeviceFinder { if (filters_.empty()) { // If no filters were specified, we list all known workers in // `worker_cache`. - std::vector workers; + std::vector workers; worker_cache->ListWorkers(&workers); std::swap(workers, targets_); } else { // When applying filters, we must include the local worker, even if it // does not match any of the filters. 
CHECK_GT(env_->local_devices.size(), 0) << "No local devices provided."; - const string& local_device_name = env_->local_devices[0]->name(); + const std::string& local_device_name = env_->local_devices[0]->name(); DeviceNameUtils::ParsedName local_parsed_name; CHECK(DeviceNameUtils::ParseFullName(local_device_name, &local_parsed_name)); bool all_filters_have_job = true; - std::unordered_set filter_job_names({local_parsed_name.job}); + std::unordered_set filter_job_names({local_parsed_name.job}); for (const DeviceNameUtils::ParsedName& filter : filters_) { all_filters_have_job = all_filters_have_job && filter.has_job; if (filter.has_job) { @@ -198,14 +199,14 @@ class DeviceFinder { } } - std::vector workers; + std::vector workers; if (all_filters_have_job) { // If all of the device filters have a job specified, then we only need // to list the workers in the jobs named in the filter, because a worker // in any other job would not match any filter. - for (const string& job_name : filter_job_names) { + for (const std::string& job_name : filter_job_names) { VLOG(2) << "Selectively listing workers in job: " << job_name; - std::vector workers_in_job; + std::vector workers_in_job; worker_cache->ListWorkersInJob(job_name, &workers_in_job); workers.insert(workers.end(), workers_in_job.begin(), workers_in_job.end()); @@ -218,13 +219,13 @@ class DeviceFinder { if (device_filters.empty()) { VLOG(2) << "- "; } else { - for (const string& filter : device_filters) { + for (const std::string& filter : device_filters) { VLOG(2) << "- " << filter; } } worker_cache->ListWorkers(&workers); } - for (const string& name : workers) { + for (const std::string& name : workers) { if (MatchFilters(name) || DeviceNameUtils::IsSameAddressSpace(name, local_device_name)) { targets_.push_back(name); @@ -263,7 +264,7 @@ class DeviceFinder { // Every `kLoggingPeriodMs`, while the DeviceFinder is still waiting // to hear from workers, log a list of the workers who have not // responded. - const int32 kLoggingPeriodMs = 10 * 1000; + const int32_t kLoggingPeriodMs = 10 * 1000; absl::Status Wait() { mutex_lock l(mu_); @@ -287,11 +288,11 @@ class DeviceFinder { // The caller takes the ownership of returned remote devices. void GetRemoteDevices(const std::vector& local, std::vector>* remote) { - std::unordered_set names(local.size()); + std::unordered_set names(local.size()); for (Device* dev : local) names.insert(dev->name()); mutex_lock l(mu_); for (Device* dev : found_) { - const string& name = dev->name(); + const std::string& name = dev->name(); if (names.insert(name).second && MatchFilters(name)) { remote->push_back(std::unique_ptr(dev)); } else { @@ -313,7 +314,7 @@ class DeviceFinder { // List of targets to be contacted by this DeviceFinder. The // respective `bool` in `seen_targets_` indicates whether we have // heard from this target or not. - std::vector targets_; + std::vector targets_; std::vector seen_targets_ TF_GUARDED_BY(mu_); absl::Status status_; @@ -347,7 +348,7 @@ class DeviceFinder { } // Returns true iff 'name' matches one of the filters_. 
- bool MatchFilters(const string& name) { + bool MatchFilters(const std::string& name) { if (filters_.empty()) return true; DeviceNameUtils::ParsedName x; if (DeviceNameUtils::ParseFullName(name, &x)) { @@ -386,7 +387,7 @@ void Master::CreateSession(const CreateSessionRequest* req, if (!cluster_def.job().empty()) { worker_cache_factory_options.cluster_def = cluster_def; // If the target starts with gRPC protocol prefix, remove the prefix - string normalized_string(req->target()); + std::string normalized_string(req->target()); RE2::Replace(&normalized_string, kGrpcPrefixRegex, ""); // Set the server_def's job_name and task_index fields. @@ -472,7 +473,7 @@ void Master::CreateSession(const CreateSessionRequest* req, options.config.mutable_experimental() ->set_disable_optimize_for_static_graph(true); - std::vector filtered_worker_list; + std::vector filtered_worker_list; DeviceFinder::GetRemoteWorkers(req->config().device_filters(), env_, worker_cache, &filtered_worker_list); @@ -555,7 +556,7 @@ void Master::RunStep(CallOptions* opts, const RunStepRequestWrapper* req, SchedClosure([this, start_time, session, opts, req, resp, done]() { absl::Status status = session->Run(opts, *req, resp); session->Unref(); - uint64 done_time = env_->env->NowMicros(); + uint64_t done_time = env_->env->NowMicros(); done(status); mutex_lock l(mu_); last_1000_steps_.AddValue((done_time - start_time) / 1e9); @@ -624,7 +625,7 @@ void Master::ListDevices(const ListDevicesRequest* req, } void Master::CleanupWorkers(const ResetRequest& reset) { - std::vector worker_names; + std::vector worker_names; DeviceFinder::GetRemoteWorkers(reset.device_filters(), env_, env_->worker_cache, &worker_names); if (!worker_names.empty()) { @@ -635,7 +636,7 @@ void Master::CleanupWorkers(const ResetRequest& reset) { std::vector resp(num_workers); int c = 0; for (int i = 0; i < num_workers; ++i) { - const string& worker_name = worker_names[i]; + const std::string& worker_name = worker_names[i]; auto worker = env_->worker_cache->GetOrCreateWorker(worker_name); if (worker) { worker->CleanupAllAsync( diff --git a/tensorflow/core/distributed_runtime/master.h b/tensorflow/core/distributed_runtime/master.h index a3930249b629ee..f39fd34d0a5900 100644 --- a/tensorflow/core/distributed_runtime/master.h +++ b/tensorflow/core/distributed_runtime/master.h @@ -84,7 +84,7 @@ class Master { Thread* gc_thread_; // Maps session handles to sessions. - std::unordered_map sessions_ TF_GUARDED_BY(mu_); + std::unordered_map sessions_ TF_GUARDED_BY(mu_); // Moving average of step times. MovingAverage last_1000_steps_ TF_GUARDED_BY(mu_); @@ -107,7 +107,7 @@ class Master { // Find master session by session handle, and increments the reference count // on the returned MasterSession if not null. - MasterSession* FindMasterSession(const string& handle); + MasterSession* FindMasterSession(const std::string& handle); Master(const Master&) = delete; void operator=(const Master&) = delete; diff --git a/tensorflow/core/distributed_runtime/master_env.h b/tensorflow/core/distributed_runtime/master_env.h index b8dcf1963df50d..5845a96836f913 100644 --- a/tensorflow/core/distributed_runtime/master_env.h +++ b/tensorflow/core/distributed_runtime/master_env.h @@ -41,7 +41,7 @@ class OpRegistryInterface; // Options passed to the worker_cache_factory function. 
struct WorkerCacheFactoryOptions { ClusterDef cluster_def; - string job_name; + std::string job_name; int task_index; int replica_index = 0; RPCOptions rpc_options; @@ -96,7 +96,7 @@ struct MasterEnv { std::unique_ptr>>, std::unique_ptr, std::unique_ptr device_set, - std::vector filtered_worker_list)> + std::vector filtered_worker_list)> master_session_factory; std::function client_graph, const SessionOptions& session_opts, const StatsPublisherFactory& stats_publisher_factory, @@ -122,7 +122,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { int64_t collective_graph_key() { return collective_graph_key_; } - std::unique_ptr GetProfileHandler(uint64 step, + std::unique_ptr GetProfileHandler(uint64_t step, int64_t execution_count, const RunOptions& ropts) { return stats_publisher_->GetProfileHandler(step, execution_count, ropts); @@ -239,7 +239,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { GraphExecutionState* execution_state); private: - const string session_handle_; + const std::string session_handle_; const BuildGraphOptions bg_opts_; // NOTE(mrry): This pointer will be null after `RegisterPartitions()` returns. @@ -250,13 +250,13 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { WorkerCacheInterface* const worker_cache_; // Not owned. struct NodeDetails { - explicit NodeDetails(string type_string, string detail_text) + explicit NodeDetails(std::string type_string, std::string detail_text) : type_string(std::move(type_string)), detail_text(std::move(detail_text)) {} - const string type_string; - const string detail_text; + const std::string type_string; + const std::string detail_text; }; - std::unordered_map name_to_node_details_; + std::unordered_map name_to_node_details_; const bool should_deregister_; const int64_t collective_graph_key_; @@ -265,20 +265,20 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { // Graph partitioned into per-location subgraphs. struct Part { // Worker name. - string name; + std::string name; // Maps feed names to rendezvous keys. Empty most of the time. - std::unordered_map feed_key; + std::unordered_map feed_key; // Maps rendezvous keys to fetch names. Empty most of the time. - std::unordered_map key_fetch; + std::unordered_map key_fetch; // The interface to the worker. Owned. WorkerInterface* worker = nullptr; // After registration with the worker, graph_handle identifies // this partition on the worker. - string graph_handle; + std::string graph_handle; Part() : feed_key(3), key_fetch(3) {} }; @@ -300,14 +300,15 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { std::unique_ptr stats_publisher_; - string DetailText(const NodeDetails& details, const NodeExecStats& stats) { + std::string DetailText(const NodeDetails& details, + const NodeExecStats& stats) { int64_t tot = 0; for (auto& no : stats.output()) { tot += no.tensor_description().allocation_description().requested_bytes(); } - string bytes; + std::string bytes; if (tot >= 0.1 * 1048576.0) { - bytes = strings::Printf("[%.1fMB] ", tot / 1048576.0); + bytes = absl::StrFormat("[%.1fMB] ", tot / 1048576.0); } return strings::StrCat(bytes, stats.node_name(), " = ", details.type_string, details.detail_text); @@ -322,10 +323,10 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { // The actual graph partitioning and registration implementation. 
absl::Status DoBuildPartitions( PartitionOptions popts, ClientGraph* client_graph, - std::unordered_map* out_partitions); + std::unordered_map* out_partitions); absl::Status DoRegisterPartitions( const PartitionOptions& popts, - std::unordered_map graph_partitions); + std::unordered_map graph_partitions); // Prepares a number of calls to workers. One call per partition. // This is a generic method that handles Run, PartialRun, and RunCallable. @@ -359,7 +360,7 @@ absl::Status MasterSession::ReffedClientGraph::RegisterPartitions( std::unique_ptr client_graph; std::swap(client_graph_before_register_, client_graph); mu_.unlock(); - std::unordered_map graph_defs; + std::unordered_map graph_defs; popts.flib_def = client_graph->flib_def.get(); absl::Status s = DoBuildPartitions(popts, client_graph.get(), &graph_defs); @@ -390,9 +391,9 @@ absl::Status MasterSession::ReffedClientGraph::RegisterPartitions( } } -static string SplitByWorker(const Node* node) { - string task; - string device; +static std::string SplitByWorker(const Node* node) { + std::string task; + std::string device; CHECK(DeviceNameUtils::SplitDeviceName(node->assigned_device_name(), &task, &device)) << "node: " << node->name() << " dev: " << node->assigned_device_name(); @@ -413,17 +414,17 @@ void MasterSession::ReffedClientGraph::TrackFeedsAndFetches( bool client_terminated; TF_CHECK_OK(GetNodeAttr(ndef, "client_terminated", &client_terminated)); if (client_terminated) { - string name; + std::string name; TF_CHECK_OK(GetNodeAttr(ndef, "tensor_name", &name)); - string send_device; + std::string send_device; TF_CHECK_OK(GetNodeAttr(ndef, "send_device", &send_device)); - string recv_device; + std::string recv_device; TF_CHECK_OK(GetNodeAttr(ndef, "recv_device", &recv_device)); - uint64 send_device_incarnation; + uint64_t send_device_incarnation; TF_CHECK_OK( GetNodeAttr(ndef, "send_device_incarnation", reinterpret_cast(&send_device_incarnation))); - const string& key = + const std::string& key = Rendezvous::CreateKey(send_device, send_device_incarnation, recv_device, name, FrameAndIter(0, 0)); @@ -439,7 +440,7 @@ void MasterSession::ReffedClientGraph::TrackFeedsAndFetches( absl::Status MasterSession::ReffedClientGraph::DoBuildPartitions( PartitionOptions popts, ClientGraph* client_graph, - std::unordered_map* out_partitions) { + std::unordered_map* out_partitions) { if (popts.need_to_record_start_times) { CostModel cost_model(true); cost_model.InitFromGraph(client_graph->graph); @@ -455,7 +456,7 @@ absl::Status MasterSession::ReffedClientGraph::DoBuildPartitions( absl::Status MasterSession::ReffedClientGraph::DoRegisterPartitions( const PartitionOptions& popts, - std::unordered_map graph_partitions) { + std::unordered_map graph_partitions) { partitions_.reserve(graph_partitions.size()); absl::Status s; for (auto& name_def : graph_partitions) { @@ -524,7 +525,7 @@ class RunManyGraphs { // Returns the index-th call. 
struct Call { CallOptions opts; - const string* worker_name; + const std::string* worker_name; std::atomic done{false}; std::unique_ptr req; std::unique_ptr resp; @@ -625,13 +626,15 @@ class RunManyGraphs { absl::Status AddSendFromClientRequest(const RunStepRequestWrapper& client_req, MutableRunGraphRequestWrapper* worker_req, - size_t index, const string& send_key) { + size_t index, + const std::string& send_key) { return worker_req->AddSendFromRunStepRequest(client_req, index, send_key); } absl::Status AddSendFromClientRequest(const RunCallableRequest& client_req, MutableRunGraphRequestWrapper* worker_req, - size_t index, const string& send_key) { + size_t index, + const std::string& send_key) { return worker_req->AddSendFromRunCallableRequest(client_req, index, send_key); } @@ -639,13 +642,13 @@ absl::Status AddSendFromClientRequest(const RunCallableRequest& client_req, // in-process messages. struct RunCallableResponseWrapper { RunCallableResponse* resp; // Not owned. - std::unordered_map fetch_key_to_protos; + std::unordered_map fetch_key_to_protos; RunMetadata* mutable_metadata() { return resp->mutable_metadata(); } absl::Status AddTensorFromRunGraphResponse( - const string& tensor_name, MutableRunGraphResponseWrapper* worker_resp, - size_t index) { + const std::string& tensor_name, + MutableRunGraphResponseWrapper* worker_resp, size_t index) { return worker_resp->RecvValue(index, &fetch_key_to_protos[tensor_name]); } }; @@ -709,18 +712,18 @@ absl::Status MasterSession::ReffedClientGraph::RunPartitionsHelper( // inadvertently slowing down the normal run path. if (is_partial_) { for (const auto& name_index : feeds) { - const auto iter = part.feed_key.find(string(name_index.first)); + const auto iter = part.feed_key.find(std::string(name_index.first)); if (iter == part.feed_key.end()) { // The provided feed must be for a different partition. continue; } - const string& key = iter->second; + const std::string& key = iter->second; TF_RETURN_IF_ERROR(AddSendFromClientRequest(req, c->req.get(), name_index.second, key)); } // TODO(suharshs): Make a map from feed to fetch_key to make this faster. // For now, we just iterate through partitions to find the matching key. 
- for (const string& req_fetch : fetches) { + for (const std::string& req_fetch : fetches) { for (const auto& key_fetch : part.key_fetch) { if (key_fetch.second == req_fetch) { c->req->add_recv_key(key_fetch.first); @@ -730,8 +733,8 @@ absl::Status MasterSession::ReffedClientGraph::RunPartitionsHelper( } } else { for (const auto& feed_key : part.feed_key) { - const string& feed = feed_key.first; - const string& key = feed_key.second; + const std::string& feed = feed_key.first; + const std::string& key = feed_key.second; auto iter = feeds.find(feed); if (iter == feeds.end()) { return errors::Internal("No feed index found for feed: ", feed); @@ -741,7 +744,7 @@ absl::Status MasterSession::ReffedClientGraph::RunPartitionsHelper( AddSendFromClientRequest(req, c->req.get(), feed_index, key)); } for (const auto& key_fetch : part.key_fetch) { - const string& key = key_fetch.first; + const std::string& key = key_fetch.first; c->req->add_recv_key(key); } } @@ -790,7 +793,7 @@ absl::Status MasterSession::ReffedClientGraph::RunPartitionsHelper( run_graph_resp->recv_key(j))); break; } - const string& fetch = iter->second; + const std::string& fetch = iter->second; status.Update( resp->AddTensorFromRunGraphResponse(fetch, run_graph_resp, j)); if (!status.ok()) { @@ -834,7 +837,7 @@ absl::Status MasterSession::ReffedClientGraph::RunPartitions( } } - std::vector fetches; + std::vector fetches; fetches.reserve(req.num_fetches()); for (size_t i = 0; i < req.num_fetches(); ++i) { fetches.push_back(req.fetch_name(i)); @@ -870,7 +873,7 @@ absl::Status MasterSession::ReffedClientGraph::RunPartitions( call_opts, req, &wrapped_resp, cm, false /* is_last_partial_run */)); // Collects fetches. - for (const string& fetch : callable_opts_.fetch()) { + for (const std::string& fetch : callable_opts_.fetch()) { TensorProto* fetch_proto = resp->mutable_fetch()->Add(); auto iter = wrapped_resp.fetch_key_to_protos.find(fetch); if (iter == wrapped_resp.fetch_key_to_protos.end()) { @@ -1001,7 +1004,7 @@ void MasterSession::ReffedClientGraph::ProcessStats(int64_t step_id, void MasterSession::ReffedClientGraph::ProcessDeviceStats( ProfileHandler* ph, const DeviceStepStats& ds, bool is_rpc) { - const string& dev_name = ds.device(); + const std::string& dev_name = ds.device(); VLOG(1) << "Device " << dev_name << " reports stats for " << ds.node_stats_size() << " nodes"; for (const auto& ns : ds.node_stats()) { @@ -1026,9 +1029,9 @@ void MasterSession::ReffedClientGraph::ProcessDeviceStats( } continue; } - const string& optype = + const std::string& optype = found_node_in_graph ? iter->second.type_string : ns.node_name(); - string details; + std::string details; if (!ns.timeline_label().empty()) { details = ns.timeline_label(); } else if (found_node_in_graph) { @@ -1055,7 +1058,7 @@ absl::Status MasterSession::ReffedClientGraph::CheckFetches( // Skip if already fed. if (input.second) continue; TensorId id(ParseTensorName(input.first)); - const Node* n = execution_state->get_node_by_name(string(id.first)); + const Node* n = execution_state->get_node_by_name(std::string(id.first)); if (n == nullptr) { return errors::NotFound("Feed ", input.first, ": not found"); } @@ -1069,9 +1072,9 @@ absl::Status MasterSession::ReffedClientGraph::CheckFetches( // Initialize the stack with the fetch nodes. 
std::vector stack; for (size_t i = 0; i < req.num_fetches(); ++i) { - const string& fetch = req.fetch_name(i); + const std::string& fetch = req.fetch_name(i); const TensorId id(ParseTensorName(fetch)); - const Node* n = execution_state->get_node_by_name(string(id.first)); + const Node* n = execution_state->get_node_by_name(std::string(id.first)); if (n == nullptr) { return errors::NotFound("Fetch ", fetch, ": not found"); } @@ -1120,7 +1123,7 @@ void MasterSession::ReffedClientGraph::DeregisterPartitions() { // NOTE(mrry): We must capture `worker_cache_` since `this` // could be deleted before the callback is called. WorkerCacheInterface* worker_cache = worker_cache_; - const string name = part.name; + const std::string name = part.name; WorkerInterface* w = part.worker; CHECK_NOTNULL(w); auto cb = [worker_cache, c, name, w](const absl::Status& s) { @@ -1138,10 +1141,10 @@ void MasterSession::ReffedClientGraph::DeregisterPartitions() { } namespace { -void CopyAndSortStrings(size_t size, - const std::function& input_accessor, - protobuf::RepeatedPtrField* output) { - std::vector temp; +void CopyAndSortStrings( + size_t size, const std::function& input_accessor, + protobuf::RepeatedPtrField* output) { + std::vector temp; temp.reserve(size); for (size_t i = 0; i < size; ++i) { output->Add(input_accessor(i)); @@ -1194,22 +1197,22 @@ void BuildBuildGraphOptions(const PartialRunSetupRequest& req, // TODO(cais): Add TFDBG support to partial runs. } -uint64 HashBuildGraphOptions(const BuildGraphOptions& opts) { - uint64 h = 0x2b992ddfa23249d6ull; - for (const string& name : opts.callable_options.feed()) { +uint64_t HashBuildGraphOptions(const BuildGraphOptions& opts) { + uint64_t h = 0x2b992ddfa23249d6ull; + for (const std::string& name : opts.callable_options.feed()) { h = Hash64(name.c_str(), name.size(), h); } - for (const string& name : opts.callable_options.target()) { + for (const std::string& name : opts.callable_options.target()) { h = Hash64(name.c_str(), name.size(), h); } - for (const string& name : opts.callable_options.fetch()) { + for (const std::string& name : opts.callable_options.fetch()) { h = Hash64(name.c_str(), name.size(), h); } const DebugOptions& debug_options = opts.callable_options.run_options().debug_options(); if (!debug_options.debug_tensor_watch_opts().empty()) { - const string watch_summary = + const std::string watch_summary = SummarizeDebugTensorWatches(debug_options.debug_tensor_watch_opts()); h = Hash64(watch_summary.c_str(), watch_summary.size(), h); } @@ -1217,17 +1220,17 @@ uint64 HashBuildGraphOptions(const BuildGraphOptions& opts) { return h; } -string BuildGraphOptionsString(const BuildGraphOptions& opts) { - string buf; - for (const string& name : opts.callable_options.feed()) { +std::string BuildGraphOptionsString(const BuildGraphOptions& opts) { + std::string buf; + for (const std::string& name : opts.callable_options.feed()) { absl::StrAppend(&buf, " FdE: ", name); } absl::StrAppend(&buf, "\n"); - for (const string& name : opts.callable_options.target()) { + for (const std::string& name : opts.callable_options.target()) { absl::StrAppend(&buf, " TN: ", name); } absl::StrAppend(&buf, "\n"); - for (const string& name : opts.callable_options.fetch()) { + for (const std::string& name : opts.callable_options.fetch()) { absl::StrAppend(&buf, " FeE: ", name); } if (opts.collective_graph_key != BuildGraphOptions::kNoCollectiveGraphKey) { @@ -1242,7 +1245,7 @@ MasterSession::MasterSession( std::unique_ptr>> remote_devs, std::unique_ptr worker_cache, 
std::unique_ptr device_set, - std::vector filtered_worker_list, + std::vector filtered_worker_list, StatsPublisherFactory stats_publisher_factory) : session_opts_(opt), env_(env), @@ -1301,12 +1304,12 @@ absl::Status MasterSession::Create(GraphDef&& graph_def, absl::Status MasterSession::CreateWorkerSessions( const ClusterDef& cluster_def) { - const std::vector worker_names = filtered_worker_list_; + const std::vector worker_names = filtered_worker_list_; WorkerCacheInterface* worker_cache = get_worker_cache(); struct WorkerGroup { // The worker name. (Not owned.) - const string* name; + const std::string* name; // The worker referenced by name. (Not owned.) WorkerInterface* worker = nullptr; @@ -1328,8 +1331,8 @@ absl::Status MasterSession::CreateWorkerSessions( } }); - string task_name; - string local_device_name; + std::string task_name; + std::string local_device_name; DeviceNameUtils::SplitDeviceName(devices_->client_device()->name(), &task_name, &local_device_name); const int64_t client_device_incarnation = @@ -1435,11 +1438,11 @@ absl::Status MasterSession::CreateWorkerSessions( absl::Status MasterSession::DeleteWorkerSessions() { WorkerCacheInterface* worker_cache = get_worker_cache(); - const std::vector& worker_names = filtered_worker_list_; + const std::vector& worker_names = filtered_worker_list_; struct WorkerGroup { // The worker name. (Not owned.) - const string* name; + const std::string* name; // The worker referenced by name. (Not owned.) WorkerInterface* worker = nullptr; @@ -1554,7 +1557,7 @@ absl::Status MasterSession::StartStep(const BuildGraphOptions& opts, bool is_partial, ReffedClientGraph** out_rcg, int64_t* out_count) { - const uint64 hash = HashBuildGraphOptions(opts); + const uint64_t hash = HashBuildGraphOptions(opts); { mutex_lock l(mu_); // TODO(suharshs): We cache partial run graphs and run graphs separately @@ -1599,12 +1602,12 @@ void MasterSession::ClearRunsTable(std::vector* to_unref, rcg_map->clear(); } -uint64 MasterSession::NewStepId(int64_t graph_key) { +uint64_t MasterSession::NewStepId(int64_t graph_key) { if (graph_key == BuildGraphOptions::kNoCollectiveGraphKey) { // StepId must leave the most-significant 7 bits empty for future use. return random::New64() & (((1uLL << 56) - 1) | (1uLL << 56)); } else { - uint64 step_id = env_->collective_executor_mgr->NextStepId(graph_key); + uint64_t step_id = env_->collective_executor_mgr->NextStepId(graph_key); int32_t retry_count = 0; while (static_cast(step_id) == CollectiveExecutor::kInvalidId) { absl::Notification note; @@ -1631,7 +1634,7 @@ uint64 MasterSession::NewStepId(int64_t graph_key) { absl::Status MasterSession::PartialRunSetup(const PartialRunSetupRequest* req, PartialRunSetupResponse* resp) { - std::vector inputs, outputs, targets; + std::vector inputs, outputs, targets; for (const auto& feed : req->feed()) { inputs.push_back(feed); } @@ -1642,7 +1645,7 @@ absl::Status MasterSession::PartialRunSetup(const PartialRunSetupRequest* req, targets.push_back(target); } - string handle = std::to_string(partial_run_handle_counter_.fetch_add(1)); + std::string handle = std::to_string(partial_run_handle_counter_.fetch_add(1)); ReffedClientGraph* rcg = nullptr; @@ -1706,11 +1709,11 @@ absl::Status MasterSession::BuildAndRegisterPartitions(ReffedClientGraph* rcg) { // The closures popts.{new_name,get_incarnation} are called synchronously in // RegisterPartitions() below, so do not need a Ref()/Unref() pair to keep // "this" alive during the closure. 
- popts.new_name = [this](const string& prefix) { + popts.new_name = [this](const std::string& prefix) { mutex_lock l(mu_); return absl::StrCat(prefix, "_S", next_node_id_++); }; - popts.get_incarnation = [this](const string& name) -> int64 { + popts.get_incarnation = [this](const std::string& name) -> int64_t { Device* d = devices_->FindDeviceByName(name); if (d == nullptr) { return PartitionOptions::kIllegalIncarnation; @@ -1746,7 +1749,7 @@ absl::Status MasterSession::DoPartialRun(CallOptions* opts, const RunStepRequestWrapper& req, MutableRunStepResponseWrapper* resp) { auto cleanup = gtl::MakeCleanup([this] { MarkRunCompletion(); }); - const string& prun_handle = req.partial_run_handle(); + const std::string& prun_handle = req.partial_run_handle(); RunState* run_state = nullptr; { mutex_lock l(mu_); @@ -1802,7 +1805,7 @@ absl::Status MasterSession::DoPartialRun(CallOptions* opts, // Make sure that this is a new set of feeds that are still pending. for (size_t i = 0; i < req.num_feeds(); ++i) { - const string& feed = req.feed_name(i); + const std::string& feed = req.feed_name(i); auto it = run_state->pending_inputs.find(feed); if (it == run_state->pending_inputs.end()) { return errors::InvalidArgument( @@ -1814,7 +1817,7 @@ absl::Status MasterSession::DoPartialRun(CallOptions* opts, } // Check that this is a new set of fetches that are still pending. for (size_t i = 0; i < req.num_fetches(); ++i) { - const string& fetch = req.fetch_name(i); + const std::string& fetch = req.fetch_name(i); auto it = run_state->pending_outputs.find(fetch); if (it == run_state->pending_outputs.end()) { return errors::InvalidArgument( @@ -1879,17 +1882,17 @@ absl::Status MasterSession::CreateDebuggerState( TF_RETURN_IF_ERROR( DebuggerStateRegistry::CreateState(debug_options, debugger_state)); - std::vector input_names; + std::vector input_names; input_names.reserve(req.num_feeds()); for (size_t i = 0; i < req.num_feeds(); ++i) { input_names.push_back(req.feed_name(i)); } - std::vector output_names; + std::vector output_names; output_names.reserve(req.num_fetches()); for (size_t i = 0; i < req.num_fetches(); ++i) { output_names.push_back(req.fetch_name(i)); } - std::vector target_names; + std::vector target_names; target_names.reserve(req.num_targets()); for (size_t i = 0; i < req.num_targets(); ++i) { target_names.push_back(req.target_name(i)); @@ -1908,7 +1911,7 @@ absl::Status MasterSession::CreateDebuggerState( void MasterSession::FillPerStepState(MasterSession::ReffedClientGraph* rcg, const RunOptions& run_options, - uint64 step_id, int64_t count, + uint64_t step_id, int64_t count, PerStepState* out_pss, std::unique_ptr* out_ph) { out_pss->collect_timeline = @@ -1935,7 +1938,7 @@ void MasterSession::FillPerStepState(MasterSession::ReffedClientGraph* rcg, } absl::Status MasterSession::PostRunCleanup( - MasterSession::ReffedClientGraph* rcg, uint64 step_id, + MasterSession::ReffedClientGraph* rcg, uint64_t step_id, const RunOptions& run_options, PerStepState* pss, const std::unique_ptr& ph, const absl::Status& run_status, RunMetadata* out_run_metadata) { @@ -2004,7 +2007,7 @@ absl::Status MasterSession::DoRunWithLocalExecution( // Keeps the highest 8 bits 0x01: we reserve some bits of the // step_id for future use. 
-  uint64 step_id = NewStepId(rcg->collective_graph_key());
+  uint64_t step_id = NewStepId(rcg->collective_graph_key());
   TRACEPRINTF("stepid %llu", step_id);

   std::unique_ptr<ProfileHandler> ph;
@@ -2054,7 +2057,7 @@ absl::Status MasterSession::MakeCallable(const MakeCallableRequest& req,
     return s;
   }

-  uint64 handle;
+  uint64_t handle;
   {
     mutex_lock l(mu_);
     handle = next_callable_handle_++;
@@ -2077,7 +2080,7 @@ absl::Status MasterSession::DoRunCallable(CallOptions* opts,

   // Prepare.
   int64_t count = rcg->get_and_increment_execution_count();
-  const uint64 step_id = NewStepId(rcg->collective_graph_key());
+  const uint64_t step_id = NewStepId(rcg->collective_graph_key());
   TRACEPRINTF("stepid %llu", step_id);

   const RunOptions& run_options = rcg->callable_options().run_options();
@@ -2176,10 +2179,10 @@ void MasterSession::GarbageCollect() {
   Unref();
 }

-MasterSession::RunState::RunState(const std::vector<string>& input_names,
-                                  const std::vector<string>& output_names,
-                                  ReffedClientGraph* rcg, const uint64 step_id,
-                                  const int64_t count)
+MasterSession::RunState::RunState(const std::vector<std::string>& input_names,
+                                  const std::vector<std::string>& output_names,
+                                  ReffedClientGraph* rcg,
+                                  const uint64_t step_id, const int64_t count)
     : rcg(rcg), step_id(step_id), count(count) {
   // Initially all the feeds and fetches are pending.
   for (auto& name : input_names) {
diff --git a/tensorflow/core/distributed_runtime/master_session.h b/tensorflow/core/distributed_runtime/master_session.h
index f7016518bca5a9..b22953547b8f7c 100644
--- a/tensorflow/core/distributed_runtime/master_session.h
+++ b/tensorflow/core/distributed_runtime/master_session.h
@@ -52,7 +52,7 @@ class MasterSession : public core::RefCounted {
       std::unique_ptr<std::vector<std::unique_ptr<Device>>> remote_devs,
       std::unique_ptr<WorkerCacheInterface> worker_cache,
       std::unique_ptr<DeviceSet> device_set,
-      std::vector<string> filtered_worker_list,
+      std::vector<std::string> filtered_worker_list,
       StatsPublisherFactory stats_publisher_factory);

   // Initialize the MasterSession for "def".  Must be called before Extend(),
@@ -60,11 +60,13 @@ class MasterSession : public core::RefCounted {
   absl::Status Create(GraphDef&& def, const ClusterDef& cluster_def);

   // Returns the session handle.
-  const string& handle() const { return handle_; }
+  const std::string& handle() const { return handle_; }

   // Returns the last access time (the number of micro-seconds since
   // some fixed point in time) of this session.
-  uint64 last_access_time_usec() const { return last_access_time_usec_.load(); }
+  uint64_t last_access_time_usec() const {
+    return last_access_time_usec_.load();
+  }

   // Attempt to extend the graph according to the given "req".
   // (See master.proto for details of valid extensions.)
@@ -117,7 +119,7 @@ class MasterSession : public core::RefCounted {
   const MasterEnv* env_;

   // The opaque session handle.
-  const string handle_;
+  const std::string handle_;

   std::unique_ptr<std::vector<std::unique_ptr<Device>>> remote_devs_;

@@ -132,7 +134,7 @@ class MasterSession : public core::RefCounted {

   // The (partial device) names of remote worker tasks that this
   // session will contact.
-  const std::vector<string> filtered_worker_list_;
+  const std::vector<std::string> filtered_worker_list_;

   StatsPublisherFactory stats_publisher_factory_;

@@ -140,7 +142,7 @@ class MasterSession : public core::RefCounted {

   std::atomic<int64_t> partial_run_handle_counter_ = {0};

-  uint64 NewStepId(int64_t graph_key);
+  uint64_t NewStepId(int64_t graph_key);

   mutex mu_;
   std::unique_ptr<GraphExecutionState> execution_state_ TF_GUARDED_BY(mu_);
@@ -152,7 +154,7 @@ class MasterSession : public core::RefCounted {
   // before a new substitute has been created, Variables can go out of
   // scope and lose their state.
   class ReffedClientGraph;
-  typedef std::unordered_map<uint64, ReffedClientGraph*> RCGMap;
+  typedef std::unordered_map<uint64_t, ReffedClientGraph*> RCGMap;
   RCGMap run_graphs_ TF_GUARDED_BY(mu_);
   RCGMap partial_run_graphs_ TF_GUARDED_BY(mu_);
   int64_t next_callable_handle_ TF_GUARDED_BY(mu_) = 0;
@@ -172,35 +174,36 @@ class MasterSession : public core::RefCounted {
   };

   struct RunState {
-    std::unordered_map<string, bool> pending_inputs;   // true if fed
-    std::unordered_map<string, bool> pending_outputs;  // true if fetched
+    std::unordered_map<std::string, bool> pending_inputs;   // true if fed
+    std::unordered_map<std::string, bool> pending_outputs;  // true if fetched
     ReffedClientGraph* rcg = nullptr;
-    uint64 step_id;
+    uint64_t step_id;
     int64_t collective_graph_key;
     int64_t count = 0;
     PerStepState pss;
     std::unique_ptr<ProfileHandler> ph;
     bool step_started = false;

-    RunState(const std::vector<string>& input_names,
-             const std::vector<string>& output_names, ReffedClientGraph* rcg,
-             const uint64 step_id, const int64_t count);
+    RunState(const std::vector<std::string>& input_names,
+             const std::vector<std::string>& output_names,
+             ReffedClientGraph* rcg, const uint64_t step_id,
+             const int64_t count);

     bool PendingDone() const;

     ~RunState();
   };
-  std::unordered_map<string, std::unique_ptr<RunState>> partial_runs_
+  std::unordered_map<std::string, std::unique_ptr<RunState>> partial_runs_
       TF_GUARDED_BY(mu_);

   // Active RunStep calls.
   condition_variable num_running_is_zero_;
-  int32 num_running_ TF_GUARDED_BY(mu_) = 0;
+  int32_t num_running_ TF_GUARDED_BY(mu_) = 0;

   bool closed_ TF_GUARDED_BY(mu_) = false;
   bool garbage_collected_ TF_GUARDED_BY(mu_) = false;

-  std::unordered_map<uint64, int64_t> subgraph_execution_counts_
+  std::unordered_map<uint64_t, int64_t> subgraph_execution_counts_
       TF_GUARDED_BY(mu_);

   // We need to ensure that certain nodes added (e.g., send and recv
@@ -228,7 +231,7 @@ class MasterSession : public core::RefCounted {
   void ClearRunsTable(std::vector<ReffedClientGraph*>* to_unref,
                       RCGMap* rcg_map) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
   void FillPerStepState(MasterSession::ReffedClientGraph* rcg,
-                        const RunOptions& run_options, uint64 step_id,
+                        const RunOptions& run_options, uint64_t step_id,
                         int64_t count, PerStepState* out_pss,
                         std::unique_ptr<ProfileHandler>* out_ph);
   absl::Status DoRunWithLocalExecution(CallOptions* opts,
@@ -240,7 +243,7 @@ class MasterSession : public core::RefCounted {
                              const RunCallableRequest& req,
                              RunCallableResponse* resp);
   absl::Status PostRunCleanup(MasterSession::ReffedClientGraph* rcg,
-                              uint64 step_id, const RunOptions& run_options,
+                              uint64_t step_id, const RunOptions& run_options,
                               PerStepState* pss,
                               const std::unique_ptr<ProfileHandler>& ph,
                               const absl::Status& run_status,
diff --git a/tensorflow/core/distributed_runtime/master_test.cc b/tensorflow/core/distributed_runtime/master_test.cc
index ed6461c63b07ac..8269f1dca201cd 100644
--- a/tensorflow/core/distributed_runtime/master_test.cc
+++ b/tensorflow/core/distributed_runtime/master_test.cc
@@ -44,7 +44,7 @@ namespace tensorflow {
 class MasterTest : public ::testing::Test {
  protected:
   MasterTest() {
-    std::vector<string> targets;
+    std::vector<std::string> targets;
     SessionOptions options;
     (*options.config.mutable_device_count())["CPU"] = 1;
     (*options.config.mutable_device_count())["GPU"] = 0;
@@ -64,7 +64,7 @@ class MasterTest : public ::testing::Test {

   // Helpers for MasterService.{CreateSession,RunStep,CloseSession}
   // rpc calls.
-  absl::Status CreateSession(const GraphDef& def, string* handle,
+  absl::Status CreateSession(const GraphDef& def, std::string* handle,
                              int64_t* initial_version) {
     ::grpc::ClientContext ctx;
     CreateSessionRequest req;
@@ -81,7 +81,7 @@ class MasterTest : public ::testing::Test {
     return s;
   }

-  absl::Status ExtendSession(const string& handle, const GraphDef& def,
+  absl::Status ExtendSession(const std::string& handle, const GraphDef& def,
                              int64_t current_version, int64_t* new_version) {
     ::grpc::ClientContext ctx;
     ExtendSessionRequest req;
@@ -98,21 +98,21 @@ class MasterTest : public ::testing::Test {
   }

   absl::Status RunStep(
-      const string& handle,
-      const std::vector<std::pair<string, const Tensor*> >& feed,
-      const std::map<string, Tensor*>& fetch) {
+      const std::string& handle,
+      const std::vector<std::pair<std::string, const Tensor*> >& feed,
+      const std::map<std::string, Tensor*>& fetch) {
     ::grpc::ClientContext ctx;
     RunStepRequest req;
     req.set_session_handle(handle);
     for (const auto& p : feed) {
-      const string& feed_name = p.first;
+      const std::string& feed_name = p.first;
       const Tensor* feed_tensor = p.second;
       auto f = req.add_feed();
       f->set_name(feed_name);
       feed_tensor->AsProtoTensorContent(f->mutable_tensor());
     }
     for (const auto& p : fetch) {
-      const string& fetch_name = p.first;
+      const std::string& fetch_name = p.first;
       req.add_fetch(fetch_name);
     }
     RunStepResponse resp;
@@ -127,7 +127,7 @@ class MasterTest : public ::testing::Test {
     return s;
   }

-  absl::Status CloseSession(const string& handle) {
+  absl::Status CloseSession(const std::string& handle) {
     ::grpc::ClientContext ctx;
     CloseSessionRequest req;
     req.set_session_handle(handle);
@@ -145,7 +145,7 @@ class MasterTest : public ::testing::Test {

 TEST_F(MasterTest, CreateClose) {
   GraphDef def;  // Empty.
-  string handle;
+  std::string handle;
   int64_t initial_version;
   TF_ASSERT_OK(CreateSession(def, &handle, &initial_version));
   EXPECT_TRUE(absl::IsAborted(CloseSession("randombits")));
@@ -164,7 +164,7 @@ TEST_F(MasterTest, ListDevices) {

 TEST_F(MasterTest, Reset) {
   GraphDef def;  // Empty.
-  string s1, s2;
+  std::string s1, s2;
   int64_t initial_version1, initial_version2;
   TF_ASSERT_OK(CreateSession(def, &s1, &initial_version1));
   TF_ASSERT_OK(CreateSession(def, &s2, &initial_version2));
@@ -175,7 +175,7 @@ TEST_F(MasterTest, Reset) {

 TEST_F(MasterTest, Extend) {
   GraphDef def_0;  // Empty.
-  string handle;
+  std::string handle;
   int64_t initial_version;
   TF_ASSERT_OK(CreateSession(def_0, &handle, &initial_version));

@@ -216,7 +216,7 @@ TEST_F(MasterTest, Extend) {

 TEST_F(MasterTest, ExtendUpdateStatefulFails) {
   GraphDef def_0;  // Empty.
-  string handle;
+  std::string handle;
   int64_t initial_version;
   TF_ASSERT_OK(CreateSession(def_0, &handle, &initial_version));

@@ -235,7 +235,7 @@ TEST_F(MasterTest, ExtendUpdateStatefulFails) {

 TEST_F(MasterTest, ExtendTwiceFails) {
   GraphDef def_0;  // Empty.
-  string handle;
+  std::string handle;
   int64_t initial_version;
   TF_ASSERT_OK(CreateSession(def_0, &handle, &initial_version));

@@ -254,7 +254,7 @@ TEST_F(MasterTest, ExtendTwiceFails) {

 TEST_F(MasterTest, ConcurrentExtendOnlyOneSucceeds) {
   GraphDef def_0;  // Empty.
- string handle; + std::string handle; int64_t initial_version; TF_ASSERT_OK(CreateSession(def_0, &handle, &initial_version)); @@ -306,7 +306,7 @@ TEST_F(MasterTest, ConcurrentExtendAndRun) { GraphDef def_0; test::graph::ToGraphDef(&graph_0, &def_0); - string handle; + std::string handle; int64_t initial_version; TF_ASSERT_OK(CreateSession(def_0, &handle, &initial_version)); @@ -388,7 +388,7 @@ TEST_F(MasterTest, EigenProblem) { GraphDef def; test::graph::ToGraphDef(&graph, &def); - string handle; + std::string handle; int64_t initial_version; TF_CHECK_OK(CreateSession(def, &handle, &initial_version)); diff --git a/tensorflow/core/distributed_runtime/message_wrappers.cc b/tensorflow/core/distributed_runtime/message_wrappers.cc index 60a264565dbb61..7eabcadcc173bf 100644 --- a/tensorflow/core/distributed_runtime/message_wrappers.cc +++ b/tensorflow/core/distributed_runtime/message_wrappers.cc @@ -38,24 +38,24 @@ bool ParseTensorProtoToTensor(const TensorProto& tensor_proto, return false; } -const string& InMemoryRunStepRequest::session_handle() const { +const std::string& InMemoryRunStepRequest::session_handle() const { return session_handle_; } -void InMemoryRunStepRequest::set_session_handle(const string& handle) { +void InMemoryRunStepRequest::set_session_handle(const std::string& handle) { session_handle_ = handle; } -const string& InMemoryRunStepRequest::partial_run_handle() const { +const std::string& InMemoryRunStepRequest::partial_run_handle() const { return partial_run_handle_; } -void InMemoryRunStepRequest::set_partial_run_handle(const string& handle) { +void InMemoryRunStepRequest::set_partial_run_handle(const std::string& handle) { partial_run_handle_ = handle; } size_t InMemoryRunStepRequest::num_feeds() const { return feeds_.size(); } -const string& InMemoryRunStepRequest::feed_name(size_t i) const { +const std::string& InMemoryRunStepRequest::feed_name(size_t i) const { return feeds_[i].first; } @@ -71,23 +71,24 @@ absl::Status InMemoryRunStepRequest::FeedValue(size_t i, return absl::OkStatus(); } -void InMemoryRunStepRequest::add_feed(const string& name, const Tensor& value) { +void InMemoryRunStepRequest::add_feed(const std::string& name, + const Tensor& value) { feeds_.emplace_back(name, value); } size_t InMemoryRunStepRequest::num_fetches() const { return fetches_.size(); } -const string& InMemoryRunStepRequest::fetch_name(size_t i) const { +const std::string& InMemoryRunStepRequest::fetch_name(size_t i) const { return fetches_[i]; } -void InMemoryRunStepRequest::add_fetch(const string& name) { +void InMemoryRunStepRequest::add_fetch(const std::string& name) { fetches_.push_back(name); } size_t InMemoryRunStepRequest::num_targets() const { return targets_.size(); } -const string& InMemoryRunStepRequest::target_name(size_t i) const { +const std::string& InMemoryRunStepRequest::target_name(size_t i) const { return targets_[i]; } -void InMemoryRunStepRequest::add_target(const string& name) { +void InMemoryRunStepRequest::add_target(const std::string& name) { targets_.push_back(name); } @@ -108,7 +109,7 @@ void InMemoryRunStepRequest::set_store_errors_in_response_body( store_errors_in_response_body_ = store_errors; } -string InMemoryRunStepRequest::DebugString() const { +std::string InMemoryRunStepRequest::DebugString() const { return ToProto().DebugString(); } @@ -133,24 +134,25 @@ const RunStepRequest& InMemoryRunStepRequest::ToProto() const { return *proto_version_; } -const string& MutableProtoRunStepRequest::session_handle() const { +const std::string& 
MutableProtoRunStepRequest::session_handle() const { return request_.session_handle(); } -void MutableProtoRunStepRequest::set_session_handle(const string& handle) { +void MutableProtoRunStepRequest::set_session_handle(const std::string& handle) { request_.set_session_handle(handle); } -const string& MutableProtoRunStepRequest::partial_run_handle() const { +const std::string& MutableProtoRunStepRequest::partial_run_handle() const { return request_.partial_run_handle(); } -void MutableProtoRunStepRequest::set_partial_run_handle(const string& handle) { +void MutableProtoRunStepRequest::set_partial_run_handle( + const std::string& handle) { request_.set_partial_run_handle(handle); } size_t MutableProtoRunStepRequest::num_feeds() const { return request_.feed_size(); } -const string& MutableProtoRunStepRequest::feed_name(size_t i) const { +const std::string& MutableProtoRunStepRequest::feed_name(size_t i) const { return request_.feed(i).name(); } absl::Status MutableProtoRunStepRequest::FeedValue(size_t i, @@ -168,7 +170,7 @@ absl::Status MutableProtoRunStepRequest::FeedValue( return absl::OkStatus(); } -void MutableProtoRunStepRequest::add_feed(const string& name, +void MutableProtoRunStepRequest::add_feed(const std::string& name, const Tensor& value) { NamedTensorProto* feed = request_.add_feed(); feed->set_name(name); @@ -180,10 +182,10 @@ size_t MutableProtoRunStepRequest::num_fetches() const { return request_.fetch_size(); } -const string& MutableProtoRunStepRequest::fetch_name(size_t i) const { +const std::string& MutableProtoRunStepRequest::fetch_name(size_t i) const { return request_.fetch(i); } -void MutableProtoRunStepRequest::add_fetch(const string& name) { +void MutableProtoRunStepRequest::add_fetch(const std::string& name) { request_.add_fetch(name); } @@ -191,11 +193,11 @@ size_t MutableProtoRunStepRequest::num_targets() const { return request_.target_size(); } -const string& MutableProtoRunStepRequest::target_name(size_t i) const { +const std::string& MutableProtoRunStepRequest::target_name(size_t i) const { return request_.target(i); } -void MutableProtoRunStepRequest::add_target(const string& name) { +void MutableProtoRunStepRequest::add_target(const std::string& name) { request_.add_target(name); } @@ -220,7 +222,7 @@ int64_t MutableProtoRunStepRequest::request_id() const { return request_.request_id(); } -string MutableProtoRunStepRequest::DebugString() const { +std::string MutableProtoRunStepRequest::DebugString() const { return request_.DebugString(); } @@ -231,17 +233,17 @@ const RunStepRequest& MutableProtoRunStepRequest::ToProto() const { ProtoRunStepRequest::ProtoRunStepRequest(const RunStepRequest* request) : request_(request) {} -const string& ProtoRunStepRequest::session_handle() const { +const std::string& ProtoRunStepRequest::session_handle() const { return request_->session_handle(); } -const string& ProtoRunStepRequest::partial_run_handle() const { +const std::string& ProtoRunStepRequest::partial_run_handle() const { return request_->partial_run_handle(); } size_t ProtoRunStepRequest::num_feeds() const { return request_->feed_size(); } -const string& ProtoRunStepRequest::feed_name(size_t i) const { +const std::string& ProtoRunStepRequest::feed_name(size_t i) const { return request_->feed(i).name(); } @@ -264,7 +266,7 @@ size_t ProtoRunStepRequest::num_fetches() const { return request_->fetch_size(); } -const string& ProtoRunStepRequest::fetch_name(size_t i) const { +const std::string& ProtoRunStepRequest::fetch_name(size_t i) const { return request_->fetch(i); } 
@@ -272,7 +274,7 @@ size_t ProtoRunStepRequest::num_targets() const { return request_->target_size(); } -const string& ProtoRunStepRequest::target_name(size_t i) const { +const std::string& ProtoRunStepRequest::target_name(size_t i) const { return request_->target(i); } @@ -288,13 +290,13 @@ int64_t ProtoRunStepRequest::request_id() const { return request_->request_id(); } -string ProtoRunStepRequest::DebugString() const { +std::string ProtoRunStepRequest::DebugString() const { return request_->DebugString(); } const RunStepRequest& ProtoRunStepRequest::ToProto() const { return *request_; } -const string& InMemoryRunGraphRequest::session_handle() const { +const std::string& InMemoryRunGraphRequest::session_handle() const { return session_handle_; } @@ -302,7 +304,7 @@ bool InMemoryRunGraphRequest::create_worker_session_called() const { return create_worker_session_called_; } -void InMemoryRunGraphRequest::set_session_handle(const string& handle) { +void InMemoryRunGraphRequest::set_session_handle(const std::string& handle) { session_handle_ = handle; } @@ -310,11 +312,11 @@ void InMemoryRunGraphRequest::set_create_worker_session_called(bool called) { create_worker_session_called_ = called; } -const string& InMemoryRunGraphRequest::graph_handle() const { +const std::string& InMemoryRunGraphRequest::graph_handle() const { return graph_handle_; } -void InMemoryRunGraphRequest::set_graph_handle(const string& handle) { +void InMemoryRunGraphRequest::set_graph_handle(const std::string& handle) { graph_handle_ = handle; } @@ -334,7 +336,7 @@ ExecutorOpts* InMemoryRunGraphRequest::mutable_exec_opts() { size_t InMemoryRunGraphRequest::num_sends() const { return sends_.size(); } -const string& InMemoryRunGraphRequest::send_key(size_t i) const { +const std::string& InMemoryRunGraphRequest::send_key(size_t i) const { return sends_[i].first; } @@ -346,7 +348,7 @@ absl::Status InMemoryRunGraphRequest::SendValue(size_t i, absl::Status InMemoryRunGraphRequest::AddSendFromRunStepRequest( const RunStepRequestWrapper& run_step_request, size_t i, - const string& send_key) { + const std::string& send_key) { Tensor tensor; TF_RETURN_IF_ERROR(run_step_request.FeedValue(i, &tensor)); sends_.emplace_back(send_key, std::move(tensor)); @@ -355,7 +357,7 @@ absl::Status InMemoryRunGraphRequest::AddSendFromRunStepRequest( absl::Status InMemoryRunGraphRequest::AddSendFromRunCallableRequest( const RunCallableRequest& run_callable_request, size_t i, - const string& send_key) { + const std::string& send_key) { Tensor tensor; if (!ParseTensorProtoToTensor(run_callable_request.feed(i), &tensor)) { return errors::InvalidArgument("Invalid TensorProto for feed value ", i); @@ -366,11 +368,11 @@ absl::Status InMemoryRunGraphRequest::AddSendFromRunCallableRequest( size_t InMemoryRunGraphRequest::num_recvs() const { return recvs_.size(); } -const string& InMemoryRunGraphRequest::recv_key(size_t i) const { +const std::string& InMemoryRunGraphRequest::recv_key(size_t i) const { return recvs_[i]; } -void InMemoryRunGraphRequest::add_recv_key(const string& recv_key) { +void InMemoryRunGraphRequest::add_recv_key(const std::string& recv_key) { recvs_.push_back(recv_key); } @@ -430,11 +432,12 @@ const RunGraphRequest& InMemoryRunGraphRequest::ToProto() const { return *proto_version_; } -const string& MutableProtoRunGraphRequest::session_handle() const { +const std::string& MutableProtoRunGraphRequest::session_handle() const { return request_.session_handle(); } -void MutableProtoRunGraphRequest::set_session_handle(const string& handle) { 
+void MutableProtoRunGraphRequest::set_session_handle( + const std::string& handle) { request_.set_session_handle(handle); } @@ -447,11 +450,11 @@ void MutableProtoRunGraphRequest::set_create_worker_session_called( request_.set_create_worker_session_called(called); } -const string& MutableProtoRunGraphRequest::graph_handle() const { +const std::string& MutableProtoRunGraphRequest::graph_handle() const { return request_.graph_handle(); } -void MutableProtoRunGraphRequest::set_graph_handle(const string& handle) { +void MutableProtoRunGraphRequest::set_graph_handle(const std::string& handle) { request_.set_graph_handle(handle); } @@ -475,7 +478,7 @@ size_t MutableProtoRunGraphRequest::num_sends() const { return request_.send_size(); } -const string& MutableProtoRunGraphRequest::send_key(size_t i) const { +const std::string& MutableProtoRunGraphRequest::send_key(size_t i) const { return request_.send(i).name(); } @@ -490,7 +493,7 @@ absl::Status MutableProtoRunGraphRequest::SendValue(size_t i, absl::Status MutableProtoRunGraphRequest::AddSendFromRunStepRequest( const RunStepRequestWrapper& run_step_request, size_t i, - const string& send_key) { + const std::string& send_key) { NamedTensorProto* send = request_.add_send(); send->set_name(send_key); TF_RETURN_IF_ERROR(run_step_request.FeedValue(i, send->mutable_tensor())); @@ -499,7 +502,7 @@ absl::Status MutableProtoRunGraphRequest::AddSendFromRunStepRequest( absl::Status MutableProtoRunGraphRequest::AddSendFromRunCallableRequest( const RunCallableRequest& run_callable_request, size_t i, - const string& send_key) { + const std::string& send_key) { NamedTensorProto* send = request_.add_send(); send->set_name(send_key); *send->mutable_tensor() = run_callable_request.feed(i); @@ -510,11 +513,11 @@ size_t MutableProtoRunGraphRequest::num_recvs() const { return request_.recv_key_size(); } -const string& MutableProtoRunGraphRequest::recv_key(size_t i) const { +const std::string& MutableProtoRunGraphRequest::recv_key(size_t i) const { return request_.recv_key(i); } -void MutableProtoRunGraphRequest::add_recv_key(const string& recv_key) { +void MutableProtoRunGraphRequest::add_recv_key(const std::string& recv_key) { request_.add_recv_key(recv_key); } @@ -559,7 +562,7 @@ const RunGraphRequest& MutableProtoRunGraphRequest::ToProto() const { ProtoRunGraphRequest::ProtoRunGraphRequest(const RunGraphRequest* request) : request_(request) {} -const string& ProtoRunGraphRequest::session_handle() const { +const std::string& ProtoRunGraphRequest::session_handle() const { return request_->session_handle(); } @@ -567,7 +570,7 @@ bool ProtoRunGraphRequest::create_worker_session_called() const { return request_->create_worker_session_called(); } -const string& ProtoRunGraphRequest::graph_handle() const { +const std::string& ProtoRunGraphRequest::graph_handle() const { return request_->graph_handle(); } @@ -579,7 +582,7 @@ const ExecutorOpts& ProtoRunGraphRequest::exec_opts() const { size_t ProtoRunGraphRequest::num_sends() const { return request_->send_size(); } -const string& ProtoRunGraphRequest::send_key(size_t i) const { +const std::string& ProtoRunGraphRequest::send_key(size_t i) const { return request_->send(i).name(); } @@ -596,7 +599,7 @@ size_t ProtoRunGraphRequest::num_recvs() const { return request_->recv_key_size(); } -const string& ProtoRunGraphRequest::recv_key(size_t i) const { +const std::string& ProtoRunGraphRequest::recv_key(size_t i) const { return request_->recv_key(i); } @@ -620,7 +623,7 @@ const RunGraphRequest& ProtoRunGraphRequest::ToProto() 
const { size_t InMemoryRunGraphResponse::num_recvs() const { return recvs_.size(); } -const string& InMemoryRunGraphResponse::recv_key(size_t i) const { +const std::string& InMemoryRunGraphResponse::recv_key(size_t i) const { return recvs_[i].first; } @@ -635,7 +638,8 @@ absl::Status InMemoryRunGraphResponse::RecvValue(size_t i, Tensor* out_tensor) { return absl::OkStatus(); } -void InMemoryRunGraphResponse::AddRecv(const string& key, const Tensor& value) { +void InMemoryRunGraphResponse::AddRecv(const std::string& key, + const Tensor& value) { recvs_.emplace_back(key, value); } @@ -679,7 +683,7 @@ size_t OwnedProtoRunGraphResponse::num_recvs() const { return response_.recv_size(); } -const string& OwnedProtoRunGraphResponse::recv_key(size_t i) const { +const std::string& OwnedProtoRunGraphResponse::recv_key(size_t i) const { return response_.recv(i).name(); } @@ -698,7 +702,7 @@ absl::Status OwnedProtoRunGraphResponse::RecvValue(size_t i, } } -void OwnedProtoRunGraphResponse::AddRecv(const string& key, +void OwnedProtoRunGraphResponse::AddRecv(const std::string& key, const Tensor& value) { NamedTensorProto* recv = response_.add_recv(); recv->set_name(key); @@ -752,7 +756,7 @@ size_t NonOwnedProtoRunGraphResponse::num_recvs() const { return response_->recv_size(); } -const string& NonOwnedProtoRunGraphResponse::recv_key(size_t i) const { +const std::string& NonOwnedProtoRunGraphResponse::recv_key(size_t i) const { return response_->recv(i).name(); } @@ -771,7 +775,7 @@ absl::Status NonOwnedProtoRunGraphResponse::RecvValue(size_t i, } } -void NonOwnedProtoRunGraphResponse::AddRecv(const string& key, +void NonOwnedProtoRunGraphResponse::AddRecv(const std::string& key, const Tensor& value) { NamedTensorProto* recv = response_->add_recv(); recv->set_name(key); @@ -823,7 +827,7 @@ MutableRunStepResponseWrapper::~MutableRunStepResponseWrapper() {} size_t InMemoryRunStepResponse::num_tensors() const { return tensors_.size(); } -const string& InMemoryRunStepResponse::tensor_name(size_t i) const { +const std::string& InMemoryRunStepResponse::tensor_name(size_t i) const { return tensors_[i].first; } @@ -838,7 +842,8 @@ const RunMetadata& InMemoryRunStepResponse::metadata() const { } absl::Status InMemoryRunStepResponse::AddTensorFromRunGraphResponse( - const string& name, MutableRunGraphResponseWrapper* wrapper, size_t i) { + const std::string& name, MutableRunGraphResponseWrapper* wrapper, + size_t i) { Tensor tensor; TF_RETURN_IF_ERROR(wrapper->RecvValue(i, &tensor)); tensors_.emplace_back(name, tensor); @@ -866,7 +871,7 @@ size_t OwnedProtoRunStepResponse::num_tensors() const { return response_.tensor_size(); } -const string& OwnedProtoRunStepResponse::tensor_name(size_t i) const { +const std::string& OwnedProtoRunStepResponse::tensor_name(size_t i) const { return response_.tensor(i).name(); } @@ -884,7 +889,7 @@ const RunMetadata& OwnedProtoRunStepResponse::metadata() const { } absl::Status OwnedProtoRunStepResponse::AddTensorFromRunGraphResponse( - const string& name, MutableRunGraphResponseWrapper* run_graph_response, + const std::string& name, MutableRunGraphResponseWrapper* run_graph_response, size_t i) { NamedTensorProto* response_tensor = response_.add_tensor(); response_tensor->set_name(name); @@ -919,7 +924,7 @@ size_t NonOwnedProtoRunStepResponse::num_tensors() const { return response_->tensor_size(); } -const string& NonOwnedProtoRunStepResponse::tensor_name(size_t i) const { +const std::string& NonOwnedProtoRunStepResponse::tensor_name(size_t i) const { return 
response_->tensor(i).name();
 }

@@ -937,7 +942,7 @@ const RunMetadata& NonOwnedProtoRunStepResponse::metadata() const {
 }

 absl::Status NonOwnedProtoRunStepResponse::AddTensorFromRunGraphResponse(
-    const string& name, MutableRunGraphResponseWrapper* run_graph_response,
+    const std::string& name, MutableRunGraphResponseWrapper* run_graph_response,
     size_t i) {
   NamedTensorProto* response_tensor = response_->add_tensor();
   response_tensor->set_name(name);
diff --git a/tensorflow/core/distributed_runtime/message_wrappers.h b/tensorflow/core/distributed_runtime/message_wrappers.h
index d4b07fb51ce4a3..b911d23245b4ad 100644
--- a/tensorflow/core/distributed_runtime/message_wrappers.h
+++ b/tensorflow/core/distributed_runtime/message_wrappers.h
@@ -53,15 +53,15 @@ class RunStepRequestWrapper {
   // REQUIRED: session_handle must be returned by a CreateSession call
   // to the same master service.
-  virtual const string& session_handle() const = 0;
+  virtual const std::string& session_handle() const = 0;

   // Partial run handle (optional). If specified, this will be a partial run
   // execution, run up to the specified fetches.
-  virtual const string& partial_run_handle() const = 0;
+  virtual const std::string& partial_run_handle() const = 0;

   // Tensors to be fed in the step. Each feed is a named tensor.
   virtual size_t num_feeds() const = 0;
-  virtual const string& feed_name(size_t i) const = 0;
+  virtual const std::string& feed_name(size_t i) const = 0;

   // Stores the content of the feed value at index `i` in `tensor`.
   virtual absl::Status FeedValue(size_t i, Tensor* out_tensor) const = 0;

@@ -71,12 +71,12 @@ class RunStepRequestWrapper {
   // be returned for each fetch[i] (see RunStepResponse.tensor). The
   // order of specified fetches does not change the execution order.
   virtual size_t num_fetches() const = 0;
-  virtual const string& fetch_name(size_t i) const = 0;
+  virtual const std::string& fetch_name(size_t i) const = 0;

   // Target Nodes. A list of node names. The named nodes will be run
   // to but their outputs will not be fetched.
   virtual size_t num_targets() const = 0;
-  virtual const string& target_name(size_t i) const = 0;
+  virtual const std::string& target_name(size_t i) const = 0;

   // Options for the run call.
   virtual const RunOptions& options() const = 0;
@@ -94,7 +94,7 @@ class RunStepRequestWrapper {
   virtual int64_t request_id() const = 0;

   // Returns a human-readable representation of this message for debugging.
-  virtual string DebugString() const = 0;
+  virtual std::string DebugString() const = 0;

   // Returns the wrapped data as a protocol buffer message.
   virtual const RunStepRequest& ToProto() const = 0;
@@ -105,11 +105,11 @@
 // See `RunStepRequestWrapper` above for a description of the fields.
class MutableRunStepRequestWrapper : public RunStepRequestWrapper { public: - virtual void set_session_handle(const string& handle) = 0; - virtual void set_partial_run_handle(const string& handle) = 0; - virtual void add_feed(const string& name, const Tensor& value) = 0; - virtual void add_fetch(const string& name) = 0; - virtual void add_target(const string& name) = 0; + virtual void set_session_handle(const std::string& handle) = 0; + virtual void set_partial_run_handle(const std::string& handle) = 0; + virtual void add_feed(const std::string& name, const Tensor& value) = 0; + virtual void add_fetch(const std::string& name) = 0; + virtual void add_target(const std::string& name) = 0; virtual RunOptions* mutable_options() = 0; virtual void set_store_errors_in_response_body(bool store_errors) = 0; }; @@ -119,37 +119,37 @@ class MutableRunStepRequestWrapper : public RunStepRequestWrapper { class InMemoryRunStepRequest : public MutableRunStepRequestWrapper { public: // RunStepRequestWrapper methods. - const string& session_handle() const override; - const string& partial_run_handle() const override; + const std::string& session_handle() const override; + const std::string& partial_run_handle() const override; size_t num_feeds() const override; - const string& feed_name(size_t i) const override; + const std::string& feed_name(size_t i) const override; absl::Status FeedValue(size_t i, Tensor* out_tensor) const override; absl::Status FeedValue(size_t i, TensorProto* out_tensor) const override; size_t num_fetches() const override; - const string& fetch_name(size_t i) const override; + const std::string& fetch_name(size_t i) const override; size_t num_targets() const override; - const string& target_name(size_t i) const override; + const std::string& target_name(size_t i) const override; const RunOptions& options() const override; - string DebugString() const override; + std::string DebugString() const override; const RunStepRequest& ToProto() const override; bool store_errors_in_response_body() const override; int64_t request_id() const override; // MutableRunStepRequestWrapper methods. - void set_session_handle(const string& handle) override; - void set_partial_run_handle(const string& handle) override; - void add_feed(const string& name, const Tensor& value) override; - void add_fetch(const string& name) override; - void add_target(const string& name) override; + void set_session_handle(const std::string& handle) override; + void set_partial_run_handle(const std::string& handle) override; + void add_feed(const std::string& name, const Tensor& value) override; + void add_fetch(const std::string& name) override; + void add_target(const std::string& name) override; RunOptions* mutable_options() override; void set_store_errors_in_response_body(bool store_errors) override; private: - string session_handle_; - string partial_run_handle_; - absl::InlinedVector, 4UL> feeds_; - absl::InlinedVector fetches_; - absl::InlinedVector targets_; + std::string session_handle_; + std::string partial_run_handle_; + absl::InlinedVector, 4UL> feeds_; + absl::InlinedVector fetches_; + absl::InlinedVector targets_; RunOptions options_; bool store_errors_in_response_body_ = false; @@ -170,28 +170,28 @@ class InMemoryRunStepRequest : public MutableRunStepRequestWrapper { class MutableProtoRunStepRequest : public MutableRunStepRequestWrapper { public: // RunStepRequestWrapper methods. 
- const string& session_handle() const override; - const string& partial_run_handle() const override; + const std::string& session_handle() const override; + const std::string& partial_run_handle() const override; size_t num_feeds() const override; - const string& feed_name(size_t i) const override; + const std::string& feed_name(size_t i) const override; absl::Status FeedValue(size_t i, Tensor* out_tensor) const override; absl::Status FeedValue(size_t i, TensorProto* out_tensor) const override; size_t num_fetches() const override; - const string& fetch_name(size_t i) const override; + const std::string& fetch_name(size_t i) const override; size_t num_targets() const override; - const string& target_name(size_t i) const override; + const std::string& target_name(size_t i) const override; const RunOptions& options() const override; - string DebugString() const override; + std::string DebugString() const override; const RunStepRequest& ToProto() const override; bool store_errors_in_response_body() const override; int64_t request_id() const override; // MutableRunStepRequestWrapper methods. - void set_session_handle(const string& handle) override; - void set_partial_run_handle(const string& handle) override; - void add_feed(const string& name, const Tensor& value) override; - void add_fetch(const string& name) override; - void add_target(const string& name) override; + void set_session_handle(const std::string& handle) override; + void set_partial_run_handle(const std::string& handle) override; + void add_feed(const std::string& name, const Tensor& value) override; + void add_fetch(const std::string& name) override; + void add_target(const std::string& name) override; RunOptions* mutable_options() override; void set_store_errors_in_response_body(bool store_errors) override; @@ -211,18 +211,18 @@ class ProtoRunStepRequest : public RunStepRequestWrapper { ProtoRunStepRequest(const RunStepRequest* request); // RunStepRequestWrapper methods. - const string& session_handle() const override; - const string& partial_run_handle() const override; + const std::string& session_handle() const override; + const std::string& partial_run_handle() const override; size_t num_feeds() const override; - const string& feed_name(size_t i) const override; + const std::string& feed_name(size_t i) const override; absl::Status FeedValue(size_t i, Tensor* out_tensor) const override; absl::Status FeedValue(size_t i, TensorProto* out_tensor) const override; size_t num_fetches() const override; - const string& fetch_name(size_t i) const override; + const std::string& fetch_name(size_t i) const override; size_t num_targets() const override; - const string& target_name(size_t i) const override; + const std::string& target_name(size_t i) const override; const RunOptions& options() const override; - string DebugString() const override; + std::string DebugString() const override; const RunStepRequest& ToProto() const override; bool store_errors_in_response_body() const override; int64_t request_id() const override; @@ -254,14 +254,14 @@ class RunGraphRequestWrapper { // The session handle used to register the graph. If empty, a single global // namespace is used. - virtual const string& session_handle() const = 0; + virtual const std::string& session_handle() const = 0; // Set to true if `CreateWorkerSession` was called for `session_handle`. virtual bool create_worker_session_called() const = 0; // REQUIRED: graph_handle must be returned by a RegisterGraph call // to the same WorkerService. 
- virtual const string& graph_handle() const = 0; + virtual const std::string& graph_handle() const = 0; // A unique ID to distinguish different runs of the same graph. // @@ -276,12 +276,12 @@ class RunGraphRequestWrapper { // Sends the tensors in "send" into the graph before the run. virtual size_t num_sends() const = 0; - virtual const string& send_key(size_t i) const = 0; + virtual const std::string& send_key(size_t i) const = 0; virtual absl::Status SendValue(size_t i, Tensor* out_tensor) const = 0; // Fetches the keys into `RunGraphResponse.recv` after the run. virtual size_t num_recvs() const = 0; - virtual const string& recv_key(size_t i) const = 0; + virtual const std::string& recv_key(size_t i) const = 0; // True if the RunGraphRequest is a partial run request. virtual bool is_partial() const = 0; @@ -307,9 +307,9 @@ class RunGraphRequestWrapper { // See `RunGraphRequestWrapper` above for a description of the fields. class MutableRunGraphRequestWrapper : public RunGraphRequestWrapper { public: - virtual void set_session_handle(const string& handle) = 0; + virtual void set_session_handle(const std::string& handle) = 0; virtual void set_create_worker_session_called(bool called) = 0; - virtual void set_graph_handle(const string& handle) = 0; + virtual void set_graph_handle(const std::string& handle) = 0; virtual void set_step_id(int64_t step_id) = 0; virtual ExecutorOpts* mutable_exec_opts() = 0; @@ -317,12 +317,12 @@ class MutableRunGraphRequestWrapper : public RunGraphRequestWrapper { // request with the given `send_key`. virtual absl::Status AddSendFromRunStepRequest( const RunStepRequestWrapper& run_step_request, size_t i, - const string& send_key) = 0; + const std::string& send_key) = 0; virtual absl::Status AddSendFromRunCallableRequest( const RunCallableRequest& run_callable_request, size_t i, - const string& send_key) = 0; + const std::string& send_key) = 0; - virtual void add_recv_key(const string& recv_key) = 0; + virtual void add_recv_key(const std::string& recv_key) = 0; virtual void set_is_partial(bool is_partial) = 0; virtual void set_is_last_partial_run(bool is_last_partial_run) = 0; virtual void set_store_errors_in_response_body(bool store_errors) = 0; @@ -332,16 +332,16 @@ class MutableRunGraphRequestWrapper : public RunGraphRequestWrapper { class InMemoryRunGraphRequest : public MutableRunGraphRequestWrapper { public: // RunGraphRequestWrapper methods. - const string& session_handle() const override; - const string& graph_handle() const override; + const std::string& session_handle() const override; + const std::string& graph_handle() const override; bool create_worker_session_called() const override; int64_t step_id() const override; const ExecutorOpts& exec_opts() const override; size_t num_sends() const override; - const string& send_key(size_t i) const override; + const std::string& send_key(size_t i) const override; absl::Status SendValue(size_t i, Tensor* out_tensor) const override; size_t num_recvs() const override; - const string& recv_key(size_t i) const override; + const std::string& recv_key(size_t i) const override; bool is_partial() const override; bool is_last_partial_run() const override; const RunGraphRequest& ToProto() const override; @@ -349,31 +349,31 @@ class InMemoryRunGraphRequest : public MutableRunGraphRequestWrapper { int64_t request_id() const override; // MutableRunGraphRequestWrapper methods. 
- void set_session_handle(const string& handle) override; + void set_session_handle(const std::string& handle) override; void set_create_worker_session_called(bool called) override; - void set_graph_handle(const string& handle) override; + void set_graph_handle(const std::string& handle) override; void set_step_id(int64_t step_id) override; ExecutorOpts* mutable_exec_opts() override; absl::Status AddSendFromRunStepRequest( const RunStepRequestWrapper& run_step_request, size_t i, - const string& send_key) override; + const std::string& send_key) override; absl::Status AddSendFromRunCallableRequest( const RunCallableRequest& run_callable_request, size_t i, - const string& send_key) override; - void add_recv_key(const string& recv_key) override; + const std::string& send_key) override; + void add_recv_key(const std::string& recv_key) override; void set_is_partial(bool is_partial) override; void set_is_last_partial_run(bool is_last_partial_run) override; void set_store_errors_in_response_body(bool store_errors) override; void set_request_id(int64_t request_id) override; private: - string session_handle_; + std::string session_handle_; bool create_worker_session_called_ = false; - string graph_handle_; + std::string graph_handle_; int64_t step_id_; ExecutorOpts exec_opts_; - absl::InlinedVector, 4UL> sends_; - absl::InlinedVector recvs_; + absl::InlinedVector, 4UL> sends_; + absl::InlinedVector recvs_; bool is_partial_ = false; bool is_last_partial_run_ = false; bool store_errors_in_response_body_ = false; @@ -392,16 +392,16 @@ class InMemoryRunGraphRequest : public MutableRunGraphRequestWrapper { class MutableProtoRunGraphRequest : public MutableRunGraphRequestWrapper { public: // RunGraphRequestWrapper methods. - const string& session_handle() const override; + const std::string& session_handle() const override; bool create_worker_session_called() const override; - const string& graph_handle() const override; + const std::string& graph_handle() const override; int64_t step_id() const override; const ExecutorOpts& exec_opts() const override; size_t num_sends() const override; - const string& send_key(size_t i) const override; + const std::string& send_key(size_t i) const override; absl::Status SendValue(size_t i, Tensor* out_tensor) const override; size_t num_recvs() const override; - const string& recv_key(size_t i) const override; + const std::string& recv_key(size_t i) const override; bool is_partial() const override; bool is_last_partial_run() const override; bool store_errors_in_response_body() const override; @@ -409,18 +409,18 @@ class MutableProtoRunGraphRequest : public MutableRunGraphRequestWrapper { const RunGraphRequest& ToProto() const override; // MutableRunGraphRequestWrapper methods. 
- void set_session_handle(const string& handle) override; + void set_session_handle(const std::string& handle) override; void set_create_worker_session_called(bool called) override; - void set_graph_handle(const string& handle) override; + void set_graph_handle(const std::string& handle) override; void set_step_id(int64_t step_id) override; ExecutorOpts* mutable_exec_opts() override; absl::Status AddSendFromRunStepRequest( const RunStepRequestWrapper& run_step_request, size_t i, - const string& send_key) override; + const std::string& send_key) override; absl::Status AddSendFromRunCallableRequest( const RunCallableRequest& run_callable_request, size_t i, - const string& send_key) override; - void add_recv_key(const string& recv_key) override; + const std::string& send_key) override; + void add_recv_key(const std::string& recv_key) override; void set_is_partial(bool is_partial) override; void set_is_last_partial_run(bool is_last_partial_run) override; void set_store_errors_in_response_body(bool store_errors) override; @@ -435,16 +435,16 @@ class ProtoRunGraphRequest : public RunGraphRequestWrapper { ProtoRunGraphRequest(const RunGraphRequest* request); // RunGraphRequestWrapper methods. - const string& session_handle() const override; + const std::string& session_handle() const override; bool create_worker_session_called() const override; - const string& graph_handle() const override; + const std::string& graph_handle() const override; int64_t step_id() const override; const ExecutorOpts& exec_opts() const override; size_t num_sends() const override; - const string& send_key(size_t i) const override; + const std::string& send_key(size_t i) const override; absl::Status SendValue(size_t i, Tensor* out_tensor) const override; size_t num_recvs() const override; - const string& recv_key(size_t i) const override; + const std::string& recv_key(size_t i) const override; bool is_partial() const override; bool is_last_partial_run() const override; bool store_errors_in_response_body() const override; @@ -480,12 +480,12 @@ class MutableRunGraphResponseWrapper { // A list of tensors corresponding to those requested by // `RunGraphRequest.recv_key`. virtual size_t num_recvs() const = 0; - virtual const string& recv_key(size_t i) const = 0; + virtual const std::string& recv_key(size_t i) const = 0; // NOTE: The following methods may perform a destructive read, for // efficiency. virtual absl::Status RecvValue(size_t i, TensorProto* out_tensor) = 0; virtual absl::Status RecvValue(size_t i, Tensor* out_tensor) = 0; - virtual void AddRecv(const string& key, const Tensor& value) = 0; + virtual void AddRecv(const std::string& key, const Tensor& value) = 0; // Submessages that store performance statistics about the subgraph // execution, if necessary. @@ -520,10 +520,10 @@ class InMemoryRunGraphResponse : public MutableRunGraphResponseWrapper { public: // MutableRunGraphResponseWrapper methods. 
size_t num_recvs() const override; - const string& recv_key(size_t i) const override; + const std::string& recv_key(size_t i) const override; absl::Status RecvValue(size_t i, TensorProto* out_tensor) override; absl::Status RecvValue(size_t i, Tensor* out_tensor) override; - void AddRecv(const string& key, const Tensor& value) override; + void AddRecv(const std::string& key, const Tensor& value) override; StepStats* mutable_step_stats() override; CostGraphDef* mutable_cost_graph() override; size_t num_partition_graphs() const override; @@ -539,7 +539,7 @@ class InMemoryRunGraphResponse : public MutableRunGraphResponseWrapper { RunGraphResponse* get_proto() override; private: - absl::InlinedVector, 4UL> recvs_; + absl::InlinedVector, 4UL> recvs_; StepStats step_stats_; CostGraphDef cost_graph_; std::vector partition_graphs_; @@ -553,10 +553,10 @@ class OwnedProtoRunGraphResponse : public MutableRunGraphResponseWrapper { public: // MutableRunGraphResponseWrapper methods. size_t num_recvs() const override; - const string& recv_key(size_t i) const override; + const std::string& recv_key(size_t i) const override; absl::Status RecvValue(size_t i, TensorProto* out_tensor) override; absl::Status RecvValue(size_t i, Tensor* out_tensor) override; - void AddRecv(const string& key, const Tensor& value) override; + void AddRecv(const std::string& key, const Tensor& value) override; StepStats* mutable_step_stats() override; CostGraphDef* mutable_cost_graph() override; size_t num_partition_graphs() const override; @@ -580,10 +580,10 @@ class NonOwnedProtoRunGraphResponse : public MutableRunGraphResponseWrapper { // MutableRunGraphResponseWrapper methods. size_t num_recvs() const override; - const string& recv_key(size_t i) const override; + const std::string& recv_key(size_t i) const override; absl::Status RecvValue(size_t i, TensorProto* out_tensor) override; absl::Status RecvValue(size_t i, Tensor* out_tensor) override; - void AddRecv(const string& key, const Tensor& value) override; + void AddRecv(const std::string& key, const Tensor& value) override; StepStats* mutable_step_stats() override; CostGraphDef* mutable_cost_graph() override; size_t num_partition_graphs() const override; @@ -628,14 +628,14 @@ class MutableRunStepResponseWrapper { // NOTE: The order of the returned tensors may or may not match // the fetch order specified in RunStepRequest. virtual size_t num_tensors() const = 0; - virtual const string& tensor_name(size_t i) const = 0; + virtual const std::string& tensor_name(size_t i) const = 0; virtual absl::Status TensorValue(size_t i, Tensor* out_tensor) const = 0; // Stores the i^{th} recv value in `run_graph_response` in this // response with the given `name`. virtual absl::Status AddTensorFromRunGraphResponse( - const string& name, MutableRunGraphResponseWrapper* run_graph_response, - size_t i) = 0; + const std::string& name, + MutableRunGraphResponseWrapper* run_graph_response, size_t i) = 0; // Returned metadata if requested in the options. virtual const RunMetadata& metadata() const = 0; @@ -666,11 +666,11 @@ class InMemoryRunStepResponse : public MutableRunStepResponseWrapper { public: // MutableRunStepResponseWrapper methods. 
size_t num_tensors() const override; - const string& tensor_name(size_t i) const override; + const std::string& tensor_name(size_t i) const override; absl::Status TensorValue(size_t i, Tensor* out_tensor) const override; absl::Status AddTensorFromRunGraphResponse( - const string& name, MutableRunGraphResponseWrapper* run_graph_response, - size_t i) override; + const std::string& name, + MutableRunGraphResponseWrapper* run_graph_response, size_t i) override; const RunMetadata& metadata() const override; RunMetadata* mutable_metadata() override; absl::Status status() const override; @@ -683,7 +683,7 @@ class InMemoryRunStepResponse : public MutableRunStepResponseWrapper { RunStepResponse* get_proto() override; private: - absl::InlinedVector<std::pair<string, Tensor>, 4UL> tensors_; + absl::InlinedVector<std::pair<std::string, Tensor>, 4UL> tensors_; RunMetadata metadata_; // Store the code and message separately so that they can be updated // independently by setters. @@ -695,11 +695,11 @@ class OwnedProtoRunStepResponse : public MutableRunStepResponseWrapper { public: // MutableRunStepResponseWrapper methods. size_t num_tensors() const override; - const string& tensor_name(size_t i) const override; + const std::string& tensor_name(size_t i) const override; absl::Status TensorValue(size_t i, Tensor* out_tensor) const override; absl::Status AddTensorFromRunGraphResponse( - const string& name, MutableRunGraphResponseWrapper* run_graph_response, - size_t i) override; + const std::string& name, + MutableRunGraphResponseWrapper* run_graph_response, size_t i) override; const RunMetadata& metadata() const override; RunMetadata* mutable_metadata() override; absl::Status status() const override; @@ -720,11 +720,11 @@ class NonOwnedProtoRunStepResponse : public MutableRunStepResponseWrapper { // MutableRunStepResponseWrapper methods.
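// For context: the in-memory wrapper classes above all share one storage
// pattern, an absl::InlinedVector of (key, Tensor) pairs with index-based
// accessors. A minimal standalone sketch of that pattern follows; the class
// name and the placeholder Tensor type are hypothetical stand-ins, not the
// actual message_wrappers implementation.
#include <cstddef>
#include <string>
#include <utility>
#include "absl/container/inlined_vector.h"

struct Tensor {};  // Placeholder for tensorflow::Tensor in this sketch.

class InMemoryRecvs {
 public:
  size_t num_recvs() const { return recvs_.size(); }
  const std::string& recv_key(size_t i) const { return recvs_[i].first; }
  const Tensor& recv_value(size_t i) const { return recvs_[i].second; }
  void AddRecv(const std::string& key, const Tensor& value) {
    recvs_.emplace_back(key, value);
  }

 private:
  // Inline capacity of 4 keeps the common small case off the heap, matching
  // the "4UL" used in the members above.
  absl::InlinedVector<std::pair<std::string, Tensor>, 4> recvs_;
};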
size_t num_tensors() const override; - const string& tensor_name(size_t i) const override; + const std::string& tensor_name(size_t i) const override; absl::Status TensorValue(size_t i, Tensor* out_tensor) const override; absl::Status AddTensorFromRunGraphResponse( - const string& name, MutableRunGraphResponseWrapper* run_graph_response, - size_t i) override; + const std::string& name, + MutableRunGraphResponseWrapper* run_graph_response, size_t i) override; const RunMetadata& metadata() const override; RunMetadata* mutable_metadata() override; absl::Status status() const override; diff --git a/tensorflow/core/distributed_runtime/message_wrappers_test.cc b/tensorflow/core/distributed_runtime/message_wrappers_test.cc index f64d476d6dde3e..9f0af827eee591 100644 --- a/tensorflow/core/distributed_runtime/message_wrappers_test.cc +++ b/tensorflow/core/distributed_runtime/message_wrappers_test.cc @@ -27,13 +27,13 @@ namespace { Tensor TensorA() { Tensor a_tensor(DT_INT32, TensorShape({2, 2})); - test::FillValues(&a_tensor, {3, 2, -1, 0}); + test::FillValues(&a_tensor, {3, 2, -1, 0}); return a_tensor; } Tensor TensorB() { Tensor b_tensor(DT_INT32, TensorShape({1, 2})); - test::FillValues(&b_tensor, {1, 2}); + test::FillValues(&b_tensor, {1, 2}); return b_tensor; } @@ -57,9 +57,9 @@ void CheckRunStepRequest(const RunStepRequestWrapper& request) { EXPECT_EQ("feed_b:0", request.feed_name(1)); Tensor val; TF_EXPECT_OK(request.FeedValue(0, &val)); - test::ExpectTensorEqual(TensorA(), val); + test::ExpectTensorEqual(TensorA(), val); TF_EXPECT_OK(request.FeedValue(1, &val)); - test::ExpectTensorEqual(TensorB(), val); + test::ExpectTensorEqual(TensorB(), val); EXPECT_EQ(2, request.num_fetches()); EXPECT_EQ("fetch_x:0", request.fetch_name(0)); @@ -92,9 +92,9 @@ void CheckRunGraphRequest(const RunGraphRequestWrapper& request) { EXPECT_EQ(2, request.num_sends()); Tensor val; TF_EXPECT_OK(request.SendValue(0, &val)); - test::ExpectTensorEqual(TensorA(), val); + test::ExpectTensorEqual(TensorA(), val); TF_EXPECT_OK(request.SendValue(1, &val)); - test::ExpectTensorEqual(TensorB(), val); + test::ExpectTensorEqual(TensorB(), val); EXPECT_TRUE(request.is_partial()); EXPECT_FALSE(request.is_last_partial_run()); } @@ -117,9 +117,9 @@ void CheckRunGraphResponse(MutableRunGraphResponseWrapper* response) { EXPECT_EQ("recv_3", response->recv_key(1)); Tensor val; TF_EXPECT_OK(response->RecvValue(0, &val)); - test::ExpectTensorEqual(TensorA(), val); + test::ExpectTensorEqual(TensorA(), val); TF_EXPECT_OK(response->RecvValue(1, &val)); - test::ExpectTensorEqual(TensorB(), val); + test::ExpectTensorEqual(TensorB(), val); ASSERT_EQ(1, response->mutable_step_stats()->dev_stats_size()); EXPECT_EQ("/cpu:0", response->mutable_step_stats()->dev_stats(0).device()); ASSERT_EQ(1, response->mutable_cost_graph()->node_size()); @@ -152,9 +152,9 @@ void CheckRunStepResponse(const MutableRunStepResponseWrapper& response) { EXPECT_EQ("fetch_y:0", response.tensor_name(1)); Tensor val; TF_EXPECT_OK(response.TensorValue(0, &val)); - test::ExpectTensorEqual(TensorA(), val); + test::ExpectTensorEqual(TensorA(), val); TF_EXPECT_OK(response.TensorValue(1, &val)); - test::ExpectTensorEqual(TensorB(), val); + test::ExpectTensorEqual(TensorB(), val); ASSERT_EQ(1, response.metadata().step_stats().dev_stats_size()); EXPECT_EQ("/cpu:0", response.metadata().step_stats().dev_stats(0).device()); ASSERT_EQ(1, response.metadata().partition_graphs_size()); diff --git a/tensorflow/core/distributed_runtime/recent_request_ids.cc 
b/tensorflow/core/distributed_runtime/recent_request_ids.cc index f75390b26bd338..f98da9aa19629e 100644 --- a/tensorflow/core/distributed_runtime/recent_request_ids.cc +++ b/tensorflow/core/distributed_runtime/recent_request_ids.cc @@ -61,7 +61,7 @@ bool RecentRequestIds::Insert(int64_t request_id) { } absl::Status RecentRequestIds::TrackUnique(int64_t request_id, - const string& method_name, + const std::string& method_name, const protobuf::Message& request) { if (Insert(request_id)) { return absl::OkStatus(); diff --git a/tensorflow/core/distributed_runtime/recent_request_ids.h b/tensorflow/core/distributed_runtime/recent_request_ids.h index 2eb35ac7266c6c..0299d3d9289118 100644 --- a/tensorflow/core/distributed_runtime/recent_request_ids.h +++ b/tensorflow/core/distributed_runtime/recent_request_ids.h @@ -60,11 +60,11 @@ class RecentRequestIds { // num_tracked_request_ids insertions. For backwards compatibility, this // always returns OK for request_id 0. The method_name and the request's // ShortDebugString are added to returned errors. - absl::Status TrackUnique(int64_t request_id, const string& method_name, + absl::Status TrackUnique(int64_t request_id, const std::string& method_name, const protobuf::Message& request); // Overloaded version of the above function for wrapped protos. template - absl::Status TrackUnique(int64_t request_id, const string& method_name, + absl::Status TrackUnique(int64_t request_id, const std::string& method_name, const RequestWrapper* wrapper); private: @@ -88,7 +88,7 @@ class RecentRequestIds { template absl::Status RecentRequestIds::TrackUnique(int64_t request_id, - const string& method_name, + const std::string& method_name, const RequestWrapper* wrapper) { if (Insert(request_id)) { return absl::OkStatus(); diff --git a/tensorflow/core/distributed_runtime/remote_device.cc b/tensorflow/core/distributed_runtime/remote_device.cc index ad8ac2080ab833..5bcf27d54abd1c 100644 --- a/tensorflow/core/distributed_runtime/remote_device.cc +++ b/tensorflow/core/distributed_runtime/remote_device.cc @@ -53,7 +53,7 @@ class RemoteDevice : public Device { bool IsRemoteCallAllowed() const override { return true; } private: - const string local_dev_name_; + const std::string local_dev_name_; RemoteDevice(const RemoteDevice&) = delete; void operator=(const RemoteDevice&) = delete; @@ -78,7 +78,8 @@ void AsRemoteDevices( } void NewRemoteDevices(Env* env, WorkerCacheInterface* worker_cache, - const string& worker_name, NewRemoteDevicesDone done) { + const std::string& worker_name, + NewRemoteDevicesDone done) { WorkerInterface* wi = worker_cache->GetOrCreateWorker(worker_name); if (wi == nullptr) { std::vector empty; diff --git a/tensorflow/core/distributed_runtime/remote_device.h b/tensorflow/core/distributed_runtime/remote_device.h index 591531f94d567f..806123ed71b205 100644 --- a/tensorflow/core/distributed_runtime/remote_device.h +++ b/tensorflow/core/distributed_runtime/remote_device.h @@ -62,7 +62,8 @@ void AsRemoteDevices( typedef std::function*)> NewRemoteDevicesDone; void NewRemoteDevices(Env* env, WorkerCacheInterface* worker_cache, - const string& worker_name, NewRemoteDevicesDone done); + const std::string& worker_name, + NewRemoteDevicesDone done); // Create Remote Device based on the given attributes. 
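// For context on the RecentRequestIds::TrackUnique signature changes above:
// the class rejects request ids seen recently so retried RPCs are not run
// twice. A rough, single-threaded sketch of that idea (hypothetical names,
// not the actual RecentRequestIds implementation; locking omitted, and
// num_tracked is assumed positive):
#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_set>
#include <vector>
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"

class RecentIdTracker {
 public:
  explicit RecentIdTracker(int num_tracked) : ring_(num_tracked, 0) {}

  absl::Status TrackUnique(int64_t request_id, const std::string& method) {
    if (request_id == 0) return absl::OkStatus();  // 0 means "unset".
    if (!seen_.insert(request_id).second) {
      return absl::AlreadyExistsError(
          absl::StrCat("Duplicate request id for ", method));
    }
    // Evict the oldest tracked id so memory stays bounded.
    seen_.erase(ring_[next_]);
    ring_[next_] = request_id;
    next_ = (next_ + 1) % ring_.size();
    return absl::OkStatus();
  }

 private:
  std::vector<int64_t> ring_;
  std::unordered_set<int64_t> seen_;
  size_t next_ = 0;
};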
std::unique_ptr NewRemoteDevice(Env* env, diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc index 946b245a3e8fce..154eb09ee9d5ff 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc @@ -127,7 +127,7 @@ class GrpcEagerClientThread : public core::RefCounted { class GrpcEagerClient : public EagerClient { public: GrpcEagerClient(const tensorflow::SharedGrpcChannelPtr& channel, - GrpcEagerClientThread* thread, const string& target) + GrpcEagerClientThread* thread, const std::string& target) : stub_(channel), thread_(thread), target_(target) { // Hold a reference to make sure the corresponding EagerClientThread // outlives the client. @@ -266,13 +266,13 @@ class GrpcEagerClient : public EagerClient { private: ::grpc::GenericStub stub_; const GrpcEagerClientThread* thread_; - const string target_; + const std::string target_; ::grpc::CompletionQueue* cq_; mutable mutex mu_; - std::unordered_map> + std::unordered_map> enqueue_dispatchers_ TF_GUARDED_BY(mu_); StatusCallback callback_wrapper(StatusCallback done) { @@ -313,7 +313,7 @@ class GrpcEagerClientCache : public EagerClientCache { ~GrpcEagerClientCache() override { threads_.clear(); } - absl::Status GetClient(const string& target, + absl::Status GetClient(const std::string& target, core::RefCountPtr* client) override { mutex_lock l(clients_mu_); auto it = clients_.find(target); @@ -342,7 +342,7 @@ class GrpcEagerClientCache : public EagerClientCache { TF_GUARDED_BY(assignment_mu_); size_t next_round_robin_assignment_ TF_GUARDED_BY(assignment_mu_); - size_t AssignClientToThread(const string& target) { + size_t AssignClientToThread(const std::string& target) { // Round-robin target assignment, but keeps the same target on the same // polling thread always, as this is important for gRPC performance mutex_lock lock(assignment_mu_); @@ -358,7 +358,7 @@ class GrpcEagerClientCache : public EagerClientCache { std::shared_ptr cache_; mutable mutex clients_mu_; - std::unordered_map> clients_ + std::unordered_map> clients_ TF_GUARDED_BY(clients_mu_); std::vector> threads_; }; diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client_test.cc b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client_test.cc index 2d64e07794d41a..3a11ef95274fbc 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client_test.cc +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client_test.cc @@ -48,7 +48,7 @@ TEST(GrpcEagerClientCache, TestGetClientThreadSafety) { for (int i = 0; i < num_calls; i++) { Env::Default()->SchedClosure([&client_cache, i, &counter]() { - string target = absl::StrCat("/job:worker/replica:0/task:", i); + std::string target = absl::StrCat("/job:worker/replica:0/task:", i); core::RefCountPtr eager_client; absl::Status s = client_cache->GetClient(target, &eager_client); // With 6 tasks added to the job, querying client for 0--5 should be OK, diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.cc b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.cc index b9bea2ea437a7a..33d567c56a0a63 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.cc +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.cc @@ -45,7 +45,7 @@ GrpcEagerServiceImpl::GrpcEagerServiceImpl( } absl::Status 
GrpcEagerServiceImpl::CreateMasterContext( - const tensorflow::uint64 context_id, EagerContext* context) { + const uint64_t context_id, EagerContext* context) { return local_impl_.CreateMasterContext(context_id, context); } diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.h b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.h index 083ad55b3f4841..62ee6e9f13a9f0 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.h +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_service_impl.h @@ -50,8 +50,7 @@ class GrpcEagerServiceImpl : public tsl::AsyncServiceInterface { virtual ~GrpcEagerServiceImpl() {} // Create a master context in eager service. - absl::Status CreateMasterContext(tensorflow::uint64 context_id, - EagerContext* context); + absl::Status CreateMasterContext(uint64_t context_id, EagerContext* context); void HandleRPCsLoop() override; void Shutdown() override; @@ -136,7 +135,7 @@ class GrpcEagerServiceImpl : public tsl::AsyncServiceInterface { // streaming connection. absl::Status status = local_impl_.Enqueue( /*call_opts=*/nullptr, &call->request(), call->mutable_response(), - reinterpret_cast(static_cast(call))); + reinterpret_cast(static_cast(call))); if (status.ok()) { VLOG(1) << "local_impl_.Enqueue completed successfully"; diff --git a/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.cc b/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.cc index 1af67bdb51b3ca..b80045c28f08cf 100644 --- a/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.cc +++ b/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.cc @@ -30,7 +30,7 @@ RpcCollectiveExecutorMgr::RpcCollectiveExecutorMgr( std::unique_ptr dev_resolver, std::unique_ptr param_resolver, std::unique_ptr nccl_communicator, - WorkerCacheInterface* worker_cache, const string& task_name) + WorkerCacheInterface* worker_cache, const std::string& task_name) : CollectiveExecutorMgr(config, dev_mgr, std::move(dev_resolver), std::move(param_resolver), std::move(nccl_communicator)), @@ -172,7 +172,8 @@ void RpcCollectiveExecutorMgr::RetireStepId(int64_t graph_key, std::unique_ptr CreateProdRpcCollectiveExecutorMgr( const ConfigProto& config, const DeviceMgr* device_mgr, std::unique_ptr nccl_communicator, - WorkerCacheInterface* worker_cache, const string& default_worker_name) { + WorkerCacheInterface* worker_cache, + const std::string& default_worker_name) { auto dev_resolver = std::make_unique(device_mgr); auto param_resolver = std::make_unique( config, device_mgr, dev_resolver.get(), nccl_communicator.get(), diff --git a/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.h b/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.h index 6836204cc1a289..aadbaf33796437 100644 --- a/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.h +++ b/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.h @@ -39,7 +39,7 @@ class RpcCollectiveExecutorMgr : public CollectiveExecutorMgr { std::unique_ptr dev_resolver, std::unique_ptr param_resolver, std::unique_ptr nccl_communicator, - WorkerCacheInterface* worker_cache, const string& task_name); + WorkerCacheInterface* worker_cache, const std::string& task_name); virtual ~RpcCollectiveExecutorMgr(); @@ -60,8 +60,8 @@ class RpcCollectiveExecutorMgr : public CollectiveExecutorMgr { virtual CollectiveExecutor* Create(int64_t step_id) override; WorkerCacheInterface* const worker_cache_; // Not owned. 
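// For context on the reinterpret_cast in the Enqueue call above: the call
// object's address is reused as a numeric stream id, so the cast only needs
// a fixed-width integer wide enough to hold a pointer. A minimal sketch of
// that pattern with standard types (hypothetical helper, not the TF code):
#include <cstdint>

uint64_t StreamIdFor(const void* call) {
  // uintptr_t is guaranteed to hold a pointer value; uint64_t matches it on
  // the 64-bit platforms this code targets.
  return static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(call));
}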
- const string task_name_; - string group_leader_; + const std::string task_name_; + std::string group_leader_; friend class RpcCollectiveExecutorMgrTest; private: @@ -88,7 +88,7 @@ class RpcCollectiveExecutorMgr : public CollectiveExecutorMgr { std::unique_ptr CreateProdRpcCollectiveExecutorMgr( const ConfigProto& config, const DeviceMgr* device_mgr, std::unique_ptr nccl_communicator, - WorkerCacheInterface* worker_cache, const string& default_worker_name); + WorkerCacheInterface* worker_cache, const std::string& default_worker_name); } // namespace tensorflow #endif // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RPC_COLLECTIVE_EXECUTOR_MGR_H_ diff --git a/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr_test.cc b/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr_test.cc index f830fd96110456..55eebf621e5882 100644 --- a/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr_test.cc +++ b/tensorflow/core/distributed_runtime/rpc_collective_executor_mgr_test.cc @@ -39,7 +39,7 @@ namespace tensorflow { class RpcCollectiveExecutorMgrTest : public ::testing::Test { protected: RpcCollectiveExecutorMgrTest() { - string task_name = "/job:localhost/replica:0/task:0"; + std::string task_name = "/job:localhost/replica:0/task:0"; SessionOptions options; options.config.mutable_experimental()->set_collective_group_leader( task_name); diff --git a/tensorflow/core/distributed_runtime/rpcbench_test.cc b/tensorflow/core/distributed_runtime/rpcbench_test.cc index 666800294ac003..70816cc8a7b556 100644 --- a/tensorflow/core/distributed_runtime/rpcbench_test.cc +++ b/tensorflow/core/distributed_runtime/rpcbench_test.cc @@ -42,7 +42,7 @@ static const int kWorkers = 60; static thread::ThreadPool* worker_threads; void MakeGRPCCluster(const SessionOptions& options, int n, - std::vector* workers, + std::vector* workers, std::vector* devices) { CHECK_GE(n, 1); @@ -100,7 +100,7 @@ void MakeGRPCCluster(const SessionOptions& options, int n, struct Cluster { SessionOptions options; - std::vector workers; + std::vector workers; std::vector devices; // One per process Cluster() { @@ -153,14 +153,14 @@ GraphDef CreateGraphDef(int num_stages, int width, int tensor_size, return def; } -string DebugString(const Tensor& x, const Tensor& y, int tensor_size) { +std::string DebugString(const Tensor& x, const Tensor& y, int tensor_size) { CHECK_EQ(x.NumElements(), tensor_size); CHECK_EQ(y.NumElements(), tensor_size); auto x_flat = x.flat(); auto y_flat = y.flat(); // Just print the first couple of elements of each tensor CHECK_GE(tensor_size, 2); - return strings::Printf("x = [%8.6f %8.6f] y = [%8.6f %8.6f]", x_flat(0), + return absl::StrFormat("x = [%8.6f %8.6f] y = [%8.6f %8.6f]", x_flat(0), x_flat(1), y_flat(0), y_flat(1)); } diff --git a/tensorflow/core/distributed_runtime/scheduler.h b/tensorflow/core/distributed_runtime/scheduler.h index 4385db786ff38a..d277bdab74e835 100644 --- a/tensorflow/core/distributed_runtime/scheduler.h +++ b/tensorflow/core/distributed_runtime/scheduler.h @@ -86,7 +86,7 @@ class GreedyScheduler { const CostModel* cost_model_; const Graph* graph_; std::vector* priority_; - std::unordered_map device_states_; + std::unordered_map device_states_; GreedyScheduler(const GreedyScheduler&) = delete; void operator=(const GreedyScheduler&) = delete; diff --git a/tensorflow/core/distributed_runtime/server_lib.cc b/tensorflow/core/distributed_runtime/server_lib.cc index 2f7cc4184662f4..527dd49507c607 100644 --- a/tensorflow/core/distributed_runtime/server_lib.cc +++ 
b/tensorflow/core/distributed_runtime/server_lib.cc @@ -28,7 +28,7 @@ mutex* get_server_factory_lock() { return &server_factory_lock; } -typedef std::unordered_map ServerFactories; +typedef std::unordered_map ServerFactories; ServerFactories* server_factories() { static ServerFactories* factories = new ServerFactories; return factories; @@ -36,7 +36,7 @@ ServerFactories* server_factories() { } // namespace /* static */ -void ServerFactory::Register(const string& server_type, +void ServerFactory::Register(const std::string& server_type, ServerFactory* factory) { mutex_lock l(*get_server_factory_lock()); if (!server_factories()->insert({server_type, factory}).second) { @@ -56,7 +56,7 @@ absl::Status ServerFactory::GetFactory(const ServerDef& server_def, } } - std::vector server_names; + std::vector server_names; for (const auto& server_factory : *server_factories()) { server_names.push_back(server_factory.first); } diff --git a/tensorflow/core/distributed_runtime/server_lib.h b/tensorflow/core/distributed_runtime/server_lib.h index cc92d0bae12b17..c49d47970b4ca0 100644 --- a/tensorflow/core/distributed_runtime/server_lib.h +++ b/tensorflow/core/distributed_runtime/server_lib.h @@ -64,7 +64,7 @@ class ServerInterface { // Returns a target string that can be used to connect to this server using // `tensorflow::NewSession()`. - virtual const string target() const = 0; + virtual const std::string target() const = 0; virtual WorkerEnv* worker_env() = 0; virtual MasterEnv* master_env() = 0; @@ -77,7 +77,7 @@ class ServerInterface { // Add master eager context to local eager service in order to handle enqueue // requests from remote workers. virtual absl::Status AddMasterEagerContextToEagerService( - const tensorflow::uint64 context_id, EagerContext* context) = 0; + const uint64_t context_id, EagerContext* context) = 0; // Set coordination service agent instance to coordination service RPC handler virtual absl::Status SetCoordinationServiceAgentInstance( tsl::CoordinationServiceAgent* agent) = 0; @@ -113,7 +113,7 @@ class ServerFactory { // be registered by calling this method. // // The `server_type` must be unique to the server factory. - static void Register(const string& server_type, ServerFactory* factory); + static void Register(const std::string& server_type, ServerFactory* factory); // Looks up a factory that can create a server based on the given // `server_def`, and stores it in `*out_factory`. 
Returns OK on diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc index 1990f0c17c66a4..43524d19a35788 100644 --- a/tensorflow/core/distributed_runtime/tensor_coding.cc +++ b/tensorflow/core/distributed_runtime/tensor_coding.cc @@ -114,14 +114,14 @@ enum WireType { WIRETYPE_VARINT = 0, WIRETYPE_LENGTH_DELIMITED = 2, }; -inline int GetTagFieldNumber(uint32 tag) { return tag >> 3; } -inline WireType GetTagWireType(uint32 tag) { +inline int GetTagFieldNumber(uint32_t tag) { return tag >> 3; } +inline WireType GetTagWireType(uint32_t tag) { return static_cast(tag & 0x7); } bool ReadVarintSizeAsInt(protobuf::io::CodedInputStream* input, int* result) { protobuf_uint64 v; - if (input->ReadVarint64(&v) && v <= static_cast(INT_MAX)) { + if (input->ReadVarint64(&v) && v <= static_cast(INT_MAX)) { *result = static_cast(v); return true; } else { @@ -162,7 +162,7 @@ bool TensorResponse::ParseTensorSubmessage( } switch (tag) { case TensorProto::kDtypeFieldNumber: { - uint32 v; + uint32_t v; if ((wt != WIRETYPE_VARINT) || !input->ReadVarint32(&v)) return false; if (seen_tensor_content) return false; tensor_meta->set_dtype(static_cast(static_cast(v))); @@ -177,10 +177,10 @@ bool TensorResponse::ParseTensorSubmessage( break; } case TensorProto::kVersionNumberFieldNumber: { - uint32 v; + uint32_t v; if ((wt != WIRETYPE_VARINT) || !input->ReadVarint32(&v)) return false; if (seen_tensor_content) return false; - tensor_meta->set_version_number(static_cast(v)); + tensor_meta->set_version_number(static_cast(v)); break; } case TensorProto::kTensorContentFieldNumber: { @@ -242,7 +242,7 @@ bool TensorResponse::ParseFast(Source* source) { break; } case RecvTensorResponse::kIsDeadFieldNumber: { - uint32 v; + uint32_t v; if ((wt != WIRETYPE_VARINT) || !input.ReadVarint32(&v)) return false; meta_.set_is_dead(v != 0); break; @@ -260,7 +260,7 @@ bool TensorResponse::ParseFast(Source* source) { break; } case RecvTensorResponse::kRequireAckFieldNumber: { - uint32 v; + uint32_t v; if ((wt != WIRETYPE_VARINT) || !input.ReadVarint32(&v)) return false; meta_.set_require_ack(v != 0); break; diff --git a/tensorflow/core/distributed_runtime/tensor_coding_test.cc b/tensorflow/core/distributed_runtime/tensor_coding_test.cc index 9ef513f70392e4..66ba2bdce86b3a 100644 --- a/tensorflow/core/distributed_runtime/tensor_coding_test.cc +++ b/tensorflow/core/distributed_runtime/tensor_coding_test.cc @@ -48,7 +48,7 @@ class DummyDevice : public DeviceBase { class StringSource : public TensorResponse::Source { public: - explicit StringSource(const string* s, int block_size) + explicit StringSource(const std::string* s, int block_size) : s_(s), stream_(nullptr), block_size_(block_size) {} ~StringSource() override { DeleteStream(); } @@ -66,7 +66,7 @@ class StringSource : public TensorResponse::Source { } private: - const string* s_; + const std::string* s_; protobuf::io::ArrayInputStream* stream_; char space_[sizeof(protobuf::io::ArrayInputStream)]; int block_size_; @@ -83,7 +83,7 @@ class TensorResponseTest : public ::testing::Test { } else { src.AsProtoField(proto.mutable_tensor()); } - string encoded; + std::string encoded; proto.AppendToString(&encoded); StringSource source(&encoded, 1024); @@ -136,11 +136,11 @@ class TensorResponseTest : public ::testing::Test { TEST_F(TensorResponseTest, Simple) { DoTest(DT_FLOAT); DoTest(DT_DOUBLE); - DoTest(DT_INT32); - DoTest(DT_UINT16); - DoTest(DT_UINT8); - DoTest(DT_INT16); - DoTest(DT_INT8); + DoTest(DT_INT32); + 
DoTest(DT_UINT16); + DoTest(DT_UINT8); + DoTest(DT_INT16); + DoTest(DT_INT8); DoTest(DT_COMPLEX64); DoTest(DT_COMPLEX128); DoTest(DT_INT64); @@ -156,19 +156,19 @@ TEST_F(TensorResponseTest, Simple) { TEST_F(TensorResponseTest, StringTensor) { DoTestForStrings(DT_STRING); } -string MakeFloatTensorTestCase(int num_elems) { - std::vector v(num_elems); +std::string MakeFloatTensorTestCase(int num_elems) { + std::vector v(num_elems); for (int i = 0; i < num_elems; i++) { v[i] = i % 10; } Tensor src(DT_INT8, TensorShape({1, static_cast(v.size())})); - test::FillValues(&src, v); + test::FillValues(&src, v); RecvTensorResponse proto; proto.set_is_dead(false); proto.set_send_start_micros(123456); src.AsProtoTensorContent(proto.mutable_tensor()); - string encoded; + std::string encoded; proto.AppendToString(&encoded); return encoded; } @@ -176,7 +176,7 @@ string MakeFloatTensorTestCase(int num_elems) { static void BM_TensorResponse(::testing::benchmark::State& state) { const int arg = state.range(0); - string encoded = MakeFloatTensorTestCase(arg); + std::string encoded = MakeFloatTensorTestCase(arg); DummyDevice cpu_device(Env::Default()); size_t bytes = 0; for (auto i : state) { diff --git a/tensorflow/core/distributed_runtime/test_utils.h b/tensorflow/core/distributed_runtime/test_utils.h index e7ad1041dd73ff..b7316299e051c3 100644 --- a/tensorflow/core/distributed_runtime/test_utils.h +++ b/tensorflow/core/distributed_runtime/test_utils.h @@ -124,23 +124,24 @@ class TestWorkerCache : public WorkerCacheInterface { public: virtual ~TestWorkerCache() {} - void AddWorker(const string& target, WorkerInterface* wi) { + void AddWorker(const std::string& target, WorkerInterface* wi) { workers_[target] = wi; } - void AddDevice(const string& device_name, const DeviceLocality& dev_loc) { + void AddDevice(const std::string& device_name, + const DeviceLocality& dev_loc) { localities_[device_name] = dev_loc; } - void ListWorkers(std::vector* workers) const override { + void ListWorkers(std::vector* workers) const override { workers->clear(); for (auto it : workers_) { workers->push_back(it.first); } } - void ListWorkersInJob(const string& job_name, - std::vector* workers) const override { + void ListWorkersInJob(const std::string& job_name, + std::vector* workers) const override { workers->clear(); for (auto it : workers_) { DeviceNameUtils::ParsedName device_name; @@ -152,7 +153,7 @@ class TestWorkerCache : public WorkerCacheInterface { } } - WorkerInterface* GetOrCreateWorker(const string& target) override { + WorkerInterface* GetOrCreateWorker(const std::string& target) override { auto it = workers_.find(target); if (it != workers_.end()) { return it->second; @@ -160,7 +161,8 @@ class TestWorkerCache : public WorkerCacheInterface { return nullptr; } - void ReleaseWorker(const string& target, WorkerInterface* worker) override {} + void ReleaseWorker(const std::string& target, + WorkerInterface* worker) override {} absl::Status GetEagerClientCache( std::unique_ptr* eager_client_cache) override { @@ -172,7 +174,7 @@ class TestWorkerCache : public WorkerCacheInterface { return errors::Unimplemented("Unimplemented."); } - bool GetDeviceLocalityNonBlocking(const string& device, + bool GetDeviceLocalityNonBlocking(const std::string& device, DeviceLocality* locality) override { auto it = localities_.find(device); if (it != localities_.end()) { @@ -182,7 +184,8 @@ class TestWorkerCache : public WorkerCacheInterface { return false; } - void GetDeviceLocalityAsync(const string& device, DeviceLocality* locality, + 
void GetDeviceLocalityAsync(const std::string& device, + DeviceLocality* locality, StatusCallback done) override { auto it = localities_.find(device); if (it != localities_.end()) { @@ -194,8 +197,8 @@ class TestWorkerCache : public WorkerCacheInterface { } protected: - std::unordered_map workers_; - std::unordered_map localities_; + std::unordered_map workers_; + std::unordered_map localities_; }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc index 9fb0a76ad866f9..04b0ee20d2cc8f 100644 --- a/tensorflow/core/distributed_runtime/worker.cc +++ b/tensorflow/core/distributed_runtime/worker.cc @@ -251,7 +251,7 @@ void Worker::DoRunGraph(CallOptions* opts, RunGraphRequestWrapper* request, if (s.ok()) { for (const auto& p : *out) { - const string& key = p.first; + const std::string& key = p.first; const Tensor& val = p.second; response->AddRecv(key, val); } @@ -271,7 +271,7 @@ void Worker::DoPartialRunGraph(CallOptions* opts, MutableRunGraphResponseWrapper* response, StatusCallback done) { const int64_t step_id = request->step_id(); - const string& graph_handle = request->graph_handle(); + const std::string& graph_handle = request->graph_handle(); TRACEPRINTF("PartialRunGraph: %lld", step_id); absl::Status s = recent_request_ids_.TrackUnique( request->request_id(), "PartialRunGraph (Worker)", request); @@ -345,7 +345,7 @@ void Worker::DoPartialRunGraph(CallOptions* opts, if (s.ok()) { // Construct and return the resp. for (const auto& p : *out) { - const string& key = p.first; + const std::string& key = p.first; const Tensor& val = p.second; response->AddRecv(key, val); } @@ -378,7 +378,7 @@ void Worker::CleanupGraphAsync(const CleanupGraphRequest* request, void Worker::CleanupAllAsync(const CleanupAllRequest* request, CleanupAllResponse* response, StatusCallback done) { - std::vector containers; + std::vector containers; for (const auto& c : request->container()) containers.push_back(c); env_->device_mgr->ClearContainers(containers); done(absl::OkStatus()); @@ -474,7 +474,7 @@ void Worker::GetStepSequenceAsync(const GetStepSequenceRequest* request, absl::Status Worker::PrepareRecvTensor(const Rendezvous::ParsedKey& parsed, Device** src_dev) { // Figures out which device the tensor is hosted on. - string local_name = DeviceNameUtils::LocalName(parsed.src_device); + std::string local_name = DeviceNameUtils::LocalName(parsed.src_device); TF_RETURN_IF_ERROR(env_->device_mgr->LookupDevice(local_name, src_dev)); // Does the device have the right incarnation number we expect? diff --git a/tensorflow/core/distributed_runtime/worker_cache.h b/tensorflow/core/distributed_runtime/worker_cache.h index 1ac4de35d9788f..0612a8321d3aac 100644 --- a/tensorflow/core/distributed_runtime/worker_cache.h +++ b/tensorflow/core/distributed_runtime/worker_cache.h @@ -37,22 +37,23 @@ class WorkerCacheInterface { // Updates *workers with strings naming the remote worker tasks to // which open channels have been established. - virtual void ListWorkers(std::vector* workers) const = 0; - virtual void ListWorkersInJob(const string& job_name, - std::vector* workers) const = 0; + virtual void ListWorkers(std::vector* workers) const = 0; + virtual void ListWorkersInJob(const std::string& job_name, + std::vector* workers) const = 0; // If "target" names a remote task for which an RPC channel exists // or can be constructed, returns a pointer to a WorkerInterface object // wrapping that channel. 
The returned value must be destroyed by // calling `this->ReleaseWorker(target, ret)` - virtual WorkerInterface* GetOrCreateWorker(const string& target) = 0; + virtual WorkerInterface* GetOrCreateWorker(const std::string& target) = 0; // Release a worker previously returned by this->GetOrCreateWorker(target). // // TODO(jeff,sanjay): Consider moving target into WorkerInterface. // TODO(jeff,sanjay): Unify all worker-cache impls and factor out a // per-rpc-subsystem WorkerInterface creator. - virtual void ReleaseWorker(const string& target, WorkerInterface* worker) { + virtual void ReleaseWorker(const std::string& target, + WorkerInterface* worker) { // Subclasses may override to reuse worker objects. delete worker; } @@ -61,13 +62,13 @@ class WorkerCacheInterface { // within its local environment. Returns true if *locality // was set, using only locally cached data. Returns false // if status data for that device was not available. Never blocks. - virtual bool GetDeviceLocalityNonBlocking(const string& device, + virtual bool GetDeviceLocalityNonBlocking(const std::string& device, DeviceLocality* locality) = 0; // Set *locality with the DeviceLocality of the specified remote device // within its local environment. Callback gets Status::OK if *locality // was set. - virtual void GetDeviceLocalityAsync(const string& device, + virtual void GetDeviceLocalityAsync(const std::string& device, DeviceLocality* locality, StatusCallback done) = 0; diff --git a/tensorflow/core/distributed_runtime/worker_cache_logger.cc b/tensorflow/core/distributed_runtime/worker_cache_logger.cc index 2936c3b2667e18..5a1d3d02d4eceb 100644 --- a/tensorflow/core/distributed_runtime/worker_cache_logger.cc +++ b/tensorflow/core/distributed_runtime/worker_cache_logger.cc @@ -68,7 +68,7 @@ bool WorkerCacheLogger::RetrieveLogs(int64_t step_id, StepStats* ss) { return false; } -void WorkerCacheLogger::Save(const string& device, int64_t step_id, +void WorkerCacheLogger::Save(const std::string& device, int64_t step_id, NodeExecStats* ns) { mutex_lock l(mu_); StepLog* sl = &log_map_[step_id]; @@ -84,33 +84,31 @@ void WorkerCacheLogger::Save(const string& device, int64_t step_id, void WorkerCacheLogger::RecordRecvTensor(int64_t step_id, int64_t start_usecs, int64_t end_usecs, - const string& tensor_name, - const string& src_device, - const string& dst_device, + const std::string& tensor_name, + const std::string& src_device, + const std::string& dst_device, int64_t bytes) { RecordDataTransfer(step_id, start_usecs, end_usecs, tensor_name, src_device, dst_device, bytes, "", "RecvTensor"); } -void WorkerCacheLogger::RecordDataTransfer(int64_t step_id, int64_t start_usecs, - int64_t end_usecs, - const string& tensor_name, - const string& src_device, - const string& dst_device, - int64_t bytes, const string& details, - const string& transfer_method_name) { +void WorkerCacheLogger::RecordDataTransfer( + int64_t step_id, int64_t start_usecs, int64_t end_usecs, + const std::string& tensor_name, const std::string& src_device, + const std::string& dst_device, int64_t bytes, const std::string& details, + const std::string& transfer_method_name) { NodeExecStats* ns = new NodeExecStats; ns->set_node_name(transfer_method_name); int64_t elapsed_usecs = end_usecs - start_usecs; if (details.empty()) { auto byte_string = absl::StrCat("[", bytes, "B] "); if (bytes >= 0.1 * 1048576.0) { - byte_string = strings::Printf("[%.1fMB] ", bytes / 1048576.0); + byte_string = absl::StrFormat("[%.1fMB] ", bytes / 1048576.0); } float mbs_rate = (8.0 * 
static_cast(bytes)) / elapsed_usecs; auto rate_string = (mbs_rate >= 1000.0) - ? strings::Printf("[%.1fGb/s] ", mbs_rate / 1000.0) - : strings::Printf("[%fMb/s] ", mbs_rate); + ? absl::StrFormat("[%.1fGb/s] ", mbs_rate / 1000.0) + : absl::StrFormat("[%fMb/s] ", mbs_rate); auto label = strings::StrCat(byte_string, rate_string, tensor_name, " from ", src_device, " to ", dst_device); ns->set_timeline_label(label); diff --git a/tensorflow/core/distributed_runtime/worker_cache_logger.h b/tensorflow/core/distributed_runtime/worker_cache_logger.h index f5ef19bf6646f7..e7a1ebf0c40708 100644 --- a/tensorflow/core/distributed_runtime/worker_cache_logger.h +++ b/tensorflow/core/distributed_runtime/worker_cache_logger.h @@ -57,20 +57,22 @@ class WorkerCacheLogger { // Generates a NodeExecStats record with the given data, and saves for // later retrieval by RetrieveLogs(). void RecordRecvTensor(int64_t step_id, int64_t start_usecs, int64_t end_usecs, - const string& tensor_name, const string& src_device, - const string& dst_device, int64_t bytes); + const std::string& tensor_name, + const std::string& src_device, + const std::string& dst_device, int64_t bytes); // Generates a NodeExecStats record with the given data, and saves for // later retrieval by RetrieveLogs(). void RecordDataTransfer(int64_t step_id, int64_t start_usecs, - int64_t end_usecs, const string& tensor_name, - const string& src_device, const string& dst_device, - int64_t bytes, const string& details, - const string& transfer_method_name); + int64_t end_usecs, const std::string& tensor_name, + const std::string& src_device, + const std::string& dst_device, int64_t bytes, + const std::string& details, + const std::string& transfer_method_name); private: mutex count_mu_; - int32 want_logging_count_ TF_GUARDED_BY(count_mu_) = 0; + int32_t want_logging_count_ TF_GUARDED_BY(count_mu_) = 0; struct StepLog { StepStats step_stats; @@ -81,7 +83,7 @@ class WorkerCacheLogger { LogMap log_map_ TF_GUARDED_BY(mu_); // Records "ns" in log_map_ under the given device and step. - void Save(const string& device, int64_t step_id, NodeExecStats* ns); + void Save(const std::string& device, int64_t step_id, NodeExecStats* ns); void ClearLogsWithLock() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_); }; diff --git a/tensorflow/core/distributed_runtime/worker_cache_partial.cc b/tensorflow/core/distributed_runtime/worker_cache_partial.cc index 58b130228e00dd..47fdcce387297d 100644 --- a/tensorflow/core/distributed_runtime/worker_cache_partial.cc +++ b/tensorflow/core/distributed_runtime/worker_cache_partial.cc @@ -27,7 +27,7 @@ limitations under the License. 
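// A note on the strings::Printf -> absl::StrFormat conversions in the logger
// code above: absl::StrFormat takes the same printf-style format string but
// type-checks its arguments at compile time. A standalone sketch of the
// byte/rate formatting pattern (hypothetical helper, not WorkerCacheLogger):
#include <cstdint>
#include <string>
#include "absl/strings/str_format.h"

std::string DescribeTransfer(int64_t bytes, int64_t elapsed_usecs) {
  std::string byte_string = absl::StrFormat("[%.1fMB] ", bytes / 1048576.0);
  float mbs_rate = (8.0 * static_cast<float>(bytes)) / elapsed_usecs;
  std::string rate_string =
      mbs_rate >= 1000.0 ? absl::StrFormat("[%.1fGb/s] ", mbs_rate / 1000.0)
                         : absl::StrFormat("[%fMb/s] ", mbs_rate);
  return byte_string + rate_string;
}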
namespace tensorflow { bool WorkerCachePartial::GetDeviceLocalityNonBlocking( - const string& device_name, DeviceLocality* locality) { + const std::string& device_name, DeviceLocality* locality) { mutex_lock lock(mu_); // could use reader lock auto iter = device_status_cache_.find(device_name); if (iter != device_status_cache_.end()) { @@ -37,7 +37,7 @@ bool WorkerCachePartial::GetDeviceLocalityNonBlocking( return false; } -void WorkerCachePartial::GetDeviceLocalityAsync(const string& device_name, +void WorkerCachePartial::GetDeviceLocalityAsync(const std::string& device_name, DeviceLocality* locality, StatusCallback done) { if (!GetDeviceLocalityNonBlocking(device_name, locality)) { @@ -55,9 +55,9 @@ void WorkerCachePartial::GetDeviceLocalityAsync(const string& device_name, } absl::Status WorkerCachePartial::RefreshDeviceStatus( - const string& device_name) { - string task; - string device; + const std::string& device_name) { + std::string task; + std::string device; absl::Status s; if (!DeviceNameUtils::SplitDeviceName(device_name, &task, &device)) { s = errors::InvalidArgument("Bad device name to RefreshDeviceStatus: ", diff --git a/tensorflow/core/distributed_runtime/worker_cache_partial.h b/tensorflow/core/distributed_runtime/worker_cache_partial.h index b5a500b86dae00..08e272a3bb6db6 100644 --- a/tensorflow/core/distributed_runtime/worker_cache_partial.h +++ b/tensorflow/core/distributed_runtime/worker_cache_partial.h @@ -31,10 +31,11 @@ namespace tensorflow { // device status attributes. class WorkerCachePartial : public WorkerCacheInterface { public: - bool GetDeviceLocalityNonBlocking(const string& device, + bool GetDeviceLocalityNonBlocking(const std::string& device, DeviceLocality* locality) override; - void GetDeviceLocalityAsync(const string& device, DeviceLocality* locality, + void GetDeviceLocalityAsync(const std::string& device, + DeviceLocality* locality, StatusCallback) override; ~WorkerCachePartial() override {} @@ -47,9 +48,9 @@ class WorkerCachePartial : public WorkerCacheInterface { // Initiate a GetStatusAsync to the remote task named by "task", and // update the cache with all the DeviceAttributes reported. - absl::Status RefreshDeviceStatus(const string& device_name); + absl::Status RefreshDeviceStatus(const std::string& device_name); - typedef std::unordered_map StatusMap; + typedef std::unordered_map StatusMap; StatusMap device_status_cache_ TF_GUARDED_BY(mu_); }; diff --git a/tensorflow/core/distributed_runtime/worker_cache_wrapper.h b/tensorflow/core/distributed_runtime/worker_cache_wrapper.h index 7f709b4fb5c1bb..8917da3825773b 100644 --- a/tensorflow/core/distributed_runtime/worker_cache_wrapper.h +++ b/tensorflow/core/distributed_runtime/worker_cache_wrapper.h @@ -29,11 +29,11 @@ class WorkerCacheWrapper : public WorkerCacheInterface { // Updates *workers with strings naming the remote worker tasks to // which open channels have been established. - void ListWorkers(std::vector* workers) const override { + void ListWorkers(std::vector* workers) const override { return wrapped_->ListWorkers(workers); } - void ListWorkersInJob(const string& job_name, - std::vector* workers) const override { + void ListWorkersInJob(const std::string& job_name, + std::vector* workers) const override { return wrapped_->ListWorkersInJob(job_name, workers); } @@ -41,7 +41,7 @@ class WorkerCacheWrapper : public WorkerCacheInterface { // or can be constructed, returns a pointer to a WorkerInterface object // wrapping that channel. 
The returned value must be destroyed by // calling `this->ReleaseWorker(target, ret)` - WorkerInterface* GetOrCreateWorker(const string& target) override { + WorkerInterface* GetOrCreateWorker(const std::string& target) override { return wrapped_->GetOrCreateWorker(target); } @@ -50,7 +50,8 @@ class WorkerCacheWrapper : public WorkerCacheInterface { // TODO(jeff,sanjay): Consider moving target into WorkerInterface. // TODO(jeff,sanjay): Unify all worker-cache impls and factor out a // per-rpc-subsystem WorkerInterface creator. - void ReleaseWorker(const string& target, WorkerInterface* worker) override { + void ReleaseWorker(const std::string& target, + WorkerInterface* worker) override { return wrapped_->ReleaseWorker(target, worker); } @@ -69,7 +70,7 @@ class WorkerCacheWrapper : public WorkerCacheInterface { // within its local environment. Returns true if *locality // was set, using only locally cached data. Returns false // if status data for that device was not available. Never blocks. - bool GetDeviceLocalityNonBlocking(const string& device, + bool GetDeviceLocalityNonBlocking(const std::string& device, DeviceLocality* locality) override { return wrapped_->GetDeviceLocalityNonBlocking(device, locality); } @@ -77,7 +78,8 @@ class WorkerCacheWrapper : public WorkerCacheInterface { // Set *locality with the DeviceLocality of the specified remote device // within its local environment. Callback gets Status::OK if *locality // was set. - void GetDeviceLocalityAsync(const string& device, DeviceLocality* locality, + void GetDeviceLocalityAsync(const std::string& device, + DeviceLocality* locality, StatusCallback done) override { return wrapped_->GetDeviceLocalityAsync(device, locality, std::move(done)); } diff --git a/tensorflow/core/distributed_runtime/worker_session.cc b/tensorflow/core/distributed_runtime/worker_session.cc index d9286d0d148843..cb66a4f845f5b7 100644 --- a/tensorflow/core/distributed_runtime/worker_session.cc +++ b/tensorflow/core/distributed_runtime/worker_session.cc @@ -43,16 +43,16 @@ class WorkerFreeListCache : public WorkerCacheInterface { } } - void ListWorkers(std::vector* workers) const override { + void ListWorkers(std::vector* workers) const override { wrapped_->ListWorkers(workers); } - void ListWorkersInJob(const string& job_name, - std::vector* workers) const override { + void ListWorkersInJob(const std::string& job_name, + std::vector* workers) const override { wrapped_->ListWorkersInJob(job_name, workers); } - WorkerInterface* GetOrCreateWorker(const string& target) override { + WorkerInterface* GetOrCreateWorker(const std::string& target) override { { // Fast path if worker has been created. tf_shared_lock l(mu_); @@ -88,16 +88,18 @@ class WorkerFreeListCache : public WorkerCacheInterface { return wrapped_->GetCoordinationClientCache(coordination_client_cache); } - void ReleaseWorker(const string& target, WorkerInterface* worker) override { + void ReleaseWorker(const std::string& target, + WorkerInterface* worker) override { // TODO(jeff,sanjay): Should decrement ref-count when we implement eviction. 
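// For context on WorkerFreeListCache::GetOrCreateWorker above: it uses a
// shared-lock fast path and only takes the exclusive lock to create a
// missing entry. A simplified standalone sketch of that double-checked
// pattern (hypothetical names; TF uses its own mutex and worker types):
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>
#include <unordered_map>

struct Worker {};  // Placeholder for WorkerInterface in this sketch.

class WorkerCacheSketch {
 public:
  Worker* GetOrCreateWorker(const std::string& target) {
    {
      // Fast path: a shared lock is enough when the worker already exists.
      std::shared_lock<std::shared_mutex> l(mu_);
      auto it = workers_.find(target);
      if (it != workers_.end()) return it->second.get();
    }
    // Slow path: exclusive lock, re-check, then create exactly once.
    std::unique_lock<std::shared_mutex> l(mu_);
    auto& slot = workers_[target];
    if (!slot) slot = std::make_unique<Worker>();
    return slot.get();
  }

 private:
  std::shared_mutex mu_;
  std::unordered_map<std::string, std::unique_ptr<Worker>> workers_;
};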
} - bool GetDeviceLocalityNonBlocking(const string& device, + bool GetDeviceLocalityNonBlocking(const std::string& device, DeviceLocality* locality) override { return wrapped_->GetDeviceLocalityNonBlocking(device, locality); } - void GetDeviceLocalityAsync(const string& device, DeviceLocality* locality, + void GetDeviceLocalityAsync(const std::string& device, + DeviceLocality* locality, StatusCallback done) override { wrapped_->GetDeviceLocalityAsync(device, locality, done); } @@ -121,13 +123,13 @@ class WorkerFreeListCache : public WorkerCacheInterface { // TODO(jeff,sanjay): Eviction when the map becomes too big. mutex mu_; - std::unordered_map workers_ TF_GUARDED_BY(mu_); + std::unordered_map workers_ TF_GUARDED_BY(mu_); }; } // namespace WorkerSession::WorkerSession( - const string& session_name, const string& worker_name, + const std::string& session_name, const std::string& worker_name, std::unique_ptr worker_cache, std::unique_ptr device_mgr, std::unique_ptr graph_mgr, std::unique_ptr remote_device_mgr, @@ -165,7 +167,7 @@ absl::Status WorkerSession::UpdateWorkerCacheAndDevices( /* static */ std::shared_ptr WorkerSession::CreateWithBorrowedDeviceMgr( - const string& session_name, const string& worker_name, + const std::string& session_name, const std::string& worker_name, std::unique_ptr worker_cache, DeviceMgr* borrowed_device_mgr, std::unique_ptr graph_mgr, std::unique_ptr remote_device_mgr, @@ -177,7 +179,7 @@ std::shared_ptr WorkerSession::CreateWithBorrowedDeviceMgr( } WorkerSession::WorkerSession( - const string& session_name, const string& worker_name, + const std::string& session_name, const std::string& worker_name, std::unique_ptr worker_cache, DeviceMgr* borrowed_device_mgr, std::unique_ptr graph_mgr, std::unique_ptr remote_device_mgr, diff --git a/tensorflow/core/distributed_runtime/worker_session.h b/tensorflow/core/distributed_runtime/worker_session.h index e366accf18075b..5f8d66d93b6c69 100644 --- a/tensorflow/core/distributed_runtime/worker_session.h +++ b/tensorflow/core/distributed_runtime/worker_session.h @@ -51,8 +51,8 @@ class WorkerSession { DynamicDeviceMgr* remote_device_mgr() { return remote_device_mgr_.get(); } - const string& session_name() const { return session_name_; } - const string& worker_name() const { return worker_name_; } + const std::string& session_name() const { return session_name_; } + const std::string& worker_name() const { return worker_name_; } WorkerCacheInterface* worker_cache() const { tf_shared_lock l(worker_session_state_mu_); @@ -64,7 +64,7 @@ class WorkerSession { return cluster_flr_.get(); } - WorkerSession(const string& session_name, const string& worker_name, + WorkerSession(const std::string& session_name, const std::string& worker_name, std::unique_ptr worker_cache, std::unique_ptr device_mgr, std::unique_ptr graph_mgr, @@ -72,7 +72,7 @@ class WorkerSession { DistributedFunctionLibraryRuntimeCreator cluster_flr_creator); static std::shared_ptr CreateWithBorrowedDeviceMgr( - const string& session_name, const string& worker_name, + const std::string& session_name, const std::string& worker_name, std::unique_ptr worker_cache, DeviceMgr* borrowed_device_mgr, std::unique_ptr graph_mgr, std::unique_ptr remote_device_mgr, @@ -98,7 +98,7 @@ class WorkerSession { ~WorkerSession(); private: - WorkerSession(const string& session_name, const string& worker_name, + WorkerSession(const std::string& session_name, const std::string& worker_name, std::unique_ptr worker_cache, DeviceMgr* borrowed_device_mgr, std::unique_ptr graph_mgr, @@ 
-106,10 +106,10 @@ class WorkerSession { DistributedFunctionLibraryRuntimeCreator cluster_flr_creator); // The name of the session. - const string session_name_; + const std::string session_name_; // The name of the worker. E.g., /job:mnist/replica:0/task:1. - const string worker_name_; + const std::string worker_name_; mutable mutex worker_session_state_mu_; // Object from which WorkerInterface instances can be obtained. diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index 9de70eb28c1b07..8a10b3d5557f42 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -739,6 +739,7 @@ cc_library( "//waymo/ml/compiler/frontend/kernels:__pkg__", "//waymo/ml/compiler/runtime/alpine/core:__pkg__", "//waymo/ml/woodshed/ops:__pkg__", + "//waymo/perception/training/point_lens/unified_dataset/python/tensorflow:__pkg__", ], deps = [ "//tensorflow/core/lib/core:refcount", @@ -1554,7 +1555,7 @@ tf_proto_library( name = "log_memory_proto", srcs = ["log_memory.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":allocation_description_proto", ":tensor_description_proto", ":tensor_shape_proto", @@ -1572,7 +1573,8 @@ tf_proto_library( name = "graph_proto", srcs = ["graph.proto"], make_default_target_header_only = True, - protodeps = [ + visibility = ["//visibility:public"], + deps = [ ":attr_value_proto", ":function_proto", ":graph_debug_info_proto", @@ -1584,14 +1586,13 @@ tf_proto_library( ":types_proto", ":versions_proto", ], - visibility = ["//visibility:public"], ) tf_proto_library( name = "node_def_proto", srcs = ["node_def.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":attr_value_proto", ":full_type_proto", ":resource_handle_proto", @@ -1623,7 +1624,7 @@ tf_proto_library( name = "tensor_description_proto", srcs = ["tensor_description.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":allocation_description_proto", ":tensor_shape_proto", ":types_proto", @@ -1641,7 +1642,7 @@ tf_proto_library( name = "resource_handle_proto", srcs = ["resource_handle.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":tensor_shape_proto", ":types_proto", ], @@ -1651,7 +1652,7 @@ tf_proto_library( name = "step_stats_proto", srcs = ["step_stats.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":allocation_description_proto", ":tensor_description_proto", ":tensor_shape_proto", @@ -1669,7 +1670,7 @@ tf_proto_library( name = "kernel_def_proto", srcs = ["kernel_def.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":attr_value_proto", ":resource_handle_proto", ":tensor_proto", @@ -1682,7 +1683,11 @@ tf_proto_library( name = "op_def_proto", srcs = ["op_def.proto"], make_default_target_header_only = True, - protodeps = [ + visibility = [ + "//tensorflow/core:__subpackages__", + "//tensorflow/python:__pkg__", + ], + deps = [ ":attr_value_proto", ":full_type_proto", ":resource_handle_proto", @@ -1690,22 +1695,12 @@ tf_proto_library( ":tensor_shape_proto", ":types_proto", ], - visibility = [ - "//tensorflow/core:__subpackages__", - "//tensorflow/python:__pkg__", - ], ) tf_proto_library( name = "attr_value_proto", srcs = ["attr_value.proto"], make_default_target_header_only = True, - protodeps = [ - ":resource_handle_proto", - ":tensor_proto", - ":tensor_shape_proto", - ":types_proto", - ], visibility = [ #internal library, "//tensorflow/core:__subpackages__", @@ -1714,20 +1709,26 @@ tf_proto_library( 
"//tensorflow/security/fuzzing:__subpackages__", "//waymo/ml/deploy/benchmark:__subpackages__", ], + deps = [ + ":resource_handle_proto", + ":tensor_proto", + ":tensor_shape_proto", + ":types_proto", + ], ) tf_proto_library( name = "full_type_proto", srcs = ["full_type.proto"], make_default_target_header_only = True, - protodeps = [], + deps = [], ) tf_proto_library( name = "tensor_proto", srcs = ["tensor.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":resource_handle_proto", ":tensor_shape_proto", ":types_proto", @@ -1744,7 +1745,7 @@ tf_proto_library( name = "api_def_proto", srcs = ["api_def.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":attr_value_proto", ":resource_handle_proto", ":tensor_proto", @@ -1757,7 +1758,7 @@ tf_proto_library( name = "cpp_shape_inference_proto", srcs = ["cpp_shape_inference.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":full_type_proto", ":tensor_shape_proto", ":types_proto", @@ -1774,7 +1775,7 @@ tf_proto_library( name = "graph_transfer_info_proto", srcs = ["graph_transfer_info.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":types_proto", ], ) @@ -1796,7 +1797,7 @@ tf_proto_library( name = "cost_graph_proto", srcs = ["cost_graph.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":tensor_shape_proto", ":types_proto", ], @@ -1812,7 +1813,10 @@ tf_proto_library( name = "function_proto", srcs = ["function.proto"], make_default_target_header_only = True, - protodeps = [ + visibility = [ + "//tensorflow/python:__pkg__", + ] + default_visibility, + deps = [ ":attr_value_proto", ":node_def_proto", ":op_def_proto", @@ -1821,9 +1825,6 @@ tf_proto_library( ":tensor_shape_proto", ":types_proto", ], - visibility = [ - "//tensorflow/python:__pkg__", - ] + default_visibility, ) # copybara:uncomment_begin(google-only) @@ -1840,14 +1841,14 @@ tf_proto_library( name = "summary_proto", srcs = ["summary.proto"], make_default_target_header_only = True, - protodeps = [ + exports = ["@local_xla//xla/tsl/protobuf:histogram_proto"], + deps = [ ":resource_handle_proto", ":tensor_proto", ":tensor_shape_proto", ":types_proto", "@local_xla//xla/tsl/protobuf:histogram_proto", ], - exports = ["@local_xla//xla/tsl/protobuf:histogram_proto"], ) tf_proto_library( @@ -1860,7 +1861,7 @@ tf_proto_library( name = "dataset_proto", srcs = ["dataset.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":tensor_proto", ":tensor_shape_proto", ":types_proto", @@ -1877,7 +1878,7 @@ tf_proto_library( name = "dataset_options_proto", srcs = ["dataset_options.proto"], make_default_target_header_only = True, - protodeps = [ + deps = [ ":model_proto", ], ) @@ -1886,24 +1887,27 @@ tf_proto_library( name = "optimized_function_graph_proto", srcs = ["optimized_function_graph.proto"], make_default_target_header_only = True, - protodeps = [ - ":types_proto", + deps = [ ":graph_proto", + ":types_proto", ], ) tf_proto_library( name = "protos_all", make_default_target_header_only = True, - protodeps = [ + tags = [ + "alt_dep=//third_party/tensorflow/core:protos_all", + ], + deps = [ ":allocation_description_proto", ":api_def_proto", - ":cpp_shape_inference_proto", ":attr_value_proto", ":cost_graph_proto", - ":dataset_proto", + ":cpp_shape_inference_proto", ":dataset_metadata_proto", ":dataset_options_proto", + ":dataset_proto", ":device_attributes_proto", ":full_type_proto", ":function_proto", @@ -1914,8 +1918,8 @@ tf_proto_library( 
":log_memory_proto", ":model_proto", ":node_def_proto", - ":optimized_function_graph_proto", ":op_def_proto", + ":optimized_function_graph_proto", ":reader_base_proto", ":resource_handle_proto", ":step_stats_proto", @@ -1928,9 +1932,6 @@ tf_proto_library( ":variable_proto", ":versions_proto", ], - tags = [ - "alt_dep=//third_party/tensorflow/core:protos_all", - ], ) tf_cc_fuzz_test( diff --git a/tensorflow/core/framework/allocator_test.cc b/tensorflow/core/framework/allocator_test.cc index ba3f396b6c3ef0..76bfb059935786 100644 --- a/tensorflow/core/framework/allocator_test.cc +++ b/tensorflow/core/framework/allocator_test.cc @@ -210,7 +210,7 @@ TEST(CPUAllocatorTest, Sizes) { TEST(CPUAllocatorTest, ProfilerReporting) { // TODO(b/196611863): Make debugging work even without GetAllocatedSize. - void* p = port::AlignedMalloc(8, 1); + void* p = tsl::port::AlignedMalloc(8, static_cast(1)); const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p); port::AlignedFree(p); if (alloc_size == 0) { diff --git a/tensorflow/core/framework/function_handle_cache.cc b/tensorflow/core/framework/function_handle_cache.cc index d0d995cbcc3712..e26467011ac2dd 100644 --- a/tensorflow/core/framework/function_handle_cache.cc +++ b/tensorflow/core/framework/function_handle_cache.cc @@ -23,7 +23,7 @@ namespace tensorflow { FunctionHandleCache::FunctionHandleCache(FunctionLibraryRuntime* lib) : lib_(lib), state_handle_( - strings::Printf("%lld", static_cast(random::New64()))) {} + absl::StrFormat("%lld", static_cast(random::New64()))) {} FunctionHandleCache::~FunctionHandleCache() { absl::Status s = Clear(); diff --git a/tensorflow/core/framework/local_rendezvous.cc b/tensorflow/core/framework/local_rendezvous.cc index 6a56c1695d35b9..36e87d36d594fd 100644 --- a/tensorflow/core/framework/local_rendezvous.cc +++ b/tensorflow/core/framework/local_rendezvous.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/status/status.h" #include "absl/strings/str_format.h" #include "xla/tsl/platform/logging.h" #include "tensorflow/core/activity_watcher/activity.h" @@ -404,8 +405,13 @@ void LocalRendezvous::DoAbort(const absl::Status& status) { mutex_lock l(mu_); status_.Update(status); } - LOG_EVERY_POW_2(INFO) << "Local rendezvous is aborting with status: " - << status; + + // OUT_OF_RANGE implies a normal end of sequence (e.g. for tf.data), + // so we suppress the warning to avoid log noise. + if (status.code() != absl::StatusCode::kOutOfRange) { + LOG_EVERY_POW_2(WARNING) + << "Local rendezvous is aborting with status: " << status; + } // Keeps one Item to make sure the current rendezvous won't be destructed. 
std::unique_ptr to_delete; diff --git a/tensorflow/core/framework/model_test.cc b/tensorflow/core/framework/model_test.cc index 6ad728f1a0de2c..16e9df7641753b 100644 --- a/tensorflow/core/framework/model_test.cc +++ b/tensorflow/core/framework/model_test.cc @@ -1657,7 +1657,7 @@ TEST_F(ModelTimingTest, TestDefaultParallelismInParallelInterleave) { const int32_t parallelism = 1; const int32_t deterministic = 1; const int32_t cycle_length = 3; - ComputeModelTiming(strings::Printf( + ComputeModelTiming(absl::StrFormat( R"pb( nodes: { key: 1 @@ -1841,7 +1841,7 @@ TEST_P(ParallelInterleaveTimingTest, ScenarioTest) { const int32_t parallelism = std::get<0>(GetParam()); const int32_t deterministic = std::get<1>(GetParam()); const int32_t cycle_length = std::get<2>(GetParam()); - ComputeModelTiming(strings::Printf( + ComputeModelTiming(absl::StrFormat( R"pb( nodes: { key: 1 diff --git a/tensorflow/core/framework/resource_mgr.cc b/tensorflow/core/framework/resource_mgr.cc index c83acfe5329311..0c59566c84261b 100644 --- a/tensorflow/core/framework/resource_mgr.cc +++ b/tensorflow/core/framework/resource_mgr.cc @@ -204,7 +204,7 @@ std::string ResourceMgr::DebugString() const { std::vector text; text.reserve(lines.size()); for (const Line& line : lines) { - text.push_back(strings::Printf( + text.push_back(absl::StrFormat( "%-20s | %-40s | %-40s | %-s", line.container->c_str(), line.type.c_str(), line.resource->c_str(), line.detail.c_str())); } diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index 5db5b0bcd74e84..fa19396557bf0a 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -1095,9 +1095,10 @@ void Tensor::ValueAndTensorBuffer::HostScalarTensorBuffer::operator delete( template Tensor::Tensor(T value, host_scalar_tag tag) { - auto* value_and_buf = static_cast*>( - port::AlignedMalloc(sizeof(typename Tensor::ValueAndTensorBuffer), - EIGEN_MAX_ALIGN_BYTES)); + auto* value_and_buf = + static_cast*>(tsl::port::AlignedMalloc( + sizeof(typename Tensor::ValueAndTensorBuffer), + static_cast(EIGEN_MAX_ALIGN_BYTES))); new (&value_and_buf->value) T(std::move(value)); new (&value_and_buf->tensor_buffer) typename Tensor::ValueAndTensorBuffer::HostScalarTensorBuffer( diff --git a/tensorflow/core/framework/type_index.h b/tensorflow/core/framework/type_index.h index 22c0d608076af5..0277dd1418b524 100644 --- a/tensorflow/core/framework/type_index.h +++ b/tensorflow/core/framework/type_index.h @@ -57,8 +57,9 @@ class TypeIndex { static TypeIndex Make() { #ifdef PLATFORM_CLOUD_TPU static bool hash_bit[1]; - return TypeIndex(static_cast(reinterpret_cast(hash_bit)), - typeid(T).name()); + return TypeIndex( + static_cast(reinterpret_cast(hash_bit)), + typeid(T).name()); #endif #if defined(__GXX_RTTI) || defined(_CPPRTTI) diff --git a/tensorflow/core/framework/variant_op_registry_test.cc b/tensorflow/core/framework/variant_op_registry_test.cc index 2506bdd433242d..8a3563ab64322e 100644 --- a/tensorflow/core/framework/variant_op_registry_test.cc +++ b/tensorflow/core/framework/variant_op_registry_test.cc @@ -231,8 +231,8 @@ TEST(VariantOpUnaryOpRegistryTest, TestBasicGPU) { Variant v_out = VariantValue(); OpKernelContext* null_context_pointer = nullptr; - Status s0 = UnaryOpVariant(null_context_pointer, - ZEROS_LIKE_VARIANT_UNARY_OP, v, &v_out); + absl::Status s0 = UnaryOpVariant( + null_context_pointer, ZEROS_LIKE_VARIANT_UNARY_OP, v, &v_out); EXPECT_FALSE(s0.ok()); EXPECT_TRUE(absl::StrContains(s0.message(), "early exit zeros_like")); @@ -304,7 
+304,7 @@ TEST(VariantOpAddRegistryTest, TestBasicGPU) { Variant v_out = VariantValue(); OpKernelContext* null_context_pointer = nullptr; - Status s0 = BinaryOpVariants( + absl::Status s0 = BinaryOpVariants( null_context_pointer, ADD_VARIANT_BINARY_OP, v_a, v_b, &v_out); EXPECT_FALSE(s0.ok()); EXPECT_TRUE(absl::StrContains(s0.message(), "early exit add")); diff --git a/tensorflow/core/function/testing/test_pass.h b/tensorflow/core/function/testing/test_pass.h index 93c2116f5ad996..c3bee77403884c 100644 --- a/tensorflow/core/function/testing/test_pass.h +++ b/tensorflow/core/function/testing/test_pass.h @@ -101,7 +101,8 @@ struct TestPassTfDialect DCHECK(target != nullptr); builder.setInsertionPoint(target); - auto replacement = builder.create( + auto replacement = mlir::TF::AddV2Op::create( + builder, mlir::NameLoc::get( mlir::StringAttr::get(builder.getContext(), "x_plus_y")), target->getResultTypes(), target->getOperand(0), target->getOperand(1)); diff --git a/tensorflow/core/graph/algorithm_test.cc b/tensorflow/core/graph/algorithm_test.cc index 3c6cc215e95bc5..0c560b57044cb4 100644 --- a/tensorflow/core/graph/algorithm_test.cc +++ b/tensorflow/core/graph/algorithm_test.cc @@ -47,11 +47,12 @@ REGISTER_OP("TestBinary") // Compares that the order of nodes in 'inputs' respects the // pair orders described in 'ordered_pairs'. -bool ExpectBefore(const std::vector>& ordered_pairs, - const std::vector& inputs, string* error) { - for (const std::pair& pair : ordered_pairs) { - const string& before_node = pair.first; - const string& after_node = pair.second; +bool ExpectBefore( + const std::vector>& ordered_pairs, + const std::vector& inputs, std::string* error) { + for (const std::pair& pair : ordered_pairs) { + const std::string& before_node = pair.first; + const std::string& after_node = pair.second; bool seen_before = false; bool seen_both = false; for (const Node* node : inputs) { @@ -97,10 +98,10 @@ TEST(AlgorithmTest, ReversePostOrder) { GetReversePostOrder(g, &order); // Check that the order respects the dependencies correctly. - std::vector> reverse_orders = { + std::vector> reverse_orders = { {"W1", "input"}, {"W1", "t1"}, {"W1", "t2"}, {"W1", "t3"}, {"input", "t1"}, {"input", "t3"}, {"t1", "t2"}, {"W2", "t3"}}; - string error; + std::string error; EXPECT_TRUE(ExpectBefore(reverse_orders, order, &error)) << error; // A false ordering should fail the check. @@ -111,7 +112,7 @@ TEST(AlgorithmTest, ReversePostOrder) { GetPostOrder(g, &order); // Check that the order respects the dependencies correctly. - std::vector> orders = { + std::vector> orders = { {"input", "W1"}, {"t1", "W1"}, {"t2", "W1"}, {"t3", "W1"}, {"t1", "input"}, {"t3", "input"}, {"t2", "t1"}, {"t3", "W2"}}; EXPECT_TRUE(ExpectBefore(orders, order, &error)) << error; @@ -131,7 +132,7 @@ TEST(AlgorithmTest, ReversePostOrderStable) { // raw pointer value of Node. Stable post order suppose to remove this // nondeterminism by enforcing an ordering based on node ids. 
GraphDefBuilder b(GraphDefBuilder::kFailImmediately); - string error; + std::string error; Node* w1 = SourceOp("TestParams", b.opts().WithName("W1")); Node* input = SourceOp("TestInput", b.opts().WithName("input").WithControlInput(w1)); diff --git a/tensorflow/core/graph/benchmark_testlib.h b/tensorflow/core/graph/benchmark_testlib.h index 54716405fd2a6a..98a488d4209a9b 100644 --- a/tensorflow/core/graph/benchmark_testlib.h +++ b/tensorflow/core/graph/benchmark_testlib.h @@ -73,7 +73,7 @@ inline GraphDef CreateGraphDef(int num_nodes, int num_edges_per_node) { const int kNumInNodes = 10 * num_edges_per_node; GraphDef graph_def; - auto create_node = [](const string& name, const string& op) { + auto create_node = [](const std::string& name, const std::string& op) { NodeDef node; node.set_name(name); node.set_op(op); @@ -115,17 +115,17 @@ inline GraphDef CreateRandomGraph(int size) { random::PhiloxRandom philox(0x12345); random::SimplePhilox rnd(&philox); - string prefix = "long_node_name_prefix_to_measure_string_copy_overhead"; + std::string prefix = "long_node_name_prefix_to_measure_string_copy_overhead"; GraphDef graph; for (int i = 0; i < size; ++i) { - const string name = absl::StrCat(prefix, i); - const uint32 num_inputs = rnd.Uniform(std::min(i, 5)); + const std::string name = absl::StrCat(prefix, i); + const uint32_t num_inputs = rnd.Uniform(std::min(i, 5)); NodeDef node; node.set_name(name); for (int n = 0; n < num_inputs; ++n) { - const uint32 input_node = rnd.Uniform(i); + const uint32_t input_node = rnd.Uniform(i); node.add_input(absl::StrCat(prefix, input_node)); } @@ -142,7 +142,7 @@ inline GraphDef CreateFaninFanoutNodeGraph(int num_regular_fanins, bool fanout_unique_index) { GraphDef graph; - auto create_node = [](const string& name) { + auto create_node = [](const std::string& name) { NodeDef node; node.set_name(name); return node; @@ -151,14 +151,14 @@ inline GraphDef CreateFaninFanoutNodeGraph(int num_regular_fanins, NodeDef node = create_node(/*name=*/"node"); for (int i = 0; i < num_regular_fanins; ++i) { - const string input_node_name = absl::StrFormat("in%05d", i); + const std::string input_node_name = absl::StrFormat("in%05d", i); NodeDef input_node = create_node(/*name=*/input_node_name); *graph.add_node() = std::move(input_node); node.add_input(input_node_name); } for (int i = 0; i < num_controlling_fanins; ++i) { - const string input_node_name = absl::StrFormat("control_in%05d", i); + const std::string input_node_name = absl::StrFormat("control_in%05d", i); NodeDef input_node = create_node(/*name=*/input_node_name); *graph.add_node() = std::move(input_node); node.add_input(absl::StrCat("^", input_node_name)); @@ -166,13 +166,13 @@ inline GraphDef CreateFaninFanoutNodeGraph(int num_regular_fanins, for (int i = 0; i < num_regular_fanouts; ++i) { NodeDef output_node = create_node(/*name=*/absl::StrFormat("out%05d", i)); - const string input_node_index = + const std::string input_node_index = fanout_unique_index ? 
absl::StrCat(node.name(), ":", i) : node.name(); output_node.add_input(input_node_index); *graph.add_node() = std::move(output_node); } - const string controlled_fanout_input = absl::StrCat("^", node.name()); + const std::string controlled_fanout_input = absl::StrCat("^", node.name()); for (int i = 0; i < num_controlled_fanouts; ++i) { NodeDef output_node = create_node(/*name=*/absl::StrFormat("control_out%05d", i)); diff --git a/tensorflow/core/graph/collective_order.cc b/tensorflow/core/graph/collective_order.cc index 9f8a498d88b47e..3ca3748eeb18be 100644 --- a/tensorflow/core/graph/collective_order.cc +++ b/tensorflow/core/graph/collective_order.cc @@ -25,8 +25,9 @@ namespace { // them. absl::Status DiscoverDataDependencies( const Graph* graph, std::vector* collective_nodes, - std::vector* instance_keys, - absl::flat_hash_map>* data_dependencies) { + std::vector* instance_keys, + absl::flat_hash_map>* + data_dependencies) { absl::Status s; // Algorithm: do Reverse DFS starting at sink. `node_leave` is called when // all parents of `node` have been visited. At that point, @@ -69,8 +70,8 @@ absl::Status DiscoverDataDependencies( // If there exists an edge a -> b then `dependency_edges[a]` contains `b` absl::Status CreateControlDependencies( const std::vector& collective_nodes, - const std::vector& instance_keys, - absl::flat_hash_map>* data_dependencies, + const std::vector& instance_keys, + absl::flat_hash_map>* data_dependencies, absl::flat_hash_map>* dependency_edges) { // If there exists some path a -> ... -> b then `all_paths[a]` contains `b` absl::flat_hash_map> all_paths; @@ -158,7 +159,7 @@ absl::Status InsertControlDependencies( } else if (order_type == GraphCollectiveOrder::kAttrs) { // `wait_for` is the inverse of `dependency_edges`, i.e. `wait_for[node]` // contains the list of instance keys for which `node` must wait. - absl::flat_hash_map> wait_for; + absl::flat_hash_map> wait_for; for (const auto& pair : dependency_edges) { int32_t src_instance; TF_RETURN_IF_ERROR( @@ -168,7 +169,8 @@ absl::Status InsertControlDependencies( } } for (const auto& pair : wait_for) { - std::vector wait_for_list(pair.second.begin(), pair.second.end()); + std::vector wait_for_list(pair.second.begin(), + pair.second.end()); pair.first->ClearAttr("wait_for"); pair.first->AddAttr("wait_for", wait_for_list); } @@ -184,9 +186,9 @@ absl::Status InsertControlDependencies( absl::Status OrderCollectives(Graph* graph, GraphCollectiveOrder order_type) { // `instance_keys[i]` corresponds to `collective_nodes[i]` std::vector collective_nodes; - std::vector instance_keys; + std::vector instance_keys; // node -> set of collectives on which node depends. - absl::flat_hash_map> data_dependencies; + absl::flat_hash_map> data_dependencies; TF_RETURN_IF_ERROR(DiscoverDataDependencies( graph, &collective_nodes, &instance_keys, &data_dependencies)); diff --git a/tensorflow/core/graph/collective_order_test.cc b/tensorflow/core/graph/collective_order_test.cc index 46333535cbbaad..2206fc1b309d3b 100644 --- a/tensorflow/core/graph/collective_order_test.cc +++ b/tensorflow/core/graph/collective_order_test.cc @@ -32,11 +32,12 @@ REGISTER_OP("TestParams").Output("o: float"); // `expected_collective_nodes`, and that the list of control edges between these // collective nodes matches `expected_collective_control_edges`. 
void VerifyGraph(const Graph& graph, - const std::vector& expected_collective_nodes, - const std::vector>& + const std::vector& expected_collective_nodes, + const std::vector>& expected_collective_control_edges) { - std::vector actual_collective_nodes; - std::vector> actual_collective_control_edges; + std::vector actual_collective_nodes; + std::vector> + actual_collective_control_edges; for (const Node* src : graph.nodes()) { if (!src->IsCollective()) { continue; @@ -63,13 +64,13 @@ void VerifyGraph(const Graph& graph, // `wait_for_map`. void VerifyAttrs( const Graph& graph, - const std::unordered_map> wait_for_map) { + const std::unordered_map> wait_for_map) { for (const Node* node : graph.nodes()) { if (node->IsCollective() || wait_for_map.find(node->name()) == wait_for_map.end()) { continue; } - std::vector wait_for_actual; + std::vector wait_for_actual; TF_EXPECT_OK(GetNodeAttr(node->attrs(), "wait_for", &wait_for_actual)); auto wait_for_expected = wait_for_map.at(node->name()); EXPECT_THAT(wait_for_actual, UnorderedElementsAreArray(wait_for_expected)); @@ -77,7 +78,7 @@ void VerifyAttrs( } Node* CollectiveReduceNode(GraphDefBuilder* builder, Node* input, - const string& name, const string& device, + const std::string& name, const std::string& device, int instance_key) { Node* collective_node = ops::UnaryOp("CollectiveReduce", input, @@ -109,8 +110,8 @@ Node* CollectiveReduceNode(GraphDefBuilder* builder, Node* input, // inputs, `id` is identity node. std::unique_ptr InitGraph() { GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); - const string dev0 = "/job:localhost/replica:0/task:0/device:CPU:0"; - const string dev1 = "/job:localhost/replica:0/task:0/device:CPU:1"; + const std::string dev0 = "/job:localhost/replica:0/task:0/device:CPU:0"; + const std::string dev1 = "/job:localhost/replica:0/task:0/device:CPU:1"; Node* a = ops::SourceOp("TestParams", builder.opts().WithName("a").WithDevice(dev0)); Node* b = ops::SourceOp("TestParams", @@ -165,7 +166,7 @@ TEST(CollectiveOrderTest, SimpleOrderAttr) { // `id` is identity node. std::unique_ptr InitGraph2() { GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); - const string dev0 = "/job:localhost/replica:0/task:0/device:CPU:0"; + const std::string dev0 = "/job:localhost/replica:0/task:0/device:CPU:0"; Node* a = ops::SourceOp("TestParams", builder.opts().WithName("a").WithDevice(dev0)); Node* c1 = CollectiveReduceNode(&builder, a, "c1", dev0, 1); @@ -201,7 +202,7 @@ TEST(CollectiveOrderTest, SimpleOrder2) { // std::unique_ptr InitGraphForPruning() { GraphDefBuilder builder(GraphDefBuilder::kFailImmediately); - const string dev0 = "/job:localhost/replica:0/task:0/device:CPU:0"; + const std::string dev0 = "/job:localhost/replica:0/task:0/device:CPU:0"; Node* w = ops::SourceOp("TestParams", builder.opts().WithName("w").WithDevice(dev0)); Node* x = ops::SourceOp("TestParams", diff --git a/tensorflow/core/graph/control_flow.cc b/tensorflow/core/graph/control_flow.cc index 4cd9316a4607e3..e443dadc678c26 100644 --- a/tensorflow/core/graph/control_flow.cc +++ b/tensorflow/core/graph/control_flow.cc @@ -27,7 +27,7 @@ namespace tensorflow { namespace { // Information about a loop frame structure. struct Frame { - string name; + std::string name; // Pointer to the parent frame. The root frame has a pointer to itself. Frame* parent = nullptr; @@ -40,7 +40,7 @@ struct Frame { // Verify that the ControlFlowInfo of the graph has valid loop structure. 
absl::Status ValidateControlFlowInfo( const Graph* graph, const std::vector& cf_info) { - std::unordered_map frames; + std::unordered_map frames; for (const Node* node : graph->op_nodes()) { const ControlFlowInfo& cf = cf_info[node->id()]; if (!cf.frame || !cf.parent_frame) { @@ -85,7 +85,7 @@ absl::Status ValidateControlFlowInfo( absl::Status BuildControlFlowInfo(const Graph* g, std::vector* info, - std::vector* unreachable_nodes) { + std::vector* unreachable_nodes) { info->clear(); info->resize(g->num_node_ids()); @@ -97,7 +97,7 @@ absl::Status BuildControlFlowInfo(const Graph* g, src_info.frame = src_node; src_info.parent_frame = src_node; - string frame_name; + std::string frame_name; std::deque ready; ready.push_back(src_node); while (!ready.empty()) { @@ -135,7 +135,8 @@ absl::Status BuildControlFlowInfo(const Graph* g, // Process the node 'out'. if (IsEnter(out)) { if (is_visited) { - const string& parent_frame = (*info)[out_parent->id()].frame_name; + const std::string& parent_frame = + (*info)[out_parent->id()].frame_name; if (parent_frame != frame_name) { return errors::InvalidArgument( FormatNodeForError(*out), diff --git a/tensorflow/core/graph/control_flow.h b/tensorflow/core/graph/control_flow.h index c1e2db339122df..b15bb671f7e1ce 100644 --- a/tensorflow/core/graph/control_flow.h +++ b/tensorflow/core/graph/control_flow.h @@ -36,7 +36,7 @@ struct ControlFlowInfo { const Node* frame = nullptr; // frame of a node const Node* parent_frame = nullptr; // parent frame of a node - string frame_name; // frame name of a node + std::string frame_name; // frame name of a node }; // Clear and populate `info` with each node's frame and the level it belongs to. @@ -54,7 +54,7 @@ struct ControlFlowInfo { // which all sane front-ends should satisfy. absl::Status BuildControlFlowInfo( const Graph* g, std::vector* info, - std::vector* unreachable_nodes = nullptr); + std::vector* unreachable_nodes = nullptr); } // namespace tensorflow diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc index 37d1e69c5b3c66..6026522f28cfb0 100644 --- a/tensorflow/core/graph/costmodel.cc +++ b/tensorflow/core/graph/costmodel.cc @@ -35,7 +35,7 @@ void CostModel::SuppressInfrequent() { // Find the median of the non-zero counts, and use half of its value // as the cutoff for a "normal" execution mode node. if (count_.empty()) return; - std::vector non_zero; + std::vector non_zero; for (auto v : count_) { if (v > 0) non_zero.push_back(v); } @@ -192,7 +192,7 @@ void CostModel::RecordCount(const Node* node, int count) { count_[id] += count; } -int32 CostModel::TotalCount(const Node* node) const { +int32_t CostModel::TotalCount(const Node* node) const { const int id = Id(node); if (id < 0) return 0; return (static_cast(id) < slot_bytes_.size()) ? count_[id] : 0; @@ -419,7 +419,7 @@ Microseconds CostModel::ComputationTimeEstimate(int64_t math_ops) { void CostModel::IncrementUpdateTimes() { update_times_++; } -int32 CostModel::GetUpdateTimes() const { return update_times_; } +int32_t CostModel::GetUpdateTimes() const { return update_times_; } // ---------------------------------------------------------------------------- // InitCostModel diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h index 795d94720415b5..9bfd9b2a60ce1b 100644 --- a/tensorflow/core/graph/costmodel.h +++ b/tensorflow/core/graph/costmodel.h @@ -31,7 +31,7 @@ limitations under the License. 
#include "tensorflow/core/platform/protobuf.h" namespace tensorflow { -typedef std::unordered_map +typedef std::unordered_map NodeNameToCostIdMap; class StepStats; @@ -95,7 +95,7 @@ class CostModel { void RecordCount(const Node* node, int num_count); // Returns how many times "node" has been executed. - int32 TotalCount(const Node* node) const; + int32_t TotalCount(const Node* node) const; // Records that "output_slot" of "node" has produced tensors of // aggregated "bytes". @@ -184,7 +184,7 @@ class CostModel { void IncrementUpdateTimes(); // Get the times that the cost model is updated. - int32 GetUpdateTimes() const; + int32_t GetUpdateTimes() const; private: static Bytes MinTensorMemoryUsage(const TensorShapeProto& tensor_shape, @@ -197,13 +197,13 @@ class CostModel { // Nodes and Edges whose count is < this value // get type/byte estimates of 0. - int32 min_count_ = 0; + int32_t min_count_ = 0; // The number of times the cost model is updated. - int32 update_times_ = 0; + int32_t update_times_ = 0; // Number of times each Node has been executed. - std::vector count_; + std::vector count_; // Cumulative execution time. std::vector time_; // Cumulative Bytes output on each channel. diff --git a/tensorflow/core/graph/costmodel_test.cc b/tensorflow/core/graph/costmodel_test.cc index 0e5c2273f53b20..c062f58856523b 100644 --- a/tensorflow/core/graph/costmodel_test.cc +++ b/tensorflow/core/graph/costmodel_test.cc @@ -56,7 +56,7 @@ MATCHER_P(ShapeProtoEquals, other, "") { return true; } -static void InitGraph(const string& s, Graph* graph) { +static void InitGraph(const std::string& s, Graph* graph) { GraphDef graph_def; auto parser = protobuf::TextFormat::Parser(); @@ -97,8 +97,8 @@ Node* FindNode(const Graph& graph, std::string name) { return nullptr; } -Node* AddNode(Graph& graph, const string& name, const string& node_type, - int num_inputs) { +Node* AddNode(Graph& graph, const std::string& name, + const std::string& node_type, int num_inputs) { auto builder = NodeDefBuilder(name, node_type); for (int i = 0; i < num_inputs; ++i) { builder = builder.Input(absl::StrCat("node_", i), i, DT_FLOAT); @@ -114,7 +114,7 @@ Node* AddNode(Graph& graph, const string& name, const string& node_type, } static void GenerateStepStats(Graph* graph, StepStats* step_stats, - const string& device_name) { + const std::string& device_name) { // Fill RunMetadata's step_stats and partition_graphs fields. 
DeviceStepStats* device_stepstats = step_stats->add_dev_stats(); device_stepstats->set_device(device_name); @@ -150,7 +150,7 @@ TEST(CostModelTest, WorksWithManager) { GenerateStepStats(graph1.get(), &step_stats, "DummyDevice1"); GenerateStepStats(graph2.get(), &step_stats, "DummyDevice2"); StepStatsCollector collector(&step_stats); - std::unordered_map device_map; + std::unordered_map device_map; device_map["DummyDevice1"] = graph1.get(); device_map["DummyDevice2"] = graph2.get(); CostModelManager cost_model_manager; @@ -161,7 +161,7 @@ TEST(CostModelTest, WorksWithManager) { TF_ASSERT_OK( cost_model_manager.AddToCostGraphDef(graph2.get(), &cost_graph_def)); ASSERT_EQ(cost_graph_def.node_size(), 12); - absl::flat_hash_map ids; + absl::flat_hash_map ids; for (auto node : cost_graph_def.node()) { int32_t index = node.id(); auto result = ids.insert({index, node}); diff --git a/tensorflow/core/graph/edgeset.h b/tensorflow/core/graph/edgeset.h index 6d6cb3ff630591..e3f50ef59484ea 100644 --- a/tensorflow/core/graph/edgeset.h +++ b/tensorflow/core/graph/edgeset.h @@ -82,7 +82,7 @@ class EdgeSet { #ifdef NDEBUG void RegisterMutation() {} #else - uint32 mutations_ = 0; + uint32_t mutations_ = 0; void RegisterMutation() { mutations_++; } #endif @@ -127,7 +127,7 @@ class EdgeSet::const_iterator { CHECK_EQ(init_mutations_, owner_->mutations_); } const EdgeSet* owner_ = nullptr; - uint32 init_mutations_ = 0; + uint32_t init_mutations_ = 0; #endif }; diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index a3e14eac396859..c7acee2bd056eb 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -190,7 +190,7 @@ void Node::ClearTypeInfo() { absl::Status Node::ShrinkTypeInfo( const absl::flat_hash_map& index_mapping, - const string& type_attr_name, bool update_full_type) { + const std::string& type_attr_name, bool update_full_type) { std::vector dtypes; TF_RETURN_IF_ERROR(GetNodeAttr(def(), type_attr_name, &dtypes)); @@ -239,11 +239,11 @@ const OpDef& Node::op_def() const { return *props_->op_def; } NodeDef* Node::mutable_def() { return &props_->node_def; } -int32 Node::num_inputs() const { return props_->input_types.size(); } +int32_t Node::num_inputs() const { return props_->input_types.size(); } DataType Node::input_type(int32_t i) const { return props_->input_types[i]; } const DataTypeVector& Node::input_types() const { return props_->input_types; } -int32 Node::num_outputs() const { return props_->output_types.size(); } +int32_t Node::num_outputs() const { return props_->output_types.size(); } DataType Node::output_type(int32_t o) const { return props_->output_types[o]; } const DataTypeVector& Node::output_types() const { return props_->output_types; @@ -416,7 +416,7 @@ bool InputTensor::operator==(const InputTensor& other) const { return node == other.node && index == other.index; } -uint64 InputTensor::Hash::operator()(InputTensor const& s) const { +uint64_t InputTensor::Hash::operator()(InputTensor const& s) const { return Hash64Combine(std::hash()(s.node), std::hash()(s.index)); } @@ -427,7 +427,7 @@ bool OutputTensor::operator==(const OutputTensor& other) const { return node == other.node && index == other.index; } -uint64 OutputTensor::Hash::operator()(OutputTensor const& s) const { +uint64_t OutputTensor::Hash::operator()(OutputTensor const& s) const { return Hash64Combine(std::hash()(s.node), std::hash()(s.index)); } @@ -1086,7 +1086,7 @@ GraphDebugInfo Graph::BuildDebugInfo() const { std::string Edge::DebugString() const { auto src_name = 
src_ ? src_->name().c_str() : ""; auto dst_name = dst_ ? dst_->name().c_str() : ""; - return strings::Printf("[id=%d %s:%d -> %s:%d]", id_, src_name, src_output_, + return absl::StrFormat("[id=%d %s:%d -> %s:%d]", id_, src_name, src_output_, dst_name, dst_input_); } diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 6e70b0cdfa8322..10b29e0975625f 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -107,11 +107,11 @@ class Node { NodeDef* mutable_def(); // input and output types - int32 num_inputs() const; + int32_t num_inputs() const; DataType input_type(int32_t i) const; const DataTypeVector& input_types() const; - int32 num_outputs() const; + int32_t num_outputs() const; DataType output_type(int32_t o) const; const DataTypeVector& output_types() const; @@ -139,14 +139,14 @@ class Node { // Sets 'original_node_names' field of this node's DebugInfo proto to // 'names'. - void set_original_node_names(const std::vector& names); - void set_original_func_names(const std::vector& names); + void set_original_node_names(const std::vector& names); + void set_original_func_names(const std::vector& names); // Read only access to attributes AttrSlice attrs() const; // Inputs requested by the NodeDef. For the actual inputs, use in_edges. - const protobuf::RepeatedPtrField& requested_inputs() const; + const protobuf::RepeatedPtrField& requested_inputs() const; // Get the neighboring nodes via edges either in or out of this node. This // includes control edges. @@ -220,7 +220,7 @@ class Node { UpdateProperties(); } - void AddAttr(const std::string& name, std::vector&& val) { + void AddAttr(const std::string& name, std::vector&& val) { MoveAttrValue(std::move(val), AddAttrHelper(name)); UpdateProperties(); } @@ -278,7 +278,7 @@ class Node { // update the node's full type information (if present). absl::Status ShrinkTypeInfo( const absl::flat_hash_map& index_mapping, - const string& type_attr_name, bool update_full_type); + const std::string& type_attr_name, bool update_full_type); // Called after an incident non-control edge has changed. Does nothing if not // all input edges are defined. @@ -383,8 +383,8 @@ class Node { // Stores debug information associated with the Node. struct NodeDebugInfo { const std::string name; - std::vector original_node_names; - std::vector original_func_names; + std::vector original_node_names; + std::vector original_func_names; NodeDebugInfo(const Node& n); NodeDebugInfo(const NodeDef& ndef); @@ -407,7 +407,7 @@ struct InputTensor { // A hash function for InputTensors. Nodes are hashed based on their pointer // value. struct Hash { - uint64 operator()(InputTensor const& s) const; + uint64_t operator()(InputTensor const& s) const; }; }; @@ -428,7 +428,7 @@ struct OutputTensor { // A hash function for OutputTensors. Nodes are hashed based on their pointer // value. struct Hash { - uint64 operator()(OutputTensor const& s) const; + uint64_t operator()(OutputTensor const& s) const; }; }; @@ -803,7 +803,7 @@ class Graph { WhileContext** result); // Builds a node name to node pointer index for all nodes in the graph. - std::unordered_map BuildNodeNameIndex() const; + std::unordered_map BuildNodeNameIndex() const; absl::optional>& GetConstArgIndicesCache() const { return const_arg_indices_cache_; @@ -906,16 +906,16 @@ class Graph { // A table of the unique assigned device names. Indices do NOT correspond // to node IDs. Index 0 is always the empty string. 
- std::vector device_names_; + std::vector device_names_; // Maps unique device names to indices within device_names_[i]. - std::unordered_map device_names_map_; + std::unordered_map device_names_map_; // All the while contexts owned by this graph, keyed by frame name, // corresponding to all the while loops contained in this graph (including // nested loops). The stored contexts are usually accessed via // AddWhileContext() or Node::while_ctx(), but this manages the lifetime. - std::map while_ctxs_; + std::map while_ctxs_; // Cache of the indices of the arguments which need to be constant for the XLA // compilation. diff --git a/tensorflow/core/graph/graph_debug_info_builder_test.cc b/tensorflow/core/graph/graph_debug_info_builder_test.cc index cbe4a8a8ae9287..5680800a5592c5 100644 --- a/tensorflow/core/graph/graph_debug_info_builder_test.cc +++ b/tensorflow/core/graph/graph_debug_info_builder_test.cc @@ -47,7 +47,7 @@ class TestStackTrace : public AbstractStackTrace { StackFrame LastUserFrame() const override { return frames_.back(); } - string ToString(const TracePrintingOptions& opts) const override { + std::string ToString(const TracePrintingOptions& opts) const override { auto frame = LastUserFrame(); return absl::StrCat(frame.file_name, ":", frame.line_number, ":", frame.function_name); diff --git a/tensorflow/core/graph/graph_def_builder.cc b/tensorflow/core/graph/graph_def_builder.cc index 168fc1a0da3da7..a4f08eab66b090 100644 --- a/tensorflow/core/graph/graph_def_builder.cc +++ b/tensorflow/core/graph/graph_def_builder.cc @@ -44,12 +44,12 @@ GraphDefBuilder::Options GraphDefBuilder::Options::WithControlInputs( } GraphDefBuilder::Options GraphDefBuilder::Options::WithNameImpl( absl::string_view name) { - name_ = string(name); + name_ = std::string(name); return *this; } GraphDefBuilder::Options GraphDefBuilder::Options::WithDeviceImpl( absl::string_view device) { - device_ = string(device); + device_ = std::string(device); return *this; } GraphDefBuilder::Options GraphDefBuilder::Options::WithControlInputImpl( @@ -72,7 +72,7 @@ absl::Status GraphDefBuilder::ToGraphDef(GraphDef* graph_def) const { return status_; } -string GraphDefBuilder::Options::GetNameForOp(absl::string_view op) const { +std::string GraphDefBuilder::Options::GetNameForOp(absl::string_view op) const { if (name_.empty()) return graph_->NewName(op); return name_; } @@ -99,14 +99,15 @@ void GraphDefBuilder::Options::UpdateStatus(const absl::Status& status) const { namespace ops { -Node* SourceOp(const string& op_name, const GraphDefBuilder::Options& opts) { +Node* SourceOp(const std::string& op_name, + const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; NodeBuilder node_builder(opts.GetNameForOp(op_name), op_name, opts.op_registry()); return opts.FinalizeBuilder(&node_builder); } -Node* UnaryOp(const string& op_name, NodeOut input, +Node* UnaryOp(const std::string& op_name, NodeOut input, const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; NodeBuilder node_builder(opts.GetNameForOp(op_name), op_name, @@ -115,7 +116,7 @@ Node* UnaryOp(const string& op_name, NodeOut input, return opts.FinalizeBuilder(&node_builder); } -Node* BinaryOp(const string& op_name, NodeOut a, NodeOut b, +Node* BinaryOp(const std::string& op_name, NodeOut a, NodeOut b, const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; NodeBuilder node_builder(opts.GetNameForOp(op_name), op_name, @@ -124,7 +125,7 @@ Node* BinaryOp(const string& op_name, NodeOut a, NodeOut b, return 
opts.FinalizeBuilder(&node_builder); } -Node* TernaryOp(const string& op_name, NodeOut a, NodeOut b, NodeOut c, +Node* TernaryOp(const std::string& op_name, NodeOut a, NodeOut b, NodeOut c, const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; NodeBuilder node_builder(opts.GetNameForOp(op_name), op_name, diff --git a/tensorflow/core/graph/graph_def_builder.h b/tensorflow/core/graph/graph_def_builder.h index b635ece0eab707..afe3aebe55d62c 100644 --- a/tensorflow/core/graph/graph_def_builder.h +++ b/tensorflow/core/graph/graph_def_builder.h @@ -104,14 +104,14 @@ class GraphDefBuilder { // Returns a string representation of the status associated with *this. // Returns the string `"OK"` if the status doesn't have any error. - string StatusToString() const { + std::string StatusToString() const { return status_->ok() ? "OK" : std::string(status_->message()); } // Given the Op type name, return a name for a node of that type. // Uses the value set in WithName() if that has been called. Otherwise, // returns a name built out of the Op type name. - string GetNameForOp(absl::string_view op) const; + std::string GetNameForOp(absl::string_view op) const; // Sets the device, adds control inputs, adds attrs, and calls Finalize(). // If Finalize returns an error, it is saved and this function returns @@ -133,17 +133,17 @@ class GraphDefBuilder { Options WithControlInputsImpl(absl::Span control_inputs); template Options WithAttrImpl(absl::string_view name, T&& value) { - attrs_.emplace_back(string(name), AttrValue()); + attrs_.emplace_back(std::string(name), AttrValue()); SetAttrValue(std::forward(value), &attrs_.back().second); return *this; } Graph* const graph_; absl::Status* const status_; - string name_; - string device_; + std::string name_; + std::string device_; std::vector control_inputs_; - std::vector> attrs_; + std::vector> attrs_; }; // Start building a new graph. @@ -176,7 +176,7 @@ class GraphDefBuilder { // Returns whether a user-defined function with `name` already exists in the // graph. - bool HasFunction(const string& name) { + bool HasFunction(const std::string& name) { return flib_def_.Find(name) != nullptr; } @@ -196,18 +196,19 @@ namespace ops { typedef NodeBuilder::NodeOut NodeOut; // For adding an Op with no inputs to a GraphDefBuilder. -Node* SourceOp(const string& op_name, const GraphDefBuilder::Options& opts); +Node* SourceOp(const std::string& op_name, + const GraphDefBuilder::Options& opts); // For adding an Op with one input to a GraphDefBuilder. -Node* UnaryOp(const string& op_name, NodeOut input, +Node* UnaryOp(const std::string& op_name, NodeOut input, const GraphDefBuilder::Options& opts); // For adding an Op with two inputs to a GraphDefBuilder. -Node* BinaryOp(const string& op_name, NodeOut a, NodeOut b, +Node* BinaryOp(const std::string& op_name, NodeOut a, NodeOut b, const GraphDefBuilder::Options& opts); // For adding an Op with three inputs to a GraphDefBuilder. -Node* TernaryOp(const string& op_name, NodeOut a, NodeOut b, NodeOut c, +Node* TernaryOp(const std::string& op_name, NodeOut a, NodeOut b, NodeOut c, const GraphDefBuilder::Options& opts); } // namespace ops diff --git a/tensorflow/core/graph/graph_node_util.cc b/tensorflow/core/graph/graph_node_util.cc index 3bf14ed2944394..ed6a23e3813d80 100644 --- a/tensorflow/core/graph/graph_node_util.cc +++ b/tensorflow/core/graph/graph_node_util.cc @@ -25,9 +25,11 @@ limitations under the License. 
namespace tensorflow { -string SummarizeNode(const Node& node) { return SummarizeNodeDef(node.def()); } +std::string SummarizeNode(const Node& node) { + return SummarizeNodeDef(node.def()); +} -string FormatNodeForError(const Node& node) { +std::string FormatNodeForError(const Node& node) { return FormatNodeDefForError(node.def()); } @@ -41,9 +43,10 @@ absl::Status AttachDef(const absl::Status& status, const Node& node, return AttachDef(status, node.def(), allow_multiple_formatted_node); } -absl::btree_set GetMergedNames(const std::vector& from_names, - const std::vector& to_names) { - absl::btree_set merged_names; +absl::btree_set GetMergedNames( + const std::vector& from_names, + const std::vector& to_names) { + absl::btree_set merged_names; merged_names.insert(from_names.begin(), from_names.end()); merged_names.insert(to_names.begin(), to_names.end()); return merged_names; diff --git a/tensorflow/core/graph/graph_node_util.h b/tensorflow/core/graph/graph_node_util.h index 146c4c07ca833a..8d7a44c5fed2e0 100644 --- a/tensorflow/core/graph/graph_node_util.h +++ b/tensorflow/core/graph/graph_node_util.h @@ -29,12 +29,12 @@ class OpDef; // Produce a human-readable version of a Node or NodeDef that is more concise // than a text-format proto. -string SummarizeNode(const Node& node); +std::string SummarizeNode(const Node& node); // Produces a formatted string pattern from the node which can uniquely identify // this node upstream to produce an informative error message. The pattern // followed is: {{node }} -string FormatNodeForError(const Node& node); +std::string FormatNodeForError(const Node& node); // Merges the original node names from the debug information of 'from' to the // debug information of 'to'. diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index be5a5423ae57c6..1328c5c8b57b4c 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -160,16 +160,17 @@ bool IsDstInputOnHost(const Edge* edge, const GraphInfo& info) { // Add a control edge from each input to each recv. void AddReadControl(const std::vector& recvs, - const std::vector& inputs) { + const std::vector& inputs) { for (NodeDef* recv : recvs) { - for (const string& input : inputs) { + for (const std::string& input : inputs) { recv->add_input(absl::StrCat("^", input)); } } } void SetSendRecvAttrs(const PartitionOptions& opts, const Edge* edge, - const string& tensor_name_attr, NodeDefBuilder* builder) { + const std::string& tensor_name_attr, + NodeDefBuilder* builder) { builder->Attr("tensor_name", tensor_name_attr); builder->Attr("send_device", edge->src()->assigned_device_name()); builder->Attr("send_device_incarnation", @@ -184,7 +185,7 @@ void SetSendRecvAttrs(const PartitionOptions& opts, const Edge* edge, NodeDef* AddSend(const PartitionOptions& opts, const GraphInfo& g_info, GraphDef* gdef, const Edge* edge, NodeDefBuilder::NodeOut send_from, int64_t start_time, - const string& tensor_name_attr, absl::Status* status) { + const std::string& tensor_name_attr, absl::Status* status) { const DataType dtype = send_from.data_type; const DataType cast_dtype = opts.should_cast ? opts.should_cast(edge) : dtype; const Node* src = edge->src(); @@ -201,7 +202,7 @@ NodeDef* AddSend(const PartitionOptions& opts, const GraphInfo& g_info, // Add a cast node that casts dtype to cast_dtype. // NOTE(yuanbyu): Only cast for cross-device send/recv. 
if (dtype != cast_dtype && !NeedSameDeviceSendRecv(edge, g_info)) { - const string cast_op = (host_memory) ? "_HostCast" : "Cast"; + const std::string cast_op = (host_memory) ? "_HostCast" : "Cast"; NodeDefBuilder cast_builder(opts.new_name(src->name()), cast_op, NodeDebugInfo(*src)); cast_builder.Device(src->assigned_device_name()).Input(send_from); @@ -226,7 +227,7 @@ NodeDef* AddSend(const PartitionOptions& opts, const GraphInfo& g_info, } // Add the send node. - const string send_op = (host_memory) ? "_HostSend" : "_Send"; + const std::string send_op = (host_memory) ? "_HostSend" : "_Send"; NodeDefBuilder send_builder(opts.new_name(src->name()), send_op, NodeDebugInfo(*src)); SetSendRecvAttrs(opts, edge, tensor_name_attr, &send_builder); @@ -241,7 +242,7 @@ NodeDef* AddSend(const PartitionOptions& opts, const GraphInfo& g_info, NodeDef* AddRecv(const PartitionOptions& opts, const GraphInfo& g_info, GraphDef* gdef, const Edge* edge, NodeDef** real_recv, - const string& tensor_name_attr, absl::Status* status) { + const std::string& tensor_name_attr, absl::Status* status) { const DataType dtype = EdgeType(edge); const Node* src = edge->src(); const Node* dst = edge->dst(); @@ -285,7 +286,7 @@ NodeDef* AddRecv(const PartitionOptions& opts, const GraphInfo& g_info, } // Add the recv node. - const string recv_op = (host_memory) ? "_HostRecv" : "_Recv"; + const std::string recv_op = (host_memory) ? "_HostRecv" : "_Recv"; NodeDefBuilder recv_builder(opts.new_name(src->name()), recv_op, NodeDebugInfo(*src)); SetSendRecvAttrs(opts, edge, tensor_name_attr, &recv_builder); @@ -298,7 +299,7 @@ NodeDef* AddRecv(const PartitionOptions& opts, const GraphInfo& g_info, // Add the cast node (from cast_dtype to dtype) or an Identity node. if (dtype != cast_dtype) { - const string cast_op = (host_memory) ? "_HostCast" : "Cast"; + const std::string cast_op = (host_memory) ? "_HostCast" : "Cast"; NodeDefBuilder cast_builder(opts.new_name(src->name()), cast_op, NodeDebugInfo(*src)); cast_builder.Attr("DstT", dtype); @@ -339,8 +340,9 @@ NodeDef* AddDummyConst(const PartitionOptions& opts, GraphDef* gdef, // A dummy node for scheduling. NodeDef* AddControlTrigger(const PartitionOptions& opts, GraphDef* gdef, - const string& assigned_device_name, int64_t epoch, - int64_t starttime, absl::Status* status) { + const std::string& assigned_device_name, + int64_t epoch, int64_t starttime, + absl::Status* status) { NodeDef* result = gdef->add_node(); *status = NodeDefBuilder(opts.new_name(absl::StrCat("synch_", epoch)), "ControlTrigger") @@ -398,18 +400,19 @@ void OptimizeControlFlowColocation(Graph* graph) { DFS(*graph, visit, {}); } -string ControlLoopName(const string& name) { +std::string ControlLoopName(const std::string& name) { return absl::StrCat("_cloop", name); } bool IsControlLoop(const Node* node) { - const string& name = node->name(); + const std::string& name = node->name(); return absl::StartsWith(name, "_cloop"); } // An enter node for control flow. -Node* AddControlEnter(Graph* g, const string& node_name, - const string& device_name, const string& frame_name, +Node* AddControlEnter(Graph* g, const std::string& node_name, + const std::string& device_name, + const std::string& frame_name, const int parallel_iterations, absl::Status* status) { NodeBuilder node_builder(node_name, "Enter", g->op_registry()); node_builder.Input({"dummy", 0, DT_FLOAT}); @@ -423,9 +426,9 @@ Node* AddControlEnter(Graph* g, const string& node_name, } // A merge node for control flow. 
-Node* AddControlMerge(const string& in_name1, const string& in_name2, Graph* g, - const string& node_name, const string& device_name, - absl::Status* status) { +Node* AddControlMerge(const std::string& in_name1, const std::string& in_name2, + Graph* g, const std::string& node_name, + const std::string& device_name, absl::Status* status) { NodeBuilder node_builder(node_name, "Merge", g->op_registry()); node_builder.Input({{in_name1, 0, DT_FLOAT}, {in_name2, 0, DT_FLOAT}}); Node* res_node; @@ -437,7 +440,7 @@ Node* AddControlMerge(const string& in_name1, const string& in_name2, Graph* g, // A switch node for control flow. Node* AddControlSwitch(NodeBuilder::NodeOut input1, NodeBuilder::NodeOut input2, - const string& device_name, + const std::string& device_name, const GraphDefBuilder::Options& bopts) { Node* res_node = ops::BinaryOp("Switch", std::move(input1), std::move(input2), bopts); @@ -447,7 +450,7 @@ Node* AddControlSwitch(NodeBuilder::NodeOut input1, NodeBuilder::NodeOut input2, } // A next_iteration node for control flow. -Node* AddControlNext(NodeBuilder::NodeOut input, const string& device_name, +Node* AddControlNext(NodeBuilder::NodeOut input, const std::string& device_name, const GraphDefBuilder::Options& bopts) { Node* res_node = ops::UnaryOp("NextIteration", std::move(input), bopts); if (bopts.HaveError()) return nullptr; @@ -469,7 +472,7 @@ Node* EmptyConst(const GraphDefBuilder::Options& options) { } // A dummy const node for control flow. -Node* AddControlConst(const string& device_name, +Node* AddControlConst(const std::string& device_name, const GraphDefBuilder::Options& bopts) { Node* res_node = EmptyConst(bopts); if (bopts.HaveError()) return nullptr; @@ -513,21 +516,22 @@ absl::Status AddControlLoop(const PartitionOptions& opts, Graph* g, absl::Status status; GraphDefBuilder::Options bopts(g, &status); const ControlFlowInfo& src_info = (*cf_info)[src->id()]; - const string& device_name = edge->dst()->assigned_device_name(); - const string& frame_name = src_info.frame_name; + const std::string& device_name = edge->dst()->assigned_device_name(); + const std::string& frame_name = src_info.frame_name; int parallel_iterations; status = GetNodeAttr(src_info.frame->attrs(), "parallel_iterations", ¶llel_iterations); if (!status.ok()) return status; // The names of the nodes to be added. - const string& enter_name = + const std::string& enter_name = ControlLoopName(opts.new_name(edge->dst()->name())); - const string& merge_name = + const std::string& merge_name = ControlLoopName(opts.new_name(edge->dst()->name())); - const string& switch_name = + const std::string& switch_name = + ControlLoopName(opts.new_name(edge->dst()->name())); + const std::string& next_name = ControlLoopName(opts.new_name(edge->dst()->name())); - const string& next_name = ControlLoopName(opts.new_name(edge->dst()->name())); // Add the nodes to the graph g. Node* enter = AddControlEnter(g, enter_name, device_name, frame_name, @@ -634,14 +638,14 @@ absl::Status AddControlFlow(const PartitionOptions& opts, Graph* g, OptimizeControlFlowColocation(g); // The map from frames to their LoopCond nodes. 
- std::unordered_map frame_cond_map; + std::unordered_map frame_cond_map; int num_node_ids = g->num_node_ids(); for (int i = 0; i < num_node_ids; ++i) { Node* node = g->FindNodeId(i); if (node == nullptr) continue; if (IsLoopCond(node)) { - const string& frame_name = cf_info[node->id()].frame_name; + const std::string& frame_name = cf_info[node->id()].frame_name; DCHECK(!frame_name.empty()); frame_cond_map[frame_name] = node; } @@ -655,7 +659,7 @@ absl::Status AddControlFlow(const PartitionOptions& opts, Graph* g, // the merge of the outer loop to the enter of the inner loop. // // A map from to ControlLoop. - std::unordered_map control_loops; + std::unordered_map control_loops; int num_edge_ids = g->num_edge_ids(); for (int i = 0; i < num_edge_ids; ++i) { const Edge* edge = g->FindEdgeId(i); @@ -666,15 +670,15 @@ absl::Status AddControlFlow(const PartitionOptions& opts, Graph* g, // Skip Sink/Source nodes. if (!src->IsOp() || !dst->IsOp()) continue; - const string& src_device = src->assigned_device_name(); - const string& dst_device = dst->assigned_device_name(); + const std::string& src_device = src->assigned_device_name(); + const std::string& dst_device = dst->assigned_device_name(); // Skip local edges. if (src_device == dst_device) continue; const Node* src_frame = OutputFrame(src, cf_info); const Node* dst_frame = InputFrame(dst, cf_info); - const string& src_frame_name = cf_info[src_frame->id()].frame_name; - const string& dst_frame_name = cf_info[dst_frame->id()].frame_name; + const std::string& src_frame_name = cf_info[src_frame->id()].frame_name; + const std::string& dst_frame_name = cf_info[dst_frame->id()].frame_name; // Skip if src and dst are not in the same frame. if (src_frame_name.empty() || src_frame_name != dst_frame_name) { continue; @@ -685,12 +689,12 @@ absl::Status AddControlFlow(const PartitionOptions& opts, Graph* g, // for its outer frame when nested. ControlLoop child_loop; while (true) { - const string& curr_frame_name = cf_info[src_frame->id()].frame_name; + const std::string& curr_frame_name = cf_info[src_frame->id()].frame_name; if (curr_frame_name.empty()) { // We have reached the root frame. if (child_loop.merge != nullptr) { - const string& node_name = opts.new_name(edge->dst()->name()); - const string& device_name = edge->dst()->assigned_device_name(); + const std::string& node_name = opts.new_name(edge->dst()->name()); + const std::string& device_name = edge->dst()->assigned_device_name(); Node* const_node = AddControlConst(device_name, bopts.WithName(node_name)); if (!status.ok()) return status; @@ -700,7 +704,8 @@ absl::Status AddControlFlow(const PartitionOptions& opts, Graph* g, break; } - const string& cl_key = absl::StrCat(curr_frame_name, "$$", dst_device); + const std::string& cl_key = + absl::StrCat(curr_frame_name, "$$", dst_device); auto it = control_loops.find(cl_key); if (it != control_loops.end()) { if (child_loop.enter != nullptr) { @@ -748,15 +753,16 @@ absl::Status AddControlFlow(const PartitionOptions& opts, Graph* g, // Skip Sink/Source nodes. 
if (!src->IsOp() || !dst->IsOp()) continue; - const string& src_device = src->assigned_device_name(); - const string& dst_device = dst->assigned_device_name(); + const std::string& src_device = src->assigned_device_name(); + const std::string& dst_device = dst->assigned_device_name(); if (src_device != dst_device) { const Node* src_frame = OutputFrame(src, cf_info); const Node* dst_frame = InputFrame(dst, cf_info); - const string& src_frame_name = cf_info[src_frame->id()].frame_name; - const string& dst_frame_name = cf_info[dst_frame->id()].frame_name; + const std::string& src_frame_name = cf_info[src_frame->id()].frame_name; + const std::string& dst_frame_name = cf_info[dst_frame->id()].frame_name; if (!src_frame_name.empty() && src_frame_name == dst_frame_name) { - const string& cl_key = absl::StrCat(dst_frame_name, "$$", dst_device); + const std::string& cl_key = + absl::StrCat(dst_frame_name, "$$", dst_device); ControlLoop loop = control_loops[cl_key]; DCHECK(loop.enter != nullptr); // Note that we'll create multiple duplicate edges if dst has multiple @@ -812,12 +818,13 @@ absl::Status TopologicalSortNodesWithTimePriority( }; // Build initial structures, initial contents of queue. - std::unordered_map> node_to_output_nodes; + std::unordered_map> + node_to_output_nodes; std::unordered_map inputs_needed; for (int n = 0; n < gdef->node_size(); ++n) { const NodeDef* ndef = &gdef->node(n); for (int i = 0; i < ndef->input_size(); ++i) { - node_to_output_nodes[string(ParseTensorName(ndef->input(i)).first)] + node_to_output_nodes[std::string(ParseTensorName(ndef->input(i)).first)] .push_back(ndef); } int64_t start_time; @@ -872,8 +879,9 @@ absl::Status TopologicalSortNodesWithTimePriority( return absl::OkStatus(); } -absl::Status AddControlEdges(const PartitionOptions& opts, - std::unordered_map* partitions) { +absl::Status AddControlEdges( + const PartitionOptions& opts, + std::unordered_map* partitions) { absl::Status status; // TODO(yuanbyu): Very naive for now. To be improved. const int num_epochs = 100; @@ -891,7 +899,7 @@ absl::Status AddControlEdges(const PartitionOptions& opts, // Add a dummy node for every epoch, and add a control edge from the // "last" node in the preceding epoch to the dummy node. - string device_name = gdef->node(0).device(); + std::string device_name = gdef->node(0).device(); int64_t makespan = start_times.back().second; int64_t resolution = (makespan / num_epochs) + 1; @@ -909,7 +917,7 @@ absl::Status AddControlEdges(const PartitionOptions& opts, } dummys.push_back(dummy); if (j > 0) { - string src_name = start_times[j - 1].first->name(); + std::string src_name = start_times[j - 1].first->name(); Graph::AddInput(dummy, src_name, Graph::kControlSlot); } i++; @@ -940,7 +948,7 @@ void SetIncarnation(const PartitionOptions& opts, NodeDef* ndef) { // Not related to send/recv. return; } - const string& send_device = GetNodeAttrString(*ndef, "send_device"); + const std::string& send_device = GetNodeAttrString(*ndef, "send_device"); if (send_device.empty()) { // No known send_device. The runtime will detect it later. 
return; @@ -968,10 +976,10 @@ void SetIncarnation(const PartitionOptions& opts, GraphDef* gdef) { } absl::Status Partition(const PartitionOptions& opts, Graph* g, - std::unordered_map* partitions) { + std::unordered_map* partitions) { // TODO(b/290689453) Refactor this into smaller functions absl::Status status; - absl::flat_hash_map> + absl::flat_hash_map> debug_info_builders; partitions->clear(); @@ -991,7 +999,7 @@ absl::Status Partition(const PartitionOptions& opts, Graph* g, status = BuildMemoryDeviceInfo(*g, &g_info); if (!status.ok()) return status; - string dstp; + std::string dstp; std::vector inputs; DupRecvTable dup_recv(3); // For a node dst, 'ref_recvs' remembers the recvs introduced by a ref @@ -999,7 +1007,7 @@ absl::Status Partition(const PartitionOptions& opts, Graph* g, // edge to dst. We will add a control edge for every pair in // (ref_recvs x ref_control_inputs). std::vector ref_recvs; - std::vector ref_control_inputs; + std::vector ref_control_inputs; int32_t num_data = 0; int32_t num_control = 0; @@ -1121,7 +1129,7 @@ absl::Status Partition(const PartitionOptions& opts, Graph* g, auto iter = dup_recv.find(key); if (iter != dup_recv.end()) { // We found one. Reuse the data/control transferred already. - const string& recv_node_name = iter->second.recv->name(); + const std::string& recv_node_name = iter->second.recv->name(); if (edge->IsControlEdge()) { Graph::AddInput(dst_def, recv_node_name, Graph::kControlSlot); } else { @@ -1157,7 +1165,7 @@ absl::Status Partition(const PartitionOptions& opts, Graph* g, send_from.Reset(src->name(), edge->src_output(), EdgeType(edge)); } - string tensor_name_attr; + std::string tensor_name_attr; if (opts.get_tensor_name_attr) { tensor_name_attr = opts.get_tensor_name_attr(edge); } else { diff --git a/tensorflow/core/graph/graph_partition.h b/tensorflow/core/graph/graph_partition.h index 59e9fe0e61c35d..c1d9493c76c6b5 100644 --- a/tensorflow/core/graph/graph_partition.h +++ b/tensorflow/core/graph/graph_partition.h @@ -31,19 +31,19 @@ namespace tensorflow { struct PartitionOptions { // A function that returns a location for the execution of a given // Node. - typedef std::function NodeToLocFunc; + typedef std::function NodeToLocFunc; NodeToLocFunc node_to_loc = nullptr; // A function that returns a unique graph node name with the given // prefix. - typedef std::function NewNameFunc; + typedef std::function NewNameFunc; NewNameFunc new_name = nullptr; // A function that returns the incarnation of a device given the // device's fullname. If not found, GetIncarnationFunc should return // kIllegalIncarnation. - static constexpr uint64 kIllegalIncarnation = 0; - typedef std::function GetIncarnationFunc; + static constexpr uint64_t kIllegalIncarnation = 0; + typedef std::function GetIncarnationFunc; GetIncarnationFunc get_incarnation = nullptr; // If specified, flib_def defines a function library that should be @@ -79,7 +79,7 @@ struct PartitionOptions { // Optional customized function to compute the "tensor_name" attr value of // Send/Recv ops inserted during partitioning. - std::function get_tensor_name_attr = nullptr; + std::function get_tensor_name_attr = nullptr; // If true, the `Partition()` function can make destructive changes to the // passed-in `Graph`. @@ -96,13 +96,14 @@ struct PartitionOptions { // // Stores the partitions in *partitions. 
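The graph_partition.h hunk above migrates PartitionOptions' callback typedefs (NodeToLocFunc, NewNameFunc, GetIncarnationFunc) to std::string and uint64_t. As a rough, standalone illustration of how such std::function-based options are filled in with lambdas, here is a simplified stand-in; the Options struct and its signatures are invented for this sketch and take strings where TensorFlow's real callbacks take const Node* and device names.

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Simplified stand-in for a callback-style options struct.
struct Options {
  std::function<std::string(const std::string&)> node_to_loc;
  std::function<std::string(const std::string&)> new_name;
  std::function<uint64_t(const std::string&)> get_incarnation;
};

int main() {
  int counter = 0;
  Options opts;
  // Route nodes whose name starts with 'G' to the GPU, everything else to CPU 0.
  opts.node_to_loc = [](const std::string& node_name) {
    return node_name[0] == 'G' ? std::string("/device:GPU:0")
                               : std::string("/device:CPU:0");
  };
  // Produce a unique name by appending a running counter to the prefix.
  opts.new_name = [&counter](const std::string& prefix) {
    return prefix + "/_" + std::to_string(counter++);
  };
  // Derive a fake incarnation number from the device name's last character.
  opts.get_incarnation = [](const std::string& device) {
    return static_cast<uint64_t>(device.back() - '0') + 100;
  };

  std::cout << opts.node_to_loc("G1") << " " << opts.new_name("A1") << " "
            << opts.get_incarnation("/device:CPU:1") << "\n";
  return 0;
}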
absl::Status Partition(const PartitionOptions& opts, Graph* input, - std::unordered_map* partitions); + std::unordered_map* partitions); // Add control edges to the partitions to control the ordering // and timing of the recv nodes based on the start times calculated // using some scheduling algorithm. -absl::Status AddControlEdges(const PartitionOptions& opts, - std::unordered_map* partitions); +absl::Status AddControlEdges( + const PartitionOptions& opts, + std::unordered_map* partitions); } // namespace tensorflow diff --git a/tensorflow/core/graph/graph_partition_test.cc b/tensorflow/core/graph/graph_partition_test.cc index 5f3d0a1b4117f2..4f5e431b87df50 100644 --- a/tensorflow/core/graph/graph_partition_test.cc +++ b/tensorflow/core/graph/graph_partition_test.cc @@ -68,21 +68,23 @@ using ::testing::Ne; const char gpu_device[] = "/job:a/replica:0/task:0/device:GPU:0"; -string SplitByDevice(const Node* node) { return node->assigned_device_name(); } +std::string SplitByDevice(const Node* node) { + return node->assigned_device_name(); +} -string DeviceName(const Node* node) { +std::string DeviceName(const Node* node) { char first = node->name()[0]; if (first == 'G') { return gpu_device; } else { - const string cpu_prefix = "/job:a/replica:0/task:0/cpu:"; + const std::string cpu_prefix = "/job:a/replica:0/task:0/cpu:"; int index = first - 'A'; return absl::StrCat(cpu_prefix, index); } } void Partition(const GraphDef& graph_def, - std::unordered_map* partitions) { + std::unordered_map* partitions) { Graph g(OpRegistry::Global()); GraphConstructorOptions opts; TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, &g)); @@ -90,16 +92,18 @@ void Partition(const GraphDef& graph_def, // Assigns devices to each node. Uses 1st letter of the node name as the // device index if no device is specified. for (Node* node : g.nodes()) { - string device_name = !node->requested_device().empty() - ? node->requested_device() - : DeviceName(node); + std::string device_name = !node->requested_device().empty() + ? 
node->requested_device() + : DeviceName(node); node->set_assigned_device_name(device_name); } PartitionOptions popts; popts.node_to_loc = SplitByDevice; - popts.new_name = [&g](const string& prefix) { return g.NewName(prefix); }; - popts.get_incarnation = [](const string& name) { + popts.new_name = [&g](const std::string& prefix) { + return g.NewName(prefix); + }; + popts.get_incarnation = [](const std::string& name) { return (name[0] - 'A') + 100; }; absl::Status s = Partition(popts, &g, partitions); @@ -116,7 +120,7 @@ void Partition(const GraphDef& graph_def, } void CheckLoopConstruction(const GraphDef& graph_def) { - std::unordered_map partitions; + std::unordered_map partitions; Partition(graph_def, &partitions); for (const auto& kv : partitions) { const GraphDef& gdef = kv.second; @@ -128,7 +132,7 @@ void CheckLoopConstruction(const GraphDef& graph_def) { // _recvs must have a control input if (ndef.op() == "_Recv") { bool has_control = false; - for (const string& input_name : ndef.input()) { + for (const std::string& input_name : ndef.input()) { if (absl::StartsWith(input_name, "^")) { has_control = true; break; @@ -171,10 +175,10 @@ REGISTER_OP("Combine") .Output("o: float") .SetShapeFn(shape_inference::UnknownShape); -Output ConstructOp(const Scope& scope, const string& op_type, +Output ConstructOp(const Scope& scope, const std::string& op_type, const absl::Span inputs) { if (!scope.ok()) return Output(); - const string unique_name = scope.GetUniqueNameForOp(op_type); + const std::string unique_name = scope.GetUniqueNameForOp(op_type); auto builder = NodeBuilder(unique_name, op_type, scope.graph()->op_registry()); for (auto const& input : inputs) { @@ -230,20 +234,20 @@ class GraphPartitionTest : public ::testing::Test { void ExpectMatchA() { GraphDef graph_def; TF_EXPECT_OK(scope_a_.ToGraphDef(&graph_def)); - string a = "/job:a/replica:0/task:0/cpu:0"; + std::string a = "/job:a/replica:0/task:0/cpu:0"; TF_EXPECT_GRAPH_EQ(graph_def, partitions_[a]); } void ExpectMatchB() { GraphDef graph_def; TF_EXPECT_OK(scope_b_.ToGraphDef(&graph_def)); - string b = "/job:a/replica:0/task:0/cpu:1"; + std::string b = "/job:a/replica:0/task:0/cpu:1"; TF_EXPECT_GRAPH_EQ(graph_def, partitions_[b]); } void ExpectFunctions(const FunctionDefLibrary& library, - const std::set& expected_names) { - std::set actual_names; + const std::set& expected_names) { + std::set actual_names; for (const FunctionDef& fdef : library.function()) { actual_names.insert(fdef.signature().name()); } @@ -254,7 +258,7 @@ class GraphPartitionTest : public ::testing::Test { GraphDef in_graph_def_; Scope scope_a_; Scope scope_b_; - std::unordered_map partitions_; + std::unordered_map partitions_; }; TEST_F(GraphPartitionTest, SingleDevice) { @@ -277,8 +281,8 @@ TEST_F(GraphPartitionTest, CrossDeviceData) { Partition(ToGraphDef(), &partitions_); EXPECT_EQ(2, partitions_.size()); - string a = "/job:a/replica:0/task:0/cpu:0"; - string b = "/job:a/replica:0/task:0/cpu:1"; + std::string a = "/job:a/replica:0/task:0/cpu:0"; + std::string b = "/job:a/replica:0/task:0/cpu:1"; a1 = FloatInput(scope_a_.WithOpName("A1")); _Send(scope_a_.WithOpName("A1/_0"), a1, "edge_1_A1", a, 82, b); ExpectMatchA(); @@ -298,8 +302,8 @@ TEST_F(GraphPartitionTest, CrossDeviceControl) { Partition(ToGraphDef(), &partitions_); EXPECT_EQ(2, partitions_.size()); - string a = "/job:a/replica:0/task:0/cpu:0"; - string b = "/job:a/replica:0/task:0/cpu:1"; + std::string a = "/job:a/replica:0/task:0/cpu:0"; + std::string b = "/job:a/replica:0/task:0/cpu:1"; a1 = 
FloatInput(scope_a_.WithOpName("A1")); auto c = Const(scope_a_.WithOpName("A1/ctrl/_0").WithControlDependencies(a1), {}); @@ -323,8 +327,8 @@ TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) { Partition(ToGraphDef(), &partitions_); EXPECT_EQ(2, partitions_.size()); - string a = "/job:a/replica:0/task:0/cpu:0"; - string b = "/job:a/replica:0/task:0/cpu:1"; + std::string a = "/job:a/replica:0/task:0/cpu:0"; + std::string b = "/job:a/replica:0/task:0/cpu:1"; a1 = FloatInput(scope_a_.WithOpName("A1")); _Send(scope_a_.WithOpName("A1/_0"), a1, "edge_1_A1", a, 82, b); ExpectMatchA(); @@ -346,8 +350,8 @@ TEST_F(GraphPartitionTest, CrossDeviceControl_MultiUse) { Partition(ToGraphDef(), &partitions_); EXPECT_EQ(2, partitions_.size()); - string a = "/job:a/replica:0/task:0/cpu:0"; - string b = "/job:a/replica:0/task:0/cpu:1"; + std::string a = "/job:a/replica:0/task:0/cpu:0"; + std::string b = "/job:a/replica:0/task:0/cpu:1"; a1 = FloatInput(scope_a_.WithOpName("A1")); auto c = Const(scope_a_.WithOpName("A1/ctrl/_0").WithControlDependencies(a1), {}); @@ -372,8 +376,8 @@ TEST_F(GraphPartitionTest, CrossDevice_DataControl) { Partition(ToGraphDef(), &partitions_); EXPECT_EQ(2, partitions_.size()); - string a = "/job:a/replica:0/task:0/cpu:0"; - string b = "/job:a/replica:0/task:0/cpu:1"; + std::string a = "/job:a/replica:0/task:0/cpu:0"; + std::string b = "/job:a/replica:0/task:0/cpu:1"; a1 = FloatInput(scope_a_.WithOpName("A1")); _Send(scope_a_.WithOpName("A1/_0"), a1, "edge_1_A1", a, 82, b); auto c = @@ -417,7 +421,7 @@ TEST_F(GraphPartitionTest, CrossDeviceLoopSimple1) { auto b1 = Identity(in_.WithOpName("B1"), a3); NextIteration(in_.WithOpName("B5"), b1); - std::unordered_map partitions; + std::unordered_map partitions; Partition(ToGraphDef(), &partitions); for (const auto& kv : partitions) { const GraphDef& gdef = kv.second; @@ -471,10 +475,12 @@ TEST_F(GraphPartitionTest, PartitionIncompleteGraph) { PartitionOptions popts; popts.node_to_loc = SplitByDevice; - popts.new_name = [&g](const string& prefix) { return g.NewName(prefix); }; - popts.get_incarnation = [](const string&) { return 1; }; + popts.new_name = [&g](const std::string& prefix) { + return g.NewName(prefix); + }; + popts.get_incarnation = [](const std::string&) { return 1; }; - std::unordered_map partitions; + std::unordered_map partitions; status = Partition(popts, &g, &partitions); // Partitioning should fail, but not crash like it did before the // changes that accompanied the addition of this test. @@ -498,8 +504,8 @@ TEST_F(GraphPartitionTest, Functions) { EXPECT_EQ(2, partitions_.size()); // Test that partition graphs inherit function library from original graph. - string a = "/job:a/replica:0/task:0/cpu:0"; - string b = "/job:a/replica:0/task:0/cpu:1"; + std::string a = "/job:a/replica:0/task:0/cpu:0"; + std::string b = "/job:a/replica:0/task:0/cpu:1"; // Node "A2" is placed in part `a`, and uses only "XTimesTwo". ExpectFunctions(partitions_[a].library(), {"XTimesTwo"}); @@ -602,7 +608,7 @@ TEST_F(GraphPartitionTest, GraphDebugInfo) { // Expect each partitioned graph to contain the stack traces for its nodes. // A stack trace for A1 should be in the A partition (".../cpu:0"). 
- string a = "/job:a/replica:0/task:0/cpu:0"; + std::string a = "/job:a/replica:0/task:0/cpu:0"; const GraphDebugInfo& a_debug_info = partitions_[a].debug_info(); StackTracesMap traces = LoadTracesFromDebugInfo(a_debug_info); const auto& a_it = traces.find("A1"); @@ -611,7 +617,7 @@ TEST_F(GraphPartitionTest, GraphDebugInfo) { ::testing::ContainsRegex("alpha.cc.*30")); // Stack traces for B1 and B2 should be in the B partition (".../cpu:1"). - string b = "/job:a/replica:0/task:0/cpu:1"; + std::string b = "/job:a/replica:0/task:0/cpu:1"; const GraphDebugInfo& b_debug_info = partitions_[b].debug_info(); traces = LoadTracesFromDebugInfo(b_debug_info); const auto& b1_it = traces.find("B1"); diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index a5b519365034f2..fb5ce07959a424 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -100,13 +100,13 @@ class GraphTest : public ::testing::Test { EXPECT_EQ(edges, graph_.num_edges()); } - Node* AddNodeWithName(const string& name) { + Node* AddNodeWithName(const std::string& name) { Node* node; TF_CHECK_OK(NodeBuilder(name, "NoOp").Finalize(&graph_, &node)); return node; } - Node* FromNodeDef(const string& name, const string& node_type, + Node* FromNodeDef(const std::string& name, const std::string& node_type, int num_inputs) { auto builder = NodeDefBuilder(name, node_type); for (int i = 0; i < num_inputs; ++i) { @@ -122,14 +122,14 @@ class GraphTest : public ::testing::Test { return node; } - void FromGraphDef(const string& gdef_ascii) { + void FromGraphDef(const std::string& gdef_ascii) { GraphDef gdef; CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &gdef)); GraphConstructorOptions opts; TF_CHECK_OK(ConvertGraphDefToGraph(opts, gdef, &graph_)); } - Node* FindNode(const string& name) { + Node* FindNode(const std::string& name) { for (Node* node : graph_.nodes()) { if (node->name() == name) return node; } @@ -158,8 +158,8 @@ class GraphTest : public ::testing::Test { private: // Convert a list of nodes to a sorted list of strings so failure messages // are readable. - static std::vector Stringify(const std::vector& nodes) { - std::vector result; + static std::vector Stringify(const std::vector& nodes) { + std::vector result; result.reserve(nodes.size()); for (Node* n : nodes) { result.push_back(n->DebugString()); @@ -322,14 +322,14 @@ TEST_F(GraphTest, NodeIteration) { graph_.RemoveNode(c); // expected = set of all node DebugStrings we expect in the graph - std::set expected; + std::set expected; expected.insert(graph_.source_node()->DebugString()); expected.insert(a->DebugString()); expected.insert(d->DebugString()); expected.insert(graph_.sink_node()->DebugString()); // Verify that iterating through ids gets the same set of nodes. - std::set actual; + std::set actual; for (int id = 0; id < graph_.num_node_ids(); ++id) { Node* node = graph_.FindNodeId(id); if (node != nullptr) { @@ -370,7 +370,7 @@ TEST_F(GraphTest, AddAttr) { n1->AddAttr("_a", "new_attr"); - string attr; + std::string attr; EXPECT_EQ(absl::OkStatus(), GetNodeAttr(n1->attrs(), "_a", &attr)); EXPECT_EQ("new_attr", attr); @@ -389,13 +389,13 @@ TEST_F(GraphTest, AddAttr) { } // Convert edge iteration results into a sorted string. 
-static string EdgeIter(const Graph& g) { +static std::string EdgeIter(const Graph& g) { std::vector > edges; for (const Edge* e : g.edges()) { edges.push_back(std::make_pair(e->src()->id(), e->dst()->id())); } std::sort(edges.begin(), edges.end()); - string result; + std::string result; for (auto& p : edges) { absl::StrAppend(&result, p.first, "->", p.second, ";"); } @@ -422,9 +422,9 @@ TEST_F(GraphTest, EdgeIteration) { } TEST_F(GraphTest, NewName) { - string a1 = graph_.NewName("A"); - string a2 = graph_.NewName("A"); - string b1 = graph_.NewName("B"); + std::string a1 = graph_.NewName("A"); + std::string a2 = graph_.NewName("A"); + std::string b1 = graph_.NewName("B"); EXPECT_NE(a1, a2); EXPECT_NE(a1, b1); EXPECT_NE(a2, b1); @@ -446,19 +446,19 @@ TEST_F(GraphTest, IsValidNode) { // nullptr absl::Status s = graph_.IsValidNode(nullptr); EXPECT_EQ(error::INVALID_ARGUMENT, s.code()); - EXPECT_EQ(string("Node is null"), s.message()); + EXPECT_EQ(std::string("Node is null"), s.message()); // node id_ is too high s = graph_.IsValidNode(g2_node2); EXPECT_EQ(error::INVALID_ARGUMENT, s.code()); - EXPECT_EQ(string("node id 3 is >= than number of nodes in graph 3"), + EXPECT_EQ(std::string("node id 3 is >= than number of nodes in graph 3"), s.message()); // valid id_ but different ptr s = graph_.IsValidNode(g2_node1); EXPECT_EQ(error::INVALID_ARGUMENT, s.code()); - EXPECT_EQ(string("Node with id 2 is different from the passed in node. " - "Does it belong to a different graph?"), + EXPECT_EQ(std::string("Node with id 2 is different from the passed in node. " + "Does it belong to a different graph?"), s.message()); } @@ -695,8 +695,8 @@ TEST_F(GraphTest, BuildNodeNameIndex) { auto node_name_index = graph_.BuildNodeNameIndex(); EXPECT_EQ(node_name_index.size(), 5); - std::vector node_names{"_SOURCE", "_SINK", "A", "B", "C"}; - for (const string& node_name : node_names) { + std::vector node_names{"_SOURCE", "_SINK", "A", "B", "C"}; + for (const std::string& node_name : node_names) { EXPECT_NE(node_name_index.find(node_name), node_name_index.end()); EXPECT_EQ(node_name_index[node_name], FindNode(node_name)); } diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc index e2fe533ce4b238..e29d2d92d4c597 100644 --- a/tensorflow/core/graph/node_builder.cc +++ b/tensorflow/core/graph/node_builder.cc @@ -108,7 +108,7 @@ NodeBuilder& NodeBuilder::Device(absl::string_view device_spec) { } NodeBuilder& NodeBuilder::AssignedDevice(absl::string_view device) { - assigned_device_ = string(device); + assigned_device_ = std::string(device); return *this; } diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h index 6f249371606b3e..476393cae8166b 100644 --- a/tensorflow/core/graph/node_builder.h +++ b/tensorflow/core/graph/node_builder.h @@ -67,8 +67,8 @@ class NodeBuilder { // * a nullptr Node* was passed to the NodeOut constructor, or // * an out-of-range index was passed to the NodeOut constructor. bool error; - string name; - int32 index; + std::string name; + int32_t index; DataType dt; }; @@ -132,7 +132,7 @@ class NodeBuilder { absl::StatusOr Finalize(Graph* graph, bool consume = false); // Accessors for the values set in the constructor. 
- const string& node_name() const { return def_builder_.node_name(); } + const std::string& node_name() const { return def_builder_.node_name(); } const OpDef& op_def() const { return def_builder_.op_def(); } private: @@ -157,8 +157,8 @@ class NodeBuilder { const OpRegistryInterface* op_registry_; std::vector inputs_; std::vector control_inputs_; - std::vector errors_; - string assigned_device_; + std::vector errors_; + std::string assigned_device_; }; // IMPLEMENTATION ------------------------------------------------------------- diff --git a/tensorflow/core/graph/optimizer_cse.cc b/tensorflow/core/graph/optimizer_cse.cc index 39b53541081659..f18d8a3bca0f1a 100644 --- a/tensorflow/core/graph/optimizer_cse.cc +++ b/tensorflow/core/graph/optimizer_cse.cc @@ -93,9 +93,9 @@ static size_t kIllegalNodeHash = 0; class Hasher { public: - uint64 hash() { return h_ == kIllegalNodeHash ? kIllegalNodeHash + 1 : h_; } + uint64_t hash() { return h_ == kIllegalNodeHash ? kIllegalNodeHash + 1 : h_; } - void MixString(const string& s) { h_ = Hash64(s.data(), s.size(), h_); } + void MixString(const std::string& s) { h_ = Hash64(s.data(), s.size(), h_); } void MixInteger(size_t z) { h_ = Hash64Combine(h_, z); } @@ -122,7 +122,7 @@ class Hasher { // This kBufSize makes sizeof(HashingOutputStream) == 256. It's not chosen // for any particular reason except it's a nice even number of cache lines. static constexpr size_t kBufSize = 228; - static constexpr uint64 kDefaultSeed = 2570847921467975139ULL; + static constexpr uint64_t kDefaultSeed = 2570847921467975139ULL; bool Next(void** data, int* size) override { if (i_ == kBufSize) { // Mix the chunk in. @@ -174,7 +174,7 @@ class Hasher { bool AllowsAliasing() const override { return true; } - uint64 hash() { + uint64_t hash() { if (i_ != 0) { Mix(buf_, i_); i_ = 0; @@ -190,10 +190,10 @@ class Hasher { char buf_[kBufSize]; int i_ = 0; int64_t byte_count_ = 0; - uint64 h_ = kDefaultSeed; + uint64_t h_ = kDefaultSeed; }; - uint64 h_ = HashingOutputStream::kDefaultSeed; + uint64_t h_ = HashingOutputStream::kDefaultSeed; }; size_t OptimizerCSE::NodeHash(const Node* n) { diff --git a/tensorflow/core/graph/optimizer_cse_test.cc b/tensorflow/core/graph/optimizer_cse_test.cc index 94b4cabb2fd884..bac15370ae039e 100644 --- a/tensorflow/core/graph/optimizer_cse_test.cc +++ b/tensorflow/core/graph/optimizer_cse_test.cc @@ -36,7 +36,7 @@ limitations under the License. 
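The optimizer_cse.cc hunk above migrates the CSE Hasher to uint64_t: it mixes strings and integers into a running hash seeded with 2570847921467975139ULL and reserves 0 as the illegal "no hash" sentinel. The standalone sketch below mirrors that shape; the Boost-style mixing step is a stand-in for TensorFlow's Hash64/Hash64Combine, and the values mixed in main are illustrative.

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Sentinel reserved for "no hash computed yet"; never returned to callers.
constexpr uint64_t kIllegalNodeHash = 0;

class Hasher {
 public:
  void MixString(const std::string& s) { Mix(std::hash<std::string>{}(s)); }
  void MixInteger(uint64_t z) { Mix(z); }

  uint64_t hash() const {
    // Bump an accidental 0 to 1 so 0 stays usable as a sentinel.
    return h_ == kIllegalNodeHash ? kIllegalNodeHash + 1 : h_;
  }

 private:
  void Mix(uint64_t v) {
    // Boost-style combine; stands in for Hash64Combine in the real code.
    h_ ^= v + 0x9e3779b97f4a7c15ULL + (h_ << 6) + (h_ >> 2);
  }
  uint64_t h_ = 2570847921467975139ULL;  // arbitrary non-zero seed
};

int main() {
  Hasher hasher;
  hasher.MixString("MatMul");  // e.g. the op type
  hasher.MixInteger(2);        // e.g. the input count
  std::cout << hasher.hash() << "\n";
  return 0;
}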
namespace tensorflow { namespace { -static void InitGraph(const string& s, Graph* graph) { +static void InitGraph(const std::string& s, Graph* graph) { GraphDef graph_def; auto parser = protobuf::TextFormat::Parser(); @@ -50,14 +50,14 @@ class OptimizerCSETest : public ::testing::Test { public: OptimizerCSETest() : graph_(OpRegistry::Global()) {} - void InitGraph(const string& s) { + void InitGraph(const std::string& s) { ::tensorflow::InitGraph(s, &graph_); original_ = CanonicalGraphString(&graph_); } static bool IncludeNode(const Node* n) { return n->IsOp(); } - static string EdgeId(const Node* n, int index) { + static std::string EdgeId(const Node* n, int index) { if (index == 0) { return n->name(); } else if (index == Graph::kControlSlot) { @@ -67,9 +67,9 @@ class OptimizerCSETest : public ::testing::Test { } } - string CanonicalGraphString(Graph* g) { - std::vector nodes; - std::vector edges; + std::string CanonicalGraphString(Graph* g) { + std::vector nodes; + std::vector edges; for (const Node* n : g->nodes()) { if (IncludeNode(n)) { nodes.push_back(absl::StrCat(n->name(), "(", n->type_string(), ")")); @@ -88,21 +88,22 @@ class OptimizerCSETest : public ::testing::Test { absl::StrJoin(edges, ";")); } - string DoCSE(const std::function& consider_fn = nullptr) { - string before = CanonicalGraphString(&graph_); + std::string DoCSE( + const std::function& consider_fn = nullptr) { + std::string before = CanonicalGraphString(&graph_); LOG(ERROR) << "Before rewrites: " << before; OptimizeCSE(&graph_, consider_fn); - string result = CanonicalGraphString(&graph_); + std::string result = CanonicalGraphString(&graph_); LOG(ERROR) << "After rewrites: " << result; return result; } - const string& OriginalGraph() const { return original_; } + const std::string& OriginalGraph() const { return original_; } Graph graph_; - string original_; + std::string original_; }; REGISTER_OP("Input").Output("o: float").SetIsStateful(); @@ -339,8 +340,8 @@ TEST_F(OptimizerCSETest, Constant_Dedup) { EXPECT_EQ(OriginalGraph(), "n/_0(Const);n/_1(Const);n/_2(Const);n/_3(Const);" "n/_4(Const);n/_5(Const);n/_6(Const);n/_7(Const)|"); - std::vector nodes = str_util::Split(DoCSE(), ";|"); - std::set node_set(nodes.begin(), nodes.end()); + std::vector nodes = str_util::Split(DoCSE(), ";|"); + std::set node_set(nodes.begin(), nodes.end()); // Expect exactly one of each type of node to be retained after CSE. 
EXPECT_EQ(node_set.count("n/_0(Const)") + node_set.count("n/_7(Const)"), 1); EXPECT_EQ(node_set.count("n/_1(Const)") + node_set.count("n/_6(Const)"), 1); @@ -350,14 +351,14 @@ TEST_F(OptimizerCSETest, Constant_Dedup) { void BM_CSE(::testing::benchmark::State& state) { const int op_nodes = state.range(0); - string s; + std::string s; for (int in = 0; in < 10; in++) { - s += strings::Printf("node { name: 'in%04d' op: 'Input'}", in); + s += absl::StrFormat("node { name: 'in%04d' op: 'Input'}", in); } random::PhiloxRandom philox(301, 17); random::SimplePhilox rnd(&philox); for (int op = 0; op < op_nodes; op++) { - s += strings::Printf( + s += absl::StrFormat( "node { name: 'op%04d' op: 'Mul' attr { key: 'T' value { " "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }", op, rnd.Uniform(10), rnd.Uniform(10)); diff --git a/tensorflow/core/graph/regularization/simple_delete_test.cc b/tensorflow/core/graph/regularization/simple_delete_test.cc index 2eac003707755f..424c0384823cb8 100644 --- a/tensorflow/core/graph/regularization/simple_delete_test.cc +++ b/tensorflow/core/graph/regularization/simple_delete_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/graph/regularization/simple_delete.h" +#include #include #include "absl/status/statusor.h" diff --git a/tensorflow/core/graph/subgraph.cc b/tensorflow/core/graph/subgraph.cc index 1d03877c02583c..697defb2ef2558 100644 --- a/tensorflow/core/graph/subgraph.cc +++ b/tensorflow/core/graph/subgraph.cc @@ -61,7 +61,7 @@ absl::Status FeedInputs( out_feed_types->clear(); out_feed_types->reserve(feed_rewrites.size()); for (size_t i = 0; i < feed_rewrites.size(); ++i) { - const string& t = feed_rewrites[i]->endpoint_name(); + const std::string& t = feed_rewrites[i]->endpoint_name(); TensorId id(ParseTensorName(t)); auto iter = name_index->find(id.first); @@ -127,7 +127,7 @@ absl::Status FetchOutputs( out_fetch_nodes->clear(); out_fetch_nodes->reserve(fetch_rewrites.size()); for (size_t i = 0; i < fetch_rewrites.size(); ++i) { - const string& t = fetch_rewrites[i]->endpoint_name(); + const std::string& t = fetch_rewrites[i]->endpoint_name(); // Parse t into node_name and output_index. 
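The BM_CSE hunk above swaps strings::Printf for absl::StrFormat when it builds a textual GraphDef for the benchmark. Below is a standalone sketch of that text generation; std::snprintf and std::mt19937 stand in for absl::StrFormat and random::SimplePhilox so the sketch builds without TensorFlow or Abseil, and the helper name is invented for the sketch.

#include <cstdio>
#include <random>
#include <string>

// Builds the same kind of textual graph the benchmark parses: ten Input nodes
// followed by op_nodes Mul nodes wired to randomly chosen inputs.
std::string MakeBenchmarkGraphText(int op_nodes, unsigned seed) {
  std::string s;
  char buf[256];
  for (int in = 0; in < 10; ++in) {
    std::snprintf(buf, sizeof(buf), "node { name: 'in%04d' op: 'Input'}", in);
    s += buf;
  }
  std::mt19937 rnd(seed);  // stands in for random::SimplePhilox
  std::uniform_int_distribution<int> pick(0, 9);
  for (int op = 0; op < op_nodes; ++op) {
    std::snprintf(buf, sizeof(buf),
                  "node { name: 'op%04d' op: 'Mul' attr { key: 'T' value { "
                  "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
                  op, pick(rnd), pick(rnd));
    s += buf;
  }
  return s;
}

int main() {
  std::string graph_text = MakeBenchmarkGraphText(/*op_nodes=*/4, /*seed=*/17);
  std::printf("%zu bytes of graph text\n", graph_text.size());
  return 0;
}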
TensorId id(ParseTensorName(t)); @@ -174,7 +174,7 @@ absl::Status FetchOutputs( return absl::OkStatus(); } -bool AddNodeToTargets(const string& node_or_tensor_name, +bool AddNodeToTargets(const std::string& node_or_tensor_name, const NameIndex& name_index, std::unordered_set* targets) { TensorId id = ParseTensorName(node_or_tensor_name); @@ -188,17 +188,18 @@ bool AddNodeToTargets(const string& node_or_tensor_name, return true; } -absl::Status PruneForTargets(Graph* g, const NameIndex& name_index, - const std::vector& fetch_nodes, - const absl::Span& target_nodes) { - string not_found; +absl::Status PruneForTargets( + Graph* g, const NameIndex& name_index, + const std::vector& fetch_nodes, + const absl::Span& target_nodes) { + std::string not_found; std::unordered_set targets; for (Node* n : fetch_nodes) { if (!AddNodeToTargets(n->name(), name_index, &targets)) { absl::StrAppend(¬_found, n->name(), " "); } } - for (const string& s : target_nodes) { + for (const std::string& s : target_nodes) { if (!AddNodeToTargets(s, name_index, &targets)) { absl::StrAppend(¬_found, s, " "); } @@ -295,9 +296,9 @@ absl::Status SendFetchRewrite::AddNode(Graph* g, } absl::Status RewriteGraphForExecution( - Graph* g, const absl::Span& fed_outputs, - const absl::Span& fetch_outputs, - const absl::Span& target_node_names, + Graph* g, const absl::Span& fed_outputs, + const absl::Span& fetch_outputs, + const absl::Span& target_node_names, const DeviceAttributes& device_info, bool use_function_convention, RewriteGraphMetadata* out_metadata) { std::vector> feed_rewrites; @@ -305,10 +306,10 @@ absl::Status RewriteGraphForExecution( if (use_function_convention) { for (size_t i = 0; i < fed_outputs.size(); ++i) { feed_rewrites.emplace_back(new ArgFeedRewrite( - &fed_outputs[i], &device_info, static_cast(i))); + &fed_outputs[i], &device_info, static_cast(i))); } } else { - for (const string& fed_output : fed_outputs) { + for (const std::string& fed_output : fed_outputs) { feed_rewrites.emplace_back( new RecvFeedRewrite(&fed_output, &device_info)); } @@ -319,10 +320,10 @@ absl::Status RewriteGraphForExecution( if (use_function_convention) { for (size_t i = 0; i < fetch_outputs.size(); ++i) { fetch_rewrites.emplace_back(new RetvalFetchRewrite( - &fetch_outputs[i], &device_info, static_cast(i))); + &fetch_outputs[i], &device_info, static_cast(i))); } } else { - for (const string& fetch_output : fetch_outputs) { + for (const std::string& fetch_output : fetch_outputs) { fetch_rewrites.emplace_back( new SendFetchRewrite(&fetch_output, &device_info)); } @@ -334,22 +335,22 @@ absl::Status RewriteGraphForExecution( namespace { template -std::vector ConvertToVector(StringContainer field) { - return std::vector(field.begin(), field.end()); +std::vector ConvertToVector(StringContainer field) { + return std::vector(field.begin(), field.end()); } } // namespace absl::Status RewriteGraphForExecution( Graph* g, const std::vector>& feed_rewrites, const std::vector>& fetch_rewrites, - const absl::Span& target_node_names, + const absl::Span& target_node_names, RewriteGraphMetadata* out_metadata) { if (fetch_rewrites.empty() && target_node_names.empty()) { return errors::InvalidArgument( "Must specify at least one target to fetch or execute."); } - std::unordered_set endpoints; + std::unordered_set endpoints; for (const auto& feed_rewrite : feed_rewrites) { auto result = endpoints.insert(feed_rewrite->endpoint_name()); if (!result.second) { diff --git a/tensorflow/core/graph/subgraph.h b/tensorflow/core/graph/subgraph.h index 
37013b8f7d09ee..c8843a37d58fa9 100644 --- a/tensorflow/core/graph/subgraph.h +++ b/tensorflow/core/graph/subgraph.h @@ -50,7 +50,8 @@ struct RewriteGraphMetadata { class PruneRewrite { public: // `endpoint_name` and `device_info` must outlive this object. - PruneRewrite(const string* endpoint_name, const DeviceAttributes* device_info) + PruneRewrite(const std::string* endpoint_name, + const DeviceAttributes* device_info) : endpoint_name_(endpoint_name), device_info_(device_info) {} virtual ~PruneRewrite() {} @@ -60,14 +61,14 @@ class PruneRewrite { Node** out_node) = 0; // Returns the name of the tensor to which this rewrite applies. - const string& endpoint_name() { return *endpoint_name_; } + const std::string& endpoint_name() { return *endpoint_name_; } protected: // The device on which the new node will be created. const DeviceAttributes& device_info() { return *device_info_; } private: - const string* const endpoint_name_; // Not owned. + const std::string* const endpoint_name_; // Not owned. const DeviceAttributes* const device_info_; // Not owned. }; @@ -98,9 +99,9 @@ class PruneRewrite { // - fetch output "node:output_index" does not exist in "*g" // - target node "node" does not exist in "*g" absl::Status RewriteGraphForExecution( - Graph* g, const absl::Span& fed_outputs, - const absl::Span& fetch_outputs, - const absl::Span& target_node_names, + Graph* g, const absl::Span& fed_outputs, + const absl::Span& fetch_outputs, + const absl::Span& target_node_names, const DeviceAttributes& device_info, bool use_function_convention, RewriteGraphMetadata* out_metadata); @@ -109,7 +110,7 @@ absl::Status RewriteGraphForExecution( absl::Status RewriteGraphForExecution( Graph* g, const std::vector>& feed_rewrites, const std::vector>& fetch_rewrites, - const absl::Span& target_node_names, + const absl::Span& target_node_names, RewriteGraphMetadata* out_metadata); ///////////////////////////////////////////////////////// @@ -119,14 +120,14 @@ absl::Status RewriteGraphForExecution( // A rewrite action that adds an _Arg node for a fed tensor. class ArgFeedRewrite : public PruneRewrite { public: - ArgFeedRewrite(const string* endpoint_name, + ArgFeedRewrite(const std::string* endpoint_name, const DeviceAttributes* device_info, int32_t arg_index) : PruneRewrite(endpoint_name, device_info), arg_index_(arg_index) {} absl::Status AddNode(Graph* g, NodeBuilder::NodeOut feed_tensor, Node** out_node) override; private: - const int32 arg_index_; + const int32_t arg_index_; }; // A rewrite action that adds a client-terminated _Recv node for a fed tensor. @@ -140,14 +141,14 @@ class RecvFeedRewrite : public PruneRewrite { // A rewrite action that adds a _Retval node for a fetched tensor. 
class RetvalFetchRewrite : public PruneRewrite { public: - RetvalFetchRewrite(const string* endpoint_name, + RetvalFetchRewrite(const std::string* endpoint_name, const DeviceAttributes* device_info, int32_t retval_index) : PruneRewrite(endpoint_name, device_info), retval_index_(retval_index) {} absl::Status AddNode(Graph* g, NodeBuilder::NodeOut fetch_tensor, Node** out_node) override; private: - const int32 retval_index_; + const int32_t retval_index_; }; // A rewrite action that adds a client-terminated _Send node for a diff --git a/tensorflow/core/graph/subgraph_test.cc b/tensorflow/core/graph/subgraph_test.cc index 31c5cf8a3bb444..a5f4be88e8e5de 100644 --- a/tensorflow/core/graph/subgraph_test.cc +++ b/tensorflow/core/graph/subgraph_test.cc @@ -49,24 +49,24 @@ class SubgraphTest : public ::testing::Test { ~SubgraphTest() override {} - void ExpectOK(const string& gdef_ascii) { + void ExpectOK(const std::string& gdef_ascii) { CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &gdef_)); GraphConstructorOptions opts; TF_CHECK_OK(ConvertGraphDefToGraph(opts, gdef_, g_.get())); } - Node* FindNode(const string& name) { + Node* FindNode(const std::string& name) { for (Node* n : g_->nodes()) { if (n->name() == name) return n; } return nullptr; } - bool HasNode(const string& name) { return FindNode(name) != nullptr; } + bool HasNode(const std::string& name) { return FindNode(name) != nullptr; } - void ExpectNodes(const string& nodes) { + void ExpectNodes(const std::string& nodes) { int count = 0; - std::vector actual_nodes; + std::vector actual_nodes; for (Node* n : g_->nodes()) { if (n->IsOp()) { count++; @@ -77,9 +77,9 @@ class SubgraphTest : public ::testing::Test { LOG(INFO) << "Nodes present: " << absl::StrJoin(actual_nodes, " "); - std::vector expected_nodes = str_util::Split(nodes, ','); + std::vector expected_nodes = str_util::Split(nodes, ','); std::sort(expected_nodes.begin(), expected_nodes.end()); - for (const string& s : expected_nodes) { + for (const std::string& s : expected_nodes) { Node* n = FindNode(s); EXPECT_TRUE(n != nullptr) << s; if (n->type_string() == "_Send" || n->type_string() == "_Recv") { @@ -92,7 +92,8 @@ class SubgraphTest : public ::testing::Test { << "\nExpected: " << absl::StrJoin(expected_nodes, ","); } - bool HasEdge(const string& src, int src_out, const string& dst, int dst_in) { + bool HasEdge(const std::string& src, int src_out, const std::string& dst, + int dst_in) { for (const Edge* e : g_->edges()) { if (e->src()->name() == src && e->src_output() == src_out && e->dst()->name() == dst && e->dst_input() == dst_in) @@ -100,20 +101,20 @@ class SubgraphTest : public ::testing::Test { } return false; } - bool HasControlEdge(const string& src, const string& dst) { + bool HasControlEdge(const std::string& src, const std::string& dst) { return HasEdge(src, Graph::kControlSlot, dst, Graph::kControlSlot); } - string Subgraph(const string& fed_str, const string& fetch_str, - const string& targets_str, - bool use_function_convention = false) { + std::string Subgraph(const std::string& fed_str, const std::string& fetch_str, + const std::string& targets_str, + bool use_function_convention = false) { Graph* subgraph = new Graph(OpRegistry::Global()); CopyGraph(*g_, subgraph); - std::vector fed = + std::vector fed = str_util::Split(fed_str, ',', str_util::SkipEmpty()); - std::vector fetch = + std::vector fetch = str_util::Split(fetch_str, ',', str_util::SkipEmpty()); - std::vector targets = + std::vector targets = str_util::Split(targets_str, ',', 
str_util::SkipEmpty()); subgraph::RewriteGraphMetadata metadata; @@ -355,7 +356,7 @@ void BM_SubgraphHelper(::testing::benchmark::State& state, GraphDefBuilder b(GraphDefBuilder::kFailImmediately); Node* last_node = nullptr; for (int i = 0; i < num_nodes; i++) { - string name = absl::StrCat("N", i); + std::string name = absl::StrCat("N", i); if (i > 0) { last_node = ops::UnaryOp("Op", last_node, b.opts().WithName(name)); } else { @@ -365,12 +366,12 @@ void BM_SubgraphHelper(::testing::benchmark::State& state, TF_CHECK_OK(GraphDefBuilderToGraph(b, &g)); } - std::vector fed; + std::vector fed; if (num_nodes > 1000) { fed.push_back(absl::StrCat("N", num_nodes - 1000)); } - std::vector fetch; - std::vector targets = {absl::StrCat("N", num_nodes - 1)}; + std::vector fetch; + std::vector targets = {absl::StrCat("N", num_nodes - 1)}; for (auto s : state) { Graph* subgraph = new Graph(OpRegistry::Global()); diff --git a/tensorflow/core/graph/tensor_id.h b/tensorflow/core/graph/tensor_id.h index 31b30fa14af463..30caf3857e303c 100644 --- a/tensorflow/core/graph/tensor_id.h +++ b/tensorflow/core/graph/tensor_id.h @@ -44,7 +44,7 @@ struct TensorId : public std::pair { const absl::string_view node() const { return first; } int index() const { return second; } - string ToString() const { + std::string ToString() const { if (second == Graph::kControlSlot) return absl::StrCat("^", first); return absl::StrCat(first, ":", second); } @@ -63,19 +63,19 @@ bool IsTensorIdControl(const TensorId& tensor_id); // Same as TensorId, except owns the backing storage for the op name. This makes // the memory management simpler at the expense of a copy. -struct SafeTensorId : public std::pair { - typedef std::pair Base; +struct SafeTensorId : public std::pair { + typedef std::pair Base; // NOTE(skyewm): this is required on some platforms. I'm not sure why the // using "using Base::pair;" isn't always sufficient. SafeTensorId() : Base() {} - SafeTensorId(const string& str, int idx) : Base(str, idx) {} + SafeTensorId(const std::string& str, int idx) : Base(str, idx) {} SafeTensorId(const TensorId& id); - const string& node() const { return first; } + const std::string& node() const { return first; } int index() const { return second; } - string ToString() const { + std::string ToString() const { if (second == Graph::kControlSlot) return absl::StrCat("^", first); return absl::StrCat(first, ":", second); } diff --git a/tensorflow/core/graph/tensor_id_test.cc b/tensorflow/core/graph/tensor_id_test.cc index 15bffd170642c8..4bec9298680b78 100644 --- a/tensorflow/core/graph/tensor_id_test.cc +++ b/tensorflow/core/graph/tensor_id_test.cc @@ -23,7 +23,9 @@ limitations under the License. 
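tensor_id.h above keeps TensorId as a (node name, output index) pair whose ToString() prints "name:index", or "^name" when the index is the control slot, while SafeTensorId now owns a std::string copy of the name. The standalone sketch below reproduces that shape together with a rough counterpart of ParseTensorName; the -1 control-slot value and the parser details are assumptions made for the sketch, not a transcription of TensorFlow's implementation.

#include <iostream>
#include <string>
#include <utility>

constexpr int kControlSlot = -1;  // stands in for Graph::kControlSlot

struct TensorId : std::pair<std::string, int> {
  using std::pair<std::string, int>::pair;
  std::string ToString() const {
    if (second == kControlSlot) return "^" + first;
    return first + ":" + std::to_string(second);
  }
};

// Rough counterpart of ParseTensorName: "^foo" is a control edge on foo,
// "foo:2" is output 2 of foo, and a bare "foo" defaults to output 0.
TensorId ParseName(const std::string& name) {
  if (!name.empty() && name[0] == '^') {
    return TensorId(name.substr(1), kControlSlot);
  }
  auto colon = name.rfind(':');
  if (colon == std::string::npos) return TensorId(name, 0);
  return TensorId(name.substr(0, colon), std::stoi(name.substr(colon + 1)));
}

int main() {
  std::cout << ParseName("W1").ToString() << "\n";     // W1:0
  std::cout << ParseName("foo:2").ToString() << "\n";  // foo:2
  std::cout << ParseName("^foo").ToString() << "\n";   // ^foo
  return 0;
}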
namespace tensorflow { namespace { -string ParseHelper(const string& n) { return ParseTensorName(n).ToString(); } +std::string ParseHelper(const std::string& n) { + return ParseTensorName(n).ToString(); +} TEST(TensorIdTest, ParseTensorName) { EXPECT_EQ(ParseHelper("W1"), "W1:0"); @@ -35,8 +37,8 @@ TEST(TensorIdTest, ParseTensorName) { EXPECT_EQ(ParseHelper("^foo"), "^foo"); } -uint32 Skewed(random::SimplePhilox* rnd, int max_log) { - const uint32 space = 1 << (rnd->Rand32() % (max_log + 1)); +uint32_t Skewed(random::SimplePhilox* rnd, int max_log) { + const uint32_t space = 1 << (rnd->Rand32() % (max_log + 1)); return rnd->Rand32() % space; } @@ -44,9 +46,9 @@ void BM_ParseTensorName(::testing::benchmark::State& state) { const int arg = state.range(0); random::PhiloxRandom philox(301, 17); random::SimplePhilox rnd(&philox); - std::vector names; + std::vector names; for (int i = 0; i < 100; i++) { - string name; + std::string name; switch (arg) { case 0: { // Generate random names size_t len = Skewed(&rnd, 4); @@ -92,7 +94,7 @@ void BM_ParseTensorName(::testing::benchmark::State& state) { BENCHMARK(BM_ParseTensorName)->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4)->Arg(5); TEST(TensorIdTest, IsTensorIdControl) { - string input = "^foo"; + std::string input = "^foo"; TensorId tensor_id = ParseTensorName(input); EXPECT_TRUE(IsTensorIdControl(tensor_id)); @@ -106,7 +108,7 @@ TEST(TensorIdTest, IsTensorIdControl) { } TEST(TensorIdTest, PortZero) { - for (string input : {"foo", "foo:0"}) { + for (std::string input : {"foo", "foo:0"}) { TensorId tensor_id = ParseTensorName(input); EXPECT_EQ("foo", tensor_id.node()); EXPECT_EQ(0, tensor_id.index()); diff --git a/tensorflow/core/graph/testlib.cc b/tensorflow/core/graph/testlib.cc index f83bf238cde9d1..b882361aa8093e 100644 --- a/tensorflow/core/graph/testlib.cc +++ b/tensorflow/core/graph/testlib.cc @@ -32,8 +32,9 @@ namespace tensorflow { namespace test { namespace graph { -Node* Send(Graph* g, Node* input, const string& tensor, const string& sender, - const uint64 sender_incarnation, const string& receiver) { +Node* Send(Graph* g, Node* input, const std::string& tensor, + const std::string& sender, const uint64_t sender_incarnation, + const std::string& receiver) { Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("n"), "_Send") .Input(input, 0) @@ -46,9 +47,9 @@ Node* Send(Graph* g, Node* input, const string& tensor, const string& sender, return ret; } -Node* Recv(Graph* g, const string& tensor, const string& type, - const string& sender, const uint64 sender_incarnation, - const string& receiver) { +Node* Recv(Graph* g, const std::string& tensor, const std::string& type, + const std::string& sender, const uint64_t sender_incarnation, + const std::string& receiver) { Node* ret; DataType dtype; CHECK(DataTypeFromString(type, &dtype)); @@ -72,7 +73,7 @@ Node* Constant(Graph* g, const Tensor& tensor) { return ret; } -Node* Constant(Graph* g, const Tensor& tensor, const string& name) { +Node* Constant(Graph* g, const Tensor& tensor, const std::string& name) { Node* ret; TF_CHECK_OK(NodeBuilder(name, "Const") .Attr("dtype", tensor.dtype()) @@ -85,7 +86,7 @@ Node* HostConstant(Graph* g, const Tensor& tensor) { return HostConstant(g, tensor, g->NewName("n")); } -Node* HostConstant(Graph* g, const Tensor& tensor, const string& name) { +Node* HostConstant(Graph* g, const Tensor& tensor, const std::string& name) { Node* ret; TF_CHECK_OK(NodeBuilder(name, "HostConst") .Attr("dtype", tensor.dtype()) @@ -104,7 +105,7 @@ Node* Var(Graph* g, const DataType dtype, const 
TensorShape& shape) { } Node* Var(Graph* g, const DataType dtype, const TensorShape& shape, - const string& name) { + const std::string& name) { Node* ret; TF_CHECK_OK(NodeBuilder(name, "Variable") .Attr("dtype", dtype) @@ -134,7 +135,7 @@ Node* Cumsum(Graph* g, Node* data, Node* axes, bool exclusive, bool reverse) { return ret; } -Node* Reduce(Graph* g, const string& reduce, Node* data, Node* axes, +Node* Reduce(Graph* g, const std::string& reduce, Node* data, Node* axes, bool keep_dims) { Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("n"), reduce, g->op_registry()) @@ -179,7 +180,7 @@ Node* BatchMatmul(Graph* g, Node* in0, Node* in1, bool adj_x, bool adj_y) { return ret; } -Node* RandomNumberGenerator(const string& op, Graph* g, Node* input, +Node* RandomNumberGenerator(const std::string& op, Graph* g, Node* input, DataType dtype) { Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("n"), op, g->op_registry()) @@ -222,7 +223,7 @@ Node* RandomPoisson(Graph* g, Node* shape, Node* lam) { return ret; } -Node* Unary(Graph* g, const string& func, Node* input, int index) { +Node* Unary(Graph* g, const std::string& func, Node* input, int index) { Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("n"), func, g->op_registry()) .Input(input, index) @@ -230,7 +231,7 @@ Node* Unary(Graph* g, const string& func, Node* input, int index) { return ret; } -Node* Binary(Graph* g, const string& func, Node* in0, Node* in1) { +Node* Binary(Graph* g, const std::string& func, Node* in0, Node* in1) { Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("n"), func, g->op_registry()) .Input(in0) @@ -239,7 +240,7 @@ Node* Binary(Graph* g, const string& func, Node* in0, Node* in1) { return ret; } -Node* Multi(Graph* g, const string& func, absl::Span ins) { +Node* Multi(Graph* g, const std::string& func, absl::Span ins) { Node* ret; auto b = NodeBuilder(g->NewName("n"), func, g->op_registry()); for (Node* n : ins) b = b.Input(n); @@ -271,7 +272,7 @@ Node* Roll(Graph* g, Node* input, Node* shift, Node* axis) { return ret; } -Node* Error(Graph* g, Node* input, const string& errmsg, bool log_error) { +Node* Error(Graph* g, Node* input, const std::string& errmsg, bool log_error) { Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Error") .Input(input) @@ -317,7 +318,7 @@ Node* Switch(Graph* g, Node* in0, Node* in1) { return ret; } -Node* Enter(Graph* g, Node* input, const string& frame_name) { +Node* Enter(Graph* g, Node* input, const std::string& frame_name) { Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Enter") .Input(input) @@ -341,11 +342,11 @@ Node* Merge(Graph* g, Node* in0, Node* in1) { return ret; } -Node* Merge(Graph* g, Node* in0, absl::Span remaining_in) { +Node* Merge(Graph* g, Node* in0, absl::Span remaining_in) { std::vector inputs; inputs.reserve(remaining_in.size() + 1); inputs.emplace_back(in0); - for (const string& in_name : remaining_in) { + for (const std::string& in_name : remaining_in) { inputs.emplace_back(in_name, 0, inputs[0].dt); } @@ -383,7 +384,7 @@ Node* ConcatV2(Graph* g, absl::Span tensors, Node* concat_dim) { return ret; } -Node* Next(Graph* g, const string& name, Node* input) { +Node* Next(Graph* g, const std::string& name, Node* input) { Node* ret; TF_CHECK_OK( NodeBuilder(name, "NextIteration").Input(input).Finalize(g, &ret)); @@ -497,7 +498,7 @@ Node* DiagPart(Graph* g, Node* in, DataType type) { return ret; } -Node* CheckNumerics(Graph* g, Node* in, const string& message) { +Node* CheckNumerics(Graph* g, Node* in, const std::string& message) { Node* ret; 
TF_CHECK_OK(NodeBuilder(g->NewName("n"), "CheckNumerics") .Input(in) diff --git a/tensorflow/core/graph/testlib.h b/tensorflow/core/graph/testlib.h index df7843f884b17d..f4df5a4ed4d038 100644 --- a/tensorflow/core/graph/testlib.h +++ b/tensorflow/core/graph/testlib.h @@ -39,7 +39,7 @@ void ToGraphDef(Graph* g, GraphDef* def); // Adds a node in "g" producing a constant "tensor". Node* Constant(Graph* g, const Tensor& tensor); -Node* Constant(Graph* g, const Tensor& tensor, const string& name); +Node* Constant(Graph* g, const Tensor& tensor, const std::string& name); // Adds a node in "g" producing a constant "tensor" on the host. // The given node which, unlike the regular Constant above, always @@ -47,26 +47,27 @@ Node* Constant(Graph* g, const Tensor& tensor, const string& name); // in GPU tests where the test Op in question runs on the device // but requires some arguments to be pinned to the host. Node* HostConstant(Graph* g, const Tensor& tensor); -Node* HostConstant(Graph* g, const Tensor& tensor, const string& name); +Node* HostConstant(Graph* g, const Tensor& tensor, const std::string& name); // Adds a variable in "g" of the given "shape" and "dtype". Node* Var(Graph* g, DataType dtype, const TensorShape& shape); Node* Var(Graph* g, DataType dtype, const TensorShape& shape, - const string& name); + const std::string& name); // Adds an assign node in "g" which assigns "val" into "var". Node* Assign(Graph* g, Node* var, Node* val); // Adds a send node "g" sending "input" as a named "tensor" from // "sender" to "receiver". -Node* Send(Graph* g, Node* input, const string& tensor, const string& sender, - uint64 sender_incarnation, const string& receiver); +Node* Send(Graph* g, Node* input, const std::string& tensor, + const std::string& sender, uint64_t sender_incarnation, + const std::string& receiver); // Adds a recv node in "g" receiving a named "tensor" from "sender" // to "receiver". -Node* Recv(Graph* g, const string& tensor, const string& type, - const string& sender, uint64 sender_incarnation, - const string& receiver); +Node* Recv(Graph* g, const std::string& tensor, const std::string& type, + const std::string& sender, uint64_t sender_incarnation, + const std::string& receiver); // Adds a cumsum "node" in "g" doing cumsum(data, axes). Node* Cumsum(Graph* g, Node* data, Node* axes, bool exclusive = false, @@ -74,7 +75,7 @@ Node* Cumsum(Graph* g, Node* data, Node* axes, bool exclusive = false, // Adds a reduction "node" in "g" doing sum(data, axes). "reduce" is // a reduction, e.g., Sum, Max, Min, Mean, etc. -Node* Reduce(Graph* g, const string& reduce, Node* data, Node* axes, +Node* Reduce(Graph* g, const std::string& reduce, Node* data, Node* axes, bool keep_dims = false); // Adds a Matmul node in g doing in0.contract(in1). @@ -89,17 +90,17 @@ Node* BatchMatmul(Graph* g, Node* in0, Node* in1, bool adj_x, bool adj_y); Node* QuantizeToUINT8(Graph* g, Node* data); // Adds a unary function "func" "node" in "g" taking "input". -Node* Unary(Graph* g, const string& func, Node* input, int index = 0); +Node* Unary(Graph* g, const std::string& func, Node* input, int index = 0); // Adds an identity node in "g" taking "input" and producing an // identity copy. Node* Identity(Graph* g, Node* input, int index = 0); // Adds a binary function "func" node in "g" taking "in0" and "in1". -Node* Binary(Graph* g, const string& func, Node* in0, Node* in1); +Node* Binary(Graph* g, const std::string& func, Node* in0, Node* in1); // Adds a function "func" node in "g" taking inputs "ins". 
-Node* Multi(Graph* g, const string& func, absl::Span ins); +Node* Multi(Graph* g, const std::string& func, absl::Span ins); // Adds a binary add node in "g" doing in0 + in1. Node* Add(Graph* g, Node* in0, Node* in1); @@ -131,7 +132,7 @@ Node* TruncatedNormal(Graph* g, Node* input, DataType dtype); // Adds an error node in "g". The node's computation always // generates an error with the given error message "errmsg". -Node* Error(Graph* g, Node* input, const string& errmsg, +Node* Error(Graph* g, Node* input, const std::string& errmsg, bool log_error = false); // Adds a node that generates a invalid ref output. @@ -150,7 +151,7 @@ Node* NoOp(Graph* g, const std::vector& control_inputs); Node* Switch(Graph* g, Node* in0, Node* in1); // Adds an Enter node in "g", which enters a new frame. -Node* Enter(Graph* g, Node* input, const string& frame_name); +Node* Enter(Graph* g, Node* input, const std::string& frame_name); // Adds an Exit node in "g", which exits a frame. Node* Exit(Graph* g, Node* input); @@ -160,11 +161,11 @@ Node* Merge(Graph* g, Node* in0, Node* in1); // Adds a Merge node in "g". The first input is "in0", the remaining // inputs are only given by their names in remaining_in. -Node* Merge(Graph* g, Node* in0, absl::Span remaining_in); +Node* Merge(Graph* g, Node* in0, absl::Span remaining_in); // Adds a NextIteration node in "g", which makes its input available // to the next iteration. -Node* Next(Graph* g, const string& name, Node* input); +Node* Next(Graph* g, const std::string& name, Node* input); // Adds a LoopCond node in "g", representing the "pivot" termination // condition of a loop. @@ -215,7 +216,7 @@ Node* Diag(Graph* g, Node* in, DataType type); Node* DiagPart(Graph* g, Node* in, DataType type); // Add a CheckNumerics node in "g". -Node* CheckNumerics(Graph* g, Node* in, const string& message); +Node* CheckNumerics(Graph* g, Node* in, const std::string& message); // Add an _Arg node in "g". Node* Arg(Graph* g, int64_t index, DataType type); diff --git a/tensorflow/core/graph/validate.cc b/tensorflow/core/graph/validate.cc index 154d9f26c80cf5..4572ceb9de7897 100644 --- a/tensorflow/core/graph/validate.cc +++ b/tensorflow/core/graph/validate.cc @@ -100,7 +100,7 @@ absl::Status ValidateGraphHasNoCycle(const Graph& graph) { } if (processed < graph.num_nodes()) { - std::vector nodes_in_cycle; + std::vector nodes_in_cycle; for (int i = 0; i < pending_count.size() && nodes_in_cycle.size() < 3; ++i) { if (pending_count[i] != 0) { diff --git a/tensorflow/core/graph/validate_test.cc b/tensorflow/core/graph/validate_test.cc index b593a2c9b63c7e..35e7ebb4cff6e0 100644 --- a/tensorflow/core/graph/validate_test.cc +++ b/tensorflow/core/graph/validate_test.cc @@ -38,7 +38,7 @@ REGISTER_OP("FloatInput").Output("o: float"); REGISTER_OP("Int32Input").Output("o: int32"); TEST(ValidateGraphDefTest, TestValidGraph) { - const string graph_def_str = + const std::string graph_def_str = "node { name: 'A' op: 'FloatInput' }" "node { name: 'B' op: 'FloatInput' }" "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }" @@ -50,7 +50,7 @@ TEST(ValidateGraphDefTest, TestValidGraph) { } TEST(ValidateGraphDefTest, GraphWithUnspecifiedDefaultAttr) { - const string graph_def_str = + const std::string graph_def_str = "node { name: 'A' op: 'FloatInput' }" "node { name: 'B' op: 'Int32Input' }" "node { " @@ -74,7 +74,7 @@ TEST(ValidateGraphDefTest, GraphWithUnspecifiedDefaultAttr) { TEST(ValidateGraphDefTest, GraphWithUnspecifiedRequiredAttr) { // "DstT" attribute is missing. 
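The validate.cc hunk above shows the tail of ValidateGraphHasNoCycle: after a pending-count traversal, any node whose count never reached zero is reported (up to three of them) as evidence of a cycle. Below is a standalone sketch of that pending-count (Kahn's algorithm) check on a plain adjacency list; integer node ids stand in for the node names the real validator reports.

#include <iostream>
#include <queue>
#include <string>
#include <vector>

// Process nodes whose inputs are all satisfied; whatever is never processed
// sits on or behind a cycle, and up to three such nodes are returned.
std::vector<int> FindCycleNodes(const std::vector<std::vector<int>>& out_edges) {
  const int n = static_cast<int>(out_edges.size());
  std::vector<int> pending_count(n, 0);
  for (const auto& outs : out_edges)
    for (int dst : outs) ++pending_count[dst];

  std::queue<int> ready;
  for (int i = 0; i < n; ++i)
    if (pending_count[i] == 0) ready.push(i);

  int processed = 0;
  while (!ready.empty()) {
    int node = ready.front();
    ready.pop();
    ++processed;
    for (int dst : out_edges[node])
      if (--pending_count[dst] == 0) ready.push(dst);
  }

  std::vector<int> nodes_in_cycle;
  if (processed < n) {
    for (int i = 0; i < n && nodes_in_cycle.size() < 3; ++i)
      if (pending_count[i] != 0) nodes_in_cycle.push_back(i);
  }
  return nodes_in_cycle;
}

int main() {
  // 0 -> 1 -> 2 -> 1 forms a cycle between nodes 1 and 2.
  std::vector<std::vector<int>> g = {{1}, {2}, {1}};
  for (int id : FindCycleNodes(g)) std::cout << id << " ";  // prints: 1 2
  std::cout << "\n";
  return 0;
}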
- const string graph_def_str = + const std::string graph_def_str = "node { name: 'A' op: 'FloatInput' }" "node { " " name: 'B' op: 'Cast' " @@ -102,7 +102,7 @@ TEST(ValidateGraphDefAgainstOpListTest, GraphWithOpOnlyInOpList) { TF_ASSERT_OK(OpDefBuilder("UniqueSnowflake").Finalize(&op_reg_data)); OpList op_list; *op_list.add_op() = op_reg_data.op_def; - const string graph_def_str = "node { name: 'A' op: 'UniqueSnowflake' }"; + const std::string graph_def_str = "node { name: 'A' op: 'UniqueSnowflake' }"; GraphDef graph_def; auto parser = protobuf::TextFormat::Parser(); CHECK(parser.MergeFromString(graph_def_str, &graph_def)) << graph_def_str; @@ -114,7 +114,7 @@ TEST(ValidateGraphDefAgainstOpListTest, GraphWithGlobalOpNotInOpList) { TF_ASSERT_OK(OpDefBuilder("NotAnywhere").Finalize(&op_reg_data)); OpList op_list; *op_list.add_op() = op_reg_data.op_def; - const string graph_def_str = "node { name: 'A' op: 'FloatInput' }"; + const std::string graph_def_str = "node { name: 'A' op: 'FloatInput' }"; GraphDef graph_def; auto parser = protobuf::TextFormat::Parser(); CHECK(parser.MergeFromString(graph_def_str, &graph_def)) << graph_def_str; @@ -150,7 +150,7 @@ TEST(GetOpListForValidationTest, ShouldStripDocs) { } TEST(VerifyNoDuplicateNodeNames, NoDuplicateNodeNames) { - const string graph_def_str = + const std::string graph_def_str = "node { name: 'A' op: 'FloatInput' }" "node { name: 'B' op: 'Int32Input' }" "node { " @@ -165,7 +165,7 @@ TEST(VerifyNoDuplicateNodeNames, NoDuplicateNodeNames) { } TEST(VerifyNoDuplicateNodeNames, DuplicateNodeNames) { - const string graph_def_str = + const std::string graph_def_str = "node { name: 'A' op: 'FloatInput' }" "node { name: 'A' op: 'Int32Input' }" "node { " @@ -181,7 +181,7 @@ TEST(VerifyNoDuplicateNodeNames, DuplicateNodeNames) { } TEST(ValidateGraphHasNoCycleTest, NoCyclePasses) { - const string graph_def_str = + const std::string graph_def_str = "node { name: 'A' op: 'FloatInput' }" "node { name: 'B' op: 'FloatInput' }" "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }" @@ -198,7 +198,7 @@ TEST(ValidateGraphHasNoCycleTest, NoCyclePasses) { } TEST(ValidateGraphHasNoCycleTest, NoCycleWithMergePasses) { - const string graph_def_str = + const std::string graph_def_str = R"EOF( node { name: 'A' op: 'FloatInput' } node { name: 'merge' op: 'Merge' input: [ 'A:0', 'next:0' ] @@ -221,8 +221,8 @@ TEST(ValidateGraphHasNoCycleTest, NoCycleWithMergePasses) { TF_EXPECT_OK(graph::ValidateGraphHasNoCycle(graph)); } -Node* AddNodeFromNodeDef(Graph& graph, const string& name, - const string& node_type, int num_inputs) { +Node* AddNodeFromNodeDef(Graph& graph, const std::string& name, + const std::string& node_type, int num_inputs) { auto builder = NodeDefBuilder(name, node_type); for (int i = 0; i < num_inputs; ++i) { builder = builder.Input(absl::StrCat("node_", i), i, DT_FLOAT); diff --git a/tensorflow/core/graph/while_context.h b/tensorflow/core/graph/while_context.h index e23e9df90afd2d..4f15b7d37c7b18 100644 --- a/tensorflow/core/graph/while_context.h +++ b/tensorflow/core/graph/while_context.h @@ -39,7 +39,7 @@ class WhileContext { std::vector body_inputs, std::vector body_outputs); - const string& frame_name() const { return frame_name_; } + const std::string& frame_name() const { return frame_name_; } const std::vector& enter_nodes() const { return enter_nodes_; } const std::vector& exit_nodes() const { return exit_nodes_; } const OutputTensor& cond_output() const { return cond_output_; } @@ -53,7 +53,7 @@ class WhileContext { // uniquely 
identified by its frame name. Frames are used by the executor to // manage the iterations of a loop. See the FrameState comment in // core/common_runtime/executor.cc for more details. - const string frame_name_; + const std::string frame_name_; // The enter nodes defining the input loop variables to the while loop. This // vector defines the order of the loop variables. diff --git a/tensorflow/core/grappler/graph_analyzer/gen_node.cc b/tensorflow/core/grappler/graph_analyzer/gen_node.cc index 42dffe79ecabe3..1e355d45a91ec1 100644 --- a/tensorflow/core/grappler/graph_analyzer/gen_node.cc +++ b/tensorflow/core/grappler/graph_analyzer/gen_node.cc @@ -34,7 +34,7 @@ GenNode::GenNode(const NodeDef* node) : node_(node), op_(nullptr) {} absl::Status GenNode::BuildGraphInMap(const GraphDef& source, GenNodeMap* map) { for (const auto& n : source.node()) { - const string& name = n.name(); + const std::string& name = n.name(); if (map->find(name) != map->end()) { // This error code looks more meaningful than ALREADY_EXISTS. return absl::Status(absl::StatusCode::kInvalidArgument, @@ -95,7 +95,7 @@ absl::Status GenNode::ParseInputs(const GenNodeMap* map) { for (int i = 0; i < n_inputs; ++i) { int other_position; - string other_name = ParseNodeName(node_->input(i), &other_position); + std::string other_name = ParseNodeName(node_->input(i), &other_position); auto other_it = map->find(other_name); if (other_it == map->end()) { return absl::Status( @@ -138,8 +138,8 @@ bool GenNode::IsMultiInput(Port port) const { return (it->second.size() > 1); } -GenNode::Port::operator string() const { - string result = this->IsInbound() ? "i" : "o"; +GenNode::Port::operator std::string() const { + std::string result = this->IsInbound() ? "i" : "o"; if (this->IsControl()) { result.append("C"); } else { diff --git a/tensorflow/core/grappler/graph_analyzer/gen_node.h b/tensorflow/core/grappler/graph_analyzer/gen_node.h index 57d5f59ec2ccd7..7194a48a6a2538 100644 --- a/tensorflow/core/grappler/graph_analyzer/gen_node.h +++ b/tensorflow/core/grappler/graph_analyzer/gen_node.h @@ -35,7 +35,7 @@ namespace graph_analyzer { class GenNode; // To find nodes by name. -using GenNodeMap = std::unordered_map>; +using GenNodeMap = std::unordered_map>; // One node in the graph, in the form convenient for traversal and generation of // subgraphs. It refers to the original NodeDef protobuf for most information @@ -51,8 +51,8 @@ class GenNode { explicit GenNode(const NodeDef* node); // Access wrappers. - const string& name() const { return node_->name(); } - const string& opcode() const { return node_->op(); } + const std::string& name() const { return node_->name(); } + const std::string& opcode() const { return node_->op(); } const NodeDef* node_def() const { return node_; } // Parse the inputs of this node and update the map accordingly, creating the @@ -111,7 +111,7 @@ class GenNode { // Convenient for printing. I've really wanted it to be implicit but // ClangTidy insists on making it explicit. 
- explicit operator string() const; + explicit operator std::string() const; private: explicit Port(IntPort value) : value_(value) {} diff --git a/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc b/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc index 2d47abda617615..dde0fb720c0170 100644 --- a/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc +++ b/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc @@ -315,16 +315,16 @@ absl::Status GraphAnalyzer::CollateResult() { return absl::OkStatus(); } -std::vector GraphAnalyzer::DumpRawSubgraphs() { - std::vector result; +std::vector GraphAnalyzer::DumpRawSubgraphs() { + std::vector result; for (const auto& it : result_) { result.emplace_back(it->Dump()); } return result; } -std::vector GraphAnalyzer::DumpSubgraphs() { - std::vector result; +std::vector GraphAnalyzer::DumpSubgraphs() { + std::vector result; for (auto ptr : ordered_collation_) { result.emplace_back( absl::StrFormat("%d %s", ptr->count, ptr->sig->ToString())); diff --git a/tensorflow/core/grappler/graph_analyzer/graph_analyzer.h b/tensorflow/core/grappler/graph_analyzer/graph_analyzer.h index 9a321e69b531fb..be46b6843225a6 100644 --- a/tensorflow/core/grappler/graph_analyzer/graph_analyzer.h +++ b/tensorflow/core/grappler/graph_analyzer/graph_analyzer.h @@ -46,7 +46,7 @@ class GraphAnalyzer { absl::Status Run(); // Returns the subgraphs found in Run() printed to text. - std::vector DumpSubgraphs(); + std::vector DumpSubgraphs(); // Prints the subgraphs found in Run() to stdout. absl::Status OutputSubgraphs(); @@ -78,7 +78,7 @@ class GraphAnalyzer { absl::Status CollateResult(); // Returns the raw subgraphs found in FindSubgraphs() printed to text. - std::vector DumpRawSubgraphs(); + std::vector DumpRawSubgraphs(); // Finds and adds appropriately to either partial_ or result_ all the // subgraphs that can be created by extending the parent subgraph by one node. 
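For context on the interface touched above, here is a minimal, hypothetical caller sketch. Only the members visible in this patch are relied on: Run() returning absl::Status and DumpSubgraphs() returning std::vector<std::string>. The GraphAnalyzer(GraphDef, int) constructor shape is an assumption for illustration and is not shown in this diff.

#include <iostream>
#include <string>

#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/grappler/graph_analyzer/graph_analyzer.h"

// Hypothetical helper: runs the analyzer and prints every collated subgraph.
void PrintSubgraphsOfSize(const tensorflow::GraphDef& graph_def, int n) {
  // Assumed constructor shape (graph + subgraph size); treat as illustrative.
  tensorflow::grappler::graph_analyzer::GraphAnalyzer analyzer(graph_def, n);
  const absl::Status status = analyzer.Run();
  if (!status.ok()) {
    std::cerr << status.ToString() << "\n";
    return;
  }
  // After this patch DumpSubgraphs() returns std::vector<std::string>;
  // existing callers keep compiling because TensorFlow's `string` alias
  // already resolved to std::string.
  for (const std::string& subgraph : analyzer.DumpSubgraphs()) {
    std::cout << subgraph << "\n";
  }
}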
diff --git a/tensorflow/core/grappler/graph_analyzer/graph_analyzer_test.cc b/tensorflow/core/grappler/graph_analyzer/graph_analyzer_test.cc index 9822f5446f4b39..4e9220d3a5c7e5 100644 --- a/tensorflow/core/grappler/graph_analyzer/graph_analyzer_test.cc +++ b/tensorflow/core/grappler/graph_analyzer/graph_analyzer_test.cc @@ -56,10 +56,12 @@ class GraphAnalyzerTest : public ::testing::Test, protected TestGraphs { gran_->ExtendSubgraphAllOrNone(parent, node); } - std::vector DumpRawSubgraphs() { return gran_->DumpRawSubgraphs(); } + std::vector DumpRawSubgraphs() { + return gran_->DumpRawSubgraphs(); + } - std::vector DumpPartials() { - std::vector result; + std::vector DumpPartials() { + std::vector result; for (const auto& it : gran_->partial_) { result.emplace_back(it->Dump()); } @@ -68,7 +70,9 @@ class GraphAnalyzerTest : public ::testing::Test, protected TestGraphs { const GenNodeMap& GetNodes() { return gran_->nodes_; } - GenNode* GetNode(const string& name) { return gran_->nodes_.at(name).get(); } + GenNode* GetNode(const std::string& name) { + return gran_->nodes_.at(name).get(); + } SubgraphPtrSet& GetResult() { return gran_->result_; } SubgraphPtrSet& GetPartial() { return gran_->partial_; } diff --git a/tensorflow/core/grappler/graph_analyzer/graph_analyzer_tool.cc b/tensorflow/core/grappler/graph_analyzer/graph_analyzer_tool.cc index 72662005ecdec7..0b1b3af2ea5571 100644 --- a/tensorflow/core/grappler/graph_analyzer/graph_analyzer_tool.cc +++ b/tensorflow/core/grappler/graph_analyzer/graph_analyzer_tool.cc @@ -31,7 +31,7 @@ namespace grappler { namespace graph_analyzer { // Dies on failure. -static void LoadModel(const string& filename, +static void LoadModel(const std::string& filename, tensorflow::MetaGraphDef* metagraph) { LOG(INFO) << "Loading model from " << filename; absl::Status st; @@ -49,7 +49,7 @@ static void LoadModel(const string& filename, // of train ops (if provided). void MaybePruneGraph(const tensorflow::MetaGraphDef& metagraph, tensorflow::GraphDef* graph) { - std::vector fetch_nodes; + std::vector fetch_nodes; for (const auto& fetch : metagraph.collection_def().at("train_op").node_list().value()) { LOG(INFO) << "Fetch node: " << fetch; @@ -72,7 +72,7 @@ void MaybePruneGraph(const tensorflow::MetaGraphDef& metagraph, } } -void GraphAnalyzerTool(const string& file_name, int n) { +void GraphAnalyzerTool(const std::string& file_name, int n) { if (n < 1) { LOG(FATAL) << "Invalid subgraph size " << n << ", must be at least 1"; } diff --git a/tensorflow/core/grappler/graph_analyzer/graph_analyzer_tool.h b/tensorflow/core/grappler/graph_analyzer/graph_analyzer_tool.h index 5a91fe7dc8eb7d..85f75706acf4cb 100644 --- a/tensorflow/core/grappler/graph_analyzer/graph_analyzer_tool.h +++ b/tensorflow/core/grappler/graph_analyzer/graph_analyzer_tool.h @@ -22,7 +22,7 @@ namespace tensorflow { namespace grappler { namespace graph_analyzer { -void GraphAnalyzerTool(const string& file_name, int n); +void GraphAnalyzerTool(const std::string& file_name, int n); } // end namespace graph_analyzer } // end namespace grappler diff --git a/tensorflow/core/grappler/graph_analyzer/sig_node.cc b/tensorflow/core/grappler/graph_analyzer/sig_node.cc index 9210bf56b8047b..123bd0f060bccf 100644 --- a/tensorflow/core/grappler/graph_analyzer/sig_node.cc +++ b/tensorflow/core/grappler/graph_analyzer/sig_node.cc @@ -99,7 +99,7 @@ void SigNode::ComputeTopoHash0() { last_hashed_nodes_ = next_hashed_nodes_ = node_mask_; // TODO(babkin): include the attributes too, as an option. 
- size_t hval = std::hash()(opcode()); + size_t hval = std::hash()(opcode()); // Getting the topology of the links in to the hash early should get more // conflicts resolved early. @@ -208,8 +208,8 @@ bool SigNode::operator==(const SigNode& other) const { constexpr int Signature::kMaxGraphSize; -string Signature::ToString() const { - string result; +std::string Signature::ToString() const { + std::string result; for (size_t n = 0; n < nodes.size(); ++n) { // TODO(babkin): add attributes too. result += absl::StrFormat("%d:%s", n, nodes[n]->opcode()); @@ -219,9 +219,9 @@ string Signature::ToString() const { // The link entries are already sorted, by tags and then by the // node ranks. if (link.tag.local.IsInbound()) { - result += - absl::StrFormat("[%s:%s:%d]", string(link.tag.local), - string(link.tag.remote), entry.peer->unique_rank_); + result += absl::StrFormat("[%s:%s:%d]", std::string(link.tag.local), + std::string(link.tag.remote), + entry.peer->unique_rank_); } } result.push_back(','); diff --git a/tensorflow/core/grappler/graph_analyzer/sig_node.h b/tensorflow/core/grappler/graph_analyzer/sig_node.h index 6c0731ebb92b54..2caaf605615796 100644 --- a/tensorflow/core/grappler/graph_analyzer/sig_node.h +++ b/tensorflow/core/grappler/graph_analyzer/sig_node.h @@ -42,7 +42,7 @@ class SigNode; // To find nodes by name. Having the map ordered makes the tests easier, // and it isn't used in production code often enough to get any win from // using an unordered map. -using SigNodeMap = std::map>; +using SigNodeMap = std::map>; // One node in the graph, in the form convenient for generation of the signature // of the graph, and comparison of two (sub)graphs for equivalence. It refers to @@ -61,8 +61,8 @@ class SigNode { explicit SigNode(const NodeDef* node); // Access wrappers. - const string& name() const { return node_->name(); } - const string& opcode() const { return node_->op(); } + const std::string& name() const { return node_->name(); } + const std::string& opcode() const { return node_->op(); } const NodeDef* node_def() const { return node_; } // For extraction of subgraphs into a separate SigNodeMap, copies the links @@ -261,7 +261,7 @@ struct Signature { absl::Status Compute(); // Convert the computed signature to a string representation. - string ToString() const; + std::string ToString() const; SigNodeMap map; // The nodes in the graph, accessible by name. size_t sig_short = 0; // Hash of the signature, for the quick equality check. 
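The hunk above reseeds the per-node topology hash with std::hash over std::string instead of the old `string` alias, and the sig_node tests reproduce that seed before folding in link counts. A standalone sketch of that seeding pattern follows; the HashCombine below is a generic stand-in for TensorFlow's CombineHash (whose mixing formula is not part of this patch), and the link counts are made up for the demo.

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>

// Generic boost-style mixer, used here purely for illustration; it mirrors
// the (value, seed*) argument order of the CombineHash calls in the tests.
static void HashCombine(std::size_t value, std::size_t* seed) {
  *seed ^= value + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
}

int main() {
  const std::string opcode = "Mul";
  // Seed the per-node topology hash from the opcode, as ComputeTopoHash0 does.
  std::size_t hval = std::hash<std::string>()(opcode);
  // Fold in illustrative inbound/outbound link counts.
  HashCombine(2, &hval);  // e.g. two inbound links
  HashCombine(1, &hval);  // e.g. one outbound link
  std::cout << "topo hash seed for " << opcode << ": " << hval << "\n";
  return 0;
}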
diff --git a/tensorflow/core/grappler/graph_analyzer/sig_node_test.cc b/tensorflow/core/grappler/graph_analyzer/sig_node_test.cc index 6f38b4dc8b075c..56980ccedf459c 100644 --- a/tensorflow/core/grappler/graph_analyzer/sig_node_test.cc +++ b/tensorflow/core/grappler/graph_analyzer/sig_node_test.cc @@ -312,7 +312,7 @@ TEST_F(SigNodeTest, ComputeTopoHash0) { EXPECT_THAT(RefNextHashedNodes(&sn1), Eq(0x02)); EXPECT_THAT(RefTopoHash(&sn1), SizeIs(1)); - size_t exp_hval = std::hash()(sn1.opcode()); + size_t exp_hval = std::hash()(sn1.opcode()); CombineHash(1, &exp_hval); CombineHash(1, &exp_hval); CombineHash(2, &exp_hval); @@ -640,14 +640,14 @@ class SignatureTest : public SigBaseTest { std::vector countdown; InitPermutation(graph_size, &plain_permutation, &countdown); - std::set signatures; + std::set signatures; std::vector permutation; do { BuildPermutation(plain_permutation, countdown, &permutation); constexpr bool kDebugPermutation = false; if (kDebugPermutation) { - string p; + std::string p; for (int i = 0; i < permutation.size(); ++i) { p.push_back('0' + permutation[i]); } @@ -1070,7 +1070,7 @@ TEST_F(SignatureTest, OrderLinks) { } // How it was ordered in the original graph. - string before = sig_.ToString(); + std::string before = sig_.ToString(); // clang-format off EXPECT_THAT(before, Eq( "0:Mul[i0:o0:5][i0:o0:4][i0:o1:4][i0:o2:3][i0:o2:2][i0:o3:2]," @@ -1084,7 +1084,7 @@ TEST_F(SignatureTest, OrderLinks) { OrderLinks(&sig_); - string after = sig_.ToString(); + std::string after = sig_.ToString(); // clang-format off EXPECT_THAT(after, Eq( "0:Mul[i0:o0:4][i0:o0:5][i0:o1:4][i0:o2:2][i0:o2:3][i0:o3:2]," @@ -1132,7 +1132,7 @@ TEST_F(SignatureTest, ToString) { RefHashIsFinal(sig_.nodes[i]) = true; } - string result = sig_.ToString(); + std::string result = sig_.ToString(); // clang-format off ASSERT_THAT(result, Eq( @@ -1151,14 +1151,14 @@ TEST_F(SignatureTest, Permutation) { std::vector countdown; InitPermutation(5, &plain_permutation, &countdown); - std::set results; + std::set results; std::vector permutation; do { BuildPermutation(plain_permutation, countdown, &permutation); EXPECT_THAT(permutation, SizeIs(5)); - string p; + std::string p; for (int i = 0; i < permutation.size(); ++i) { p.push_back('0' + permutation[i]); } diff --git a/tensorflow/core/grappler/graph_analyzer/subgraph.cc b/tensorflow/core/grappler/graph_analyzer/subgraph.cc index cfa26f243b20df..c08f23d97468cf 100644 --- a/tensorflow/core/grappler/graph_analyzer/subgraph.cc +++ b/tensorflow/core/grappler/graph_analyzer/subgraph.cc @@ -81,9 +81,9 @@ size_t Subgraph::Identity::Hash() const { return result; } -string Subgraph::Dump() { +std::string Subgraph::Dump() { // TODO(babkin): this is simplified for now. - std::vector nodes; + std::vector nodes; for (const auto& n : id_) { if (specific_) { nodes.emplace_back(absl::StrFormat("%s(%s)", n->opcode(), n->name())); diff --git a/tensorflow/core/grappler/graph_analyzer/subgraph.h b/tensorflow/core/grappler/graph_analyzer/subgraph.h index 7d3494cdc43540..140d7d626d8030 100644 --- a/tensorflow/core/grappler/graph_analyzer/subgraph.h +++ b/tensorflow/core/grappler/graph_analyzer/subgraph.h @@ -64,7 +64,7 @@ class Subgraph { size_t Hash() const { return hash_; } // Dump the subgraph information to a string. 
- string Dump(); + std::string Dump(); // Extract this subgraph into a separate graph representation for signature // building, that includes only the links between the nodes in the subgraph diff --git a/tensorflow/core/grappler/graph_analyzer/subgraph_test.cc b/tensorflow/core/grappler/graph_analyzer/subgraph_test.cc index da29e6cff5d803..2d6849cafbcb57 100644 --- a/tensorflow/core/grappler/graph_analyzer/subgraph_test.cc +++ b/tensorflow/core/grappler/graph_analyzer/subgraph_test.cc @@ -120,15 +120,15 @@ TEST(SubgraphTest, Iteration) { { SubgraphIterator sit(&sg); SubgraphIterator sit2(&sg); - std::vector links; + std::vector links; for (; !sit.AtEnd(); sit.Next()) { EXPECT_TRUE(sit == sit2); sit2.Next(); EXPECT_FALSE(sit == sit2); - links.push_back(absl::StrFormat("[%s,%s,%s]", string(sit.GetPort()), + links.push_back(absl::StrFormat("[%s,%s,%s]", std::string(sit.GetPort()), sit.GetNeighbor().node->name(), - string(sit.GetNeighbor().port))); + std::string(sit.GetNeighbor().port))); } EXPECT_TRUE(sit == sit2); diff --git a/tensorflow/core/grappler/graph_analyzer/test_tools.cc b/tensorflow/core/grappler/graph_analyzer/test_tools.cc index fe24424d81cd1b..f9cc5cda65a40d 100644 --- a/tensorflow/core/grappler/graph_analyzer/test_tools.cc +++ b/tensorflow/core/grappler/graph_analyzer/test_tools.cc @@ -33,15 +33,15 @@ namespace test { //=== Helper methods to construct the nodes. -NodeDef MakeNodeConst(const string& name) { +NodeDef MakeNodeConst(const std::string& name) { NodeDef n; n.set_name(name); n.set_op("Const"); return n; } -NodeDef MakeNode2Arg(const string& name, const string& opcode, - const string& arg1, const string& arg2) { +NodeDef MakeNode2Arg(const std::string& name, const std::string& opcode, + const std::string& arg1, const std::string& arg2) { NodeDef n; n.set_name(name); n.set_op(opcode); @@ -50,9 +50,9 @@ NodeDef MakeNode2Arg(const string& name, const string& opcode, return n; } -NodeDef MakeNode4Arg(const string& name, const string& opcode, - const string& arg1, const string& arg2, const string& arg3, - const string& arg4) { +NodeDef MakeNode4Arg(const std::string& name, const std::string& opcode, + const std::string& arg1, const std::string& arg2, + const std::string& arg3, const std::string& arg4) { NodeDef n; n.set_name(name); n.set_op(opcode); @@ -64,45 +64,47 @@ NodeDef MakeNode4Arg(const string& name, const string& opcode, } // Not really a 2-argument but convenient to construct. -NodeDef MakeNodeShapeN(const string& name, const string& arg1, - const string& arg2) { +NodeDef MakeNodeShapeN(const std::string& name, const std::string& arg1, + const std::string& arg2) { // This opcode is multi-input but not commutative. return MakeNode2Arg(name, "ShapeN", arg1, arg2); } // Not really a 2-argument but convenient to construct. -NodeDef MakeNodeIdentityN(const string& name, const string& arg1, - const string& arg2) { +NodeDef MakeNodeIdentityN(const std::string& name, const std::string& arg1, + const std::string& arg2) { // The argument is of a list type. return MakeNode2Arg(name, "IdentityN", arg1, arg2); } -NodeDef MakeNodeQuantizedConcat(const string& name, const string& arg1, - const string& arg2, const string& arg3, - const string& arg4) { +NodeDef MakeNodeQuantizedConcat(const std::string& name, + const std::string& arg1, + const std::string& arg2, + const std::string& arg3, + const std::string& arg4) { // This opcode has multiple multi-inputs. return MakeNode4Arg(name, "QuantizedConcat", arg1, arg2, arg3, arg4); } //=== Helper methods for analysing the structures. 
-std::vector DumpLinkMap(const GenNode::LinkMap& link_map) { +std::vector DumpLinkMap(const GenNode::LinkMap& link_map) { // This will order the entries first. - std::map ordered; + std::map ordered; for (const auto& link : link_map) { - string key = string(link.first); + std::string key = std::string(link.first); // Order the other sides too. They may be repeating, so store them // in a multiset. - std::multiset others; + std::multiset others; for (const auto& other : link.second) { - others.emplace( - absl::StrFormat("%s[%s]", other.node->name(), string(other.port))); + others.emplace(absl::StrFormat("%s[%s]", other.node->name(), + std::string(other.port))); } ordered[key] = absl::StrJoin(others, ", "); } // Now dump the result in a predictable order. - std::vector result; + std::vector result; result.reserve(ordered.size()); for (const auto& link : ordered) { result.emplace_back(link.first + ": " + link.second); @@ -110,7 +112,8 @@ std::vector DumpLinkMap(const GenNode::LinkMap& link_map) { return result; } -std::vector DumpLinkHashMap(const SigNode::LinkHashMap& link_hash_map) { +std::vector DumpLinkHashMap( + const SigNode::LinkHashMap& link_hash_map) { // The entries in this map are ordered by hash value which might change // at any point. Re-order them by the link tag. std::map tags; @@ -118,23 +121,24 @@ std::vector DumpLinkHashMap(const SigNode::LinkHashMap& link_hash_map) { tags[entry.second.tag] = entry.first; } - std::vector result; + std::vector result; for (const auto& id : tags) { // For predictability, the nodes need to be sorted. - std::vector nodes; + std::vector nodes; for (const auto& peer : link_hash_map.at(id.second).peers) { nodes.emplace_back(peer->name()); } std::sort(nodes.begin(), nodes.end()); - result.emplace_back(string(id.first.local) + ":" + string(id.first.remote) + - ": " + absl::StrJoin(nodes, ", ")); + result.emplace_back(std::string(id.first.local) + ":" + + std::string(id.first.remote) + ": " + + absl::StrJoin(nodes, ", ")); } return result; } -std::vector DumpHashedPeerVector( +std::vector DumpHashedPeerVector( const SigNode::HashedPeerVector& hashed_peers) { - std::vector result; + std::vector result; // Each subset of nodes with the same hash has to be sorted by name. // Other than that, the vector is already ordered by full tags. diff --git a/tensorflow/core/grappler/graph_analyzer/test_tools.h b/tensorflow/core/grappler/graph_analyzer/test_tools.h index e53c8e9b198cb0..89c6f146e0ab01 100644 --- a/tensorflow/core/grappler/graph_analyzer/test_tools.h +++ b/tensorflow/core/grappler/graph_analyzer/test_tools.h @@ -33,47 +33,49 @@ namespace test { //=== Helper methods to construct the nodes. 
-NodeDef MakeNodeConst(const string& name); +NodeDef MakeNodeConst(const std::string& name); -NodeDef MakeNode2Arg(const string& name, const string& opcode, - const string& arg1, const string& arg2); +NodeDef MakeNode2Arg(const std::string& name, const std::string& opcode, + const std::string& arg1, const std::string& arg2); -NodeDef MakeNode4Arg(const string& name, const string& opcode, - const string& arg1, const string& arg2, const string& arg3, - const string& arg4); +NodeDef MakeNode4Arg(const std::string& name, const std::string& opcode, + const std::string& arg1, const std::string& arg2, + const std::string& arg3, const std::string& arg4); -inline NodeDef MakeNodeMul(const string& name, const string& arg1, - const string& arg2) { +inline NodeDef MakeNodeMul(const std::string& name, const std::string& arg1, + const std::string& arg2) { return MakeNode2Arg(name, "Mul", arg1, arg2); } // Not really a 2-argument but convenient to construct. -inline NodeDef MakeNodeAddN(const string& name, const string& arg1, - const string& arg2) { +inline NodeDef MakeNodeAddN(const std::string& name, const std::string& arg1, + const std::string& arg2) { return MakeNode2Arg(name, "AddN", arg1, arg2); } -inline NodeDef MakeNodeSub(const string& name, const string& arg1, - const string& arg2) { +inline NodeDef MakeNodeSub(const std::string& name, const std::string& arg1, + const std::string& arg2) { return MakeNode2Arg(name, "Sub", arg1, arg2); } // Has 2 honest outputs. -inline NodeDef MakeNodeBroadcastGradientArgs(const string& name, - const string& arg1, - const string& arg2) { +inline NodeDef MakeNodeBroadcastGradientArgs(const std::string& name, + const std::string& arg1, + const std::string& arg2) { return MakeNode2Arg(name, "BroadcastGradientArgs", arg1, arg2); } -NodeDef MakeNodeShapeN(const string& name, const string& arg1, - const string& arg2); +NodeDef MakeNodeShapeN(const std::string& name, const std::string& arg1, + const std::string& arg2); -NodeDef MakeNodeIdentityN(const string& name, const string& arg1, - const string& arg2); +NodeDef MakeNodeIdentityN(const std::string& name, const std::string& arg1, + const std::string& arg2); -NodeDef MakeNodeQuantizedConcat(const string& name, const string& arg1, - const string& arg2, const string& arg3, - const string& arg4); +NodeDef MakeNodeQuantizedConcat(const std::string& name, + const std::string& arg1, + const std::string& arg2, + const std::string& arg3, + const std::string& arg4); //=== A container of pre-constructed graphs. @@ -106,12 +108,13 @@ class TestGraphs { //=== Helper methods for analysing the structures. -std::vector DumpLinkMap(const GenNode::LinkMap& link_map); +std::vector DumpLinkMap(const GenNode::LinkMap& link_map); // Also checks for the consistency of hash values. -std::vector DumpLinkHashMap(const SigNode::LinkHashMap& link_hash_map); +std::vector DumpLinkHashMap( + const SigNode::LinkHashMap& link_hash_map); -std::vector DumpHashedPeerVector( +std::vector DumpHashedPeerVector( const SigNode::HashedPeerVector& hashed_peers); } // end namespace test diff --git a/tensorflow/core/grappler/inputs/file_input_yielder.cc b/tensorflow/core/grappler/inputs/file_input_yielder.cc index 5d3e91d8dccee1..87fc1d1f141b2e 100644 --- a/tensorflow/core/grappler/inputs/file_input_yielder.cc +++ b/tensorflow/core/grappler/inputs/file_input_yielder.cc @@ -38,7 +38,7 @@ limitations under the License. 
namespace tensorflow { namespace grappler { -FileInputYielder::FileInputYielder(const std::vector& filenames, +FileInputYielder::FileInputYielder(const std::vector& filenames, size_t max_iterations) : filenames_(filenames), current_file_(0), @@ -64,7 +64,7 @@ bool FileInputYielder::NextItem(GrapplerItem* item) { } } - const string& filename = filenames_[current_file_]; + const std::string& filename = filenames_[current_file_]; ++current_file_; if (!Env::Default()->FileExists(filename).ok()) { @@ -97,12 +97,12 @@ bool FileInputYielder::NextItem(GrapplerItem* item) { metagraph = MetaGraphDef(); return NextItem(item); } else { - std::unordered_set train_ops; - for (const string& val : + std::unordered_set train_ops; + for (const std::string& val : metagraph.collection_def().at("train_op").node_list().value()) { train_ops.insert(NodeName(val)); } - std::unordered_set train_ops_found; + std::unordered_set train_ops_found; for (auto& node : metagraph.graph_def().node()) { if (train_ops.find(node.name()) != train_ops.end()) { train_ops_found.insert(node.name()); @@ -120,7 +120,8 @@ bool FileInputYielder::NextItem(GrapplerItem* item) { } } - const string id = absl::StrCat(Fingerprint64(metagraph.SerializeAsString())); + const std::string id = + absl::StrCat(Fingerprint64(metagraph.SerializeAsString())); ItemConfig cfg; std::unique_ptr new_item = diff --git a/tensorflow/core/grappler/inputs/file_input_yielder.h b/tensorflow/core/grappler/inputs/file_input_yielder.h index f3e9ecb677fdf8..ac1fdb7ac604bc 100644 --- a/tensorflow/core/grappler/inputs/file_input_yielder.h +++ b/tensorflow/core/grappler/inputs/file_input_yielder.h @@ -37,12 +37,12 @@ class FileInputYielder : public InputYielder { // Iterates over the files specified in the list of 'filename' up to // 'max_iterations' times. explicit FileInputYielder( - const std::vector& filenames, + const std::vector& filenames, size_t max_iterations = std::numeric_limits::max()); bool NextItem(GrapplerItem* item) override; private: - const std::vector filenames_; + const std::vector filenames_; size_t current_file_; size_t current_iteration_; size_t max_iterations_; diff --git a/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc b/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc index 7f39582ba663f0..f496d48e28af82 100644 --- a/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc +++ b/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc @@ -44,7 +44,7 @@ namespace grappler { namespace { GraphDef CreateGraphDef(int num_stages, int width, int tensor_size, bool use_multiple_devices, bool insert_queue, - const std::vector& device_names) { + const std::vector& device_names) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) tensorflow::Scope s = tensorflow::Scope::NewRootScope(); @@ -102,7 +102,7 @@ GraphDef CreateGraphDef(int num_stages, int width, int tensor_size, TrivialTestGraphInputYielder::TrivialTestGraphInputYielder( int num_stages, int width, int tensor_size, bool insert_queue, - const std::vector& device_names) + const std::vector& device_names) : num_stages_(num_stages), width_(width), tensor_size_(tensor_size), diff --git a/tensorflow/core/grappler/inputs/utils.cc b/tensorflow/core/grappler/inputs/utils.cc index 294bb2cead1111..6c6d3be7a25515 100644 --- a/tensorflow/core/grappler/inputs/utils.cc +++ b/tensorflow/core/grappler/inputs/utils.cc @@ -28,21 +28,22 @@ limitations under the License. 
namespace tensorflow { namespace grappler { -bool FilesExist(const std::vector& files, +bool FilesExist(const std::vector& files, std::vector* status) { return Env::Default()->FilesExist(files, status); } -bool FilesExist(const std::set& files) { - return FilesExist(std::vector(files.begin(), files.end()), nullptr); +bool FilesExist(const std::set& files) { + return FilesExist(std::vector(files.begin(), files.end()), + nullptr); } -bool FileExists(const string& file, absl::Status* status) { +bool FileExists(const std::string& file, absl::Status* status) { *status = Env::Default()->FileExists(file); return status->ok(); } -absl::Status ReadGraphDefFromFile(const string& graph_def_path, +absl::Status ReadGraphDefFromFile(const std::string& graph_def_path, GraphDef* result) { absl::Status status; if (!ReadBinaryProto(Env::Default(), graph_def_path, result).ok()) { @@ -51,7 +52,7 @@ absl::Status ReadGraphDefFromFile(const string& graph_def_path, return status; } -absl::Status ReadMetaGraphDefFromFile(const string& graph_def_path, +absl::Status ReadMetaGraphDefFromFile(const std::string& graph_def_path, MetaGraphDef* result) { absl::Status status; if (!ReadBinaryProto(Env::Default(), graph_def_path, result).ok()) { diff --git a/tensorflow/core/grappler/inputs/utils.h b/tensorflow/core/grappler/inputs/utils.h index 9caefcd836c171..50a35211149f15 100644 --- a/tensorflow/core/grappler/inputs/utils.h +++ b/tensorflow/core/grappler/inputs/utils.h @@ -29,18 +29,18 @@ limitations under the License. namespace tensorflow { namespace grappler { -bool FilesExist(const std::vector& files, +bool FilesExist(const std::vector& files, std::vector* status = nullptr); -bool FilesExist(const std::set& files); +bool FilesExist(const std::set& files); -bool FileExists(const string& file, absl::Status* status); +bool FileExists(const std::string& file, absl::Status* status); // Reads GraphDef from file in either text or raw serialized format. -absl::Status ReadGraphDefFromFile(const string& graph_def_path, +absl::Status ReadGraphDefFromFile(const std::string& graph_def_path, GraphDef* result); // Reads MetaGraphDef from file in either text or raw serialized format. 
-absl::Status ReadMetaGraphDefFromFile(const string& meta_graph_def_path, +absl::Status ReadMetaGraphDefFromFile(const std::string& meta_graph_def_path, MetaGraphDef* result); } // end namespace grappler diff --git a/tensorflow/core/grappler/inputs/utils_test.cc b/tensorflow/core/grappler/inputs/utils_test.cc index b32229a051fa86..ff2e14fc930244 100644 --- a/tensorflow/core/grappler/inputs/utils_test.cc +++ b/tensorflow/core/grappler/inputs/utils_test.cc @@ -34,7 +34,7 @@ namespace { class UtilsTest : public ::testing::Test { protected: - string BaseDir() { return io::JoinPath(testing::TmpDir(), "base_dir"); } + std::string BaseDir() { return io::JoinPath(testing::TmpDir(), "base_dir"); } void SetUp() override { TF_CHECK_OK(env_->CreateDir(BaseDir())); @@ -70,24 +70,24 @@ class UtilsTest : public ::testing::Test { GraphDef graph_def_; MetaGraphDef meta_graph_def_; - string non_existent_file_; - string actual_file_; - string text_graph_def_file_; - string binary_graph_def_file_; - string text_meta_graph_def_file_; - string binary_meta_graph_def_file_; + std::string non_existent_file_; + std::string actual_file_; + std::string text_graph_def_file_; + std::string binary_graph_def_file_; + std::string text_meta_graph_def_file_; + std::string binary_meta_graph_def_file_; Env* env_ = Env::Default(); }; TEST_F(UtilsTest, FilesExist) { - EXPECT_FALSE(FilesExist(std::vector{{non_existent_file_}})); - EXPECT_FALSE( - FilesExist(std::vector{{non_existent_file_}, {actual_file_}})); - EXPECT_TRUE(FilesExist(std::vector{{actual_file_}})); + EXPECT_FALSE(FilesExist(std::vector{{non_existent_file_}})); + EXPECT_FALSE(FilesExist( + std::vector{{non_existent_file_}, {actual_file_}})); + EXPECT_TRUE(FilesExist(std::vector{{actual_file_}})); std::vector status; EXPECT_FALSE(FilesExist( - std::vector{{non_existent_file_}, {actual_file_}}, &status)); + std::vector{{non_existent_file_}, {actual_file_}}, &status)); EXPECT_EQ(status.size(), 2); EXPECT_FALSE(status[0].ok()); EXPECT_TRUE(status[1].ok()); diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc index 60d08603d472c8..b5e7922697e4ad 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc @@ -90,12 +90,12 @@ std::pair GetDeviceGPUArch( } int major, minor; - if (!strings::safe_strto32(split_arch_str[0], &major)) { + if (!absl::SimpleAtoi(split_arch_str[0], &major)) { return {0, 0}; } if (split_arch_str.size() > 1) { - if (strings::safe_strto32(split_arch_str[1], &minor)) { + if (absl::SimpleAtoi(split_arch_str[1], &minor)) { return {major, minor}; } else { return {0, 0}; diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc index fe7d4eb4f33f67..1f4943889cc06b 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc @@ -72,7 +72,7 @@ Tensor GenerateRandomTensorInRange(const TensorShape& shape, double minval, void VerifyGraphsEquivalent(const GraphDef& original_graph, const GraphDef& optimized_graph, - const string& func) { + const std::string& func) { EXPECT_EQ(original_graph.node_size(), optimized_graph.node_size()) << func; GraphView optimized_view(&optimized_graph); for (int i = 0; i < original_graph.node_size(); ++i) { @@ -146,10 +146,10 @@ class AutoMixedPrecisionTest : public GrapplerTest { void TearDown() 
override { TF_CHECK_OK(virtual_cluster_->Shutdown()); } - NodeDef* AddSimpleNode(const string& name, const string& op, - const std::vector& inputs, + NodeDef* AddSimpleNode(const std::string& name, const std::string& op, + const std::vector& inputs, GraphDef* graph) const { - std::vector> attributes; + std::vector> attributes; if (op == "AddN" || op == "ShapeN") { AttrValue num_inputs; num_inputs.set_i(inputs.size()); @@ -203,7 +203,8 @@ class AutoMixedPrecisionTest : public GrapplerTest { TF_CHECK_OK(s.ToGraphDef(&item.graph)); auto input_tensor = GenerateRandomTensorInRange( TensorShape({size, size}), input_min, input_max); - std::vector> feed = {{"input", input_tensor}}; + std::vector> feed = { + {"input", input_tensor}}; auto tensors_expected = EvaluateNodes(item.graph, item.fetch, feed); AutoMixedPrecision optimizer(mode_); @@ -564,7 +565,7 @@ TEST_P(AutoMixedPrecisionParamTest, PreserveIdentityAfterVariable) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); auto var1_tensor = GenerateConstantTensor(TensorShape({32, 32}), 3.141593f); - std::vector> feed = {{"var1", var1_tensor}}; + std::vector> feed = {{"var1", var1_tensor}}; auto tensors_expected = EvaluateNodes(item.graph, item.fetch, feed); AutoMixedPrecision optimizer(mode_); @@ -1035,7 +1036,7 @@ TEST_P(AutoMixedPrecisionParamTest, TensorListThroughFunction) { // A separate Tensor List cluster is added to test that it is still changed to // DT_HALF. FunctionDefLibrary function_lib; - const Tensor kShape = test::AsTensor({32, 32}); + const Tensor kShape = test::AsTensor({32, 32}); FunctionDef func1 = FunctionDefHelper::Define( "Func1", {"ihandle: variant", "x: float"}, {"ohandle: variant", "y: float"}, {}, @@ -1120,7 +1121,7 @@ int GetCudaVersion(const Cluster& cluster) { const auto& device_env = device_properties.environment(); auto it = device_env.find("cuda"); if (it != device_env.end()) { - string cuda_version_str = it->second; + std::string cuda_version_str = it->second; return std::stoi(cuda_version_str); } } @@ -1407,7 +1408,7 @@ TEST_F(AutoMixedPrecisionCpuTest, MixedFanout) { class AutoMixedPrecisionSimulateGpuTest : public GrapplerTest { protected: void SetUp() override { - std::unordered_map devices; + std::unordered_map devices; DeviceProperties cpu_device; cpu_device.set_type("CPU"); cpu_device.set_frequency(1000); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 857e33bf028c82..8f3603829ffb46 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -95,11 +95,11 @@ class ConstantFoldingTest : public GrapplerTest { TF_EXPECT_OK(status); EXPECT_EQ(7, output.node_size()); - const string snapshot_or_identity = + const std::string snapshot_or_identity = use_snapshot ? "Snapshot" : "Identity"; for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); - const string& name = node.name(); + const std::string& name = node.name(); if (name == "mul1") { EXPECT_EQ("Const", node.op()); EXPECT_EQ("^x", node.input(0)); @@ -220,7 +220,7 @@ class ConstantFoldingTest : public GrapplerTest { EXPECT_EQ(2, found); // Check that const folded multiplication node has the expected value. 
- std::vector fetch = {"mul"}; + std::vector fetch = {"mul"}; Tensor value(DT_FLOAT, input_shape); for (int i = 0; i < value.NumElements(); ++i) { value.flat()(i) = i; @@ -309,7 +309,7 @@ TEST_F(ConstantFoldingTest, SimpleFolding) { EXPECT_EQ("d", node_d.name()); EXPECT_EQ("Const", node_d.op()); - std::vector fetch = {"d"}; + std::vector fetch = {"d"}; auto tensors_expected = EvaluateNodes(item.graph, fetch); auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(1, tensors_expected.size()); @@ -397,7 +397,7 @@ TEST_F(ConstantFoldingTest, AddTree) { auto x_t = GenerateRandomTensor(TensorShape({2, 2})); auto y_t = GenerateRandomTensor(TensorShape({2, 2})); - std::vector fetch = {"add_parent", "mul_parent"}; + std::vector fetch = {"add_parent", "mul_parent"}; auto tensor_expected = EvaluateNodes(item.graph, fetch, {{"x", x_t}, {"y", y_t}}); ASSERT_EQ(fetch.size(), tensor_expected.size()); @@ -453,7 +453,7 @@ TEST_F(ConstantFoldingTest, AddSubtactTree) { // Check that the result nodes have the expected value. auto x_t = GenerateRandomTensor(TensorShape({2, 2})); - std::vector fetch = {"add_parent"}; + std::vector fetch = {"add_parent"}; auto tensor_expected = EvaluateNodes(item.graph, fetch, {{"x", x_t}}); ASSERT_EQ(fetch.size(), tensor_expected.size()); fetch = {"add_parent"}; @@ -478,7 +478,7 @@ TEST_F(ConstantFoldingTest, ConstantPushDown) { ops::Placeholder::Shape(TensorShape({2, 2}))); auto get_op = [&](bool is_commutative, bool is_left_arg_const, - const string& name, const Output& const_arg, + const std::string& name, const Output& const_arg, const Output non_const_arg) -> Output { if (is_add) { if (is_commutative) { @@ -523,7 +523,7 @@ TEST_F(ConstantFoldingTest, ConstantPushDown) { // Check that the result nodes have the expected value. auto x_t = GenerateRandomTensor(TensorShape({2, 2})); - std::vector fetch = {"parent"}; + std::vector fetch = {"parent"}; auto tensor_expected = EvaluateNodes(item.graph, fetch, {{"x", x_t}}); ASSERT_EQ(fetch.size(), tensor_expected.size()); @@ -600,7 +600,7 @@ TEST_F(ConstantFoldingTest, ConstantPushDownBiasAdd) { // Check that the result nodes have the expected value. 
auto x_mat_t = GenerateRandomTensor(TensorShape({2, 2})); auto x_vec_t = GenerateRandomTensor(TensorShape({2})); - std::vector fetch = item.fetch; + std::vector fetch = item.fetch; auto tensor_expected = EvaluateNodes( item.graph, fetch, {{"x_vec", x_vec_t}, {"x_mat", x_mat_t}}); ASSERT_EQ(fetch.size(), tensor_expected.size()); @@ -615,10 +615,9 @@ TEST_F(ConstantFoldingTest, ConstantPushDownBiasAdd) { // This test fails on ROCm platform (see commit message for details) #ifndef TENSORFLOW_USE_ROCM TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_ScalarConst) { - for (string data_format : { - "NHWC", + for (std::string data_format : {"NHWC", #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM - "NCHW" + "NCHW" #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM }) { MulConvPushDownTest( @@ -636,10 +635,9 @@ TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_ScalarConst) { // This test fails on ROCm platform (see commit message for details) #ifndef TENSORFLOW_USE_ROCM TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_SingletonConst) { - for (string data_format : { - "NHWC", + for (std::string data_format : {"NHWC", #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM - "NCHW" + "NCHW" #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM }) { for (auto mul_const_input_shape : @@ -658,10 +656,9 @@ TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_SingletonConst) { TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_SingletonConst_ShapeMismatch) { - for (string data_format : { - "NHWC", + for (std::string data_format : {"NHWC", #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM - "NCHW" + "NCHW" #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM }) { MulConvPushDownTest( @@ -841,18 +838,18 @@ TEST_F(ConstantFoldingTest, NeutralElement) { optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - const string suffix = + const std::string suffix = (const_type == kConst ? "_const" : (const_type == kLike ? "_like" : "_fill")); - const string zeros_name = strings::StrCat("zeros", suffix); - const string ones_name = strings::StrCat("ones", suffix); - const string ctrl_zeros_name = strings::StrCat("^zeros", suffix); - const string ctrl_ones_name = strings::StrCat("^ones", suffix); + const std::string zeros_name = absl::StrCat("zeros", suffix); + const std::string ones_name = absl::StrCat("ones", suffix); + const std::string ctrl_zeros_name = absl::StrCat("^zeros", suffix); + const std::string ctrl_ones_name = absl::StrCat("^ones", suffix); EXPECT_EQ(const_type == kFill ? 43 : 39, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); - const string& name = node.name(); + const std::string& name = node.name(); if (name == "mul1") { EXPECT_EQ("Const", node.op()); EXPECT_EQ("^x", node.input(0)); @@ -968,8 +965,8 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("y", node.input(0)); EXPECT_EQ(ctrl_zeros_name, node.input(1)); } - const std::set square_zero_const{"mul1", "mul2", "mul5", - "mul6", "matmul1", "matmul2"}; + const std::set square_zero_const{ + "mul1", "mul2", "mul5", "mul6", "matmul1", "matmul2"}; if (square_zero_const.count(name) > 0) { TensorProto t = node.attr().at("value").tensor(); EXPECT_EQ(1, t.float_val_size()); @@ -1029,7 +1026,7 @@ TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) { EXPECT_EQ(8, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); - const string& name = node.name(); + const std::string& name = node.name(); if (name == "div_i") { // Integer division is unchanged. 
EXPECT_EQ("Div", node.op()); @@ -1061,7 +1058,7 @@ TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) { } // Check that the reciprocals have the expected value. - std::vector fetch = {"cf_half"}; + std::vector fetch = {"cf_half"}; auto tensor_expected = EvaluateNodes(item.graph, fetch); EXPECT_EQ(fetch.size(), tensor_expected.size()); fetch = {"ConstantFolding/div_f_recip", "ConstantFolding/realdiv_recip"}; @@ -1090,13 +1087,13 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { // Multiplies without any additional ops to supply the output shape. int count = 0; std::vector muls; - std::unordered_set not_converted; - std::unordered_set to_const; - std::unordered_set to_identity; + std::unordered_set not_converted; + std::unordered_set to_const; + std::unordered_set to_identity; for (const auto* x : {&x_known, &x_partially_known, &x_unknown}) { for (const auto* zeros : {&zeros_known, &zeros_partially_known, &zeros_unknown}) { - const string name = strings::StrCat("mul_", count++); + const std::string name = absl::StrCat("mul_", count++); muls.push_back(ops::Mul(s.WithOpName(name), *x, *zeros)); if (x == &x_partially_known && zeros == &zeros_partially_known) { to_identity.insert(name); @@ -1120,7 +1117,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { EXPECT_EQ(15, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); - const string& name = node.name(); + const std::string& name = node.name(); if (to_const.count(name) > 0) { EXPECT_EQ("Const", node.op()) << node.name(); } else if (to_identity.count(name) > 0) { @@ -1130,7 +1127,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { } } - const std::vector fetch = {"mul_0", "mul_4", "mul_8"}; + const std::vector fetch = {"mul_0", "mul_4", "mul_8"}; auto x_known_t = GenerateRandomTensor(TensorShape({2, 2})); auto x_partially_unknown_t = GenerateRandomTensor(TensorShape({3, 4})); @@ -1166,11 +1163,11 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) { // will propagate the shape back to the inputs of AddN, making the // output shapes of all its inputs known std::vector muls_deduced_output_shape; - std::unordered_set to_const; + std::unordered_set to_const; int count = 0; for (const auto& x : {x_partially_known, x_unknown}) { for (const auto& zeros : {zeros_partially_known, zeros_unknown}) { - const string name = strings::StrCat("mul_", count++); + const std::string name = absl::StrCat("mul_", count++); muls_deduced_output_shape.push_back( ops::Mul(s.WithOpName(name), x, zeros)); to_const.insert(name); @@ -1193,7 +1190,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) { EXPECT_EQ(10, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); - const string& name = node.name(); + const std::string& name = node.name(); if (to_const.count(name) > 0) { EXPECT_EQ("Const", node.op()) << node.name(); EXPECT_EQ(2, node.input_size()); @@ -1201,7 +1198,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) { EXPECT_TRUE(IsControlInput(node.input(1))); } } - const std::vector fetch = {"addn1"}; + const std::vector fetch = {"addn1"}; auto x_partially_unknown_t = GenerateRandomTensor(TensorShape({2, 2})); auto x_unknown_t = GenerateRandomTensor(TensorShape({2, 2})); @@ -1230,10 +1227,10 @@ TEST_F(ConstantFoldingTest, CreateConstNodes) { MAKE_TEST_GRAPH(float); MAKE_TEST_GRAPH(double); 
MAKE_TEST_GRAPH(int64_t); - MAKE_TEST_GRAPH(int32); - MAKE_TEST_GRAPH(int16); - MAKE_TEST_GRAPH(int8); - MAKE_TEST_GRAPH(uint8); + MAKE_TEST_GRAPH(int32_t); + MAKE_TEST_GRAPH(int16_t); + MAKE_TEST_GRAPH(int8_t); + MAKE_TEST_GRAPH(uint8_t); #undef MAKE_TEST_GRAPH Output bool_const = ops::Const(s.WithOpName("bool_const"), true, {5}); @@ -1307,7 +1304,7 @@ TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) { EXPECT_EQ("f", new_d.name()); EXPECT_EQ("Const", new_d.op()); - std::vector fetch = {"e", "f"}; + std::vector fetch = {"e", "f"}; auto tensors_expected = EvaluateNodes(item.graph, fetch); auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(fetch.size(), tensors_expected.size()); @@ -1338,7 +1335,7 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { absl::Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - std::vector expected_nodes = {"dflt", "p1", "p2", "i3"}; + std::vector expected_nodes = {"dflt", "p1", "p2", "i3"}; EXPECT_EQ(output.node_size(), expected_nodes.size()); int i = 0; int found = 0; @@ -1381,8 +1378,8 @@ TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) { absl::Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - std::vector expected_nodes = {"dflt", "p1", "p2", "c", - "i1", "i2", "e"}; + std::vector expected_nodes = {"dflt", "p1", "p2", "c", + "i1", "i2", "e"}; EXPECT_EQ(output.node_size(), expected_nodes.size()); int i = 0; int found = 0; @@ -1439,7 +1436,7 @@ TEST_F(ConstantFoldingTest, ControlDependenciesDeduplicate) { absl::Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - std::vector expected_nodes = {"dflt", "p1", "p2", "i2"}; + std::vector expected_nodes = {"dflt", "p1", "p2", "i2"}; EXPECT_EQ(output.node_size(), expected_nodes.size()); int i = 0; for (const auto& node : output.node()) { @@ -1466,9 +1463,9 @@ TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) { ops::DynamicPartition part(scope.WithOpName("partition"), input, indices, num_partitions); - std::vector outputs; + std::vector outputs; for (int i = 0; i < num_partitions; ++i) { - string part_out_name = strings::StrCat("part_out", i); + std::string part_out_name = absl::StrCat("part_out", i); ops::Identity partition_out(scope.WithOpName(part_out_name), {part.outputs[i]}); outputs.push_back(part_out_name); @@ -1481,7 +1478,7 @@ TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) { Tensor initial_val(DT_INT32, TensorShape({3})); test::FillIota(&initial_val, 7); for (int i = 1; i < 5; ++i) { - TF_CHECK_OK(NodeDefBuilder(strings::StrCat("in", i), "Const") + TF_CHECK_OK(NodeDefBuilder(absl::StrCat("in", i), "Const") .Attr("dtype", DT_INT32) .Attr("value", initial_val) .Finalize(item.graph.add_node())); @@ -1502,7 +1499,7 @@ TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) { .Finalize(item.graph.add_node())); for (int i = 0; i < 4; ++i) { - string concat_offset_out_name = strings::StrCat("concat_offset_out", i); + std::string concat_offset_out_name = absl::StrCat("concat_offset_out", i); TF_CHECK_OK(NodeDefBuilder(concat_offset_out_name, "Identity") .Attr("T", DT_INT32) .Input("concat_offsets", i, DT_INT32) @@ -1518,8 +1515,8 @@ TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) { int constant_folded = 0; for (const auto& node : output.node()) { - if (node.name().find("part_out") != string::npos || - node.name().find("concat_offset_out") != string::npos) { + if (node.name().find("part_out") != std::string::npos || + node.name().find("concat_offset_out") != 
std::string::npos) { ++constant_folded; EXPECT_EQ("Const", node.op()); } @@ -1638,7 +1635,7 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationEmptyFetch) { auto v1_t = GenerateRandomTensor(TensorShape({3})); auto v2_t = GenerateRandomTensor(TensorShape({5, 7})); auto v3_t = GenerateRandomTensor(TensorShape({11, 13})); - std::vector fetch_nodes = {"p2"}; + std::vector fetch_nodes = {"p2"}; auto tensors_expected = EvaluateNodes( item.graph, fetch_nodes, {{"v1", v1_t}, {"v2", v2_t}, {"v3", v3_t}}); EXPECT_EQ(1, tensors_expected.size()); @@ -1711,8 +1708,8 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN) { auto v1_t = GenerateRandomTensor(TensorShape({3, 4})); auto v2_t = GenerateRandomTensor(TensorShape({5, 6})); auto v3_t = GenerateRandomTensor(TensorShape({4, 6})); - const std::vector fetch_nodes = {"i1a", "i1b", "i2a", "i2b", - "i2c", "i3a", "i3b"}; + const std::vector fetch_nodes = {"i1a", "i1b", "i2a", "i2b", + "i2c", "i3a", "i3b"}; auto tensors_expected = EvaluateNodes( item.graph, fetch_nodes, {{"v1", v1_t}, {"v2", v2_t}, {"v3", v3_t}}); EXPECT_EQ(fetch_nodes.size(), tensors_expected.size()); @@ -1814,15 +1811,16 @@ TEST_F(ConstantFoldingTest, SwitchNodesEmptyFetch) { absl::Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - std::set present_nodes = {"v_in", "v_ctrl", - "switch", "i", - "p1", "p2", - "m", "false", - "constant", "switch2", - "i2", "i3", - "m2", "ConstantFoldingCtrl/switch_0", - "rank", "size"}; - std::set not_present_nodes = {"ConstantFolding/switch2-0"}; + std::set present_nodes = { + "v_in", "v_ctrl", + "switch", "i", + "p1", "p2", + "m", "false", + "constant", "switch2", + "i2", "i3", + "m2", "ConstantFoldingCtrl/switch_0", + "rank", "size"}; + std::set not_present_nodes = {"ConstantFolding/switch2-0"}; EXPECT_EQ(present_nodes.size(), output.node_size()); int found = 0; for (const auto& node : output.node()) { @@ -1862,7 +1860,7 @@ TEST_F(ConstantFoldingTest, SwitchNodesEmptyFetch) { Tensor v_ctrl_t(DT_BOOL, TensorShape({})); v_ctrl_t.flat()(0) = true; - std::vector fetch_nodes = {"m", "m2"}; + std::vector fetch_nodes = {"m", "m2"}; auto tensors_expected = EvaluateNodes( item.graph, fetch_nodes, {{"v_in", v_in_t}, {"v_ctrl", v_ctrl_t}}); EXPECT_EQ(2, tensors_expected.size()); @@ -1915,15 +1913,16 @@ TEST_F(ConstantFoldingTest, SwitchNodes) { GraphDef output; absl::Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - std::set present_nodes = {"v_in", "v_ctrl", - "switch", "i", - "p1", "p2", - "m", "false", - "constant", "switch2", - "i2", "i3", - "m2", "ConstantFoldingCtrl/switch_0"}; - std::set not_present_nodes = {"rank", "size", - "ConstantFolding/switch2-0"}; + std::set present_nodes = { + "v_in", "v_ctrl", + "switch", "i", + "p1", "p2", + "m", "false", + "constant", "switch2", + "i2", "i3", + "m2", "ConstantFoldingCtrl/switch_0"}; + std::set not_present_nodes = {"rank", "size", + "ConstantFolding/switch2-0"}; EXPECT_EQ(present_nodes.size(), output.node_size()); int found = 0; @@ -2584,7 +2583,7 @@ TEST_F(ConstantFoldingTest, MergeConcat_PartialFolding) { } TEST_F(ConstantFoldingTest, PaddingWithZeroSize) { - PaddingWithZeroSize(); + PaddingWithZeroSize(); PaddingWithZeroSize(); } @@ -2770,7 +2769,7 @@ TEST_F(ConstantFoldingTest, SingleElementEmptyAxisReduction) { GenerateRandomTensor(TensorShape({1, 1, 1})); auto input_var_one_dim_t = GenerateRandomTensor(TensorShape({1})); Tensor input_var_axis_t(DT_INT32, TensorShape({1})); - input_var_axis_t.flat()(0) = 0; + 
input_var_axis_t.flat()(0) = 0; auto tensors_expected = EvaluateNodes(item.graph, item.fetch, {{"input_var_three_dim", input_var_three_dim_t}, @@ -2895,7 +2894,7 @@ TEST_F(ConstantFoldingTest, Packing) { absl::Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - const std::vector fetch_nodes = {"i1", "i2"}; + const std::vector fetch_nodes = {"i1", "i2"}; auto tensors_expected = EvaluateNodes(item.graph, fetch_nodes); EXPECT_EQ(fetch_nodes.size(), tensors_expected.size()); auto tensors = EvaluateNodes(output, fetch_nodes); @@ -2971,7 +2970,7 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) { absl::Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - std::vector fetch_nodes = {"o1", "o2", "p1", "p2"}; + std::vector fetch_nodes = {"o1", "o2", "p1", "p2"}; auto a_t = GenerateRandomTensor(TensorShape({1, 5})); auto g_t = GenerateRandomTensor(TensorShape({1})); auto tensors_expected = @@ -3042,7 +3041,7 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs_InfiniteLoop) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - std::vector fetch_nodes = {"o1", "o2"}; + std::vector fetch_nodes = {"o1", "o2"}; auto a_t = GenerateRandomTensor(TensorShape({2, 2})); auto tensors_expected = EvaluateNodes(item.graph, fetch_nodes, {{"a", a_t}}); EXPECT_EQ(fetch_nodes.size(), tensors_expected.size()); @@ -3331,7 +3330,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { }; for (bool use_add_n : {true, false}) { auto fun = use_add_n ? addn_fun : accumulate_fun; - const string op_name = use_add_n ? "AddN" : "AccumulateNV2"; + const std::string op_name = use_add_n ? "AddN" : "AccumulateNV2"; Scope s = Scope::NewRootScope(); Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT, ops::Placeholder::Shape(TensorShape({2, 2}))); @@ -3411,7 +3410,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { } } - std::vector fetch = {"acc0"}; + std::vector fetch = {"acc0"}; auto tensors_expected = EvaluateNodes(item.graph, fetch); auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(1, tensors_expected.size()); @@ -3613,7 +3612,7 @@ TEST_F(ConstantFoldingTest, TrivialPack) { } EXPECT_EQ(found, 3); - std::vector fetch = {"stack", "stack_no_axis"}; + std::vector fetch = {"stack", "stack_no_axis"}; auto tensors_expected = EvaluateNodes(item.graph, fetch); auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(2, tensors_expected.size()); @@ -3741,8 +3740,8 @@ TEST_F(ConstantFoldingTest, TensorArraySize) { auto tensors_actual = EvaluateNodes(output, {"dynamic_sz", "static_sz"}); EXPECT_EQ(2, tensors_expected.size()); EXPECT_EQ(2, tensors_actual.size()); - test::ExpectTensorEqual(tensors_expected[0], tensors_actual[0]); - test::ExpectTensorEqual(tensors_expected[1], tensors_actual[1]); + test::ExpectTensorEqual(tensors_expected[0], tensors_actual[0]); + test::ExpectTensorEqual(tensors_expected[1], tensors_actual[1]); } TEST_F(ConstantFoldingTest, FoldingPreservesDenormalFlushing) { @@ -3770,7 +3769,7 @@ TEST_F(ConstantFoldingTest, FoldingPreservesDenormalFlushing) { EXPECT_EQ("c", node_d.name()); EXPECT_EQ("Const", node_d.op()); - std::vector fetch = {"c"}; + std::vector fetch = {"c"}; auto tensors_expected = EvaluateNodes(item.graph, fetch); auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(1, tensors_expected.size()); @@ -3800,7 +3799,7 @@ TEST_F(ConstantFoldingTest, EvaluatingLargeConstantNoFoldingMergingLoop) { absl::Status status = 
optimizer.Optimize(/*cluster=*/nullptr, item, &output); TF_EXPECT_OK(status); - std::vector fetch = {"result"}; + std::vector fetch = {"result"}; auto tensors_expected = EvaluateNodes(item.graph, fetch); auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(1, tensors_expected.size()); @@ -3869,9 +3868,9 @@ class ConstantFoldingCastConstTest : public GrapplerTest { return output; } - void EvaluateAndCompareUnoptimized(const GraphDef& unoptimized_graph, - const GraphDef& optimized_graph, - const std::vector& fetch_nodes) { + void EvaluateAndCompareUnoptimized( + const GraphDef& unoptimized_graph, const GraphDef& optimized_graph, + const std::vector& fetch_nodes) { auto tensors_expected = EvaluateNodes(unoptimized_graph, fetch_nodes); auto tensors = EvaluateNodes(optimized_graph, fetch_nodes); ASSERT_EQ(fetch_nodes.size(), tensors_expected.size()); @@ -4093,8 +4092,8 @@ TEST_F(ConstantFoldingTest, SimplifyCase) { TensorShapeProto* g_shape = output_shapes.mutable_list()->add_shape(); g_shape->set_unknown_rank(true); - const Tensor kZero = test::AsScalar(0); - const Tensor kOne = test::AsScalar(1); + const Tensor kZero = test::AsScalar(0); + const Tensor kOne = test::AsScalar(1); item.graph = test::function::GDef( {NDef("one", "Const", {}, {{"value", index == 0 ? kZero : kOne}, {"dtype", DT_INT32}}, @@ -4265,8 +4264,8 @@ TEST_F(ConstantFoldingTest, SimplifySelect_BroadcastTo) { ASSERT_EQ(node.input_size(), 4); EXPECT_EQ(node.input(0), pred_val ? "then" : "else"); EXPECT_EQ(node.input(1), - strings::StrCat("ConstantFolding/select-broadcastto_shape-", - pred_val ? 1 : 2)); + absl::StrCat("ConstantFolding/select-broadcastto_shape-", + pred_val ? 1 : 2)); EXPECT_EQ(node.input(2), pred_val ? "^else" : "^if"); EXPECT_EQ(node.input(3), pred_val ? "^if" : "^then"); } diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc index 2854810e3c040f..aef15c4fdf1b2e 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc @@ -252,7 +252,7 @@ absl::Status TransposeContext::InitializeTransposeContext( TF_RETURN_IF_ERROR(status); context->num_nodes = context->graph.node_size(); const auto& nodes_to_preserve = item.NodesToPreserve(); - context->nodes_to_preserve = absl::flat_hash_set( + context->nodes_to_preserve = absl::flat_hash_set( nodes_to_preserve.begin(), nodes_to_preserve.end()); TF_RETURN_IF_ERROR(context->frames.InferFromGraph(context->graph)); return absl::OkStatus(); @@ -262,9 +262,9 @@ absl::Status TransposeContext::InitializeTransposeContext( void TransposeContext::AssignDeviceAndDataFormats( absl::string_view target_device, absl::string_view src_format, absl::string_view dst_format) { - this->target_device = string(target_device); - this->src_format = string(src_format); - this->dst_format = string(dst_format); + this->target_device = std::string(target_device); + this->src_format = std::string(src_format); + this->dst_format = std::string(dst_format); this->src_dim_indices = GetDimensionIndices(src_format); this->dst_dim_indices = GetDimensionIndices(dst_format); this->src_to_dst = GetPermutation(this->src_dim_indices, dst_format); @@ -276,9 +276,9 @@ void TransposeContext::AssignDeviceAndDataFormats( bool Transposer::ShouldProcess(const TransposeContext& context, const utils::MutableNodeView& node) const { const auto* node_def = node.node(); - const string& device_name = 
GetDeviceName(*node_def); - string device; - string task; + const std::string& device_name = GetDeviceName(*node_def); + std::string device; + std::string task; const bool is_on_target_device = DeviceNameUtils::SplitDeviceName(device_name, &task, &device) && absl::StrContains(absl::AsciiStrToLower(device), @@ -306,12 +306,12 @@ absl::Status Transposer::CreateConstPermNode( DCHECK(!graph_view->HasNode(node_name)); NodeDef node; - node.set_name(string(node_name)); + node.set_name(node_name); node.set_op(kOpConst); - node.set_device(string(device)); + node.set_device(device); if (!control_node_name.empty()) { - node.add_input(string(control_node_name)); + node.add_input(std::string(control_node_name)); } AttrValue attr_data_type; @@ -337,8 +337,8 @@ absl::Status Transposer::CreateTransposeNode( const DataType& data_type, absl::string_view device, TensorShapeProto fanin_shape, absl::Span permutation, absl::string_view control_node_name, utils::MutationNewNode* added_node, - string* transpose_node_name) { - const string node_name = absl::Substitute(name_format, kOpTranspose); + std::string* transpose_node_name) { + const std::string node_name = absl::Substitute(name_format, kOpTranspose); auto* graph_view = context->graph_view.get(); DCHECK(!graph_view->HasNode(node_name)); *transpose_node_name = node_name; @@ -346,7 +346,7 @@ absl::Status Transposer::CreateTransposeNode( NodeDef node; node.set_name(node_name); node.set_op(kOpTranspose); - node.set_device(string(device)); + node.set_device(device); AttrValue attr_data_type; attr_data_type.set_type(data_type); @@ -367,7 +367,7 @@ absl::Status Transposer::CreateTransposeNode( // Create Const Node utils::MutationNewNode const_perm_added_node; - const string const_perm_node_name = + const std::string const_perm_node_name = absl::Substitute(name_format, "PermConst"); TF_RETURN_IF_ERROR(CreateConstPermNode(context, const_perm_node_name, device, permutation, control_node_name, @@ -457,11 +457,11 @@ absl::Status Transposer::CreateDataFormatNode( // Create the node NodeDef node; - node.set_name(string(node_name)); + node.set_name(node_name); // Set up parameters of node. - node.set_op(string(op)); - node.set_device(string(device)); + node.set_op(op); + node.set_device(device); AttrValue attr_data_type; attr_data_type.set_type(data_type); node.mutable_attr()->insert({"T", attr_data_type}); @@ -503,7 +503,7 @@ absl::Status Transposer::UpdateEdge( auto* dst_node_def = dst_node->node(); // TODO(lyandy): Minimize device parsing/fetching. - const string device = GetDeviceName( + const std::string device = GetDeviceName( is_src_format_to_dst_format ? *dst_node_def : *src_node_def); DataType data_type = is_src_format_to_dst_format @@ -515,7 +515,7 @@ absl::Status Transposer::UpdateEdge( .dtype(); utils::MutationNewNode added_node; - string added_node_name; + std::string added_node_name; if (op == kOpTranspose) { TensorShapeProto input_shape_proto; input_shape_proto.set_unknown_rank(true); @@ -527,7 +527,7 @@ absl::Status Transposer::UpdateEdge( input_shape_proto = src_node_shape_attr->list().shape(src_port); } } - const string control_node_name = + const std::string control_node_name = is_in_frame ? AsControlDependency(src_node_def->name()) : ""; const std::vector& permutation = is_src_format_to_dst_format ? 
context->src_to_dst : context->dst_to_src; @@ -540,7 +540,7 @@ absl::Status Transposer::UpdateEdge( GetDeviceName(*src_node_def), &parsed_name) && parsed_name.type != "CPU" && IsHostMemory(*src_node_def, src_port); - const string node_name = absl::Substitute(name_format, op); + const std::string node_name = absl::Substitute(name_format, op); TF_RETURN_IF_ERROR(CreateDataFormatNode( context, node_name, op, device, data_type, is_fanin_on_host, is_src_format_to_dst_format, &added_node)); @@ -655,40 +655,42 @@ bool Transposer::CanProcessNode(const TransposeContext& context, !(node.NumRegularFanouts() == 0 && node.NumControlledFanouts() == 0); } -string Transposer::GetFaninNameFormat(absl::string_view node_name, int port, - absl::string_view src_format, - absl::string_view dst_format) { +std::string Transposer::GetFaninNameFormat(absl::string_view node_name, + int port, + absl::string_view src_format, + absl::string_view dst_format) { return absl::StrCat(node_name, "-", port, "-$0", src_format, "To", dst_format, "-", kOptimizedSuffix); } -string Transposer::GetFanoutNameFormat(absl::string_view node_name, int port, - int index, absl::string_view src_format, - absl::string_view dst_format) { +std::string Transposer::GetFanoutNameFormat(absl::string_view node_name, + int port, int index, + absl::string_view src_format, + absl::string_view dst_format) { return absl::StrCat(node_name, "-", port, "-", index, "-$0", dst_format, "To", src_format, "-", kOptimizedSuffix); } -string Transposer::LayoutOptimizerNode(absl::string_view node_name) { +std::string Transposer::LayoutOptimizerNode(absl::string_view node_name) { return absl::StrCat(node_name, "-", kOptimizedSuffix); } -string Transposer::GetReshapeNodeNameFormat(absl::string_view node_name, - int index, - absl::string_view src_format, - absl::string_view dst_format) { +std::string Transposer::GetReshapeNodeNameFormat(absl::string_view node_name, + int index, + absl::string_view src_format, + absl::string_view dst_format) { return absl::StrCat(node_name, "-", index, "-", kReshape, src_format, "To", dst_format); } -string Transposer::GetShapeConstNodeNameFormat(absl::string_view node_name, - int index) { +std::string Transposer::GetShapeConstNodeNameFormat(absl::string_view node_name, + int index) { return absl::StrCat(node_name, "-", index, "-", kReshapeConst); } // Layout sensitive transposer. 
-inline string GetLayoutSensitiveNodeDataFormat( +inline std::string GetLayoutSensitiveNodeDataFormat( const utils::MutableNodeView& node) { const auto* attr = node.GetAttr(kAttrDataFormat); if (attr != nullptr) { @@ -1086,7 +1088,7 @@ inline bool IsValidConstPermTransposeNode(const utils::MutableNodeView& node, return false; } - const auto& tensor_data = tensor.unaligned_flat(); + const auto& tensor_data = tensor.unaligned_flat(); for (int i = 0; i < permutation_size; i++) { if (permutation[i] != tensor_data(i)) { return false; @@ -1252,11 +1254,11 @@ absl::Status BinaryOpTransposer::AddNodeReshape( absl::string_view node_device, absl::string_view input_name, absl::string_view shape_const_node_name, const DataType& data_type) { NodeDef new_node; - new_node.set_name(string(node_name)); - new_node.add_input(string(input_name)); - new_node.add_input(string(shape_const_node_name)); + new_node.set_name(node_name); + new_node.add_input(std::string(input_name)); + new_node.add_input(std::string(shape_const_node_name)); new_node.set_op(kReshape); - new_node.set_device(string(node_device)); + new_node.set_device(node_device); AttrValue attr_type_indices; attr_type_indices.set_type(DT_INT32); @@ -1276,9 +1278,9 @@ absl::Status BinaryOpTransposer::AddNodeShapeConst( absl::string_view node_device, bool node_in_frame, int num_channels, absl::string_view depended_node, int rank) { NodeDef new_node; - new_node.set_name(string(node_name)); + new_node.set_name(node_name); new_node.set_op(kOpConst); - new_node.set_device(string(node_device)); + new_node.set_device(node_device); AttrValue attr_data_type; attr_data_type.set_type(DT_INT32); new_node.mutable_attr()->insert({"dtype", attr_data_type}); @@ -1296,7 +1298,7 @@ absl::Status BinaryOpTransposer::AddNodeShapeConst( // This is to ensure the transpose node and the const node are in the same // frame. // TODO(halehri): Add Test that exercises this condition. - new_node.add_input(AsControlDependency(string(depended_node))); + new_node.add_input(AsControlDependency(std::string(depended_node))); } absl::Status status; @@ -1313,11 +1315,12 @@ absl::Status BinaryOpTransposer::MaybeReshapeVectorFanin( vector_index = 0; } if (vector_index != -1) { - const string& node_name = node->GetName(); - const string& node_device = node->GetDevice(); - string reshape_node_name = LayoutOptimizerNode(GetReshapeNodeNameFormat( - node_name, vector_index, context->src_format, context->dst_format)); - string shape_const_node_name = LayoutOptimizerNode( + const std::string& node_name = node->GetName(); + const std::string& node_device = node->GetDevice(); + std::string reshape_node_name = + LayoutOptimizerNode(GetReshapeNodeNameFormat( + node_name, vector_index, context->src_format, context->dst_format)); + std::string shape_const_node_name = LayoutOptimizerNode( GetShapeConstNodeNameFormat(node_name, vector_index)); const auto& fanin = node->GetRegularFanin(vector_index); auto* fanin_node = fanin.node_view(); @@ -1513,7 +1516,7 @@ bool ReduceTransposer::IsAlongAxis(const Tensor& tensor, for (int i = 0; i < axis_size; ++i) { int local_axis = 0; if (tensor.dtype() == DT_INT32) { - local_axis = tensor.flat()(i); + local_axis = tensor.flat()(i); } else { local_axis = tensor.flat()(i); } @@ -2023,10 +2026,10 @@ absl::Status UnaryGradTransposer::TransposeNode(TransposeContext* context, // Utils. 
-string GetDeviceName(const NodeDef& node) { return node.device(); } +std::string GetDeviceName(const NodeDef& node) { return node.device(); } bool IsDefaultLayoutSensitiveOp(const NodeDef& node) { - static absl::flat_hash_set* default_layout_sensitive_ops = + static absl::flat_hash_set* default_layout_sensitive_ops = new absl::flat_hash_set( {"AvgPool", "Conv2D", "DepthwiseConv2dNative", "DepthToSpace", "FusedBatchNorm", "FusedBatchNormV2", "FusedBatchNormV3", @@ -2049,7 +2052,7 @@ bool IsLayoutSensitiveOp(const NodeDef& node) { } bool IsDefaultLayoutAgnosticOp(const NodeDef& node) { - static absl::flat_hash_set* agnostic_nodes = + static absl::flat_hash_set* agnostic_nodes = new absl::flat_hash_set({"Abs", "Acos", "Acosh", @@ -2253,7 +2256,7 @@ bool GetValueAttrFromConstInputNode( } bool IsDataFormatOp(const utils::MutableNodeView& node) { - const string& op = node.GetOp(); + const std::string& op = node.GetOp(); return op == kOpDataFormatDimMap || op == kOpDataFormatVecPermute; } diff --git a/tensorflow/core/grappler/optimizers/static_schedule_test.cc b/tensorflow/core/grappler/optimizers/static_schedule_test.cc index b46afca62dc4a3..46ae83f1f04a3b 100644 --- a/tensorflow/core/grappler/optimizers/static_schedule_test.cc +++ b/tensorflow/core/grappler/optimizers/static_schedule_test.cc @@ -40,7 +40,7 @@ class StaticScheduleTest : public ::testing::Test { cpu_device.set_l1_cache_size(32 * 1024); cpu_device.set_l2_cache_size(256 * 1024); cpu_device.set_l3_cache_size(4 * 1024 * 1024); - std::unordered_map devices; + std::unordered_map devices; devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device; return std::unique_ptr(new VirtualCluster(devices)); } diff --git a/tensorflow/core/grappler/utils/scc_test.cc b/tensorflow/core/grappler/utils/scc_test.cc index 4fc4e7abaa4339..d4c196167fca43 100644 --- a/tensorflow/core/grappler/utils/scc_test.cc +++ b/tensorflow/core/grappler/utils/scc_test.cc @@ -31,7 +31,7 @@ namespace { class SCCTest : public ::testing::Test { public: void SetUp() override { - std::unordered_map devices; + std::unordered_map devices; DeviceProperties unknown_device; devices["MY_DEVICE"] = unknown_device; cluster_ = std::make_unique(devices); @@ -41,11 +41,11 @@ class SCCTest : public ::testing::Test { void TearDown() override { cluster_.reset(); } protected: - static NodeDef CreateNode(const string& name, - absl::Span inputs) { + static NodeDef CreateNode(const std::string& name, + absl::Span inputs) { NodeDef node; node.set_name(name); - for (const string& input : inputs) { + for (const std::string& input : inputs) { node.add_input(input); } return node; @@ -86,7 +86,7 @@ TEST_F(SCCTest, DisjointCycleAndPath) { *graph.add_node() = CreateNode("h", {"g"}); std::vector nodes; - std::unordered_map name_to_node; + std::unordered_map name_to_node; for (const auto& n : graph.node()) { nodes.push_back(&n); name_to_node[n.name()] = &n; @@ -149,7 +149,7 @@ TEST_F(SCCTest, WikipediaExample) { *graph.add_node() = CreateNode("h", {"h"}); std::vector nodes; - std::unordered_map name_to_node; + std::unordered_map name_to_node; for (const auto& n : graph.node()) { nodes.push_back(&n); name_to_node[n.name()] = &n; @@ -187,7 +187,7 @@ TEST_F(SCCTest, TensorFlowLoop) { with open('/tmp/graph.txt', 'w') as f: f.write(str(tf.get_default_graph().as_graph_def())) */ - const string gdef_ascii = R"EOF( + const std::string gdef_ascii = R"EOF( node { name: "Const" op: "Const" @@ -411,7 +411,7 @@ versions { TEST_F(SCCTest, NestedLoops) { GrapplerItem item; - string filename = io::JoinPath( + 
std::string filename = io::JoinPath( testing::TensorFlowSrcRoot(), "core/grappler/costs/graph_properties_testdata/nested_loop.pbtxt"); TF_CHECK_OK(ReadGraphDefFromFile(filename, &item.graph)); diff --git a/tensorflow/core/grappler/verifiers/graph_verifier.h b/tensorflow/core/grappler/verifiers/graph_verifier.h index 53d62e4c986d68..0e59d4ed3a28c7 100644 --- a/tensorflow/core/grappler/verifiers/graph_verifier.h +++ b/tensorflow/core/grappler/verifiers/graph_verifier.h @@ -41,7 +41,7 @@ class GraphVerifier { virtual ~GraphVerifier() {} // A name for the verifier. - virtual string name() const = 0; + virtual std::string name() const = 0; // Implement an algorithm to verify the specified graph. // The return value is a Status that represents a concatenation of Status of diff --git a/tensorflow/core/grappler/verifiers/structure_verifier.h b/tensorflow/core/grappler/verifiers/structure_verifier.h index de77933fedac10..968f840b41c8a0 100644 --- a/tensorflow/core/grappler/verifiers/structure_verifier.h +++ b/tensorflow/core/grappler/verifiers/structure_verifier.h @@ -32,7 +32,7 @@ class StructureVerifier : public GraphVerifier { StructureVerifier() {} ~StructureVerifier() override {} - string name() const override { return "structure_verifier"; }; + std::string name() const override { return "structure_verifier"; }; absl::Status Verify(const GraphDef& graph) override; }; diff --git a/tensorflow/core/grappler/verifiers/structure_verifier_test.cc b/tensorflow/core/grappler/verifiers/structure_verifier_test.cc index 562deb5367493c..d01a729d6c0796 100644 --- a/tensorflow/core/grappler/verifiers/structure_verifier_test.cc +++ b/tensorflow/core/grappler/verifiers/structure_verifier_test.cc @@ -35,7 +35,7 @@ namespace { class StructureVerifierTest : public ::testing::Test { protected: StructureVerifierTest() { verifier_ = std::make_unique(); } - void SetGraph(const string& gdef_ascii) { + void SetGraph(const std::string& gdef_ascii) { CHECK(protobuf::TextFormat::ParseFromString(gdef_ascii, &graph_)); } GraphDef graph_; diff --git a/tensorflow/core/kernels/autotune_conv_impl.h b/tensorflow/core/kernels/autotune_conv_impl.h index 63c6a64d1282a7..91530d7bbc269f 100644 --- a/tensorflow/core/kernels/autotune_conv_impl.h +++ b/tensorflow/core/kernels/autotune_conv_impl.h @@ -28,7 +28,7 @@ limitations under the License. namespace tensorflow::internal { template -StatusOr> AutotuneConvImpl( +absl::StatusOr> AutotuneConvImpl( OpKernelContext* ctx, std::vector>>& runners, bool actually_do_autotune, const LaunchFunc& launch_func, @@ -54,10 +54,10 @@ StatusOr> AutotuneConvImpl( TF_ASSIGN_OR_RETURN(auto desc, runner->ToAlgorithmDesc()); se::dnn::ProfileResult profile_result; - Status cudnn_launch_status = + absl::Status cudnn_launch_status = actually_do_autotune ? launch_func(allocator_used, runner, &profile_result) - : OkStatus(); + : absl::OkStatus(); if (!actually_do_autotune) { // Make the result valid according to `is_valid`. 
profile_result.set_algorithm(desc); diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD index cdd9af962f346e..6448e5ae36fead 100644 --- a/tensorflow/core/kernels/batching_util/BUILD +++ b/tensorflow/core/kernels/batching_util/BUILD @@ -535,6 +535,7 @@ tf_cc_test( "@local_xla//xla/tsl/lib/monitoring:cell_reader", "@local_xla//xla/tsl/lib/monitoring:test_utils", "@local_xla//xla/tsl/platform:criticality", + "@local_xla//xla/tsl/platform:statusor", ], ) diff --git a/tensorflow/core/kernels/batching_util/batch_resource_base.cc b/tensorflow/core/kernels/batching_util/batch_resource_base.cc index abc5fa34f3a3ab..8c53a2299110a5 100644 --- a/tensorflow/core/kernels/batching_util/batch_resource_base.cc +++ b/tensorflow/core/kernels/batching_util/batch_resource_base.cc @@ -284,8 +284,12 @@ static auto* mixed_priority_batching_policy_value = void RecordBatchParamMixedPriorityBatchingPolicy( MixedPriorityBatchingPolicy mixed_priority_batching_policy, const std::string& model_name, const std::string& op_name) { - mixed_priority_batching_policy_value->GetCell(model_name, op_name) - ->Set(absl::StrCat(mixed_priority_batching_policy)); + auto policy_str = + GetMixedPriorityBatchingPolicyString(mixed_priority_batching_policy); + if (policy_str.ok()) { + mixed_priority_batching_policy_value->GetCell(model_name, op_name) + ->Set(std::string(*policy_str)); + } } void RecordBatchParamMaxEnqueuedBatches(int64_t max_enqueued_batches, diff --git a/tensorflow/core/kernels/batching_util/batch_resource_base_test.cc b/tensorflow/core/kernels/batching_util/batch_resource_base_test.cc index 0e781747bcf170..a3b813ec6b7a4f 100644 --- a/tensorflow/core/kernels/batching_util/batch_resource_base_test.cc +++ b/tensorflow/core/kernels/batching_util/batch_resource_base_test.cc @@ -37,6 +37,7 @@ limitations under the License. 
#include "xla/tsl/lib/monitoring/cell_reader.h" #include "xla/tsl/lib/monitoring/test_utils.h" #include "xla/tsl/platform/criticality.h" +#include "xla/tsl/platform/statusor.h" #include "tensorflow/core/common_runtime/cost_constants.h" #include "tensorflow/core/common_runtime/cost_measurement.h" #include "tensorflow/core/common_runtime/cost_measurement_registry.h" @@ -282,9 +283,13 @@ TEST_P(BatchResourceBaseWithPriorityTest, BatchingWithMixedPriorityPolicy) { /*forced_warmup_batch_size=*/0)); } blocking_counter.Wait(); + + TF_ASSERT_OK_AND_ASSIGN(absl::string_view policy_str, + GetMixedPriorityBatchingPolicyString( + GetParam().mixed_priority_batching_policy)); EXPECT_EQ( mixed_priority_policy_reader_->Read("my_model_name", "my_batch_node"), - absl::StrCat(GetParam().mixed_priority_batching_policy)); + policy_str); for (const auto& [batch_size, expected_count] : GetParam().expected_batch_size_count) { diff --git a/tensorflow/core/kernels/batching_util/batch_scheduler.cc b/tensorflow/core/kernels/batching_util/batch_scheduler.cc index 91bfad8642ecd8..e74f6dfe9ddc08 100644 --- a/tensorflow/core/kernels/batching_util/batch_scheduler.cc +++ b/tensorflow/core/kernels/batching_util/batch_scheduler.cc @@ -40,5 +40,23 @@ absl::StatusOr GetMixedPriorityBatchingPolicy( "Unknown mixed priority batching policy: %s", attr_value)); } +absl::StatusOr GetMixedPriorityBatchingPolicyString( + MixedPriorityBatchingPolicy policy) { + switch (policy) { + case MixedPriorityBatchingPolicy::kLowPriorityPaddingWithMaxBatchSize: + return kLowPriorityPaddingWithMaxBatchSizeAttrValue; + case MixedPriorityBatchingPolicy:: + kLowPriorityPaddingWithNextAllowedBatchSize: + return kLowPriorityPaddingWithNextAllowedBatchSizeAttrValue; + case MixedPriorityBatchingPolicy::kPriorityIsolation: + return kPriorityIsolationAttrValue; + case MixedPriorityBatchingPolicy::kPriorityMerge: + return kPriorityMergeAttrValue; + default: + return absl::InvalidArgumentError(absl::StrFormat( + "Unknown mixed priority batching policy: %d", policy)); + } +} + } // namespace serving } // namespace tensorflow diff --git a/tensorflow/core/kernels/batching_util/batch_scheduler.h b/tensorflow/core/kernels/batching_util/batch_scheduler.h index 936473a1884dc9..4060a8b15fbd96 100644 --- a/tensorflow/core/kernels/batching_util/batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/batch_scheduler.h @@ -70,6 +70,9 @@ enum class MixedPriorityBatchingPolicy { absl::StatusOr GetMixedPriorityBatchingPolicy( absl::string_view attr_value); +absl::StatusOr GetMixedPriorityBatchingPolicyString( + MixedPriorityBatchingPolicy policy); + // The abstract superclass for a unit of work to be done as part of a batch. 
 //
 // An implementing subclass typically contains (or points to):
diff --git a/tensorflow/core/kernels/batching_util/batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/batch_scheduler_test.cc
index d587f482763fde..fce07c171b8e85 100644
--- a/tensorflow/core/kernels/batching_util/batch_scheduler_test.cc
+++ b/tensorflow/core/kernels/batching_util/batch_scheduler_test.cc
@@ -49,6 +49,12 @@ TEST(MixedPriorityBatchingPolicyTest, InvalidAttrValueError) {
           absl::StatusCode::kInvalidArgument,
           ::testing::HasSubstr(
               "Unknown mixed priority batching policy: invalid_attr_value")));
+  EXPECT_THAT(
+      GetMixedPriorityBatchingPolicyString(
+          static_cast<MixedPriorityBatchingPolicy>(4)),
+      absl_testing::StatusIs(
+          absl::StatusCode::kInvalidArgument,
+          ::testing::HasSubstr("Unknown mixed priority batching policy: 4")));
 }
 
 using MixedPriorityBatchingPolicyParameterizedTest = ::testing::TestWithParam<
@@ -59,6 +65,8 @@ TEST_P(MixedPriorityBatchingPolicyParameterizedTest,
   auto [attr_name, policy] = GetParam();
   EXPECT_THAT(GetMixedPriorityBatchingPolicy(attr_name),
               absl_testing::IsOkAndHolds(Eq(policy)));
+  EXPECT_THAT(GetMixedPriorityBatchingPolicyString(policy),
+              absl_testing::IsOkAndHolds(Eq(attr_name)));
 }
 
 INSTANTIATE_TEST_SUITE_P(
diff --git a/tensorflow/core/kernels/batching_util/concat_split_util.h b/tensorflow/core/kernels/batching_util/concat_split_util.h
index b5354be35c70a9..4ac0100fbdf44a 100644
--- a/tensorflow/core/kernels/batching_util/concat_split_util.h
+++ b/tensorflow/core/kernels/batching_util/concat_split_util.h
@@ -81,7 +81,7 @@ absl::Status Concat(OpKernelContext* context,
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
   if (std::is_same::value) {
     ConcatGPU(context, inputs_flat, output, &output_flat);
-    return OkStatus();
+    return absl::OkStatus();
   }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
   ConcatCPU(context->device(), inputs_flat, &output_flat);
@@ -198,9 +198,9 @@ absl::Status SplitCPU(OpKernelContext* context, const Tensor& input,
 
 // Handles the general case, on GPU.
 template <typename T>
-Status SplitGPU(OpKernelContext* context, const Tensor& input,
-                const gtl::ArraySlice& sizes,
-                std::vector* outputs) {
+absl::Status SplitGPU(OpKernelContext* context, const Tensor& input,
+                      const absl::Span& sizes,
+                      std::vector* outputs) {
   // TODO(olston, apassos): Implement this.
   LOG(FATAL) << "Not yet implemented";  // Crash ok
 }
diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc
index b9b9c81342439c..dac9640b1e61ff 100644
--- a/tensorflow/core/kernels/bias_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc
@@ -57,23 +57,23 @@ struct AccumulatorType {
 
 // Definition of the GPU implementations declared in bias_op.cc.
template -__global__ void BiasNHWCKernel(int32 nthreads, const T* __restrict__ input, +__global__ void BiasNHWCKernel(int32_t nthreads, const T* __restrict__ input, const T* __restrict__ bias, - T* __restrict__ output, int32 bias_size) { + T* __restrict__ output, int32_t bias_size) { GPU_1D_KERNEL_LOOP(index, nthreads) { - int32 bias_offset = index % bias_size; + int32_t bias_offset = index % bias_size; output[index] = ldg(input + index) + ldg(bias + bias_offset); } } template -__global__ void BiasNCHWKernel(int32 nthreads, const T* __restrict__ input, +__global__ void BiasNCHWKernel(int32_t nthreads, const T* __restrict__ input, const T* __restrict__ bias, - T* __restrict__ output, int32 bias_size, - int32 image_size) { + T* __restrict__ output, int32_t bias_size, + int32_t image_size) { GPU_1D_KERNEL_LOOP(index, nthreads) { - int32 index2 = index / image_size; - int32 bias_offset = index2 % bias_size; + int32_t index2 = index / image_size; + int32_t bias_offset = index2 % bias_size; output[index] = ldg(input + index) + ldg(bias + bias_offset); } } @@ -82,11 +82,12 @@ __global__ void BiasNCHWKernel(int32 nthreads, const T* __restrict__ input, // dimension. template void BiasGPU::compute(const GPUDevice& d, const T* input, const T* bias, - T* output, int32 batch, int32 height, int32 width, - int depth, int32 channel, TensorFormat data_format) { - const int32 bias_size = channel; - const int32 image_size = height * width * depth; - const int32 total_count = batch * bias_size * image_size; + T* output, int32_t batch, int32_t height, + int32_t width, int depth, int32_t channel, + TensorFormat data_format) { + const int32_t bias_size = channel; + const int32_t image_size = height * width * depth; + const int32_t total_count = batch * bias_size * image_size; if (total_count == 0) { return; } @@ -109,49 +110,49 @@ void BiasGPU::compute(const GPUDevice& d, const T* input, const T* bias, // A naive implementation that is functional on all cases. template -__global__ void BiasGradNHWC_Naive(int32 nthreads, +__global__ void BiasGradNHWC_Naive(int32_t nthreads, const T* __restrict__ output_backprop, T* __restrict__ bias_backprop, - int32 bias_size) { + int32_t bias_size) { GPU_1D_KERNEL_LOOP(index, nthreads) { - int32 bias_offset = index % bias_size; + int32_t bias_offset = index % bias_size; GpuAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); } } // A naive implementation that is functional on all cases. 
template -__global__ void BiasGradNCHW_Naive(int32 nthreads, +__global__ void BiasGradNCHW_Naive(int32_t nthreads, const T* __restrict__ output_backprop, T* __restrict__ bias_backprop, - int32 bias_size, int32 image_size) { + int32_t bias_size, int32_t image_size) { GPU_1D_KERNEL_LOOP(index, nthreads) { - int32 index2 = index / image_size; - int32 bias_offset = index2 % bias_size; + int32_t index2 = index / image_size; + int32_t bias_offset = index2 % bias_size; GpuAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); } } template __global__ void BiasGradNHWC_SharedAtomics( - int32 nthreads, const T* __restrict__ output_backprop, - T* __restrict__ bias_backprop, int32 bias_size) { + int32_t nthreads, const T* __restrict__ output_backprop, + T* __restrict__ bias_backprop, int32_t bias_size) { typedef typename AccumulatorType::type AccT; GPU_DYNAMIC_SHARED_MEM_DECL(8, char, s_buf); AccT* s_data = reinterpret_cast(s_buf); - for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { + for (int32_t index = threadIdx.x; index < bias_size; index += blockDim.x) { s_data[index] = AccT(0); } __syncthreads(); - for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; + for (int32_t index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { - int32 bias_offset = index % bias_size; + int32_t bias_offset = index % bias_size; GpuAtomicAddShared(s_data + bias_offset, AccT(ldg(output_backprop + index))); } __syncthreads(); - for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { + for (int32_t index = threadIdx.x; index < bias_size; index += blockDim.x) { GpuAtomicAdd(bias_backprop + index, T(s_data[index])); } } @@ -159,26 +160,26 @@ __global__ void BiasGradNHWC_SharedAtomics( template __global__ void BiasGradNCHW_SharedAtomics( const T* __restrict__ output_backprop, T* __restrict__ bias_backprop, - int32 batch, int32 bias_size, int32 image_size, int group_size) { + int32_t batch, int32_t bias_size, int32_t image_size, int group_size) { // Initialize the shared memory. typedef typename AccumulatorType::type AccT; - const int32 kSDataSize = 32; + const int32_t kSDataSize = 32; __shared__ AccT s_data[kSDataSize]; - for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) { + for (int32_t index = threadIdx.x; index < kSDataSize; index += blockDim.x) { s_data[index] = AccT(0); } __syncthreads(); // Accumulate all the values within this thread. They all have the same bias // index. - int32 bias_index = blockIdx.x % bias_size; - int32 group_index = blockIdx.x / bias_size; - int32 total_count = batch * image_size; + int32_t bias_index = blockIdx.x % bias_size; + int32_t group_index = blockIdx.x / bias_size; + int32_t total_count = batch * image_size; AccT sum(0); - for (int32 index = group_index * blockDim.x + threadIdx.x; + for (int32_t index = group_index * blockDim.x + threadIdx.x; index < total_count; index += blockDim.x * group_size) { - int32 image_offset = index % image_size; - int32 batch = index / image_size; + int32_t image_offset = index % image_size; + int32_t batch = index / image_size; T val = ldg(output_backprop + (batch * bias_size + bias_index) * image_size + image_offset); sum += AccT(val); @@ -192,11 +193,11 @@ __global__ void BiasGradNCHW_SharedAtomics( // Accumulate the results in the shared memory into the first element. // No syncthreads is needed since this is only in the same warp. 
- int32 thread_index = threadIdx.x; + int32_t thread_index = threadIdx.x; #if GOOGLE_CUDA if (thread_index < 32) { AccT data = s_data[thread_index]; - for (int32 delta = warpSize / 2; delta > 0; delta /= 2) { + for (int32_t delta = warpSize / 2; delta > 0; delta /= 2) { data += GpuShuffleXorSync(kCudaWarpAll, data, delta); } if (thread_index == 0) { @@ -219,20 +220,20 @@ __global__ void BiasGradNCHW_SharedAtomics( template void BiasGradGPU::compute(const GPUDevice& d, const T* output_backprop, - T* bias_backprop, int32 batch, int32 height, - int32 width, int32 depth, int32 channel, + T* bias_backprop, int32_t batch, int32_t height, + int32_t width, int32_t depth, int32_t channel, TensorFormat data_format) { - const int32 bias_size = channel; - const int32 image_size = height * width * depth; - const int32 total_count = batch * bias_size * image_size; + const int32_t bias_size = channel; + const int32_t image_size = height * width * depth; + const int32_t total_count = batch * bias_size * image_size; if (total_count == 0) { return; } - static constexpr int32 kWarpSize = 32; + static constexpr int32_t kWarpSize = 32; GpuLaunchConfig config = GetGpuLaunchConfig(total_count, d); const int max_shared_memory_size = d.sharedMemPerBlock() / 2; - int32 shared_memory_size = 0; + int32_t shared_memory_size = 0; if (data_format == FORMAT_NHWC) { shared_memory_size = bias_size * sizeof(typename AccumulatorType::type); } diff --git a/tensorflow/core/kernels/bias_op_gpu.h b/tensorflow/core/kernels/bias_op_gpu.h index 0ece14a946cd19..60f17e6de240de 100644 --- a/tensorflow/core/kernels/bias_op_gpu.h +++ b/tensorflow/core/kernels/bias_op_gpu.h @@ -68,12 +68,12 @@ class BiasGradGPUProfileResult { } BiasAddGradGPUMode algorithm() const { return algorithm_; } void set_algorithm(BiasAddGradGPUMode val) { algorithm_ = val; } - uint64 elapsed_time() const { return elapsed_time_; } - void set_elapsed_time(uint64 val) { elapsed_time_ = val; } + uint64_t elapsed_time() const { return elapsed_time_; } + void set_elapsed_time(uint64_t val) { elapsed_time_ = val; } private: BiasAddGradGPUMode algorithm_ = BiasAddGradGPUMode::kInvalid; - uint64 elapsed_time_ = std::numeric_limits::max(); + uint64_t elapsed_time_ = std::numeric_limits::max(); }; } // namespace tensorflow diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc index 529fe0b278621f..19a45db1ff01b6 100644 --- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -36,11 +36,11 @@ namespace functor { template struct BincountFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& arr, - const typename TTypes::ConstTensor& weights, - typename TTypes::Tensor& output, - const Tidx num_bins) { + static absl::Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output, + const Tidx num_bins) { if (weights.size() != 0) { return errors::Unimplemented( "Weights are not yet supported by the GPU implementation of Bincount." @@ -48,7 +48,7 @@ struct BincountFunctor { " tf.function(jit_compile=True)."); } if (output.size() == 0) { - return OkStatus(); + return absl::OkStatus(); } if (tensorflow::OpDeterminismRequired()) { // TODO(reedwm): Is this really nondeterministic? 
@@ -88,11 +88,11 @@ struct BincountFunctor { } Tensor temp_storage; TF_RETURN_IF_ERROR(context->allocate_temp( - DataTypeToEnum::value, + DataTypeToEnum::value, TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); - void* d_temp_storage = temp_storage.flat().data(); + void* d_temp_storage = temp_storage.flat().data(); // The second HistogramEven is to actual run with d_temp_storage // allocated with temp_storage_bytes. err = gpuprim::DeviceHistogram::HistogramEven( @@ -109,7 +109,7 @@ struct BincountFunctor { return errors::Internal( "Could not launch HistogramEven: ", GpuGetErrorString(err), "."); } - return OkStatus(); + return absl::OkStatus(); } }; @@ -126,11 +126,11 @@ __global__ void BincountReduceKernel(const Tidx* in, T* out, const int nthreads, template struct BincountFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& arr, - const typename TTypes::ConstTensor& weights, - typename TTypes::Tensor& output, - const Tidx num_bins) { + static absl::Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& arr, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& output, + const Tidx num_bins) { const int nthreads = arr.dimension(0); auto d = context->eigen_gpu_device(); @@ -206,11 +206,11 @@ __global__ void BincountColReduceSharedKernel(const Tidx* in, const T* weights, template struct BincountReduceFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& in, - const typename TTypes::ConstTensor& weights, - typename TTypes::Tensor& out, - const Tidx num_bins) { + static absl::Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& in, + const typename TTypes::ConstTensor& weights, + typename TTypes::Tensor& out, + const Tidx num_bins) { const int num_rows = in.dimension(0); const int num_cols = in.dimension(1); diff --git a/tensorflow/core/kernels/broadcast_to_op.cc b/tensorflow/core/kernels/broadcast_to_op.cc index e354966e744549..e58902ddfccc21 100644 --- a/tensorflow/core/kernels/broadcast_to_op.cc +++ b/tensorflow/core/kernels/broadcast_to_op.cc @@ -149,11 +149,11 @@ TF_CALL_float8_e4m3fn(REGISTER_KERNEL); // registration requires all int32 inputs and outputs to be in host memory. 
REGISTER_KERNEL_BUILDER(Name("BroadcastTo") .Device(DEVICE_GPU) - .TypeConstraint("T") + .TypeConstraint("T") .HostMemory("input") .HostMemory("shape") .HostMemory("output"), - BroadcastToOp); + BroadcastToOp); #endif #if defined(PLUGGABLE_DEVICE_SUPPORTED_MACOS) REGISTER_KERNEL_BUILDER(Name("BroadcastTo") diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc index 93df5a624e76bd..d69244d0c67cad 100644 --- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc @@ -34,18 +34,19 @@ typedef Eigen::GpuDevice GPUDevice; template __global__ void BucketizeCustomKernel( - const int32 size_in, const T* __restrict__ in, const int32 size_boundaries, - GpuDeviceArrayStruct boundaries_array, int32* __restrict__ out) { + const int32_t size_in, const T* __restrict__ in, + const int32_t size_boundaries, GpuDeviceArrayStruct boundaries_array, + int32_t* __restrict__ out) { const float* boundaries = GetGpuDeviceArrayOnDevice(&boundaries_array); GPU_DYNAMIC_SHARED_MEM_DECL(sizeof(float), unsigned char, shared_mem); float* shared_mem_boundaries = reinterpret_cast(shared_mem); if (useSharedMem) { - int32 lidx = threadIdx.y * blockDim.x + threadIdx.x; - int32 blockSize = blockDim.x * blockDim.y; + int32_t lidx = threadIdx.y * blockDim.x + threadIdx.x; + int32_t blockSize = blockDim.x * blockDim.y; - for (int32 i = lidx; i < size_boundaries; i += blockSize) { + for (int32_t i = lidx; i < size_boundaries; i += blockSize) { shared_mem_boundaries[i] = boundaries[i]; } @@ -56,11 +57,11 @@ __global__ void BucketizeCustomKernel( GPU_1D_KERNEL_LOOP(i, size_in) { T value = in[i]; - int32 bucket = 0; - int32 count = size_boundaries; + int32_t bucket = 0; + int32_t count = size_boundaries; while (count > 0) { - int32 l = bucket; - int32 step = count / 2; + int32_t l = bucket; + int32_t step = count / 2; l += step; if (!(value < static_cast(boundaries[l]))) { bucket = ++l; @@ -78,10 +79,10 @@ namespace functor { template struct BucketizeFunctor { // PRECONDITION: boundaries_vector must be sorted. 
- static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& input, - const std::vector& boundaries_vector, - typename TTypes::Tensor& output) { + static absl::Status Compute(OpKernelContext* context, + const typename TTypes::ConstTensor& input, + const std::vector& boundaries_vector, + typename TTypes::Tensor& output) { const GPUDevice& d = context->eigen_device(); GpuDeviceArrayOnHost boundaries_array(context, @@ -93,8 +94,8 @@ struct BucketizeFunctor { TF_RETURN_IF_ERROR(boundaries_array.Finalize()); GpuLaunchConfig config = GetGpuLaunchConfig(input.size(), d); - int32 shared_mem_size = sizeof(float) * boundaries_vector.size(); - const int32 kMaxSharedMemBytes = 16384; + int32_t shared_mem_size = sizeof(float) * boundaries_vector.size(); + const int32_t kMaxSharedMemBytes = 16384; if (shared_mem_size < d.sharedMemPerBlock() && shared_mem_size < kMaxSharedMemBytes) { TF_CHECK_OK(GpuLaunchKernel(BucketizeCustomKernel, @@ -108,7 +109,7 @@ struct BucketizeFunctor { config.thread_per_block, 0, d.stream(), input.size(), input.data(), boundaries_vector.size(), boundaries_array.data(), output.data())); } - return OkStatus(); + return absl::OkStatus(); } }; } // namespace functor diff --git a/tensorflow/core/kernels/cast_op.cc b/tensorflow/core/kernels/cast_op.cc index 0a4e011815b80d..98e35f138363d5 100644 --- a/tensorflow/core/kernels/cast_op.cc +++ b/tensorflow/core/kernels/cast_op.cc @@ -184,10 +184,10 @@ class GpuCastOp : public CastOpBase { } private: - Status Prepare() { + absl::Status Prepare() { if (external_src_dtype_ == external_dst_dtype_) { work_ = nullptr; // Identity - return OkStatus(); + return absl::OkStatus(); } if (src_dtype_ == DT_BOOL) { work_ = GetGpuCastFromBool(dst_dtype_); @@ -228,7 +228,7 @@ class GpuCastOp : public CastOpBase { } else if (src_dtype_ == DT_UINT4) { work_ = GetGpuCastFromUint4(dst_dtype_); } - return work_ == nullptr ? Unimplemented() : OkStatus(); + return work_ == nullptr ? 
Unimplemented() : absl::OkStatus(); } }; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -263,14 +263,14 @@ CURRY_TYPES2(REGISTER_CAST_GPU, std::complex); CURRY_TYPES2(REGISTER_CAST_GPU, std::complex); #else REGISTER_CAST_GPU(bool, bfloat16); -REGISTER_CAST_GPU(int8, bfloat16); -REGISTER_CAST_GPU(int16, bfloat16); -REGISTER_CAST_GPU(int32, bfloat16); -REGISTER_CAST_GPU(int64, bfloat16); -REGISTER_CAST_GPU(uint8, bfloat16); -REGISTER_CAST_GPU(uint16, bfloat16); -REGISTER_CAST_GPU(uint32, bfloat16); -REGISTER_CAST_GPU(uint64, bfloat16); +REGISTER_CAST_GPU(int8_t, bfloat16); +REGISTER_CAST_GPU(int16_t, bfloat16); +REGISTER_CAST_GPU(int32_t, bfloat16); +REGISTER_CAST_GPU(int64_t, bfloat16); +REGISTER_CAST_GPU(uint8_t, bfloat16); +REGISTER_CAST_GPU(uint16_t, bfloat16); +REGISTER_CAST_GPU(uint32_t, bfloat16); +REGISTER_CAST_GPU(uint64_t, bfloat16); REGISTER_CAST_GPU(Eigen::half, bfloat16); REGISTER_CAST_GPU(float, bfloat16); REGISTER_CAST_GPU(double, bfloat16); @@ -301,43 +301,43 @@ REGISTER_CAST_GPU(float8_e4m3fn, float8_e5m2); REGISTER_CAST_GPU(float8_e4m3fn, float8_e4m3fn); REGISTER_CAST_GPU(int4, int4); -REGISTER_CAST_GPU(int4, int8); -REGISTER_CAST_GPU(int4, int16); -REGISTER_CAST_GPU(int4, int32); +REGISTER_CAST_GPU(int4, int8_t); +REGISTER_CAST_GPU(int4, int16_t); +REGISTER_CAST_GPU(int4, int32_t); REGISTER_CAST_GPU(int4, int64_t); REGISTER_CAST_GPU(int4, uint4); -REGISTER_CAST_GPU(int4, uint8); -REGISTER_CAST_GPU(int4, uint16); -REGISTER_CAST_GPU(int4, uint32); +REGISTER_CAST_GPU(int4, uint8_t); +REGISTER_CAST_GPU(int4, uint16_t); +REGISTER_CAST_GPU(int4, uint32_t); REGISTER_CAST_GPU(int4, uint64_t); -REGISTER_CAST_GPU(int8, int4); -REGISTER_CAST_GPU(int16, int4); -REGISTER_CAST_GPU(int32, int4); +REGISTER_CAST_GPU(int8_t, int4); +REGISTER_CAST_GPU(int16_t, int4); +REGISTER_CAST_GPU(int32_t, int4); REGISTER_CAST_GPU(int64_t, int4); REGISTER_CAST_GPU(uint4, int4); -REGISTER_CAST_GPU(uint8, int4); -REGISTER_CAST_GPU(uint16, int4); -REGISTER_CAST_GPU(uint32, int4); +REGISTER_CAST_GPU(uint8_t, int4); +REGISTER_CAST_GPU(uint16_t, int4); +REGISTER_CAST_GPU(uint32_t, int4); REGISTER_CAST_GPU(uint64_t, int4); -REGISTER_CAST_GPU(uint4, int8); -REGISTER_CAST_GPU(uint4, int16); -REGISTER_CAST_GPU(uint4, int32); +REGISTER_CAST_GPU(uint4, int8_t); +REGISTER_CAST_GPU(uint4, int16_t); +REGISTER_CAST_GPU(uint4, int32_t); REGISTER_CAST_GPU(uint4, int64_t); REGISTER_CAST_GPU(uint4, uint4); -REGISTER_CAST_GPU(uint4, uint8); -REGISTER_CAST_GPU(uint4, uint16); -REGISTER_CAST_GPU(uint4, uint32); +REGISTER_CAST_GPU(uint4, uint8_t); +REGISTER_CAST_GPU(uint4, uint16_t); +REGISTER_CAST_GPU(uint4, uint32_t); REGISTER_CAST_GPU(uint4, uint64_t); -REGISTER_CAST_GPU(int8, uint4); -REGISTER_CAST_GPU(int16, uint4); -REGISTER_CAST_GPU(int32, uint4); +REGISTER_CAST_GPU(int8_t, uint4); +REGISTER_CAST_GPU(int16_t, uint4); +REGISTER_CAST_GPU(int32_t, uint4); REGISTER_CAST_GPU(int64_t, uint4); -REGISTER_CAST_GPU(uint8, uint4); -REGISTER_CAST_GPU(uint16, uint4); -REGISTER_CAST_GPU(uint32, uint4); +REGISTER_CAST_GPU(uint8_t, uint4); +REGISTER_CAST_GPU(uint16_t, uint4); +REGISTER_CAST_GPU(uint32_t, uint4); REGISTER_CAST_GPU(uint64_t, uint4); #undef REGISTER_CAST_GPU diff --git a/tensorflow/core/kernels/cast_op_impl_int64.cc b/tensorflow/core/kernels/cast_op_impl_int64.cc index 7963edda7afaca..5f5552edd519ca 100644 --- a/tensorflow/core/kernels/cast_op_impl_int64.cc +++ b/tensorflow/core/kernels/cast_op_impl_int64.cc @@ -38,7 +38,7 @@ CastFunctorType GetCpuCastFromInt64(DataType dst_dtype) { (defined(TENSORFLOW_USE_ROCM) && 
TENSORFLOW_USE_ROCM) CastFunctorType GetGpuCastFromInt64(DataType dst_dtype) { #if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) - CAST_CASE(GPUDevice, int64, bfloat16); + CAST_CASE(GPUDevice, int64_t, bfloat16); #else CURRY_TYPES3(CAST_CASE, GPUDevice, int64); #endif diff --git a/tensorflow/core/kernels/check_numerics_op_gpu.cu.cc b/tensorflow/core/kernels/check_numerics_op_gpu.cu.cc index b1d2b3954aa91d..31ceecab9a84ee 100644 --- a/tensorflow/core/kernels/check_numerics_op_gpu.cu.cc +++ b/tensorflow/core/kernels/check_numerics_op_gpu.cu.cc @@ -38,10 +38,10 @@ typedef Eigen::GpuDevice GPUDevice; template __global__ void CheckNumericsKernel(const T* __restrict__ data, int size, int abnormal_detected[2]) { - const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; - const int32 total_thread_count = gridDim.x * blockDim.x; + const int32_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + const int32_t total_thread_count = gridDim.x * blockDim.x; - int32 offset = thread_id; + int32_t offset = thread_id; while (offset < size) { if (isnan(data[offset])) { @@ -61,10 +61,10 @@ __global__ void CheckNumericsKernel(const T* __restrict__ data, int size, template __global__ void CheckNumericsKernelV2(const T* __restrict__ data, int size, int abnormal_detected[3]) { - const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; - const int32 total_thread_count = gridDim.x * blockDim.x; + const int32_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + const int32_t total_thread_count = gridDim.x * blockDim.x; - int32 offset = thread_id; + int32_t offset = thread_id; while (offset < size) { if (isnan(data[offset])) { @@ -85,8 +85,8 @@ template struct CheckNumericsLaunch { void Run(const GPUDevice& d, const T* data, int size, int abnormal_detected[2]) { - const int32 block_size = d.maxGpuThreadsPerBlock(); - const int32 num_blocks = + const int32_t block_size = d.maxGpuThreadsPerBlock(); + const int32_t num_blocks = (d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor()) / block_size; @@ -103,8 +103,8 @@ template struct CheckNumericsLaunchV2 { void Run(const GPUDevice& d, const T* data, int size, int abnormal_detected[3]) { - const int32 block_size = d.maxGpuThreadsPerBlock(); - const int32 num_blocks = + const int32_t block_size = d.maxGpuThreadsPerBlock(); + const int32_t num_blocks = (d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor()) / block_size; diff --git a/tensorflow/core/kernels/collective_nccl.cc b/tensorflow/core/kernels/collective_nccl.cc index c44680b27124aa..9e69fb36115602 100644 --- a/tensorflow/core/kernels/collective_nccl.cc +++ b/tensorflow/core/kernels/collective_nccl.cc @@ -22,10 +22,11 @@ limitations under the License. 
namespace tensorflow { -NcclBase::NcclBase(CollectiveType type, const string& name) +NcclBase::NcclBase(CollectiveType type, const std::string& name) : type_(type), name_(name), col_ctx_(nullptr), col_params_(nullptr) {} -Status NcclBase::InitializeCollectiveParams(CollectiveParams* col_params) { +absl::Status NcclBase::InitializeCollectiveParams( + CollectiveParams* col_params) { if (type_ != col_params->instance.type) { return errors::Internal("Expected initialized type ", type_, " to match type in CollectiveParams ", @@ -60,10 +61,10 @@ Status NcclBase::InitializeCollectiveParams(CollectiveParams* col_params) { ", expected name ", expected_name); } - return OkStatus(); + return absl::OkStatus(); } -Status NcclBase::InitializeCollectiveContext( +absl::Status NcclBase::InitializeCollectiveContext( std::shared_ptr col_ctx) { col_ctx_ = col_ctx; col_params_ = col_ctx->col_params.get(); diff --git a/tensorflow/core/kernels/collective_nccl.h b/tensorflow/core/kernels/collective_nccl.h index 4fc4bebb008e3c..26a096fa3f8bb4 100644 --- a/tensorflow/core/kernels/collective_nccl.h +++ b/tensorflow/core/kernels/collective_nccl.h @@ -22,19 +22,20 @@ namespace tensorflow { class NcclBase : public CollectiveImplementationInterface { public: - explicit NcclBase(CollectiveType type, const string& name); + explicit NcclBase(CollectiveType type, const std::string& name); ~NcclBase() override = default; // No-op for this collective implementation. - Status InitializeCollectiveParams(CollectiveParams* col_params) override; + absl::Status InitializeCollectiveParams( + CollectiveParams* col_params) override; // Initializes the device objects and device localities. - Status InitializeCollectiveContext( + absl::Status InitializeCollectiveContext( std::shared_ptr col_ctx) override; protected: const CollectiveType type_; - const string name_; + const std::string name_; std::shared_ptr col_ctx_; const CollectiveParams* col_params_; // Not owned }; diff --git a/tensorflow/core/kernels/collective_nccl_test.cc b/tensorflow/core/kernels/collective_nccl_test.cc index b01eb56b44baca..47cbc6ed4e388b 100644 --- a/tensorflow/core/kernels/collective_nccl_test.cc +++ b/tensorflow/core/kernels/collective_nccl_test.cc @@ -141,7 +141,7 @@ class NcclTestBase : public ::testing::Test { if (VLOG_IS_ON(3)) { string str_buf; for (const auto& x : expected) { - strings::StrAppend(&str_buf, " ", x); + absl::StrAppend(&str_buf, " ", x); } VLOG(3) << "Expected output " << str_buf; } diff --git a/tensorflow/core/kernels/concat_lib_gpu.cc b/tensorflow/core/kernels/concat_lib_gpu.cc index 4237a8a6c8b438..58cdf8afd02485 100644 --- a/tensorflow/core/kernels/concat_lib_gpu.cc +++ b/tensorflow/core/kernels/concat_lib_gpu.cc @@ -74,8 +74,9 @@ void ConcatGPU( inputs_flat, Tensor* output, typename TTypes::Tensor* output_flat) { if (inputs_flat.size() < 16) { - if (output->NumElements() < std::numeric_limits::max()) { - ConcatGPUSlice(c->eigen_gpu_device(), inputs_flat, output_flat); + if (output->NumElements() < std::numeric_limits::max()) { + ConcatGPUSlice(c->eigen_gpu_device(), inputs_flat, + output_flat); } else { ConcatGPUSlice(c->eigen_gpu_device(), inputs_flat, output_flat); @@ -84,8 +85,8 @@ void ConcatGPU( // Switching indexing to int64 might cause performance issues. // Hence, we keep int32 indexing in the GPU kernel unless we need to // switch to int64. 
- if (output->NumElements() < std::numeric_limits::max()) { - ConcatGPUCall(c, inputs_flat, output_flat); + if (output->NumElements() < std::numeric_limits::max()) { + ConcatGPUCall(c, inputs_flat, output_flat); } else { ConcatGPUCall(c, inputs_flat, output_flat); } diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc index a6cece16d20ddf..58b6957a120f2a 100644 --- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc +++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc @@ -126,7 +126,7 @@ void ConcatGPUSlice( Eigen::array size; size[0] = inputs_flat[i]->dimension(0); size[1] = inputs_flat[i]->dimension(1); - if (std::is_same::value) { + if (std::is_same::value) { To32Bit(*output).slice(offset, size).device(gpu_device) = To32Bit(*inputs_flat[i]); } else { @@ -159,7 +159,7 @@ void ConcatGPUImpl(const Eigen::GpuDevice& gpu_device, // on most processors // possibly due to decreasing occupancy // 4096 inputs is a lot, most code will take the smem path - const int32 kMaxSmemBytesPerformance = 16384; + const int32_t kMaxSmemBytesPerformance = 16384; if (smem_usage < smem_max && smem_usage < kMaxSmemBytesPerformance) { TF_CHECK_OK(GpuLaunchKernel( concat_variable_kernel, config.block_count, diff --git a/tensorflow/core/kernels/conv_2d_gpu.h b/tensorflow/core/kernels/conv_2d_gpu.h index 60d2e83194eefa..1afa68d87430ed 100644 --- a/tensorflow/core/kernels/conv_2d_gpu.h +++ b/tensorflow/core/kernels/conv_2d_gpu.h @@ -1046,7 +1046,7 @@ template struct SwapDimension1And2InTensor3 { typedef GPUDevice Device; void operator()(const Device& d, const T* in, - const gtl::ArraySlice& combined_dims, T* out) { + const absl::Span& combined_dims, T* out) { Dimension<3> input_dims = {static_cast(combined_dims[0]), static_cast(combined_dims[1]), static_cast(combined_dims[2])}; @@ -1060,7 +1060,7 @@ template struct SwapDimension0And2InTensor3 { typedef GPUDevice Device; void operator()(const Device& d, const T* in, - const gtl::ArraySlice& combined_dims, T* out) { + const absl::Span& combined_dims, T* out) { Dimension<3> input_dims = {static_cast(combined_dims[0]), static_cast(combined_dims[1]), static_cast(combined_dims[2])}; diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 4b647d1e6f5a43..65696babf71ddc 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -48,7 +48,7 @@ template struct LaunchConv2DBackpropInputOp; // A dummy type to group forward backward data autotune results together. 
struct ConvBackwardDataAutotuneGroup { - static string name() { return "ConvBwdData"; } + static std::string name() { return "ConvBwdData"; } }; typedef AutotuneSingleton -struct LaunchConv2DBackpropInputOp { +struct LaunchConv2DBackpropInputOp { void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, const Tensor& out_backprop, const Tensor& filter, int row_dilation, int col_dilation, int row_stride, int col_stride, const Padding& padding, const std::vector& explicit_paddings, Tensor* in_backprop, TensorFormat data_format) { - LaunchConv2DBackpropInputOpImpl launcher; + LaunchConv2DBackpropInputOpImpl launcher; launcher(ctx, use_cudnn, cudnn_use_autotune, out_backprop, filter, row_dilation, col_dilation, row_stride, col_stride, padding, explicit_paddings, in_backprop, data_format); @@ -85,8 +85,8 @@ void LaunchConv2DBackpropInputOpGpuImpl( using se::dnn::AlgorithmDesc; using se::dnn::ProfileResult; - std::vector strides(4, 1); - std::vector dilations(4, 1); + std::vector strides(4, 1); + std::vector dilations(4, 1); auto input_h = GetTensorDimIndex(data_format, 'H'); auto input_w = GetTensorDimIndex(data_format, 'W'); strides[input_h] = row_stride; @@ -147,10 +147,10 @@ void LaunchConv2DBackpropInputOpGpuImpl( dims.spatial_dims[0].stride == 1 && dims.spatial_dims[1].stride == 1 && data_format == FORMAT_NHWC && (padding == VALID || padding == SAME)) { // 1x1 filter, so call cublas directly. - const uint64 m = dims.batch_size * dims.spatial_dims[0].input_size * - dims.spatial_dims[1].input_size; - const uint64 k = dims.out_depth; - const uint64 n = dims.in_depth; + const uint64_t m = dims.batch_size * dims.spatial_dims[0].input_size * + dims.spatial_dims[1].input_size; + const uint64_t k = dims.out_depth; + const uint64_t n = dims.in_depth; auto a_ptr = AsDeviceMemory(out_backprop.template flat().data(), out_backprop.template flat().size()); @@ -175,10 +175,10 @@ void LaunchConv2DBackpropInputOpGpuImpl( data_format == FORMAT_NHWC) { // The input data and filter have the same height/width, and we are not // using grouped convolution, so call cublas directly. 
- const uint64 m = dims.batch_size; - const uint64 k = dims.out_depth; - const uint64 n = dims.spatial_dims[0].input_size * - dims.spatial_dims[1].input_size * dims.in_depth; + const uint64_t m = dims.batch_size; + const uint64_t k = dims.out_depth; + const uint64_t n = dims.spatial_dims[0].input_size * + dims.spatial_dims[1].input_size * dims.in_depth; auto a_ptr = AsDeviceMemory(out_backprop.template flat().data(), out_backprop.template flat().size()); @@ -282,7 +282,8 @@ void LaunchConv2DBackpropInputOpGpuImpl( // (2) NHWC -> OHWI Tensor transformed_filter; - const auto transform_filter = [&](FilterTensorFormat dst_format) -> Status { + const auto transform_filter = + [&](FilterTensorFormat dst_format) -> absl::Status { VLOG(4) << "Transform filter tensor from " << ToString(FORMAT_HWIO) << " to " << ToString(dst_format); @@ -300,7 +301,7 @@ void LaunchConv2DBackpropInputOpGpuImpl( To32Bit(filter.tensor()), To32Bit(transformed_filter.tensor())); - return OkStatus(); + return absl::OkStatus(); }; if (compute_data_format == FORMAT_NCHW) { @@ -394,7 +395,7 @@ void LaunchConv2DBackpropInputOpGpuImpl( auto autotune_entry = std::move(entry_or).value(); DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, ctx); - Status cudnn_launch_status = + absl::Status cudnn_launch_status = LaunchAutotunedConv(autotune_entry, &scratch_allocator, se::dnn::ConvolutionKind::BACKWARD_DATA, stream, input_desc, in_backprop_ptr, filter_desc, filter_ptr, @@ -534,23 +535,23 @@ DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC template <> -void SpatialConvolutionBackwardInputFunc::operator()( - const GPUDevice&, typename TTypes::Tensor, - typename TTypes::ConstTensor, - typename TTypes::ConstTensor, Eigen::DenseIndex, +void SpatialConvolutionBackwardInputFunc::operator()( + const GPUDevice&, typename TTypes::Tensor, + typename TTypes::ConstTensor, + typename TTypes::ConstTensor, Eigen::DenseIndex, Eigen::DenseIndex, Eigen::DenseIndex, Eigen::DenseIndex); extern template struct SpatialConvolutionBackwardInputFunc; template <> void SpatialConvolutionBackwardInputWithExplicitPaddingFunc< - GPUDevice, int32>::operator()(const GPUDevice&, - typename TTypes::Tensor, - typename TTypes::ConstTensor, - typename TTypes::ConstTensor, - Eigen::DenseIndex, Eigen::DenseIndex, - Eigen::DenseIndex, Eigen::DenseIndex, - Eigen::DenseIndex, Eigen::DenseIndex, - Eigen::DenseIndex, Eigen::DenseIndex); + GPUDevice, int32_t>::operator()(const GPUDevice&, + typename TTypes::Tensor, + typename TTypes::ConstTensor, + typename TTypes::ConstTensor, + Eigen::DenseIndex, Eigen::DenseIndex, + Eigen::DenseIndex, Eigen::DenseIndex, + Eigen::DenseIndex, Eigen::DenseIndex, + Eigen::DenseIndex, Eigen::DenseIndex); extern template struct SpatialConvolutionBackwardInputWithExplicitPaddingFunc< GPUDevice, int32>; @@ -578,9 +579,9 @@ REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput") Conv2DBackpropInputOp); REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput") .Device(DEVICE_GPU) - .TypeConstraint("T") + .TypeConstraint("T") .HostMemory("input_sizes"), - Conv2DBackpropInputOp); + Conv2DBackpropInputOp); // To be used inside depthwise_conv_grad_op.cc. // TODO(reedwm): Move this and the definition to depthwise_conv_grad_op.cc. 
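Editor's note on the kernel hunks above: they apply one mechanical migration throughout, replacing legacy TensorFlow aliases (string, int32/uint64, Status, OkStatus(), gtl::ArraySlice, gtl::InlinedVector) with their spelled-out counterparts (std::string, int32_t/uint64_t, absl::Status, absl::OkStatus(), absl::Span, absl::InlinedVector). A minimal standalone sketch of the target style follows; SumValues is a hypothetical helper for illustration only and is not part of this patch.

// Hypothetical helper, not from this patch: shows the spelled-out types the
// diffs above converge on (absl::Status, fixed-width ints, absl::Span,
// std::string) in place of the legacy TensorFlow aliases.
#include <cstdint>
#include <string>

#include "absl/status/status.h"
#include "absl/types/span.h"

absl::Status SumValues(const std::string& name,
                       absl::Span<const int32_t> values, int64_t* out) {
  if (values.empty()) {
    return absl::InvalidArgumentError("no values for " + name);
  }
  int64_t total = 0;
  for (int32_t v : values) total += v;  // int32_t instead of the int32 alias.
  *out = total;
  return absl::OkStatus();  // absl::OkStatus() instead of bare OkStatus().
}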
diff --git a/tensorflow/core/kernels/conv_grad_shape_utils.cc b/tensorflow/core/kernels/conv_grad_shape_utils.cc index 42e114ad33581d..a7e53647b72bf9 100644 --- a/tensorflow/core/kernels/conv_grad_shape_utils.cc +++ b/tensorflow/core/kernels/conv_grad_shape_utils.cc @@ -53,10 +53,10 @@ namespace { absl::Status ConvBackpropExtractAndVerifyDimension( absl::string_view label, const TensorShape& input_shape, const TensorShape& filter_shape, const TensorShape& output_shape, - const absl::Span dilations, const std::vector& strides, - Padding padding, int64_t padding_before, int64_t padding_after, - int spatial_dim, int filter_spatial_dim, - ConvBackpropSpatialDimension* dim) { + const absl::Span dilations, + const std::vector& strides, Padding padding, + int64_t padding_before, int64_t padding_after, int spatial_dim, + int filter_spatial_dim, ConvBackpropSpatialDimension* dim) { dim->input_size = input_shape.dim_size(spatial_dim); dim->filter_size = filter_shape.dim_size(filter_spatial_dim); dim->output_size = output_shape.dim_size(spatial_dim); @@ -96,9 +96,10 @@ absl::Status ConvBackpropComputeDimensionsV2( absl::string_view label, int num_spatial_dims, const TensorShape& input_shape, const TensorShape& filter_shape, const TensorShape& out_backprop_shape, - const absl::Span dilations, const std::vector& strides, - Padding padding, absl::Span explicit_paddings, - TensorFormat data_format, ConvBackpropDimensions* dims) { + const absl::Span dilations, + const std::vector& strides, Padding padding, + absl::Span explicit_paddings, TensorFormat data_format, + ConvBackpropDimensions* dims) { // The + 2 in the following line is for the batch and feature dimensions. const int num_dims = num_spatial_dims + 2; if (input_shape.dims() != num_dims) { @@ -161,9 +162,9 @@ absl::Status ConvBackpropComputeDimensionsV2( absl::Status ConvBackpropComputeDimensions( absl::string_view label, int num_spatial_dims, const TensorShape& input_shape, const TensorShape& filter_shape, - const TensorShape& out_backprop_shape, const std::vector& strides, + const TensorShape& out_backprop_shape, const std::vector& strides, Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) { - static constexpr std::array one_dilations = {{1, 1, 1, 1, 1}}; + static constexpr std::array one_dilations = {{1, 1, 1, 1, 1}}; return ConvBackpropComputeDimensionsV2( label, num_spatial_dims, input_shape, filter_shape, out_backprop_shape, one_dilations, strides, padding, /*explicit_paddings=*/{}, data_format, @@ -181,13 +182,13 @@ absl::Status Conv2DBackpropComputeInputShape( } if (input_sizes.dim_size(0) == 4) { - return TensorShapeUtils::MakeShape(input_sizes.vec(), input_shape); + return TensorShapeUtils::MakeShape(input_sizes.vec(), input_shape); } if (input_sizes.dim_size(0) == 2) { const int batch_size = GetTensorDim(out_backprop_shape, data_format, 'N'); - const int output_height = input_sizes.vec()(0); - const int output_width = input_sizes.vec()(1); + const int output_height = input_sizes.vec()(0); + const int output_width = input_sizes.vec()(1); const int output_depth = filter_shape.dim_size(2); if (output_height < 0 || output_width < 0) { return errors::InvalidArgument( diff --git a/tensorflow/core/kernels/conv_grad_shape_utils.h b/tensorflow/core/kernels/conv_grad_shape_utils.h index d83c1bb25ee02f..cc0708c4fe4f74 100644 --- a/tensorflow/core/kernels/conv_grad_shape_utils.h +++ b/tensorflow/core/kernels/conv_grad_shape_utils.h @@ -69,7 +69,7 @@ struct ConvBackpropDimensions { absl::Status ConvBackpropComputeDimensions( 
absl::string_view label, int num_spatial_dims, const TensorShape& input_shape, const TensorShape& filter_shape, - const TensorShape& out_backprop_shape, const std::vector& strides, + const TensorShape& out_backprop_shape, const std::vector& strides, Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims); // The V2 version computes the same outputs with arbitrary dilation rate and @@ -78,8 +78,8 @@ absl::Status ConvBackpropComputeDimensions( absl::Status ConvBackpropComputeDimensionsV2( absl::string_view label, int num_spatial_dims, const TensorShape& input_shape, const TensorShape& filter_shape, - const TensorShape& out_backprop_shape, absl::Span dilations, - const std::vector& strides, Padding padding, + const TensorShape& out_backprop_shape, absl::Span dilations, + const std::vector& strides, Padding padding, absl::Span explicit_paddings, TensorFormat data_format, ConvBackpropDimensions* dims); diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index c604f3bf4bbc4d..8a9c8e8aa8a132 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -58,7 +58,7 @@ absl::Status InitConv2DParameters(const OpKernelConstruction* context, TF_RETURN_IF_ERROR( context->GetAttr("explicit_paddings", ¶ms->explicit_paddings)); } - string data_format_string; + std::string data_format_string; TF_RETURN_IF_ERROR(context->GetAttr("data_format", &data_format_string)); TF_REQUIRES(FormatFromString(data_format_string, ¶ms->data_format), errors::InvalidArgument("Invalid data format")); diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h index 65c63fec1e439f..199cd94c99cbaa 100644 --- a/tensorflow/core/kernels/conv_ops.h +++ b/tensorflow/core/kernels/conv_ops.h @@ -45,8 +45,8 @@ template struct LaunchConvOp { void operator()(OpKernelContext* context, bool cudnn_use_autotune, const Tensor& input, const Tensor& filter, - const std::vector& dilations, - const std::vector& strides, Padding padding, + const std::vector& dilations, + const std::vector& strides, Padding padding, const std::vector& explicit_paddings, TensorFormat data_format, Tensor* output); }; @@ -85,13 +85,13 @@ struct Im2ColBufferResource : public ResourceBase { // the buffer memory held by this resource. mutex mu; T* data; - string DebugString() const { return "Im2ColBufferResource"; } + std::string DebugString() const { return "Im2ColBufferResource"; } }; // Convolution parameters specified by Op attributes. 
struct Conv2DParameters { - std::vector dilations; - std::vector strides; + std::vector dilations; + std::vector strides; Padding padding; TensorFormat data_format; std::vector explicit_paddings; diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 72bad756b4d0fd..00c02ccd51c711 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -65,7 +65,7 @@ template class Conv3DOp : public BinaryOp { public: explicit Conv3DOp(OpKernelConstruction* context) : BinaryOp(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -175,8 +175,8 @@ class Conv3DOp : public BinaryOp { } private: - std::vector dilation_; - std::vector stride_; + std::vector dilation_; + std::vector stride_; Padding padding_; TensorFormat data_format_; bool cudnn_use_autotune_; diff --git a/tensorflow/core/kernels/conv_ops_benchmark_test.cc b/tensorflow/core/kernels/conv_ops_benchmark_test.cc index 183372705aa3df..779fbb7a50bcd6 100644 --- a/tensorflow/core/kernels/conv_ops_benchmark_test.cc +++ b/tensorflow/core/kernels/conv_ops_benchmark_test.cc @@ -189,7 +189,7 @@ static int64_t Conv2DWithPostOpsFlops(int batch, int height, int width, template static Conv2DWithBiasAndActivationGraph Conv2DWithBiasAndActivation( int batch, int height, int width, int in_depth, int filter_w, int filter_h, - int out_depth, const string& activation_type, + int out_depth, const std::string& activation_type, TensorFormat data_format = FORMAT_NHWC) { Conv2DWithBiasGraph conv_graph = Conv2DWithBias(batch, height, width, in_depth, filter_w, filter_h, @@ -249,7 +249,7 @@ static Conv2DWithBatchNormGraph Conv2DWithBatchNorm( template static Conv2DWithBatchNormAndActivationGraph Conv2DWithBatchNormAndActivation( int batch, int height, int width, int in_depth, int filter_w, int filter_h, - int out_depth, const string& activation_type, + int out_depth, const std::string& activation_type, TensorFormat data_format = FORMAT_NHWC) { Conv2DWithBatchNormGraph conv_graph = Conv2DWithBatchNorm(batch, height, width, in_depth, filter_w, filter_h, @@ -271,11 +271,10 @@ static Conv2DWithBatchNormAndActivationGraph Conv2DWithBatchNormAndActivation( // Creates a tensorflow graph with a single FusedConv2D (with BiasAdd) node and // fuses into it additional computations (e.g. Relu). 
template -static Graph* FusedConv2DWithBias(int batch, int height, int width, - int in_depth, int filter_w, int filter_h, - int out_depth, - const std::vector& fused_ops = {}, - TensorFormat data_format = FORMAT_NHWC) { +static Graph* FusedConv2DWithBias( + int batch, int height, int width, int in_depth, int filter_w, int filter_h, + int out_depth, const std::vector& fused_ops = {}, + TensorFormat data_format = FORMAT_NHWC) { Graph* graph = new Graph(OpRegistry::Global()); Tensor images_t = data_format == FORMAT_NHWC @@ -341,7 +340,7 @@ static Graph* FusedConv2DWithBias(int batch, int height, int width, template static Graph* FusedConv2DWithBatchNorm( int batch, int height, int width, int in_depth, int filter_w, int filter_h, - int out_depth, const std::vector& fused_ops = {}, + int out_depth, const std::vector& fused_ops = {}, TensorFormat data_format = FORMAT_NHWC) { Graph* graph = new Graph(OpRegistry::Global()); diff --git a/tensorflow/core/kernels/conv_ops_bfloat16.cc b/tensorflow/core/kernels/conv_ops_bfloat16.cc index 37507841647f0b..d2b9bc71b5d3a3 100644 --- a/tensorflow/core/kernels/conv_ops_bfloat16.cc +++ b/tensorflow/core/kernels/conv_ops_bfloat16.cc @@ -110,8 +110,8 @@ void LaunchConvOp::operator()( Tensor* output) { // Get spatial dims for dilations and strides. int spatial_dims = input.dims() - 2; - gtl::InlinedVector strides_spatial(spatial_dims); - gtl::InlinedVector dilations_spatial(spatial_dims); + absl::InlinedVector strides_spatial(spatial_dims); + absl::InlinedVector dilations_spatial(spatial_dims); for (int i = 0; i < spatial_dims; ++i) { strides_spatial[i] = GetTensorDim(strides, data_format, static_cast(i + '0')); @@ -166,9 +166,9 @@ void LaunchConv2DOp::operator()( const std::vector& explicit_paddings, Tensor* output, TensorFormat data_format) { // Cast strides and dilations. - gtl::InlinedVector casted_strides = {row_stride, col_stride}; - gtl::InlinedVector casted_dilations = {row_dilation, - col_dilation}; + absl::InlinedVector casted_strides = {row_stride, col_stride}; + absl::InlinedVector casted_dilations = {row_dilation, + col_dilation}; auto* stream = ctx->op_device_context()->stream(); const bool cast_to_float = !IsBF16SupportedInOps(stream); diff --git a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc index 8887103240c9d7..ef031685c4093e 100644 --- a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc +++ b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc @@ -711,7 +711,7 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel { // Compute the shape of the output tensor, and allocate it. TensorShape padded_shape; - TTypes::ConstMatrix paddings_matrix = paddings.matrix(); + TTypes::ConstMatrix paddings_matrix = paddings.matrix(); for (int d = 0; d < dims; ++d) { const int32_t before = paddings_matrix(d, 0); // Pad before existing elements. 
@@ -867,7 +867,7 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel { } private: - std::vector strides_; + std::vector strides_; Padding padding_; bool align_corners_; int offset_; diff --git a/tensorflow/core/kernels/conv_ops_fused_impl.h b/tensorflow/core/kernels/conv_ops_fused_impl.h index 51a33288c8e8bb..154f43a226cfdb 100644 --- a/tensorflow/core/kernels/conv_ops_fused_impl.h +++ b/tensorflow/core/kernels/conv_ops_fused_impl.h @@ -307,7 +307,7 @@ struct LaunchFusedConv2DOp { }; template <> -struct LaunchFusedConv2DOp; +struct LaunchFusedConv2DOp; template <> struct LaunchFusedConv2DOp; @@ -732,7 +732,7 @@ class FusedConv2DOp : public OpKernel { // convolution with BiasAdd, but in practice it doesn't work, cuDNN ignores // this parameter and always does Relu activation. if (std::is_same::value) { - if (std::is_same::value || std::is_same::value) { + if (std::is_same::value || std::is_same::value) { patterns = {{FCT::kBiasAdd, {"BiasAdd"}}, {FCT::kBiasAddWithRelu, {"BiasAdd", "Relu"}}}; } else { diff --git a/tensorflow/core/kernels/conv_ops_fused_int8.cc b/tensorflow/core/kernels/conv_ops_fused_int8.cc index 7f919d5087dbbe..e23864960c1568 100644 --- a/tensorflow/core/kernels/conv_ops_fused_int8.cc +++ b/tensorflow/core/kernels/conv_ops_fused_int8.cc @@ -300,9 +300,8 @@ struct LaunchFusedConv2DOpCpuInt8Helper { }; template <> -struct LaunchFusedConv2DOp - : LaunchFusedConv2DOpCpuInt8Helper { -}; +struct LaunchFusedConv2DOp + : LaunchFusedConv2DOpCpuInt8Helper {}; template <> struct LaunchFusedConv2DOp diff --git a/tensorflow/core/kernels/conv_ops_gpu.cc b/tensorflow/core/kernels/conv_ops_gpu.cc index d781f26094e583..1570c1dc0d00e9 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.cc +++ b/tensorflow/core/kernels/conv_ops_gpu.cc @@ -88,7 +88,7 @@ StatusOr> AutotuneFusedConv( auto* stream = ctx->op_device_context()->stream(); if (!autotune_map->Find(params, &autotune_entry)) { - profiler::ScopedAnnotation trace("cudnn_autotuning"); + tsl::profiler::ScopedAnnotation trace("cudnn_autotuning"); se::TfAllocatorAdapter tf_allocator_adapter(ctx->device()->GetAllocator({}), stream); @@ -253,7 +253,7 @@ StatusOr> AutotuneUnfusedConv( auto* stream = ctx->op_device_context()->stream(); if (!autotune_map->Find(conv_parameters, &autotune_entry)) { - profiler::ScopedAnnotation annotation("cudnn_autotuning"); + tsl::profiler::ScopedAnnotation annotation("cudnn_autotuning"); #if GOOGLE_CUDA se::TfAllocatorAdapter tf_allocator_adapter(ctx->device()->GetAllocator({}), diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index faf028935d3dd8..74274977e10897 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -71,18 +71,18 @@ class DnnScratchAllocator : public se::ScratchAllocator { DnnScratchAllocator(int64_t memory_limit, OpKernelContext* context) : memory_limit_(memory_limit), total_byte_size_(0), context_(context) {} int64 GetMemoryLimitInBytes() override { return memory_limit_; } - tsl::StatusOr> AllocateBytes( + absl::StatusOr> AllocateBytes( int64_t byte_size) override { Tensor temporary_memory; if (byte_size < 0) { - return tsl::Status{absl::StatusCode::kInvalidArgument, - "Requested negative byte size!"}; + return absl::Status{absl::StatusCode::kInvalidArgument, + "Requested negative byte size!"}; } if (byte_size > memory_limit_) { - return tsl::Status{absl::StatusCode::kUnavailable, - absl::StrCat("Requested memory size (", byte_size, - ") exceeds the max memory limit (", - memory_limit_, ").")}; + return 
absl::Status{absl::StatusCode::kUnavailable, + absl::StrCat("Requested memory size (", byte_size, + ") exceeds the max memory limit (", + memory_limit_, ").")}; } AllocationAttributes allocation_attr; allocation_attr.retry_on_failure = false; @@ -90,7 +90,7 @@ class DnnScratchAllocator : public se::ScratchAllocator { DT_UINT8, TensorShape({byte_size}), &temporary_memory, AllocatorAttributes(), allocation_attr)); if (!allocation_status.ok()) { - return tsl::Status{ + return absl::Status{ absl::StatusCode::kUnavailable, absl::StrCat("Failed to allocate the requested memory size (", byte_size, ").")}; @@ -99,7 +99,7 @@ class DnnScratchAllocator : public se::ScratchAllocator { // allocator. allocated_tensors_.push_back(temporary_memory); total_byte_size_ += byte_size; - return tsl::StatusOr>( + return absl::StatusOr>( AsDeviceMemory(temporary_memory.flat().data(), temporary_memory.flat().size())); } @@ -118,7 +118,8 @@ typedef Eigen::GpuDevice GPUDevice; // autotuning with a cache, or by falling back to a default if // 'cudnn_use_autotune' is true and cuDNN is the statically-chosen DNN backend. template -StatusOr> AutotuneFusedConv( +absl::StatusOr> +AutotuneFusedConv( bool cudnn_use_autotune, AutotuneMap>* autotune_map, @@ -135,7 +136,7 @@ StatusOr> AutotuneFusedConv( se::DeviceMemory side_input_ptr, int64_t scratch_size); template -StatusOr> AutotuneUnfusedConv( +absl::StatusOr> AutotuneUnfusedConv( bool cudnn_use_autotune, AutotuneMap>* autotune_map, const ConvParameters& conv_parameters, OpKernelContext* ctx, @@ -158,7 +159,7 @@ AllocateScratchOrFallback(se::ScratchAllocator* scratch_allocator, auto workspace_size = selected_runner->GetWorkspaceSize(); - se::DeviceMemoryBase scratch_memory; + stream_executor::DeviceAddressBase scratch_memory; if (workspace_size > 0) { auto scratch_or = scratch_allocator->AllocateBytes(workspace_size); if (scratch_or.ok()) { @@ -209,9 +210,10 @@ Status LaunchAutotunedConv(const AutotuneEntry& autotune_entry, AllocateScratchOrFallback( scratch_allocator, primary, no_scratch_fallback)); auto& runner = *std::get(runner_and_scratch); - return runner(stream, nullptr, - std::get(runner_and_scratch), in_ptr, - filter_ptr, out_ptr); + return runner( + stream, nullptr, + std::get(runner_and_scratch), + in_ptr, filter_ptr, out_ptr); } else { auto dnn = stream->parent()->AsDnn(); if (dnn == nullptr) { @@ -234,7 +236,7 @@ Status LaunchAutotunedConv(const AutotuneEntry& autotune_entry, std::unique_ptr runner = std::move(runner_or).value(); - se::DeviceMemoryBase scratch_memory; + stream_executor::DeviceAddressBase scratch_memory; int64_t workspace_size = runner->GetWorkspaceSize(); if (workspace_size > 0) { auto scratch_or = scratch_allocator->AllocateBytes(workspace_size); diff --git a/tensorflow/core/kernels/conv_ops_impl.h b/tensorflow/core/kernels/conv_ops_impl.h index 0d3fc798bbe3c2..3d5a0ac76e5b5b 100644 --- a/tensorflow/core/kernels/conv_ops_impl.h +++ b/tensorflow/core/kernels/conv_ops_impl.h @@ -178,13 +178,13 @@ struct LaunchGrouped { std::array shuffle({3, 0, 1, 2, 4}); // Compute pre shuffle dimemnsions. - auto pre_shuffle = [&](const Tensor& tensor) -> std::array { + auto pre_shuffle = [&](const Tensor& tensor) -> std::array { return {tensor.dim_size(0), tensor.dim_size(1), tensor.dim_size(2), num_groups, tensor.dim_size(3) / num_groups}; }; // Compute post shuffle dimemnsions. 
- auto post_shuffle = [&](const Tensor& tensor) -> std::array { + auto post_shuffle = [&](const Tensor& tensor) -> std::array { return {num_groups, tensor.dim_size(0), tensor.dim_size(1), tensor.dim_size(2), tensor.dim_size(3) / num_groups}; }; @@ -262,8 +262,8 @@ template struct LaunchConvOp { void operator()(OpKernelContext* context, bool cudnn_use_autotune, const Tensor& input, const Tensor& filter, - const std::vector& dilations, - const std::vector& strides, const Padding padding, + const std::vector& dilations, + const std::vector& strides, const Padding padding, const std::vector& explicit_paddings, TensorFormat data_format, Tensor* output) { // For now just calling existing launchers based on spatial dimensions. @@ -292,7 +292,7 @@ class ConvOp : public BinaryOp { OP_REQUIRES(context, groups_ == 1, absl::UnimplementedError( "Grouped/Depthwise Convolutions are not supported yet.")); - string data_format_str; + std::string data_format_str; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str)); OP_REQUIRES(context, data_format_str == "CHANNELS_LAST" || diff --git a/tensorflow/core/kernels/conv_ops_int32.cc b/tensorflow/core/kernels/conv_ops_int32.cc index 46320bded04997..a582aeb4b7277c 100644 --- a/tensorflow/core/kernels/conv_ops_int32.cc +++ b/tensorflow/core/kernels/conv_ops_int32.cc @@ -30,12 +30,12 @@ template struct Conv2DOp; // CPU implementation, don't register this EigenTensor-based version. #if !defined(USE_GEMM_FOR_CONV) REGISTER_KERNEL_BUILDER( - Name("Conv2D").Device(DEVICE_CPU).TypeConstraint("T"), - Conv2DOp); + Name("Conv2D").Device(DEVICE_CPU).TypeConstraint("T"), + Conv2DOp); #endif // USE_GEMM_FOR_CONV REGISTER_KERNEL_BUILDER( - Name("Conv").Device(DEVICE_CPU).TypeConstraint("T"), - ConvOp); + Name("Conv").Device(DEVICE_CPU).TypeConstraint("T"), + ConvOp); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template <> diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index 929d5cb51b4c08..caff583b570092 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -86,8 +86,9 @@ class FusedResizePadConvOpTest : public OpsTestBase { const int right_padding = 0; AddInputFromArray(image.shape(), image.flat()); - AddInputFromArray(TensorShape({2}), {resized_height, resized_width}); - AddInputFromArray( + AddInputFromArray(TensorShape({2}), + {resized_height, resized_width}); + AddInputFromArray( TensorShape({4, 2}), {0, 0, top_padding, bottom_padding, left_padding, right_padding, 0, 0}); AddInputFromArray(filter.shape(), filter.flat()); @@ -128,8 +129,8 @@ class FusedResizePadConvOpTest : public OpsTestBase { int resize_height, int y_padding, int x_padding, int filter_size, int filter_count, bool resize_align_corners, - const string& pad_mode, int stride, - const string& padding, DataType dtype) { + const std::string& pad_mode, int stride, + const std::string& padding, DataType dtype) { Scope root = tensorflow::Scope::NewRootScope(); using namespace ::tensorflow::ops; // NOLINT(build/namespaces) @@ -188,8 +189,9 @@ class FusedResizePadConvOpTest : public OpsTestBase { void CompareFusedPadOnlyAndSeparate(int input_width, int input_height, int input_depth, int y_padding, int x_padding, int filter_size, - int filter_count, const string& pad_mode, - int stride, const string& padding, + int filter_count, + const std::string& pad_mode, int stride, + const std::string& padding, DataType dtype) { Scope root = tensorflow::Scope::NewRootScope(); using namespace ::tensorflow::ops; // 
NOLINT(build/namespaces) @@ -488,7 +490,7 @@ class FusedConv2DOpTest : public OpsTestBase { static constexpr int kImageBatchCount = 8; static constexpr bool kIsInt8 = - std::is_same::value || std::is_same::value; + std::is_same::value || std::is_same::value; using BiasAddGraphRunner = std::function& explicit_paddings, Tensor* output, bool allow_gpu_device = false, int stride = 1) { Scope root = tensorflow::Scope::NewRootScope(); @@ -780,7 +782,7 @@ class FusedConv2DOpTest : public OpsTestBase { TensorShape shape = arg_data.shape(); Tensor arg_data_float = Tensor(dtype_args, shape); for (int index = 0; index < arg_data.NumElements(); index++) { - int8 v = *(reinterpret_cast(arg_data.data()) + index); + int8_t v = *(reinterpret_cast(arg_data.data()) + index); *(reinterpret_cast(arg_data_float.data()) + index) = static_cast(v); } @@ -886,7 +888,7 @@ class FusedConv2DOpTest : public OpsTestBase { void ExpectMatch(const Tensor& x, const Tensor& y, double atol) { constexpr bool exact_match = - std::is_same::value || std::is_same::value; + std::is_same::value || std::is_same::value; if (exact_match) { test::ExpectEqual(x, y); } else { @@ -903,7 +905,7 @@ class FusedConv2DOpTest : public OpsTestBase { constexpr int int8_scale = 80; - using ConvT = typename std::conditional::type; + using ConvT = typename std::conditional::type; DataType dtype_conv = DataTypeToEnum::v(); TensorShape image_shape{image_batch_count, image_height, image_width, @@ -1120,7 +1122,7 @@ class FusedConv2DOpTest : public OpsTestBase { // Verifies that computing Conv2D+FusedBatchNorm+{Activation} in a graph is // identical to FusedConv2D. void VerifyConv2DWithBatchNormAndActivation( - const string& activation, int filter_size, int filter_count, + const std::string& activation, int filter_size, int filter_count, const std::vector& explicit_paddings = {}, int depth = kDepth, int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { @@ -1353,7 +1355,7 @@ REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest, // SpatialConvolutionAndActivation); #endif -using FusedBiasAddDataTypes = ::testing::Types; +using FusedBiasAddDataTypes = ::testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBiasOpTest, FusedBiasAddDataTypes); diff --git a/tensorflow/core/kernels/conv_ops_using_gemm.cc b/tensorflow/core/kernels/conv_ops_using_gemm.cc index 3ebd3a4fa76d93..531b6377b2ff64 100644 --- a/tensorflow/core/kernels/conv_ops_using_gemm.cc +++ b/tensorflow/core/kernels/conv_ops_using_gemm.cc @@ -433,7 +433,7 @@ class Conv2DUsingGemmOp : public BinaryOp { explicit Conv2DUsingGemmOp(OpKernelConstruction* context) : BinaryOp(context) { OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -557,7 +557,7 @@ class Conv2DUsingGemmOp : public BinaryOp { } private: - std::vector strides_; + std::vector strides_; Padding padding_; TensorFormat data_format_; diff --git a/tensorflow/core/kernels/count_up_to_op.cc b/tensorflow/core/kernels/count_up_to_op.cc index 5abc17a8aa2aaf..fe0709186c6809 100644 --- a/tensorflow/core/kernels/count_up_to_op.cc +++ b/tensorflow/core/kernels/count_up_to_op.cc @@ -102,7 +102,7 @@ class ResourceCountUpToOp : public OpKernel { Name("ResourceCountUpTo").TypeConstraint("T").Device(DEVICE_CPU), \ ResourceCountUpToOp) 
-REGISTER(int32); +REGISTER(int32_t); REGISTER(int64_t); #undef REGISTER diff --git a/tensorflow/core/kernels/ctc_decoder_ops.cc b/tensorflow/core/kernels/ctc_decoder_ops.cc index 401f1572298d9b..7c6d9132dd2142 100644 --- a/tensorflow/core/kernels/ctc_decoder_ops.cc +++ b/tensorflow/core/kernels/ctc_decoder_ops.cc @@ -91,7 +91,7 @@ class CTCDecodeHelper { " batch_size: ", batch_size); } - auto seq_len_t = (*seq_len)->vec(); + auto seq_len_t = (*seq_len)->vec(); for (int b = 0; b < batch_size; ++b) { if (!(seq_len_t(b) <= max_time)) { @@ -220,7 +220,7 @@ class CTCGreedyDecoderOp : public OpKernel { input_list_t.emplace_back(inputs_t.data() + t * batch_size * num_classes, batch_size, num_classes); } - auto seq_len_t = seq_len->vec(); + auto seq_len_t = seq_len->vec(); auto log_prob_t = log_prob->matrix(); log_prob_t.setZero(); @@ -309,7 +309,7 @@ class CTCBeamSearchDecoderOp : public OpKernel { &decoded_values, &decoded_shape)); auto inputs_t = inputs->tensor(); - auto seq_len_t = seq_len->vec(); + auto seq_len_t = seq_len->vec(); auto log_prob_t = log_prob->matrix(); const TensorShape& inputs_shape = inputs->shape(); diff --git a/tensorflow/core/kernels/ctc_loss_op.cc b/tensorflow/core/kernels/ctc_loss_op.cc index 63d31fcf62d46d..a1b851feb206db 100644 --- a/tensorflow/core/kernels/ctc_loss_op.cc +++ b/tensorflow/core/kernels/ctc_loss_op.cc @@ -127,7 +127,7 @@ class CTCLossOp : public OpKernel { errors::InvalidArgument("len(sequence_length) != batch_size. ", "len(sequence_length): ", seq_len->dim_size(0), " batch_size: ", batch_size)); - auto seq_len_t = seq_len->vec(); + auto seq_len_t = seq_len->vec(); OP_REQUIRES(ctx, labels_indices->dim_size(0) == labels_values->dim_size(0), errors::InvalidArgument( @@ -166,7 +166,7 @@ class CTCLossOp : public OpKernel { 0, " and ", batch_size, " but saw: ", batch_indices)); - auto values = g.values(); + auto values = g.values(); std::vector* b_values = &labels_t[batch_indices]; b_values->resize(values.size()); for (int i = 0; i < values.size(); ++i) (*b_values)[i] = values(i); diff --git a/tensorflow/core/kernels/cudnn_pooling_gpu.h b/tensorflow/core/kernels/cudnn_pooling_gpu.h index 970eb533318bb4..d344bb09da1c39 100644 --- a/tensorflow/core/kernels/cudnn_pooling_gpu.h +++ b/tensorflow/core/kernels/cudnn_pooling_gpu.h @@ -39,9 +39,9 @@ class DnnPooling3dOp { public: static void Compute(OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::array& size, - const std::array& stride, - const std::array& padding, + const std::array& size, + const std::array& stride, + const std::array& padding, TensorFormat data_format, const Tensor& tensor_in, Tensor* output); }; @@ -53,10 +53,10 @@ class DnnPooling3dGradOp { public: static void Compute(OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::array& window, - const std::array& stride, - const std::array& padding, - const std::array& output_size, + const std::array& window, + const std::array& stride, + const std::array& padding, + const std::array& output_size, TensorFormat data_format, const Tensor& out_backprop, const TensorShape& tensor_in_shape, const Tensor* tensor_in, const Tensor* tensor_out, diff --git a/tensorflow/core/kernels/cudnn_rnn_ops.cc b/tensorflow/core/kernels/cudnn_rnn_ops.cc index f81c3176424843..00ce115511e76d 100644 --- a/tensorflow/core/kernels/cudnn_rnn_ops.cc +++ b/tensorflow/core/kernels/cudnn_rnn_ops.cc @@ -1924,7 +1924,7 @@ class CudnnRNNForwardOpV2 << algo_config->algorithm()->tensor_ops_enabled() << ")."; return OkStatus(); } - 
profiler::ScopedAnnotation trace("cudnn_autotuning"); + tsl::profiler::ScopedAnnotation trace("cudnn_autotuning"); // Create temp tensors when profiling backprop pass. auto data_type = input->dtype(); diff --git a/tensorflow/core/kernels/cwise_op_abs.cc b/tensorflow/core/kernels/cwise_op_abs.cc index 0f32478dcc7dc6..4f3e04d7cd4c7f 100644 --- a/tensorflow/core/kernels/cwise_op_abs.cc +++ b/tensorflow/core/kernels/cwise_op_abs.cc @@ -18,7 +18,7 @@ limitations under the License. namespace tensorflow { REGISTER8(UnaryOp, CPU, "Abs", functor::abs, Eigen::half, bfloat16, float, - double, int8, int16, int32, int64_t); + double, int8_t, int16_t, int32_t, int64_t); REGISTER2(UnaryOp, CPU, "ComplexAbs", functor::abs, complex64, complex128); @@ -44,7 +44,7 @@ REGISTER_KERNEL_BUILDER(Name("Abs") .Device(DEVICE_DEFAULT) .HostMemory("x") .HostMemory("y") - .TypeConstraint("T"), - UnaryOp>); + .TypeConstraint("T"), + UnaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_acos.cc b/tensorflow/core/kernels/cwise_op_acos.cc index f2ca5677469f18..50fce03c1e0f95 100644 --- a/tensorflow/core/kernels/cwise_op_acos.cc +++ b/tensorflow/core/kernels/cwise_op_acos.cc @@ -17,9 +17,9 @@ limitations under the License. namespace tensorflow { REGISTER5(UnaryOp, CPU, "Acos", functor::acos, Eigen::half, bfloat16, float, - double, int8); -REGISTER5(UnaryOp, CPU, "Acos", functor::acos, int16, int32, int64_t, complex64, - complex128); + double, int8_t); +REGISTER5(UnaryOp, CPU, "Acos", functor::acos, int16_t, int32_t, int64_t, + complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc index fdb3de69b65033..35483f244836fa 100644 --- a/tensorflow/core/kernels/cwise_op_add_1.cc +++ b/tensorflow/core/kernels/cwise_op_add_1.cc @@ -16,11 +16,11 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER6(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32, - int64_t, bfloat16); +REGISTER6(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, + int32_t, int64_t, bfloat16); REGISTER6(BinaryOp, CPU, "AddV2", functor::add, float, Eigen::half, double, - int32, int64_t, bfloat16); + int32_t, int64_t, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -55,14 +55,14 @@ REGISTER_KERNEL_BUILDER(Name("Add") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); REGISTER_KERNEL_BUILDER(Name("AddV2") .Device(DEVICE_DEFAULT) .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_add_2.cc b/tensorflow/core/kernels/cwise_op_add_2.cc index 088a10fcb738f7..bb897eedca48b0 100644 --- a/tensorflow/core/kernels/cwise_op_add_2.cc +++ b/tensorflow/core/kernels/cwise_op_add_2.cc @@ -22,13 +22,13 @@ namespace tensorflow { // sharded files, only make its register calls when not __ANDROID_TYPES_SLIM__. #if !defined(__ANDROID_TYPES_SLIM__) -REGISTER6(BinaryOp, CPU, "Add", functor::add, int8, int16, complex64, uint8, - complex128, tstring); +REGISTER6(BinaryOp, CPU, "Add", functor::add, int8_t, int16_t, complex64, + uint8_t, complex128, tstring); // Notice: String is excluded to allow marking AddV2 is_commutative and // is_aggregate. 
-REGISTER8(BinaryOp, CPU, "AddV2", functor::add, int8, int16, complex64, uint8, - uint16, uint32, uint64, complex128); +REGISTER8(BinaryOp, CPU, "AddV2", functor::add, int8_t, int16_t, complex64, + uint8_t, uint16_t, uint32_t, uint64_t, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_bitwise_and.cc b/tensorflow/core/kernels/cwise_op_bitwise_and.cc index 6509665e89864d..927c017cbabb82 100644 --- a/tensorflow/core/kernels/cwise_op_bitwise_and.cc +++ b/tensorflow/core/kernels/cwise_op_bitwise_and.cc @@ -17,8 +17,8 @@ limitations under the License. namespace tensorflow { -REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32, - int64_t, uint8, uint16, uint32, uint64); +REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8_t, int16_t, + int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_bitwise_or.cc b/tensorflow/core/kernels/cwise_op_bitwise_or.cc index d974d249fac06f..aab01711419c2c 100644 --- a/tensorflow/core/kernels/cwise_op_bitwise_or.cc +++ b/tensorflow/core/kernels/cwise_op_bitwise_or.cc @@ -19,8 +19,8 @@ namespace tensorflow { #if !defined(MLIR_GENERATED_CPU_KERNELS_ENABLED) || \ !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) -REGISTER8(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32, - int64_t, uint8, uint16, uint32, uint64); +REGISTER8(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8_t, int16_t, + int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t); #endif #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc index 831f4d86c48277..a7a7c91fde59f0 100644 --- a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc +++ b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc @@ -17,8 +17,8 @@ limitations under the License. namespace tensorflow { -REGISTER8(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32, - int64_t, uint8, uint16, uint32, uint64); +REGISTER8(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8_t, int16_t, + int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index 949b162509ecff..f8cbd536b24731 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -269,12 +269,12 @@ REGISTER_CPU_KERNEL(Eigen::half); REGISTER_CPU_KERNEL(float); REGISTER_CPU_KERNEL(double); REGISTER_CPU_KERNEL(bfloat16); -REGISTER_CPU_KERNEL(int8); -REGISTER_CPU_KERNEL(int16); -REGISTER_CPU_KERNEL(int32); +REGISTER_CPU_KERNEL(int8_t); +REGISTER_CPU_KERNEL(int16_t); +REGISTER_CPU_KERNEL(int32_t); REGISTER_CPU_KERNEL(int64_t); -REGISTER_CPU_KERNEL(uint8); -REGISTER_CPU_KERNEL(uint16); +REGISTER_CPU_KERNEL(uint8_t); +REGISTER_CPU_KERNEL(uint16_t); REGISTER_CPU_KERNEL(std::complex); REGISTER_CPU_KERNEL(std::complex); #undef REGISTER_CPU_KERNEL diff --git a/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc index e84885218ea1a3..f22d3bd3db7c96 100644 --- a/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc +++ b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc @@ -24,11 +24,11 @@ limitations under the License. 
namespace tensorflow { template -__global__ void UnaryClipCustomKernel(const int32 size_in, - const T *__restrict__ in0, - const T *__restrict__ in1, - const T *__restrict__ in2, - T *__restrict__ out) { +__global__ void UnaryClipCustomKernel(const int32_t size_in, + const T* __restrict__ in0, + const T* __restrict__ in1, + const T* __restrict__ in2, + T* __restrict__ out) { GPU_1D_KERNEL_LOOP(i, size_in) { T value = in2[0] < in0[i] ? in2[0] : in0[i]; out[i] = value < in1[0] ? in1[0] : value; @@ -36,11 +36,11 @@ __global__ void UnaryClipCustomKernel(const int32 size_in, } template -__global__ void BinaryRightClipCustomKernel(const int32 size_in, - const T *__restrict__ in0, - const T *__restrict__ in1, - const T *__restrict__ in2, - T *__restrict__ out) { +__global__ void BinaryRightClipCustomKernel(const int32_t size_in, + const T* __restrict__ in0, + const T* __restrict__ in1, + const T* __restrict__ in2, + T* __restrict__ out) { GPU_1D_KERNEL_LOOP(i, size_in) { T value = in2[i] < in0[i] ? in2[i] : in0[i]; out[i] = value < in1[0] ? in1[0] : value; @@ -48,11 +48,11 @@ __global__ void BinaryRightClipCustomKernel(const int32 size_in, } template -__global__ void BinaryLeftClipCustomKernel(const int32 size_in, - const T *__restrict__ in0, - const T *__restrict__ in1, - const T *__restrict__ in2, - T *__restrict__ out) { +__global__ void BinaryLeftClipCustomKernel(const int32_t size_in, + const T* __restrict__ in0, + const T* __restrict__ in1, + const T* __restrict__ in2, + T* __restrict__ out) { GPU_1D_KERNEL_LOOP(i, size_in) { T value = in2[0] < in0[i] ? in2[0] : in0[i]; out[i] = value < in1[i] ? in1[i] : value; diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc index 8840579bdeccee..d537a7f39e0753 100644 --- a/tensorflow/core/kernels/cwise_op_div.cc +++ b/tensorflow/core/kernels/cwise_op_div.cc @@ -19,10 +19,10 @@ namespace tensorflow { REGISTER6(BinaryOp, CPU, "Div", functor::div, float, Eigen::half, double, bfloat16, complex64, complex128); -REGISTER8(BinaryOp, CPU, "Div", functor::safe_div, uint8, uint16, uint32, - uint64, int8, int16, int32, int64_t); -REGISTER8(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, - uint32, uint64, int8, int16, int32, int64_t); +REGISTER8(BinaryOp, CPU, "Div", functor::safe_div, uint8_t, uint16_t, uint32_t, + uint64_t, int8_t, int16_t, int32_t, int64_t); +REGISTER8(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8_t, uint16_t, + uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t); REGISTER4(BinaryOp, CPU, "TruncateDiv", functor::truncate_div_real, Eigen::half, bfloat16, float, double); REGISTER6(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double, @@ -35,8 +35,8 @@ REGISTER_KERNEL_BUILDER(Name("Div") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_equal_to_1.cc index 87b499c96fdd05..7aecd4f62b2bf7 100644 --- a/tensorflow/core/kernels/cwise_op_equal_to_1.cc +++ b/tensorflow/core/kernels/cwise_op_equal_to_1.cc @@ -17,9 +17,9 @@ limitations under the License. 
namespace tensorflow { REGISTER7(BinaryOp, CPU, "Equal", functor::equal_to, float, Eigen::half, double, - uint8, int8, int16, bfloat16); -REGISTER8(BinaryOp, CPU, "Equal", functor::equal_to, uint16, uint32, uint64, - qint8, qint16, quint8, quint16, qint32); + uint8_t, int8_t, int16_t, bfloat16); +REGISTER8(BinaryOp, CPU, "Equal", functor::equal_to, uint16_t, uint32_t, + uint64_t, qint8, qint16, quint8, quint16, qint32); REGISTER_KERNEL_BUILDER( Name("ApproximateEqual").Device(DEVICE_CPU).TypeConstraint("T"), ApproximateEqualOp); @@ -32,8 +32,8 @@ REGISTER_KERNEL_BUILDER(Name("Equal") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_equal_to_2.cc b/tensorflow/core/kernels/cwise_op_equal_to_2.cc index 29f15d7f7d9a67..e17cda8f2fbab6 100644 --- a/tensorflow/core/kernels/cwise_op_equal_to_2.cc +++ b/tensorflow/core/kernels/cwise_op_equal_to_2.cc @@ -22,8 +22,8 @@ namespace tensorflow { // sharded files, only make its register calls when not __ANDROID_TYPES_SLIM__. #if !defined(__ANDROID_TYPES_SLIM__) -REGISTER6(BinaryOp, CPU, "Equal", functor::equal_to, int32, int64_t, complex64, - complex128, tstring, bool); +REGISTER6(BinaryOp, CPU, "Equal", functor::equal_to, int32_t, int64_t, + complex64, complex128, tstring, bool); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) REGISTER6(BinaryOp, GPU, "Equal", functor::equal_to, int8, int16, int64, diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc index 0b4584988ad526..95c5652548004a 100644 --- a/tensorflow/core/kernels/cwise_op_floor_div.cc +++ b/tensorflow/core/kernels/cwise_op_floor_div.cc @@ -17,8 +17,8 @@ limitations under the License. namespace tensorflow { -REGISTER8(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16, - uint32, uint64, int8, int16, int32, int64_t); +REGISTER8(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8_t, uint16_t, + uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t); REGISTER4(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float, Eigen::half, bfloat16, double); @@ -49,7 +49,7 @@ REGISTER_KERNEL_BUILDER(Name("FloorDiv") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_floor_mod.cc b/tensorflow/core/kernels/cwise_op_floor_mod.cc index 590f6e592df7d3..9cc40508e1adce 100644 --- a/tensorflow/core/kernels/cwise_op_floor_mod.cc +++ b/tensorflow/core/kernels/cwise_op_floor_mod.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER8(BinaryOp, CPU, "FloorMod", functor::safe_floor_mod, int8, int16, - int32, int64_t, uint8, uint16, uint32, uint64); +REGISTER8(BinaryOp, CPU, "FloorMod", functor::safe_floor_mod, int8_t, int16_t, + int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t); REGISTER4(BinaryOp, CPU, "FloorMod", functor::floor_fmod, Eigen::half, bfloat16, float, double); @@ -39,7 +39,7 @@ REGISTER_KERNEL_BUILDER(Name("FloorMod") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_greater.cc b/tensorflow/core/kernels/cwise_op_greater.cc index b05e875e19b3d9..1cd27097ce66fe 100644 --- a/tensorflow/core/kernels/cwise_op_greater.cc +++ b/tensorflow/core/kernels/cwise_op_greater.cc @@ -17,8 +17,9 @@ limitations under the License. namespace tensorflow { REGISTER9(BinaryOp, CPU, "Greater", functor::greater, float, Eigen::half, - double, int32, int64_t, uint8, uint16, uint32, uint64); -REGISTER3(BinaryOp, CPU, "Greater", functor::greater, int8, int16, bfloat16); + double, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t); +REGISTER3(BinaryOp, CPU, "Greater", functor::greater, int8_t, int16_t, + bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) REGISTER6(BinaryOp, GPU, "Greater", functor::greater, float, Eigen::half, @@ -44,6 +45,6 @@ REGISTER_KERNEL_BUILDER(Name("Greater") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_greater_equal.cc b/tensorflow/core/kernels/cwise_op_greater_equal.cc index 7ccfc5501a6fda..1c9e7df836deb7 100644 --- a/tensorflow/core/kernels/cwise_op_greater_equal.cc +++ b/tensorflow/core/kernels/cwise_op_greater_equal.cc @@ -17,9 +17,10 @@ limitations under the License. namespace tensorflow { REGISTER9(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, float, - Eigen::half, double, int32, int64_t, uint8, uint16, uint32, uint64); -REGISTER3(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, int8, int16, - bfloat16); + Eigen::half, double, int32_t, int64_t, uint8_t, uint16_t, uint32_t, + uint64_t); +REGISTER3(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, int8_t, + int16_t, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) REGISTER9(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, float, @@ -45,7 +46,7 @@ REGISTER_KERNEL_BUILDER(Name("GreaterEqual") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_invert.cc b/tensorflow/core/kernels/cwise_op_invert.cc index 5e5e827217ca1b..2f54bd8292b3b6 100644 --- a/tensorflow/core/kernels/cwise_op_invert.cc +++ b/tensorflow/core/kernels/cwise_op_invert.cc @@ -17,8 +17,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER8(UnaryOp, CPU, "Invert", functor::invert, int8, int16, int32, int64_t, - uint8, uint16, uint32, uint64); +REGISTER8(UnaryOp, CPU, "Invert", functor::invert, int8_t, int16_t, int32_t, + int64_t, uint8_t, uint16_t, uint32_t, uint64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_left_shift.cc b/tensorflow/core/kernels/cwise_op_left_shift.cc index 19c9f84ead760e..76632030feec8f 100644 --- a/tensorflow/core/kernels/cwise_op_left_shift.cc +++ b/tensorflow/core/kernels/cwise_op_left_shift.cc @@ -17,8 +17,8 @@ limitations under the License. namespace tensorflow { -REGISTER8(BinaryOp, CPU, "LeftShift", functor::left_shift, int8, int16, int32, - int64_t, uint8, uint16, uint32, uint64); +REGISTER8(BinaryOp, CPU, "LeftShift", functor::left_shift, int8_t, int16_t, + int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_less.cc b/tensorflow/core/kernels/cwise_op_less.cc index 9c7535fae31365..62dd9a18a5d86e 100644 --- a/tensorflow/core/kernels/cwise_op_less.cc +++ b/tensorflow/core/kernels/cwise_op_less.cc @@ -17,9 +17,9 @@ limitations under the License. namespace tensorflow { REGISTER5(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double, - bfloat16, int32); -REGISTER7(BinaryOp, CPU, "Less", functor::less, uint8, uint16, uint32, uint64, - int8, int16, int64_t); + bfloat16, int32_t); +REGISTER7(BinaryOp, CPU, "Less", functor::less, uint8_t, uint16_t, uint32_t, + uint64_t, int8_t, int16_t, int64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) @@ -45,6 +45,6 @@ REGISTER_KERNEL_BUILDER(Name("Less") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_less_equal.cc b/tensorflow/core/kernels/cwise_op_less_equal.cc index b6acec213cb6ff..e17272986381fb 100644 --- a/tensorflow/core/kernels/cwise_op_less_equal.cc +++ b/tensorflow/core/kernels/cwise_op_less_equal.cc @@ -17,9 +17,9 @@ limitations under the License. namespace tensorflow { REGISTER5(BinaryOp, CPU, "LessEqual", functor::less_equal, float, Eigen::half, - bfloat16, double, int32); -REGISTER7(BinaryOp, CPU, "LessEqual", functor::less_equal, int64_t, uint8, - uint16, uint32, uint64, int8, int16); + bfloat16, double, int32_t); +REGISTER7(BinaryOp, CPU, "LessEqual", functor::less_equal, int64_t, uint8_t, + uint16_t, uint32_t, uint64_t, int8_t, int16_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) @@ -45,7 +45,7 @@ REGISTER_KERNEL_BUILDER(Name("LessEqual") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_maximum.cc b/tensorflow/core/kernels/cwise_op_maximum.cc index 9be2a3a0fc9062..74db589e7783d2 100644 --- a/tensorflow/core/kernels/cwise_op_maximum.cc +++ b/tensorflow/core/kernels/cwise_op_maximum.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { REGISTER4(BinaryOp, CPU, "Maximum", functor::maximum, float, Eigen::half, bfloat16, double); -REGISTER8(BinaryOp, CPU, "Maximum", functor::maximum, int8, uint8, int16, - uint16, int32, uint32, int64_t, uint64); +REGISTER8(BinaryOp, CPU, "Maximum", functor::maximum, int8_t, uint8_t, int16_t, + uint16_t, int32_t, uint32_t, int64_t, uint64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) REGISTER6(BinaryOp, GPU, "Maximum", functor::maximum, float, Eigen::half, @@ -44,7 +44,7 @@ REGISTER_KERNEL_BUILDER(Name("Maximum") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_minimum.cc b/tensorflow/core/kernels/cwise_op_minimum.cc index 67d1c6a8452517..5a101acf5e47ce 100644 --- a/tensorflow/core/kernels/cwise_op_minimum.cc +++ b/tensorflow/core/kernels/cwise_op_minimum.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { REGISTER4(BinaryOp, CPU, "Minimum", functor::minimum, float, Eigen::half, bfloat16, double); -REGISTER8(BinaryOp, CPU, "Minimum", functor::minimum, int8, uint8, int16, - uint16, int32, uint32, int64_t, uint64); +REGISTER8(BinaryOp, CPU, "Minimum", functor::minimum, int8_t, uint8_t, int16_t, + uint16_t, int32_t, uint32_t, int64_t, uint64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) REGISTER6(BinaryOp, GPU, "Minimum", functor::minimum, float, Eigen::half, @@ -45,7 +45,7 @@ REGISTER_KERNEL_BUILDER(Name("Minimum") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_mod.cc b/tensorflow/core/kernels/cwise_op_mod.cc index 32fd740a38ccca..51b91ceb85c2fd 100644 --- a/tensorflow/core/kernels/cwise_op_mod.cc +++ b/tensorflow/core/kernels/cwise_op_mod.cc @@ -16,9 +16,9 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER2(BinaryOp, CPU, "Mod", functor::safe_mod, int32, int64_t); +REGISTER2(BinaryOp, CPU, "Mod", functor::safe_mod, int32_t, int64_t); REGISTER2(BinaryOp, CPU, "Mod", functor::fmod, float, double); -REGISTER2(BinaryOp, CPU, "TruncateMod", functor::safe_mod, int32, int64_t); +REGISTER2(BinaryOp, CPU, "TruncateMod", functor::safe_mod, int32_t, int64_t); REGISTER2(BinaryOp, CPU, "TruncateMod", functor::fmod, float, double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -45,13 +45,13 @@ REGISTER_KERNEL_BUILDER(Name("Mod") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); REGISTER_KERNEL_BUILDER(Name("TruncateMod") .Device(DEVICE_DEFAULT) .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_mul_1.cc b/tensorflow/core/kernels/cwise_op_mul_1.cc index 9af3108676258b..cc6fd91248766c 100644 --- a/tensorflow/core/kernels/cwise_op_mul_1.cc +++ b/tensorflow/core/kernels/cwise_op_mul_1.cc @@ -17,8 +17,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER6(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double, uint8, - int32, bfloat16); +REGISTER6(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double, + uint8_t, int32_t, bfloat16); REGISTER6(BinaryOp, CPU, "MulNoNan", functor::mul_no_nan, Eigen::half, float, double, complex64, complex128, bfloat16); @@ -53,8 +53,8 @@ REGISTER_KERNEL_BUILDER(Name("Mul") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_mul_2.cc b/tensorflow/core/kernels/cwise_op_mul_2.cc index 66ed75f469ebb5..31080a3e01bc0a 100644 --- a/tensorflow/core/kernels/cwise_op_mul_2.cc +++ b/tensorflow/core/kernels/cwise_op_mul_2.cc @@ -22,8 +22,8 @@ namespace tensorflow { // sharded files, only make its register calls when not __ANDROID_TYPES_SLIM__. #if !defined(__ANDROID_TYPES_SLIM__) -REGISTER8(BinaryOp, CPU, "Mul", functor::mul, int8, uint16, uint32, uint64, - int16, int64_t, complex64, complex128); +REGISTER8(BinaryOp, CPU, "Mul", functor::mul, int8_t, uint16_t, uint32_t, + uint64_t, int16_t, int64_t, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) REGISTER8(BinaryOp, GPU, "Mul", functor::mul, int8, uint16, uint32, uint64, diff --git a/tensorflow/core/kernels/cwise_op_neg_1.cc b/tensorflow/core/kernels/cwise_op_neg_1.cc index f3bd66c8986e5c..7f589ece2e313f 100644 --- a/tensorflow/core/kernels/cwise_op_neg_1.cc +++ b/tensorflow/core/kernels/cwise_op_neg_1.cc @@ -16,7 +16,7 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER4(UnaryOp, CPU, "Neg", functor::neg, int8, int16, int32, int64_t); +REGISTER4(UnaryOp, CPU, "Neg", functor::neg, int8_t, int16_t, int32_t, int64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) @@ -37,6 +37,6 @@ REGISTER_KERNEL_BUILDER(Name("Neg") .Device(DEVICE_DEFAULT) .HostMemory("x") .HostMemory("y") - .TypeConstraint("T"), - UnaryOp>); + .TypeConstraint("T"), + UnaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc index 22b8ff992ce37b..6e787b88bb1694 100644 --- a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc +++ b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc @@ -17,17 +17,17 @@ limitations under the License. 
namespace tensorflow { REGISTER7(BinaryOp, CPU, "NotEqual", functor::not_equal_to, float, Eigen::half, - double, uint8, int8, int16, bfloat16); -REGISTER8(BinaryOp, CPU, "NotEqual", functor::not_equal_to, uint16, uint32, - uint64, qint8, qint16, quint8, quint16, qint32); + double, uint8_t, int8_t, int16_t, bfloat16); +REGISTER8(BinaryOp, CPU, "NotEqual", functor::not_equal_to, uint16_t, uint32_t, + uint64_t, qint8, qint16, quint8, quint16, qint32); REGISTER_KERNEL_BUILDER(Name("NotEqual") .Device(DEVICE_DEFAULT) .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_not_equal_to_2.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_2.cc index 9f5a2508733ebe..537a8c4c0b8bf9 100644 --- a/tensorflow/core/kernels/cwise_op_not_equal_to_2.cc +++ b/tensorflow/core/kernels/cwise_op_not_equal_to_2.cc @@ -22,7 +22,7 @@ namespace tensorflow { // sharded files, only make its register calls when not __ANDROID_TYPES_SLIM__. #if !defined(__ANDROID_TYPES_SLIM__) -REGISTER6(BinaryOp, CPU, "NotEqual", functor::not_equal_to, int32, int64_t, +REGISTER6(BinaryOp, CPU, "NotEqual", functor::not_equal_to, int32_t, int64_t, complex64, complex128, tstring, bool); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc index d052c6021faf37..ae21c4613f1bc4 100644 --- a/tensorflow/core/kernels/cwise_op_pow.cc +++ b/tensorflow/core/kernels/cwise_op_pow.cc @@ -18,7 +18,8 @@ limitations under the License. namespace tensorflow { REGISTER6(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, bfloat16, double, complex64, complex128); -REGISTER4(BinaryOp, CPU, "Pow", functor::safe_pow, int8, int16, int32, int64_t); +REGISTER4(BinaryOp, CPU, "Pow", functor::safe_pow, int8_t, int16_t, int32_t, + int64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_right_shift.cc b/tensorflow/core/kernels/cwise_op_right_shift.cc index 7386c3674ba9c0..cc960b023390a1 100644 --- a/tensorflow/core/kernels/cwise_op_right_shift.cc +++ b/tensorflow/core/kernels/cwise_op_right_shift.cc @@ -17,8 +17,8 @@ limitations under the License. namespace tensorflow { -REGISTER8(BinaryOp, CPU, "RightShift", functor::right_shift, int8, int16, int32, - int64_t, uint8, uint16, uint32, uint64); +REGISTER8(BinaryOp, CPU, "RightShift", functor::right_shift, int8_t, int16_t, + int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_round.cc b/tensorflow/core/kernels/cwise_op_round.cc index fa3289f9e57f32..bab42c5b58f5cc 100644 --- a/tensorflow/core/kernels/cwise_op_round.cc +++ b/tensorflow/core/kernels/cwise_op_round.cc @@ -18,7 +18,7 @@ limitations under the License. 
namespace tensorflow { REGISTER6(UnaryOp, CPU, "Round", functor::round, Eigen::half, float, double, - bfloat16, int32, int64_t); + bfloat16, int32_t, int64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) diff --git a/tensorflow/core/kernels/cwise_op_select.cc b/tensorflow/core/kernels/cwise_op_select.cc index bf572572eace3d..5ef7a4008c8728 100644 --- a/tensorflow/core/kernels/cwise_op_select.cc +++ b/tensorflow/core/kernels/cwise_op_select.cc @@ -289,7 +289,7 @@ REGISTER_SELECT_GPU(bool); REGISTER_SELECT_GPU(Eigen::half); REGISTER_SELECT_GPU(float); REGISTER_SELECT_GPU(double); -REGISTER_SELECT_GPU(int32); +REGISTER_SELECT_GPU(int32_t); REGISTER_SELECT_GPU(int64_t); REGISTER_SELECT_GPU(complex64); REGISTER_SELECT_GPU(complex128); diff --git a/tensorflow/core/kernels/cwise_op_sign.cc b/tensorflow/core/kernels/cwise_op_sign.cc index 895280a22ab890..632e4a8cce12d5 100644 --- a/tensorflow/core/kernels/cwise_op_sign.cc +++ b/tensorflow/core/kernels/cwise_op_sign.cc @@ -18,7 +18,8 @@ limitations under the License. namespace tensorflow { REGISTER6(UnaryOp, CPU, "Sign", functor::sign, float, double, Eigen::half, bfloat16, complex64, complex128); -REGISTER4(UnaryOp, CPU, "Sign", functor::sign, int8, int16, int32, int64_t); +REGISTER4(UnaryOp, CPU, "Sign", functor::sign, int8_t, int16_t, int32_t, + int64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) REGISTER6(UnaryOp, GPU, "Sign", functor::sign, float, Eigen::half, double, @@ -41,7 +42,7 @@ REGISTER_KERNEL_BUILDER(Name("Sign") .Device(DEVICE_DEFAULT) .HostMemory("x") .HostMemory("y") - .TypeConstraint("T"), - UnaryOp>); + .TypeConstraint("T"), + UnaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc index e8122ba19a2632..ddca86ae25b7c9 100644 --- a/tensorflow/core/kernels/cwise_op_square.cc +++ b/tensorflow/core/kernels/cwise_op_square.cc @@ -18,9 +18,9 @@ limitations under the License. namespace tensorflow { REGISTER7(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double, - int32, int64_t, complex64, complex128); -REGISTER7(UnaryOp, CPU, "Square", functor::square, bfloat16, int8, int16, uint8, - uint16, uint32, uint64); + int32_t, int64_t, complex64, complex128); +REGISTER7(UnaryOp, CPU, "Square", functor::square, bfloat16, int8_t, int16_t, + uint8_t, uint16_t, uint32_t, uint64_t); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) @@ -45,7 +45,7 @@ REGISTER_KERNEL_BUILDER(Name("Square") .Device(DEVICE_DEFAULT) .HostMemory("x") .HostMemory("y") - .TypeConstraint("T"), - UnaryOp>); + .TypeConstraint("T"), + UnaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_squared_difference.cc b/tensorflow/core/kernels/cwise_op_squared_difference.cc index 2a34dd2c5290aa..c6f3fe2567afea 100644 --- a/tensorflow/core/kernels/cwise_op_squared_difference.cc +++ b/tensorflow/core/kernels/cwise_op_squared_difference.cc @@ -17,7 +17,7 @@ limitations under the License. 
namespace tensorflow { REGISTER8(BinaryOp, CPU, "SquaredDifference", functor::squared_difference, - float, Eigen::half, double, bfloat16, int32, int64_t, complex64, + float, Eigen::half, double, bfloat16, int32_t, int64_t, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) @@ -37,8 +37,8 @@ REGISTER_KERNEL_BUILDER( .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); REGISTER_KERNEL_BUILDER( Name("SquaredDifference") @@ -46,7 +46,7 @@ REGISTER_KERNEL_BUILDER( .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc index db8c81db3cf460..b4eb0447115d22 100644 --- a/tensorflow/core/kernels/cwise_op_sub.cc +++ b/tensorflow/core/kernels/cwise_op_sub.cc @@ -16,12 +16,12 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER8(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32, - int64_t, bfloat16, complex64, complex128); +REGISTER8(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, + int32_t, int64_t, bfloat16, complex64, complex128); #if !defined(__ANDROID_TYPES_SLIM__) // Sub op for int8, uint8, int16, uint16 -REGISTER6(BinaryOp, CPU, "Sub", functor::sub, int8, uint8, int16, uint16, - uint32, uint64); +REGISTER6(BinaryOp, CPU, "Sub", functor::sub, int8_t, uint8_t, int16_t, + uint16_t, uint32_t, uint64_t); #else // We only register the first type when we have multi-argument calls in the // case where we're trying to reduce executable size, but it turns out that the @@ -53,7 +53,7 @@ REGISTER_KERNEL_BUILDER(Name("Sub") .HostMemory("x") .HostMemory("y") .HostMemory("z") - .TypeConstraint("T"), - BinaryOp>); + .TypeConstraint("T"), + BinaryOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_ops_common.cc b/tensorflow/core/kernels/cwise_ops_common.cc index 733fa8af2f9cae..a202e3717938e9 100644 --- a/tensorflow/core/kernels/cwise_ops_common.cc +++ b/tensorflow/core/kernels/cwise_ops_common.cc @@ -36,7 +36,7 @@ void BinaryOpShared::SetComputeError(OpKernelContext* ctx) { // associated information. This is sufficient for now, since the only binary // ops that have compute errors are integer division and mod, and the only // error they produce is zero division. 
- const string& op = ctx->op_kernel().type_string(); + const std::string& op = ctx->op_kernel().type_string(); if ((op == "Div" || op == "Mod" || op == "FloorMod" || op == "FloorDiv") && DataTypeIsInteger(ctx->op_kernel().input_type(0))) { ctx->CtxFailure(errors::InvalidArgument("Integer division by zero")); @@ -62,7 +62,7 @@ BinaryOpShared::BinaryOpState::BinaryOpState(OpKernelContext* ctx) TryGetNodeAttr(ctx->op_kernel().def(), "incompatible_shape_error", &(incompatible_shape_error)); if (has_attr && !incompatible_shape_error) { - const string& op = ctx->op_kernel().type_string(); + const std::string& op = ctx->op_kernel().type_string(); OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &out)); result = (op == "NotEqual"); return; diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc index 41b851fda99b4b..5c6663b666aea3 100644 --- a/tensorflow/core/kernels/cwise_ops_test.cc +++ b/tensorflow/core/kernels/cwise_ops_test.cc @@ -27,7 +27,7 @@ namespace { // Creates a Graph which applies a unary "func" on a 3D tensor of // type T with "num" elements. template -static Graph* Unary(const string& func, int num, DataType dtype) { +static Graph* Unary(const std::string& func, int num, DataType dtype) { Graph* g = new Graph(OpRegistry::Global()); Tensor data(dtype, TensorShape({64, 64, num / (64 * 64)})); CHECK_GT(data.NumElements(), 0); @@ -97,7 +97,7 @@ BM_UNARY(gpu, Round, float, DT_FLOAT); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM // data func scalar. -Graph* BinaryScalar(int num, const string& func) { +Graph* BinaryScalar(int num, const std::string& func) { Graph* g = new Graph(OpRegistry::Global()); Tensor lhs(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); lhs.flat().setRandom(); diff --git a/tensorflow/core/kernels/data/batch_dataset_op.cc b/tensorflow/core/kernels/data/batch_dataset_op.cc index 62122f5d50987f..1813f7e9e02005 100644 --- a/tensorflow/core/kernels/data/batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/batch_dataset_op.cc @@ -73,7 +73,7 @@ class BatchDatasetOp::Dataset : public DatasetBase { op_version_(op_version), traceme_metadata_( {{"batch_size", - strings::Printf("%lld", static_cast(batch_size))}, + absl::StrFormat("%lld", static_cast(batch_size))}, {"drop_remainder", drop_remainder ? "true" : "false"}, {"parallel_copy", parallel_copy ? 
"true" : "false"}}) { input_->Ref(); @@ -106,7 +106,7 @@ class BatchDatasetOp::Dataset : public DatasetBase { ~Dataset() override { input_->Unref(); } std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { + const std::string& prefix) const override { name_utils::IteratorPrefixParams params; params.op_version = op_version_; return std::make_unique(Iterator::Params{ @@ -121,7 +121,7 @@ class BatchDatasetOp::Dataset : public DatasetBase { return output_shapes_; } - string DebugString() const override { + std::string DebugString() const override { name_utils::DatasetDebugStringParams params; params.op_version = op_version_; params.set_args(batch_size_); @@ -146,9 +146,9 @@ class BatchDatasetOp::Dataset : public DatasetBase { return input_->CheckExternalState(); } - absl::Status Get(OpKernelContext* ctx, int64 index, + absl::Status Get(OpKernelContext* ctx, int64_t index, std::vector* out_tensors) const override { - const int64 cardinality = Cardinality(); + const int64_t cardinality = Cardinality(); if (index < 0 || index >= cardinality) { return errors::OutOfRange("Index out of range [0, ", cardinality, "):", index); diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc index 1cc826e8c17b3d..ad5ba2464ce9c3 100644 --- a/tensorflow/core/kernels/data/cache_dataset_ops.cc +++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc @@ -86,7 +86,7 @@ class DatasetRandomAccessCache { // Extends the temporary cache up to a given index and then updates // out_tensors with the element at that index. - absl::Status Get(OpKernelContext* ctx, int64 index, + absl::Status Get(OpKernelContext* ctx, int64_t index, std::vector* out_tensors) { if (!iter_resource_) { TF_ASSIGN_OR_RETURN(iter_resource_, @@ -104,7 +104,7 @@ class DatasetRandomAccessCache { std::vector> GetCacheData() { return cache_; } private: - absl::Status ExtendTempCacheToIndex(int64 index, OpKernelContext* ctx) { + absl::Status ExtendTempCacheToIndex(int64_t index, OpKernelContext* ctx) { bool end_of_sequence; while (cache_.size() <= index) { std::vector out_tensors; @@ -169,7 +169,7 @@ class IteratorRandomAccessCache { class CacheDatasetOp::FileDatasetBase : public DatasetBase { public: FileDatasetBase(OpKernelContext* ctx, const DatasetBase* input, - string filename, Env* env) + std::string filename, Env* env) : DatasetBase(DatasetContext(ctx)), input_(input), filename_(std::move(filename)), @@ -184,7 +184,7 @@ class CacheDatasetOp::FileDatasetBase : public DatasetBase { ~FileDatasetBase() override { input_->Unref(); } std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { + const std::string& prefix) const override { name_utils::IteratorPrefixParams params; params.dataset_prefix = kFileDatasetPrefix; return std::make_unique(FileIterator::Params{ @@ -199,7 +199,7 @@ class CacheDatasetOp::FileDatasetBase : public DatasetBase { return input_->output_shapes(); } - string DebugString() const override { + std::string DebugString() const override { name_utils::DatasetDebugStringParams params; params.dataset_prefix = kFileDatasetPrefix; return name_utils::DatasetDebugString(kDatasetType, params); @@ -225,7 +225,7 @@ class CacheDatasetOp::FileDatasetBase : public DatasetBase { private: static size_t StringPaddingSize(size_t num_tensors) { - return strings::Printf(kPaddingSizeStrFormat, num_tensors - 1).size(); + return absl::StrFormat(kPaddingSizeStrFormat, num_tensors - 1).size(); } std::string FormatName(size_t item_index, size_t 
tensor_index) const { @@ -328,14 +328,14 @@ class CacheDatasetOp::FileDatasetBase : public DatasetBase { ~FileWriterIterator() override { if (!dataset()->env_->FileExists(MetaFilename(filename_)).ok()) { LOG(WARNING) << kIncompleteCacheErrorMessage; - std::vector cache_files; + std::vector cache_files; absl::Status s = dataset()->env_->GetMatchingPaths( absl::StrCat(filename_, "*"), &cache_files); if (!s.ok()) { LOG(WARNING) << "Failed to get matching files on " << filename_ << "* : " << s.ToString(); } - for (const string& path : cache_files) { + for (const std::string& path : cache_files) { s = dataset()->env_->DeleteFile(path); if (!s.ok()) { LOG(WARNING) << "Failed to delete " << path << " : " @@ -387,7 +387,7 @@ class CacheDatasetOp::FileDatasetBase : public DatasetBase { size_t tensor_index = 0; for (const Tensor& t : *out_tensors) { DCHECK_LT(tensor_index, dataset()->num_tensors_); - string key = dataset()->FormatName(cur_index_, tensor_index++); + std::string key = dataset()->FormatName(cur_index_, tensor_index++); TF_RETURN_IF_ERROR(writer_->Add(key, t)); } if (*end_of_sequence) { @@ -576,9 +576,9 @@ class CacheDatasetOp::FileDatasetBase : public DatasetBase { std::unique_ptr input_impl_ TF_GUARDED_BY(mu_); // The current prefix for the cache file. This is equal to // `StrCat(dataset()->filename_, "_", shard_id_)`. - string filename_; + std::string filename_; std::unique_ptr writer_ TF_GUARDED_BY(mu_); - string lockfile_ TF_GUARDED_BY(mu_); + std::string lockfile_ TF_GUARDED_BY(mu_); bool lockfile_created_ TF_GUARDED_BY(mu_); bool iteration_completed_ TF_GUARDED_BY(mu_); }; // FileWriterIterator @@ -730,7 +730,7 @@ class CacheDatasetOp::FileDataset : public CacheDatasetOp::FileDatasetBase { class CacheDatasetOp::FileDatasetV2 : public CacheDatasetOp::FileDatasetBase { public: explicit FileDatasetV2(OpKernelContext* ctx, const DatasetBase* input, - string filename, Env* env, + std::string filename, Env* env, const Tensor& resource_handle) : FileDatasetBase(ctx, input, filename, env), resource_handle_(resource_handle) {} @@ -768,7 +768,7 @@ class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { ~MemoryDatasetBase() override { input_->Unref(); } std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { + const std::string& prefix) const override { name_utils::IteratorPrefixParams params; params.dataset_prefix = kMemoryDatasetPrefix; return std::make_unique( @@ -785,7 +785,7 @@ class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { return input_->output_shapes(); } - string DebugString() const override { + std::string DebugString() const override { name_utils::DatasetDebugStringParams params; params.dataset_prefix = kMemoryDatasetPrefix; return name_utils::DatasetDebugString(kDatasetType, params); @@ -795,7 +795,7 @@ class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { return input_->Cardinality(options); }; - absl::Status Get(OpKernelContext* ctx, int64 index, + absl::Status Get(OpKernelContext* ctx, int64_t index, std::vector* out_tensors) const override { mutex_lock l(mu_); @@ -815,7 +815,7 @@ class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { return dataset_random_access_cache_->Get(ctx, index, out_tensors); } - absl::Status Get(AnyContext ctx, int64 index, + absl::Status Get(AnyContext ctx, int64_t index, std::vector* out_tensors) const override { mutex_lock l(mu_); if (!iterator_random_access_cache_) { @@ -1182,7 +1182,7 @@ void CacheDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, OP_REQUIRES_OK(ctx, 
ParseScalarArgument(ctx, kFileName, &filename)); if (filename.empty()) { static std::atomic resource_id_counter(0); - const string& container = ctx->resource_manager()->default_container(); + const std::string& container = ctx->resource_manager()->default_container(); auto name = strings::StrCat(ctx->op_kernel().name(), "/", kMemoryCache, "_", resource_id_counter.fetch_add(1)); if (op_version_ == 2) { diff --git a/tensorflow/core/kernels/data/cache_dataset_ops_test.cc b/tensorflow/core/kernels/data/cache_dataset_ops_test.cc index 2ccf09149c4c34..ec4067c15110c3 100644 --- a/tensorflow/core/kernels/data/cache_dataset_ops_test.cc +++ b/tensorflow/core/kernels/data/cache_dataset_ops_test.cc @@ -32,10 +32,10 @@ constexpr char kMemoryDatasetPrefix[] = "Memory"; class CacheDatasetParams : public DatasetParams { public: template - CacheDatasetParams(T input_dataset_params, string filename, + CacheDatasetParams(T input_dataset_params, std::string filename, DataTypeVector output_dtypes, std::vector output_shapes, - string node_name) + std::string node_name) : DatasetParams(std::move(output_dtypes), std::move(output_shapes), std::move(node_name)), filename_(filename) { @@ -51,7 +51,8 @@ class CacheDatasetParams : public DatasetParams { return {filename_tensor}; } - absl::Status GetInputNames(std::vector* input_names) const override { + absl::Status GetInputNames( + std::vector* input_names) const override { *input_names = {CacheDatasetOp::kInputDataset, CacheDatasetOp::kFileName}; return absl::OkStatus(); } @@ -63,12 +64,14 @@ class CacheDatasetParams : public DatasetParams { return absl::OkStatus(); } - string dataset_type() const override { return CacheDatasetOp::kDatasetType; } + std::string dataset_type() const override { + return CacheDatasetOp::kDatasetType; + } - string filename() const { return filename_; } + std::string filename() const { return filename_; } private: - string filename_; + std::string filename_; }; class CacheDatasetOpTest : public DatasetOpsTestBase { @@ -82,14 +85,14 @@ class CacheDatasetOpTest : public DatasetOpsTestBase { ~CacheDatasetOpTest() override { if (!cache_filename_.empty()) { - std::vector cache_files; + std::vector cache_files; absl::Status s = device_->env()->GetMatchingPaths( absl::StrCat(cache_filename_, "*"), &cache_files); if (!s.ok()) { LOG(WARNING) << "Failed to get matching files on " << cache_filename_ << "* : " << s; } - for (const string& path : cache_files) { + for (const std::string& path : cache_files) { s = device_->env()->DeleteFile(path); if (!s.ok()) { LOG(WARNING) << "Failed to delete " << path << " : " << s; diff --git a/tensorflow/core/kernels/data/cache_ops.cc b/tensorflow/core/kernels/data/cache_ops.cc index 0dce7f73215f92..0338ca1b3fcfc8 100644 --- a/tensorflow/core/kernels/data/cache_ops.cc +++ b/tensorflow/core/kernels/data/cache_ops.cc @@ -80,7 +80,7 @@ AnonymousMemoryCacheHandleOp::AnonymousMemoryCacheHandleOp( /* ref_counting */ true, /* return_deleter */ true) {} -string AnonymousMemoryCacheHandleOp::name() { return kMemoryCache; } +std::string AnonymousMemoryCacheHandleOp::name() { return kMemoryCache; } absl::Status AnonymousMemoryCacheHandleOp::CreateResource( OpKernelContext* ctx, std::unique_ptr flib_def, diff --git a/tensorflow/core/kernels/data/cache_ops.h b/tensorflow/core/kernels/data/cache_ops.h index e1e58ae9c1df89..f91f261ea79bec 100644 --- a/tensorflow/core/kernels/data/cache_ops.h +++ b/tensorflow/core/kernels/data/cache_ops.h @@ -62,7 +62,7 @@ class MemoryCacheManager : public ResourceBase { public: 
MemoryCacheManager() : cache_(std::make_shared()) {} - string DebugString() const override; + std::string DebugString() const override; std::shared_ptr get() { return cache_; } @@ -77,7 +77,7 @@ class AnonymousMemoryCacheHandleOp explicit AnonymousMemoryCacheHandleOp(OpKernelConstruction* ctx); private: - string name() override; + std::string name() override; absl::Status CreateResource( OpKernelContext* ctx, std::unique_ptr flib_def, std::unique_ptr pflr, diff --git a/tensorflow/core/kernels/data/concatenate_dataset_op.cc b/tensorflow/core/kernels/data/concatenate_dataset_op.cc index 6d4bfc88504a7e..d9fed39b07ba88 100644 --- a/tensorflow/core/kernels/data/concatenate_dataset_op.cc +++ b/tensorflow/core/kernels/data/concatenate_dataset_op.cc @@ -105,7 +105,7 @@ class ConcatenateDatasetOp::Dataset : public DatasetBase { } std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { + const std::string& prefix) const override { return std::make_unique(Iterator::Params{ this, name_utils::IteratorPrefix(kDatasetType, prefix)}); } @@ -124,7 +124,7 @@ class ConcatenateDatasetOp::Dataset : public DatasetBase { return output_shapes_; } - string DebugString() const override { + std::string DebugString() const override { return name_utils::DatasetDebugString(kDatasetType); } @@ -155,7 +155,7 @@ class ConcatenateDatasetOp::Dataset : public DatasetBase { return to_concatenate_->CheckExternalState(); } - absl::Status Get(OpKernelContext* ctx, int64 index, + absl::Status Get(OpKernelContext* ctx, int64_t index, std::vector* out_tensors) const override { TF_RETURN_IF_ERROR(CheckRandomAccessCompatible(index)); if (index < input_cardinality_) { diff --git a/tensorflow/core/kernels/data/dataset_ops.cc b/tensorflow/core/kernels/data/dataset_ops.cc index b3c114ce833a08..cafd1d4880b379 100644 --- a/tensorflow/core/kernels/data/dataset_ops.cc +++ b/tensorflow/core/kernels/data/dataset_ops.cc @@ -120,7 +120,7 @@ void DatasetToGraphOp::Compute(OpKernelContext* ctx) { DatasetCardinalityOp::DatasetCardinalityOp(OpKernelConstruction* ctx) : OpKernel(ctx), cardinality_options_(new CardinalityOptions) { if (ctx->HasAttr(kCardinalityOptions)) { - string options_serialized; + std::string options_serialized; OP_REQUIRES_OK(ctx, ctx->GetAttr(kCardinalityOptions, &options_serialized)); if (!options_serialized.empty()) cardinality_options_->ParseFromString(options_serialized); @@ -141,7 +141,7 @@ void DatasetFromGraphOp::Compute(OpKernelContext* ctx) { GraphDef graph_def; OP_REQUIRES(ctx, graph_def.ParseFromString(graph_def_string), errors::InvalidArgument("Could not parse GraphDef")); - string output_node; + std::string output_node; for (const auto& node : graph_def.node()) { if (node.op() == FunctionLibraryDefinition::kRetOp) { output_node = node.input(0); diff --git a/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc index 88e7f1528d4c83..3163f4e62c320a 100644 --- a/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc @@ -112,9 +112,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { output_shapes_(output_shapes), traceme_metadata_( {{"block_length", - strings::Printf("%lld", static_cast(block_length))}, + absl::StrFormat("%lld", static_cast(block_length))}, {"cycle_length", - strings::Printf("%lld", static_cast(cycle_length))}, + absl::StrFormat("%lld", 
static_cast(cycle_length))}, {"deterministic", deterministic.IsDeterministic() || deterministic.IsDefault() ? "true" @@ -126,7 +126,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { ~Dataset() override { input_->Unref(); } std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { + const std::string& prefix) const override { name_utils::IteratorPrefixParams params; params.op_version = op_version_; bool deterministic = @@ -143,7 +143,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { return output_shapes_; } - string DebugString() const override { + std::string DebugString() const override { name_utils::DatasetDebugStringParams params; params.op_version = op_version_; return name_utils::DatasetDebugString(kDatasetType, params); @@ -949,7 +949,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { absl::Status WriteWorkerStateLocked(IteratorStateWriter* writer, int index) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_, ckpt_mu_) { - string iterator_name = + std::string iterator_name = strings::StrCat(prefix(), "::", kWorker, "_", index); TF_RETURN_IF_ERROR(writer->WriteScalar(iterator_name, kInputSize, workers_[index].input.size())); @@ -975,7 +975,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { absl::Status ReadWorkerStateLocked(IteratorContext* ctx, IteratorStateReader* reader, int index) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_, ckpt_mu_) { - string worker_prefix = + std::string worker_prefix = strings::StrCat(prefix(), "::", kWorker, "_", index); // Restore inputs. int64_t input_size; @@ -1009,7 +1009,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { IteratorStateWriter* writer, int index) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_, ckpt_mu_) { - string iterator_name = + std::string iterator_name = strings::StrCat(prefix(), "::", kWorkerThread, "_", index); if (worker_thread_states_[index].iterator != nullptr) { TF_RETURN_IF_ERROR( @@ -1043,7 +1043,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { IteratorStateReader* reader, int index, WorkerThreadState* state) { - string worker_prefix = + std::string worker_prefix = strings::StrCat(prefix(), "::", kWorkerThread, "_", index); // Restore inputs. 
int64_t input_size; @@ -1083,8 +1083,8 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { absl::Status WriteOutputElemLocked(IteratorStateWriter* writer, const OutputElem& output_elem, - const string& iterator_name, - const string& prefix) + const std::string& iterator_name, + const std::string& prefix) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_, ckpt_mu_) { TF_RETURN_IF_ERROR(WriteStatusLocked(writer, iterator_name, absl::StrCat(prefix, "_", kStatus), @@ -1103,8 +1103,8 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { absl::Status ReadOutputElemLocked(IteratorContext* ctx, IteratorStateReader* reader, OutputElem* output_elem, - const string& iterator_name, - const string& prefix) { + const std::string& iterator_name, + const std::string& prefix) { TF_RETURN_IF_ERROR(ReadStatusLocked(reader, iterator_name, absl::StrCat(prefix, "_", kStatus), &output_elem->status)); @@ -1123,8 +1123,8 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { } absl::Status WriteStatusLocked(IteratorStateWriter* writer, - const string& iterator_name, - const string& prefix, + const std::string& iterator_name, + const std::string& prefix, const absl::Status& status) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_, ckpt_mu_) { TF_RETURN_IF_ERROR( @@ -1139,8 +1139,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { } absl::Status ReadStatusLocked(IteratorStateReader* reader, - const string& iterator_name, - const string& prefix, absl::Status* status) { + const std::string& iterator_name, + const std::string& prefix, + absl::Status* status) { int64_t code_int; TF_RETURN_IF_ERROR(reader->ReadScalar( iterator_name, absl::StrCat(prefix, "_", kCode), &code_int)); diff --git a/tensorflow/core/kernels/data/experimental/random_dataset_op_test.cc b/tensorflow/core/kernels/data/experimental/random_dataset_op_test.cc index a3e38ce4aeab90..f5d94b30bbd7ba 100644 --- a/tensorflow/core/kernels/data/experimental/random_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/experimental/random_dataset_op_test.cc @@ -80,7 +80,7 @@ class RandomDatasetParams : public DatasetParams { bool rerandomize_each_iteration, DataTypeVector output_dtypes, std::vector output_shapes, - string node_name) + std::string node_name) : DatasetParams(std::move(output_dtypes), std::move(output_shapes), std::move(node_name)), seed_(CreateTensor(TensorShape({}), {seed})), @@ -98,7 +98,8 @@ class RandomDatasetParams : public DatasetParams { return {seed_, seed2_, seed_generator_resource_}; } - absl::Status GetInputNames(std::vector* input_names) const override { + absl::Status GetInputNames( + std::vector* input_names) const override { *input_names = {RandomDatasetOp::kSeed, RandomDatasetOp::kSeed2}; if (op_version_ == 2) { input_names->emplace_back("seed_generator"); @@ -117,7 +118,9 @@ class RandomDatasetParams : public DatasetParams { return absl::OkStatus(); } - string dataset_type() const override { return RandomDatasetOp::kDatasetType; } + std::string dataset_type() const override { + return RandomDatasetOp::kDatasetType; + } private: Tensor seed_; diff --git a/tensorflow/core/kernels/data/experimental/save_dataset_op_test.cc b/tensorflow/core/kernels/data/experimental/save_dataset_op_test.cc index fe2315e35bd6a4..01f96cb04ed82e 100644 --- a/tensorflow/core/kernels/data/experimental/save_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/experimental/save_dataset_op_test.cc @@ -38,7 +38,7 @@ class SaveDatasetV2Params : public DatasetParams { std::vector func_lib, bool use_shard_func, DataTypeVector 
output_dtypes, std::vector output_shapes, - string node_name, DataTypeVector type_arguments) + std::string node_name, DataTypeVector type_arguments) : DatasetParams(std::move(output_dtypes), std::move(output_shapes), std::move(node_name)), path_(path), @@ -59,7 +59,8 @@ class SaveDatasetV2Params : public DatasetParams { return input_tensors; } - absl::Status GetInputNames(std::vector* input_names) const override { + absl::Status GetInputNames( + std::vector* input_names) const override { input_names->clear(); input_names->emplace_back(SaveDatasetV2Op::kInputDataset); input_names->emplace_back(SaveDatasetV2Op::kPath); @@ -78,11 +79,13 @@ class SaveDatasetV2Params : public DatasetParams { return absl::OkStatus(); } - string path() const { return path_; } + std::string path() const { return path_; } - string dataset_type() const override { return SaveDatasetV2Op::kDatasetType; } + std::string dataset_type() const override { + return SaveDatasetV2Op::kDatasetType; + } - string op_name() const override { return "SaveDatasetV2"; } + std::string op_name() const override { return "SaveDatasetV2"; } std::vector func_lib() const override { return func_lib_; } diff --git a/tensorflow/core/kernels/data/experimental/sleep_dataset_op.cc b/tensorflow/core/kernels/data/experimental/sleep_dataset_op.cc index b765f96d60e71c..ff15bd00f4e1c6 100644 --- a/tensorflow/core/kernels/data/experimental/sleep_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/sleep_dataset_op.cc @@ -60,7 +60,7 @@ class SleepDatasetOp : public UnaryDatasetOpKernel { ~Dataset() override { input_->Unref(); } std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { + const std::string& prefix) const override { return std::make_unique( Iterator::Params{this, absl::StrCat(prefix, "::Sleep")}); } @@ -72,7 +72,9 @@ class SleepDatasetOp : public UnaryDatasetOpKernel { return input_->output_shapes(); } - string DebugString() const override { return "SleepDatasetOp::Dataset"; } + std::string DebugString() const override { + return "SleepDatasetOp::Dataset"; + } int64_t CardinalityInternal(CardinalityOptions options) const override { return input_->Cardinality(options); diff --git a/tensorflow/core/kernels/data/experimental/sql_dataset_op.cc b/tensorflow/core/kernels/data/experimental/sql_dataset_op.cc index cab138c9903c42..3ab56ba9af36bd 100644 --- a/tensorflow/core/kernels/data/experimental/sql_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/sql_dataset_op.cc @@ -72,7 +72,7 @@ class SqlDatasetOp : public DatasetOpKernel { // TODO(b/64276826) Change this check when we add support for other // databases. OP_REQUIRES(ctx, driver_name == "sqlite", - errors::InvalidArgument(tensorflow::strings::Printf( + errors::InvalidArgument(absl::StrFormat( "The database type, %s, is not supported by SqlDataset. 
" "The set of supported databases is: {'sqlite'}.", driver_name.c_str()))); diff --git a/tensorflow/core/kernels/data/finalize_dataset_op_test.cc b/tensorflow/core/kernels/data/finalize_dataset_op_test.cc index 2077cc28c161ec..c076e2dcc4dc77 100644 --- a/tensorflow/core/kernels/data/finalize_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/finalize_dataset_op_test.cc @@ -31,7 +31,7 @@ class FinalizeDatasetParams : public DatasetParams { template FinalizeDatasetParams(T input_dataset_params, DataTypeVector output_dtypes, std::vector output_shapes, - string node_name) + std::string node_name) : DatasetParams(std::move(output_dtypes), std::move(output_shapes), std::move(node_name)), has_captured_ref_(false) { @@ -40,7 +40,8 @@ class FinalizeDatasetParams : public DatasetParams { std::vector GetInputTensors() const override { return {}; } - absl::Status GetInputNames(std::vector* input_names) const override { + absl::Status GetInputNames( + std::vector* input_names) const override { input_names->emplace_back(FinalizeDatasetOp::kInputDataset); return absl::OkStatus(); } @@ -52,7 +53,7 @@ class FinalizeDatasetParams : public DatasetParams { return absl::OkStatus(); } - string dataset_type() const override { return "Finalize"; } + std::string dataset_type() const override { return "Finalize"; } private: bool has_captured_ref_; diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index d10513763fa726..a4a3bb4c77afeb 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -583,7 +583,7 @@ AnonymousIteratorHandleOp::AnonymousIteratorHandleOp( OP_REQUIRES_OK(context, context->GetAttr(kOutputShapes, &output_shapes_)); } -string AnonymousIteratorHandleOp::name() { return kAnonymousIterator; } +std::string AnonymousIteratorHandleOp::name() { return kAnonymousIterator; } absl::Status AnonymousIteratorHandleOp::CreateResource( OpKernelContext* ctx, std::unique_ptr flib_def, @@ -725,7 +725,7 @@ class OneShotIteratorOp : public AsyncOpKernel { graph_def_version_(ctx->graph_def_version()) { - string shared_name; + std::string shared_name; OP_REQUIRES_OK(ctx, ctx->GetAttr("shared_name", &shared_name)); OP_REQUIRES(ctx, shared_name.empty(), errors::InvalidArgument("OneShotIteratorOp does not currently " @@ -837,9 +837,10 @@ class OneShotIteratorOp : public AsyncOpKernel { &f_handle)); FunctionLibraryRuntime::Options opts; opts.cancellation_manager = ctx->cancellation_manager(); - ScopedStepContainer step_container(opts.step_id, [ctx](const string& name) { - ctx->resource_manager()->Cleanup(name).IgnoreError(); - }); + ScopedStepContainer step_container( + opts.step_id, [ctx](const std::string& name) { + ctx->resource_manager()->Cleanup(name).IgnoreError(); + }); opts.step_container = &step_container; opts.runner = ctx->runner(); opts.run_all_kernels_inline = ctx->run_all_kernels_inline(); diff --git a/tensorflow/core/kernels/data/zip_dataset_op.cc b/tensorflow/core/kernels/data/zip_dataset_op.cc index b381f28def6ea4..e5ef9d1451cd69 100644 --- a/tensorflow/core/kernels/data/zip_dataset_op.cc +++ b/tensorflow/core/kernels/data/zip_dataset_op.cc @@ -84,7 +84,7 @@ class ZipDatasetOp::Dataset : public DatasetBase { } std::unique_ptr MakeIteratorInternal( - const string& prefix) const override { + const std::string& prefix) const override { return std::make_unique(Iterator::Params{ this, name_utils::IteratorPrefix(kDatasetType, prefix)}); } @@ -103,7 +103,7 @@ class ZipDatasetOp::Dataset : public 
DatasetBase { return output_shapes_; } - string DebugString() const override { + std::string DebugString() const override { return name_utils::DatasetDebugString(kDatasetType); } @@ -137,7 +137,7 @@ class ZipDatasetOp::Dataset : public DatasetBase { return absl::OkStatus(); } - absl::Status Get(OpKernelContext* ctx, int64 index, + absl::Status Get(OpKernelContext* ctx, int64_t index, std::vector* out_tensors) const override { TF_RETURN_IF_ERROR(CheckRandomAccessCompatible(index)); out_tensors->reserve(output_dtypes().size()); diff --git a/tensorflow/core/kernels/data/zip_dataset_op_test.cc b/tensorflow/core/kernels/data/zip_dataset_op_test.cc index ce62706e224c2f..bfde48fb9509d8 100644 --- a/tensorflow/core/kernels/data/zip_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/zip_dataset_op_test.cc @@ -42,7 +42,7 @@ class ZipDatasetParams : public DatasetParams { ZipDatasetParams(std::vector input_dataset_params, DataTypeVector output_dtypes, std::vector output_shapes, - int num_input_datasets, string node_name) + int num_input_datasets, std::string node_name) : DatasetParams(std::move(output_dtypes), std::move(output_shapes), std::move(node_name)), num_input_datasets_(num_input_datasets) { @@ -57,7 +57,8 @@ class ZipDatasetParams : public DatasetParams { std::vector GetInputTensors() const override { return {}; } - absl::Status GetInputNames(std::vector* input_names) const override { + absl::Status GetInputNames( + std::vector* input_names) const override { input_names->clear(); for (int i = 0; i < num_input_datasets_; ++i) { input_names->emplace_back( @@ -75,10 +76,12 @@ class ZipDatasetParams : public DatasetParams { return absl::OkStatus(); } - string dataset_type() const override { return ZipDatasetOp::kDatasetType; } + std::string dataset_type() const override { + return ZipDatasetOp::kDatasetType; + } private: - int32 num_input_datasets_; + int32_t num_input_datasets_; }; class ZipDatasetOpTest : public DatasetOpsTestBase {}; diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 78ca7948e55c0f..db7cf3f31f7849 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -560,7 +560,7 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel { errors::InvalidArgument("Sliding window strides field must " "specify 4 dimensions")); - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -619,7 +619,7 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel { "Conv2DBackpropInput: input_sizes input must be 1-dim, not ", input_sizes.dims())); TensorShape input_shape; - const int32* in_sizes_data = input_sizes.template flat().data(); + const int32_t* in_sizes_data = input_sizes.template flat().data(); for (int i = 0; i < input_sizes.NumElements(); ++i) { OP_REQUIRES(context, in_sizes_data[i] >= 0, @@ -695,7 +695,7 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel { bool use_cudnn_grouped_conv_; private: - std::vector strides_; + std::vector strides_; Padding padding_; std::vector explicit_paddings_; TensorFormat data_format_; @@ -1071,7 +1071,7 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel { errors::InvalidArgument("Sliding window strides field must " "specify 4 dimensions")); - string data_format; + std::string data_format; 
OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -1129,7 +1129,8 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel { "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ", filter_sizes.dims())); TensorShape filter_shape; - const int32* filter_sizes_data = filter_sizes.template flat().data(); + const int32_t* filter_sizes_data = + filter_sizes.template flat().data(); for (int i = 0; i < filter_sizes.NumElements(); ++i) { OP_REQUIRES(context, filter_sizes_data[i] >= 0, errors::InvalidArgument("Dimension ", i, @@ -1249,7 +1250,7 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel { bool use_cudnn_grouped_conv_; private: - std::vector strides_; + std::vector strides_; Padding padding_; std::vector explicit_paddings_; TensorFormat data_format_; diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc index bcc81f903b84f6..5743b6d6cc8cc7 100644 --- a/tensorflow/core/kernels/fft_ops.cc +++ b/tensorflow/core/kernels/fft_ops.cc @@ -599,11 +599,11 @@ class CufftScratchAllocator : public se::ScratchAllocator { CufftScratchAllocator(int64_t memory_limit, OpKernelContext* context) : memory_limit_(memory_limit), total_byte_size_(0), context_(context) {} int64_t GetMemoryLimitInBytes() override { return memory_limit_; } - tsl::StatusOr> AllocateBytes( + absl::StatusOr> AllocateBytes( int64_t byte_size) override { Tensor temporary_memory; if (byte_size > memory_limit_) { - return tsl::StatusOr>(); + return absl::StatusOr>(); } AllocationAttributes allocation_attr; allocation_attr.retry_on_failure = false; @@ -611,13 +611,13 @@ class CufftScratchAllocator : public se::ScratchAllocator { DT_UINT8, TensorShape({byte_size}), &temporary_memory, AllocatorAttributes(), allocation_attr)); if (!allocation_status.ok()) { - return tsl::StatusOr>(); + return absl::StatusOr>(); } // Hold the reference of the allocated tensors until the end of the // allocator. 
allocated_tensors_.push_back(temporary_memory); total_byte_size_ += byte_size; - return tsl::StatusOr>( + return absl::StatusOr>( AsDeviceMemory(temporary_memory.flat().data(), temporary_memory.flat().size())); } diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc index 3d510e4b50dadd..015b4ac8fafa7a 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cc @@ -717,7 +717,8 @@ class CudnnBatchNormAllocatorInTemp : public ScratchAllocator { return std::numeric_limits::max(); } - StatusOr> AllocateBytes(int64_t byte_size) override { + absl::StatusOr> AllocateBytes( + int64_t byte_size) override { Tensor temporary_memory; const DataType tf_data_type = DataTypeToEnum::v(); int64_t allocate_count = @@ -770,7 +771,8 @@ class CudnnBatchNormAllocatorInOutput : public ScratchAllocator { return std::numeric_limits::max(); } - StatusOr> AllocateBytes(int64_t byte_size) override { + absl::StatusOr> AllocateBytes( + int64_t byte_size) override { output_allocated = true; DCHECK(total_byte_size_ == 0) << "Reserve space allocator can only be called once"; @@ -787,7 +789,7 @@ class CudnnBatchNormAllocatorInOutput : public ScratchAllocator { auto memory_uint8 = DeviceMemory::MakeFromByteSize( temporary_memory->template flat().data(), temporary_memory->template flat().size() * sizeof(T)); - return StatusOr>(memory_uint8); + return absl::StatusOr>(memory_uint8); } int64_t TotalByteSize() { return total_byte_size_; } diff --git a/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc b/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc index 73fa3c4b74e296..794f51cd1cb394 100644 --- a/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc +++ b/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc @@ -62,7 +62,7 @@ class FuzzExampleProtoFastParsing : public FuzzSession { // TODO(dga): Test the batch case also. Tensor input_tensor(tensorflow::DT_STRING, TensorShape({})); input_tensor.scalar()() = - string(reinterpret_cast(data), size); + std::string(reinterpret_cast(data), size); RunInputs({{"input", input_tensor}}); } }; diff --git a/tensorflow/core/kernels/fuzzing/fuzz_session.h b/tensorflow/core/kernels/fuzzing/fuzz_session.h index 09c7563d2efd17..d178208a1a35e0 100644 --- a/tensorflow/core/kernels/fuzzing/fuzz_session.h +++ b/tensorflow/core/kernels/fuzzing/fuzz_session.h @@ -81,7 +81,7 @@ class FuzzSession { // Initializes the FuzzSession. Not safe for multithreading. // Separate init function because the call to virtual BuildGraphDef // can't be put into the constructor. - Status InitIfNeeded() { + absl::Status InitIfNeeded() { if (initialized_) { return absl::OkStatus(); } @@ -96,7 +96,7 @@ class FuzzSession { GraphDef graph_def; TF_CHECK_OK(root.ToGraphDef(&graph_def)); - Status status = session_->Create(graph_def); + absl::Status status = session_->Create(graph_def); if (!status.ok()) { // This is FATAL, because this code is designed to fuzz an op // within a session. Failure to create the session means we @@ -111,20 +111,20 @@ class FuzzSession { // any returned output. // Note: We are ignoring Status from Run here since fuzzers don't need to // check it (as that will slow them down and printing/logging is useless). 
-  void RunInputs(const std::vector<std::pair<string, Tensor> >& inputs) {
+  void RunInputs(const std::vector<std::pair<std::string, Tensor> >& inputs) {
     RunInputsWithStatus(inputs).IgnoreError();
   }
 
   // Same as RunInputs but don't ignore status
-  Status RunInputsWithStatus(
-      const std::vector<std::pair<string, Tensor> >& inputs) {
+  absl::Status RunInputsWithStatus(
+      const std::vector<std::pair<std::string, Tensor> >& inputs) {
     return session_->Run(inputs, {}, {"output"}, nullptr);
   }
 
   // Dispatches to FuzzImpl; small amount of sugar to keep the code
   // of the per-op fuzzers tiny.
   int Fuzz(const uint8_t* data, size_t size) {
-    Status status = InitIfNeeded();
+    absl::Status status = InitIfNeeded();
     TF_CHECK_OK(status) << "Fuzzer graph initialization failed: " << status.message();
     // No return value from fuzzing: Success is defined as "did not
@@ -146,7 +146,7 @@ class FuzzStringInputOp : public FuzzSession {
   void FuzzImpl(const uint8_t* data, size_t size) final {
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
     input_tensor.scalar<tstring>()() =
-        string(reinterpret_cast<const char*>(data), size);
+        std::string(reinterpret_cast<const char*>(data), size);
     RunInputs({{"input", input_tensor}});
   }
 };
diff --git a/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc b/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
index 08af574ac9ae4e..458329000ca349 100644
--- a/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
@@ -42,7 +42,7 @@ class FuzzOneHot : public FuzzSession {
   void FuzzImpl(const uint8_t* data, size_t size) override {
     int64_t input_size;
     int32_t depth;
-    uint8 on, off;
+    uint8_t on, off;
    const uint8_t* input_data;
 
     if (size > 3) {
@@ -51,7 +51,7 @@
       if (size > kMaxSize) {
         size = kMaxSize;
       }
-      depth = static_cast<int32>(data[0]);
+      depth = static_cast<int32_t>(data[0]);
       on = data[1];
       off = data[2];
       input_size = static_cast<int64_t>(size - 3);
@@ -69,13 +69,13 @@
     Tensor on_tensor(tensorflow::DT_UINT8, TensorShape({}));
     Tensor off_tensor(tensorflow::DT_UINT8, TensorShape({}));
 
-    auto flat_tensor = input_tensor.flat<uint8>();
+    auto flat_tensor = input_tensor.flat<uint8_t>();
     for (size_t i = 0; i < input_size; i++) {
       flat_tensor(i) = input_data[i];
     }
-    depth_tensor.scalar<int32>()() = depth;
-    on_tensor.scalar<uint8>()() = on;
-    off_tensor.scalar<uint8>()() = off;
+    depth_tensor.scalar<int32_t>()() = depth;
+    on_tensor.scalar<uint8_t>()() = on;
+    off_tensor.scalar<uint8_t>()() = off;
 
     RunInputs({{"input", input_tensor},
                {"depth", depth_tensor},
diff --git a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
index de3ae36dc75d56..a8cc47e599ee43 100644
--- a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
@@ -53,7 +53,8 @@ class FuzzParseTensor : public FuzzSession {
     // detects another similar OOM.
     // After adding `-fsanitize=null` to ASAN (cl/317376103), the memory
     // footprint increased, so we lower the maximum threshold to 2^18.
- string as_string = string(reinterpret_cast(data), size); + std::string as_string = + std::string(reinterpret_cast(data), size); TensorProto proto; if (!ParseProtoUnlimited(&proto, as_string)) { LOG(WARNING) << "Unable to parse proto of tensor\n"; diff --git a/tensorflow/core/kernels/fuzzing/scatter_nd_fuzz.cc b/tensorflow/core/kernels/fuzzing/scatter_nd_fuzz.cc index 5104711ad3048f..81f489b2080d80 100644 --- a/tensorflow/core/kernels/fuzzing/scatter_nd_fuzz.cc +++ b/tensorflow/core/kernels/fuzzing/scatter_nd_fuzz.cc @@ -67,7 +67,7 @@ class FuzzScatterNd : public FuzzSession { // Subsequent elements give the contents of the shape tensor. // To not get out of memory, reduce all dimensions to at most kMaxDim - auto flat_shape = shape_tensor.flat(); + auto flat_shape = shape_tensor.flat(); for (i = 0; i < shape_dims; i++) { flat_shape(i) = data[data_ix++] % kMaxDim; } @@ -94,7 +94,7 @@ class FuzzScatterNd : public FuzzSession { Tensor indices_tensor(tensorflow::DT_INT32, TensorShape(indices_dims)); // Rest of the buffer is used to fill in the indices_tensor - auto flat_indices = indices_tensor.flat(); + auto flat_indices = indices_tensor.flat(); for (i = 0; i < num_indices && data_ix < size; i++) { flat_indices(i) = data[data_ix++]; } @@ -118,7 +118,7 @@ class FuzzScatterNd : public FuzzSession { Tensor updates_tensor(tensorflow::DT_INT32, TensorShape(updates_dims)); // We don't care about the values in the updates_tensor, make them all be 1 - auto flat_updates = updates_tensor.flat(); + auto flat_updates = updates_tensor.flat(); for (i = 0; i < num_indices; i++) { flat_updates(i) = 1; } diff --git a/tensorflow/core/kernels/image/non_max_suppression_op_gpu_test.cc b/tensorflow/core/kernels/image/non_max_suppression_op_gpu_test.cc index bcdc406d85201a..151a956ca22fd4 100644 --- a/tensorflow/core/kernels/image/non_max_suppression_op_gpu_test.cc +++ b/tensorflow/core/kernels/image/non_max_suppression_op_gpu_test.cc @@ -195,8 +195,7 @@ TEST_F(NonMaxSuppressionV2GPUOpTest, TestInconsistentBoxAndScoreShapes) { Status s = RunOpKernel(); ASSERT_FALSE(s.ok()); - EXPECT_TRUE( - str_util::StrContains(s.ToString(), "scores has incompatible shape")) + EXPECT_TRUE(absl::StrContains(s.ToString(), "scores has incompatible shape")) << s; } @@ -210,7 +209,7 @@ TEST_F(NonMaxSuppressionV2GPUOpTest, TestInvalidIOUThreshold) { ASSERT_FALSE(s.ok()); EXPECT_TRUE( - str_util::StrContains(s.ToString(), "iou_threshold must be in [0, 1]")) + absl::StrContains(s.ToString(), "iou_threshold must be in [0, 1]")) << s; } diff --git a/tensorflow/core/kernels/immutable_constant_op.cc b/tensorflow/core/kernels/immutable_constant_op.cc index be0194413a3b81..4fbd1edfba920a 100644 --- a/tensorflow/core/kernels/immutable_constant_op.cc +++ b/tensorflow/core/kernels/immutable_constant_op.cc @@ -26,7 +26,7 @@ class MemmappedTensorAllocator : public Allocator { public: MemmappedTensorAllocator() {} - absl::Status InitializeFromRegion(const string& name, Env* env) { + absl::Status InitializeFromRegion(const std::string& name, Env* env) { const auto status = env->NewReadOnlyMemoryRegionFromFile(name, &memory_region_); if (!status.ok()) { @@ -34,7 +34,7 @@ class MemmappedTensorAllocator : public Allocator { } return absl::OkStatus(); } - string Name() override { return "MemmappedTensorAllocator"; } + std::string Name() override { return "MemmappedTensorAllocator"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override { if ((reinterpret_cast(memory_region_->data())) % alignment != 0) { diff --git 
a/tensorflow/core/kernels/immutable_constant_op.h b/tensorflow/core/kernels/immutable_constant_op.h index 264abc8401b3b4..cd645686bddcfa 100644 --- a/tensorflow/core/kernels/immutable_constant_op.h +++ b/tensorflow/core/kernels/immutable_constant_op.h @@ -38,7 +38,7 @@ class ImmutableConstantOp : public OpKernel { static constexpr char const* kMemoryRegionNameAttr = "memory_region_name"; private: - string region_name_; + std::string region_name_; DataType dtype_; TensorShape shape_; ImmutableConstantOp(const ImmutableConstantOp&) = delete; diff --git a/tensorflow/core/kernels/immutable_constant_op_test.cc b/tensorflow/core/kernels/immutable_constant_op_test.cc index 1cfed79bf3318e..955d3f8751c12a 100644 --- a/tensorflow/core/kernels/immutable_constant_op_test.cc +++ b/tensorflow/core/kernels/immutable_constant_op_test.cc @@ -40,7 +40,7 @@ constexpr size_t kTestTensorSizeBytes = kTestTensorSize * sizeof(float); class TestReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { public: TestReadOnlyMemoryRegion() = delete; - explicit TestReadOnlyMemoryRegion(uint64 length) + explicit TestReadOnlyMemoryRegion(uint64_t length) : memptr_(cpu_allocator()->AllocateRaw(kTestAlignment, length)), length_(length) {} ~TestReadOnlyMemoryRegion() override { @@ -48,11 +48,11 @@ class TestReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { } const void* data() override { return memptr_; } float* GetWritableDataStart() { return reinterpret_cast(memptr_); } - uint64 length() override { return length_; } + uint64_t length() override { return length_; } protected: void* memptr_; - uint64 length_; + uint64_t length_; }; // A mock file system and environment class that creates ReadOnlyMemoryRegion @@ -65,7 +65,7 @@ class TestFileSystem : public NullFileSystem { using NullFileSystem::NewReadOnlyMemoryRegionFromFile; absl::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) override { float val = 0; absl::string_view scheme, host, path; @@ -146,13 +146,13 @@ TEST(ImmutableConstantOpTest, ExecutionError) { error::INTERNAL); } -absl::Status CreateTempFileFloat(Env* env, float value, uint64 size, - string* filename) { - const string dir = testing::TmpDir(); +absl::Status CreateTempFileFloat(Env* env, float value, uint64_t size, + std::string* filename) { + const std::string dir = testing::TmpDir(); *filename = io::JoinPath(dir, absl::StrCat("file_", value)); std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewWritableFile(*filename, &file)); - for (uint64 i = 0; i < size; ++i) { + for (uint64_t i = 0; i < size; ++i) { absl::string_view sp(static_cast(static_cast(&value)), sizeof(value)); TF_RETURN_IF_ERROR(file->Append(sp)); @@ -166,7 +166,7 @@ TEST(ImmutableConstantOpTest, FromFile) { Env* env = Env::Default(); auto root = Scope::NewRootScope().ExitOnError(); - string two_file, three_file; + std::string two_file, three_file; TF_ASSERT_OK(CreateTempFileFloat(env, 2.0f, 1000, &two_file)); TF_ASSERT_OK(CreateTempFileFloat(env, 3.0f, 1000, &three_file)); auto node1 = ops::ImmutableConst(root, DT_FLOAT, kFileTensorShape, two_file); @@ -191,9 +191,10 @@ TEST(ImmutableConstantOpTest, FromFile) { EXPECT_EQ(outputs.front().flat()(2), 2.0f * 3.0f); } -absl::Status CreateTempFileBadString(Env* env, char value, uint64 size, - const string suffix, string* filename) { - const string dir = testing::TmpDir(); +absl::Status CreateTempFileBadString(Env* env, char value, uint64_t size, + const std::string suffix, + std::string* 
filename) { + const std::string dir = testing::TmpDir(); *filename = io::JoinPath(dir, absl::StrCat("file_", suffix)); std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewWritableFile(*filename, &file)); @@ -207,7 +208,7 @@ TEST(ImmutableConstantOpTest, FromFileStringUnimplmented) { Env* env = Env::Default(); auto root = Scope::NewRootScope().ExitOnError(); - string bad_file; + std::string bad_file; TF_ASSERT_OK(CreateTempFileBadString(env, '\xe2', 128, "bad_e2", &bad_file)); auto result = ops::ImmutableConst(root, DT_STRING, kFileTensorShape, bad_file); diff --git a/tensorflow/core/kernels/in_topk_op.cc b/tensorflow/core/kernels/in_topk_op.cc index 169d331ad24487..20e12a56e8778f 100644 --- a/tensorflow/core/kernels/in_topk_op.cc +++ b/tensorflow/core/kernels/in_topk_op.cc @@ -89,15 +89,15 @@ REGISTER_KERNEL_BUILDER(Name("InTopK") .HostMemory("predictions") .HostMemory("targets") .HostMemory("precision") - .TypeConstraint("T"), - InTopK); + .TypeConstraint("T"), + InTopK); REGISTER_KERNEL_BUILDER(Name("InTopK") .Device(DEVICE_CPU) .HostMemory("predictions") .HostMemory("targets") .HostMemory("precision") .TypeConstraint("T"), - InTopK); + InTopK); REGISTER_KERNEL_BUILDER(Name("InTopKV2") .Device(DEVICE_CPU) @@ -105,8 +105,8 @@ REGISTER_KERNEL_BUILDER(Name("InTopKV2") .HostMemory("targets") .HostMemory("k") .HostMemory("precision") - .TypeConstraint("T"), - InTopK); + .TypeConstraint("T"), + InTopK); REGISTER_KERNEL_BUILDER(Name("InTopKV2") .Device(DEVICE_CPU) .HostMemory("predictions") @@ -114,7 +114,7 @@ REGISTER_KERNEL_BUILDER(Name("InTopKV2") .HostMemory("k") .HostMemory("precision") .TypeConstraint("T"), - InTopK); + InTopK); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -129,18 +129,18 @@ namespace functor { typename TTypes::Vec output); \ extern template struct InTopKFunctor; -DECLARE_GPU_SPEC(float, int32); +DECLARE_GPU_SPEC(float, int32_t); DECLARE_GPU_SPEC(float, int64_t); #undef DECLARE_GPU_SPEC } // namespace functor REGISTER_KERNEL_BUILDER( - Name("InTopKV2").Device(DEVICE_GPU).TypeConstraint("T"), - InTopK); + Name("InTopKV2").Device(DEVICE_GPU).TypeConstraint("T"), + InTopK); REGISTER_KERNEL_BUILDER( Name("InTopKV2").Device(DEVICE_GPU).TypeConstraint("T"), - InTopK); + InTopK); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/in_topk_op.h b/tensorflow/core/kernels/in_topk_op.h index 877777642ebeb6..ad10dad72bf717 100644 --- a/tensorflow/core/kernels/in_topk_op.h +++ b/tensorflow/core/kernels/in_topk_op.h @@ -62,7 +62,7 @@ struct InTopKFunctor { int64_t k_val = k.k_value; if (k.k_tensor != nullptr) { if (k.k_tensor->dtype() == DT_INT32) { - k_val = k.k_tensor->scalar()(); + k_val = k.k_tensor->scalar()(); } else { k_val = k.k_tensor->scalar()(); } diff --git a/tensorflow/core/kernels/in_topk_op_gpu.cu.cc b/tensorflow/core/kernels/in_topk_op_gpu.cu.cc index cd1d3e88b510bf..b011a24cb1ed1e 100644 --- a/tensorflow/core/kernels/in_topk_op_gpu.cu.cc +++ b/tensorflow/core/kernels/in_topk_op_gpu.cu.cc @@ -39,7 +39,7 @@ template __global__ void ComputePredictionMaskKernel( const T* __restrict__ predictions, // dims: [ num_targets x num_classes ] const TargetT* __restrict__ targets, // dims: [ num_targets ] - int64* __restrict__ mask, // dims: [ num_targets x num_classes ] + int64_t* __restrict__ mask, // dims: [ num_targets x num_classes ] int num_targets, int num_classes) { GPU_1D_KERNEL_LOOP(i, num_targets * num_classes) { const int batch_index = i / num_classes; @@ -67,7 +67,8 @@ __global__ void ComputePredictionMaskKernel( // larger than the target, or to 
'-1' if target class in invalid of predictions // in a batch have non-finite values. struct MaskSum { - __host__ __device__ int64 operator()(const int64& a, const int64& b) const { + __host__ __device__ int64_t operator()(const int64_t& a, + const int64_t& b) const { if (a < 0 || b < 0) return -1; else @@ -77,8 +78,8 @@ struct MaskSum { namespace reduction_op_helper { template <> -struct IdentityValue { - int64 operator()() { return 0; } +struct IdentityValue { + int64_t operator()() { return 0; } }; } // namespace reduction_op_helper @@ -138,8 +139,8 @@ struct InTopKFunctor { auto in = predictions_mask.matrix(); auto out = num_larger_prediction.flat(); - ReduceImpl>( - context, (int64*)out.data(), (int64*)in.data(), in.rank(), + ReduceImpl>( + context, (int64_t*)out.data(), (int64_t*)in.data(), in.rank(), in.dimension(0), in.rank() >= 2 ? in.dimension(1) : 1, in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), Dims<1>(1), MaskSum()); @@ -152,8 +153,9 @@ struct InTopKFunctor { if (k.k_tensor->dtype() == DT_INT32) { output.device(d) = (cnt >= cnt.constant(0)) && - (cnt < k.k_tensor->flat().template cast().broadcast( - Dims<1>(num_targets))); + (cnt < + k.k_tensor->flat().template cast().broadcast( + Dims<1>(num_targets))); } else { output.device(d) = (cnt >= cnt.constant(0)) && diff --git a/tensorflow/core/kernels/inplace_ops.cc b/tensorflow/core/kernels/inplace_ops.cc index 45db7d3b2d3f49..6948cd86c1f8b1 100644 --- a/tensorflow/core/kernels/inplace_ops.cc +++ b/tensorflow/core/kernels/inplace_ops.cc @@ -106,7 +106,7 @@ class ParallelConcatUpdate : public OpKernel { } private: - int32 loc_; + int32_t loc_; }; template @@ -199,7 +199,7 @@ REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate") .HostMemory("value") .HostMemory("update") .HostMemory("output") - .TypeConstraint("T"), + .TypeConstraint("T"), ParallelConcatUpdate); #endif @@ -251,7 +251,7 @@ namespace functor { template void DoInplaceOp(const CPUDevice& d, InplaceOpType op, const Tensor& i, const Tensor& v, Tensor* y) { - auto Ti = i.flat(); + auto Ti = i.flat(); auto Tv = v.flat_outer_dims(); auto Ty = y->flat_outer_dims(); auto nrows = Ty.dimension(0); @@ -274,7 +274,7 @@ void DoInplaceOp(const CPUDevice& d, InplaceOpType op, const Tensor& i, // String type only supports inplace update. 
void DoInplaceStringUpdateOp(const CPUDevice& d, const Tensor& i, const Tensor& v, Tensor* y) { - auto Ti = i.flat(); + auto Ti = i.flat(); auto Tv = v.flat_outer_dims(); auto Ty = y->flat_outer_dims(); auto nrows = Ty.dimension(0); @@ -398,10 +398,10 @@ class EmptyOp : public OpKernel { ctx, TensorShapeUtils::IsVector(shape.shape()), errors::InvalidArgument("shape must be a vector of int32, got shape ", shape.shape().DebugString())); - auto dims = shape.flat(); + auto dims = shape.flat(); TensorShape out_shape; OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( - reinterpret_cast(dims.data()), + reinterpret_cast(dims.data()), dims.size(), &out_shape)); Tensor* out = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, out_shape, &out)); @@ -463,7 +463,7 @@ REGISTER(uint8_t); REGISTER(int64_t); REGISTER(uint64_t); -REGISTER_EMPTY(int32, GPU); +REGISTER_EMPTY(int32_t, GPU); #undef REGISTER #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -474,7 +474,7 @@ REGISTER_KERNEL_BUILDER(Name("InplaceUpdate") .HostMemory("i") .HostMemory("v") .HostMemory("y") - .TypeConstraint("T"), + .TypeConstraint("T"), InplaceOp); REGISTER_KERNEL_BUILDER(Name("InplaceAdd") .Device(DEVICE_DEFAULT) @@ -482,7 +482,7 @@ REGISTER_KERNEL_BUILDER(Name("InplaceAdd") .HostMemory("i") .HostMemory("v") .HostMemory("y") - .TypeConstraint("T"), + .TypeConstraint("T"), InplaceOp); REGISTER_KERNEL_BUILDER(Name("InplaceSub") .Device(DEVICE_DEFAULT) @@ -490,14 +490,14 @@ REGISTER_KERNEL_BUILDER(Name("InplaceSub") .HostMemory("i") .HostMemory("v") .HostMemory("y") - .TypeConstraint("T"), + .TypeConstraint("T"), InplaceOp); REGISTER_KERNEL_BUILDER(Name("DeepCopy") .Device(DEVICE_DEFAULT) .HostMemory("x") .HostMemory("y") - .TypeConstraint("T"), + .TypeConstraint("T"), CopyOp); } // end namespace diff --git a/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc b/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc index 001b6a45e35c5d..6ba369ebdb4346 100644 --- a/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc @@ -27,13 +27,13 @@ namespace functor { typedef Eigen::GpuDevice Device; template -__global__ void DoParallelConcatOpKernel(int nthreads, const int64 rows, - const int64 cols, int32 loc, +__global__ void DoParallelConcatOpKernel(int nthreads, const int64_t rows, + const int64_t cols, int32_t loc, const T* __restrict__ src, T* __restrict__ dst) { GPU_1D_KERNEL_LOOP(idx, nthreads) { - int64 c = idx % cols; - int64 r = (loc % rows + rows) % rows; // Guard index range. + int64_t c = idx % cols; + int64_t r = (loc % rows + rows) % rows; // Guard index range. 
T* p = dst + r * cols + c; const T* q = src + idx; *p = ldg(q); @@ -41,24 +41,24 @@ __global__ void DoParallelConcatOpKernel(int nthreads, const int64 rows, } template -Status DoParallelConcatUpdate(const Device& d, const Tensor& value, int32 loc, - Tensor* output) { - const int64 nelem = value.NumElements(); +absl::Status DoParallelConcatUpdate(const Device& d, const Tensor& value, + int32_t loc, Tensor* output) { + const int64_t nelem = value.NumElements(); GpuLaunchConfig cfg = GetGpuLaunchConfig(nelem, d); auto Toutput = output->flat_outer_dims(); - const int64 nrows = Toutput.dimension(0); - const int64 ncols = Toutput.dimension(1); + const int64_t nrows = Toutput.dimension(0); + const int64_t ncols = Toutput.dimension(1); const T* src = value.flat().data(); T* dst = output->flat().data(); TF_CHECK_OK(GpuLaunchKernel( DoParallelConcatOpKernel, cfg.block_count, cfg.thread_per_block, 0, d.stream(), cfg.virtual_thread_count, nrows, ncols, loc, src, dst)); - return OkStatus(); + return absl::OkStatus(); } template <> -Status DoParallelConcat(const Device& d, const Tensor& value, int32 loc, - Tensor* output) { +absl::Status DoParallelConcat(const Device& d, const Tensor& value, int32_t loc, + Tensor* output) { CHECK_EQ(value.dtype(), output->dtype()); switch (value.dtype()) { #define CASE(type) \ @@ -77,18 +77,18 @@ Status DoParallelConcat(const Device& d, const Tensor& value, int32 loc, return errors::InvalidArgument("Unsupported data type: ", DataTypeString(value.dtype())); } - return OkStatus(); + return absl::OkStatus(); } template -__global__ void DoInplaceOpKernel(int nthreads, const int64 rows, - const int64 cols, const int64 n, +__global__ void DoInplaceOpKernel(int nthreads, const int64_t rows, + const int64_t cols, const int64_t n, const T* __restrict__ src, - const int32* __restrict__ rowids, + const int32_t* __restrict__ rowids, T* __restrict__ dst) { GPU_1D_KERNEL_LOOP(idx, nthreads) { - int64 r = idx / cols; - int64 c = idx % cols; + int64_t r = idx / cols; + int64_t c = idx % cols; r = (rowids[r] % rows + rows) % rows; // Guard index range. T* p = dst + r * cols + c; const T* q = src + idx; @@ -109,15 +109,15 @@ __global__ void DoInplaceOpKernel(int nthreads, const int64 rows, template void DoInplaceOp(const Device& d, InplaceOpType op, const Tensor& i, const Tensor& v, Tensor* y) { - const int64 nelem = v.NumElements(); + const int64_t nelem = v.NumElements(); GpuLaunchConfig cfg = GetGpuLaunchConfig(nelem, d); auto Ty = y->flat_outer_dims(); - const int64 nrows = Ty.dimension(0); - const int64 ncols = Ty.dimension(1); - const int64 n = i.NumElements(); + const int64_t nrows = Ty.dimension(0); + const int64_t ncols = Ty.dimension(1); + const int64_t n = i.NumElements(); const T* src = v.flat().data(); // TODO(sjhwang): Check that first dimension fits in int32 range. 
- const int32* rowids = i.flat().data(); + const int32_t* rowids = i.flat().data(); T* dst = y->flat().data(); switch (op) { case I_UPDATE: @@ -144,15 +144,15 @@ void DoInplaceOp(const Device& d, InplaceOpType op, const Tensor& i, template void DoInplaceOp(const Device& d, InplaceOpType op, const Tensor& i, const Tensor& v, Tensor* y) { - const int64 nelem = v.NumElements(); + const int64_t nelem = v.NumElements(); GpuLaunchConfig cfg = GetGpuLaunchConfig(nelem, d); auto Ty = y->flat_outer_dims(); - const int64 nrows = Ty.dimension(0); - const int64 ncols = Ty.dimension(1); - const int64 n = i.NumElements(); + const int64_t nrows = Ty.dimension(0); + const int64_t ncols = Ty.dimension(1); + const int64_t n = i.NumElements(); const bool* src = v.flat().data(); // TODO(sjhwang): Check that first dimension fits in int32 range. - const int32* rowids = i.flat().data(); + const int32_t* rowids = i.flat().data(); bool* dst = y->flat().data(); if (op == I_UPDATE) { TF_CHECK_OK(GpuLaunchKernel(DoInplaceOpKernel, @@ -163,8 +163,8 @@ void DoInplaceOp(const Device& d, InplaceOpType op, const Tensor& i, } template <> -Status DoInplace(const Device& d, InplaceOpType op, const Tensor& i, - const Tensor& v, Tensor* y) { +absl::Status DoInplace(const Device& d, InplaceOpType op, const Tensor& i, + const Tensor& v, Tensor* y) { CHECK_EQ(v.dtype(), y->dtype()); switch (v.dtype()) { #define CASE(type) \ @@ -186,11 +186,11 @@ Status DoInplace(const Device& d, InplaceOpType op, const Tensor& i, return errors::InvalidArgument("Unsupported data type from DoInplace: ", DataTypeString(v.dtype())); } - return OkStatus(); + return absl::OkStatus(); } template <> -Status DoCopy(const Device& d, const Tensor& x, Tensor* y) { +absl::Status DoCopy(const Device& d, const Tensor& x, Tensor* y) { CHECK_EQ(x.dtype(), y->dtype()); switch (x.dtype()) { #define CASE(type) \ @@ -214,7 +214,7 @@ Status DoCopy(const Device& d, const Tensor& x, Tensor* y) { return errors::InvalidArgument("Unsupported dtype from DoCopy: ", DataTypeString(x.dtype())); } - return OkStatus(); + return absl::OkStatus(); } } // end namespace functor diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc index 51c0d4b6654034..3919cb763171c7 100644 --- a/tensorflow/core/kernels/list_kernels.cc +++ b/tensorflow/core/kernels/list_kernels.cc @@ -48,7 +48,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice; absl::Status TensorShapeFromTensor(const Tensor& t, PartialTensorShape* out) { if (t.shape() == TensorShape({})) { - if ((t.dtype() == DT_INT32 && t.scalar()() == -1) || + if ((t.dtype() == DT_INT32 && t.scalar()() == -1) || (t.dtype() == DT_INT64 && t.scalar()() == -1)) { *out = PartialTensorShape(); return absl::OkStatus(); @@ -61,7 +61,7 @@ absl::Status TensorShapeFromTensor(const Tensor& t, PartialTensorShape* out) { t.shape().dims()); } if (t.dtype() == DT_INT32) { - return PartialTensorShape::MakePartialShape(t.vec().data(), + return PartialTensorShape::MakePartialShape(t.vec().data(), t.NumElements(), out); } else if (t.dtype() == DT_INT64) { return PartialTensorShape::MakePartialShape(t.vec().data(), @@ -157,7 +157,7 @@ class EmptyTensorList : public OpKernel { OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr)); TensorList empty; empty.element_dtype = element_dtype_; - empty.max_num_elements = max_num_elements_t.scalar()(); + empty.max_num_elements = max_num_elements_t.scalar()(); PartialTensorShape element_shape; OP_REQUIRES_OK(ctx, TensorShapeFromTensor(ctx->input(0), &element_shape)); 
empty.element_shape = element_shape; @@ -257,7 +257,7 @@ class TensorListLength : public OpKernel { OP_REQUIRES_OK(c, GetInputList(c, 0, &l)); Tensor* result; OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result)); - result->scalar()() = l->tensors().size(); + result->scalar()() = l->tensors().size(); } }; @@ -287,7 +287,7 @@ class TensorListElementShape : public OpKernel { if (l->element_shape.unknown_rank()) { OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape({}), &result)); if (result->dtype() == DT_INT32) { - result->scalar()() = -1; + result->scalar()() = -1; } else { result->scalar()() = -1; } @@ -296,7 +296,7 @@ class TensorListElementShape : public OpKernel { 0, TensorShape{l->element_shape.dims()}, &result)); for (int i = 0; i < l->element_shape.dims(); ++i) { if (result->dtype() == DT_INT32) { - result->flat()(i) = l->element_shape.dim_size(i); + result->flat()(i) = l->element_shape.dim_size(i); } else { result->flat()(i) = l->element_shape.dim_size(i); } @@ -336,7 +336,7 @@ class TensorListReserve : public OpKernel { errors::InvalidArgument( "The num_elements to reserve must be a tensor size 1, but got ", c->input(1).shape())); - int32_t num_elements = c->input(1).scalar()(); + int32_t num_elements = c->input(1).scalar()(); OP_REQUIRES(c, num_elements >= 0, errors::InvalidArgument("The num_elements to reserve must be a " "non negative number, but got ", @@ -384,7 +384,7 @@ class TensorListResize : public OpKernel { OP_REQUIRES_OK(c, GetInputList(c, 0, &input_list)); OP_REQUIRES(c, TensorShapeUtils::IsScalar(c->input(1).shape()), errors::InvalidArgument("size must be a scalar")); - int32_t size = c->input(1).scalar()(); + int32_t size = c->input(1).scalar()(); OP_REQUIRES( c, size >= 0, errors::InvalidArgument( @@ -473,7 +473,7 @@ class TensorListSetItem : public OpKernel { " list shape: ", l->element_shape.DebugString())); TensorList* output_list = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewList(c, 0, 0, *l, &output_list)); - int32_t index = c->input(1).scalar()(); + int32_t index = c->input(1).scalar()(); if (!resize_if_index_out_of_bounds_) { OP_REQUIRES(c, index < l->tensors().size(), errors::InvalidArgument("Trying to modify element ", index, diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h index 9837b08716afae..5af26a518f0b18 100644 --- a/tensorflow/core/kernels/list_kernels.h +++ b/tensorflow/core/kernels/list_kernels.h @@ -80,8 +80,8 @@ template inline void SetZero(OpKernelContext* ctx, Tensor& tensor) { #ifdef PLUGGABLE_DEVICE_SUPPORTED if (IsPluggableDevice(ctx)) { - auto ptr = - se::DeviceMemoryBase(tensor.flat().data(), tensor.TotalBytes()); + auto ptr = stream_executor::DeviceAddressBase(tensor.flat().data(), + tensor.TotalBytes()); auto stream = ctx->op_device_context()->stream(); auto result = stream->MemZero(&ptr, tensor.TotalBytes()).ok(); DCHECK_EQ(true, result); @@ -101,8 +101,10 @@ inline void CopyTensorPluggableDevice(OpKernelContext* ctx, Tensor& src, auto src_t = src.unaligned_flat(); auto dst_t = dst.flat(); DCHECK(DataTypeCanUseMemcpy(DataTypeToEnum::v())); - auto src_ptr = se::DeviceMemoryBase(src_t.data(), src.TotalBytes()); - auto dst_ptr = se::DeviceMemoryBase(dst_t.data(), dst.TotalBytes()); + auto src_ptr = + stream_executor::DeviceAddressBase(src_t.data(), src.TotalBytes()); + auto dst_ptr = + stream_executor::DeviceAddressBase(dst_t.data(), dst.TotalBytes()); auto stream = ctx->op_device_context()->stream(); auto result = stream->Memcpy(&dst_ptr, src_ptr, src.TotalBytes()).ok(); 
DCHECK_EQ(true, result); @@ -133,7 +135,7 @@ void ConcatPluggableDevice( size_t num_inputs = inputs.size(); std::vector sizes; sizes.reserve(num_inputs); - int64 row_size = 0; + int64_t row_size = 0; for (const auto& input : inputs) { sizes.push_back(input->dimension(1)); row_size += sizes.back(); @@ -145,12 +147,13 @@ void ConcatPluggableDevice( for (const auto& input : inputs) { inp.push_back(&(*input)(0, 0)); } - const int64 dim0 = output->dimension(0); - for (int64 i = 0; i < dim0; ++i) { - for (int64 j = 0; j < num_inputs; ++j) { + const int64_t dim0 = output->dimension(0); + for (int64_t i = 0; i < dim0; ++i) { + for (int64_t j = 0; j < num_inputs; ++j) { auto size = sizes[j]; - se::DeviceMemoryBase out_base{out, size * sizeof(T)}; - se::DeviceMemoryBase inp_base{const_cast(inp[j]), size * sizeof(T)}; + stream_executor::DeviceAddressBase out_base{out, size * sizeof(T)}; + stream_executor::DeviceAddressBase inp_base{const_cast(inp[j]), + size * sizeof(T)}; OP_REQUIRES_OK(context, stream->Memcpy(&out_base, inp_base, size * sizeof(T))); out += size; @@ -284,7 +287,7 @@ class TensorListGetItem : public OpKernel { DataTypeString(element_dtype_), " but list elements ", DataTypeString(l->element_dtype))); - int32_t index = c->input(1).scalar()(); + int32_t index = c->input(1).scalar()(); OP_REQUIRES(c, index < l->tensors().size(), errors::InvalidArgument("Trying to access element ", index, " in a list with ", l->tensors().size(), @@ -693,7 +696,7 @@ class TensorListGather : public OpKernel { // element tensors. if (!tensor_list->element_shape.IsFullyDefined()) { for (int index = 0; index < indices.NumElements(); ++index) { - const int i = indices.flat()(index); + const int i = indices.flat()(index); OP_REQUIRES(c, 0 <= i && i < tensor_list->tensors().size(), absl::InvalidArgumentError(absl::StrCat( @@ -728,7 +731,7 @@ class TensorListGather : public OpKernel { inputs_flat.reserve(indices.NumElements()); Tensor zeros; for (int index = 0; index < indices.NumElements(); ++index) { - const int i = indices.flat()(index); + const int i = indices.flat()(index); OP_REQUIRES( c, i < tensor_list->tensors().size(), errors::InvalidArgument("Index ", i, " out o range; list only has ", @@ -832,7 +835,7 @@ absl::Status Scatter(OpKernelContext* c, const Tensor& value, const auto copy_tensor = IsPluggableDevice(c) ? &CopyTensorPluggableDevice : &CopyTensor; for (int index = 0; index < indices.NumElements(); ++index) { - const int i = indices.flat()(index); + const int i = indices.flat()(index); Tensor tmp = value.Slice(index, index + 1); TensorShape tmp_shape = tmp.shape(); tmp_shape.RemoveDim(0); @@ -885,7 +888,7 @@ class TensorListScatterIntoExistingList : public OpKernel { // Resize the list if needed to accommodate all indices. TensorList* output_list = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewList(c, 0, 0, *l, &output_list)); - const auto indices_vec = indices.vec(); + const auto indices_vec = indices.vec(); int32_t max_index = (indices.NumElements() == 0) ? 
-1 @@ -956,7 +959,7 @@ class TensorListScatter : public OpKernel { { int highest_index = -1; for (int index = 0; index < indices.NumElements(); ++index) { - const int i = indices.flat()(index); + const int i = indices.flat()(index); OP_REQUIRES( c, i >= 0, errors::InvalidArgument( diff --git a/tensorflow/core/kernels/listdiff_op.cc b/tensorflow/core/kernels/listdiff_op.cc index 92d461aba58c8e..eb0a6eec9345aa 100644 --- a/tensorflow/core/kernels/listdiff_op.cc +++ b/tensorflow/core/kernels/listdiff_op.cc @@ -48,7 +48,7 @@ class ListDiffOp : public OpKernel { const auto Ty = y.vec(); const size_t y_size = Ty.size(); - OP_REQUIRES(context, x_size < std::numeric_limits::max(), + OP_REQUIRES(context, x_size < std::numeric_limits::max(), errors::InvalidArgument("x too large for int32 indexing")); std::unordered_set y_set; diff --git a/tensorflow/core/kernels/load_and_remap_matrix_op.cc b/tensorflow/core/kernels/load_and_remap_matrix_op.cc index c746fec71d5e4d..a952da3595ccda 100644 --- a/tensorflow/core/kernels/load_and_remap_matrix_op.cc +++ b/tensorflow/core/kernels/load_and_remap_matrix_op.cc @@ -133,11 +133,11 @@ class LoadAndRemapMatrixOp : public OpKernel { errors::InvalidArgument("The `ckpt_path` tensor must have exactly one " "element, got tensor of shape ", ckpt_path_t->shape().DebugString())); - const string& ckpt_path = ckpt_path_t->scalar()(); + const std::string& ckpt_path = ckpt_path_t->scalar()(); const Tensor* old_tensor_name_t; OP_REQUIRES_OK(context, context->input("old_tensor_name", &old_tensor_name_t)); - const string& old_tensor_name = old_tensor_name_t->scalar()(); + const std::string& old_tensor_name = old_tensor_name_t->scalar()(); LOG(INFO) << "Processing checkpoint : " << ckpt_path; BundleReader reader(context->env(), ckpt_path); diff --git a/tensorflow/core/kernels/logging_ops.cc b/tensorflow/core/kernels/logging_ops.cc index 904e84d21778aa..b589d918626f1d 100644 --- a/tensorflow/core/kernels/logging_ops.cc +++ b/tensorflow/core/kernels/logging_ops.cc @@ -64,7 +64,7 @@ void AssertOp::Compute(OpKernelContext* ctx) { if (cond.scalar()()) { return; } - string msg = "assertion failed: "; + std::string msg = "assertion failed: "; for (int i = 1; i < ctx->num_inputs(); ++i) { absl::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_), "]"); if (i < ctx->num_inputs() - 1) absl::StrAppend(&msg, " "); @@ -98,7 +98,7 @@ class PrintOp : public OpKernel { if (call_counter_ >= first_n_) return; call_counter_++; } - string msg; + std::string msg; absl::StrAppend(&msg, message_); for (int i = 1; i < ctx->num_inputs(); ++i) { absl::StrAppend(&msg, "[", ctx->input(i).SummarizeValue(summarize_), "]"); @@ -110,8 +110,8 @@ class PrintOp : public OpKernel { mutex mu_; int64_t call_counter_ TF_GUARDED_BY(mu_) = 0; int64_t first_n_ = 0; - int32 summarize_ = 0; - string message_; + int32_t summarize_ = 0; + std::string message_; }; REGISTER_KERNEL_BUILDER(Name("Print").Device(DEVICE_CPU), PrintOp); @@ -130,8 +130,8 @@ class PrintV2Op : public OpKernel { std::end(valid_output_streams_), output_stream_); if (output_stream_index == std::end(valid_output_streams_)) { - string error_msg = absl::StrCat("Unknown output stream: ", output_stream_, - ", Valid streams are:"); + std::string error_msg = absl::StrCat( + "Unknown output stream: ", output_stream_, ", Valid streams are:"); for (auto valid_stream : valid_output_streams_) { absl::StrAppend(&error_msg, " ", valid_stream); } @@ -146,9 +146,9 @@ class PrintV2Op : public OpKernel { ctx, TensorShapeUtils::IsScalar(input_->shape()), 
errors::InvalidArgument("Input is expected to be scalar, but got ", input_->shape())); - const string& msg = input_->scalar()(); + const std::string& msg = input_->scalar()(); - string ended_msg = absl::StrCat(msg, end_); + std::string ended_msg = absl::StrCat(msg, end_); if (!file_path_.empty()) { // Outputs to a file at the specified path. @@ -172,8 +172,8 @@ class PrintV2Op : public OpKernel { } else if (output_stream_ == "log(error)") { LOG(ERROR) << ended_msg << std::flush; } else { - string error_msg = absl::StrCat("Unknown output stream: ", output_stream_, - ", Valid streams are:"); + std::string error_msg = absl::StrCat( + "Unknown output stream: ", output_stream_, ", Valid streams are:"); for (auto valid_stream : valid_output_streams_) { absl::StrAppend(&error_msg, " ", valid_stream); } @@ -186,10 +186,10 @@ class PrintV2Op : public OpKernel { "log(warning)", "log(error)"}; private: - string end_; + std::string end_; // Either output_stream_ or file_path_ (but not both) will be non-empty. - string output_stream_; - string file_path_; + std::string output_stream_; + std::string file_path_; // If output_stream_ is a file path, extracts it to file_path_ and clears // output_stream_; otherwise sets file_paths_ to "". diff --git a/tensorflow/core/kernels/logging_ops.h b/tensorflow/core/kernels/logging_ops.h index 5cb1213998f499..f5a58643d8e1a3 100644 --- a/tensorflow/core/kernels/logging_ops.h +++ b/tensorflow/core/kernels/logging_ops.h @@ -25,7 +25,7 @@ class AssertOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: - int32 summarize_ = 0; + int32_t summarize_ = 0; }; } // namespace tensorflow diff --git a/tensorflow/core/kernels/logging_ops_test.cc b/tensorflow/core/kernels/logging_ops_test.cc index 7efdeac7d1db9f..fbce44642938db 100644 --- a/tensorflow/core/kernels/logging_ops_test.cc +++ b/tensorflow/core/kernels/logging_ops_test.cc @@ -34,7 +34,7 @@ namespace { class PrintingV2GraphTest : public OpsTestBase { protected: - absl::Status Init(const string& output_stream = "log(warning)") { + absl::Status Init(const std::string& output_stream = "log(warning)") { TF_CHECK_OK(NodeDefBuilder("op", "PrintV2") .Input(FakeInput(DT_STRING)) .Attr("output_stream", output_stream) @@ -61,8 +61,8 @@ TEST_F(PrintingV2GraphTest, InvalidInputRank) { class PrintingGraphTest : public OpsTestBase { protected: - absl::Status Init(DataType input_type1, DataType input_type2, string msg = "", - int first_n = -1, int summarize = 3) { + absl::Status Init(DataType input_type1, DataType input_type2, + std::string msg = "", int first_n = -1, int summarize = 3) { TF_CHECK_OK(NodeDefBuilder("op", "Print") .Input(FakeInput(input_type1)) .Input(FakeInput(2, input_type2)) @@ -76,58 +76,58 @@ class PrintingGraphTest : public OpsTestBase { TEST_F(PrintingGraphTest, Int32Success_6) { TF_ASSERT_OK(Init(DT_INT32, DT_INT32)); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_INT32, TensorShape({6})); - test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); - test::ExpectTensorEqual(expected, *GetOutput(0)); + test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); + test::ExpectTensorEqual(expected, *GetOutput(0)); } TEST_F(PrintingGraphTest, 
Int32Success_Summarize6) { TF_ASSERT_OK(Init(DT_INT32, DT_INT32, "", -1, 6)); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_INT32, TensorShape({6})); - test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); - test::ExpectTensorEqual(expected, *GetOutput(0)); + test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); + test::ExpectTensorEqual(expected, *GetOutput(0)); } TEST_F(PrintingGraphTest, StringSuccess) { TF_ASSERT_OK(Init(DT_INT32, DT_STRING)); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); AddInputFromArray(TensorShape({}), {"foo"}); AddInputFromArray(TensorShape({}), {"bar"}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_INT32, TensorShape({6})); - test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); - test::ExpectTensorEqual(expected, *GetOutput(0)); + test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); + test::ExpectTensorEqual(expected, *GetOutput(0)); } TEST_F(PrintingGraphTest, MsgSuccess) { TF_ASSERT_OK(Init(DT_INT32, DT_STRING, "Message: ")); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); AddInputFromArray(TensorShape({}), {"foo"}); AddInputFromArray(TensorShape({}), {"bar"}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_INT32, TensorShape({6})); - test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); - test::ExpectTensorEqual(expected, *GetOutput(0)); + test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); + test::ExpectTensorEqual(expected, *GetOutput(0)); } TEST_F(PrintingGraphTest, FirstNSuccess) { TF_ASSERT_OK(Init(DT_INT32, DT_STRING, "", 3)); - AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); + AddInputFromArray(TensorShape({6}), {1, 2, 3, 4, 5, 6}); AddInputFromArray(TensorShape({}), {"foo"}); AddInputFromArray(TensorShape({}), {"bar"}); // run 4 times but we only print 3 as intended for (int i = 0; i < 4; i++) TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_INT32, TensorShape({6})); - test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); - test::ExpectTensorEqual(expected, *GetOutput(0)); + test::FillValues(&expected, {1, 2, 3, 4, 5, 6}); + test::ExpectTensorEqual(expected, *GetOutput(0)); } class TimestampTest : public OpsTestBase { diff --git a/tensorflow/core/kernels/lookup_ops_test.cc b/tensorflow/core/kernels/lookup_ops_test.cc index 2a57a46cf165f0..fb13ccc162eb90 100644 --- a/tensorflow/core/kernels/lookup_ops_test.cc +++ b/tensorflow/core/kernels/lookup_ops_test.cc @@ -51,8 +51,8 @@ class MockHashTable : public lookup::HashTable { ~MockHashTable() override { alive = false; } }; -typedef int32 key_dtype; -typedef int32 value_dtype; +typedef int32_t key_dtype; +typedef int32_t value_dtype; REGISTER_KERNEL_BUILDER( Name("MockAnonymousHashTable") diff --git a/tensorflow/core/kernels/lookup_table_init_op.cc b/tensorflow/core/kernels/lookup_table_init_op.cc index 27cc76ee11b945..c936cad9addd6d 100644 --- a/tensorflow/core/kernels/lookup_table_init_op.cc +++ b/tensorflow/core/kernels/lookup_table_init_op.cc @@ -111,7 +111,7 @@ class InitializeTableFromTextFileOp : public OpKernel { if (ctx->HasAttr("offset")) { OP_REQUIRES_OK(ctx, 
ctx->GetAttr("offset", &offset_)); } - string delimiter; + std::string delimiter; OP_REQUIRES_OK(ctx, ctx->GetAttr("delimiter", &delimiter)); OP_REQUIRES(ctx, delimiter.size() == 1, errors::InvalidArgument("delimiter should be only 1 char")); @@ -137,7 +137,8 @@ class InitializeTableFromTextFileOp : public OpKernel { errors::InvalidArgument("filename should be a single string, but got ", vocab_filename_tensor.shape().DebugString())); - const string& vocab_filename = vocab_filename_tensor.scalar()(); + const std::string& vocab_filename = + vocab_filename_tensor.scalar()(); OP_REQUIRES(ctx, !vocab_filename.empty(), errors::InvalidArgument("filename cannot be empty.")); diff --git a/tensorflow/core/kernels/lookup_table_init_op.h b/tensorflow/core/kernels/lookup_table_init_op.h index e94db921bfd237..f6e246486a4532 100644 --- a/tensorflow/core/kernels/lookup_table_init_op.h +++ b/tensorflow/core/kernels/lookup_table_init_op.h @@ -22,7 +22,7 @@ namespace tensorflow { namespace lookup { // Helper function to initialize an InitializableLookupTable from a text file. -absl::Status InitializeTableFromTextFile(const string& filename, +absl::Status InitializeTableFromTextFile(const std::string& filename, int64_t vocab_size, char delimiter, int32_t key_index, int32_t value_index, Env* env, diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc index 49a28dc324b9fb..54d2c8cca1669e 100644 --- a/tensorflow/core/kernels/lookup_table_op.cc +++ b/tensorflow/core/kernels/lookup_table_op.cc @@ -411,11 +411,11 @@ class MutableHashTableOfTensors final : public LookupInterface { namespace { template -inline uint64 HashScalar(const T& key) { - return static_cast(key); +inline uint64_t HashScalar(const T& key) { + return static_cast(key); } -inline uint64 HashScalar(const tstring& key) { return Hash64(key); } +inline uint64_t HashScalar(const tstring& key) { return Hash64(key); } // If the given shape is a scalar return {1} instead. Otherwise leave it alone. 
TensorShape MaybeVectorizeShape(const TensorShape& shape) { @@ -523,7 +523,7 @@ class MutableDenseHashTable final : public LookupInterface { const int64_t bit_mask = num_buckets_ - 1; // TODO(andreasst): parallelize using work_sharder for (int64_t i = 0; i < num_elements; ++i) { - const uint64 key_hash = HashKey(key_matrix, i); + const uint64_t key_hash = HashKey(key_matrix, i); if (empty_key_hash_ == key_hash && IsEqualKey(empty_key_matrix, 0, key_matrix, i)) { return errors::InvalidArgument( @@ -693,7 +693,7 @@ class MutableDenseHashTable final : public LookupInterface { deleted_key_.template shaped({1, key_size}); const int64_t bit_mask = num_buckets_ - 1; for (int64_t i = 0; i < num_elements; ++i) { - const uint64 key_hash = HashKey(key_matrix, i); + const uint64_t key_hash = HashKey(key_matrix, i); if (empty_key_hash_ == key_hash && IsEqualKey(empty_key_tensor, 0, key_matrix, i)) { if (ignore_empty_and_deleted_key) { @@ -760,7 +760,7 @@ class MutableDenseHashTable final : public LookupInterface { const auto deleted_key_flat = deleted_key_.template flat(); const int64_t bit_mask = num_buckets_ - 1; for (int64_t i = 0; i < num_elements; ++i) { - const uint64 key_hash = HashKey(key_matrix, i); + const uint64_t key_hash = HashKey(key_matrix, i); if (empty_key_hash_ == key_hash && IsEqualKey(empty_key_tensor, 0, key_matrix, i)) { return errors::InvalidArgument( @@ -843,11 +843,11 @@ class MutableDenseHashTable final : public LookupInterface { return DoInsert(ctx, old_key_buckets, old_value_buckets, true); } - uint64 HashKey(typename TTypes::ConstMatrix key, int64_t index) const { + uint64_t HashKey(typename TTypes::ConstMatrix key, int64_t index) const { if (key_shape_.num_elements() == 1) { return HashScalar(key(index, 0)); } - uint64 result = 0; + uint64_t result = 0; for (int64_t i = 0; i < key_shape_.num_elements(); ++i) { result = Hash64Combine(result, HashScalar(key(index, i))); } @@ -876,9 +876,9 @@ class MutableDenseHashTable final : public LookupInterface { Tensor key_buckets_ TF_GUARDED_BY(mu_); Tensor value_buckets_ TF_GUARDED_BY(mu_); Tensor empty_key_; - uint64 empty_key_hash_; + uint64_t empty_key_hash_; Tensor deleted_key_; - uint64 deleted_key_hash_; + uint64_t deleted_key_hash_; }; } // namespace lookup @@ -1103,19 +1103,19 @@ REGISTER_KERNEL_BUILDER(Name("LookupTableImportV2").Device(DEVICE_CPU), AnonymousLookupTableOp, \ key_dtype, value_dtype>) -REGISTER_KERNEL(int32, double); -REGISTER_KERNEL(int32, float); -REGISTER_KERNEL(int32, int32); -REGISTER_KERNEL(int32, tstring); +REGISTER_KERNEL(int32_t, double); +REGISTER_KERNEL(int32_t, float); +REGISTER_KERNEL(int32_t, int32_t); +REGISTER_KERNEL(int32_t, tstring); REGISTER_KERNEL(int64_t, double); REGISTER_KERNEL(int64_t, float); -REGISTER_KERNEL(int64_t, int32); +REGISTER_KERNEL(int64_t, int32_t); REGISTER_KERNEL(int64_t, int64_t); REGISTER_KERNEL(int64_t, tstring); REGISTER_KERNEL(tstring, bool); REGISTER_KERNEL(tstring, double); REGISTER_KERNEL(tstring, float); -REGISTER_KERNEL(tstring, int32); +REGISTER_KERNEL(tstring, int32_t); REGISTER_KERNEL(tstring, int64_t); REGISTER_KERNEL(tstring, tstring); @@ -1146,19 +1146,19 @@ REGISTER_KERNEL(tstring, tstring); lookup::MutableHashTableOfScalars, \ key_dtype, value_dtype>) -REGISTER_KERNEL(int32, double); -REGISTER_KERNEL(int32, float); -REGISTER_KERNEL(int32, int32); +REGISTER_KERNEL(int32_t, double); +REGISTER_KERNEL(int32_t, float); +REGISTER_KERNEL(int32_t, int32_t); REGISTER_KERNEL(int64_t, double); REGISTER_KERNEL(int64_t, float); -REGISTER_KERNEL(int64_t, int32); 
+REGISTER_KERNEL(int64_t, int32_t); REGISTER_KERNEL(int64_t, int64_t); REGISTER_KERNEL(int64_t, tstring); REGISTER_KERNEL(int64_t, Variant); REGISTER_KERNEL(tstring, bool); REGISTER_KERNEL(tstring, double); REGISTER_KERNEL(tstring, float); -REGISTER_KERNEL(tstring, int32); +REGISTER_KERNEL(tstring, int32_t); REGISTER_KERNEL(tstring, int64_t); #undef REGISTER_KERNEL @@ -1188,18 +1188,18 @@ REGISTER_KERNEL(tstring, int64_t); lookup::MutableHashTableOfTensors, \ key_dtype, value_dtype>) -REGISTER_KERNEL(int32, double); -REGISTER_KERNEL(int32, float); -REGISTER_KERNEL(int32, int32); +REGISTER_KERNEL(int32_t, double); +REGISTER_KERNEL(int32_t, float); +REGISTER_KERNEL(int32_t, int32_t); REGISTER_KERNEL(int64_t, double); REGISTER_KERNEL(int64_t, float); -REGISTER_KERNEL(int64_t, int32); +REGISTER_KERNEL(int64_t, int32_t); REGISTER_KERNEL(int64_t, int64_t); REGISTER_KERNEL(int64_t, tstring); REGISTER_KERNEL(tstring, bool); REGISTER_KERNEL(tstring, double); REGISTER_KERNEL(tstring, float); -REGISTER_KERNEL(tstring, int32); +REGISTER_KERNEL(tstring, int32_t); REGISTER_KERNEL(tstring, int64_t); #undef REGISTER_KERNEL @@ -1229,19 +1229,19 @@ REGISTER_KERNEL(tstring, int64_t); lookup::MutableDenseHashTable, key_dtype, \ value_dtype>) -REGISTER_KERNEL(int32, double); -REGISTER_KERNEL(int32, float); -REGISTER_KERNEL(int32, int32); +REGISTER_KERNEL(int32_t, double); +REGISTER_KERNEL(int32_t, float); +REGISTER_KERNEL(int32_t, int32_t); REGISTER_KERNEL(int64_t, bool); REGISTER_KERNEL(int64_t, double); REGISTER_KERNEL(int64_t, float); -REGISTER_KERNEL(int64_t, int32); +REGISTER_KERNEL(int64_t, int32_t); REGISTER_KERNEL(int64_t, int64_t); REGISTER_KERNEL(int64_t, Variant); REGISTER_KERNEL(tstring, bool); REGISTER_KERNEL(tstring, double); REGISTER_KERNEL(tstring, float); -REGISTER_KERNEL(tstring, int32); +REGISTER_KERNEL(tstring, int32_t); REGISTER_KERNEL(tstring, int64_t); REGISTER_KERNEL(tstring, ResourceHandle); diff --git a/tensorflow/core/kernels/lookup_table_op.h b/tensorflow/core/kernels/lookup_table_op.h index daa7f6e32dc9dd..840720d2e3e61d 100644 --- a/tensorflow/core/kernels/lookup_table_op.h +++ b/tensorflow/core/kernels/lookup_table_op.h @@ -300,7 +300,7 @@ class HashTable : public InitializableLookupTable { return absl::OkStatus(); }; - absl::Status DoLazyPrepare(std::function size_fn) override { + absl::Status DoLazyPrepare(std::function size_fn) override { return DoPrepare(size_fn()); } diff --git a/tensorflow/core/kernels/lookup_util.cc b/tensorflow/core/kernels/lookup_util.cc index 3576b6c7339bd1..744b2e9c21b5ac 100644 --- a/tensorflow/core/kernels/lookup_util.cc +++ b/tensorflow/core/kernels/lookup_util.cc @@ -37,13 +37,13 @@ static const int kInputBufferSize = 1 * 1024 * 1024; /* bytes */ static const int kLineNumber = -1; static const int kWholeLine = -2; -absl::Status GetNumLinesInTextFile(Env* env, const string& vocab_file, +absl::Status GetNumLinesInTextFile(Env* env, const std::string& vocab_file, int64_t* num_lines) { std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewRandomAccessFile(vocab_file, &file)); io::InputBuffer input_buffer(file.get(), kInputBufferSize); - string line; + std::string line; absl::Status s = input_buffer.ReadLine(&line); int64_t next_id = 0; while (s.ok()) { @@ -81,9 +81,10 @@ class TextFileLineIterator // - Index -1 means the line number stored in int64. // - Index >= 0 represent index (starting at zero) of the split line based on // delimiter. 
- absl::Status Init(const string& filename, int64_t vocab_size, char delimiter, - DataType key_dtype, int64_t key_index, DataType value_dtype, - int64_t value_index, int64_t offset, Env* env) { + absl::Status Init(const std::string& filename, int64_t vocab_size, + char delimiter, DataType key_dtype, int64_t key_index, + DataType value_dtype, int64_t value_index, int64_t offset, + Env* env) { filename_ = filename; vocab_size_ = vocab_size; delimiter_ = delimiter; @@ -108,7 +109,7 @@ class TextFileLineIterator void Next() override { if (!valid_) return; - string line; + std::string line; status_ = input_buffer_->ReadLine(&line); if (!status_.ok()) { if (absl::IsOutOfRange(status_) && vocab_size_ != -1 && @@ -137,7 +138,7 @@ class TextFileLineIterator return; } - std::vector tokens; + std::vector tokens; if (!ignore_split_) { tokens = str_util::Split(line, delimiter_); const auto expected_size = @@ -197,7 +198,7 @@ class TextFileLineIterator int64_t next_id_; int64_t offset_; int64_t vocab_size_; - string filename_; + std::string filename_; char delimiter_; absl::Status status_; bool ignore_split_; @@ -206,13 +207,14 @@ class TextFileLineIterator // Set the corresponding value from line or tokens based on 'index' into the // tensor 't'. The value is transformed to the given data type 'dtype'. - absl::Status SetValue(const string& line, const std::vector& tokens, - int64_t index, Tensor* tensor) { + absl::Status SetValue(const std::string& line, + const std::vector& tokens, int64_t index, + Tensor* tensor) { if (index == kLineNumber) { tensor->flat()(0) = next_id_ + offset_; return absl::OkStatus(); } - const string& token = (index == kWholeLine) ? line : tokens[index]; + const std::string& token = (index == kWholeLine) ? line : tokens[index]; const DataType& dtype = tensor->dtype(); switch (dtype) { case DT_INT32: { @@ -222,7 +224,7 @@ class TextFileLineIterator return errors::InvalidArgument("Field ", token, " in line ", next_id_, " is not a valid int32."); } - tensor->flat()(0) = value + offset_; + tensor->flat()(0) = value + offset_; } break; case DT_INT64: { int64_t value; @@ -267,7 +269,7 @@ class TextFileLineIterator }; absl::Status GetTableHandle(absl::string_view input_name, OpKernelContext* ctx, - string* container, string* table_handle) { + std::string* container, std::string* table_handle) { { mutex* mu; TF_RETURN_IF_ERROR(ctx->input_ref_mutex(input_name, &mu)); @@ -300,8 +302,8 @@ absl::Status GetResourceLookupTable(absl::string_view input_name, absl::Status GetReferenceLookupTable(absl::string_view input_name, OpKernelContext* ctx, LookupInterface** table) { - string container; - string table_handle; + std::string container; + std::string table_handle; TF_RETURN_IF_ERROR( GetTableHandle(input_name, ctx, &container, &table_handle)); return ctx->resource_manager()->Lookup(container, table_handle, table); @@ -335,8 +337,8 @@ absl::Status GetInitializableLookupTable(absl::string_view input_name, handle.name(), " is not initializable"); } } else { - string container; - string table_handle; + std::string container; + std::string table_handle; TF_RETURN_IF_ERROR( GetTableHandle(input_name, ctx, &container, &table_handle)); TF_RETURN_IF_ERROR(ctx->resource_manager()->Lookup(container, table_handle, @@ -353,7 +355,7 @@ absl::Status GetInitializableLookupTable(absl::string_view input_name, absl::Status CheckTableDataTypes(const LookupInterface& table, DataType key_dtype, DataType value_dtype, - const string& table_name) { + const std::string& table_name) { if (table.key_dtype() != 
key_dtype || table.value_dtype() != value_dtype) { return errors::InvalidArgument( "Conflicting key/value dtypes ", DataTypeString(key_dtype), "->", @@ -365,7 +367,7 @@ absl::Status CheckTableDataTypes(const LookupInterface& table, } // Helper function to initialize an InitializableLookupTable from a text file. -absl::Status InitializeTableFromTextFile(const string& filename, +absl::Status InitializeTableFromTextFile(const std::string& filename, int64_t vocab_size, char delimiter, int32_t key_index, int32_t value_index, int64_t offset, Env* env, @@ -376,7 +378,7 @@ absl::Status InitializeTableFromTextFile(const string& filename, } absl::Status InitializeTableFromTextFile( - const string& filename, int64_t vocab_size, char delimiter, + const std::string& filename, int64_t vocab_size, char delimiter, int32_t key_index, int32_t value_index, int64_t offset, Env* env, std::unique_ptr serializer, InitializableLookupTable* table) { diff --git a/tensorflow/core/kernels/lookup_util.h b/tensorflow/core/kernels/lookup_util.h index 677c6a5659fc23..e48718ad805bdb 100644 --- a/tensorflow/core/kernels/lookup_util.h +++ b/tensorflow/core/kernels/lookup_util.h @@ -53,10 +53,10 @@ absl::Status GetInitializableLookupTable(absl::string_view input_name, // table's data types. absl::Status CheckTableDataTypes(const LookupInterface& table, DataType key_dtype, DataType value_dtype, - const string& table_name); + const std::string& table_name); // Initializes `table` from `filename`. -absl::Status InitializeTableFromTextFile(const string& filename, +absl::Status InitializeTableFromTextFile(const std::string& filename, int64_t vocab_size, char delimiter, int32_t key_index, int32_t value_index, int64_t offset, Env* env, @@ -65,7 +65,7 @@ absl::Status InitializeTableFromTextFile(const string& filename, // Initializes `table` from `filename`. `func` may specify how to represent the // initializer as a graphdef, so that the table can be serialized as metadata. 
absl::Status InitializeTableFromTextFile( - const string& filename, int64_t vocab_size, char delimiter, + const std::string& filename, int64_t vocab_size, char delimiter, int32_t key_index, int32_t value_index, int64_t offset, Env* env, std::unique_ptr serializer, InitializableLookupTable* table); diff --git a/tensorflow/core/kernels/lrn_op_test.cc b/tensorflow/core/kernels/lrn_op_test.cc index a4843b04d84b1b..3c8515d522501b 100644 --- a/tensorflow/core/kernels/lrn_op_test.cc +++ b/tensorflow/core/kernels/lrn_op_test.cc @@ -40,13 +40,13 @@ class LRNFloatTest : public OpsTestBase { protected: LRNFloatTest() : philox_(123, 17), rand_(&philox_) {} - int GetIntAttr(const string& name) { + int GetIntAttr(const std::string& name) { int value; TF_CHECK_OK(GetNodeAttr(*node_def(), name, &value)); return value; } - float GetFloatAttr(const string& name) { + float GetFloatAttr(const std::string& name) { float value; TF_CHECK_OK(GetNodeAttr(*node_def(), name, &value)); return value; diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 6949ff554a286b..ab57ba02dccbc4 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -102,7 +102,7 @@ class TensorMapSize : public OpKernel { OP_REQUIRES_OK(ctx, GetInputMap(ctx, 0, &map)); Tensor* result; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &result)); - result->scalar()() = map->tensors().size(); + result->scalar()() = map->tensors().size(); } }; diff --git a/tensorflow/core/kernels/map_stage_op.cc b/tensorflow/core/kernels/map_stage_op.cc index 14787c38e72502..12e018dfdd311d 100644 --- a/tensorflow/core/kernels/map_stage_op.cc +++ b/tensorflow/core/kernels/map_stage_op.cc @@ -489,7 +489,7 @@ class StagingMap : public ResourceBase { return map_.size(); } - string DebugString() const override { return "StagingMap"; } + std::string DebugString() const override { return "StagingMap"; } }; template @@ -736,7 +736,7 @@ class MapSizeOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &size)); // Set it to the actual size - size->scalar().setConstant(map->size()); + size->scalar().setConstant(map->size()); } }; @@ -766,7 +766,7 @@ class MapIncompleteSizeOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &size)); // Set it to the actual size - size->scalar().setConstant(map->incomplete_size()); + size->scalar().setConstant(map->incomplete_size()); } }; diff --git a/tensorflow/core/kernels/matching_files_op.cc b/tensorflow/core/kernels/matching_files_op.cc index 515e58d518a129..c48e6aeeab3bad 100644 --- a/tensorflow/core/kernels/matching_files_op.cc +++ b/tensorflow/core/kernels/matching_files_op.cc @@ -43,7 +43,7 @@ class MatchingFilesOp : public OpKernel { const auto patterns = patterns_t->flat(); int num_patterns = patterns.size(); int num_files = 0; - std::vector> all_fnames(num_patterns); + std::vector> all_fnames(num_patterns); for (int i = 0; i < num_patterns; i++) { OP_REQUIRES_OK(context, context->env()->GetMatchingPaths(patterns(i), &all_fnames[i])); diff --git a/tensorflow/core/kernels/matmul_op_fused.cc b/tensorflow/core/kernels/matmul_op_fused.cc index 4e6a8d5266608d..343eba3db82f97 100644 --- a/tensorflow/core/kernels/matmul_op_fused.cc +++ b/tensorflow/core/kernels/matmul_op_fused.cc @@ -199,7 +199,7 @@ struct LaunchFusedMatMulOp { namespace { #if GOOGLE_CUDA || TF_HIPBLASLT -StatusOr GetBlasLtEpilogOp( +absl::StatusOr GetBlasLtEpilogOp( FusedComputationType fusion) { if (fusion == 
FusedComputationType::kBiasAdd) { return se::gpu::BlasLt::Epilogue::kBias; @@ -235,7 +235,7 @@ se::blas::AlgorithmConfig AutotuneMatmul( // scratch space is deallocated between runs. BlasScratchAllocator scratch_allocator(context); - Status cublaslt_launch = + absl::Status cublaslt_launch = launch_func(scratch_allocator, i, &profile_result); VLOG(4) << " Autotune algorithm " << i @@ -265,7 +265,7 @@ se::blas::AlgorithmConfig AutotuneMatmul( #endif template -StatusOr> AutotuneMatMulImpl( +absl::StatusOr> AutotuneMatMulImpl( OpKernelContext* ctx, std::vector>>& runners, bool actually_do_autotune, const LaunchFunc& launch_func, @@ -292,10 +292,10 @@ StatusOr> AutotuneMatMulImpl( TF_ASSIGN_OR_RETURN(auto desc, runner->ToAlgorithmDesc()); se::dnn::ProfileResult profile_result; - Status cudnn_launch_status = + absl::Status cudnn_launch_status = actually_do_autotune ? launch_func(allocator_used, runner, &profile_result) - : OkStatus(); + : absl::OkStatus(); if (!actually_do_autotune) { // Make the result valid according to `is_valid`. profile_result.set_algorithm(desc); @@ -329,7 +329,7 @@ StatusOr> AutotuneMatMulImpl( } struct FusedMatmulAutotuneGroup { - static string name() { return "FusedMatmul"; } + static std::string name() { return "FusedMatmul"; } }; typedef AutotuneSingleton -StatusOr> AutotuneFusedMatmul( +absl::StatusOr> +AutotuneFusedMatmul( bool cudnn_use_autotune, AutotuneMap>* autotune_map, @@ -350,7 +351,7 @@ StatusOr> AutotuneFusedMatmul( AutotuneEntry autotune_entry; auto* stream = ctx->op_device_context()->stream(); if (!autotune_map->Find(params, &autotune_entry)) { - profiler::ScopedAnnotation trace("cudnn_autotuning"); + tsl::profiler::ScopedAnnotation trace("cudnn_autotuning"); se::TfAllocatorAdapter tf_allocator_adapter(ctx->device()->GetAllocator({}), stream); @@ -371,7 +372,7 @@ StatusOr> AutotuneFusedMatmul( auto launch_func = [&](se::ScratchAllocator* allocator_used, const std::unique_ptr& runner, - se::dnn::ProfileResult* profile_result) -> Status { + se::dnn::ProfileResult* profile_result) -> absl::Status { TF_ASSIGN_OR_RETURN(auto scratch, allocator_used->AllocateBytes( runner->GetWorkspaceSize())); return (*runner)(stream, profile_result, scratch, a_ptr, b_ptr, bias_ptr, @@ -562,8 +563,9 @@ struct LaunchFusedMatMulOp { auto runner_and_scratch = std::move(runner_and_scratch_or).value(); auto& runner = *std::get(runner_and_scratch); - Status cudnn_launch_status = runner( - stream, nullptr, std::get(runner_and_scratch), + absl::Status cudnn_launch_status = runner( + stream, nullptr, + std::get(runner_and_scratch), a_ptr, b_ptr, bias_ptr, c_ptr); OP_REQUIRES_OK(context, cudnn_launch_status); return; diff --git a/tensorflow/core/kernels/matmul_op_impl.h b/tensorflow/core/kernels/matmul_op_impl.h index f4991bc1fe252a..628e6d8dabceb2 100644 --- a/tensorflow/core/kernels/matmul_op_impl.h +++ b/tensorflow/core/kernels/matmul_op_impl.h @@ -477,7 +477,7 @@ struct LaunchBatchMatMul { namespace { // A dummy type to group matmul autotune results together. 
struct BlasLtMatmulAutoTuneGroup { - static string name() { return "MatmulLt"; } + static std::string name() { return "MatmulLt"; } }; typedef AutotuneSingleton; + using DeviceMemoryBytes = stream_executor::DeviceAddress; BlasScratchAllocator(OpKernelContext* context) : memory_limit_(0), total_byte_size_(0), context_(context) {} @@ -503,21 +503,22 @@ class BlasScratchAllocator : public se::ScratchAllocator { int64_t GetMemoryLimitInBytes() override { return memory_limit_; } - tsl::StatusOr AllocateBytes(int64_t byte_size) override { + absl::StatusOr AllocateBytes( + int64_t byte_size) override { Tensor temporary_memory; if (memory_limit_ > 0 && byte_size > memory_limit_) { - return tsl::Status{ + return absl::Status{ absl::StatusCode::kUnavailable, absl::StrCat("Requested memory size (", byte_size, ") exceeds the memory limit (", memory_limit_, ").")}; } AllocationAttributes allocation_attr; allocation_attr.retry_on_failure = false; - Status allocation_status(context_->allocate_temp( + absl::Status allocation_status(context_->allocate_temp( DT_UINT8, TensorShape({byte_size}), &temporary_memory)); if (!allocation_status.ok()) { - return tsl::Status{ + return absl::Status{ absl::StatusCode::kUnavailable, absl::StrCat("Failed to allocate requested memory of (", byte_size, ").")}; @@ -526,11 +527,12 @@ class BlasScratchAllocator : public se::ScratchAllocator { // allocator. allocated_tensors_.push_back(temporary_memory); total_byte_size_ += byte_size; - return tsl::StatusOr(DeviceMemoryBytes::MakeFromByteSize( - temporary_memory.flat().data(), - temporary_memory.flat().size())); + return absl::StatusOr( + DeviceMemoryBytes::MakeFromByteSize( + temporary_memory.flat().data(), + temporary_memory.flat().size())); } - int64 TotalByteSize() { return total_byte_size_; } + int64_t TotalByteSize() { return total_byte_size_; } private: int64_t memory_limit_; @@ -548,9 +550,9 @@ struct LaunchBatchMatMul { se::blas::Transpose trans[] = {se::blas::Transpose::kNoTranspose, se::blas::Transpose::kTranspose, se::blas::Transpose::kConjugateTranspose}; - const uint64 m = in_x.dim_size(adj_x || trans_x ? 2 : 1); - const uint64 k = in_x.dim_size(adj_x || trans_x ? 1 : 2); - const uint64 n = in_y.dim_size(adj_y || trans_y ? 1 : 2); + const uint64_t m = in_x.dim_size(adj_x || trans_x ? 2 : 1); + const uint64_t k = in_x.dim_size(adj_x || trans_x ? 1 : 2); + const uint64_t n = in_y.dim_size(adj_y || trans_y ? 1 : 2); const int64_t batch_size = bcast.output_batch_size(); auto blas_transpose_a = trans[adj_x ? 2 : (trans_x ? 1 : 0)]; auto blas_transpose_b = trans[adj_y ? 2 : (trans_y ? 1 : 0)]; @@ -574,9 +576,9 @@ struct LaunchBatchMatMul { auto* a_base_ptr = in_x.template flat().data(); auto* b_base_ptr = in_y.template flat().data(); auto* c_base_ptr = out->template flat().data(); - uint64 a_stride; - uint64 b_stride; - uint64 c_stride; + uint64_t a_stride; + uint64_t b_stride; + uint64_t c_stride; bool is_full_broadcast = std::min(bcast.x_batch_size(), bcast.y_batch_size()) == 1; @@ -658,9 +660,11 @@ struct LaunchBatchMatMul { // Create a new scratch allocator with every autotuning run so that // scratch space is deallocated between runs. 
BlasScratchAllocator scratch_allocator(context, max_scratch_size); - Status cublas_launch_status = plan_and_algorithms->ExecuteOnStream( - stream, *a_ptrs[0], *b_ptrs[0], *c_ptrs[0], i, - scratch_allocator, se::DeviceMemoryBase{}, &profile_result); + absl::Status cublas_launch_status = + plan_and_algorithms->ExecuteOnStream( + stream, *a_ptrs[0], *b_ptrs[0], *c_ptrs[0], i, + scratch_allocator, stream_executor::DeviceAddressBase{}, + &profile_result); VLOG(4) << " Autotune algorithm " << i << " result: " << profile_result.elapsed_time_in_ms() diff --git a/tensorflow/core/kernels/matmul_op_real.cc b/tensorflow/core/kernels/matmul_op_real.cc index 46fbf83a53e067..54049fb852c008 100644 --- a/tensorflow/core/kernels/matmul_op_real.cc +++ b/tensorflow/core/kernels/matmul_op_real.cc @@ -29,18 +29,18 @@ TF_CALL_int64(REGISTER_BATCH_MATMUL_CPU); REGISTER_BATCH_MATMUL_TOUT_CPU(bfloat16, bfloat16, bfloat16); REGISTER_BATCH_MATMUL_TOUT_CPU(float, float, float); REGISTER_BATCH_MATMUL_TOUT_CPU(double, double, double); -REGISTER_BATCH_MATMUL_TOUT_CPU(int16, int16, int16); -REGISTER_BATCH_MATMUL_TOUT_CPU(int32, int32, int32); +REGISTER_BATCH_MATMUL_TOUT_CPU(int16_t, int16_t, int16_t); +REGISTER_BATCH_MATMUL_TOUT_CPU(int32_t, int32_t, int32_t); REGISTER_BATCH_MATMUL_TOUT_CPU(int64_t, int64_t, int64_t); -REGISTER_BATCH_MATMUL_TOUT_CPU(int8, int8, int32); -REGISTER_BATCH_MATMUL_TOUT_CPU(uint8, int8, int32); -REGISTER_BATCH_MATMUL_TOUT_CPU(int8, uint8, int32); -REGISTER_BATCH_MATMUL_TOUT_CPU(uint8, uint8, int32); - -REGISTER_BATCH_MATMUL_TOUT_CPU(bfloat16, int8, bfloat16); -REGISTER_BATCH_MATMUL_TOUT_CPU(bfloat16, uint8, bfloat16); -REGISTER_BATCH_MATMUL_TOUT_CPU(int8, bfloat16, bfloat16); -REGISTER_BATCH_MATMUL_TOUT_CPU(uint8, bfloat16, bfloat16); +REGISTER_BATCH_MATMUL_TOUT_CPU(int8_t, int8_t, int32_t); +REGISTER_BATCH_MATMUL_TOUT_CPU(uint8_t, int8_t, int32_t); +REGISTER_BATCH_MATMUL_TOUT_CPU(int8_t, uint8_t, int32_t); +REGISTER_BATCH_MATMUL_TOUT_CPU(uint8_t, uint8_t, int32_t); + +REGISTER_BATCH_MATMUL_TOUT_CPU(bfloat16, int8_t, bfloat16); +REGISTER_BATCH_MATMUL_TOUT_CPU(bfloat16, uint8_t, bfloat16); +REGISTER_BATCH_MATMUL_TOUT_CPU(int8_t, bfloat16, bfloat16); +REGISTER_BATCH_MATMUL_TOUT_CPU(uint8_t, bfloat16, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM TF_CALL_GPU_NUMBER_TYPES(REGISTER_BATCH_MATMUL_GPU); diff --git a/tensorflow/core/kernels/matmul_op_test.cc b/tensorflow/core/kernels/matmul_op_test.cc index 4562998b2848aa..e755ceb2beed1a 100644 --- a/tensorflow/core/kernels/matmul_op_test.cc +++ b/tensorflow/core/kernels/matmul_op_test.cc @@ -52,7 +52,7 @@ class FusedMatMulOpTest : public OpsTestBase { // of 'fetch' node into the output Tensor. Optional `fetch_node` parameter // allows to define a fetch node directly using a NodeDef for the ops that are // not supported by the C++ Api. - void RunAndFetch(const tensorflow::Scope& root, const string& fetch, + void RunAndFetch(const tensorflow::Scope& root, const std::string& fetch, Tensor* output, bool allow_gpu_device, const NodeDef* fetch_node = nullptr, absl::Status* last_status = nullptr) { @@ -97,7 +97,8 @@ class FusedMatMulOpTest : public OpsTestBase { // to compare GPU vs CPU numbers, so place all nodes on CPU in this case. const bool place_all_on_gpu = allow_gpu_device && has_gpu_device; - const string device = place_all_on_gpu ? "/device:GPU:0" : "/device:CPU:0"; + const std::string device = + place_all_on_gpu ? 
"/device:GPU:0" : "/device:CPU:0"; for (NodeDef& mutable_node : *graph.mutable_node()) { mutable_node.set_device(device); } @@ -137,7 +138,7 @@ class FusedMatMulOpTest : public OpsTestBase { void RunMatMulWithBiasAndActivation( const Tensor& lhs_data, const Tensor& rhs_data, const Tensor& bias_data, - bool transpose_a, bool transpose_b, const string& activation_type, + bool transpose_a, bool transpose_b, const std::string& activation_type, Tensor* output, bool allow_gpu_device = false) { Scope root = tensorflow::Scope::NewRootScope(); @@ -175,8 +176,8 @@ class FusedMatMulOpTest : public OpsTestBase { void RunFusedMatMulOp(const Tensor& lhs_data, const Tensor& rhs_data, const std::vector& args_data, - const std::vector& fused_ops, bool transpose_a, - bool transpose_b, Tensor* output, + const std::vector& fused_ops, + bool transpose_a, bool transpose_b, Tensor* output, bool allow_gpu_device = false, bool* test_skipped = nullptr) { Scope root = tensorflow::Scope::NewRootScope(); @@ -295,7 +296,7 @@ class FusedMatMulOpTest : public OpsTestBase { // to FusedMatMul. void VerifyConv2DWithBiasAndActivation(int m, int k, int n, bool transpose_a, bool transpose_b, - const string& activation) { + const std::string& activation) { bool use_gpu_device = activation == "Relu" || (this->kTValueType == DT_HALF); const BiasAddGraphRunner run_default = @@ -372,7 +373,7 @@ static auto GetActivations(DataType dtype) { } TYPED_TEST_P(FusedMatMulWithBiasOpTest, MatMul256x128x64WithActivation) { - for (const string& activation : GetActivations(this->kTValueType)) { + for (const std::string& activation : GetActivations(this->kTValueType)) { this->VerifyConv2DWithBiasAndActivation(256, 128, 64, false, false, activation); this->VerifyConv2DWithBiasAndActivation(256, 128, 64, true, false, @@ -385,21 +386,21 @@ TYPED_TEST_P(FusedMatMulWithBiasOpTest, MatMul256x128x64WithActivation) { } TYPED_TEST_P(FusedMatMulWithBiasOpTest, MatMul1x256x256WithActivation) { - for (const string& activation : GetActivations(this->kTValueType)) { + for (const std::string& activation : GetActivations(this->kTValueType)) { this->VerifyConv2DWithBiasAndActivation(1, 256, 256, false, false, activation); } } TYPED_TEST_P(FusedMatMulWithBiasOpTest, MatMul256x256x1WithActivation) { - for (const string& activation : GetActivations(this->kTValueType)) { + for (const std::string& activation : GetActivations(this->kTValueType)) { this->VerifyConv2DWithBiasAndActivation(256, 256, 1, false, false, activation); } } TYPED_TEST_P(FusedMatMulWithBiasOpTest, MatMul1x256x1WithActivation) { - for (const string& activation : GetActivations(this->kTValueType)) { + for (const std::string& activation : GetActivations(this->kTValueType)) { this->VerifyConv2DWithBiasAndActivation(1, 256, 1, false, false, activation); } diff --git a/tensorflow/core/kernels/matmul_util.cc b/tensorflow/core/kernels/matmul_util.cc index 3675018709dfc0..cd3a950f8f5c69 100644 --- a/tensorflow/core/kernels/matmul_util.cc +++ b/tensorflow/core/kernels/matmul_util.cc @@ -36,8 +36,7 @@ int64_t GetWorkspaceLimit(int64_t default_value_in_bytes) { if (workspace_limit_in_mb_str != nullptr && strcmp(workspace_limit_in_mb_str, "") != 0) { int64_t scratch_limit_in_mb = -1; - if (strings::safe_strto64(workspace_limit_in_mb_str, - &scratch_limit_in_mb)) { + if (absl::SimpleAtoi(workspace_limit_in_mb_str, &scratch_limit_in_mb)) { return scratch_limit_in_mb * (1 << 20); } else { LOG(WARNING) << "Invalid value for TF_CUBLAS_WORKSPACE_LIMIT_IN_MB: " @@ -77,7 +76,7 @@ struct BlasLtMatmulPlanMap { int 
MatmulMaxAutotuneAlgorithmCount() { int64_t value; - Status status = + absl::Status status = ReadInt64FromEnvVar("TF_MATMUL_AUTOTUNE_MAX_ALGORITHMS", 10, &value); if (!status.ok()) { LOG(ERROR) << status.message(); @@ -90,7 +89,7 @@ int MatmulMaxAutotuneAlgorithmCount() { return value; } -StatusOr GetBlasComputationType( +absl::StatusOr GetBlasComputationType( se::blas::DataType dtype) { using se::blas::ComputationType; static bool use_f32_for_f16_computation = MatmulDoFP32ComputationFP16Input(); @@ -114,9 +113,11 @@ StatusOr GetBlasComputationType( } // namespace -/* static */ StatusOr PlanAndAlgorithms::GetOrCreate( - se::Stream* stream, const BlasLtMatmulPlanParams& params, - absl::Mutex** ppmu, std::optional max_algorithm_count) { +/* static */ absl::StatusOr +PlanAndAlgorithms::GetOrCreate(se::Stream* stream, + const BlasLtMatmulPlanParams& params, + absl::Mutex** ppmu, + std::optional max_algorithm_count) { static const int64_t max_scratch_size = GetWorkspaceLimit(1LL << 32); // 4GB by default static const int64_t max_autotune_algorithm_count = @@ -189,25 +190,27 @@ StatusOr GetBlasComputationType( return ptr->second.get(); } -Status PlanAndAlgorithms::ExecuteOnStream( - se::Stream* stream, const se::DeviceMemoryBase& a, - const se::DeviceMemoryBase& b, se::DeviceMemoryBase& c, - size_t algorithm_idx, se::ScratchAllocator& scratch_allocator, - const se::DeviceMemoryBase& bias, +absl::Status PlanAndAlgorithms::ExecuteOnStream( + se::Stream* stream, const stream_executor::DeviceAddressBase& a, + const stream_executor::DeviceAddressBase& b, + stream_executor::DeviceAddressBase& c, size_t algorithm_idx, + se::ScratchAllocator& scratch_allocator, + const stream_executor::DeviceAddressBase& bias, se::blas::ProfileResult* profile_result) const { if (!plan || algorithm_idx >= algorithms.size()) { return errors::Internal("MatmulPlan or algorithms are not initialized!"); } TF_RETURN_IF_ERROR(plan->SetAlgorithm(algorithms[algorithm_idx])); - return plan->ExecuteOnStream(stream, a, b, c, c, - bias, // bias_buffer - se::DeviceMemoryBase{}, // aux_buffer - se::DeviceMemoryBase{}, // a_scale_buffer - se::DeviceMemoryBase{}, // b_scale_buffer - se::DeviceMemoryBase{}, // c_scale_buffer - se::DeviceMemoryBase{}, // d_scale_buffer - se::DeviceMemoryBase{}, // d_amax_buffer - scratch_allocator, profile_result); + return plan->ExecuteOnStream( + stream, a, b, c, c, + bias, // bias_buffer + stream_executor::DeviceAddressBase{}, // aux_buffer + stream_executor::DeviceAddressBase{}, // a_scale_buffer + stream_executor::DeviceAddressBase{}, // b_scale_buffer + stream_executor::DeviceAddressBase{}, // c_scale_buffer + stream_executor::DeviceAddressBase{}, // d_scale_buffer + stream_executor::DeviceAddressBase{}, // d_amax_buffer + scratch_allocator, profile_result); } } // namespace tensorflow diff --git a/tensorflow/core/kernels/matmul_util.h b/tensorflow/core/kernels/matmul_util.h index 0bf7f8acb48cf1..abcbe0ad1bea44 100644 --- a/tensorflow/core/kernels/matmul_util.h +++ b/tensorflow/core/kernels/matmul_util.h @@ -51,15 +51,17 @@ struct BlasLtMatmulPlanParams { }; struct PlanAndAlgorithms { - static StatusOr GetOrCreate( + static absl::StatusOr GetOrCreate( se::Stream* stream, const BlasLtMatmulPlanParams& params, absl::Mutex** pmu, std::optional max_algorithm_count = std::nullopt); - Status ExecuteOnStream( - se::Stream* stream, const se::DeviceMemoryBase& a, - const se::DeviceMemoryBase& b, se::DeviceMemoryBase& c, - size_t algorithm_idx, se::ScratchAllocator& scratch_allocator, - const 
se::DeviceMemoryBase& bias = se::DeviceMemoryBase{}, + absl::Status ExecuteOnStream( + se::Stream* stream, const stream_executor::DeviceAddressBase& a, + const stream_executor::DeviceAddressBase& b, + stream_executor::DeviceAddressBase& c, size_t algorithm_idx, + se::ScratchAllocator& scratch_allocator, + const stream_executor::DeviceAddressBase& bias = + stream_executor::DeviceAddressBase{}, se::blas::ProfileResult* profile_result = nullptr) const; se::gpu::BlasLt::MatmulPlanPtr plan; diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index bc99ad59db4543..a9de19492d1aff 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -227,7 +227,7 @@ template class MaxPoolingGradOp : public OpKernel { public: explicit MaxPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -289,16 +289,16 @@ class MaxPoolingGradOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), tensor_out.shape(), &tensor_out_arg_max)); - std::vector ksize = ksize_; - std::vector stride = stride_; + std::vector ksize = ksize_; + std::vector stride = stride_; if (context->num_inputs() == 5) { const Tensor& tensor_ksize = context->input(3); - auto value_ksize = tensor_ksize.flat(); + auto value_ksize = tensor_ksize.flat(); ksize.resize(tensor_ksize.shape().num_elements()); std::copy_n(&value_ksize(0), ksize.size(), ksize.begin()); const Tensor& tensor_stride = context->input(4); - auto value_stride = tensor_stride.flat(); + auto value_stride = tensor_stride.flat(); stride.resize(tensor_stride.shape().num_elements()); std::copy_n(&value_stride(0), stride.size(), stride.begin()); } @@ -351,8 +351,8 @@ class MaxPoolingGradOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; std::vector explicit_paddings_; TensorFormat data_format_; @@ -366,7 +366,7 @@ class MaxPoolingGradOp : public OpKernel { typedef Eigen::GpuDevice Device; explicit MaxPoolingGradOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -412,16 +412,16 @@ class MaxPoolingGradOp : public OpKernel { TensorShape output_shape = tensor_in.shape(); - std::vector ksize = ksize_; - std::vector stride = stride_; + std::vector ksize = ksize_; + std::vector stride = stride_; if (context->num_inputs() == 5) { const Tensor& tensor_ksize = context->input(3); - auto value_ksize = tensor_ksize.flat(); + auto value_ksize = tensor_ksize.flat(); ksize.resize(tensor_ksize.shape().num_elements()); std::copy_n(&value_ksize(0), ksize.size(), ksize.begin()); const Tensor& tensor_stride = context->input(4); - auto value_stride = tensor_stride.flat(); + auto value_stride = tensor_stride.flat(); stride.resize(tensor_stride.shape().num_elements()); std::copy_n(&value_stride(0), stride.size(), stride.begin()); } @@ -452,8 +452,8 @@ class MaxPoolingGradOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; 
std::vector explicit_paddings_; TensorFormat data_format_; @@ -473,7 +473,7 @@ class MaxPoolingGradGradOp : public OpKernel { public: explicit MaxPoolingGradGradOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -518,16 +518,16 @@ class MaxPoolingGradGradOp : public OpKernel { context, out_grad_backprop.dims() == 4, errors::InvalidArgument("out_grad_backprop must be 4-dimensional")); - std::vector ksize = ksize_; - std::vector stride = stride_; + std::vector ksize = ksize_; + std::vector stride = stride_; if (context->num_inputs() == 5) { const Tensor& tensor_ksize = context->input(3); - auto value_ksize = tensor_ksize.flat(); + auto value_ksize = tensor_ksize.flat(); ksize.resize(tensor_ksize.shape().num_elements()); std::copy_n(&value_ksize(0), ksize.size(), ksize.begin()); const Tensor& tensor_stride = context->input(4); - auto value_stride = tensor_stride.flat(); + auto value_stride = tensor_stride.flat(); stride.resize(tensor_stride.shape().num_elements()); std::copy_n(&value_stride(0), stride.size(), stride.begin()); } @@ -683,8 +683,8 @@ class MaxPoolingGradGradOp : public OpKernel { params.tensor_in_batch, shard_cost, shard); } - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat data_format_; }; @@ -698,7 +698,7 @@ class MaxPoolingGradGradOp : public OpKernel { explicit MaxPoolingGradGradOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -739,16 +739,16 @@ class MaxPoolingGradGradOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_output(0, tensor_out.shape(), &output)); - std::vector ksize = ksize_; - std::vector stride = stride_; + std::vector ksize = ksize_; + std::vector stride = stride_; if (context->num_inputs() == 5) { const Tensor& tensor_ksize = context->input(3); - auto value_ksize = tensor_ksize.flat(); + auto value_ksize = tensor_ksize.flat(); ksize.resize(tensor_ksize.shape().num_elements()); std::copy_n(&value_ksize(0), ksize.size(), ksize.begin()); const Tensor& tensor_stride = context->input(4); - auto value_stride = tensor_stride.flat(); + auto value_stride = tensor_stride.flat(); stride.resize(tensor_stride.shape().num_elements()); std::copy_n(&value_stride(0), stride.size(), stride.begin()); } @@ -798,8 +798,8 @@ class MaxPoolingGradGradOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat data_format_; bool use_dnn_; @@ -815,7 +815,7 @@ class MaxPoolingNoMaskOp : public OpKernel { public: explicit MaxPoolingNoMaskOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -866,8 +866,8 @@ class MaxPoolingNoMaskOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat 
data_format_; }; @@ -877,7 +877,7 @@ class MaxPoolingNoMaskV2Op : public OpKernel { public: explicit MaxPoolingNoMaskV2Op(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -912,17 +912,17 @@ class MaxPoolingNoMaskV2Op : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - std::vector ksize = ksize_; - std::vector stride = stride_; + std::vector ksize = ksize_; + std::vector stride = stride_; if (context->num_inputs() != 1) { const Tensor& tensor_ksize = context->input(1); - auto value_ksize = tensor_ksize.flat(); + auto value_ksize = tensor_ksize.flat(); ksize.resize(tensor_ksize.shape().num_elements()); std::copy_n(&value_ksize(0), ksize.size(), ksize.begin()); const Tensor& tensor_stride = context->input(2); - auto value_stride = tensor_stride.flat(); + auto value_stride = tensor_stride.flat(); stride.resize(tensor_stride.shape().num_elements()); std::copy_n(&value_stride(0), stride.size(), stride.begin()); } @@ -956,8 +956,8 @@ class MaxPoolingNoMaskV2Op : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat data_format_; }; @@ -1036,8 +1036,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; bool propagate_nans_; bool include_batch_in_index_; @@ -1109,7 +1109,7 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel { public: explicit MaxPoolingGradWithArgmaxOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format_str; + std::string data_format_str; if (std::is_same::value) { OP_REQUIRES(context, !tensorflow::OpDeterminismRequired(), errors::Unimplemented("Determinism is not yet supported " @@ -1187,8 +1187,8 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat data_format_; bool include_batch_in_index_; @@ -1257,8 +1257,8 @@ class MaxPoolingGradGradWithArgmaxOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; bool include_batch_in_index_; }; @@ -1270,7 +1270,7 @@ class MaxPoolingNoMaskOp : public OpKernel { typedef GPUDevice Device; explicit MaxPoolingNoMaskOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -1372,8 +1372,8 @@ class MaxPoolingNoMaskOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; std::vector explicit_paddings_; TensorFormat data_format_; @@ -1386,7 +1386,7 @@ class MaxPoolingNoMaskV2Op : public OpKernel { typedef GPUDevice Device; explicit MaxPoolingNoMaskV2Op(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, 
&data_format_), errors::InvalidArgument("Invalid data format")); @@ -1413,17 +1413,17 @@ class MaxPoolingNoMaskV2Op : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - std::vector ksize = ksize_; - std::vector stride = stride_; + std::vector ksize = ksize_; + std::vector stride = stride_; if (context->num_inputs() != 1) { const Tensor& tensor_ksize = context->input(1); - auto value_ksize = tensor_ksize.flat(); + auto value_ksize = tensor_ksize.flat(); ksize.resize(tensor_ksize.shape().num_elements()); std::copy_n(&value_ksize(0), ksize.size(), ksize.begin()); const Tensor& tensor_stride = context->input(2); - auto value_stride = tensor_stride.flat(); + auto value_stride = tensor_stride.flat(); stride.resize(tensor_stride.shape().num_elements()); std::copy_n(&value_stride(0), stride.size(), stride.begin()); } @@ -1471,8 +1471,8 @@ class MaxPoolingNoMaskV2Op : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; std::vector explicit_paddings_; TensorFormat data_format_; diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc index 759811dd74ec47..e7799161eba16c 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc @@ -70,7 +70,7 @@ __global__ void MaxPoolForwardNCHW( const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, - const int pad_l, dtype* __restrict__ top_data, int64* __restrict__ mask, + const int pad_l, dtype* __restrict__ top_data, int64_t* __restrict__ mask, const bool include_batch_in_index) { GPU_1D_KERNEL_LOOP(index, nthreads) { int pw = index % pooled_width; @@ -110,13 +110,13 @@ __global__ void MaxPoolForwardNCHW( // the same X, y coordinate. // (so channels = outer_channels, output_size = real output size / 4). __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C( - const int nthreads, const int32* __restrict__ bottom_data, const int height, - const int width, const int channels, const int pooled_height, - const int pooled_width, const int kernel_h, const int kernel_w, - const int stride_h, const int stride_w, const int pad_t, const int pad_l, - int32* __restrict__ top_data) { + const int nthreads, const int32_t* __restrict__ bottom_data, + const int height, const int width, const int channels, + const int pooled_height, const int pooled_width, const int kernel_h, + const int kernel_w, const int stride_h, const int stride_w, const int pad_t, + const int pad_l, int32_t* __restrict__ top_data) { // TODO(pauldonnelly): Implement a better optimized version of this kernel. 
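For reference, the kMinINT8X4 constant appearing just below packs four int8 minima into a single 32-bit word: the NCHW_VECT_C layout stores four int8 channel values per int32, and 0x80 is -128 in two's complement, so 0x80808080 initializes every lane to the smallest representable value before the max reduction. A minimal standalone sketch of that packing (standard library only; illustrative, not taken from the kernel itself):

#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
  // Four packed int8 lanes, all set to INT8_MIN (-128 == 0x80), in one 32-bit word.
  const std::uint32_t packed = 0x80808080u;
  std::int8_t lanes[4];
  std::memcpy(lanes, &packed, sizeof(lanes));
  for (std::int8_t lane : lanes) {
    std::cout << static_cast<int>(lane) << '\n';  // prints -128 for each lane
  }
  return 0;
}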
- const int32 kMinINT8X4 = 0x80808080; + const int32_t kMinINT8X4 = 0x80808080; GPU_1D_KERNEL_LOOP(index, nthreads) { int pw = index % pooled_width; int ph = (index / pooled_width) % pooled_height; @@ -128,8 +128,8 @@ __global__ void MaxPoolForwardNoMaskKernel_NCHW_VECT_C( int wend = min(wstart + kernel_w, width); hstart = max(hstart, 0); wstart = max(wstart, 0); - int32 maxval = kMinINT8X4; - const int32* bottom_data_n = bottom_data + n * channels * height * width; + int32_t maxval = kMinINT8X4; + const int32_t* bottom_data_n = bottom_data + n * channels * height * width; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { int idx = (c * height + h) * width + w; @@ -147,7 +147,7 @@ __global__ void MaxPoolForwardNHWC( const int width, const int channels, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, - dtype* __restrict__ top_data, int64* __restrict__ mask, + dtype* __restrict__ top_data, int64_t* __restrict__ mask, const bool include_batch_in_index) { GPU_1D_KERNEL_LOOP(index, nthreads) { int n = index; @@ -203,7 +203,7 @@ __global__ void MaxPoolForwardNHWC( template __global__ void MaxPoolBackward(const int nthreads, const dtype* __restrict__ top_diff, - const int64* __restrict__ mask, + const int64_t* __restrict__ mask, const int top_offset, const int bottom_offset, dtype* __restrict__ bottom_diff, const bool include_batch_in_index) { @@ -332,7 +332,7 @@ __global__ void MaxPoolGradBackwardNoMaskNHWC( template __global__ void MaxPoolGradBackward(const int nthreads, const dtype* __restrict__ top_diff, - const int64* __restrict__ mask, + const int64_t* __restrict__ mask, const int top_offset, const int bottom_offset, dtype* __restrict__ bottom_diff, @@ -353,11 +353,11 @@ namespace functor { // Note: channels is the outer channels (dim 1) which has already been // divided by 4. 
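The int64_t* mask parameters threaded through the forward/backward kernels above and below hold flattened argmax indices into the NHWC input. As I read the documented MaxPoolWithArgmax semantics, a maximum at [b, y, x, c] maps to ((b * height + y) * width + x) * channels + c when include_batch_in_index is true and to (y * width + x) * channels + c otherwise. A standalone sketch of that index arithmetic (helper name and parameters are illustrative only):

#include <cstdint>
#include <iostream>

// Flattened argmax index for a maximum found at (batch b, row y, col x, channel c)
// in an NHWC input, following the documented MaxPoolWithArgmax convention.
std::int64_t FlattenedArgmaxIndex(std::int64_t b, std::int64_t y, std::int64_t x,
                                  std::int64_t c, std::int64_t height,
                                  std::int64_t width, std::int64_t channels,
                                  bool include_batch_in_index) {
  const std::int64_t within_batch = (y * width + x) * channels + c;
  return include_batch_in_index
             ? b * height * width * channels + within_batch
             : within_batch;
}

int main() {
  // Same position, with and without the batch dimension folded in.
  std::cout << FlattenedArgmaxIndex(1, 2, 3, 0, 4, 5, 8, true) << '\n';
  std::cout << FlattenedArgmaxIndex(1, 2, 3, 0, 4, 5, 8, false) << '\n';
  return 0;
}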
bool MaxPoolForwardNoMask_NCHW_VECT_C::operator()( - const int32* bottom_data, const int batch, const int height, + const int32_t* bottom_data, const int batch, const int height, const int width, int channels, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, - int32* top_data, const Eigen::GpuDevice& d) { + int32_t* top_data, const Eigen::GpuDevice& d) { const int kThreadsPerBlock = 1024; const int output_size = batch * channels * pooled_height * pooled_width; if (output_size == 0) return true; @@ -377,7 +377,7 @@ bool MaxPoolForwardWithOptionalArgmax::operator()( const int channels, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_t, const int pad_l, T* top_data, - int64* mask, const Eigen::GpuDevice& d, bool propagate_nans, + int64_t* mask, const Eigen::GpuDevice& d, bool propagate_nans, const bool include_batch_in_index) { const int kThreadsPerBlock = 1024; const int output_size = batch * channels * pooled_height * pooled_width; @@ -405,7 +405,7 @@ bool MaxPoolForwardWithOptionalArgmax::operator()( template bool MaxPoolBackwardWithArgmax::operator()( const int output_size, const int input_size, const T* top_diff, - const int64* mask, const int top_offset, const int bottom_offset, + const int64_t* mask, const int top_offset, const int bottom_offset, T* bottom_diff, const Eigen::GpuDevice& d, const bool include_batch_in_index) { const int kThreadsPerBlock = 1024; @@ -454,7 +454,7 @@ bool MaxPoolGradBackwardNoMask::operator()( template bool MaxPoolGradBackwardWithArgmax::operator()( const int output_size, const int input_size, const T* top_diff, - const int64* mask, const int top_offset, const int bottom_offset, + const int64_t* mask, const int top_offset, const int bottom_offset, T* bottom_diff, const Eigen::GpuDevice& d, const bool include_batch_in_index) { if (input_size == 0) return true; diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h index 650a01e3ff0dc1..3e8ba784d9714e 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.h +++ b/tensorflow/core/kernels/maxpooling_op_gpu.h @@ -44,11 +44,11 @@ struct MaxPoolForwardWithOptionalArgmax { }; struct MaxPoolForwardNoMask_NCHW_VECT_C { - bool operator()(const int32* bottom_data, const int batch, const int height, + bool operator()(const int32_t* bottom_data, const int batch, const int height, const int width, int channels, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, - const int pad_t, const int pad_l, int32* top_data, + const int pad_t, const int pad_l, int32_t* top_data, const Eigen::GpuDevice& d); }; diff --git a/tensorflow/core/kernels/merge_v2_checkpoints_op_test.cc b/tensorflow/core/kernels/merge_v2_checkpoints_op_test.cc index cc838aace88f33..d1185f0d5d7998 100644 --- a/tensorflow/core/kernels/merge_v2_checkpoints_op_test.cc +++ b/tensorflow/core/kernels/merge_v2_checkpoints_op_test.cc @@ -34,7 +34,8 @@ limitations under the License. 
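The pooling GPU functors above size their launches from an output element count and kThreadsPerBlock = 1024; the block count is typically the ceiling division of one by the other, with an early return when there is nothing to launch. A standalone sketch of that arithmetic (names chosen here for illustration, not quoted from the file):

#include <cstdint>
#include <iostream>

// Enough blocks of threads_per_block threads to cover every output element.
std::int64_t NumBlocks(std::int64_t output_size, std::int64_t threads_per_block) {
  return (output_size + threads_per_block - 1) / threads_per_block;
}

int main() {
  const std::int64_t kThreadsPerBlock = 1024;
  std::cout << NumBlocks(0, kThreadsPerBlock) << '\n';     // 0: nothing to launch
  std::cout << NumBlocks(1, kThreadsPerBlock) << '\n';     // 1
  std::cout << NumBlocks(4096, kThreadsPerBlock) << '\n';  // 4
  std::cout << NumBlocks(4097, kThreadsPerBlock) << '\n';  // 5
  return 0;
}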
namespace tensorflow { namespace { -void WriteCheckpoint(const string& prefix, absl::Span names, +void WriteCheckpoint(const std::string& prefix, + absl::Span names, absl::Span tensors) { BundleWriter writer(Env::Default(), prefix); ASSERT_TRUE(names.size() == tensors.size()); @@ -65,12 +66,12 @@ class MergeV2CheckpointsOpTest : public OpsTestBase { void RunMergeTest(bool delete_old_dirs, bool allow_missing_files) { // Writes two checkpoints. - const std::vector prefixes = { + const std::vector prefixes = { io::JoinPath(testing::TmpDir(), "worker0/ckpt0"), io::JoinPath(testing::TmpDir(), "worker1/ckpt1"), io::JoinPath(testing::TmpDir(), "merged/ckpt") /* merged prefix */}; // In a different directory, to exercise "delete_old_dirs". - const string& kMergedPrefix = prefixes[2]; + const std::string& kMergedPrefix = prefixes[2]; // Only write this particular checkpoint if we do not allow missing files. if (!allow_missing_files) { @@ -123,9 +124,10 @@ class MergeV2CheckpointsOpTest : public OpsTestBase { for (int i = 0; i < 2; ++i) { // If we allow missing files, the first checkpoint file did not exist. if (allow_missing_files && i == 0) continue; - int directory_found = Env::Default() - ->IsDirectory(string(io::Dirname(prefixes[i]))) - .raw_code(); + int directory_found = + Env::Default() + ->IsDirectory(std::string(io::Dirname(prefixes[i]))) + .raw_code(); if (delete_old_dirs) { EXPECT_EQ(error::NOT_FOUND, directory_found); } else { diff --git a/tensorflow/core/kernels/mfcc_op.cc b/tensorflow/core/kernels/mfcc_op.cc index 2c5f9560aaa31c..760781605239fb 100644 --- a/tensorflow/core/kernels/mfcc_op.cc +++ b/tensorflow/core/kernels/mfcc_op.cc @@ -49,7 +49,7 @@ class MfccOp : public OpKernel { errors::InvalidArgument( "Input sample_rate should be a scalar tensor, got ", sample_rate_tensor.shape().DebugString(), " instead.")); - const int32_t sample_rate = sample_rate_tensor.scalar()(); + const int32_t sample_rate = sample_rate_tensor.scalar()(); const int spectrogram_channels = spectrogram.dim_size(2); const int spectrogram_samples = spectrogram.dim_size(1); @@ -105,8 +105,8 @@ class MfccOp : public OpKernel { private: float upper_frequency_limit_; float lower_frequency_limit_; - int32 filterbank_channel_count_; - int32 dct_coefficient_count_; + int32_t filterbank_channel_count_; + int32_t dct_coefficient_count_; }; REGISTER_KERNEL_BUILDER(Name("Mfcc").Device(DEVICE_CPU), MfccOp); diff --git a/tensorflow/core/kernels/mkl/BUILD b/tensorflow/core/kernels/mkl/BUILD index 99786dee930818..702fae1c2b37ea 100644 --- a/tensorflow/core/kernels/mkl/BUILD +++ b/tensorflow/core/kernels/mkl/BUILD @@ -427,7 +427,6 @@ tf_cc_test_mkl( size = "small", srcs = ["mkl_fused_batch_norm_op_test.cc"], linkstatic = 1, - tags = ["cuda-only"], # fails on AMD Rome CPUs as of 2021-03-29 deps = [ ":mkl_conv_op", ":mkl_fused_batch_norm_op", diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index b2f1e1d16579bf..7105823f79f543 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -557,7 +557,6 @@ tf_cuda_cc_test( tags = tf_cuda_tests_tags() + [ "no_cuda", # TODO(b/196608406): re-enable "no_cuda_asan", # TODO(b/171341759): re-enable. 
- "cuda-only", ], deps = [ ":base_binary_ops_test", diff --git a/tensorflow/core/kernels/multinomial_op_gpu.cu.cc b/tensorflow/core/kernels/multinomial_op_gpu.cu.cc index 6136177effa4f3..9a76c85aba09c7 100644 --- a/tensorflow/core/kernels/multinomial_op_gpu.cu.cc +++ b/tensorflow/core/kernels/multinomial_op_gpu.cu.cc @@ -39,8 +39,8 @@ using GPUDevice = Eigen::GpuDevice; // Kernel for Multinomial op. Data is interpreted to have the following shapes: // scores: [B, S, C]; maxima: [B, S]; output: [B, S]. template -__global__ void MultinomialKernel(int32 nthreads, const int32 num_classes, - const int32 num_samples, +__global__ void MultinomialKernel(int32_t nthreads, const int32_t num_classes, + const int32_t num_samples, const float* __restrict__ scores, const float* __restrict__ maxima, OutputType* __restrict__ output) { @@ -113,7 +113,7 @@ struct MultinomialFunctor { // Necessary for atomicMax() inside the kernel. output.device(d) = output.constant(0LL); - const int32 work_items = batch_size * num_samples * num_classes; + const int32_t work_items = batch_size * num_samples * num_classes; GpuLaunchConfig config = GetGpuLaunchConfig(work_items, d); TF_CHECK_OK(GpuLaunchKernel( MultinomialKernel, config.block_count, diff --git a/tensorflow/core/kernels/multinomial_op_test.cc b/tensorflow/core/kernels/multinomial_op_test.cc index df2d0af01c7bea..e7ce0bddbd6119 100644 --- a/tensorflow/core/kernels/multinomial_op_test.cc +++ b/tensorflow/core/kernels/multinomial_op_test.cc @@ -29,7 +29,7 @@ static Graph* Multinomial(int batch_size, int num_classes, int num_samples) { Tensor logits_t(DT_FLOAT, TensorShape({batch_size, num_classes})); Tensor num_samples_t(DT_INT32, TensorShape()); logits_t.flat().setRandom(); - num_samples_t.scalar().setConstant(num_samples); + num_samples_t.scalar().setConstant(num_samples); Node* ret; TF_CHECK_OK(NodeBuilder(g->NewName("multinomial"), "Multinomial") diff --git a/tensorflow/core/kernels/mutex_ops.cc b/tensorflow/core/kernels/mutex_ops.cc index 8fa7170c3c0c59..61a745df498cdd 100644 --- a/tensorflow/core/kernels/mutex_ops.cc +++ b/tensorflow/core/kernels/mutex_ops.cc @@ -36,7 +36,7 @@ namespace { class Mutex : public ResourceBase { public: - explicit Mutex(OpKernelContext* c, const string& name) + explicit Mutex(OpKernelContext* c, const std::string& name) : locked_(false), thread_pool_(new thread::ThreadPool( c->env(), ThreadOptions(), @@ -46,7 +46,9 @@ class Mutex : public ResourceBase { VLOG(2) << "Creating mutex with name " << name << ": " << this; } - string DebugString() const override { return absl::StrCat("Mutex ", name_); } + std::string DebugString() const override { + return absl::StrCat("Mutex ", name_); + } class LockReleaser { public: @@ -127,7 +129,7 @@ class Mutex : public ResourceBase { condition_variable cv_ TF_GUARDED_BY(mu_); bool locked_ TF_GUARDED_BY(mu_); std::unique_ptr thread_pool_; - string name_; + std::string name_; }; } // namespace diff --git a/tensorflow/core/kernels/nccl_ops.cc b/tensorflow/core/kernels/nccl_ops.cc index 00242596140499..77eb070e628576 100644 --- a/tensorflow/core/kernels/nccl_ops.cc +++ b/tensorflow/core/kernels/nccl_ops.cc @@ -52,7 +52,7 @@ class NcclAsyncOpBase : public AsyncOpKernel { OP_REQUIRES_OK(c, c->GetAttr("shared_name", &collective_prefix_)); } - string GetCollectiveKey(OpKernelContext* c) { + std::string GetCollectiveKey(OpKernelContext* c) { return strings::StrCat(collective_prefix_, ";", c->step_id(), ";", c->frame_iter().frame_id, ":", c->frame_iter().iter_id); @@ -62,7 +62,7 @@ class NcclAsyncOpBase : 
public AsyncOpKernel { private: int num_devices_; - string collective_prefix_; + std::string collective_prefix_; NcclAsyncOpBase(const NcclAsyncOpBase&) = delete; void operator=(const NcclAsyncOpBase&) = delete; @@ -71,7 +71,7 @@ class NcclAsyncOpBase : public AsyncOpKernel { class NcclReduceOpBase : public NcclAsyncOpBase { public: explicit NcclReduceOpBase(OpKernelConstruction* c) : NcclAsyncOpBase(c) { - string reduction; + std::string reduction; OP_REQUIRES_OK(c, c->GetAttr("reduction", &reduction)); if (reduction == "min") { reduction_op_ = ncclMin; @@ -106,7 +106,7 @@ class NcclAllReduceOpKernel : public NcclReduceOpBase { OP_REQUIRES_OK_ASYNC( c, c->forward_input_or_allocate_output({0}, 0, input->shape(), &output), done); - auto actual_done = [c, done](Status s) { + auto actual_done = [c, done](absl::Status s) { OP_REQUIRES_OK_ASYNC(c, s, done); done(); }; @@ -137,7 +137,7 @@ class NcclReduceSendKernel : public NcclReduceOpBase { : NcclReduceOpBase(c) {} void ComputeAsync(OpKernelContext* c, DoneCallback done) override { - auto actual_done = [c, done](Status s) { + auto actual_done = [c, done](absl::Status s) { OP_REQUIRES_OK_ASYNC(c, s, done); done(); }; @@ -173,7 +173,7 @@ class NcclReduceRecvKernel : public NcclReduceOpBase { OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, input->shape(), &output), done); - auto actual_done = [c, done](Status s) { + auto actual_done = [c, done](absl::Status s) { OP_REQUIRES_OK_ASYNC(c, s, done); done(); }; @@ -207,7 +207,7 @@ class NcclBroadcastSendKernel : public NcclAsyncOpBase { : NcclAsyncOpBase(c) {} void ComputeAsync(OpKernelContext* c, DoneCallback done) override { - auto actual_done = [c, done](Status s) { + auto actual_done = [c, done](absl::Status s) { OP_REQUIRES_OK_ASYNC(c, s, done); done(); }; @@ -239,11 +239,11 @@ class NcclBroadcastRecvKernel : public NcclAsyncOpBase { const Tensor& shape_t = c->input(0); TensorShape shape; OP_REQUIRES_OK_ASYNC( - c, TensorShapeUtils::MakeShape(shape_t.vec(), &shape), done); + c, TensorShapeUtils::MakeShape(shape_t.vec(), &shape), done); Tensor* output; OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape, &output), done); - auto actual_done = [c, done](Status s) { + auto actual_done = [c, done](absl::Status s) { OP_REQUIRES_OK_ASYNC(c, s, done); done(); }; diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc index 87b3f4d98d344f..dfaad0122c6e57 100644 --- a/tensorflow/core/kernels/nn_ops_test.cc +++ b/tensorflow/core/kernels/nn_ops_test.cc @@ -57,8 +57,9 @@ limitations under the License. 
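The Status -> absl::Status spelling change running through the NCCL done callbacks above is a rename to the canonical Abseil type; assuming the usual tensorflow::Status-to-absl::Status aliasing, it changes spelling rather than behavior. A minimal sketch of producing and checking an absl::Status, using only Abseil and outside the NCCL code itself:

#include <iostream>

#include "absl/status/status.h"

// Returns OK for non-negative input, InvalidArgument otherwise.
absl::Status CheckNonNegative(int value) {
  if (value < 0) {
    return absl::InvalidArgumentError("value must be non-negative");
  }
  return absl::OkStatus();
}

int main() {
  absl::Status s = CheckNonNegative(-1);
  if (!s.ok()) {
    std::cout << s.message() << '\n';  // "value must be non-negative"
  }
  return 0;
}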
namespace tensorflow { -static void SetConstOp(const string& name, std::initializer_list dims, - DataType data_type, NodeDef* node) { +static void SetConstOp(const std::string& name, + std::initializer_list dims, DataType data_type, + NodeDef* node) { Tensor tensor(data_type, TensorShape(dims)); for (int64_t i = 0; i < tensor.NumElements(); ++i) { switch (data_type) { @@ -81,13 +82,13 @@ static void SetConstOp(const string& name, std::initializer_list dims, .Finalize(node)); } -static void SetConstSizesOp(const string& name, const std::vector& sizes, - NodeDef* node) { +static void SetConstSizesOp(const std::string& name, + const std::vector& sizes, NodeDef* node) { TensorShape shape; shape.AddDim(sizes.size()); Tensor tensor(DT_INT32, shape); for (int64_t i = 0; i < tensor.NumElements(); ++i) { - tensor.flat()(i) = sizes[i]; + tensor.flat()(i) = sizes[i]; } TF_CHECK_OK(NodeDefBuilder(name, "Const") .Attr("dtype", DT_INT32) @@ -112,7 +113,7 @@ static void BM_ConvFloat(::testing::benchmark::State& state, int batch, int filter_rows, int filter_cols, CONV_OP op, int num_threads, int stride, Padding padding, bool use_gpu, DataType data_type, - const string& label) { + const std::string& label) { if (!IsGoogleCudaEnabled() && use_gpu) { state.SkipWithError( absl::StrCat("Skipping GPU test (no --config=cuda): ", label)); @@ -159,19 +160,19 @@ static void BM_ConvFloat(::testing::benchmark::State& state, int batch, SetConstOp("output_backprop", {batch, out_rows, out_cols, out_depth}, data_type, graph.add_node()); SetConstSizesOp("input_sizes", - std::vector({batch, rows, cols, in_depth}), + std::vector({batch, rows, cols, in_depth}), graph.add_node()); SetConstSizesOp( "filter_sizes", - std::vector({filter_rows, filter_cols, in_depth, out_depth}), + std::vector({filter_rows, filter_cols, in_depth, out_depth}), graph.add_node()); - SetConstSizesOp("resize_size", std::vector({rows, cols}), + SetConstSizesOp("resize_size", std::vector({rows, cols}), graph.add_node()); TensorShape paddings_shape({4, 2}); Tensor paddings_tensor(DT_INT32, paddings_shape); for (int64_t i = 0; i < paddings_tensor.NumElements(); ++i) { - paddings_tensor.flat()(i) = 0; + paddings_tensor.flat()(i) = 0; } TF_CHECK_OK(NodeDefBuilder("paddings", "Const") .Attr("dtype", DT_INT32) @@ -234,7 +235,7 @@ static void BM_ConvFloat(::testing::benchmark::State& state, int batch, GraphConstructorOptions opts; TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph, g)); - string device = use_gpu ? "gpu" : "cpu"; + std::string device = use_gpu ? 
"gpu" : "cpu"; test::Benchmark(device, g, &options, nullptr, nullptr, "", /*old_benchmark_api*/ false) .Run(state); @@ -540,7 +541,7 @@ static void BM_ConvFloatDepthwise(::testing::benchmark::State& state, int batch, int filter_rows, int filter_cols, DEPTHWISE_CONV_OP op, int num_threads, int stride, Padding padding, bool use_gpu, - const string& label) { + const std::string& label) { if (!IsGoogleCudaEnabled() && use_gpu) { state.SkipWithError( absl::StrCat("Skipping GPU test (no --config=cuda): ", label)); @@ -594,10 +595,10 @@ static void BM_ConvFloatDepthwise(::testing::benchmark::State& state, int batch, SetConstOp("output_backprop", {batch, out_rows, out_cols, out_depth}, dtype, graph.add_node()); SetConstSizesOp("input_sizes", - std::vector({batch, rows, cols, in_depth}), + std::vector({batch, rows, cols, in_depth}), graph.add_node()); SetConstSizesOp("filter_sizes", - std::vector( + std::vector( {filter_rows, filter_cols, in_depth, depth_multiplier}), graph.add_node()); @@ -637,7 +638,7 @@ static void BM_ConvFloatDepthwise(::testing::benchmark::State& state, int batch, GraphConstructorOptions opts; TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph, g)); - string device = use_gpu ? "gpu" : "cpu"; + std::string device = use_gpu ? "gpu" : "cpu"; test::Benchmark(device, g, &options, nullptr, nullptr, "", /*old_benchmark_api=*/false) .Run(state); @@ -788,7 +789,7 @@ BM_ConvFloatDepthwiseBk_All(bfloat16); static void BM_LRNFloat(::testing::benchmark::State& state, int depth, int cols, int rows, int batch_size, int range, int num_threads, - const string& label) { + const std::string& label) { std::unique_ptr device( DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0")); @@ -869,7 +870,7 @@ AvgPooling Op static void BM_AvgPool(::testing::benchmark::State& state, int batch_size, int rows, int cols, int depth, int kernel_rows, int kernel_cols, int stride, Padding padding, - int num_threads, const string& label) { + int num_threads, const std::string& label) { std::unique_ptr device( DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0")); @@ -960,7 +961,7 @@ BM_AvgPoolFwdCPU(32, 14, 14, 576, 3, 3, 2, SAME, 4, "avgpool10_SAME"); static void BM_AvgPoolBk(::testing::benchmark::State& state, int batch_size, int rows, int cols, int depth, int kernel_rows, int kernel_cols, int stride, Padding padding, - int num_threads, const string& label) { + int num_threads, const std::string& label) { std::unique_ptr device( DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0")); @@ -979,9 +980,9 @@ static void BM_AvgPoolBk(::testing::benchmark::State& state, int batch_size, TensorShape output_shape({batch_size, out_height, out_width, depth}); TensorShape shape2({4}); Tensor input_shape_tensor(DT_INT32, shape2); - int32 input_dims[] = {batch_size, rows, cols, depth}; + int32_t input_dims[] = {batch_size, rows, cols, depth}; for (int i = 0; i < 4; i++) { - input_shape_tensor.flat()(i) = input_dims[i]; + input_shape_tensor.flat()(i) = input_dims[i]; } inputs.push_back({nullptr, &input_shape_tensor}); @@ -1063,7 +1064,7 @@ MaxPooling Op static void BM_MaxPool(::testing::benchmark::State& state, int batch_size, int rows, int cols, int depth, int kernel_rows, int kernel_cols, int stride, Padding padding, - int num_threads, const string& label) { + int num_threads, const std::string& label) { SessionOptions options; options.config.set_intra_op_parallelism_threads(num_threads); @@ -1158,7 +1159,8 @@ BM_MaxPoolFwdCPU(32, 14, 14, 576, 3, 3, 2, SAME, 4, "maxpool10_SAME"); static void 
BM_MaxPoolBk(::testing::benchmark::State& state, int batch_size, int rows, int cols, int depth, int kernel_rows, int kernel_cols, int stride, Padding padding, - int num_threads, bool use_gpu, const string& label) { + int num_threads, bool use_gpu, + const std::string& label) { if (!IsGoogleCudaEnabled() && use_gpu) { state.SkipWithError( absl::StrCat("Skipping GPU test (no --config=cuda): ", label)); @@ -1192,7 +1194,7 @@ static void BM_MaxPoolBk(::testing::benchmark::State& state, int batch_size, TF_CHECK_OK(root.status()); Graph* g = new Graph(OpRegistry::Global()); TF_CHECK_OK(root.ToGraph(g)); - string device = use_gpu ? "gpu" : "cpu"; + std::string device = use_gpu ? "gpu" : "cpu"; test::Benchmark(device, g, /*old_benchmark_api*/ false).Run(state); state.SetItemsProcessed(batch_size * rows * cols * depth * @@ -1252,7 +1254,7 @@ Run benchmark with: */ static void BM_ReluFloat(::testing::benchmark::State& state, int batch_size, int rows, int cols, int depth, int num_threads, - const string& label) { + const std::string& label) { std::unique_ptr device( DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0")); @@ -1323,7 +1325,7 @@ Run benchmark with: */ static void BM_SoftplusFloat(::testing::benchmark::State& state, int batch_size, int rows, int cols, int depth, int num_threads, - const string& label) { + const std::string& label) { std::unique_ptr device( DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0")); @@ -1392,7 +1394,7 @@ BM_Softplus(32, 14, 14, 576, 4, "softplus10"); static void BM_ImageNetSoftmaxFwd(::testing::benchmark::State& state, int batch_size, int node_depth, int num_threads, bool use_gpu, - const string& label) { + const std::string& label) { if (!IsGoogleCudaEnabled() && use_gpu) { state.SkipWithError( absl::StrCat("Skipping GPU test (no --config=cuda): ", label)); @@ -1409,7 +1411,7 @@ static void BM_ImageNetSoftmaxFwd(::testing::benchmark::State& state, TF_CHECK_OK(root.status()); Graph* g = new Graph(OpRegistry::Global()); TF_CHECK_OK(root.ToGraph(g)); - string device = use_gpu ? "gpu" : "cpu"; + std::string device = use_gpu ? "gpu" : "cpu"; SessionOptions opts; opts.config.set_inter_op_parallelism_threads(1); opts.config.set_intra_op_parallelism_threads(num_threads); @@ -1444,7 +1446,8 @@ BM_ImageNetSoftmaxFwd(8192, 1024, 1, true, "softmax32"); BM_ImageNetSoftmaxFwd(8192, 32768, 1, true, "softmax128"); static void BM_TopK(::testing::benchmark::State& state, int rows, int cols, - int k, int num_threads, bool use_gpu, const string& label) { + int k, int num_threads, bool use_gpu, + const std::string& label) { if (!IsGoogleCudaEnabled() && use_gpu) { state.SkipWithError( absl::StrCat("Skipping GPU test (no --config=cuda): ", label)); @@ -1458,14 +1461,14 @@ static void BM_TopK(::testing::benchmark::State& state, int rows, int cols, input.flat().setRandom(); Tensor input_k(DT_INT32, TensorShape({})); - input_k.scalar()() = k; + input_k.scalar()() = k; auto top_k = ops::TopK(root, input, input_k, ops::TopK::Sorted(true)); TF_CHECK_OK(root.status()); Graph* g = new Graph(OpRegistry::Global()); TF_CHECK_OK(root.ToGraph(g)); - string device = use_gpu ? "gpu" : "cpu"; + std::string device = use_gpu ? 
"gpu" : "cpu"; SessionOptions opts; opts.config.set_inter_op_parallelism_threads(1); opts.config.set_intra_op_parallelism_threads(num_threads); diff --git a/tensorflow/core/kernels/nth_element_op.cc b/tensorflow/core/kernels/nth_element_op.cc index 104a4c9421d188..12db3b63d8cdad 100644 --- a/tensorflow/core/kernels/nth_element_op.cc +++ b/tensorflow/core/kernels/nth_element_op.cc @@ -43,7 +43,7 @@ class NthElementOp : public OpKernel { OP_REQUIRES( context, TensorShapeUtils::IsScalar(n_in.shape()), errors::InvalidArgument("N must be scalar but has rank ", n_in.dims())); - int n = n_in.scalar()(); + int n = n_in.scalar()(); OP_REQUIRES(context, n >= 0, errors::InvalidArgument("n must be non-negative but is ", n)); diff --git a/tensorflow/core/kernels/one_hot_op.cc b/tensorflow/core/kernels/one_hot_op.cc index 1a7ef6a9a46d0f..4a205ac3503f2e 100644 --- a/tensorflow/core/kernels/one_hot_op.cc +++ b/tensorflow/core/kernels/one_hot_op.cc @@ -77,7 +77,7 @@ class OneHotOp : public OpKernel { const int axis = (axis_ == -1) ? indices_dims : axis_; // The one-hot dimension. - const int32_t depth_v = depth.scalar()(); + const int32_t depth_v = depth.scalar()(); OP_REQUIRES( ctx, depth_v >= 0, errors::InvalidArgument("depth must be non-negative, got: ", depth_v)); @@ -122,7 +122,7 @@ class OneHotOp : public OpKernel { } private: - int32 axis_; + int32_t axis_; OneHotOp(const OneHotOp&) = delete; void operator=(const OneHotOp&) = delete; diff --git a/tensorflow/core/kernels/one_hot_op_test.cc b/tensorflow/core/kernels/one_hot_op_test.cc index 6801b29e2509f7..09cb9b8d9388ea 100644 --- a/tensorflow/core/kernels/one_hot_op_test.cc +++ b/tensorflow/core/kernels/one_hot_op_test.cc @@ -30,13 +30,13 @@ static Graph* OneHot(int batch_size, int num_classes, int axis) { std::mt19937 gen(rd()); std::uniform_int_distribution<> dist(0, num_classes - 1); - auto indices_t = indices.flat(); + auto indices_t = indices.flat(); for (int i = 0; i < batch_size; ++i) { indices_t(i) = dist(gen); } Tensor depth(DT_INT32, TensorShape({})); - depth.scalar()() = num_classes; + depth.scalar()() = num_classes; Tensor on_value(DT_FLOAT, TensorShape({})); on_value.scalar()() = 1.0f; diff --git a/tensorflow/core/kernels/ops_testutil.cc b/tensorflow/core/kernels/ops_testutil.cc index 4efbac731bcaf2..ec0c6a1adcadf5 100644 --- a/tensorflow/core/kernels/ops_testutil.cc +++ b/tensorflow/core/kernels/ops_testutil.cc @@ -176,7 +176,7 @@ void OpsTestBase::CreateContext() { params_->frame_iter = FrameAndIter(0, 0); params_->inputs = inputs_; params_->op_kernel = kernel_.get(); - step_container_.reset(new ScopedStepContainer(0, [](const string&) {})); + step_container_.reset(new ScopedStepContainer(0, [](const std::string&) {})); params_->step_container = step_container_.get(); test::SetOutputAttrs(params_.get(), &out_alloc_attrs_); params_->slice_reader_cache = &slice_reader_cache_wrapper_; diff --git a/tensorflow/core/kernels/ops_testutil.h b/tensorflow/core/kernels/ops_testutil.h index ef4a7cd5142cde..da2ccad9cbba72 100644 --- a/tensorflow/core/kernels/ops_testutil.h +++ b/tensorflow/core/kernels/ops_testutil.h @@ -119,7 +119,7 @@ class OpsTestBase : public ::testing::Test { // Adds a Resource type as input. If is empty, uses the default // container name. 
template - void AddResourceInput(const string& container, const string& name, + void AddResourceInput(const std::string& container, const std::string& name, T* resource) { CHECK_GT(input_types_.size(), inputs_.size()) << "Adding more inputs than types; perhaps you need to call MakeOp"; diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc index 0e60b84dc9ff25..f4c1db06bad961 100644 --- a/tensorflow/core/kernels/pack_op.cc +++ b/tensorflow/core/kernels/pack_op.cc @@ -168,8 +168,8 @@ REGISTER_KERNEL_BUILDER(Name("Pack") .Device(DEVICE_GPU) .HostMemory("values") .HostMemory("output") - .TypeConstraint("T"), - PackOp); + .TypeConstraint("T"), + PackOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index c650648147adf8..890a9954faa4a7 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -400,38 +400,38 @@ TF_CALL_uint8(REGISTER_GPU_KERNEL); // registration requires all int32 inputs and outputs to be in host memory. REGISTER_KERNEL_BUILDER(Name("Pad") .Device(DEVICE_GPU) - .TypeConstraint("T") - .TypeConstraint("Tpaddings") + .TypeConstraint("T") + .TypeConstraint("Tpaddings") .HostMemory("input") .HostMemory("paddings") .HostMemory("output"), - PadOp); + PadOp); REGISTER_KERNEL_BUILDER(Name("Pad") .Device(DEVICE_GPU) - .TypeConstraint("T") + .TypeConstraint("T") .TypeConstraint("Tpaddings") .HostMemory("input") .HostMemory("paddings") .HostMemory("output"), - PadOp); + PadOp); REGISTER_KERNEL_BUILDER(Name("PadV2") .Device(DEVICE_GPU) - .TypeConstraint("T") - .TypeConstraint("Tpaddings") + .TypeConstraint("T") + .TypeConstraint("Tpaddings") .HostMemory("input") .HostMemory("paddings") .HostMemory("constant_values") .HostMemory("output"), - PadOp); + PadOp); REGISTER_KERNEL_BUILDER(Name("PadV2") .Device(DEVICE_GPU) - .TypeConstraint("T") + .TypeConstraint("T") .TypeConstraint("Tpaddings") .HostMemory("input") .HostMemory("paddings") .HostMemory("constant_values") .HostMemory("output"), - PadOp); + PadOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } // end namespace tensorflow diff --git a/tensorflow/core/kernels/padding_fifo_queue.cc b/tensorflow/core/kernels/padding_fifo_queue.cc index 3b50099fb9997c..bd9a07006a8870 100644 --- a/tensorflow/core/kernels/padding_fifo_queue.cc +++ b/tensorflow/core/kernels/padding_fifo_queue.cc @@ -36,7 +36,8 @@ namespace tensorflow { PaddingFIFOQueue::PaddingFIFOQueue( int capacity, const DataTypeVector& component_dtypes, - const std::vector& component_shapes, const string& name) + const std::vector& component_shapes, + const std::string& name) : FIFOQueue(capacity, component_dtypes, ConvertShapesPartialDimensionsToZero(component_shapes), name), partial_shapes_(component_shapes) {} diff --git a/tensorflow/core/kernels/padding_fifo_queue.h b/tensorflow/core/kernels/padding_fifo_queue.h index 74107e80b1977b..f05862ff9b3bdd 100644 --- a/tensorflow/core/kernels/padding_fifo_queue.h +++ b/tensorflow/core/kernels/padding_fifo_queue.h @@ -36,7 +36,7 @@ class PaddingFIFOQueue : public FIFOQueue { public: PaddingFIFOQueue(int32_t capacity, const DataTypeVector& component_dtypes, const std::vector& component_shapes, - const string& name); + const std::string& name); absl::Status Initialize() override; diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index 782c22c5efd43c..66ec30bc4a2136 100644 --- 
a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -777,8 +777,8 @@ class StatelessParameterizedTruncatedNormal : public OpKernel { shape_tensor.shape().DebugString())); TensorShape output_shape; if (shape_tensor.dtype() == DataType::DT_INT32) { - OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(shape_tensor.vec(), - &output_shape)); + OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( + shape_tensor.vec(), &output_shape)); } else { OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( shape_tensor.vec(), &output_shape)); diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc index e7b76653dc329e..0fbb33816c8b14 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc @@ -51,16 +51,16 @@ typedef Eigen::GpuDevice GPUDevice; template __global__ void __launch_bounds__(1024) - TruncatedNormalKernel(random::PhiloxRandom gen, T* data, int64 num_batches, - int64 samples_per_batch, int64 num_elements, - const T* __restrict__ means, bool single_mean, - const T* __restrict__ stddevs, bool single_stddev, - const T* __restrict__ minvals, bool single_minval, - const T* __restrict__ maxvals, bool single_maxval, - int64 kMaxIterations) { - const int32 max_samples_per_item = 2 * kMaxIterations; + TruncatedNormalKernel(random::PhiloxRandom gen, T* data, + int64_t num_batches, int64_t samples_per_batch, + int64_t num_elements, const T* __restrict__ means, + bool single_mean, const T* __restrict__ stddevs, + bool single_stddev, const T* __restrict__ minvals, + bool single_minval, const T* __restrict__ maxvals, + bool single_maxval, int64_t kMaxIterations) { + const int32_t max_samples_per_item = 2 * kMaxIterations; // Initial offset as given by GPU_1D_KERNEL_LOOP. - const int32 initial_offset = blockIdx.x * blockDim.x + threadIdx.x; + const int32_t initial_offset = blockIdx.x * blockDim.x + threadIdx.x; gen.Skip(max_samples_per_item * initial_offset); typedef random::UniformDistribution Uniform; typedef random::NormalDistribution Normal; @@ -82,15 +82,15 @@ __global__ void __launch_bounds__(1024) // skips max_samples_per_item in the generator. Then after generating this // item, we need to skip the samples for one element for every thread to get // to the next element that we actually process. - const int32 samples_between_processed_elements = + const int32_t samples_between_processed_elements = max_samples_per_item * (gridDim.x * blockDim.x); GPU_1D_KERNEL_LOOP(offset, num_elements) { // Track how many more samples we need to skip before we process the next // element. - int32 remaining_samples = samples_between_processed_elements; + int32_t remaining_samples = samples_between_processed_elements; - const int64 batch_id = offset / samples_per_batch; + const int64_t batch_id = offset / samples_per_batch; T mean = means[single_mean ? 0 : batch_id]; const T input_stddev = stddevs[single_stddev ? 0 : batch_id]; T minval = minvals[single_minval ? 
0 : batch_id]; @@ -231,8 +231,8 @@ __global__ void __launch_bounds__(1024) // Partial specialization for GPU template struct TruncatedNormalFunctor { - void operator()(OpKernelContext* ctx, const GPUDevice& d, int64 num_batches, - int64 samples_per_batch, int64 num_elements, + void operator()(OpKernelContext* ctx, const GPUDevice& d, int64_t num_batches, + int64_t samples_per_batch, int64_t num_elements, typename TTypes::ConstFlat means, typename TTypes::ConstFlat stddevs, typename TTypes::ConstFlat minvals, diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op_test.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op_test.cc index 1d5865587e0c13..1257b8da742ce2 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op_test.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op_test.cc @@ -27,7 +27,7 @@ namespace tensorflow { static Graph* PTruncatedNormal(int num_batches, int samples_per_batch) { Graph* g = new Graph(OpRegistry::Global()); Tensor shape_t(DT_INT32, TensorShape({2})); - shape_t.flat().setValues({num_batches, samples_per_batch}); + shape_t.flat().setValues({num_batches, samples_per_batch}); // Use mean 0 and stdev 1 Tensor means_t(DT_FLOAT, TensorShape({num_batches})); @@ -56,7 +56,7 @@ static Graph* PTruncatedNormal(int num_batches, int samples_per_batch) { static Graph* PTruncatedNormal2SD(int num_batches, int samples_per_batch) { Graph* g = new Graph(OpRegistry::Global()); Tensor shape_t(DT_INT32, TensorShape({2})); - shape_t.flat().setValues({num_batches, samples_per_batch}); + shape_t.flat().setValues({num_batches, samples_per_batch}); Tensor means_t(DT_FLOAT, TensorShape({num_batches})); means_t.flat().setConstant(0.0); @@ -83,7 +83,7 @@ static Graph* PTruncatedNormal2SD(int num_batches, int samples_per_batch) { static Graph* PTruncatedNormalOneTail(int num_batches, int samples_per_batch) { Graph* g = new Graph(OpRegistry::Global()); Tensor shape_t(DT_INT32, TensorShape({2})); - shape_t.flat().setValues({num_batches, samples_per_batch}); + shape_t.flat().setValues({num_batches, samples_per_batch}); Tensor means_t(DT_FLOAT, TensorShape({num_batches})); means_t.flat().setConstant(0.0); diff --git a/tensorflow/core/kernels/parse_tensor_test.cc b/tensorflow/core/kernels/parse_tensor_test.cc index 1473eff064e3ea..d5a40489b64fd3 100644 --- a/tensorflow/core/kernels/parse_tensor_test.cc +++ b/tensorflow/core/kernels/parse_tensor_test.cc @@ -106,8 +106,9 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_double) { } TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int64) { - MakeOp(TensorShape({2, 3, 4}), - [](int x) -> int64 { return static_cast(x - 10); }); + MakeOp(TensorShape({2, 3, 4}), [](int x) -> int64_t { + return static_cast(x - 10); + }); TF_ASSERT_OK(RunOpKernel()); Tensor parse_output; ParseSerializedOutput(GetOutput(0), &parse_output); @@ -115,48 +116,50 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int64) { } TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int32) { - MakeOp(TensorShape({4, 2}), - [](int x) -> int32 { return static_cast(x + 7); }); + MakeOp(TensorShape({4, 2}), + [](int x) -> int32_t { return static_cast(x + 7); }); TF_ASSERT_OK(RunOpKernel()); Tensor parse_output; - ParseSerializedOutput(GetOutput(0), &parse_output); - test::ExpectTensorEqual(parse_output, GetInput(0)); + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); } TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int16) { - MakeOp(TensorShape({8}), - [](int 
x) -> int16 { return static_cast(x + 18); }); + MakeOp(TensorShape({8}), [](int x) -> int16_t { + return static_cast(x + 18); + }); TF_ASSERT_OK(RunOpKernel()); Tensor parse_output; - ParseSerializedOutput(GetOutput(0), &parse_output); - test::ExpectTensorEqual(parse_output, GetInput(0)); + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); } TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int8) { - MakeOp(TensorShape({2}), - [](int x) -> int8 { return static_cast(x + 8); }); + MakeOp(TensorShape({2}), + [](int x) -> int8_t { return static_cast(x + 8); }); TF_ASSERT_OK(RunOpKernel()); Tensor parse_output; - ParseSerializedOutput(GetOutput(0), &parse_output); - test::ExpectTensorEqual(parse_output, GetInput(0)); + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); } TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint16) { - MakeOp(TensorShape({1, 3}), - [](int x) -> uint16 { return static_cast(x + 2); }); + MakeOp(TensorShape({1, 3}), [](int x) -> uint16_t { + return static_cast(x + 2); + }); TF_ASSERT_OK(RunOpKernel()); Tensor parse_output; - ParseSerializedOutput(GetOutput(0), &parse_output); - test::ExpectTensorEqual(parse_output, GetInput(0)); + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); } TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint8) { - MakeOp(TensorShape({2, 1, 1}), - [](int x) -> uint8 { return static_cast(x + 1); }); + MakeOp(TensorShape({2, 1, 1}), + [](int x) -> uint8_t { return static_cast(x + 1); }); TF_ASSERT_OK(RunOpKernel()); Tensor parse_output; - ParseSerializedOutput(GetOutput(0), &parse_output); - test::ExpectTensorEqual(parse_output, GetInput(0)); + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); } TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex64) { diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index 97b08ce6fd2982..bbff2dc35654ad 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -43,9 +43,9 @@ PartitionedCallOp::PartitionedCallOp(OpKernelConstruction* ctx) shared_rendezvous_(false) { OP_REQUIRES_OK( ctx, ctx->GetAttr(FunctionLibraryDefinition::kFuncAttr, func_.get())); - string deprecated_config_serialized; + std::string deprecated_config_serialized; OP_REQUIRES_OK(ctx, ctx->GetAttr("config", &deprecated_config_serialized)); - string config_proto_serialized; + std::string config_proto_serialized; OP_REQUIRES_OK(ctx, ctx->GetAttr("config_proto", &config_proto_serialized)); OP_REQUIRES( ctx, @@ -232,7 +232,7 @@ void PartitionedCallOp::RunFunction(FunctionLibraryRuntime::Handle handle, FunctionLibraryRuntime::Options run_opts; ResourceMgr* resource_mgr = lib->device()->resource_manager(); ScopedStepContainer* step_container = new ScopedStepContainer( - run_opts.step_id, [resource_mgr](const string& name) { + run_opts.step_id, [resource_mgr](const std::string& name) { resource_mgr->Cleanup(name).IgnoreError(); }); run_opts.step_container = step_container; @@ -251,13 +251,13 @@ void PartitionedCallOp::RunFunction(FunctionLibraryRuntime::Handle handle, } std::vector* rets = new std::vector; - const string& func_name = func_->name(); + const std::string& func_name = func_->name(); tsl::profiler::TraceMe trace_me("PartitionedCallOp"); lib->Run(run_opts, handle, inputs, 
rets, [rets, done = std::move(done), ctx, func_name, step_container](const absl::Status& status) { if (!status.ok()) { - const string function_and_msg = + const std::string function_and_msg = absl::StrCat(errors::FormatFunctionForError(func_name), " ", status.message()); ctx->SetStatus( diff --git a/tensorflow/core/kernels/partitioned_function_ops.h b/tensorflow/core/kernels/partitioned_function_ops.h index 2b2ec8ea959f7c..f38ad56e8a9f73 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.h +++ b/tensorflow/core/kernels/partitioned_function_ops.h @@ -57,7 +57,7 @@ class PartitionedCallOp : public AsyncOpKernel { // Using unique pointers to avoid including proto headers in kernel headers std::unique_ptr func_; std::unique_ptr config_proto_; - string executor_type_; + std::string executor_type_; bool shared_rendezvous_; mutex mu_; // Cache the handle per FLR because this kernel may be instantiated for diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc index 28e24e79fe0bcf..a63a176032f953 100644 --- a/tensorflow/core/kernels/pooling_ops_3d.cc +++ b/tensorflow/core/kernels/pooling_ops_3d.cc @@ -46,8 +46,8 @@ typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; Pool3dParameters::Pool3dParameters(OpKernelContext* context, - const std::vector& ksize, - const std::vector& stride, + const std::vector& ksize, + const std::vector& stride, Padding padding, TensorFormat data_format, const TensorShape& tensor_in_shape) { // For maxpooling, tensor_in should have 4 dimensions. @@ -97,9 +97,9 @@ absl::Status Pool3dParameters::forward_output_shape(TensorShape* shape) { template struct LaunchPoolingOp { static void launch(OpKernelContext* context, const Tensor& tensor_in, - const std::array& window, - const std::array& stride, - const std::array& padding, + const std::array& window, + const std::array& stride, + const std::array& padding, TensorFormat data_format, Padding padding_type, Tensor* output) { output->tensor().device(context->eigen_device()) = @@ -112,9 +112,9 @@ struct LaunchPoolingOp { template struct LaunchPoolingOp { static void launch(OpKernelContext* context, const Tensor& tensor_in, - const std::array& window, - const std::array& stride, - const std::array& padding, + const std::array& window, + const std::array& stride, + const std::array& padding, TensorFormat data_format, Padding padding_type, Tensor* output) { output->tensor().device(context->eigen_device()) = @@ -128,7 +128,7 @@ template class Pooling3DOp : public UnaryOp { public: explicit Pooling3DOp(OpKernelConstruction* context) : UnaryOp(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -204,8 +204,8 @@ class Pooling3DOp : public UnaryOp { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat data_format_; }; @@ -214,10 +214,10 @@ template struct LaunchMaxPooling3dGradOp { static void launch(OpKernelContext* context, const Tensor& tensor_in, const Tensor& tensor_out, const Tensor& out_backprop, - const std::array& window, - const std::array& stride, - const std::array& out, - const std::array& padding, + const std::array& window, + const std::array& stride, + const std::array& out, + const std::array& padding, TensorFormat data_format, Tensor* output) { 
output->flat().setZero(); for (int64_t p = 0; p < out_backprop.dim_size(3); ++p) { @@ -307,7 +307,7 @@ class MaxPooling3dGradOp : public OpKernel { public: explicit MaxPooling3dGradOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -391,8 +391,8 @@ class MaxPooling3dGradOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat data_format_; }; @@ -402,10 +402,10 @@ struct LaunchAvgPooling3dGradOp { static void launch(OpKernelContext* context, const TensorShape& tensor_in_shape, const Tensor& out_backprop, - const std::array& window, - const std::array& stride, - const std::array& output_shape, - const std::array& padding, + const std::array& window, + const std::array& stride, + const std::array& output_shape, + const std::array& padding, TensorFormat data_format, Tensor* output) { OP_REQUIRES( context, tensor_in_shape.dim_size(0) == out_backprop.dim_size(0), @@ -487,7 +487,7 @@ class AvgPooling3dGradOp : public OpKernel { public: explicit AvgPooling3dGradOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -536,7 +536,7 @@ class AvgPooling3dGradOp : public OpKernel { errors::InvalidArgument("out_backprop must be 5-dimensional")); TensorShape output_shape; - auto shape_vec = tensor_in_shape.vec(); + auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } @@ -568,8 +568,8 @@ class AvgPooling3dGradOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat data_format_; }; @@ -693,7 +693,7 @@ class MaxPooling3dGradGradOp : public OpKernel { public: explicit MaxPooling3dGradGradOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &data_format_), errors::InvalidArgument("Invalid data format")); @@ -779,8 +779,8 @@ class MaxPooling3dGradGradOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; TensorFormat data_format_; }; @@ -816,9 +816,9 @@ TF_CALL_bfloat16(REGISTER_CPU_KERNELS); template struct LaunchPoolingOp { static void launch(OpKernelContext* context, const Tensor& tensor_in, - const std::array& window, - const std::array& stride, - const std::array& padding, + const std::array& window, + const std::array& stride, + const std::array& padding, TensorFormat data_format, Padding padding_type, Tensor* output) { DnnPooling3dOp::Compute(context, se::dnn::PoolingMode::kAverage, window, @@ -829,9 +829,9 @@ struct LaunchPoolingOp { template struct LaunchPoolingOp { static void launch(OpKernelContext* context, const Tensor& tensor_in, - const std::array& window, - const std::array& stride, - const std::array& padding, + const std::array& window, + const 
std::array& stride, + const std::array& padding, TensorFormat data_format, Padding padding_type, Tensor* output) { DnnPooling3dOp::Compute(context, se::dnn::PoolingMode::kMaximum, window, @@ -843,10 +843,10 @@ template struct LaunchMaxPooling3dGradOp { static void launch(OpKernelContext* context, const Tensor& tensor_in, const Tensor& tensor_out, const Tensor& out_backprop, - const std::array& window, - const std::array& stride, - const std::array& out, - const std::array& padding, + const std::array& window, + const std::array& stride, + const std::array& out, + const std::array& padding, TensorFormat data_format, Tensor* input_backprop) { const TensorShape output_shape = tensor_in.shape(); DnnPooling3dGradOp::Compute(context, se::dnn::PoolingMode::kMaximum, @@ -861,10 +861,10 @@ struct LaunchAvgPooling3dGradOp { static void launch(OpKernelContext* context, const TensorShape& tensor_in_shape, const Tensor& out_backprop, - const std::array& window, - const std::array& stride, - const std::array& out, - const std::array& padding, + const std::array& window, + const std::array& stride, + const std::array& out, + const std::array& padding, TensorFormat data_format, Tensor* output) { DnnPooling3dGradOp::Compute( context, se::dnn::PoolingMode::kAverage, window, stride, padding, out, diff --git a/tensorflow/core/kernels/pooling_ops_3d.h b/tensorflow/core/kernels/pooling_ops_3d.h index c0a589ff95092a..edc59f89f760bb 100644 --- a/tensorflow/core/kernels/pooling_ops_3d.h +++ b/tensorflow/core/kernels/pooling_ops_3d.h @@ -39,8 +39,8 @@ struct LaunchMaxPooling3dGradGradOp; // A helper class to manage sizes and shapes for 3d pooling operations. struct Pool3dParameters { // Updates context->status if there is an invalid input. - Pool3dParameters(OpKernelContext* context, const std::vector& ksize, - const std::vector& stride, Padding padding, + Pool3dParameters(OpKernelContext* context, const std::vector& ksize, + const std::vector& stride, Padding padding, TensorFormat data_format, const TensorShape& tensor_in_shape); diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index 4ccca647c154aa..24ed53d027442e 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -49,34 +49,35 @@ struct RawType { template <> struct RawType { - using type = int8; + using type = int8_t; }; #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM template struct PadInputWithNegativeInf { - Status operator()(const GPUDevice& d, - typename TTypes::ConstTensor in, - int input_pad_top, int input_pad_bottom, int input_pad_left, - int input_pad_right, typename TTypes::Tensor out, - TensorFormat format) { + absl::Status operator()(const GPUDevice& d, + typename TTypes::ConstTensor in, + int input_pad_top, int input_pad_bottom, + int input_pad_left, int input_pad_right, + typename TTypes::Tensor out, + TensorFormat format) { T padding_value = -std::numeric_limits::infinity(); functor::PadInput()( d, in, {{input_pad_top, input_pad_left}}, {{input_pad_bottom, input_pad_right}}, out, format, padding_value); - return OkStatus(); + return absl::OkStatus(); } }; template <> struct PadInputWithNegativeInf { - Status operator()(const GPUDevice& d, - typename TTypes::ConstTensor in, - int input_pad_top, int input_pad_bottom, int input_pad_left, - int input_pad_right, - typename TTypes::Tensor out, - TensorFormat format) { + absl::Status operator()(const GPUDevice& d, + typename TTypes::ConstTensor in, + int input_pad_top, int input_pad_bottom, + 
int input_pad_left, int input_pad_right, + typename TTypes::Tensor out, + TensorFormat format) { return errors::InvalidArgument( "Explicit padding not yet supported with qint8"); } @@ -117,8 +118,8 @@ absl::Status CheckPaddingSize(int64_t window_rows, int64_t window_cols, } PoolParameters::PoolParameters(OpKernelContext* context, - const std::vector& ksize, - const std::vector& stride, + const std::vector& ksize, + const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, @@ -227,8 +228,8 @@ absl::Status PoolParameters::forward_output_shape(TensorShape* shape) { template void DnnPoolingImpl(OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::vector& size, - const std::vector& stride, Padding padding, + const std::vector& size, + const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, const Tensor& tensor_in, const TensorShape& tensor_out_shape, bool propagate_nans, @@ -438,14 +439,12 @@ void DnnPoolingImpl(OpKernelContext* context, se::dnn::PoolingMode pooling_mode, } template -void DnnPoolingOp::Compute(OpKernelContext* context, - se::dnn::PoolingMode pooling_mode, - const std::vector& size, - const std::vector& stride, Padding padding, - std::vector explicit_paddings, - TensorFormat data_format, const Tensor& tensor_in, - const TensorShape& tensor_out_shape, - bool propagate_nans) { +void DnnPoolingOp::Compute( + OpKernelContext* context, se::dnn::PoolingMode pooling_mode, + const std::vector& size, const std::vector& stride, + Padding padding, std::vector explicit_paddings, + TensorFormat data_format, const Tensor& tensor_in, + const TensorShape& tensor_out_shape, bool propagate_nans) { Tensor* tensor_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, tensor_out_shape, &tensor_out)); @@ -457,7 +456,7 @@ void DnnPoolingOp::Compute(OpKernelContext* context, template <> void DnnPoolingOp::Compute( OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::vector& size, const std::vector& stride, + const std::vector& size, const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, const Tensor& tensor_in, const TensorShape& tensor_out_shape, bool propagate_nans) { @@ -511,14 +510,14 @@ DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(Eigen::bfloat16); DECLARE_GPU_SPEC(double); -DECLARE_GPU_SPEC(int32); +DECLARE_GPU_SPEC(int32_t); } // namespace functor template void DnnPoolingGradImpl(OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::vector& size, - const std::vector& stride, Padding padding, + const std::vector& size, + const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, @@ -856,7 +855,7 @@ void DnnPoolingGradImpl(OpKernelContext* context, template void DnnPoolingGradOp::Compute( OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::vector& size, const std::vector& stride, + const std::vector& size, const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, const TensorShape& tensor_in_shape, @@ -873,7 +872,7 @@ void DnnPoolingGradOp::Compute( template <> void DnnPoolingGradOp::Compute( OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::vector& size, const std::vector& stride, 
+ const std::vector& size, const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, const TensorShape& tensor_in_shape, diff --git a/tensorflow/core/kernels/pooling_ops_common.h b/tensorflow/core/kernels/pooling_ops_common.h index bb5dda562af672..cced70b25d4a39 100644 --- a/tensorflow/core/kernels/pooling_ops_common.h +++ b/tensorflow/core/kernels/pooling_ops_common.h @@ -47,8 +47,8 @@ struct PoolParameters { // Updates context->status if there is an invalid input. // explicit_paddings has eight elements if padding==EXPLIICT, and zero // elements otherwise. - PoolParameters(OpKernelContext* context, const std::vector& ksize, - const std::vector& stride, Padding padding, + PoolParameters(OpKernelContext* context, const std::vector& ksize, + const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, const TensorShape& tensor_in_shape); @@ -90,7 +90,7 @@ template class MaxPoolingOp : public OpKernel { public: explicit MaxPoolingOp(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; auto status = context->GetAttr("data_format", &data_format); if (status.ok()) { OP_REQUIRES(context, FormatFromString(data_format, &data_format_), @@ -297,8 +297,8 @@ class MaxPoolingOp : public OpKernel { } } - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; std::vector explicit_paddings_; TensorFormat data_format_; @@ -314,12 +314,12 @@ struct LaunchMaxPoolingNoMask_NCHW_VECT_C { const Tensor& input, Tensor* output) { #if GOOGLE_CUDA bool status = functor::MaxPoolForwardNoMask_NCHW_VECT_C()( - reinterpret_cast(input.flat().data()), + reinterpret_cast(input.flat().data()), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, params.row_stride, params.col_stride, params.pad_top, params.pad_left, - reinterpret_cast(output->flat().data()), + reinterpret_cast(output->flat().data()), context->eigen_gpu_device()); if (!status) { context->SetStatus(errors::Internal( @@ -338,7 +338,7 @@ template class MaxPoolingV2Op : public OpKernel { public: explicit MaxPoolingV2Op(OpKernelConstruction* context) : OpKernel(context) { - string data_format; + std::string data_format; auto status = context->GetAttr("data_format", &data_format); if (status.ok()) { OP_REQUIRES(context, FormatFromString(data_format, &data_format_), @@ -375,17 +375,17 @@ class MaxPoolingV2Op : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - std::vector ksize = ksize_; - std::vector stride = stride_; + std::vector ksize = ksize_; + std::vector stride = stride_; if (context->num_inputs() != 1) { const Tensor& tensor_ksize = context->input(1); - auto value_ksize = tensor_ksize.flat(); + auto value_ksize = tensor_ksize.flat(); ksize.resize(tensor_ksize.shape().num_elements()); std::copy_n(&value_ksize(0), ksize.size(), ksize.begin()); const Tensor& tensor_stride = context->input(2); - auto value_stride = tensor_stride.flat(); + auto value_stride = tensor_stride.flat(); stride.resize(tensor_stride.shape().num_elements()); std::copy_n(&value_stride(0), stride.size(), stride.begin()); } @@ -572,8 +572,8 @@ class MaxPoolingV2Op : public OpKernel { } } - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + 
std::vector stride_; Padding padding_; TensorFormat data_format_; }; diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h index c5d51e5935677a..7a891ddd63f2b3 100644 --- a/tensorflow/core/kernels/pooling_ops_common_gpu.h +++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h @@ -41,8 +41,8 @@ class DnnPoolingOp { typedef GPUDevice Device; static void Compute(OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::vector& size, - const std::vector& stride, Padding padding, + const std::vector& size, + const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, const Tensor& tensor_in, const TensorShape& tensor_out_shape, bool propagate_nans); @@ -57,8 +57,8 @@ class DnnPoolingGradOp { typedef GPUDevice Device; static void Compute(OpKernelContext* context, se::dnn::PoolingMode pooling_mode, - const std::vector& size, - const std::vector& stride, Padding padding, + const std::vector& size, + const std::vector& stride, Padding padding, std::vector explicit_paddings, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, diff --git a/tensorflow/core/kernels/population_count_op.cc b/tensorflow/core/kernels/population_count_op.cc index 9d0fc7530ae889..c43415982f257a 100644 --- a/tensorflow/core/kernels/population_count_op.cc +++ b/tensorflow/core/kernels/population_count_op.cc @@ -49,7 +49,7 @@ class PopulationCountOp : public OpKernel { OP_REQUIRES_OK(c, c->allocate_output(0, input_t.shape(), &output_t)); auto input = input_t.flat(); - auto output = output_t->flat(); + auto output = output_t->flat(); functor::PopulationCount popcnt; popcnt(c, input, output); @@ -77,7 +77,7 @@ namespace functor { namespace { template -inline uint8 PopCnt(const T v); +inline uint8_t PopCnt(const T v); #define POPCNT(T, N) \ template <> \ @@ -86,13 +86,13 @@ inline uint8 PopCnt(const T v); } POPCNT(int8_t, 8); -POPCNT(uint8, 8); +POPCNT(uint8_t, 8); POPCNT(int16_t, 16); -POPCNT(uint16, 16); +POPCNT(uint16_t, 16); POPCNT(int32_t, 32); -POPCNT(uint32, 32); +POPCNT(uint32_t, 32); POPCNT(int64_t, 64); -POPCNT(uint64, 64); +POPCNT(uint64_t, 64); #undef POPCNT @@ -101,9 +101,9 @@ POPCNT(uint64, 64); template struct PopulationCount { void operator()(OpKernelContext* c, typename TTypes::ConstFlat input, - TTypes::Flat output) { + TTypes::Flat output) { const T* input_ptr = input.data(); - uint8* output_ptr = output.data(); + uint8_t* output_ptr = output.data(); auto shard = [input_ptr, output_ptr](int64_t start, int64_t limit) { for (int64_t i = start; i < limit; ++i) { output_ptr[i] = PopCnt(input_ptr[i]); @@ -113,8 +113,9 @@ struct PopulationCount { // Approximating cost of popcnt: convert T to int64 // (std::bitset constructor) and convert int64 to uint8 // (bitset.count() -> output). The .count() itself is relatively cheap. - const double total_cost = (Eigen::TensorOpCost::CastCost() + - Eigen::TensorOpCost::CastCost()); + const double total_cost = + (Eigen::TensorOpCost::CastCost() + + Eigen::TensorOpCost::CastCost()); const int64_t shard_cost = (total_cost >= static_cast(std::numeric_limits::max())) ? 
std::numeric_limits::max() diff --git a/tensorflow/core/kernels/population_count_op.h b/tensorflow/core/kernels/population_count_op.h index 2c9812967366d8..b9811e59c3ea38 100644 --- a/tensorflow/core/kernels/population_count_op.h +++ b/tensorflow/core/kernels/population_count_op.h @@ -28,7 +28,7 @@ namespace functor { template struct PopulationCount { void operator()(OpKernelContext* c, typename TTypes::ConstFlat input, - TTypes::Flat output); + TTypes::Flat output); }; } // namespace functor diff --git a/tensorflow/core/kernels/population_count_op_gpu.cu.cc b/tensorflow/core/kernels/population_count_op_gpu.cu.cc index 5f2f14cfba0fb7..7df72b3a8f0b84 100644 --- a/tensorflow/core/kernels/population_count_op_gpu.cu.cc +++ b/tensorflow/core/kernels/population_count_op_gpu.cu.cc @@ -35,34 +35,34 @@ namespace functor { template __global__ void PopulationCountKernel(const int size, const T* __restrict__ input, - uint8* __restrict__ output) { + uint8_t* __restrict__ output) { GPU_1D_KERNEL_LOOP(i, size) { output[i] = __popc(ldg(input + i)); } } template <> __global__ void PopulationCountKernel(const int size, - const int8* __restrict__ input, - uint8* __restrict__ output) { + const int8_t* __restrict__ input, + uint8_t* __restrict__ output) { // For some reason, __popc on a negative int8 gets confused. GPU_1D_KERNEL_LOOP(i, size) { - output[i] = __popc(ldg(reinterpret_cast(input + i))); + output[i] = __popc(ldg(reinterpret_cast(input + i))); } } template <> __global__ void PopulationCountKernel(const int size, - const int16* __restrict__ input, - uint8* __restrict__ output) { + const int16_t* __restrict__ input, + uint8_t* __restrict__ output) { // For some reason, __popc on a negative int16 gets confused. GPU_1D_KERNEL_LOOP(i, size) { - output[i] = __popc(ldg(reinterpret_cast(input + i))); + output[i] = __popc(ldg(reinterpret_cast(input + i))); } } template <> -__global__ void PopulationCountKernel(const int size, - const int64* __restrict__ input, - uint8* __restrict__ output) { +__global__ void PopulationCountKernel( + const int size, const int64_t* __restrict__ input, + uint8_t* __restrict__ output) { GPU_1D_KERNEL_LOOP(i, size) { output[i] = __popcll(ldg(input + i)); } } diff --git a/tensorflow/core/kernels/priority_queue.cc b/tensorflow/core/kernels/priority_queue.cc index 56ea77fdbcf2ca..490cc338ddb99c 100644 --- a/tensorflow/core/kernels/priority_queue.cc +++ b/tensorflow/core/kernels/priority_queue.cc @@ -37,7 +37,7 @@ namespace tensorflow { PriorityQueue::PriorityQueue(int32_t capacity, const DataTypeVector& component_dtypes, const std::vector& component_shapes, - const string& name) + const std::string& name) : TypedQueue(capacity, component_dtypes, component_shapes, name) {} absl::Status PriorityQueue::Initialize() { diff --git a/tensorflow/core/kernels/priority_queue.h b/tensorflow/core/kernels/priority_queue.h index f7ca800a66bf7a..46408300778673 100644 --- a/tensorflow/core/kernels/priority_queue.h +++ b/tensorflow/core/kernels/priority_queue.h @@ -50,7 +50,7 @@ class PriorityQueue public: PriorityQueue(int32_t capacity, const DataTypeVector& component_dtypes, const std::vector& component_shapes, - const string& name); + const std::string& name); absl::Status Initialize() override; // Must be called before any other method. 
@@ -69,7 +69,7 @@ class PriorityQueue absl::Status MatchesPriorityNodeDefTypes(const NodeDef& node_def) const; absl::Status MatchesPriorityNodeDefShapes(const NodeDef& node_def) const; - int32 size() const override { + int32_t size() const override { mutex_lock lock(mu_); return queues_[0].size(); } diff --git a/tensorflow/core/kernels/quantization_utils.h b/tensorflow/core/kernels/quantization_utils.h index 88bee91121641a..f10b5e823d4143 100644 --- a/tensorflow/core/kernels/quantization_utils.h +++ b/tensorflow/core/kernels/quantization_utils.h @@ -84,7 +84,7 @@ T FloatToQuantized(float input, float range_min, float range_max) { static_cast(Eigen::NumTraits::highest()); quantized = std::max(quantized, lowest_quantized); quantized = std::min(quantized, highest_quantized); - return static_cast(static_cast(quantized)); + return static_cast(static_cast(quantized)); } template @@ -284,7 +284,7 @@ inline void RequantizeManyInNewRangeReference(const qint32* input, int64_t quantized_int64 = round_intermediate >> fp_shift; quantized_int64 = std::max(quantized_int64, int64_t{0}); quantized_int64 = std::min(quantized_int64, int64_t{255}); - output[index] = static_cast(static_cast(quantized_int64)); + output[index] = static_cast(static_cast(quantized_int64)); } } @@ -310,7 +310,7 @@ inline void RequantizeManyInNewRange8To32BitReference( int64_t output_value = code_0_int64 + (input_value * mult_int32); output_value = std::max(output_value, lowest_quantized); output_value = std::min(output_value, highest_quantized); - output[i] = static_cast(output_value); + output[i] = static_cast(output_value); } } @@ -725,7 +725,7 @@ inline void RequantizeManyInNewRangeUsingEigen( auto intermediate = fp_value.unaryExpr(int64_right_shift_op()); auto input_requantized = intermediate.cwiseMax(int64_t{0}) .cwiseMin(int64_t{255}) - .template cast() + .template cast() .template cast(); output->flat().device(device) = input_requantized; } diff --git a/tensorflow/core/kernels/quantization_utils_test.cc b/tensorflow/core/kernels/quantization_utils_test.cc index 689e98cfebb2de..6c0251b7249484 100644 --- a/tensorflow/core/kernels/quantization_utils_test.cc +++ b/tensorflow/core/kernels/quantization_utils_test.cc @@ -60,7 +60,7 @@ void TestRequantizeMany(Eigen::ThreadPoolDevice* eigen_device, float input_min, &o_tensor); } - const string tolerance_str = absl::StrCat("+-", tolerance); + const std::string tolerance_str = absl::StrCat("+-", tolerance); for (size_t value_index = 0; value_index < values_count; ++value_index) { int e = expected_values[value_index]; int v = output_values(value_index); @@ -96,7 +96,7 @@ void TestRequantizeMany8To32Bit(float input_min, float input_max, input_max, output_min, output_max, output_values.data()); - const string tolerance_str = absl::StrCat("+-", tolerance); + const std::string tolerance_str = absl::StrCat("+-", tolerance); for (int value_index = 0; value_index < values_count; ++value_index) { const qint32 e = expected_values[value_index]; const qint32 v = output_values(value_index); @@ -143,7 +143,7 @@ void TestRequantizeManyInNewRange32To8Bit( qint32 high = Eigen::NumTraits::highest(); std::vector vals{low, high}; int num_steps = 14419; - qint32 step = static_cast((1LL << 32) / num_steps); + qint32 step = static_cast((1LL << 32) / num_steps); qint32 v = low + static_cast(1); for (int i = 0; i < num_steps; ++i) { vals.push_back(v); @@ -405,7 +405,7 @@ void TestQuantizedToFloatInPlaceUsingEigen( input_array(i) = Eigen::NumTraits::lowest() + i; } else { int64_t offset = static_cast(q_range / 
values_count * i); - input_array(i) = static_cast( + input_array(i) = static_cast( std::min(Eigen::NumTraits::lowest() + offset, Eigen::NumTraits::highest())); } @@ -662,8 +662,8 @@ void TestOverflowWithEigen() { // because the implementation does a bounds check using float, not int32. test::FillValues( &expected, - {static_cast(-2147483648), static_cast(-2147483648), - static_cast(2147483520), static_cast(2147483520)}); + {static_cast(-2147483648), static_cast(-2147483648), + static_cast(2147483520), static_cast(2147483520)}); FloatToQuantizedStruct f2q(input_min, input_max); Tensor output(DT_QINT32, shape); diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc index e34601a86b6b77..64e7ec09c46eed 100644 --- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc +++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc @@ -57,7 +57,7 @@ class QuantizeAndDequantizeV2Op : public OpKernel { " with signed_input_ ", signed_input_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("range_given", &range_given_)); - string round_mode_string; + std::string round_mode_string; OP_REQUIRES_OK(ctx, ctx->GetAttr("round_mode", &round_mode_string)); OP_REQUIRES( ctx, @@ -284,7 +284,7 @@ class QuantizeAndDequantizeV3Op : public OpKernel { "be a scalar. Got dimensions: ", num_bits_tensor.dims())); - const int num_bits_val = num_bits_tensor.scalar()(); + const int num_bits_val = num_bits_tensor.scalar()(); OP_REQUIRES(ctx, num_bits_val > 0 && num_bits_val < (signed_input_ ? 62 : 63), InvalidArgument("num_bits is out of range: ", num_bits_val, diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc b/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc index 0d5b923ecbd0e7..b93292f83d677a 100644 --- a/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc +++ b/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc @@ -86,7 +86,7 @@ TEST_F(QuantizeAndDequantizeTest, Convert_scalar_tensor_V3) { AddInputFromArray(TensorShape({1}), {-3.5}); AddInputFromArray(TensorShape({}), {0.0}); // Min AddInputFromArray(TensorShape({}), {0.0}); // Max - AddInputFromArray(TensorShape({}), {8}); // num_bits + AddInputFromArray(TensorShape({}), {8}); // num_bits TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_FLOAT, TensorShape({1})); @@ -103,7 +103,7 @@ TEST_F(QuantizeAndDequantizeTest, Convert_scalar_tensor_V3) { template std::vector ScalePerSliceAlongAxis(std::vector dims, int axis, const std::vector& data) { - uint32 seed = 123; + uint32_t seed = 123; int64_t out_size = 1; for (int dim : dims) { out_size *= dim; @@ -292,7 +292,7 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int8_V3) { AddInputFromArray(TensorShape({6}), {-1, -0.5, 0, 0.3, 0.8, 0.555}); AddInputFromArray(TensorShape({}), {0.0}); // Min AddInputFromArray(TensorShape({}), {0.0}); // Max - AddInputFromArray(TensorShape({}), {8}); // num_bits + AddInputFromArray(TensorShape({}), {8}); // num_bits // With int8, the tensor is quantized to {-128, -64, 0, 38, 102, 71}. // Scale is: 1/128 @@ -337,7 +337,7 @@ TEST_P(ParameterizedQuantizeAndDequantizeTest, std::vector init_value(num_slices, 0.0f); AddInputFromArray(range_shape, init_value); // Min AddInputFromArray(range_shape, init_value); // Max - AddInputFromArray(TensorShape({}), {8}); // num_bits + AddInputFromArray(TensorShape({}), {8}); // num_bits // With int8, the values in the tensor are quantized to // {-127, -63, 0, 38, 102, 70, 64}. 
@@ -490,7 +490,7 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int4_V3) { AddInputFromArray(TensorShape({6}), {-1, -0.5, 0, 0.3, 0.8, 0.555}); AddInputFromArray(TensorShape({}), {0.0}); // Min AddInputFromArray(TensorShape({}), {0.0}); // Max - AddInputFromArray(TensorShape({}), {4}); // num_bits + AddInputFromArray(TensorShape({}), {4}); // num_bits // With int4, the tensor is quantized to {-8, -4, 0, 2, 6, 4}. // Scale is: 1/8 @@ -583,7 +583,7 @@ TEST_F(QuantizeAndDequantizeTest, Convert_2D_tensor_with_int8_range_given_V3) { {-0.8, -0.5, 0, 0.3, 0.8, 0.555, -2, 33}); AddInputFromArray(TensorShape({}), {-1.0}); // Min AddInputFromArray(TensorShape({}), {1.0}); // Max - AddInputFromArray(TensorShape({}), {8}); // num_bits + AddInputFromArray(TensorShape({}), {8}); // num_bits // Note that the range is given as [-1, 1]. // With int8, the tensor is quantized to {-102, -64, 0, 38, 102, 70, -128, @@ -664,7 +664,7 @@ TEST_F(QuantizeAndDequantizeTest, Convert_4D_tensor_with_uint8_range_given_V3) { AddInputFromArray(TensorShape({2, 2, 1, 1}), {-0.5, 0, 0.3, 0.8}); AddInputFromArray(TensorShape({}), {0.0}); // Min AddInputFromArray(TensorShape({}), {1.0}); // Max - AddInputFromArray(TensorShape({}), {8}); // num_bits + AddInputFromArray(TensorShape({}), {8}); // num_bits // Note that the range is given as [0, 1]. // With int8, the tensor is quantized to {0, 0, 76, 204} @@ -712,7 +712,7 @@ TEST_F(QuantizeAndDequantizeTest, Convert_tensor_with_all_0_V3) { AddInputFromArray(TensorShape({2, 2, 1, 1}), {0, 0, 0, 0}); AddInputFromArray(TensorShape({}), {0.0}); // Min AddInputFromArray(TensorShape({}), {0.0}); // Max - AddInputFromArray(TensorShape({}), {8}); // num_bits + AddInputFromArray(TensorShape({}), {8}); // num_bits TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 2, 1, 1})); @@ -755,7 +755,7 @@ TEST_F(QuantizeAndDequantizeTest, Invalid_range_given_V3) { AddInputFromArray(TensorShape({2, 2, 1, 1}), {-0.5, 0, 0.3, 0.8}); AddInputFromArray(TensorShape({}), {1.0}); // Min AddInputFromArray(TensorShape({}), {0.0}); // Max - AddInputFromArray(TensorShape({}), {8}); // num_bits + AddInputFromArray(TensorShape({}), {8}); // num_bits absl::Status s = RunOpKernel(); EXPECT_TRUE(absl::StrContains(s.ToString(), @@ -778,7 +778,7 @@ TEST_F(QuantizeAndDequantizeTest, Invalid_axis_given_V3) { AddInputFromArray(TensorShape({2, 2, 1, 1}), {-0.5, 0, 0.3, 0.8}); AddInputFromArray(TensorShape({}), {1.0}); // Min AddInputFromArray(TensorShape({}), {0.0}); // Max - AddInputFromArray(TensorShape({}), {8}); // num_bits + AddInputFromArray(TensorShape({}), {8}); // num_bits EXPECT_THAT( RunOpKernel(), diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc index 02ca323b991f68..9a49f96d4c6024 100644 --- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc +++ b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc @@ -64,9 +64,9 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel { // See QuantizationRangeOp as well, which has a copy of this logic. 
auto input_array = input.flat(); const int32_t input_lowest_quantized = - static_cast(Eigen::NumTraits::lowest()); + static_cast(Eigen::NumTraits::lowest()); const int32_t input_highest_quantized = - static_cast(Eigen::NumTraits::highest()); + static_cast(Eigen::NumTraits::highest()); T1 actual_min_quantized = input_highest_quantized; T1 actual_max_quantized = input_lowest_quantized; for (int i = 0; i < input_array.size(); ++i) { diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/core/kernels/quantize_op.cc index c6cdbed7c0d5f6..c63c07a394b6c6 100644 --- a/tensorflow/core/kernels/quantize_op.cc +++ b/tensorflow/core/kernels/quantize_op.cc @@ -67,7 +67,7 @@ class QuantizeV2Op : public OpKernel { : (static_cast(std::numeric_limits::max()) - static_cast(std::numeric_limits::min()) + 1) / 2.0f; - string mode_string; + std::string mode_string; OP_REQUIRES_OK(ctx, ctx->GetAttr("mode", &mode_string)); OP_REQUIRES(ctx, (mode_string == "MIN_COMBINED" || mode_string == "MIN_FIRST" || @@ -83,7 +83,7 @@ class QuantizeV2Op : public OpKernel { mode_ = QUANTIZE_MODE_SCALED; } - string round_mode_string; + std::string round_mode_string; OP_REQUIRES_OK(ctx, ctx->GetAttr("round_mode", &round_mode_string)); OP_REQUIRES(ctx, (round_mode_string == "HALF_AWAY_FROM_ZERO" || diff --git a/tensorflow/core/kernels/quantize_op_test.cc b/tensorflow/core/kernels/quantize_op_test.cc index 76fe2e9f963bef..ec486ba87dc990 100644 --- a/tensorflow/core/kernels/quantize_op_test.cc +++ b/tensorflow/core/kernels/quantize_op_test.cc @@ -62,7 +62,7 @@ TEST_F(QuantizedOpTest, QuantizeV2) { template std::vector ScalePerSliceAlongAxis(std::vector dims, int axis, const std::vector& data) { - uint32 seed = 123; + uint32_t seed = 123; std::minstd_rand rng(seed); int64_t out_size = 1; for (int dim : dims) { @@ -373,14 +373,14 @@ TEST_F(QuantizedOpTest, QuantizeV2_32Bit) { Tensor expected(allocator(), DT_QINT32, TensorShape({element_count})); test::FillValues(&expected, { - std::numeric_limits::min(), + std::numeric_limits::min(), 0, - static_cast(1.0f * (1 << 23)), - static_cast(1.25f * (1 << 23)), - static_cast(1.75f * (1 << 23)), - static_cast(127.0f * (1 << 23)), - static_cast(255.0f * (1 << 23)), - std::numeric_limits::max(), + static_cast(1.0f * (1 << 23)), + static_cast(1.25f * (1 << 23)), + static_cast(1.75f * (1 << 23)), + static_cast(127.0f * (1 << 23)), + static_cast(255.0f * (1 << 23)), + std::numeric_limits::max(), }); // We expect there will be some fuzziness in the lower bits, since this is // converting from float. 
diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc index 5cf7ed1456034e..e8904e8a088395 100644 --- a/tensorflow/core/kernels/quantized_add_op.cc +++ b/tensorflow/core/kernels/quantized_add_op.cc @@ -149,7 +149,7 @@ void ScalarAddition(OpKernelContext* context, const quint8* full_input, full_input_in_output_range_64 = std::min(full_input_in_output_range_64, highest_quantized); const int32_t full_input_in_output_range = - static_cast(full_input_in_output_range_64); + static_cast(full_input_in_output_range_64); output[i] = full_input_in_output_range + scalar_in_output_range; } } @@ -272,13 +272,15 @@ void VectorAddition(OpKernelContext* context, const quint8* x_data, float min_x, int64_t x_in_output_range_64 = x_0_int64 + (x_value * x_mult_int32); x_in_output_range_64 = std::max(x_in_output_range_64, lowest_quantized); x_in_output_range_64 = std::min(x_in_output_range_64, highest_quantized); - const int32_t x_in_output_range = static_cast(x_in_output_range_64); + const int32_t x_in_output_range = + static_cast(x_in_output_range_64); const int64_t y_value = static_cast(y_data[i]); int64_t y_in_output_range_64 = y_0_int64 + (y_value * y_mult_int32); y_in_output_range_64 = std::max(y_in_output_range_64, lowest_quantized); y_in_output_range_64 = std::min(y_in_output_range_64, highest_quantized); - const int32_t y_in_output_range = static_cast(y_in_output_range_64); + const int32_t y_in_output_range = + static_cast(y_in_output_range_64); output[i] = x_in_output_range + y_in_output_range; } @@ -430,7 +432,7 @@ void VectorTensorAddition(const quint8* vector_data, float min_vector, vector_in_output_range_64 = std::min(vector_in_output_range_64, highest_quantized); const int32_t vector_in_output_range = - static_cast(vector_in_output_range_64); + static_cast(vector_in_output_range_64); const int64_t tensor_value = static_cast(tensor_data[i]); int64_t tensor_in_output_range_64 = @@ -440,7 +442,7 @@ void VectorTensorAddition(const quint8* vector_data, float min_vector, tensor_in_output_range_64 = std::min(tensor_in_output_range_64, highest_quantized); const int32_t tensor_in_output_range = - static_cast(tensor_in_output_range_64); + static_cast(tensor_in_output_range_64); output[i] = vector_in_output_range + tensor_in_output_range; } diff --git a/tensorflow/core/kernels/quantized_concat_op.cc b/tensorflow/core/kernels/quantized_concat_op.cc index 7f7c59e2f40fc5..613fef99ea67c9 100644 --- a/tensorflow/core/kernels/quantized_concat_op.cc +++ b/tensorflow/core/kernels/quantized_concat_op.cc @@ -183,7 +183,7 @@ class QuantizedConcatOp : public OpKernel { errors::InvalidArgument( "Concat dim tensor should be a scalar integer, but got shape ", concat_dim_tensor->shape().DebugString())); - const int32_t concat_dim = concat_dim_tensor->scalar()(); + const int32_t concat_dim = concat_dim_tensor->scalar()(); OpInputList values; OP_REQUIRES_OK(context, context->input_list("values", &values)); const size_t N = values.size(); diff --git a/tensorflow/core/kernels/quantized_concat_op_test.cc b/tensorflow/core/kernels/quantized_concat_op_test.cc index 81f8b718d2b41e..cebe247f77f460 100644 --- a/tensorflow/core/kernels/quantized_concat_op_test.cc +++ b/tensorflow/core/kernels/quantized_concat_op_test.cc @@ -88,7 +88,7 @@ void QuantizedConcatTest::TestInvalidMinMax(const Tensor& first_min, Tensor second_quantized(DT_QUINT8, {1}); test::FillValues(&second_quantized, {1}); - AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {0}); 
AddInputFromArray(first_quantized.shape(), first_quantized.flat()); AddInputFromArray(second_quantized.shape(), @@ -144,7 +144,7 @@ void QuantizedConcatTest::TestSmall8Bit(float first_min, float first_max, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); - AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {0}); AddInputFromArray(first_quantized.shape(), first_quantized.flat()); AddInputFromArray(second_quantized.shape(), @@ -210,7 +210,7 @@ void QuantizedConcatTest::TestSmall32Bit(float first_min, float first_max, {100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400}); - AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {0}); AddInputFromArray(first_quantized.shape(), first_quantized.flat()); AddInputFromArray(second_quantized.shape(), @@ -272,7 +272,7 @@ void QuantizedConcatTest::TestSecondDim8Bit(float first_min, float first_max, {1, 2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18, 7, 8, 9, 10, 11, 12, 19, 20, 21, 22, 23, 24}); - AddInputFromArray(TensorShape({}), {1}); + AddInputFromArray(TensorShape({}), {1}); AddInputFromArray(first_quantized.shape(), first_quantized.flat()); AddInputFromArray(second_quantized.shape(), @@ -303,7 +303,7 @@ static void ConcatHelper(::testing::benchmark::State& state, const int kDim1 = 100; TensorShape shape({kDim1, dim2}); - Tensor concat_dim = test::AsScalar(concat_dimension); + Tensor concat_dim = test::AsScalar(concat_dimension); Tensor in0(dt, shape); in0.flat().setRandom(); Tensor in1(dt, shape); diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc index 3f3e2743d674f4..14072547b310e7 100644 --- a/tensorflow/core/kernels/quantized_conv_ops.cc +++ b/tensorflow/core/kernels/quantized_conv_ops.cc @@ -62,8 +62,9 @@ class ReferenceConvFunctor { int output_shift, int output_offset, int output_mult) { // Set up some constants we need for the output down-shifting and // saturation. - const int32_t highest = static_cast(Eigen::NumTraits::highest()); - const int32_t lowest = static_cast(Eigen::NumTraits::lowest()); + const int32_t highest = + static_cast(Eigen::NumTraits::highest()); + const int32_t lowest = static_cast(Eigen::NumTraits::lowest()); // When we're converting the 32 bit accumulator to a lower bit depth, we // need to add on 0.5 in fixed-point terms to make the operation round half @@ -150,7 +151,7 @@ class ReferenceConvFunctor { // We're promoting the T1 type to a higher bit depth here as // we do the subtraction. input_value = - static_cast(input_source_value) - input_offset; + static_cast(input_source_value) - input_offset; } else { input_value = 0; } @@ -161,7 +162,7 @@ class ReferenceConvFunctor { (in_channel * filter_count) + out_channel]; // Another promotion to 32 bit, as above. const int32_t filter_value = - static_cast(filter_source_value) - filter_offset; + static_cast(filter_source_value) - filter_offset; total += (input_value * filter_value); } } @@ -406,9 +407,9 @@ class Im2ColConvFunctor { // The gemmlowp optimized library only works for a particular set of // data types, so check if we meet those requirements and fall back to a // slower reference implementation if not. 
- const uint8* im2col_data_as_uint8 = &(im2col_buffer->value); - const uint8* filter_data_as_uint8 = &(filter_data->value); - int32* output_data_as_int32 = &(chunk_output_data->value); + const uint8_t* im2col_data_as_uint8 = &(im2col_buffer->value); + const uint8_t* filter_data_as_uint8 = &(filter_data->value); + int32_t* output_data_as_int32 = &(chunk_output_data->value); // All of the transpose_* variables are currently compile-time consts, // so we could just hard-code these values too, but that would break if // anybody changed those values in the future (e.g. to match the ability @@ -472,7 +473,7 @@ class QuantizedConv2DOp : public OpKernel { context, (strides_[0] == 1 && strides_[3] == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); - std::vector dilations; + std::vector dilations; OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations)); OP_REQUIRES(context, dilations.size() == 4, errors::InvalidArgument("Dilations field must " @@ -612,7 +613,7 @@ class QuantizedConv2DOp : public OpKernel { } private: - std::vector strides_; + std::vector strides_; Padding padding_; }; diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc index ae65dc3b5e38ce..5f7143e183991a 100644 --- a/tensorflow/core/kernels/quantized_matmul_op.cc +++ b/tensorflow/core/kernels/quantized_matmul_op.cc @@ -38,9 +38,9 @@ template void GemmlowpMultiply(OpKernelContext* op_context, const quint8* a_data, const quint8* b_data, qint32* c_data, int m, int n, int k, int offset_a, int offset_b, int lda, int ldb, int ldc) { - const uint8* a_data_as_uint8 = &(a_data->value); - const uint8* b_data_as_uint8 = &(b_data->value); - int32* c_data_as_int32 = &(c_data->value); + const uint8_t* a_data_as_uint8 = &(a_data->value); + const uint8_t* b_data_as_uint8 = &(b_data->value); + int32_t* c_data_as_int32 = &(c_data->value); static const gemmlowp::MapOrder ResultOrder = !TransposeC ? 
gemmlowp::MapOrder::RowMajor : gemmlowp::MapOrder::ColMajor; static const gemmlowp::MapOrder LhsOrder = diff --git a/tensorflow/core/kernels/quantized_mul_op.cc b/tensorflow/core/kernels/quantized_mul_op.cc index fed18e3a6f917d..9028137e49949d 100644 --- a/tensorflow/core/kernels/quantized_mul_op.cc +++ b/tensorflow/core/kernels/quantized_mul_op.cc @@ -38,9 +38,9 @@ void ScalarMultiply(OpKernelContext* context, const T* full_input, T scalar_input, int32_t scalar_input_offset, Toutput* output) { const int32_t scalar_minus_offset = - static_cast(scalar_input) - scalar_input_offset; + static_cast(scalar_input) - scalar_input_offset; for (int i = 0; i < num_elements; ++i) { - output[i] = (static_cast(full_input[i]) - full_input_offset) * + output[i] = (static_cast(full_input[i]) - full_input_offset) * scalar_minus_offset; } } @@ -115,8 +115,8 @@ void VectorMultiply(OpKernelContext* context, const T* x_data, int32_t offset_x, const T* y_data, int32_t offset_y, int64_t num_elements, Toutput* output) { for (int i = 0; i < num_elements; ++i) { - output[i] = (static_cast(x_data[i]) - offset_x) * - (static_cast(y_data[i]) - offset_y); + output[i] = (static_cast(x_data[i]) - offset_x) * + (static_cast(y_data[i]) - offset_y); } } @@ -193,8 +193,8 @@ void VectorTensorMultiply(const T* vector_data, int32_t vector_offset, Toutput* output) { for (int i = 0; i < tensor_num_elements; ++i) { const int64_t vector_i = i % vector_num_elements; - output[i] = (static_cast(vector_data[vector_i]) - vector_offset) * - (static_cast(tensor_data[i]) - tensor_offset); + output[i] = (static_cast(vector_data[vector_i]) - vector_offset) * + (static_cast(tensor_data[i]) - tensor_offset); } } diff --git a/tensorflow/core/kernels/quantized_pooling_ops.cc b/tensorflow/core/kernels/quantized_pooling_ops.cc index 5efedd082c4aea..5a05d1635c1d6b 100644 --- a/tensorflow/core/kernels/quantized_pooling_ops.cc +++ b/tensorflow/core/kernels/quantized_pooling_ops.cc @@ -95,8 +95,9 @@ class QuantizedAvgPoolingOp : public OpKernel { params.forward_output_shape(¶ms_forward_output_shape)); OP_REQUIRES_OK(context, context->allocate_output( 0, params_forward_output_shape, &output)); - const int32_t highest = static_cast(Eigen::NumTraits::highest()); - const int32_t lowest = static_cast(Eigen::NumTraits::lowest()); + const int32_t highest = + static_cast(Eigen::NumTraits::highest()); + const int32_t lowest = static_cast(Eigen::NumTraits::lowest()); // TODO(vrv): Switch this to the Eigen::Tensor version of // SpatialAvgPooling once that version is running quickly. @@ -105,12 +106,12 @@ class QuantizedAvgPoolingOp : public OpKernel { Tensor int32_output(DT_INT32, params_forward_output_shape); // Cast input to int32 tensor and call SpatialAvgPool. Tensor int32_input(DT_INT32, tensor_in.shape()); - int32_input.flat() = tensor_in.flat().template cast(); - SpatialAvgPool(context, &int32_output, int32_input, params, - padding_); + int32_input.flat() = tensor_in.flat().template cast(); + SpatialAvgPool(context, &int32_output, int32_input, params, + padding_); // Clamp the int32 output back into quantized space. 
- output->flat() = int32_output.flat() + output->flat() = int32_output.flat() .cwiseMax(lowest) .cwiseMin(highest) .template cast(); @@ -124,8 +125,8 @@ class QuantizedAvgPoolingOp : public OpKernel { } private: - std::vector ksize_; - std::vector stride_; + std::vector ksize_; + std::vector stride_; Padding padding_; }; diff --git a/tensorflow/core/kernels/quantized_reshape_op_test.cc b/tensorflow/core/kernels/quantized_reshape_op_test.cc index a7066f98f39e99..a2c7b60bbc71db 100644 --- a/tensorflow/core/kernels/quantized_reshape_op_test.cc +++ b/tensorflow/core/kernels/quantized_reshape_op_test.cc @@ -56,7 +56,7 @@ TEST_F(QuantizedReshapeTest, Reshape) { expected.flat()(i) = quint8(i); } AddInputFromArray(input.shape(), input.flat()); - AddInputFromList({3}, {5, 10, 4}); // shape + AddInputFromList({3}, {5, 10, 4}); // shape AddInputFromArray(TensorShape({1}), {-10}); AddInputFromArray(TensorShape({1}), {20}); TF_ASSERT_OK(RunOpKernel()); diff --git a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc index 2efdd38dc6ef45..4e6f072973b3e1 100644 --- a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc +++ b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc @@ -132,7 +132,7 @@ inline T ComputeLerp(const T top_left, const T top_right, const T bottom_left, MulOffset(bottom_right, bottom_left, x_lerp); const T_CALC out = top + (bottom - top) / RESOLUTION_MULT * y_lerp; return static_cast( - static_cast((out + RESOLUTION_MULT / 2) / RESOLUTION_MULT)); + static_cast((out + RESOLUTION_MULT / 2) / RESOLUTION_MULT)); } #ifdef QUANTIZED_RESIZE_BILINEAR_USE_NEON @@ -266,7 +266,7 @@ inline void OutputLerpForChannels(const InterpolationCache& xs, } template -inline void OutputLerp8x8x1(const InterpolationCache& xs, +inline void OutputLerp8x8x1(const InterpolationCache& xs, const int64_t x_start, const int16_t ys_ilerp, const float min, const float max, const quint8* const ys_input_lower_ptr, @@ -284,7 +284,7 @@ inline void OutputLerp8x8x1(const InterpolationCache& xs, #else for (int x = x_start; x < x_start + 8; ++x) { - OutputLerpForChannels( + OutputLerpForChannels( xs, x, ys_ilerp, 1, min, max, ys_input_lower_ptr, ys_input_upper_ptr, output_y_ptr); } @@ -292,7 +292,7 @@ inline void OutputLerp8x8x1(const InterpolationCache& xs, } template -inline void OutputLerp8x8x3(const InterpolationCache& xs, +inline void OutputLerp8x8x3(const InterpolationCache& xs, const int64_t x_start, const int16_t ys_ilerp, const float min, const float max, const quint8* const ys_input_lower_ptr, @@ -325,7 +325,7 @@ inline void OutputLerp8x8x3(const InterpolationCache& xs, #else for (int x = x_start; x < x_start + 8; ++x) { - OutputLerpForChannels( + OutputLerpForChannels( xs, x, ys_ilerp, 3, min, max, ys_input_lower_ptr, ys_input_upper_ptr, output_y_ptr); } @@ -333,7 +333,7 @@ inline void OutputLerp8x8x3(const InterpolationCache& xs, } template -inline void OutputLerp32x4x1(const InterpolationCache& xs, +inline void OutputLerp32x4x1(const InterpolationCache& xs, const int64_t x_start, const int32_t ys_ilerp, const float min, const float max, const qint32* const ys_input_lower_ptr, @@ -373,7 +373,7 @@ inline void OutputLerp32x4x1(const InterpolationCache& xs, #else for (int x = x_start; x < x_start + 4; ++x) { - OutputLerpForChannels( + OutputLerpForChannels( xs, x, ys_ilerp, 1, min, max, ys_input_lower_ptr, ys_input_upper_ptr, output_y_ptr); } @@ -381,7 +381,7 @@ inline void OutputLerp32x4x1(const InterpolationCache& xs, } template -inline void 
OutputLerp32x4x3(const InterpolationCache& xs, +inline void OutputLerp32x4x3(const InterpolationCache& xs, const int64_t x_start, const int32_t ys_ilerp, const float min, const float max, const qint32* const ys_input_lower_ptr, @@ -458,7 +458,7 @@ inline void OutputLerp32x4x3(const InterpolationCache& xs, #else for (int x = x_start; x < x_start + 4; ++x) { - OutputLerpForChannels( + OutputLerpForChannels( xs, x, ys_ilerp, 3, min, max, ys_input_lower_ptr, ys_input_upper_ptr, output_y_ptr); } @@ -543,10 +543,10 @@ void ResizeImage(typename TTypes::ConstTensor images, CHECK_NOTNULL(output); - const InterpolationCache xs = - BuildLerpCache(out_width, in_width, width_scale, channels, - RESOLUTION, half_pixel_centers); - const InterpolationCache ys = BuildLerpCache( + const InterpolationCache xs = + BuildLerpCache(out_width, in_width, width_scale, channels, + RESOLUTION, half_pixel_centers); + const InterpolationCache ys = BuildLerpCache( out_height, in_height, height_scale, 1, RESOLUTION, half_pixel_centers); const int64_t in_row_size = in_width * channels; @@ -581,7 +581,7 @@ void ResizeImage(typename TTypes::ConstTensor images, } } for (; x < out_width; ++x) { - OutputLerpForChannels( + OutputLerpForChannels( xs, x, ys_ilerp, channels, in_min, in_max, ys_input_lower_ptr, ys_input_upper_ptr, output_y_ptr); } @@ -606,10 +606,10 @@ void ResizeImage(typename TTypes::ConstTensor images, CHECK_NOTNULL(output); - const InterpolationCache xs = - BuildLerpCache(out_width, in_width, width_scale, channels, - RESOLUTION, half_pixel_centers); - const InterpolationCache ys = BuildLerpCache( + const InterpolationCache xs = + BuildLerpCache(out_width, in_width, width_scale, channels, + RESOLUTION, half_pixel_centers); + const InterpolationCache ys = BuildLerpCache( out_height, in_height, height_scale, 1, RESOLUTION, half_pixel_centers); const int64_t in_row_size = in_width * channels; @@ -646,7 +646,7 @@ void ResizeImage(typename TTypes::ConstTensor images, } } for (; x < out_width; ++x) { - OutputLerpForChannels( + OutputLerpForChannels( xs, x, ys_ilerp, channels, in_min, in_max, ys_input_lower_ptr, ys_input_upper_ptr, output_y_ptr); } diff --git a/tensorflow/core/kernels/quantized_resize_bilinear_op_test.cc b/tensorflow/core/kernels/quantized_resize_bilinear_op_test.cc index 52c66efd890ea6..8c2426ee6621b7 100644 --- a/tensorflow/core/kernels/quantized_resize_bilinear_op_test.cc +++ b/tensorflow/core/kernels/quantized_resize_bilinear_op_test.cc @@ -171,8 +171,8 @@ void CheckTensorValue(const T* in_data, const T* out_data, const int batch_size, const float val = QuantizedToFloat(qval, min, max); if (!relative) { const int q_tolerance = std::round(tolerance); - EXPECT_TRUE(std::abs(static_cast(ref_qval) - - static_cast(qval)) <= q_tolerance) + EXPECT_TRUE(std::abs(static_cast(ref_qval) - + static_cast(qval)) <= q_tolerance) << "ref = " << ref_val << ", val = " << val << ", " << b << ", " << y << ", " << x << ", " << c << ", qval = " << qval << ", ref qval = " << ref_qval << ", " << q_tolerance; @@ -197,7 +197,7 @@ void TestResizeBilinear(const Tensor& image_tensor, const DataType dt, Scope root = Scope::NewRootScope(); Output placeholder = ops::Placeholder(root.WithOpName("placeholder"), dt); - Output size = ops::Const(root.WithOpName("size"), new_size); + Output size = ops::Const(root.WithOpName("size"), new_size); Output in_min = ops::Const(root.WithOpName("min"), min); Output in_max = ops::Const(root.WithOpName("max"), max); diff --git a/tensorflow/core/kernels/queue_base.cc 
b/tensorflow/core/kernels/queue_base.cc index 4274c775bd1557..e62b4cdf2db9d6 100644 --- a/tensorflow/core/kernels/queue_base.cc +++ b/tensorflow/core/kernels/queue_base.cc @@ -51,7 +51,7 @@ absl::Status HandleSliceToElement(const Tensor& parent, Tensor* element, QueueBase::QueueBase(int32_t capacity, const DataTypeVector& component_dtypes, const std::vector& component_shapes, - const string& name) + const std::string& name) : capacity_(capacity), component_dtypes_(component_dtypes), component_shapes_(component_shapes), @@ -78,8 +78,9 @@ absl::Status QueueBase::ValidateTupleCommon(const Tuple& tuple) const { } // static -string QueueBase::ShapeListString(const absl::Span& shapes) { - string result = "["; +std::string QueueBase::ShapeListString( + const absl::Span& shapes) { + std::string result = "["; bool first = true; for (const TensorShape& shape : shapes) { absl::StrAppend(&result, first ? "" : ", ", shape.DebugString()); @@ -90,7 +91,7 @@ string QueueBase::ShapeListString(const absl::Span& shapes) { } absl::Status QueueBase::MatchesNodeDefOp(const NodeDef& node_def, - const string& op) const { + const std::string& op) const { if (node_def.op() != op) { return errors::InvalidArgument("Shared queue '", name_, "' has type '", op, "' that does not match type of Node '", diff --git a/tensorflow/core/kernels/queue_base.h b/tensorflow/core/kernels/queue_base.h index d39ab45498b843..e55693b4d540d4 100644 --- a/tensorflow/core/kernels/queue_base.h +++ b/tensorflow/core/kernels/queue_base.h @@ -46,7 +46,7 @@ class QueueBase : public QueueInterface { // name: A name to use for the queue. QueueBase(int32_t capacity, const DataTypeVector& component_dtypes, const std::vector& component_shapes, - const string& name); + const std::string& name); // Implementations of QueueInterface methods -------------------------------- const DataTypeVector& component_dtypes() const override { @@ -64,7 +64,7 @@ class QueueBase : public QueueInterface { return component_shapes_; } - int32 capacity() const { return capacity_; } + int32_t capacity() const { return capacity_; } bool is_closed() const override { mutex_lock lock(mu_); @@ -103,7 +103,7 @@ class QueueBase : public QueueInterface { }; // Returns the number of components in a queue-element tuple. - int32 num_components() const { return component_dtypes_.size(); } + int32_t num_components() const { return component_dtypes_.size(); } // True if shapes were specified. If so, inputs will be validated // against them, etc. @@ -135,26 +135,27 @@ class QueueBase : public QueueInterface { ~QueueBase() override; // Helpers for implementing MatchesNodeDef(). 
- static string ShapeListString(const absl::Span& shapes); + static std::string ShapeListString( + const absl::Span& shapes); absl::Status MatchesNodeDefOp(const NodeDef& node_def, - const string& op) const; + const std::string& op) const; absl::Status MatchesNodeDefCapacity(const NodeDef& node_def, int32_t capacity) const; absl::Status MatchesNodeDefTypes(const NodeDef& node_def) const; absl::Status MatchesNodeDefShapes(const NodeDef& node_def) const; protected: - const int32 capacity_; + const int32_t capacity_; const DataTypeVector component_dtypes_; const std::vector component_shapes_; - const string name_; + const std::string name_; mutable mutex mu_; bool closed_ TF_GUARDED_BY(mu_); struct Attempt; typedef std::function RunCallback; struct Attempt { - int32 elements_requested; + int32_t elements_requested; DoneCallback done_callback; // must be run outside mu_ OpKernelContext* context; CancellationManager* cancellation_manager; // not owned diff --git a/tensorflow/core/kernels/queue_op.cc b/tensorflow/core/kernels/queue_op.cc index e16c6034de4596..2f77020256080a 100644 --- a/tensorflow/core/kernels/queue_op.cc +++ b/tensorflow/core/kernels/queue_op.cc @@ -210,7 +210,7 @@ DequeueManyOp::DequeueManyOp(OpKernelConstruction* context) void DequeueManyOp::ComputeAsync(OpKernelContext* ctx, QueueInterface* queue, DoneCallback callback) { const Tensor& Tnum_elements = ctx->input(1); - int32_t num_elements = Tnum_elements.flat()(0); + int32_t num_elements = Tnum_elements.flat()(0); OP_REQUIRES_ASYNC(ctx, num_elements >= 0, errors::InvalidArgument("DequeueManyOp requested ", @@ -283,7 +283,7 @@ DequeueUpToOp::DequeueUpToOp(OpKernelConstruction* context) void DequeueUpToOp::ComputeAsync(OpKernelContext* ctx, QueueInterface* queue, DoneCallback callback) { const Tensor& Tnum_elements = ctx->input(1); - int32_t num_elements = Tnum_elements.flat()(0); + int32_t num_elements = Tnum_elements.flat()(0); OP_REQUIRES_ASYNC(ctx, num_elements >= 0, errors::InvalidArgument("DequeueUpToOp requested ", @@ -349,7 +349,7 @@ void QueueSizeOp::ComputeAsync(OpKernelContext* ctx, QueueInterface* queue, DoneCallback callback) { Tensor* Tqueue_size = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &Tqueue_size)); - Tqueue_size->flat().setConstant(queue->size()); + Tqueue_size->flat().setConstant(queue->size()); callback(); } diff --git a/tensorflow/core/kernels/queue_op.h b/tensorflow/core/kernels/queue_op.h index 57a771d91fcb50..4c5c1ee10b0433 100644 --- a/tensorflow/core/kernels/queue_op.h +++ b/tensorflow/core/kernels/queue_op.h @@ -39,7 +39,7 @@ class QueueOp : public ResourceOpKernel { protected: // Variables accessible by subclasses - int32 capacity_; + int32_t capacity_; DataTypeVector component_types_; private: diff --git a/tensorflow/core/kernels/ragged_cross_op.cc b/tensorflow/core/kernels/ragged_cross_op.cc index 9e7d0d52ac2ae7..9612e6bcdbabfb 100644 --- a/tensorflow/core/kernels/ragged_cross_op.cc +++ b/tensorflow/core/kernels/ragged_cross_op.cc @@ -51,7 +51,7 @@ class FeatureReader { virtual int64_t FeatureCount(int64_t batch) const = 0; // Copies the value for the specified feature to `out`. 
- virtual void ReadValue(int64_t batch, int64_t n, uint64* out) const = 0; + virtual void ReadValue(int64_t batch, int64_t n, uint64_t* out) const = 0; virtual void ReadValue(int64_t batch, int64_t n, tstring* out) const = 0; virtual ~FeatureReader() {} @@ -70,10 +70,10 @@ void CopyToString(const tstring& src, tstring* dst) { void CopyToString(int64_t src, tstring* dst) { *dst = std::to_string(src); } // Copies a feature value `src` to an int64 fingerprint `dst`. -void CopyToFingerprint(const tstring& feature, uint64* dst) { +void CopyToFingerprint(const tstring& feature, uint64_t* dst) { *dst = Fingerprint64(feature); } -void CopyToFingerprint(int64_t feature, uint64* dst) { *dst = feature; } +void CopyToFingerprint(int64_t feature, uint64_t* dst) { *dst = feature; } // A FeatureReader that is backed by a ragged tensor. template @@ -87,7 +87,7 @@ class RaggedFeatureReader : public FeatureReader { return row_splits_(batch + 1) - row_splits_(batch); } - void ReadValue(int64_t batch, int64_t n, uint64* out) const override { + void ReadValue(int64_t batch, int64_t n, uint64_t* out) const override { CopyToFingerprint(values_(row_splits_(batch) + n), out); } @@ -110,7 +110,7 @@ class DenseFeatureReader : public FeatureReader { int64_t FeatureCount(int64_t batch) const override { return feature_count_; } - void ReadValue(int64_t batch, int64_t n, uint64* out) const override { + void ReadValue(int64_t batch, int64_t n, uint64_t* out) const override { CopyToFingerprint(values_(batch, n), out); } @@ -145,7 +145,7 @@ class SparseFeatureReader : public FeatureReader { return row_splits_[batch + 1] - row_splits_[batch]; } - void ReadValue(int64_t batch, int64_t n, uint64* out) const override { + void ReadValue(int64_t batch, int64_t n, uint64_t* out) const override { CopyToFingerprint(values_(row_splits_[batch] + n), out); } @@ -179,7 +179,7 @@ class OutputWriterImpl : public OutputWriter { using FlatSplits = typename TTypes::ConstFlat; OutputWriterImpl(const FeatureReaders& features, int64_t num_buckets, - uint64 hash_key, const Tensor* splits_out, + uint64_t hash_key, const Tensor* splits_out, Tensor* values_out) : features_(features), num_buckets_(num_buckets), @@ -220,9 +220,9 @@ class OutputWriterImpl : public OutputWriter { void WriteCombination(int64_t batch_index, const std::vector& combination, int64_t* out) { // Do the fingerprint concatenation on uint64. - uint64 hashed_output = hash_key_; + uint64_t hashed_output = hash_key_; for (size_t i = 0; i < combination.size(); ++i) { - uint64 hash_i; + uint64_t hash_i; features_[i]->ReadValue(batch_index, combination[i], &hash_i); hashed_output = FingerprintCat64(hashed_output, hash_i); } @@ -254,7 +254,7 @@ class OutputWriterImpl : public OutputWriter { const FeatureReaders& features_; const int64_t num_buckets_; - const uint64 hash_key_; + const uint64_t hash_key_; FlatSplits splits_out_; FlatValues values_out_; }; @@ -263,7 +263,7 @@ class OutputWriterImpl : public OutputWriter { // given tensors. 
std::unique_ptr MakeOutputWriter(const FeatureReaders& features, int64_t num_buckets, - uint64 hash_key, + uint64_t hash_key, const Tensor* splits_out, Tensor* values_out) { if (values_out->dtype() == DT_INT64) { @@ -271,7 +271,7 @@ std::unique_ptr MakeOutputWriter(const FeatureReaders& features, return std::make_unique>( features, num_buckets, hash_key, splits_out, values_out); } else { - return std::make_unique>( + return std::make_unique>( features, num_buckets, hash_key, splits_out, values_out); } } else { @@ -279,7 +279,7 @@ std::unique_ptr MakeOutputWriter(const FeatureReaders& features, return std::make_unique>( features, num_buckets, hash_key, splits_out, values_out); } else { - return std::make_unique>( + return std::make_unique>( features, num_buckets, hash_key, splits_out, values_out); } } @@ -298,7 +298,7 @@ class RaggedCrossOp : public OpKernel { // supported by REGISTER_OP. int64_t signed_hash_key_; OP_REQUIRES_OK(context, context->GetAttr("hash_key", &signed_hash_key_)); - hash_key_ = static_cast(signed_hash_key_); + hash_key_ = static_cast(signed_hash_key_); int num_sparse; OP_REQUIRES_OK(context, context->GetAttr("Nsparse", &num_sparse)); @@ -542,7 +542,7 @@ class RaggedCrossOp : public OpKernel { new RaggedFeatureReader(values, splits)); } else { features->emplace_back( - new RaggedFeatureReader(values, splits)); + new RaggedFeatureReader(values, splits)); } } else { if (splits.dtype() == DT_INT64) { @@ -550,7 +550,7 @@ class RaggedCrossOp : public OpKernel { new RaggedFeatureReader(values, splits)); } else { features->emplace_back( - new RaggedFeatureReader(values, splits)); + new RaggedFeatureReader(values, splits)); } } return absl::OkStatus(); @@ -632,7 +632,7 @@ class RaggedCrossOp : public OpKernel { } int64_t num_buckets_; - uint64 hash_key_; + uint64_t hash_key_; std::vector ragged_values_types_; std::vector ragged_splits_types_; std::vector sparse_values_types_; @@ -642,8 +642,8 @@ class RaggedCrossOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("RaggedCross") .Device(DEVICE_CPU) - .TypeConstraint("out_row_splits_type"), - RaggedCrossOp); + .TypeConstraint("out_row_splits_type"), + RaggedCrossOp); REGISTER_KERNEL_BUILDER(Name("RaggedCross") .Device(DEVICE_CPU) .TypeConstraint("out_row_splits_type"), diff --git a/tensorflow/core/kernels/ragged_gather_op_test.cc b/tensorflow/core/kernels/ragged_gather_op_test.cc index ca070524a62acc..cebccdd360f2d4 100644 --- a/tensorflow/core/kernels/ragged_gather_op_test.cc +++ b/tensorflow/core/kernels/ragged_gather_op_test.cc @@ -65,7 +65,7 @@ TEST_F(RaggedGatherOpTest, RaggedGather) { // indices = [2, 1, 0, 3] // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] // params.shape = [4, None] - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({4}), // indices.shape {2, 1, 0, 3}, // indices {{0, 3, 3, 7, 9}}, // params_nested_splits @@ -87,7 +87,7 @@ TEST_F(RaggedGatherOpTest, RaggedGather_3DParams) { // indices = [2, 1, 0, 2, 3] // params = [[[]], [[.1, 2], [.3]], [], [[.4, .5], [.6, .7, .8]], [[.9]]] // params.shape = [5, None, None] - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({5}), // indices.shape {2, 1, 0, 2, 3}, // indices {{0, 1, 3, 3, 5, 6}, {0, 0, 2, 3, 5, 8, 9}}, // params_nested_splits @@ -111,7 +111,7 @@ TEST_F(RaggedGatherOpTest, RaggedGather_4DParams) { // indices = [2, 1, 0, 2] // params = [[[]], [[[1, 2], [3, 4], [5, 6]], [[7, 8]]], []] // params.shape = [4, None, None, 2] - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({4}), // indices.shape {2, 1, 0, 2}, 
// indices {{0, 1, 3, 3}, {0, 0, 3, 4}}, // params_nested_splits @@ -129,15 +129,15 @@ TEST_F(RaggedGatherOpTest, RaggedGather_4DParams) { test::AsTensor({0, 0, 2, 3, 3})); test::ExpectTensorEqual(*GetOutput(1), test::AsTensor({0, 3, 4, 4})); - test::ExpectTensorEqual( + test::ExpectTensorEqual( *GetOutput(2), - test::AsTensor({1, 2, 3, 4, 5, 6, 7, 8}, TensorShape({4, 2}))); + test::AsTensor({1, 2, 3, 4, 5, 6, 7, 8}, TensorShape({4, 2}))); } TEST_F(RaggedGatherOpTest, RaggedGather_2DIndices) { // indices = [[2, 1], [0, 3]] // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({2, 2}), // indices.shape {2, 1, 0, 3}, // indices {{0, 3, 3, 7, 9}}, // params_nested_splits @@ -161,7 +161,7 @@ TEST_F(RaggedGatherOpTest, RaggedGather_2DIndices) { TEST_F(RaggedGatherOpTest, RaggedGather_ScalarIndices) { // indices = 2 // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({}), // indices.shape {2}, // indices {{0, 3, 3, 7, 9}}, // params_nested_splits @@ -178,7 +178,7 @@ TEST_F(RaggedGatherOpTest, RaggedGather_ScalarIndices) { TEST_F(RaggedGatherOpTest, RaggedGather_OutOfBounds) { // indices = [2, 10] // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({2}), // indices.shape {2, 10}, // indices {{0, 3, 3, 7, 9}}, // params_nested_splits @@ -189,7 +189,7 @@ TEST_F(RaggedGatherOpTest, RaggedGather_OutOfBounds) { } TEST_F(RaggedGatherOpTest, InvalidSplitsNotSorted) { - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({2}), // indices.shape {0, 2}, // indices {{0, 3, 5, 2, 9}}, // params_nested_splits @@ -200,7 +200,7 @@ TEST_F(RaggedGatherOpTest, InvalidSplitsNotSorted) { } TEST_F(RaggedGatherOpTest, InvalidSplitsNegative) { - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({2}), // indices.shape {0, 2}, // indices {{-1, 3, 2, 7, 9}}, // params_nested_splits @@ -211,7 +211,7 @@ TEST_F(RaggedGatherOpTest, InvalidSplitsNegative) { } TEST_F(RaggedGatherOpTest, InvalidSplitsEmpty) { - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({0}), // indices.shape {}, // indices {{}}, // params_nested_splits @@ -222,7 +222,7 @@ TEST_F(RaggedGatherOpTest, InvalidSplitsEmpty) { } TEST_F(RaggedGatherOpTest, InvalidSplitsTooBig) { - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({2}), // indices.shape {0, 2}, // indices {{0, 20, 40, 80, 100}}, // params_nested_splits @@ -234,7 +234,7 @@ TEST_F(RaggedGatherOpTest, InvalidSplitsTooBig) { } TEST_F(RaggedGatherOpTest, BadValuesShape) { - BuildRaggedGatherGraph( + BuildRaggedGatherGraph( TensorShape({0}), // indices.shape {}, // indices {{0}}, // params_nested_splits diff --git a/tensorflow/core/kernels/ragged_range_op_test.cc b/tensorflow/core/kernels/ragged_range_op_test.cc index 699531a8d3647c..9a951af9017a36 100644 --- a/tensorflow/core/kernels/ragged_range_op_test.cc +++ b/tensorflow/core/kernels/ragged_range_op_test.cc @@ -90,10 +90,10 @@ TEST_F(RaggedRangeOpTest, RangeSizeOverflow) { } TEST_F(RaggedRangeOpTest, RangeSizeOverflow2) { - BuildRaggedRangeGraph(); - AddInputFromArray(TensorShape({}), {static_cast(5e18)}); - AddInputFromArray(TensorShape({}), {static_cast(-5e18)}); - AddInputFromArray(TensorShape({}), {-1}); + BuildRaggedRangeGraph(); + AddInputFromArray(TensorShape({}), {static_cast(5e18)}); + AddInputFromArray(TensorShape({}), {static_cast(-5e18)}); + AddInputFromArray(TensorShape({}), {-1}); 
EXPECT_EQ(absl::StrCat("Requires ((limit - start) / delta) <= ", std::numeric_limits::max()), diff --git a/tensorflow/core/kernels/ragged_tensor_to_sparse_kernel.cc b/tensorflow/core/kernels/ragged_tensor_to_sparse_kernel.cc index 7f92a50133ce99..ffb186af87ece4 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_sparse_kernel.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_sparse_kernel.cc @@ -228,8 +228,8 @@ class RaggedTensorToSparseOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("RaggedTensorToSparse") .Device(DEVICE_CPU) - .TypeConstraint("Tsplits"), - RaggedTensorToSparseOp); + .TypeConstraint("Tsplits"), + RaggedTensorToSparseOp); REGISTER_KERNEL_BUILDER(Name("RaggedTensorToSparse") .Device(DEVICE_CPU) diff --git a/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc b/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc index 516a0cddcb6acc..28820593a4b5c5 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc @@ -445,8 +445,8 @@ void copy_array(tstring* dst, const tstring* src, } template <> -void copy_array(tstring* dst, const tstring* src, - int32_t size) { +void copy_array(tstring* dst, const tstring* src, + int32_t size) { slow_copy_array(dst, src, size); } @@ -460,8 +460,8 @@ void copy_array(Eigen::half* dst, const Eigen::half* src, } template <> -void copy_array(Eigen::half* dst, const Eigen::half* src, - int32_t size) { +void copy_array(Eigen::half* dst, const Eigen::half* src, + int32_t size) { slow_copy_array(dst, src, size); } diff --git a/tensorflow/core/kernels/ragged_tensor_to_tensor_op_test.cc b/tensorflow/core/kernels/ragged_tensor_to_tensor_op_test.cc index b0f53598d32de9..e23a2c07ed861b 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_tensor_op_test.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_tensor_op_test.cc @@ -51,7 +51,8 @@ class RaggedTensorToTensorOpTest : public ::tensorflow::OpsTestBase { // Builds the tensorflow test graph for RaggedTensorToTensor. 
template void BuildRaggedTensorToTensorGraph( - const TensorShape& shape, const std::vector& row_partition_types, + const TensorShape& shape, + const std::vector& row_partition_types, const ShapeAndValues& values, const ShapeAndValues& default_value, const std::vector>& row_partition_tensors) { @@ -95,12 +96,13 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor) { // indices = [2, 1, 0, 3] // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] // params.shape = [4, None] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({4, 4}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS"}, // row_partition_types createVector({.1, .2, .3, .4, .5, .6, .7, .8, .9}), // values createScalar(1.5), // default_value - {createScalar(4), createVector({0, 0, 0, 2, 2, 2, 2, 3, 3})} + {createScalar(4), + createVector({0, 0, 0, 2, 2, 2, 2, 3, 3})} // row_partition_tensors ); @@ -117,12 +119,12 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor) { TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensorRowSplits) { // indices = [2, 1, 0, 3] // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({4, 4}), // shape {"ROW_SPLITS"}, // row_partition_types createVector({.1, .2, .3, .4, .5, .6, .7, .8, .9}), // values - createScalar(1.5), // default_value - {createVector({0, 3, 3, 7, 9})} // row_partition_tensors + createScalar(1.5), // default_value + {createVector({0, 3, 3, 7, 9})} // row_partition_tensors ); TF_ASSERT_OK(RunOpKernel()); @@ -143,16 +145,16 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_3DParams) { // [[.4, .5], [.6, .7, .8]], // [[.9]] // ] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({5, 2, 3}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS", "VALUE_ROWIDS"}, // row_partition_types createVector({.1, .2, .3, .4, .5, .6, .7, .8, .9}), // values createScalar(1.5), // default_value { - createScalar(5), - createVector({0, 1, 1, 3, 3, 4}), - createVector({1, 1, 2, 3, 3, 4, 4, 4, 5}), + createScalar(5), + createVector({0, 1, 1, 3, 3, 4}), + createVector({1, 1, 2, 3, 3, 4, 4, 4, 5}), } // row_partition_tensors ); TF_ASSERT_OK(RunOpKernel()); @@ -181,14 +183,14 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_3DParamsRowSplits) { // [[.4, .5], [.6, .7, .8]], // [[.9]] // ] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({5, 2, 3}), // shape {"ROW_SPLITS", "ROW_SPLITS"}, // row_partition_types createVector({.1, .2, .3, .4, .5, .6, .7, .8, .9}), // values createScalar(1.5), // default_value { - createVector({0, 1, 3, 3, 5, 6}), - createVector({0, 0, 2, 3, 5, 8, 9}), + createVector({0, 1, 3, 3, 5, 6}), + createVector({0, 0, 2, 3, 5, 8, 9}), } // row_partition_tensors ); TF_ASSERT_OK(RunOpKernel()); @@ -249,15 +251,16 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_4DParams) { // [] // ] // params.shape = [3, 2, 3, 2] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({4, 2, 3, 2}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS", "VALUE_ROWIDS", - "VALUE_ROWIDS"}, // row_partition_types - createVector({1, 2, 3, 4, 5, 6, 7, 8}), // values - createScalar(15), // default_value - {createScalar(5), createVector({0, 1, 1}), - createVector({1, 1, 1, 2}), - createVector({0, 0, 1, 1, 2, 2, 3, 3})} // row_partition_tensors + "VALUE_ROWIDS"}, // row_partition_types + createVector({1, 2, 3, 4, 5, 6, 7, 8}), // values + createScalar(15), // default_value + {createScalar(5), 
createVector({0, 1, 1}), + createVector({1, 1, 1, 2}), + createVector({0, 0, 1, 1, 2, 2, 3, 3})} + // row_partition_tensors ); TF_ASSERT_OK(RunOpKernel()); @@ -277,9 +280,9 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_4DParams) { // [[15,15],[15,15],[15,15]], // ] // params.shape = [3, 2, 3, 2] - test::ExpectTensorEqual( + test::ExpectTensorEqual( *GetOutput(0), - test::AsTensor( + test::AsTensor( {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 1, 2, 3, 4, 5, 6, 7, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15}, @@ -296,14 +299,14 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_4DParamsRowSplit) { // [] // ] // params.shape = [3, 2, 3, 2] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({4, 2, 3, 2}), // shape {"ROW_SPLITS", "ROW_SPLITS", "ROW_SPLITS"}, // row_partition_types - createVector({1, 2, 3, 4, 5, 6, 7, 8}), // values - createScalar(15), // default_value - {createVector({0, 1, 3}), createVector({0, 0, 3, 4}), - createVector({0, 2, 4, 6, 8})} // row_partition_tensors + createVector({1, 2, 3, 4, 5, 6, 7, 8}), // values + createScalar(15), // default_value + {createVector({0, 1, 3}), createVector({0, 0, 3, 4}), + createVector({0, 2, 4, 6, 8})} // row_partition_tensors ); TF_ASSERT_OK(RunOpKernel()); @@ -323,9 +326,9 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_4DParamsRowSplit) { // [[15,15],[15,15],[15,15]], // ] // params.shape = [3, 2, 3, 2] - test::ExpectTensorEqual( + test::ExpectTensorEqual( *GetOutput(0), - test::AsTensor( + test::AsTensor( {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 1, 2, 3, 4, 5, 6, 7, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15}, @@ -334,12 +337,13 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_4DParamsRowSplit) { TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensorContractExpanded) { // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({3, 5}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS"}, // row_partition_types createVector({.1, .2, .3, .4, .5, .6, .7, .8, .9}), // values createScalar(1.5), // default_value - {createScalar(4), createVector({0, 0, 0, 2, 2, 2, 2, 3, 3})} + {createScalar(4), + createVector({0, 0, 0, 2, 2, 2, 2, 3, 3})} // row_partition_tensors ); @@ -357,14 +361,15 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensorContractExpanded) { // Adds a dense dimension. 
TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensorContractExpandedDense) { // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({3, 5, 2}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS"}, // row_partition_types ShapeAndValues{TensorShape({9, 2}), {.1, 1.1, .2, 1.2, .3, 1.3, .4, 1.4, .5, 1.5, .6, 1.6, .7, 1.7, .8, 1.8, .9, 1.9}}, // values createScalar(1.5), // default_value - {createScalar(4), createVector({0, 0, 0, 2, 2, 2, 2, 3, 3})} + {createScalar(4), + createVector({0, 0, 0, 2, 2, 2, 2, 3, 3})} // row_partition_tensors ); @@ -386,12 +391,13 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensorConstrained) { // [.4, .5, .6, .7], // [.8, .9]] // constrained to (3, 3) - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({3, 3}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS"}, // row_partition_types createVector({.1, .2, .3, .4, .5, .6, .7, .8, .9}), // values createScalar(1.5), // default_value - {createScalar(4), createVector({0, 0, 0, 2, 2, 2, 2, 3, 3})} + {createScalar(4), + createVector({0, 0, 0, 2, 2, 2, 2, 3, 3})} // row_partition_tensors ); @@ -418,16 +424,16 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_3DParamsConstrained) { // [[.9]] // ] // params.shape = [5, None, None] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({4, 1, 2}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS", "VALUE_ROWIDS"}, // row_partition_types createVector({.1, .2, .3, .4, .5, .6, .7, .8, .9}), // values createScalar(1.5), // default_value { - createScalar(5), - createVector({0, 1, 1, 3, 3, 4}), - createVector({1, 1, 2, 3, 3, 4, 4, 4, 5}), + createScalar(5), + createVector({0, 1, 1, 3, 3, 4}), + createVector({1, 1, 2, 3, 3, 4, 4, 4, 5}), } // row_partition_tensors ); TF_ASSERT_OK(RunOpKernel()); @@ -457,15 +463,16 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_4DParamsConstrained) { // [] // ] // params.shape = [3, 2, 3, 2] - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({2, 2, 2, 2}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS", "VALUE_ROWIDS", - "VALUE_ROWIDS"}, // row_partition_types - createVector({1, 2, 3, 4, 5, 6, 7, 8}), // values - createScalar(15), // default_value - {createScalar(5), createVector({0, 1, 1}), - createVector({1, 1, 1, 2}), - createVector({0, 0, 1, 1, 2, 2, 3, 3})} // row_partition_tensors + "VALUE_ROWIDS"}, // row_partition_types + createVector({1, 2, 3, 4, 5, 6, 7, 8}), // values + createScalar(15), // default_value + {createScalar(5), createVector({0, 1, 1}), + createVector({1, 1, 1, 2}), + createVector({0, 0, 1, 1, 2, 2, 3, 3})} + // row_partition_tensors ); TF_ASSERT_OK(RunOpKernel()); @@ -480,25 +487,38 @@ TEST_F(RaggedTensorToTensorOpTest, RaggedTensorToTensor_4DParamsConstrained) { // ], // ] // params.shape = [3, 2, 3, 2] - test::ExpectTensorEqual(*GetOutput(0), test::AsTensor( - { - 15, 15, 15, 15, // - 15, 15, 15, 15, // - 1, 2, 3, 4, // - 7, 8, 15, 15, // - }, - TensorShape({2, 2, 2, 2}))); + test::ExpectTensorEqual(*GetOutput(0), + test::AsTensor( + { + 15, + 15, + 15, + 15, // + 15, + 15, + 15, + 15, // + 1, + 2, + 3, + 4, // + 7, + 8, + 15, + 15, // + }, + TensorShape({2, 2, 2, 2}))); } TEST_F(RaggedTensorToTensorOpTest, ShapeWrongDimensions) { - BuildRaggedTensorToTensorGraph( + BuildRaggedTensorToTensorGraph( TensorShape({10, 7, 10, 20}), // shape {"FIRST_DIM_SIZE", "VALUE_ROWIDS", - "VALUE_ROWIDS"}, // row_partition_types - createVector({1, 2, 3, 4}), // values - 
createScalar(15), // default_value - {createScalar(5), createVector({0, 1, 1}), - createVector({1, 1, 1, 2})} // row_partition_tensors + "VALUE_ROWIDS"}, // row_partition_types + createVector({1, 2, 3, 4}), // values + createScalar(15), // default_value + {createScalar(5), createVector({0, 1, 1}), + createVector({1, 1, 1, 2})} // row_partition_tensors ); // Fails with an invalid argument. EXPECT_EQ(absl::IsInvalidArgument(RunOpKernel()), true); @@ -508,7 +528,7 @@ class RaggedTensorToTensorOpUnknownShapeTest : public ::tensorflow::OpsTestBase { protected: std::unique_ptr op_; - void SetAttributes(const absl::Span row_partition_types, + void SetAttributes(const absl::Span row_partition_types, int num_row_partition_tensors) { op_ = std::make_unique("RaggedTensorToTensor"); SetAttrValue(row_partition_types, @@ -519,7 +539,8 @@ class RaggedTensorToTensorOpUnknownShapeTest }; TEST_F(RaggedTensorToTensorOpUnknownShapeTest, ValueRowIDs) { - SetAttributes(absl::Span{"FIRST_DIM_SIZE", "VALUE_ROWIDS"}, 2); + SetAttributes(absl::Span{"FIRST_DIM_SIZE", "VALUE_ROWIDS"}, + 2); INFER_OK(*op_, "?;?;?;?;?", "?"); INFER_OK(*op_, "?;[6];[];[];[6]", "[?,?]"); @@ -544,7 +565,7 @@ TEST_F(RaggedTensorToTensorOpUnknownShapeTest, ValueRowIDs) { TEST_F(RaggedTensorToTensorOpUnknownShapeTest, RowSplits) { // RaggedTensorToTensor(param_splits+, param_values, indices) -> [splits+, // values] - SetAttributes(absl::Span{"ROW_SPLITS"}, 1); + SetAttributes(absl::Span{"ROW_SPLITS"}, 1); // value, default_value, ROW_SPLITS INFER_OK(*op_, "?;?;?;?", "?"); diff --git a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc index b4d7fc8395b614..a46f40d177778c 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc @@ -256,7 +256,7 @@ class RaggedTensorToVariantGradientOp : public OpKernel { auto flat_row_splits = row_splits.flat(); TensorShape dense_values_shape; OP_REQUIRES_OK(context, - TensorShapeUtils::MakeShape(context->input(2).vec(), + TensorShapeUtils::MakeShape(context->input(2).vec(), &dense_values_shape)); // Validate row_splits. 
diff --git a/tensorflow/core/kernels/ragged_tensor_to_variant_op_test.cc b/tensorflow/core/kernels/ragged_tensor_to_variant_op_test.cc index 95bd16bbbcbafe..f25f8b34198702 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_variant_op_test.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_variant_op_test.cc @@ -232,7 +232,7 @@ TEST_F(RaggedTensorToVariantKernelTest, NonEmptyBatchedInputInt32Splits) { const std::vector batched_values = {0, 1, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9, 8, 9}; - BuildEncodeRaggedTensorGraph( + BuildEncodeRaggedTensorGraph( {batched_splits_1, batched_splits_2, batched_splits_3}, TensorShape({14}), batched_values, true); TF_ASSERT_OK(RunOpKernel()); @@ -240,12 +240,12 @@ TEST_F(RaggedTensorToVariantKernelTest, NonEmptyBatchedInputInt32Splits) { const auto& encoded_list = GetOutput(0)->vec(); EXPECT_EQ(encoded_list.size(), 2); - ExpectRaggedTensorVariantEqual( - CreateVariantFromRagged( + ExpectRaggedTensorVariantEqual( + CreateVariantFromRagged( {{0, 1, 3, 4, 5, 6}, {0, 2, 3, 4, 5, 6, 7}}, {0, 1, 1, 2, 2, 3, 4}), *encoded_list(0).get()); - ExpectRaggedTensorVariantEqual( - CreateVariantFromRagged( + ExpectRaggedTensorVariantEqual( + CreateVariantFromRagged( {{0, 1, 2, 3, 4, 5}, {0, 1, 2, 5, 6, 7}}, {5, 6, 7, 8, 9, 8, 9}), *encoded_list(1).get()); } diff --git a/tensorflow/core/kernels/ragged_tensor_to_variant_op_test.h b/tensorflow/core/kernels/ragged_tensor_to_variant_op_test.h index 7dc63ac8fbf7f8..87cfc50f8a268a 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_variant_op_test.h +++ b/tensorflow/core/kernels/ragged_tensor_to_variant_op_test.h @@ -140,7 +140,7 @@ class RaggedTensorToVariantGradientKernelTest void BuildEncodeRaggedTensorGradientGraph( const std::vector& encoded_ragged_grad, const std::vector& row_splits, - const std::vector& dense_values_shape) { + const std::vector& dense_values_shape) { const auto values_dtype = DataTypeToEnum::v(); const auto splits_dtype = DataTypeToEnum::v(); @@ -161,8 +161,8 @@ class RaggedTensorToVariantGradientKernelTest AddInputFromArray(TensorShape({splits_size}), row_splits); int64_t dense_values_shape_size = dense_values_shape.size(); - AddInputFromArray(TensorShape({dense_values_shape_size}), - dense_values_shape); + AddInputFromArray(TensorShape({dense_values_shape_size}), + dense_values_shape); } template diff --git a/tensorflow/core/kernels/ragged_tensor_variant.cc b/tensorflow/core/kernels/ragged_tensor_variant.cc index b6b70a283c7c48..5608888b5500d1 100644 --- a/tensorflow/core/kernels/ragged_tensor_variant.cc +++ b/tensorflow/core/kernels/ragged_tensor_variant.cc @@ -22,9 +22,11 @@ limitations under the License. namespace tensorflow { -string RaggedTensorVariant::TypeName() const { return "RaggedTensorVariant"; } +std::string RaggedTensorVariant::TypeName() const { + return "RaggedTensorVariant"; +} -string RaggedTensorVariant::DebugString() const { +std::string RaggedTensorVariant::DebugString() const { return absl::StrCat( "RaggedTensorVariant(dtype=", DataTypeString(values_.dtype()), ", ragged_rank=", nested_splits_.size(), ", splits_dtype=", diff --git a/tensorflow/core/kernels/ragged_tensor_variant.h b/tensorflow/core/kernels/ragged_tensor_variant.h index 1d2066b0dcf457..c75505a603c531 100644 --- a/tensorflow/core/kernels/ragged_tensor_variant.h +++ b/tensorflow/core/kernels/ragged_tensor_variant.h @@ -41,8 +41,8 @@ class RaggedTensorVariant { : values_(std::move(values)), nested_splits_(nested_splits) {} // Variant support methods. 
- string TypeName() const; - string DebugString() const; + std::string TypeName() const; + std::string DebugString() const; void Encode(VariantTensorData* data) const; bool Decode(const VariantTensorData& data); diff --git a/tensorflow/core/kernels/random_binomial_op.cc b/tensorflow/core/kernels/random_binomial_op.cc index 98118b78eb5b58..875744b86ecf47 100644 --- a/tensorflow/core/kernels/random_binomial_op.cc +++ b/tensorflow/core/kernels/random_binomial_op.cc @@ -360,8 +360,8 @@ class RandomBinomialOp : public OpKernel { TensorShape bcast_shape = BCast::ToShape(bcast.output_shape()); TensorShape output_shape; if (shape_tensor.dtype() == DataType::DT_INT32) { - OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(shape_tensor.vec(), - &output_shape)); + OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( + shape_tensor.vec(), &output_shape)); } else { OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( shape_tensor.vec(), &output_shape)); @@ -380,11 +380,11 @@ class RandomBinomialOp : public OpKernel { const int64_t num_sample_dims = (shape_tensor.dim_size(0) - bcast.output_shape().size()); for (int64_t i = 0; i < num_sample_dims; ++i) { - samples_per_batch *= shape_tensor.flat()(i); + samples_per_batch *= shape_tensor.flat()(i); } int64_t num_batches = 1; for (int64_t i = num_sample_dims; i < shape_tensor.dim_size(0); ++i) { - num_batches *= shape_tensor.flat()(i); + num_batches *= shape_tensor.flat()(i); } const int64_t num_elements = num_batches * samples_per_batch; @@ -409,8 +409,9 @@ class RandomBinomialOp : public OpKernel { errors::InvalidArgument("Unsupported algorithm id: ", alg)); static_assert(std::is_same::value, "StateElementType must be int64"); - static_assert(std::is_same::value, - "PhiloxRandom::ResultElementType must be uint32"); + static_assert( + std::is_same::value, + "PhiloxRandom::ResultElementType must be uint32"); OP_REQUIRES(ctx, var_tensor_flat.size() >= PHILOX_MIN_STATE_SIZE, errors::InvalidArgument( "For Philox algorithm, the size of state must be at least ", @@ -478,8 +479,8 @@ class StatelessRandomBinomialOp : public OpKernel { TensorShape bcast_shape = BCast::ToShape(bcast.output_shape()); TensorShape output_shape; if (shape_tensor.dtype() == DataType::DT_INT32) { - OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(shape_tensor.vec(), - &output_shape)); + OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( + shape_tensor.vec(), &output_shape)); } else { OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( shape_tensor.vec(), &output_shape)); @@ -494,14 +495,14 @@ class StatelessRandomBinomialOp : public OpKernel { (shape_tensor.dim_size(0) - bcast.output_shape().size()); for (int64_t i = 0; i < num_sample_dims; ++i) { samples_per_batch *= shape_tensor.dtype() == DataType::DT_INT32 - ? shape_tensor.flat()(i) - : shape_tensor.flat()(i); + ? shape_tensor.flat()(i) + : shape_tensor.flat()(i); } int64_t num_batches = 1; for (int64_t i = num_sample_dims; i < shape_tensor.dim_size(0); ++i) { num_batches *= shape_tensor.dtype() == DataType::DT_INT32 - ? shape_tensor.flat()(i) - : shape_tensor.flat()(i); + ? 
shape_tensor.flat()(i) + : shape_tensor.flat()(i); } const int64_t num_elements = num_batches * samples_per_batch; @@ -557,7 +558,7 @@ class StatelessRandomBinomialOp : public OpKernel { REGISTER_ALL(Eigen::half); REGISTER_ALL(float); REGISTER_ALL(double); -REGISTER_ALL(int32); +REGISTER_ALL(int32_t); REGISTER_ALL(int64_t); #undef REGISTER diff --git a/tensorflow/core/kernels/random_binomial_op_test.cc b/tensorflow/core/kernels/random_binomial_op_test.cc index 80af07f13a4083..9e715b5afccf92 100644 --- a/tensorflow/core/kernels/random_binomial_op_test.cc +++ b/tensorflow/core/kernels/random_binomial_op_test.cc @@ -28,7 +28,7 @@ static Graph* RandomBinomialGraph(double count, double prob, int num_batches, int samples_per_batch) { Graph* g = new Graph(OpRegistry::Global()); Tensor shape_t(DT_INT32, TensorShape({2})); - shape_t.flat().setValues({num_batches, samples_per_batch}); + shape_t.flat().setValues({num_batches, samples_per_batch}); Tensor counts_t(DT_FLOAT, TensorShape({num_batches})); counts_t.flat().setConstant(count); diff --git a/tensorflow/core/kernels/random_index_shuffle_test.cc b/tensorflow/core/kernels/random_index_shuffle_test.cc index 259f484cc344ca..02458f4aa99f49 100644 --- a/tensorflow/core/kernels/random_index_shuffle_test.cc +++ b/tensorflow/core/kernels/random_index_shuffle_test.cc @@ -32,11 +32,11 @@ class RandomIndexShuffleTest : public ::testing::TestWithParam { // Check that we do a correct bijection. TEST_P(RandomIndexShuffleTest, Bijection) { - const std::array& key = {42, 73, 1991}; + const std::array& key = {42, 73, 1991}; const uint64_t max_value = GetMaxValue(); std::vector seen(max_value + 1, false); for (uint64_t value = 0; value <= max_value; ++value) { - const uint64 output_value = + const uint64_t output_value = index_shuffle(value, key, max_value, /* rounds= */ 4); EXPECT_GE(output_value, 0); EXPECT_LE(output_value, max_value); diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 7624b56b50b587..87179f9fef5e8f 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -157,7 +157,7 @@ class RandomGammaOp : public OpKernel { shape_t.DebugString())); TensorShape samples_shape; if (shape_t.dtype() == DataType::DT_INT32) { - auto vec = shape_t.flat(); + auto vec = shape_t.flat(); OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(vec.data(), vec.size(), &samples_shape)); } else if (shape_t.dtype() == DataType::DT_INT64) { diff --git a/tensorflow/core/kernels/random_op.h b/tensorflow/core/kernels/random_op.h index ea16f54ec9acb4..cef648707d3422 100644 --- a/tensorflow/core/kernels/random_op.h +++ b/tensorflow/core/kernels/random_op.h @@ -40,8 +40,8 @@ typedef Eigen::ThreadPoolDevice CPUDevice; // nullptr, they provide the input; otherwise `gen` provides the input. template struct FillPhiloxRandom { - void operator()(OpKernelContext* ctx, const CPUDevice& d, const uint64* key, - const uint64* counter, random::PhiloxRandom gen, + void operator()(OpKernelContext* ctx, const CPUDevice& d, const uint64_t* key, + const uint64_t* counter, random::PhiloxRandom gen, typename Distribution::ResultElementType* data, int64_t size, Distribution dist); }; @@ -51,8 +51,8 @@ typedef Eigen::GpuDevice GPUDevice; // Declares the partially GPU-specialized functor struct. 
template struct FillPhiloxRandom { - void operator()(OpKernelContext* ctx, const GPUDevice& d, const uint64* key, - const uint64* counter, random::PhiloxRandom gen, + void operator()(OpKernelContext* ctx, const GPUDevice& d, const uint64_t* key, + const uint64_t* counter, random::PhiloxRandom gen, typename Distribution::ResultElementType* data, int64_t size, Distribution dist); }; diff --git a/tensorflow/core/kernels/random_op_cpu.h b/tensorflow/core/kernels/random_op_cpu.h index cfa927c1e539ea..7d7a16dcc6a3fc 100644 --- a/tensorflow/core/kernels/random_op_cpu.h +++ b/tensorflow/core/kernels/random_op_cpu.h @@ -60,8 +60,8 @@ using random::SingleSampleAdapter; template struct FillPhiloxRandom { typedef typename Distribution::ResultElementType T; - void operator()(OpKernelContext* ctx, const Device&, const uint64* key, - const uint64* counter, random::PhiloxRandom gen, T* data, + void operator()(OpKernelContext* ctx, const Device&, const uint64_t* key, + const uint64_t* counter, random::PhiloxRandom gen, T* data, int64_t size, Distribution dist) { OP_REQUIRES( ctx, false, @@ -156,8 +156,8 @@ struct FillPhiloxRandomTask { // It splits the work into several tasks and run them in parallel template void FillPhiloxRandom::operator()( - OpKernelContext* ctx, const CPUDevice&, const uint64* key, - const uint64* counter, random::PhiloxRandom gen, + OpKernelContext* ctx, const CPUDevice&, const uint64_t* key, + const uint64_t* counter, random::PhiloxRandom gen, typename Distribution::ResultElementType* data, int64_t size, Distribution dist) { if (key != nullptr && counter != nullptr) { diff --git a/tensorflow/core/kernels/random_op_gpu.h b/tensorflow/core/kernels/random_op_gpu.h index f8efa21daba8ff..dbb66c2148397d 100644 --- a/tensorflow/core/kernels/random_op_gpu.h +++ b/tensorflow/core/kernels/random_op_gpu.h @@ -34,17 +34,17 @@ struct FillPhiloxRandomKernel; template struct FillPhiloxRandomKernel { typedef typename Distribution::ResultElementType T; - PHILOX_DEVICE_INLINE void Run(const uint64* key, const uint64* counter, - random::PhiloxRandom gen, T* data, int64 size, + PHILOX_DEVICE_INLINE void Run(const uint64_t* key, const uint64_t* counter, + random::PhiloxRandom gen, T* data, int64_t size, Distribution dist); }; template struct FillPhiloxRandomKernel { typedef typename Distribution::ResultElementType T; - PHILOX_DEVICE_INLINE void Run(const uint64* key, const uint64* counter, + PHILOX_DEVICE_INLINE void Run(const uint64_t* key, const uint64_t* counter, random::PhiloxRandom base_gen, T* data, - int64 size, Distribution dist); + int64_t size, Distribution dist); }; template @@ -83,14 +83,14 @@ class SampleCopier { }; template <> -class SampleCopier { +class SampleCopier { public: // Copies the elements from the array to buf. buf must be 128-bit aligned, // which is true for tensor data, and all offsets that are a multiple of the // vector size (because the vectors are 128 bits long). inline __device__ void operator()( - int32* __restrict__ buf, - const tensorflow::random::Array& array) const { + int32_t* __restrict__ buf, + const tensorflow::random::Array& array) const { ::int4 vec; vec.x = array[0]; vec.y = array[1]; @@ -119,14 +119,14 @@ class SampleCopier { }; template <> -class SampleCopier { +class SampleCopier { public: // Copies the elements from the array to buf. buf must be 128-bit aligned, // which is true for tensor data, and all offsets that are a multiple of the // vector size (because the vectors are 128 bits long). 
inline __device__ void operator()( - int64* __restrict__ buf, - const tensorflow::random::Array& array) const { + int64_t* __restrict__ buf, + const tensorflow::random::Array& array) const { longlong2 vec; vec.x = array[0]; vec.y = array[1]; @@ -139,13 +139,13 @@ class SampleCopier { // distribution. Each output takes a fixed number of samples. template PHILOX_DEVICE_INLINE void FillPhiloxRandomKernel::Run( - const uint64* key, const uint64* counter, random::PhiloxRandom gen, T* data, - int64 size, Distribution dist) { + const uint64_t* key, const uint64_t* counter, random::PhiloxRandom gen, + T* data, int64_t size, Distribution dist) { const int kGroupSize = Distribution::kResultElementCount; - const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; - const int32 total_thread_count = gridDim.x * blockDim.x; - int64 offset = thread_id * kGroupSize; + const int32_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + const int32_t total_thread_count = gridDim.x * blockDim.x; + int64_t offset = thread_id * kGroupSize; if (key != nullptr && counter != nullptr) { gen = GetPhiloxRandomFromCounterKeyMem(counter, key); } @@ -174,8 +174,8 @@ PHILOX_DEVICE_INLINE void FillPhiloxRandomKernel::Run( // distribution. Each output takes a variable number of samples. template PHILOX_DEVICE_INLINE void FillPhiloxRandomKernel::Run( - const uint64* key, const uint64* counter, random::PhiloxRandom base_gen, - T* data, int64 size, Distribution dist) { + const uint64_t* key, const uint64_t* counter, random::PhiloxRandom base_gen, + T* data, int64_t size, Distribution dist) { if (key != nullptr && counter != nullptr) { base_gen = GetPhiloxRandomFromCounterKeyMem(counter, key); } @@ -189,10 +189,10 @@ PHILOX_DEVICE_INLINE void FillPhiloxRandomKernel::Run( kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount; - const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x; - const int32 total_thread_count = gridDim.x * blockDim.x; - int64 group_index = thread_id; - int64 offset = group_index * kGroupSize; + const int32_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + const int32_t total_thread_count = gridDim.x * blockDim.x; + int64_t group_index = thread_id; + int64_t offset = group_index * kGroupSize; while (offset < size) { // Since each output takes a variable number of samples, we need to @@ -219,10 +219,10 @@ PHILOX_DEVICE_INLINE void FillPhiloxRandomKernel::Run( // A simple launch pad to call the correct function templates to fill the data template __global__ void __launch_bounds__(1024) - FillPhiloxRandomKernelLaunch(const uint64* key, const uint64* counter, + FillPhiloxRandomKernelLaunch(const uint64_t* key, const uint64_t* counter, random::PhiloxRandom base_gen, typename Distribution::ResultElementType* data, - int64 size, Distribution dist) { + int64_t size, Distribution dist) { FillPhiloxRandomKernel() .Run(key, counter, base_gen, data, size, dist); @@ -231,13 +231,13 @@ __global__ void __launch_bounds__(1024) // Partial specialization for GPU template void FillPhiloxRandom::operator()( - OpKernelContext*, const GPUDevice& d, const uint64* key, - const uint64* counter, random::PhiloxRandom gen, - typename Distribution::ResultElementType* data, int64 size, + OpKernelContext*, const GPUDevice& d, const uint64_t* key, + const uint64_t* counter, random::PhiloxRandom gen, + typename Distribution::ResultElementType* data, int64_t size, Distribution dist) { if (size == 0) return; - const int32 block_size = d.maxGpuThreadsPerBlock(); - const int32 num_blocks = + const int32_t block_size = 
d.maxGpuThreadsPerBlock(); + const int32_t num_blocks = std::min( d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(), size + block_size - 1) / diff --git a/tensorflow/core/kernels/random_op_test.cc b/tensorflow/core/kernels/random_op_test.cc index 7292ce4ec8a3f0..5abe81f27f31e2 100644 --- a/tensorflow/core/kernels/random_op_test.cc +++ b/tensorflow/core/kernels/random_op_test.cc @@ -26,13 +26,13 @@ namespace tensorflow { namespace { Tensor VecShape(int64_t v) { - if (v >= std::numeric_limits::max()) { + if (v >= std::numeric_limits::max()) { Tensor shape(DT_INT64, TensorShape({1})); shape.vec()(0) = v; return shape; } else { Tensor shape(DT_INT32, TensorShape({1})); - shape.vec()(0) = v; + shape.vec()(0) = v; return shape; } } diff --git a/tensorflow/core/kernels/random_ops_util.h b/tensorflow/core/kernels/random_ops_util.h index b990456965ff59..c203181d575818 100644 --- a/tensorflow/core/kernels/random_ops_util.h +++ b/tensorflow/core/kernels/random_ops_util.h @@ -26,20 +26,21 @@ using random::PhiloxRandom; // The following 2 functions use the contract "lower 32 bits for the first // uint32, higher 32 bits for the second". Note that this is endian-neutral, // unlike a direct memory copy `memcpy(output, &input, 8)`. -PHILOX_DEVICE_INLINE void Uint64ToUint32s(uint64 input, uint32* output1, - uint32* output2) { - *output1 = static_cast(input); - *output2 = static_cast(input >> 32); +PHILOX_DEVICE_INLINE void Uint64ToUint32s(uint64_t input, uint32_t* output1, + uint32_t* output2) { + *output1 = static_cast(input); + *output2 = static_cast(input >> 32); } -PHILOX_DEVICE_INLINE uint64 Uint32sToUint64(uint32 input1, uint32 input2) { - auto u64_1 = static_cast(input1); - auto u64_2 = static_cast(input2); +PHILOX_DEVICE_INLINE uint64_t Uint32sToUint64(uint32_t input1, + uint32_t input2) { + auto u64_1 = static_cast(input1); + auto u64_2 = static_cast(input2); return u64_1 | (u64_2 << 32); } PHILOX_DEVICE_INLINE PhiloxRandom::ResultType GetCounterFromMem( - uint64 const* ptr) { + const uint64_t* ptr) { PhiloxRandom::ResultType counter; Uint64ToUint32s(ptr[0], &counter[0], &counter[1]); Uint64ToUint32s(ptr[1], &counter[2], &counter[3]); @@ -47,24 +48,24 @@ PHILOX_DEVICE_INLINE PhiloxRandom::ResultType GetCounterFromMem( } PHILOX_DEVICE_INLINE void WriteCounterToMem( - PhiloxRandom::ResultType const& counter, uint64* ptr) { + PhiloxRandom::ResultType const& counter, uint64_t* ptr) { ptr[0] = Uint32sToUint64(counter[0], counter[1]); ptr[1] = Uint32sToUint64(counter[2], counter[3]); } -PHILOX_DEVICE_INLINE PhiloxRandom::Key GetKeyFromMem(uint64 const* ptr) { +PHILOX_DEVICE_INLINE PhiloxRandom::Key GetKeyFromMem(const uint64_t* ptr) { PhiloxRandom::Key key; Uint64ToUint32s(ptr[0], &key[0], &key[1]); return key; } PHILOX_DEVICE_INLINE void WriteKeyToMem(PhiloxRandom::Key const& key, - uint64* ptr) { + uint64_t* ptr) { *ptr = Uint32sToUint64(key[0], key[1]); } PHILOX_DEVICE_INLINE PhiloxRandom GetPhiloxRandomFromCounterKeyMem( - uint64 const* counter_ptr, uint64 const* key_ptr) { + const uint64_t* counter_ptr, const uint64_t* key_ptr) { return PhiloxRandom(GetCounterFromMem(counter_ptr), GetKeyFromMem(key_ptr)); } diff --git a/tensorflow/core/kernels/random_poisson_op.cc b/tensorflow/core/kernels/random_poisson_op.cc index 3c703f5d0ca0d7..9b1f93584ad86b 100644 --- a/tensorflow/core/kernels/random_poisson_op.cc +++ b/tensorflow/core/kernels/random_poisson_op.cc @@ -351,7 +351,7 @@ TF_CALL_double(REGISTER); REGISTER_ALL(Eigen::half); REGISTER_ALL(float); REGISTER_ALL(double); 
-REGISTER_ALL(int32);
+REGISTER_ALL(int32_t);
 REGISTER_ALL(int64_t);
 
 #undef REGISTER_ALL
diff --git a/tensorflow/core/kernels/random_poisson_op_test.cc b/tensorflow/core/kernels/random_poisson_op_test.cc
index ea2541d8bdf1b2..4d8f62a2e142d8 100644
--- a/tensorflow/core/kernels/random_poisson_op_test.cc
+++ b/tensorflow/core/kernels/random_poisson_op_test.cc
@@ -24,13 +24,13 @@ namespace tensorflow {
 namespace {
 
 Tensor VecShape(int64_t v) {
-  if (v >= std::numeric_limits<int32>::max()) {
+  if (v >= std::numeric_limits<int32_t>::max()) {
     Tensor shape(DT_INT64, TensorShape({1}));
     shape.vec<int64_t>()(0) = v;
     return shape;
   } else {
     Tensor shape(DT_INT32, TensorShape({1}));
-    shape.vec<int32>()(0) = v;
+    shape.vec<int32_t>()(0) = v;
     return shape;
   }
 }
diff --git a/tensorflow/core/kernels/random_shuffle_queue_op.cc b/tensorflow/core/kernels/random_shuffle_queue_op.cc
index 856357489bdfab..c9c83d381e6ff9 100644
--- a/tensorflow/core/kernels/random_shuffle_queue_op.cc
+++ b/tensorflow/core/kernels/random_shuffle_queue_op.cc
@@ -45,7 +45,7 @@ class RandomShuffleQueue : public TypedQueue > {
   RandomShuffleQueue(int32_t capacity, int32_t min_after_dequeue, int64_t seed,
                      int64_t seed2, const DataTypeVector& component_dtypes,
                      const std::vector& component_shapes,
-                     const string& name);
+                     const std::string& name);
 
   absl::Status Initialize() override;  // Must be called before any other method.
 
@@ -61,7 +61,7 @@ class RandomShuffleQueue : public TypedQueue > {
                       CallbackWithTuple callback) override;
   absl::Status MatchesNodeDef(const NodeDef& node_def) override;
 
-  int32 size() const override {
+  int32_t size() const override {
     mutex_lock lock(mu_);
     return queues_[0].size();
   }
@@ -78,7 +78,7 @@ class RandomShuffleQueue : public TypedQueue > {
                                    OpKernelContext* ctx, Tensor* out_tensor);
 
-  const int32 min_after_dequeue_;
+  const int32_t min_after_dequeue_;
   const int64_t original_seed_;
   const int64_t original_seed2_;
 
@@ -93,7 +93,7 @@ class RandomShuffleQueue : public TypedQueue > {
 RandomShuffleQueue::RandomShuffleQueue(
     int32_t capacity, int32_t min_after_dequeue, int64_t seed, int64_t seed2,
     const DataTypeVector& component_dtypes,
-    const std::vector& component_shapes, const string& name)
+    const std::vector& component_shapes, const std::string& name)
     : TypedQueue(capacity, component_dtypes, component_shapes, name),
       min_after_dequeue_(min_after_dequeue),
       original_seed_(seed),
@@ -503,7 +503,7 @@ class RandomShuffleQueueOp : public TypedQueueOp {
     return CreateTypedQueue(queue, ret);
   }
 
-  int32 min_after_dequeue_;
+  int32_t min_after_dequeue_;
   int64_t seed_;
   int64_t seed2_;
   std::vector component_shapes_;
diff --git a/tensorflow/core/kernels/range_sampler.cc b/tensorflow/core/kernels/range_sampler.cc
index db4f97c3e925de..2f8fb60c3b9f44 100644
--- a/tensorflow/core/kernels/range_sampler.cc
+++ b/tensorflow/core/kernels/range_sampler.cc
@@ -248,7 +248,7 @@ FixedUnigramSampler::FixedUnigramSampler(int64_t range, float distortion,
 }
 
 absl::Status FixedUnigramSampler::SetDistributionSampler(
-    Env* env, const string& vocab_file) {
+    Env* env, const std::string& vocab_file) {
   TF_RETURN_IF_ERROR(LoadFromFile(env, vocab_file, distortion_));
   if (!TF_PREDICT_TRUE(FixedUnigramSampler::range() == weights_.size()))
     return (errors::InvalidArgument("range is ", FixedUnigramSampler::range(),
@@ -287,18 +287,18 @@ void FixedUnigramSampler::FillReservedIds(int32_t num_reserved_ids) {
 }
 
 absl::Status FixedUnigramSampler::LoadFromFile(Env* env,
-                                               const string& vocab_file,
+                                               const std::string& vocab_file,
                                                float distortion) {
   std::unique_ptr file;
   TF_RETURN_IF_ERROR(env->NewRandomAccessFile(vocab_file, &file));
 
   io::InputBuffer in(file.get(), 262144 /*bytes*/);
-  string line;
+  std::string line;
   int32_t word_id = weights_.size();
   while (in.ReadLine(&line).ok()) {
     // The vocabulary file should be in csv like format, with the last
     // field the weight associated with the word.
-    std::vector<string> cols = str_util::Split(line, ',');
+    std::vector<std::string> cols = str_util::Split(line, ',');
    if (cols.empty()) continue;
     // Skip entries that do not belong to this shard.
     if (word_id % num_shards_ == shard_) {
diff --git a/tensorflow/core/kernels/range_sampler.h b/tensorflow/core/kernels/range_sampler.h
index c49bbcc5b1eede..cecb681cd4e973 100644
--- a/tensorflow/core/kernels/range_sampler.h
+++ b/tensorflow/core/kernels/range_sampler.h
@@ -208,7 +208,7 @@ class FixedUnigramSampler : public RangeSampler {
                      int32_t num_shards, int32_t shard);
   // The vocab_file is assumed to be a CSV, with the last entry of each row a
   // value representing the counts or probabilities for the corresponding ID.
-  absl::Status SetDistributionSampler(Env* env, const string& vocab_file);
+  absl::Status SetDistributionSampler(Env* env, const std::string& vocab_file);
   absl::Status SetDistributionSampler(const std::vector& unigrams);
 
   float Probability(int64_t value) const override;
@@ -225,14 +225,14 @@ class FixedUnigramSampler : public RangeSampler {
   // Sharding information of the sampler. The whole vocabulary is sharded
   // into num_shards_ smaller ranges and each sampler is responsible for one
   // such smaller range, identified by the shard number.
-  int32 num_shards_;
-  int32 shard_;
+  int32_t num_shards_;
+  int32_t shard_;
   float distortion_;
 
   // Fill the sampler with the appropriate number of reserved IDs.
   void FillReservedIds(int32_t num_reserved_ids);
   // Load IDs to sample from a CSV file. It is assumed that the last item of
   // each row contains a count or probability for the corresponding ID.
-  absl::Status LoadFromFile(Env* env, const string& vocab_file,
+  absl::Status LoadFromFile(Env* env, const std::string& vocab_file,
                             float distortion);
   // Load from an in-memory array.
void LoadFromUnigrams(const std::vector& unigrams, float distortion); diff --git a/tensorflow/core/kernels/range_sampler_test.cc b/tensorflow/core/kernels/range_sampler_test.cc index 1aeadc634ccea3..93891f10446311 100644 --- a/tensorflow/core/kernels/range_sampler_test.cc +++ b/tensorflow/core/kernels/range_sampler_test.cc @@ -157,7 +157,7 @@ static const char kVocabContent[] = "w9,256"; TEST_F(RangeSamplerTest, FixedUnigramProbabilities) { Env* env = Env::Default(); - string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); + std::string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); TF_CHECK_OK(WriteStringToFile(env, fname, kVocabContent)); FixedUnigramSampler* test_sampler = new FixedUnigramSampler(9, 0.8, 0, 1, 0); TF_CHECK_OK(test_sampler->SetDistributionSampler(env, fname)); @@ -169,7 +169,7 @@ TEST_F(RangeSamplerTest, FixedUnigramProbabilities) { } TEST_F(RangeSamplerTest, FixedUnigramNoExistingFilename) { Env* env = Env::Default(); - string fname = "NoExistingFile"; + std::string fname = "NoExistingFile"; FixedUnigramSampler* test_sampler = new FixedUnigramSampler(9, 0.8, 0, 1, 0); absl::Status s = test_sampler->SetDistributionSampler(env, fname); sampler_.reset(test_sampler); @@ -177,7 +177,7 @@ TEST_F(RangeSamplerTest, FixedUnigramNoExistingFilename) { } TEST_F(RangeSamplerTest, FixedUnigramNoMatchingRangeWeights) { Env* env = Env::Default(); - string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); + std::string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); TF_CHECK_OK(WriteStringToFile(env, fname, kVocabContent)); FixedUnigramSampler* test_sampler = new FixedUnigramSampler(8, 0.8, 0, 1, 0); absl::Status s = test_sampler->SetDistributionSampler(env, fname); @@ -186,7 +186,7 @@ TEST_F(RangeSamplerTest, FixedUnigramNoMatchingRangeWeights) { } TEST_F(RangeSamplerTest, FixedUnigramChecksum) { Env* env = Env::Default(); - string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); + std::string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); TF_CHECK_OK(WriteStringToFile(env, fname, kVocabContent)); FixedUnigramSampler* test_sampler = new FixedUnigramSampler(9, 0.8, 0, 1, 0); TF_CHECK_OK(test_sampler->SetDistributionSampler(env, fname)); @@ -195,7 +195,7 @@ TEST_F(RangeSamplerTest, FixedUnigramChecksum) { } TEST_F(RangeSamplerTest, FixedUnigramHistogram) { Env* env = Env::Default(); - string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); + std::string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); TF_CHECK_OK(WriteStringToFile(env, fname, kVocabContent)); FixedUnigramSampler* test_sampler = new FixedUnigramSampler(9, 0.8, 0, 1, 0); TF_CHECK_OK(test_sampler->SetDistributionSampler(env, fname)); @@ -204,7 +204,7 @@ TEST_F(RangeSamplerTest, FixedUnigramHistogram) { } TEST_F(RangeSamplerTest, FixedUnigramProbabilitiesReserve1) { Env* env = Env::Default(); - string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); + std::string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); TF_CHECK_OK(WriteStringToFile(env, fname, kVocabContent)); FixedUnigramSampler* test_sampler = new FixedUnigramSampler(10, 0.8, 1, 1, 0); TF_CHECK_OK(test_sampler->SetDistributionSampler(env, fname)); @@ -217,7 +217,7 @@ TEST_F(RangeSamplerTest, FixedUnigramProbabilitiesReserve1) { } TEST_F(RangeSamplerTest, FixedUnigramProbabilitiesReserve2) { Env* env = Env::Default(); - string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); + std::string fname = io::JoinPath(testing::TmpDir(), "vocab_file"); TF_CHECK_OK(WriteStringToFile(env, fname, 
kVocabContent)); FixedUnigramSampler* test_sampler = new FixedUnigramSampler(11, 0.8, 2, 1, 0); TF_CHECK_OK(test_sampler->SetDistributionSampler(env, fname)); diff --git a/tensorflow/core/kernels/record_input_op.cc b/tensorflow/core/kernels/record_input_op.cc index 1fae7e40af9abd..d1c3fbd1f70cb9 100644 --- a/tensorflow/core/kernels/record_input_op.cc +++ b/tensorflow/core/kernels/record_input_op.cc @@ -30,13 +30,13 @@ class RecordInputOp : public OpKernel { TYPE FIELD; \ OP_REQUIRES_OK(ctx, ctx->GetAttr(#FIELD, &FIELD)); - GETATTR(string, file_pattern); + GETATTR(std::string, file_pattern); GETATTR(int64_t, file_random_seed); GETATTR(float, file_shuffle_shift_ratio); GETATTR(int64_t, file_buffer_size); GETATTR(int64_t, file_parallelism); GETATTR(int64_t, batch_size); - GETATTR(string, compression_type); + GETATTR(std::string, compression_type); #undef GETATTR OP_REQUIRES_OK(ctx, ctx->GetAttr("compression_type", &compression_type)); diff --git a/tensorflow/core/kernels/record_yielder.cc b/tensorflow/core/kernels/record_yielder.cc index db8d59515c2f43..e186c92e7c3b30 100644 --- a/tensorflow/core/kernels/record_yielder.cc +++ b/tensorflow/core/kernels/record_yielder.cc @@ -82,10 +82,10 @@ bool RecordYielder::ShouldFinish(const absl::Status& s) { return stop_ || !status_.ok(); } -static absl::Status MatchFiles(const string& patterns, - std::vector* filenames) { +static absl::Status MatchFiles(const std::string& patterns, + std::vector* filenames) { for (const auto& file_pattern : str_util::Split(patterns, ',')) { - std::vector tmp_filenames; + std::vector tmp_filenames; TF_RETURN_IF_ERROR( Env::Default()->GetMatchingPaths(file_pattern, &tmp_filenames)); filenames->insert(filenames->end(), @@ -102,7 +102,7 @@ void RecordYielder::MainLoop() { num_records_added_in_epoch_ = 0; // Finds all files. - std::vector filenames; + std::vector filenames; absl::Status s = MatchFiles(opts_.file_pattern, &filenames); if (filenames.empty()) { @@ -121,7 +121,7 @@ void RecordYielder::MainLoop() { std::shuffle(filenames.begin(), filenames.end(), shuffle_rnd); // Left-shift the filename list. 
- const std::vector::size_type num = filenames.size(); + const std::vector::size_type num = filenames.size(); int64_t shift; if (0 <= opts_.file_shuffle_shift_ratio && opts_.file_shuffle_shift_ratio < 1) { @@ -136,7 +136,8 @@ void RecordYielder::MainLoop() { for (int i = 0; i < N; ++i) { Shard* shard = &shards[i]; shard->index = i; - for (std::vector::size_type j = i; j < filenames.size(); j += N) { + for (std::vector::size_type j = i; j < filenames.size(); + j += N) { shard->filenames.push_back(filenames[j]); } thread_->Schedule([this, shard]() { ShardLoop(shard); }); @@ -172,7 +173,7 @@ void RecordYielder::MainLoop() { main_loop_done_.Notify(); } -bool RecordYielder::Add(std::vector* values) { +bool RecordYielder::Add(std::vector* values) { mutex_lock l(mu_); while (!BufNotFull()) { buf_not_full_.wait(l); @@ -197,9 +198,9 @@ bool RecordYielder::Add(std::vector* values) { } void RecordYielder::ShardLoop(Shard* shard) { - std::vector values; + std::vector values; const int64_t kRecords = 16; - for (const string& filename : shard->filenames) { + for (const std::string& filename : shard->filenames) { std::unique_ptr file; if (ShouldFinish(absl::OkStatus())) break; absl::Status s = Env::Default()->NewRandomAccessFile(filename, &file); @@ -211,7 +212,7 @@ void RecordYielder::ShardLoop(Shard* shard) { io::RecordReaderOptions::CreateRecordReaderOptions( opts_.compression_type); io::RecordReader rdr(file.get(), options); - uint64 offset = 0; + uint64_t offset = 0; tstring record; while (true) { absl::Status s = rdr.ReadRecord(&offset, &record); diff --git a/tensorflow/core/kernels/record_yielder.h b/tensorflow/core/kernels/record_yielder.h index 6184a283ecece1..8f201082eac5f4 100644 --- a/tensorflow/core/kernels/record_yielder.h +++ b/tensorflow/core/kernels/record_yielder.h @@ -59,7 +59,7 @@ class RecordYielder { public: struct Options { // Glob pattern for tfrecords. - string file_pattern; + std::string file_pattern; // Random seed. It determines how data files are shuffled and how // records are shuffled. @@ -73,13 +73,13 @@ class RecordYielder { float file_shuffle_shift_ratio = 0; // Randomization buffer keeps these many records. - uint64 bufsize = 1; + uint64_t bufsize = 1; // Uses these many concurrent tfrecord iterators to iterate through // tfrecords. - int32 parallelism = 1; + int32_t parallelism = 1; - string compression_type; + std::string compression_type; }; explicit RecordYielder(OpKernelConstruction* context, @@ -116,7 +116,7 @@ class RecordYielder { std::mt19937_64 rnd_ TF_GUARDED_BY(mu_); // Randomization buffer. - std::vector buf_ TF_GUARDED_BY(mu_); + std::vector buf_ TF_GUARDED_BY(mu_); // True iff we are draining an epoch. bool epoch_end_ = false; @@ -145,14 +145,14 @@ class RecordYielder { // any. return stop_ || !status_.ok() || (epoch_end_ && !buf_.empty()) || (!epoch_end_ && - buf_.size() >= std::max(1, opts_.bufsize / 2)); + buf_.size() >= std::max(1, opts_.bufsize / 2)); } void MainLoop(); struct Shard; void ShardLoop(Shard* shard); bool ShouldFinish(const absl::Status& s); - bool Add(std::vector* values); + bool Add(std::vector* values); }; } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduce_join_op.cc b/tensorflow/core/kernels/reduce_join_op.cc index 6ee2ef0139a427..e05e4c3b4d6030 100644 --- a/tensorflow/core/kernels/reduce_join_op.cc +++ b/tensorflow/core/kernels/reduce_join_op.cc @@ -47,7 +47,7 @@ const absl::InlinedVector GetStrides(const TensorShape& shape) { // nonspecified dimensions set to 0. 
Dimensions must be ordered from outer-most // to inner-most with respect to the subset linear index. inline int64_t LinearSubIndexToFullIndex( - int64_t output_index, const absl::InlinedVector& dim_list, + int64_t output_index, const absl::InlinedVector& dim_list, const TensorShape& input_shape, const absl::InlinedVector& strides) { int64_t result = 0; @@ -63,7 +63,7 @@ inline int64_t LinearSubIndexToFullIndex( // Computes the number of input elements reduced per output element. int64_t GetReductionIterSize( - const absl::InlinedVector& reduced_indices, + const absl::InlinedVector& reduced_indices, const TensorShape& input_shape) { int64_t result = 1; for (int32_t reduce_dim : reduced_indices) { @@ -74,12 +74,12 @@ int64_t GetReductionIterSize( // Computes a list of all true reduced indices, accounting for negative // indices. -absl::InlinedVector GetReducedIndices(const Tensor& reduction_indices, - int32_t input_dims) { - const auto reduction_indices_flat = reduction_indices.flat(); +absl::InlinedVector GetReducedIndices( + const Tensor& reduction_indices, int32_t input_dims) { + const auto reduction_indices_flat = reduction_indices.flat(); const int32_t reduction_dims = reduction_indices_flat.size(); - absl::InlinedVector reduced_indices(reduction_dims); + absl::InlinedVector reduced_indices(reduction_dims); for (int32_t i = 0; i < reduction_dims; ++i) { reduced_indices[i] = reduction_indices_flat(reduction_dims - i - 1); reduced_indices[i] += reduced_indices[i] < 0 ? input_dims : 0; @@ -91,7 +91,7 @@ absl::InlinedVector GetReducedIndices(const Tensor& reduction_indices, // Appends all unreduced dimensions to the given vector. void MakeUnreducedIndices(absl::InlinedVector index_is_reduced, int32_t input_dims, - absl::InlinedVector* unreduced_indices) { + absl::InlinedVector* unreduced_indices) { for (int32_t index = 0; index < input_dims; ++index) { if (!index_is_reduced[index]) unreduced_indices->push_back(index); } @@ -128,7 +128,7 @@ class ReduceJoinOp : public OpKernel { const int32_t input_dims = input_shape.dims(); const Tensor& reduction_indices = context->input(1); - const auto reduction_indices_flat = reduction_indices.flat(); + const auto reduction_indices_flat = reduction_indices.flat(); const int32_t reduction_dims = reduction_indices_flat.size(); absl::InlinedVector index_is_reduced(input_dims, false); @@ -146,9 +146,9 @@ class ReduceJoinOp : public OpKernel { index_is_reduced[true_reduce_index] = true; } - absl::InlinedVector reduced_indices = + absl::InlinedVector reduced_indices = GetReducedIndices(reduction_indices, input_dims); - absl::InlinedVector unreduced_indices; + absl::InlinedVector unreduced_indices; MakeUnreducedIndices(index_is_reduced, input_dims, &unreduced_indices); const auto strides = GetStrides(input_shape); @@ -179,7 +179,7 @@ class ReduceJoinOp : public OpKernel { private: bool keep_dims_; - string separator_; + std::string separator_; }; REGISTER_KERNEL_BUILDER(Name("ReduceJoin").Device(DEVICE_CPU), ReduceJoinOp); diff --git a/tensorflow/core/kernels/reduction_ops.h b/tensorflow/core/kernels/reduction_ops.h index 11b3ae7f328cd3..defa95f8c0f65d 100644 --- a/tensorflow/core/kernels/reduction_ops.h +++ b/tensorflow/core/kernels/reduction_ops.h @@ -118,12 +118,12 @@ struct ReduceEigenImpl("Tidx") + .TypeConstraint("Tidx") .Device(DEVICE_CPU) .HostMemory("reduction_indices"), - ReductionOp); + ReductionOp); REGISTER_KERNEL_BUILDER( Name("All") .TypeConstraint("Tidx") .Device(DEVICE_CPU) .HostMemory("reduction_indices"), - ReductionOp); + ReductionOp); #if 
GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/core/kernels/reduction_ops_any.cc b/tensorflow/core/kernels/reduction_ops_any.cc index 0d5b531b6b9d22..9675bbccc0f7e2 100644 --- a/tensorflow/core/kernels/reduction_ops_any.cc +++ b/tensorflow/core/kernels/reduction_ops_any.cc @@ -19,16 +19,16 @@ namespace tensorflow { REGISTER_KERNEL_BUILDER( Name("Any") - .TypeConstraint("Tidx") + .TypeConstraint("Tidx") .Device(DEVICE_CPU) .HostMemory("reduction_indices"), - ReductionOp); + ReductionOp); REGISTER_KERNEL_BUILDER( Name("Any") .TypeConstraint("Tidx") .Device(DEVICE_CPU) .HostMemory("reduction_indices"), - ReductionOp); + ReductionOp); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/core/kernels/reduction_ops_common.cc b/tensorflow/core/kernels/reduction_ops_common.cc index 60f5b9462f8366..028743cf9c3d18 100644 --- a/tensorflow/core/kernels/reduction_ops_common.cc +++ b/tensorflow/core/kernels/reduction_ops_common.cc @@ -44,10 +44,10 @@ TensorShape ReductionHelper::shuffled_shape() { return shape; } -absl::InlinedVector ReductionHelper::permutation() { +absl::InlinedVector ReductionHelper::permutation() { const int dims = data_reshape_.size(); const int unreduced_dims = (dims + !reduce_first_axis_) / 2; - absl::InlinedVector perm(dims); + absl::InlinedVector perm(dims); for (int i = 0; i < unreduced_dims; i++) { perm[i] = 2 * i + reduce_first_axis_; } @@ -84,7 +84,7 @@ absl::Status ReductionHelper::Simplify(const Tensor& data, const Tensor& axis, // bitmap[i] indicates whether to reduce data along i-th axis. absl::InlinedVector bitmap(data.dims(), false); if (axis.dtype() == DT_INT32) { - TF_RETURN_IF_ERROR(SimplifyHelper(data, axis, bitmap)); + TF_RETURN_IF_ERROR(SimplifyHelper(data, axis, bitmap)); } else { TF_RETURN_IF_ERROR(SimplifyHelper(data, axis, bitmap)); } diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h index 6ce777f748a777..daab208f725bec 100644 --- a/tensorflow/core/kernels/reduction_ops_common.h +++ b/tensorflow/core/kernels/reduction_ops_common.h @@ -114,7 +114,7 @@ class ReductionHelper { TensorShape shuffled_shape(); // Permutation of reduced dims needed to put reduction dimensions at the end - absl::InlinedVector permutation(); + absl::InlinedVector permutation(); private: bool reduce_first_axis_; // True if need to reduce the 0-th dimension. diff --git a/tensorflow/core/kernels/reduction_ops_max.cc b/tensorflow/core/kernels/reduction_ops_max.cc index 59d7c89b7795be..54025c4e612fe2 100644 --- a/tensorflow/core/kernels/reduction_ops_max.cc +++ b/tensorflow/core/kernels/reduction_ops_max.cc @@ -67,23 +67,23 @@ REGISTER_GPU_KERNELS(int64_t); // A special DEVICE_DEFAULT kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. 
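The ReduceJoin helpers above (GetReducedIndices and the unreduced-index builder) normalize negative reduction axes before computing strides. A minimal standalone sketch of that sign handling, independent of the TensorFlow types; the function name is illustrative, not the TF helper:

```cpp
#include <cstdint>
#include <vector>

// Map possibly-negative reduction axes onto [0, rank); for example, -1 on a
// rank-4 tensor becomes axis 3, matching the "+= input_dims if negative" step.
std::vector<int32_t> NormalizeAxes(const std::vector<int32_t>& axes,
                                   int32_t rank) {
  std::vector<int32_t> result(axes.size());
  for (size_t i = 0; i < axes.size(); ++i) {
    result[i] = axes[i] + (axes[i] < 0 ? rank : 0);
  }
  return result;
}
```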
-REGISTER_KERNEL_BUILDER( - Name("Max") - .Device(DEVICE_DEFAULT) - .HostMemory("reduction_indices") - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tidx"), - ReductionOp>); -REGISTER_KERNEL_BUILDER( - Name("Max") - .Device(DEVICE_DEFAULT) - .HostMemory("reduction_indices") - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tidx"), - ReductionOp>); +REGISTER_KERNEL_BUILDER(Name("Max") + .Device(DEVICE_DEFAULT) + .HostMemory("reduction_indices") + .HostMemory("input") + .HostMemory("output") + .TypeConstraint("T") + .TypeConstraint("Tidx"), + ReductionOp>); +REGISTER_KERNEL_BUILDER(Name("Max") + .Device(DEVICE_DEFAULT) + .HostMemory("reduction_indices") + .HostMemory("input") + .HostMemory("output") + .TypeConstraint("T") + .TypeConstraint("Tidx"), + ReductionOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_min.cc b/tensorflow/core/kernels/reduction_ops_min.cc index d493cc7514b5d1..b81cd549373d2e 100644 --- a/tensorflow/core/kernels/reduction_ops_min.cc +++ b/tensorflow/core/kernels/reduction_ops_min.cc @@ -65,24 +65,23 @@ REGISTER_GPU_KERNELS(double); // A special DEVICE_DEFAULT kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. -REGISTER_KERNEL_BUILDER( - Name("Min") - .Device(DEVICE_DEFAULT) - .HostMemory("reduction_indices") - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tidx"), - ReductionOp>); -REGISTER_KERNEL_BUILDER( - Name("Min") - .Device(DEVICE_DEFAULT) - .HostMemory("reduction_indices") - .HostMemory("input") - .HostMemory("output") - .TypeConstraint("T") - .TypeConstraint("Tidx"), - ReductionOp>); - +REGISTER_KERNEL_BUILDER(Name("Min") + .Device(DEVICE_DEFAULT) + .HostMemory("reduction_indices") + .HostMemory("input") + .HostMemory("output") + .TypeConstraint("T") + .TypeConstraint("Tidx"), + ReductionOp>); +REGISTER_KERNEL_BUILDER(Name("Min") + .Device(DEVICE_DEFAULT) + .HostMemory("reduction_indices") + .HostMemory("input") + .HostMemory("output") + .TypeConstraint("T") + .TypeConstraint("Tidx"), + ReductionOp>); } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc index e28964905fe85c..43b921c40829ce 100644 --- a/tensorflow/core/kernels/reduction_ops_sum.cc +++ b/tensorflow/core/kernels/reduction_ops_sum.cc @@ -60,24 +60,24 @@ TF_CALL_COMPLEX_TYPES(REGISTER_GPU_KERNELS); // A special DEVICE_DEFAULT kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. 
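The Max/Min (and, below, Sum) hunks only re-indent the special DEVICE_DEFAULT int32 registrations; the angle-bracket template arguments were lost when this patch text was extracted. For orientation, a sketch of the usual shape of such a registration, with the device, element type, and reducer parameters filled in as assumptions about the common TensorFlow pattern rather than quoted from this patch:

```cpp
// Assumed reconstruction of the general pattern; the concrete template
// arguments in the real file may differ.
REGISTER_KERNEL_BUILDER(Name("Max")
                            .Device(DEVICE_DEFAULT)
                            .HostMemory("reduction_indices")
                            .HostMemory("input")
                            .HostMemory("output")
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("Tidx"),
                        ReductionOp<CPUDevice, int32, int32,
                                    Eigen::internal::MaxReducer<int32>>);
```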
-REGISTER_KERNEL_BUILDER( - Name("Sum") - .Device(DEVICE_DEFAULT) - .TypeConstraint("T") - .TypeConstraint("Tidx") - .HostMemory("input") - .HostMemory("output") - .HostMemory("reduction_indices"), - ReductionOp>); -REGISTER_KERNEL_BUILDER( - Name("Sum") - .Device(DEVICE_DEFAULT) - .TypeConstraint("T") - .TypeConstraint("Tidx") - .HostMemory("input") - .HostMemory("output") - .HostMemory("reduction_indices"), - ReductionOp>); +REGISTER_KERNEL_BUILDER(Name("Sum") + .Device(DEVICE_DEFAULT) + .TypeConstraint("T") + .TypeConstraint("Tidx") + .HostMemory("input") + .HostMemory("output") + .HostMemory("reduction_indices"), + ReductionOp>); +REGISTER_KERNEL_BUILDER(Name("Sum") + .Device(DEVICE_DEFAULT) + .TypeConstraint("T") + .TypeConstraint("Tidx") + .HostMemory("input") + .HostMemory("output") + .HostMemory("reduction_indices"), + ReductionOp>); #endif diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc index cb5fda312752ac..4c77592f5dbf36 100644 --- a/tensorflow/core/kernels/reduction_ops_test.cc +++ b/tensorflow/core/kernels/reduction_ops_test.cc @@ -24,58 +24,58 @@ namespace tensorflow { // Creates a Graph which "reduce"s a 3D float tensor of "num" elements // into a scalar. template -static Graph* ToScalar(const string& reduce, int num_x, int num_y) { +static Graph* ToScalar(const std::string& reduce, int num_x, int num_y) { auto* g = new Graph(OpRegistry::Global()); Tensor data(DataTypeToEnum::value, TensorShape({num_x, num_y})); data.flat().setRandom(); Tensor axes(DT_INT32, TensorShape({2})); - axes.flat()(0) = 0; - axes.flat()(1) = 1; + axes.flat()(0) = 0; + axes.flat()(1) = 1; test::graph::Reduce(g, reduce, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; } -static Graph* ColReduce(const string& reduce, int num_x, int num_y) { +static Graph* ColReduce(const std::string& reduce, int num_x, int num_y) { auto* g = new Graph(OpRegistry::Global()); Tensor data(DT_FLOAT, TensorShape({num_x, num_y})); data.flat().setRandom(); Tensor axes(DT_INT32, TensorShape({1})); - axes.flat()(0) = 0; + axes.flat()(0) = 0; test::graph::Reduce(g, reduce, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; } -static Graph* RowReduce(const string& reduce, int num_x, int num_y) { +static Graph* RowReduce(const std::string& reduce, int num_x, int num_y) { auto* g = new Graph(OpRegistry::Global()); Tensor data(DT_FLOAT, TensorShape({num_x, num_y})); data.flat().setRandom(); Tensor axes(DT_INT32, TensorShape({1})); - axes.flat()(0) = 1; + axes.flat()(0) = 1; test::graph::Reduce(g, reduce, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; } -static Graph* ThreeDYReduce(const string& reduce, int num_y, int num_z) { +static Graph* ThreeDYReduce(const std::string& reduce, int num_y, int num_z) { auto* g = new Graph(OpRegistry::Global()); Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z})); data.flat().setRandom(); Tensor axes(DT_INT32, TensorShape({1})); - axes.flat()(0) = 1; + axes.flat()(0) = 1; test::graph::Reduce(g, reduce, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; } -static Graph* ThreeDXZReduce(const string& reduce, int num_y, int num_z) { +static Graph* ThreeDXZReduce(const std::string& reduce, int num_y, int num_z) { auto* g = new Graph(OpRegistry::Global()); Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z})); data.flat().setRandom(); Tensor axes(DT_INT32, TensorShape({2})); - axes.flat()(0) = 0; - axes.flat()(1) = 2; + axes.flat()(0) = 0; + 
axes.flat()(1) = 2; test::graph::Reduce(g, reduce, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; @@ -85,7 +85,7 @@ static Graph* ThreeDXZReduce(const string& reduce, int num_y, int num_z) { // into a scalar on a "device". Runs the bench for "iters" times. template static void ReduceToScalar(::testing::benchmark::State& state, - const string& device, const string& reduce, + const std::string& device, const std::string& reduce, int num_x, int num_y) { test::Benchmark(device, ToScalar(reduce, num_x, num_y), /*old_benchmark_api*/ false) @@ -97,8 +97,8 @@ static void ReduceToScalar(::testing::benchmark::State& state, } static void DoRowReduce(::testing::benchmark::State& state, - const string& device, const string& reduce, int num_x, - int num_y) { + const std::string& device, const std::string& reduce, + int num_x, int num_y) { test::Benchmark(device, RowReduce(reduce, num_x, num_y), /*old_benchmark_api*/ false) .Run(state); @@ -109,8 +109,8 @@ static void DoRowReduce(::testing::benchmark::State& state, } static void DoColReduce(::testing::benchmark::State& state, - const string& device, const string& reduce, int num_x, - int num_y) { + const std::string& device, const std::string& reduce, + int num_x, int num_y) { test::Benchmark(device, ColReduce(reduce, num_x, num_y), /*old_benchmark_api*/ false) .Run(state); @@ -121,8 +121,8 @@ static void DoColReduce(::testing::benchmark::State& state, } static void Do3DYReduce(::testing::benchmark::State& state, - const string& device, const string& reduce, int num_x, - int num_y) { + const std::string& device, const std::string& reduce, + int num_x, int num_y) { test::Benchmark(device, ThreeDYReduce(reduce, num_x, num_y), /*old_benchmark_api*/ false) .Run(state); @@ -133,8 +133,8 @@ static void Do3DYReduce(::testing::benchmark::State& state, } static void Do3DXZReduce(::testing::benchmark::State& state, - const string& device, const string& reduce, int num_x, - int num_y) { + const std::string& device, const std::string& reduce, + int num_x, int num_y) { test::Benchmark(device, ThreeDXZReduce(reduce, num_x, num_y), /*old_benchmark_api*/ false) .Run(state); diff --git a/tensorflow/core/kernels/reference_gemm.h b/tensorflow/core/kernels/reference_gemm.h index 9d0bb60ed436b4..e90656fd36b298 100644 --- a/tensorflow/core/kernels/reference_gemm.h +++ b/tensorflow/core/kernels/reference_gemm.h @@ -64,8 +64,8 @@ void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c, c_j_stride = 1; } - const int32_t highest = static_cast(Eigen::NumTraits::highest()); - const int32_t lowest = static_cast(Eigen::NumTraits::lowest()); + const int32_t highest = static_cast(Eigen::NumTraits::highest()); + const int32_t lowest = static_cast(Eigen::NumTraits::lowest()); const int32_t rounding = (shift_c < 1) ? 
0 : (1 << (shift_c - 1)); int i, j, l; @@ -74,9 +74,9 @@ void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c, int32_t total = 0; for (l = 0; l < k; l++) { const size_t a_index = ((i * a_i_stride) + (l * a_l_stride)); - const int32_t a_value = static_cast(a[a_index]) - offset_a; + const int32_t a_value = static_cast(a[a_index]) - offset_a; const size_t b_index = ((j * b_j_stride) + (l * b_l_stride)); - const int32_t b_value = static_cast(b[b_index]) - offset_b; + const int32_t b_value = static_cast(b[b_index]) - offset_b; total += (a_value * b_value); } const size_t c_index = ((i * c_i_stride) + (j * c_j_stride)); diff --git a/tensorflow/core/kernels/regex_full_match_op.cc b/tensorflow/core/kernels/regex_full_match_op.cc index ddcc165cf5fd18..23be3bd76534fd 100644 --- a/tensorflow/core/kernels/regex_full_match_op.cc +++ b/tensorflow/core/kernels/regex_full_match_op.cc @@ -41,7 +41,7 @@ class RegexFullMatchOp : public OpKernel { OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(pattern_tensor->shape()), errors::InvalidArgument("Pattern must be scalar, but received ", pattern_tensor->shape().DebugString())); - const string pattern = pattern_tensor->flat()(0); + const std::string pattern = pattern_tensor->flat()(0); std::shared_ptr regex = CachedRE2(pattern); OP_REQUIRES(ctx, regex->ok(), errors::InvalidArgument("Invalid pattern: ", pattern, @@ -57,7 +57,7 @@ class RegexFullMatchOp : public OpKernel { } private: - std::shared_ptr CachedRE2(const string& pattern) { + std::shared_ptr CachedRE2(const std::string& pattern) { { tf_shared_lock l(mu_); if (regex_ != nullptr && regex_->pattern() == pattern) { @@ -88,7 +88,7 @@ REGISTER_KERNEL_BUILDER(Name("RegexFullMatch").Device(DEVICE_CPU), class StaticRegexFullMatchOp : public OpKernel { public: explicit StaticRegexFullMatchOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - string pattern; + std::string pattern; OP_REQUIRES_OK(ctx, ctx->GetAttr("pattern", &pattern)); re_ = std::make_unique(pattern); OP_REQUIRES(ctx, re_->ok(), diff --git a/tensorflow/core/kernels/regex_replace_op_test.cc b/tensorflow/core/kernels/regex_replace_op_test.cc index 73979d41222f3b..41ee85d7e4b02b 100644 --- a/tensorflow/core/kernels/regex_replace_op_test.cc +++ b/tensorflow/core/kernels/regex_replace_op_test.cc @@ -67,8 +67,9 @@ Tensor GetTestTensor(int batch) { return t; } -Graph* SetupRegexReplaceGraph(const Tensor& input, const string& input_pattern, - const string& input_rewrite) { +Graph* SetupRegexReplaceGraph(const Tensor& input, + const std::string& input_pattern, + const std::string& input_rewrite) { Graph* g = new Graph(OpRegistry::Global()); Tensor pattern(DT_STRING, TensorShape({})); pattern.flat().setConstant(input_pattern); @@ -103,8 +104,8 @@ BENCHMARK(BM_RegexReplace) ->Arg(128) ->Arg(256); -Graph* SetupStaticGraph(const Tensor& input, const string& input_pattern, - const string& rewrite) { +Graph* SetupStaticGraph(const Tensor& input, const std::string& input_pattern, + const std::string& rewrite) { Graph* g = new Graph(OpRegistry::Global()); TF_CHECK_OK(NodeBuilder("static_regex_replace_op", "StaticRegexReplace") diff --git a/tensorflow/core/kernels/relu_op_gpu.cu.cc b/tensorflow/core/kernels/relu_op_gpu.cu.cc index a1bba19fc27506..6cfc5354f95419 100644 --- a/tensorflow/core/kernels/relu_op_gpu.cu.cc +++ b/tensorflow/core/kernels/relu_op_gpu.cu.cc @@ -44,10 +44,10 @@ namespace functor { __global__ void ReluGradHalfKernel(const Eigen::half* __restrict__ gradient, const Eigen::half* __restrict__ feature, Eigen::half* __restrict__ backprop, - 
int32 count) { - int32 half2_count = count >> 1; - int32 index = blockIdx.x * blockDim.x + threadIdx.x; - const int32 total_device_threads = gridDim.x * blockDim.x; + int32_t count) { + int32_t half2_count = count >> 1; + int32_t index = blockIdx.x * blockDim.x + threadIdx.x; + const int32_t total_device_threads = gridDim.x * blockDim.x; while (index < half2_count) { // The fast branch. @@ -97,9 +97,9 @@ __global__ void ReluGradHalfKernel(const Eigen::half* __restrict__ gradient, __global__ void ReluGradHalfKernelVector( const Eigen::half* __restrict__ gradient, const Eigen::half* __restrict__ feature, Eigen::half* __restrict__ backprop, - int32 count) { - int32 half8_count = count / VectorSizeElements; - int32 index = blockIdx.x * blockDim.x + threadIdx.x; + int32_t count) { + int32_t half8_count = count / VectorSizeElements; + int32_t index = blockIdx.x * blockDim.x + threadIdx.x; if (index < half8_count) { // Cast to xx_h8 for vector load and store. @@ -174,17 +174,17 @@ struct ReluGrad { auto backprop_ptr = reinterpret_cast(backprop.data()); bool aligned = gradient_ptr % 16 == 0 && feature_ptr % 16 == 0 && backprop_ptr % 16 == 0; - int32 count = gradient.size(); - constexpr int32 kThreadInBlock = 512; + int32_t count = gradient.size(); + constexpr int32_t kThreadInBlock = 512; if (count == 0) return; if (aligned) { - int32 half8_count = Eigen::divup(count, VectorSizeElements); - int32 kBlock = Eigen::divup(half8_count, kThreadInBlock); + int32_t half8_count = Eigen::divup(count, VectorSizeElements); + int32_t kBlock = Eigen::divup(half8_count, kThreadInBlock); TF_CHECK_OK(GpuLaunchKernel( ReluGradHalfKernelVector, kBlock, kThreadInBlock, 0, d.stream(), gradient.data(), feature.data(), backprop.data(), count)); } else { - int32 half2_count = Eigen::divup(count, 2); + int32_t half2_count = Eigen::divup(count, 2); GpuLaunchConfig config = GetGpuLaunchConfigFixedBlockSize( half2_count, d, ReluGradHalfKernel, 0, kThreadInBlock); TF_CHECK_OK(GpuLaunchKernel( @@ -195,8 +195,8 @@ struct ReluGrad { }; __global__ void Relu_int8x4_kernel(int vect_count, - const int32* __restrict__ input, - int32* __restrict__ output) { + const int32_t* __restrict__ input, + int32_t* __restrict__ output) { CUDA_1D_KERNEL_LOOP(index, vect_count) { #if GOOGLE_CUDA output[index] = __vmaxs4(input[index], 0); @@ -221,17 +221,17 @@ struct Relu { // 'output' should have the same size as 'input'. 
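Relu_int8x4_kernel above applies ReLU to four int8 values packed into each 32-bit word, which is why the functor divides the element count by 4 before launching. A self-contained CUDA sketch of that trick (illustrative, CUDA-only; the TF kernel also has a ROCm branch):

```cpp
// Each thread handles one 32-bit word, i.e. four packed int8 lanes.
// __vmaxs4 computes a per-byte signed max, so max(x, 0) per lane is ReLU.
__global__ void ReluInt8x4Sketch(int num_words, const int* __restrict__ in,
                                 int* __restrict__ out) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < num_words) {
    out[i] = static_cast<int>(__vmaxs4(in[i], 0));
  }
}
```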
void operator()(const Device& d, typename TTypes::ConstTensor input, typename TTypes::Tensor output) { - int32 count = input.size(); + int32_t count = input.size(); if (count == 0) return; - int32 vect_count = Eigen::divup(count, 4); - constexpr int32 kThreadInBlock = 512; + int32_t vect_count = Eigen::divup(count, 4); + constexpr int32_t kThreadInBlock = 512; GpuLaunchConfig config = GetGpuLaunchConfigFixedBlockSize( vect_count, d, Relu_int8x4_kernel, 0, kThreadInBlock); TF_CHECK_OK(GpuLaunchKernel( Relu_int8x4_kernel, config.block_count, config.thread_per_block, 0, - d.stream(), vect_count, reinterpret_cast(input.data()), - reinterpret_cast(output.data()))); + d.stream(), vect_count, reinterpret_cast(input.data()), + reinterpret_cast(output.data()))); } }; diff --git a/tensorflow/core/kernels/reshape_op.h b/tensorflow/core/kernels/reshape_op.h index 554c142b9db87a..127381c00034f7 100644 --- a/tensorflow/core/kernels/reshape_op.h +++ b/tensorflow/core/kernels/reshape_op.h @@ -61,8 +61,8 @@ class ReshapeOp : public OpKernel { switch (sizes.dtype()) { case DT_INT32: OP_REQUIRES_OK(context, - ValidateSizes(sizes, &product, &unknown_index, - &shape, &sizes_has_zero_dim)); + ValidateSizes(sizes, &product, &unknown_index, + &shape, &sizes_has_zero_dim)); break; case DT_INT64: OP_REQUIRES_OK(context, @@ -145,7 +145,7 @@ class ReshapeOp : public OpKernel { *has_zero_dim = true; } else { if (MultiplyWithoutOverflow(shape->num_elements(), size) < 0) { - string msg; + std::string msg; for (int ii = 0; ii < num_dims; ++ii) { if (ii != 0) { absl::StrAppend(&msg, ", "); diff --git a/tensorflow/core/kernels/reshape_util_gpu.cu.cc b/tensorflow/core/kernels/reshape_util_gpu.cu.cc index 22f09a0ee92aa8..f3c48ef42c9ae4 100644 --- a/tensorflow/core/kernels/reshape_util_gpu.cu.cc +++ b/tensorflow/core/kernels/reshape_util_gpu.cu.cc @@ -36,7 +36,7 @@ __global__ void ReshapeSparseTensorKernel( GPU_1D_KERNEL_LOOP(sparse_index, nnz) { const Tindex* input_index = &input_indices[sparse_index * input_rank]; Tindex* output_index = &output_indices[sparse_index * output_rank]; - int64 dense_index = 0; // int64 to avoid overflow if Tindex is int32 + int64_t dense_index = 0; // int64 to avoid overflow if Tindex is int32 // Flatten input index from slowest- to fastest-changing dimension. for (int i = 0; i < input_rank; ++i) { dense_index = dense_index * input_shape[i] + input_index[i]; @@ -55,14 +55,14 @@ __global__ void ReshapeSparseTensorKernel( namespace functor { template <> -Status ReshapeSparseTensorFunctor::operator()( +absl::Status ReshapeSparseTensorFunctor::operator()( OpKernelContext* context, const TensorShape& input_shape, const TensorShape& output_shape, typename TTypes::ConstMatrix input_indices, typename TTypes::Matrix output_indices) const { - const int64 input_rank = input_shape.dims(); - const int64 output_rank = output_shape.dims(); - const int64 nnz = input_indices.dimension(0); + const int64_t input_rank = input_shape.dims(); + const int64_t output_rank = output_shape.dims(); + const int64_t nnz = input_indices.dimension(0); // We copy input_shape and output_shape to the GPU and then launch a kernel // to compute output_indices. 
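ValidateSizes in the reshape_op.h hunk rejects requested shapes whose element count would overflow by checking that MultiplyWithoutOverflow stays non-negative. A standalone sketch of that guard for non-negative dimension sizes; the helper name and exact contract here are a stand-in, not TF's implementation:

```cpp
#include <cstdint>
#include <limits>

// Multiply two non-negative dimension sizes; return -1 instead of wrapping
// around, so callers can treat a negative result as "too many elements".
int64_t MultiplyNonNegativeOrMinusOne(int64_t x, int64_t y) {
  if (x == 0 || y == 0) return 0;
  if (x > std::numeric_limits<int64_t>::max() / y) return -1;
  return x * y;
}
```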
Tensor input_shape_gpu_t; @@ -75,16 +75,16 @@ Status ReshapeSparseTensorFunctor::operator()( auto output_shape_gpu = output_shape_gpu_t.flat(); se::Stream* stream = context->op_device_context()->stream(); if (!stream) return errors::Internal("No GPU stream available."); - se::DeviceMemoryBase input_shape_gpu_mem(input_shape_gpu.data(), - input_rank * sizeof(int64)); + stream_executor::DeviceAddressBase input_shape_gpu_mem( + input_shape_gpu.data(), input_rank * sizeof(int64_t)); TF_RETURN_IF_ERROR(stream->Memcpy(&input_shape_gpu_mem, input_shape.dim_sizes().data(), - input_rank * sizeof(int64))); - se::DeviceMemoryBase output_shape_gpu_mem(output_shape_gpu.data(), - output_rank * sizeof(int64)); + input_rank * sizeof(int64_t))); + stream_executor::DeviceAddressBase output_shape_gpu_mem( + output_shape_gpu.data(), output_rank * sizeof(int64_t)); TF_RETURN_IF_ERROR(stream->Memcpy(&output_shape_gpu_mem, output_shape.dim_sizes().data(), - output_rank * sizeof(int64))); + output_rank * sizeof(int64_t))); const GPUDevice& device = context->template eigen_device(); auto config = GetGpuLaunchConfig(nnz, device); return GpuLaunchKernel(ReshapeSparseTensorKernel, config.block_count, diff --git a/tensorflow/core/kernels/resource_ops_test.cc b/tensorflow/core/kernels/resource_ops_test.cc index ffc2815d4201d3..43df25dc056eb5 100644 --- a/tensorflow/core/kernels/resource_ops_test.cc +++ b/tensorflow/core/kernels/resource_ops_test.cc @@ -42,7 +42,7 @@ class MockResource : public ResourceBase { *alive_ = false; } } - string DebugString() const override { return ""; } + std::string DebugString() const override { return ""; } bool* alive_; int payload_; }; @@ -103,7 +103,7 @@ TEST_F(MockHandleCreationOpTest, RefCounting) { // Feed and run AddInputFromArray(TensorShape({}), {reinterpret_cast(&alive)}); - AddInputFromArray(TensorShape({}), {payload}); + AddInputFromArray(TensorShape({}), {payload}); TF_ASSERT_OK(RunOpKernel()); EXPECT_TRUE(alive); diff --git a/tensorflow/core/kernels/resource_variable_ops.h b/tensorflow/core/kernels/resource_variable_ops.h index 1c8d79988a2457..53a52e6cda4303 100644 --- a/tensorflow/core/kernels/resource_variable_ops.h +++ b/tensorflow/core/kernels/resource_variable_ops.h @@ -32,9 +32,9 @@ class VarHandleOp : public OpKernel { private: // Same fields as in ResourceHandleOp. 
bool is_anonymous_; - string container_; - string name_; - string debug_name_; + std::string container_; + std::string name_; + std::string debug_name_; Tensor const_tensor_; DtypeAndPartialTensorShape dtype_and_shape_; diff --git a/tensorflow/core/kernels/restore_op_test.cc b/tensorflow/core/kernels/restore_op_test.cc index 15dacaf6d93c45..16bfd01ab4f335 100644 --- a/tensorflow/core/kernels/restore_op_test.cc +++ b/tensorflow/core/kernels/restore_op_test.cc @@ -62,8 +62,8 @@ Tensor MakeInput(const TensorShape& shape, } TEST_F(RestoreOpTest, RestoreSimple) { - const string filename = io::JoinPath(testing::TmpDir(), "tensor_simple"); - const std::vector tensor_names = { + const std::string filename = io::JoinPath(testing::TmpDir(), "tensor_simple"); + const std::vector tensor_names = { "tensor_bool", "tensor_int", "tensor_float", "tensor_double", "tensor_qint8", "tensor_qint32", "tensor_uint8", "tensor_int8", "tensor_int16", "tensor_int64", "tensor_string", "tensor_complex64", @@ -103,7 +103,7 @@ TEST_F(RestoreOpTest, RestoreSimple) { // Input #1 is the tensor names Tensor input_1 = MakeInput( TensorShape({static_cast(tensor_names.size())}), - [&tensor_names](int x) -> string { return tensor_names[x]; }); + [&tensor_names](int x) -> std::string { return tensor_names[x]; }); inputs.push_back({nullptr, &input_1}); // Input #2 is a 1-d bool tensor @@ -111,8 +111,8 @@ TEST_F(RestoreOpTest, RestoreSimple) { MakeInput(TensorShape({2}), [](int x) -> bool { return x != 0; }); inputs.push_back({nullptr, &input_2}); // Input #3 is a 1-d integer tensor - Tensor input_3 = MakeInput(TensorShape({10}), - [](int x) -> int32 { return x + 1; }); + Tensor input_3 = MakeInput(TensorShape({10}), + [](int x) -> int32_t { return x + 1; }); inputs.push_back({nullptr, &input_3}); // Input #4 is a 2-d float tensor Tensor input_4 = MakeInput(TensorShape({2, 4}), [](int x) -> float { @@ -136,24 +136,25 @@ TEST_F(RestoreOpTest, RestoreSimple) { }); inputs.push_back({nullptr, &input_7}); // Input #8 is a 1-d uint8 tensor - Tensor input_8 = MakeInput(TensorShape({11}), - [](int x) -> uint8 { return x + 1; }); + Tensor input_8 = MakeInput(TensorShape({11}), + [](int x) -> uint8_t { return x + 1; }); inputs.push_back({nullptr, &input_8}); // Input #9 is a 1-d int8 tensor - Tensor input_9 = - MakeInput(TensorShape({7}), [](int x) -> int8 { return x - 7; }); + Tensor input_9 = MakeInput(TensorShape({7}), + [](int x) -> int8_t { return x - 7; }); inputs.push_back({nullptr, &input_9}); // Input #10 is a 1-d int16 tensor - Tensor input_10 = MakeInput(TensorShape({7}), - [](int x) -> int16 { return x - 8; }); + Tensor input_10 = MakeInput( + TensorShape({7}), [](int x) -> int16_t { return x - 8; }); inputs.push_back({nullptr, &input_10}); // Input #11 is a 1-d int64 tensor - Tensor input_11 = MakeInput(TensorShape({9}), - [](int x) -> int64 { return x - 9; }); + Tensor input_11 = MakeInput( + TensorShape({9}), [](int x) -> int64_t { return x - 9; }); inputs.push_back({nullptr, &input_11}); // Input #12 is a 1-d string tensor Tensor input_12 = MakeInput( - TensorShape({2}), [](int x) -> string { return x ? "yes" : "no"; }); + TensorShape({2}), + [](int x) -> std::string { return x ? 
"yes" : "no"; }); inputs.push_back({nullptr, &input_12}); // Input #13 is a 1-d complex64 tensor Tensor input_13 = MakeInput( @@ -212,7 +213,7 @@ TEST_F(RestoreOpTest, RestoreSimple) { TensorShape expected({10}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int i = 0; i < 10; ++i) { - EXPECT_EQ(i + 1, output->flat()(i)); + EXPECT_EQ(i + 1, output->flat()(i)); } } // The 2-d float tensor @@ -273,7 +274,7 @@ TEST_F(RestoreOpTest, RestoreSimple) { TensorShape expected({11}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int i = 0; i < 11; ++i) { - EXPECT_EQ(i + 1, output->flat()(i)); + EXPECT_EQ(i + 1, output->flat()(i)); } } // The 1-d int8 tensor @@ -285,7 +286,7 @@ TEST_F(RestoreOpTest, RestoreSimple) { TensorShape expected({7}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int i = 0; i < 7; ++i) { - EXPECT_EQ(i - 7, output->flat()(i)); + EXPECT_EQ(i - 7, output->flat()(i)); } } // The 1-d int16 tensor @@ -297,7 +298,7 @@ TEST_F(RestoreOpTest, RestoreSimple) { TensorShape expected({7}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int i = 0; i < 7; ++i) { - EXPECT_EQ(i - 8, output->flat()(i)); + EXPECT_EQ(i - 8, output->flat()(i)); } } // The 1-d int64 tensor @@ -373,8 +374,8 @@ class RestoreSliceOpTest : public OpsTestBase { }; TEST_F(RestoreSliceOpTest, RestoreInt) { - const string filename = io::JoinPath(testing::TmpDir(), "tensor_int"); - const string tensor_name = "tensor_int"; + const std::string filename = io::JoinPath(testing::TmpDir(), "tensor_int"); + const std::string tensor_name = "tensor_int"; // We first need to write a tensor using the save_op { @@ -412,7 +413,7 @@ TEST_F(RestoreSliceOpTest, RestoreInt) { // Input #2 is a 4x16 integer tensor. Tensor input_2(DT_INT32, TensorShape({4, 16})); for (int64_t i = 0; i < input_2.NumElements(); ++i) { - input_2.flat()(i) = i + 1; + input_2.flat()(i) = i + 1; } inputs.push_back({nullptr, &input_2}); @@ -433,7 +434,7 @@ TEST_F(RestoreSliceOpTest, RestoreInt) { // Now we restore MakeRestoreSliceOp(DT_INT32); - string shape_and_slice = "4 16 0,2:-"; + std::string shape_and_slice = "4 16 0,2:-"; // Add a file name AddInput(TensorShape({}), [&filename](int x) -> tstring { return filename; }); @@ -452,7 +453,7 @@ TEST_F(RestoreSliceOpTest, RestoreInt) { TensorShape expected({2, 16}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int64_t i = 0; i < expected.num_elements(); ++i) { - EXPECT_EQ(i + 1, output->flat()(i)); + EXPECT_EQ(i + 1, output->flat()(i)); } } diff --git a/tensorflow/core/kernels/restore_v2_op_test.cc b/tensorflow/core/kernels/restore_v2_op_test.cc index c102cc42e2063f..0a66a0f31d4366 100644 --- a/tensorflow/core/kernels/restore_v2_op_test.cc +++ b/tensorflow/core/kernels/restore_v2_op_test.cc @@ -61,9 +61,9 @@ class RestoreV2OpTest : public OpsTestBase { } void RunTest(absl::string_view save_op_to_use) { - const string filename = + const std::string filename = io::JoinPath(testing::TmpDir(), "tensor_simple-", save_op_to_use); - const std::vector tensor_names = { + const std::vector tensor_names = { "tensor_bool", "tensor_int", "tensor_float", "tensor_double", "tensor_qint8", "tensor_qint32", "tensor_uint8", "tensor_int8", "tensor_int16", "tensor_int64", "tensor_complex64", "tensor_half"}; @@ -114,12 +114,12 @@ class RestoreV2OpTest : public OpsTestBase { // Input #1 is the tensor names Tensor input_1 = MakeInput( TensorShape({static_cast(tensor_names.size())}), - [&tensor_names](int x) -> string { return tensor_names[x]; }); + [&tensor_names](int x) -> std::string { 
return tensor_names[x]; }); inputs.push_back({nullptr, &input_1}); Tensor shape_and_slices = MakeInput( TensorShape({static_cast(tensor_names.size())}), - [](int x) -> string { return "" /* saves in full */; }); + [](int x) -> std::string { return "" /* saves in full */; }); if (save_op_to_use != "Save") { inputs.push_back({nullptr, &shape_and_slices}); } @@ -129,8 +129,8 @@ class RestoreV2OpTest : public OpsTestBase { [](int x) -> bool { return x != 0; }); inputs.push_back({nullptr, &input_2}); // Input #3 is a 1-d integer tensor - Tensor input_3 = MakeInput(TensorShape({10}), - [](int x) -> int32 { return x + 1; }); + Tensor input_3 = MakeInput( + TensorShape({10}), [](int x) -> int32_t { return x + 1; }); inputs.push_back({nullptr, &input_3}); // Input #4 is a 2-d float tensor Tensor input_4 = MakeInput( @@ -154,20 +154,20 @@ class RestoreV2OpTest : public OpsTestBase { }); inputs.push_back({nullptr, &input_7}); // Input #8 is a 1-d uint8 tensor - Tensor input_8 = MakeInput(TensorShape({11}), - [](int x) -> uint8 { return x + 1; }); + Tensor input_8 = MakeInput( + TensorShape({11}), [](int x) -> uint8_t { return x + 1; }); inputs.push_back({nullptr, &input_8}); // Input #9 is a 1-d int8 tensor - Tensor input_9 = MakeInput(TensorShape({7}), - [](int x) -> int8 { return x - 7; }); + Tensor input_9 = MakeInput(TensorShape({7}), + [](int x) -> int8_t { return x - 7; }); inputs.push_back({nullptr, &input_9}); // Input #10 is a 1-d int16 tensor - Tensor input_10 = MakeInput(TensorShape({7}), - [](int x) -> int16 { return x - 8; }); + Tensor input_10 = MakeInput( + TensorShape({7}), [](int x) -> int16_t { return x - 8; }); inputs.push_back({nullptr, &input_10}); // Input #11 is a 1-d int64 tensor Tensor input_11 = MakeInput( - TensorShape({9}), [](int x) -> int64 { return x - 9; }); + TensorShape({9}), [](int x) -> int64_t { return x - 9; }); inputs.push_back({nullptr, &input_11}); // Input #12 is a 1-d complex64 tensor Tensor input_13 = MakeInput( @@ -222,7 +222,7 @@ class RestoreV2OpTest : public OpsTestBase { TensorShape expected({10}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int i = 0; i < 10; ++i) { - EXPECT_EQ(i + 1, output->flat()(i)); + EXPECT_EQ(i + 1, output->flat()(i)); } } // The 2-d float tensor @@ -283,7 +283,7 @@ class RestoreV2OpTest : public OpsTestBase { TensorShape expected({11}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int i = 0; i < 11; ++i) { - EXPECT_EQ(i + 1, output->flat()(i)); + EXPECT_EQ(i + 1, output->flat()(i)); } } // The 1-d int8 tensor @@ -295,7 +295,7 @@ class RestoreV2OpTest : public OpsTestBase { TensorShape expected({7}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int i = 0; i < 7; ++i) { - EXPECT_EQ(i - 7, output->flat()(i)); + EXPECT_EQ(i - 7, output->flat()(i)); } } // The 1-d int16 tensor @@ -307,7 +307,7 @@ class RestoreV2OpTest : public OpsTestBase { TensorShape expected({7}); EXPECT_TRUE(output->shape().IsSameSize(expected)); for (int i = 0; i < 7; ++i) { - EXPECT_EQ(i - 8, output->flat()(i)); + EXPECT_EQ(i - 8, output->flat()(i)); } } // The 1-d int64 tensor diff --git a/tensorflow/core/kernels/reverse_op_test.cc b/tensorflow/core/kernels/reverse_op_test.cc index 09606abc6c61e6..632a5136db8280 100644 --- a/tensorflow/core/kernels/reverse_op_test.cc +++ b/tensorflow/core/kernels/reverse_op_test.cc @@ -115,17 +115,17 @@ class ReverseOpTest : public OpsTestBase { } }; -TEST_F(ReverseOpTest, Reverse_0_uint8) { Reverse_0(); } +TEST_F(ReverseOpTest, Reverse_0_uint8) { Reverse_0(); } -TEST_F(ReverseOpTest, 
Reverse_0_int8) { Reverse_0(); } +TEST_F(ReverseOpTest, Reverse_0_int8) { Reverse_0(); } -TEST_F(ReverseOpTest, Reverse_0_uint16) { Reverse_0(); } +TEST_F(ReverseOpTest, Reverse_0_uint16) { Reverse_0(); } -TEST_F(ReverseOpTest, Reverse_0_int16) { Reverse_0(); } +TEST_F(ReverseOpTest, Reverse_0_int16) { Reverse_0(); } TEST_F(ReverseOpTest, Reverse_0_float) { Reverse_0(); } -TEST_F(ReverseOpTest, Reverse_0_int32) { Reverse_0(); } +TEST_F(ReverseOpTest, Reverse_0_int32) { Reverse_0(); } TEST_F(ReverseOpTest, Reverse_0_int64) { Reverse_0(); } @@ -135,17 +135,17 @@ TEST_F(ReverseOpTest, Reverse_0_complex64) { Reverse_0(); } TEST_F(ReverseOpTest, Reverse_0_complex128) { Reverse_0(); } -TEST_F(ReverseOpTest, Reverse_234_uint8) { Reverse_234(); } +TEST_F(ReverseOpTest, Reverse_234_uint8) { Reverse_234(); } -TEST_F(ReverseOpTest, Reverse_234_int8) { Reverse_234(); } +TEST_F(ReverseOpTest, Reverse_234_int8) { Reverse_234(); } -TEST_F(ReverseOpTest, Reverse_234_uint16) { Reverse_234(); } +TEST_F(ReverseOpTest, Reverse_234_uint16) { Reverse_234(); } -TEST_F(ReverseOpTest, Reverse_234_int16) { Reverse_234(); } +TEST_F(ReverseOpTest, Reverse_234_int16) { Reverse_234(); } TEST_F(ReverseOpTest, Reverse_234_float) { Reverse_234(); } -TEST_F(ReverseOpTest, Reverse_234_int32) { Reverse_234(); } +TEST_F(ReverseOpTest, Reverse_234_int32) { Reverse_234(); } TEST_F(ReverseOpTest, Reverse_234_int64) { Reverse_234(); } @@ -155,17 +155,17 @@ TEST_F(ReverseOpTest, Reverse_234_complex64) { Reverse_234(); } TEST_F(ReverseOpTest, Reverse_234_complex128) { Reverse_234(); } -TEST_F(ReverseOpTest, Reverse_1234_uint8) { Reverse_1234(); } +TEST_F(ReverseOpTest, Reverse_1234_uint8) { Reverse_1234(); } -TEST_F(ReverseOpTest, Reverse_1234_int8) { Reverse_1234(); } +TEST_F(ReverseOpTest, Reverse_1234_int8) { Reverse_1234(); } -TEST_F(ReverseOpTest, Reverse_1234_uint16) { Reverse_1234(); } +TEST_F(ReverseOpTest, Reverse_1234_uint16) { Reverse_1234(); } -TEST_F(ReverseOpTest, Reverse_1234_int16) { Reverse_1234(); } +TEST_F(ReverseOpTest, Reverse_1234_int16) { Reverse_1234(); } TEST_F(ReverseOpTest, Reverse_1234_float) { Reverse_1234(); } -TEST_F(ReverseOpTest, Reverse_1234_int32) { Reverse_1234(); } +TEST_F(ReverseOpTest, Reverse_1234_int32) { Reverse_1234(); } TEST_F(ReverseOpTest, Reverse_1234_int64) { Reverse_1234(); } @@ -190,7 +190,7 @@ static Graph* Reverse(const TensorShape& shape, int reverse_axis) { Tensor data(DataTypeToEnum::value, shape); data.flat().setRandom(); Tensor axes(DT_INT32, TensorShape({1})); - axes.flat()(0) = reverse_axis; + axes.flat()(0) = reverse_axis; test::graph::Reverse(g, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; @@ -229,8 +229,8 @@ void BM_ReverseRowsOf1Channel_1T_uint8(::testing::benchmark::State& state) { const int outer_dim = state.range(0); const int middle_dim = state.range(1); - RunReverseRowsBenchmark(state, outer_dim, middle_dim, - 1 /* intra_threads */, 1 /* channels */); + RunReverseRowsBenchmark(state, outer_dim, middle_dim, + 1 /* intra_threads */, 1 /* channels */); } BENCHMARK(BM_ReverseRowsOf1Channel_1T_uint8) @@ -257,8 +257,8 @@ void BM_ReverseRowsOf1Channel_4T_uint8(::testing::benchmark::State& state) { const int outer_dim = state.range(0); const int middle_dim = state.range(1); - RunReverseRowsBenchmark(state, outer_dim, middle_dim, - 4 /* intra_threads */, 1 /* channels */); + RunReverseRowsBenchmark(state, outer_dim, middle_dim, + 4 /* intra_threads */, 1 /* channels */); } BENCHMARK(BM_ReverseRowsOf1Channel_4T_uint8) @@ -286,8 +286,8 @@ void 
BM_ReverseRowsOf3Channels_1T_uint8(::testing::benchmark::State& state) { const int outer_dim = state.range(0); const int middle_dim = state.range(1); - RunReverseRowsBenchmark(state, outer_dim, middle_dim, - 1 /* intra_threads */, 3 /* channels */); + RunReverseRowsBenchmark(state, outer_dim, middle_dim, + 1 /* intra_threads */, 3 /* channels */); } BENCHMARK(BM_ReverseRowsOf3Channels_1T_uint8) @@ -316,8 +316,8 @@ void BM_ReverseRowsOf3Channels_4T_uint8(::testing::benchmark::State& state) { const int outer_dim = state.range(0); const int middle_dim = state.range(1); - RunReverseRowsBenchmark(state, outer_dim, middle_dim, - 4 /* intra_threads */, 3 /* channels */); + RunReverseRowsBenchmark(state, outer_dim, middle_dim, + 4 /* intra_threads */, 3 /* channels */); } BENCHMARK(BM_ReverseRowsOf3Channels_4T_uint8) ->UseRealTime() @@ -344,8 +344,8 @@ void BM_ReverseRowsOf4Channels_1T_uint8(::testing::benchmark::State& state) { const int outer_dim = state.range(0); const int middle_dim = state.range(1); - RunReverseRowsBenchmark(state, outer_dim, middle_dim, - 1 /* intra_threads */, 4 /* channels */); + RunReverseRowsBenchmark(state, outer_dim, middle_dim, + 1 /* intra_threads */, 4 /* channels */); } BENCHMARK(BM_ReverseRowsOf4Channels_1T_uint8) @@ -372,8 +372,8 @@ void BM_ReverseRowsOf4Channels_4T_uint8(::testing::benchmark::State& state) { const int outer_dim = state.range(0); const int middle_dim = state.range(1); - RunReverseRowsBenchmark(state, outer_dim, middle_dim, - 4 /* intra_threads */, 4 /* channels */); + RunReverseRowsBenchmark(state, outer_dim, middle_dim, + 4 /* intra_threads */, 4 /* channels */); } BENCHMARK(BM_ReverseRowsOf4Channels_4T_uint8) diff --git a/tensorflow/core/kernels/reverse_sequence_op.cc b/tensorflow/core/kernels/reverse_sequence_op.cc index 139520ece5e2a0..7d33356a169ccf 100644 --- a/tensorflow/core/kernels/reverse_sequence_op.cc +++ b/tensorflow/core/kernels/reverse_sequence_op.cc @@ -98,8 +98,8 @@ void CheckErrorsGPU(OpKernelContext* context, int batch_dim, int seq_dim) { } template <> -void CheckErrors(OpKernelContext* context, int batch_dim, - int seq_dim) { +void CheckErrors(OpKernelContext* context, int batch_dim, + int seq_dim) { CheckErrorsGPU(context, batch_dim, seq_dim); } @@ -164,8 +164,8 @@ class ReverseSequenceOp : public OpKernel { } private: - int32 batch_dim_; - int32 seq_dim_; + int32_t batch_dim_; + int32_t seq_dim_; ReverseSequenceOp(const ReverseSequenceOp&) = delete; void operator=(const ReverseSequenceOp&) = delete; diff --git a/tensorflow/core/kernels/reverse_sequence_op.h b/tensorflow/core/kernels/reverse_sequence_op.h index f25794f3a2ad39..7db47a4b8bbce3 100644 --- a/tensorflow/core/kernels/reverse_sequence_op.h +++ b/tensorflow/core/kernels/reverse_sequence_op.h @@ -49,8 +49,8 @@ class ReverseGenerator { private: typename TTypes::ConstTensor input_; - int32 batch_dim_; - int32 seq_dim_; + int32_t batch_dim_; + int32_t seq_dim_; typename TTypes::ConstVec seq_lengths_; }; diff --git a/tensorflow/core/kernels/roll_op_gpu.cu.cc b/tensorflow/core/kernels/roll_op_gpu.cu.cc index dca487fc060003..130bdd206b67fd 100644 --- a/tensorflow/core/kernels/roll_op_gpu.cu.cc +++ b/tensorflow/core/kernels/roll_op_gpu.cu.cc @@ -30,15 +30,15 @@ typedef Eigen::GpuDevice GPUDevice; namespace { template -__global__ void RollKernel(const int32 nthreads, const int32 num_dims, +__global__ void RollKernel(const int32_t nthreads, const int32_t num_dims, const T* __restrict__ input, T* __restrict__ output, - const int32* __restrict__ dim_size, - const int32* 
__restrict__ threshold, - const int64* __restrict__ dim_range) { + const int32_t* __restrict__ dim_size, + const int32_t* __restrict__ threshold, + const int64_t* __restrict__ dim_range) { CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { - int64 offset = 0; + int64_t offset = 0; for (int i = 0; i < num_dims; i++) { - const int64 stride = dim_range[i] / dim_size[i]; + const int64_t stride = dim_range[i] / dim_size[i]; const int shift = dim_size[i] - threshold[i]; const int indx = (out_idx / stride) % dim_size[i]; const int shifted_indx = (indx + shift) % dim_size[i]; @@ -53,21 +53,22 @@ namespace functor { template struct Roll { - void operator()(const OpKernelContext* context, const int64 num_elements, - const int num_dims, const gtl::ArraySlice dim_size, + void operator()(const OpKernelContext* context, const int64_t num_elements, + const int num_dims, const absl::Span dim_size, const T* input, T* output, - const gtl::ArraySlice threshold, - const gtl::ArraySlice dim_range, const int64 isd) { + const absl::Span threshold, + const absl::Span dim_range, + const int64_t isd) { if (!num_elements) return; const GPUDevice& d = context->eigen_device(); - auto dim_bytes = sizeof(int32) * dim_size.size(); + auto dim_bytes = sizeof(int32_t) * dim_size.size(); auto dim_buf = d.allocate(dim_bytes); - auto thres_bytes = sizeof(int32) * threshold.size(); + auto thres_bytes = sizeof(int32_t) * threshold.size(); auto thres_buf = d.allocate(thres_bytes); - auto range_bytes = sizeof(int64) * dim_range.size(); + auto range_bytes = sizeof(int64_t) * dim_range.size(); auto range_buf = d.allocate(range_bytes); d.memcpyHostToDevice(dim_buf, dim_size.data(), dim_bytes); @@ -76,12 +77,12 @@ struct Roll { GpuLaunchConfig cfg = GetGpuLaunchConfig(num_elements, d); - TF_CHECK_OK(GpuLaunchKernel(RollKernel, cfg.block_count, - cfg.thread_per_block, 0, d.stream(), - cfg.virtual_thread_count, num_dims, input, - output, reinterpret_cast(dim_buf), - reinterpret_cast(thres_buf), - reinterpret_cast(range_buf))); + TF_CHECK_OK( + GpuLaunchKernel(RollKernel, cfg.block_count, cfg.thread_per_block, 0, + d.stream(), cfg.virtual_thread_count, num_dims, input, + output, reinterpret_cast(dim_buf), + reinterpret_cast(thres_buf), + reinterpret_cast(range_buf))); d.deallocate(dim_buf); d.deallocate(thres_buf); diff --git a/tensorflow/core/kernels/scan_ops.cc b/tensorflow/core/kernels/scan_ops.cc index 7e9054f997172d..f9dac8363f8f37 100644 --- a/tensorflow/core/kernels/scan_ops.cc +++ b/tensorflow/core/kernels/scan_ops.cc @@ -104,7 +104,7 @@ namespace functor { DECLARE(Eigen::internal::ProdReducer, T); TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_ALL_REDUCERS); -DECLARE_FOR_ALL_REDUCERS(int32); +DECLARE_FOR_ALL_REDUCERS(int32_t); DECLARE_FOR_ALL_REDUCERS(int64_t); #undef DECLARE_FOR_ALL_REDUCERS @@ -151,7 +151,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS); .HostMemory("axis"), \ ScanOp, int64>) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); -REGISTER_GPU_KERNELS(int32); +REGISTER_GPU_KERNELS(int32_t); REGISTER_GPU_KERNELS(int64_t); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -190,7 +190,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS); .HostMemory("axis"), \ ScanOp, int64>) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); -REGISTER_GPU_KERNELS(int32); +REGISTER_GPU_KERNELS(int32_t); REGISTER_GPU_KERNELS(int64_t); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/scatter_functor_gpu.cu.h b/tensorflow/core/kernels/scatter_functor_gpu.cu.h index 
61868b7853e400..e4f43d51b46075 100644 --- a/tensorflow/core/kernels/scatter_functor_gpu.cu.h +++ b/tensorflow/core/kernels/scatter_functor_gpu.cu.h @@ -97,7 +97,7 @@ __global__ void ScatterOpCustomKernel(T* __restrict__ params, // Ignore indices that are out of range. continue; } - int64 params_i = param_first_index * update_block + (i % update_block); + int64_t params_i = param_first_index * update_block + (i % update_block); body(¶ms[params_i], ldg(updates + updates_i)); } } diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 7d61e1aa2f257e..d5e3b2ad9eb0a9 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -1040,10 +1040,10 @@ absl::Status DoScatterNdOnCpu(OpKernelContext* c, const Tensor& indices, // and the GPU implementation is not. Tensor inputs to this function must be on // the GPU. template -Status DoScatterNdOnCpu(OpKernelContext* c, const Tensor& indices, - const Tensor& updates, const TensorShape& shape, - Tensor* out, bool allocate, - BadIndicesPolicy bad_indices_policy) { +absl::Status DoScatterNdOnCpu(OpKernelContext* c, const Tensor& indices, + const Tensor& updates, const TensorShape& shape, + Tensor* out, bool allocate, + BadIndicesPolicy bad_indices_policy) { AllocatorAttributes alloc_attr; alloc_attr.set_on_host(true); alloc_attr.set_gpu_compatible(true); @@ -1053,7 +1053,7 @@ Status DoScatterNdOnCpu(OpKernelContext* c, const Tensor& indices, Tensor host_indices; TF_RETURN_IF_ERROR(c->allocate_temp(indices.dtype(), indices.shape(), &host_indices, alloc_attr)); - se::DeviceMemoryBase indices_ptr( + stream_executor::DeviceAddressBase indices_ptr( const_cast(indices).flat().data(), indices.flat().size() * sizeof(Index)); TF_RETURN_IF_ERROR(stream->Memcpy(host_indices.flat().data(), @@ -1063,7 +1063,7 @@ Status DoScatterNdOnCpu(OpKernelContext* c, const Tensor& indices, Tensor host_updates; TF_RETURN_IF_ERROR(c->allocate_temp(updates.dtype(), updates.shape(), &host_updates, alloc_attr)); - se::DeviceMemoryBase updates_ptr( + stream_executor::DeviceAddressBase updates_ptr( const_cast(updates).flat().data(), updates.flat().size() * sizeof(T)); TF_RETURN_IF_ERROR(stream->Memcpy(host_updates.flat().data(), updates_ptr, @@ -1078,8 +1078,8 @@ Status DoScatterNdOnCpu(OpKernelContext* c, const Tensor& indices, fill(c->eigen_device(), host_out.flat()); } else { CHECK_NOTNULL(out); // Crash OK - se::DeviceMemoryBase out_ptr(out->flat().data(), - out->flat().size() * sizeof(T)); + stream_executor::DeviceAddressBase out_ptr( + out->flat().data(), out->flat().size() * sizeof(T)); TF_RETURN_IF_ERROR(stream->Memcpy(host_out.flat().data(), out_ptr, host_out.NumElements() * sizeof(T))); } @@ -1090,13 +1090,13 @@ Status DoScatterNdOnCpu(OpKernelContext* c, const Tensor& indices, bad_indices_policy)); // Copy 'host_out' to device. - se::DeviceMemoryBase out_ptr(out->flat().data(), - out->flat().size() * sizeof(T)); + stream_executor::DeviceAddressBase out_ptr(out->flat().data(), + out->flat().size() * sizeof(T)); TF_RETURN_IF_ERROR(stream->Memcpy(&out_ptr, host_out.flat().data(), host_out.NumElements() * sizeof(T))); // Block host, since 'host_out' cannot be destructed until the copy is done. 
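The DoScatterNdOnCpu specialization around here stages GPU-resident indices and updates into host memory, runs the CPU scatter there, and copies the result back, which is why the stream must be drained before host_out goes out of scope. The staging allocation follows this pattern (sketch drawn from the hunk, reusing its names):

```cpp
// Ask for host-resident, GPU-DMA-compatible temporary memory so the device
// tensors can be copied off the GPU and scattered by the CPU implementation.
AllocatorAttributes alloc_attr;
alloc_attr.set_on_host(true);
alloc_attr.set_gpu_compatible(true);
Tensor host_indices;
TF_RETURN_IF_ERROR(c->allocate_temp(indices.dtype(), indices.shape(),
                                    &host_indices, alloc_attr));
```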
TF_RETURN_IF_ERROR(stream->BlockHostUntilDone()); - return OkStatus(); + return absl::OkStatus(); } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc index fd1d4747c40982..ae2402b2a228e1 100644 --- a/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_nd_op_gpu.cu.cc @@ -98,7 +98,7 @@ template __global__ void ScatterNdOpKernel( const Index* indices, const T* updates, T* out, const Eigen::array output_shape_prefix, - const Eigen::array batch_strides, const int64 num_indices, + const Eigen::array batch_strides, const int64_t num_indices, const Index slice_size) { auto update = LeftUpdate(); @@ -141,7 +141,7 @@ struct ScatterNdFunctor { const Eigen::DenseIndex batch_size = Tindices.dimension(0); // Index batch_strides[IXDIM]; - Eigen::array batch_strides; + Eigen::array batch_strides; if (IXDIM > 0) { batch_strides[IXDIM - 1] = 1; } diff --git a/tensorflow/core/kernels/searchsorted_op_gpu.cu.cc b/tensorflow/core/kernels/searchsorted_op_gpu.cu.cc index 67602db6164561..10448882a9296d 100644 --- a/tensorflow/core/kernels/searchsorted_op_gpu.cu.cc +++ b/tensorflow/core/kernels/searchsorted_op_gpu.cu.cc @@ -60,15 +60,16 @@ __global__ void LowerBoundKernel(const T* __restrict__ sorted_inputs, namespace functor { template struct UpperBoundFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& sorted_inputs, - const typename TTypes::ConstTensor& values, - int batch_size, int num_inputs, int num_values, - typename TTypes::Tensor* output) { + static absl::Status Compute( + OpKernelContext* context, + const typename TTypes::ConstTensor& sorted_inputs, + const typename TTypes::ConstTensor& values, int batch_size, + int num_inputs, int num_values, + typename TTypes::Tensor* output) { const GPUDevice& device = context->eigen_device(); if (values.size() == 0) { // GetGpuLaunchConfig requires work_element_count > 0 - return OkStatus(); + return absl::OkStatus(); } GpuLaunchConfig config = GetGpuLaunchConfig(values.size(), device); @@ -77,21 +78,22 @@ struct UpperBoundFunctor { config.thread_per_block, 0, device.stream(), sorted_inputs.data(), batch_size, num_inputs, num_values, values.data(), output->data())); - return OkStatus(); + return absl::OkStatus(); } }; template struct LowerBoundFunctor { - static Status Compute(OpKernelContext* context, - const typename TTypes::ConstTensor& sorted_inputs, - const typename TTypes::ConstTensor& values, - int batch_size, int num_inputs, int num_values, - typename TTypes::Tensor* output) { + static absl::Status Compute( + OpKernelContext* context, + const typename TTypes::ConstTensor& sorted_inputs, + const typename TTypes::ConstTensor& values, int batch_size, + int num_inputs, int num_values, + typename TTypes::Tensor* output) { const GPUDevice& device = context->eigen_device(); if (values.size() == 0) { // GetGpuLaunchConfig requires work_element_count > 0 - return OkStatus(); + return absl::OkStatus(); } GpuLaunchConfig config = GetGpuLaunchConfig(values.size(), device); @@ -100,7 +102,7 @@ struct LowerBoundFunctor { config.thread_per_block, 0, device.stream(), sorted_inputs.data(), batch_size, num_inputs, num_values, values.data(), output->data())); - return OkStatus(); + return absl::OkStatus(); } }; } // namespace functor diff --git a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.h b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.h index f0ba0ce2c27572..dc63e6c5602956 
100644 --- a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.h +++ b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.h @@ -165,7 +165,7 @@ __global__ void SegmentMeanNormalizeKernel( } template -Status LaunchSegmentMeanNormalizeKernel( +absl::Status LaunchSegmentMeanNormalizeKernel( const GPUDevice& d, SegmentId nsegments, Index ninner, const Index* __restrict__ segment_offsets, // [nsegments + 1] T* __restrict__ output) { // [nsegments, ninner] @@ -195,7 +195,7 @@ __global__ void SegmentSetEmptyKernel( } template -Status LaunchSegmentSetEmptyKernel( +absl::Status LaunchSegmentSetEmptyKernel( const GPUDevice& d, SegmentId nsegments, Index ninner, const Index* __restrict__ segment_offsets, // [nsegments + 1] const T empty_value, @@ -263,7 +263,7 @@ __global__ void SegmentOffsetsKernel( // value at segment_offsets[nsegments] is set to the end index of the last valid // ID (e.g., nsegments if all IDs are valid). template -Status LaunchSegmentOffsetsKernel( +absl::Status LaunchSegmentOffsetsKernel( const GPUDevice& d, Toffsets size, Tsegmentids nsegments, const Tsegmentids* segment_ids, // [size] Toffsets* segment_offsets) { // [nsegments + 1] @@ -397,7 +397,7 @@ __global__ void SegmentReduceVectorKernel( template -Status LaunchSegmentReduceVectorKernel( +absl::Status LaunchSegmentReduceVectorKernel( const GPUDevice& d, Toffsets nouter, Toffsets ninner_vec, Tsegmentids nsegments, ReduceOp reduce_op, Tinit initial_value, Tinit empty_segment_value, bool is_mean, bool is_sqrtn, @@ -467,7 +467,7 @@ __global__ void SegmentReduceEpilogueKernel( // be a higher-precision type than the output type Tvec (e.g., float vs. half). template -Status LaunchSegmentReduceEpilogueKernel( +absl::Status LaunchSegmentReduceEpilogueKernel( const GPUDevice& d, Tsegmentids nsegments, Tinit empty_segment_value, bool is_mean, bool is_sqrtn, const Treducevec* output_raw, // [nsegments] @@ -542,7 +542,7 @@ MakeLookupAndScaleAndCastInputsIterator(const Tvec* input_vec, template -Status SegmentReduceGPUImplNoInnerDim( +absl::Status SegmentReduceGPUImplNoInnerDim( OpKernelContext* ctx, Toffsets nouter, Tsegmentids nsegments, ReduceOp reduce_op, Tinit initial_value, Tinit empty_segment_value, bool is_mean, bool is_sqrtn, @@ -568,7 +568,7 @@ Status SegmentReduceGPUImplNoInnerDim( TensorShape({static_cast(nsegments * sizeof(Treducevec))}), &output_raw)); output_raw_ptr = - reinterpret_cast(output_raw.flat().data()); + reinterpret_cast(output_raw.flat().data()); } auto input_iter = MakeLookupAndScaleAndCastInputsIterator( @@ -586,13 +586,13 @@ Status SegmentReduceGPUImplNoInnerDim( device, nsegments, empty_segment_value, is_mean, is_sqrtn, output_raw_ptr, segment_offsets, output_vec)); } - return OkStatus(); + return absl::OkStatus(); } template -Status SegmentReduceGPUImpl( +absl::Status SegmentReduceGPUImpl( OpKernelContext* ctx, Toffsets nouter, Toffsets ninner_vec, Tsegmentids nsegments, ReduceOp reduce_op, Tinit initial_value, Tinit empty_segment_value, bool is_mean, bool is_sqrtn, @@ -648,12 +648,13 @@ struct SegmentReduceGPUVectorized { struct Impl { template - Status operator()(OpKernelContext* ctx, Toffsets nouter, Toffsets ninner, - Tsegmentids nsegments, ReduceOp reduce_op, - T initial_value, T empty_segment_value, bool is_mean, - bool is_sqrtn, const T* input, - const Tsegmentids* segment_ids, const Tindices* indices, - const Tweights* weights, T* output) { + absl::Status operator()(OpKernelContext* ctx, Toffsets nouter, + Toffsets ninner, Tsegmentids nsegments, + ReduceOp reduce_op, T initial_value, + T 
empty_segment_value, bool is_mean, bool is_sqrtn, + const T* input, const Tsegmentids* segment_ids, + const Tindices* indices, const Tweights* weights, + T* output) { DCHECK_EQ(ninner % vec_size, 0); DCHECK_EQ(reinterpret_cast(input) % vec_size, 0); DCHECK_EQ(reinterpret_cast(output) % vec_size, 0); @@ -682,16 +683,16 @@ struct SegmentReduceGPUVectorized { // Note: Treduce is to allow reducing in higher precision than T. template -Status SegmentReduceGPU(OpKernelContext* ctx, Toffsets nouter, Toffsets ninner, - Tsegmentids nsegments, ReduceOp reduce_op, - T initial_value, T empty_segment_value, bool is_mean, - bool is_sqrtn, - const T* input, // [nouter or any, ninner] - const Tsegmentids* segment_ids, // [nouter] - const Tindices* indices, // [nouter] (optional) - const Tweights* weights, // [nouter or any] (optional) - T* output) { // [nsegments, ninner] - if (ninner == 0 || nsegments == 0) return OkStatus(); +absl::Status SegmentReduceGPU( + OpKernelContext* ctx, Toffsets nouter, Toffsets ninner, + Tsegmentids nsegments, ReduceOp reduce_op, T initial_value, + T empty_segment_value, bool is_mean, bool is_sqrtn, + const T* input, // [nouter or any, ninner] + const Tsegmentids* segment_ids, // [nouter] + const Tindices* indices, // [nouter] (optional) + const Tweights* weights, // [nouter or any] (optional) + T* output) { // [nsegments, ninner] + if (ninner == 0 || nsegments == 0) return absl::OkStatus(); return DispatchToVectorized< T, SegmentReduceGPUVectorized::template Impl>( MinAlignmentOf(input, output, ninner), ctx, nouter, ninner, nsegments, @@ -716,7 +717,7 @@ __global__ void SegmentWeightsKernel( } template -Status LaunchSegmentWeightsKernel( +absl::Status LaunchSegmentWeightsKernel( const GPUDevice& d, SegmentId nsegments, SparseSegmentReductionOperation operation, const Index* segment_offsets, // [nsegments + 1] @@ -945,7 +946,7 @@ struct UnsortedSegmentFunctor { }; template -Status SparseSegmentReductionFunctor::operator()( +absl::Status SparseSegmentReductionFunctor::operator()( OpKernelContext* context, bool is_mean, bool is_sqrtn, T default_value, typename TTypes::ConstTensor input, typename TTypes::ConstVec indices, @@ -1087,7 +1088,7 @@ __global__ void ScatterUniqueIndicesKernel( template -Status LaunchScatterUniqueIndicesKernel( +absl::Status LaunchScatterUniqueIndicesKernel( const GPUDevice& d, Toffsets nouter, EdgeIndicatorIter sorted_indices_edge_indicator, // [nouter] const TindicesCompact* __restrict__ sorted_indices, // [nouter] @@ -1122,7 +1123,7 @@ struct SparseSegmentGradV2Functor { const int64_t nouter64 = indices_vec.dimension(0); // Note: nouter and ninner are not expected to be huge, so we use int32 to // save memory bandwidth. - using Toffsets = int32; + using Toffsets = int32_t; OP_REQUIRES_ASYNC(context, nouter64 <= std::numeric_limits::max(), absl::InvalidArgumentError( absl::StrCat("Indices vector of length ", nouter64, @@ -1140,7 +1141,7 @@ struct SparseSegmentGradV2Functor { // worth it because the vector is used multiple times). // Note that we can currently assume int32 is safe because the op's dense // output_dim0 input is always int32. 
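SparseSegmentGradV2Functor narrows its offset and segment-id types to 32 bits whenever the counts are known to fit, purely to cut GPU memory traffic. A standalone sketch of the dispatch condition the hunk uses; the function name is hypothetical:

```cpp
#include <cstdint>
#include <limits>

// Cast 64-bit segment ids down to int32 only when every value must fit.
template <typename Tsegmentids>
bool ShouldCastSegmentIdsToInt32(int64_t nsegments) {
  return sizeof(Tsegmentids) > sizeof(int32_t) &&
         nsegments <= std::numeric_limits<int32_t>::max();
}
```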
- using TindicesCompact = int32; + using TindicesCompact = int32_t; Tensor tmp_indices_internal; const TindicesCompact* indices_internal_ptr; if constexpr (std::is_same::value) { @@ -1163,9 +1164,9 @@ struct SparseSegmentGradV2Functor { context, operation, nouter, ninner, nsegments, input_flat.data(), tmp_indices_internal, indices_internal_ptr, segment_vec, dense_output_shape, done); - } else if (sizeof(Tsegmentids) > sizeof(int32) && - nsegments <= std::numeric_limits::max()) { - CastSegmentIdsThenImpl( + } else if (sizeof(Tsegmentids) > sizeof(int32_t) && + nsegments <= std::numeric_limits::max()) { + CastSegmentIdsThenImpl( context, operation, nouter, ninner, nsegments, input_flat.data(), tmp_indices_internal, indices_internal_ptr, segment_vec, dense_output_shape, done); @@ -1295,12 +1296,13 @@ struct SparseSegmentGradV2Functor { ScratchSpace last_idx_host(context, 1, /*on_host=*/true); OP_REQUIRES_OK_ASYNC( context, - stream->Memcpy(last_idx_host.mutable_data(), - se::DeviceMemoryBase(const_cast( - sorted_indices_unique_ids_ptr) + - (nouter - 1), - sizeof(*last_idx_host.data())), - sizeof(*last_idx_host.data())), + stream->Memcpy( + last_idx_host.mutable_data(), + stream_executor::DeviceAddressBase( + const_cast(sorted_indices_unique_ids_ptr) + + (nouter - 1), + sizeof(*last_idx_host.data())), + sizeof(*last_idx_host.data())), done); auto async_finish_computation = diff --git a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc index bdeb782dc47e49..4bc9c22b33bb00 100644 --- a/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetobatch_functor_gpu.cu.cc @@ -31,11 +31,11 @@ typedef Eigen::GpuDevice GPUDevice; // GPU kernel. template struct S2BParameters { - int32 space_tensor_batch; - int32 batch_tensor_shape[NUM_BLOCK_DIMS + 2]; - int32 space_tensor_spatial_shape[NUM_BLOCK_DIMS]; - int32 pad_start[NUM_BLOCK_DIMS]; - int32 block_shape[NUM_BLOCK_DIMS]; + int32_t space_tensor_batch; + int32_t batch_tensor_shape[NUM_BLOCK_DIMS + 2]; + int32_t space_tensor_spatial_shape[NUM_BLOCK_DIMS]; + int32_t pad_start[NUM_BLOCK_DIMS]; + int32_t block_shape[NUM_BLOCK_DIMS]; }; // GPU kernel for space-to-batch (if B2S = false) and batch-to-space conversion @@ -44,13 +44,13 @@ struct S2BParameters { // To simplify template implementation given lack of constexpr if, both the // input and output pointers are non-const. 
template -__global__ void S2B(const int32 nthreads, T* __restrict__ space_tensor_ptr, +__global__ void S2B(const int32_t nthreads, T* __restrict__ space_tensor_ptr, S2BParameters args, T* __restrict__ batch_tensor_ptr) { GPU_1D_KERNEL_LOOP(batch_tensor_idx, nthreads) { - int32 remaining_batch_tensor_idx = batch_tensor_idx; + int32_t remaining_batch_tensor_idx = batch_tensor_idx; - int32 batch_tensor_pos[NUM_BLOCK_DIMS + 2]; + int32_t batch_tensor_pos[NUM_BLOCK_DIMS + 2]; for (int dim = NUM_BLOCK_DIMS + 1; dim >= 1; --dim) { batch_tensor_pos[dim] = @@ -59,17 +59,17 @@ __global__ void S2B(const int32 nthreads, T* __restrict__ space_tensor_ptr, } batch_tensor_pos[0] = remaining_batch_tensor_idx; - int32 remaining_block_idx = batch_tensor_pos[0] / args.space_tensor_batch; - int32 space_tensor_idx = batch_tensor_pos[NUM_BLOCK_DIMS + 1]; - int32 space_tensor_stride = args.batch_tensor_shape[NUM_BLOCK_DIMS + 1]; - const int32 space_tensor_batch_pos = + int32_t remaining_block_idx = batch_tensor_pos[0] / args.space_tensor_batch; + int32_t space_tensor_idx = batch_tensor_pos[NUM_BLOCK_DIMS + 1]; + int32_t space_tensor_stride = args.batch_tensor_shape[NUM_BLOCK_DIMS + 1]; + const int32_t space_tensor_batch_pos = batch_tensor_pos[0] % args.space_tensor_batch; for (int block_dim = NUM_BLOCK_DIMS - 1; block_dim >= 0; --block_dim) { - int32 offset = remaining_block_idx; + int32_t offset = remaining_block_idx; if (block_dim > 0) { offset %= args.block_shape[block_dim]; } - int32 space_tensor_pos = + int32_t space_tensor_pos = batch_tensor_pos[block_dim + 1] * args.block_shape[block_dim] + offset - args.pad_start[block_dim]; if (space_tensor_pos < 0 || @@ -102,45 +102,45 @@ template struct SpaceToBatchFunctor { using SpaceT = typename std::conditional::type; using BatchT = typename std::conditional::type; - Status operator()( + absl::Status operator()( const GPUDevice& d, typename TTypes::Tensor space_tensor, - const int64 block_shape[NUM_BLOCK_DIMS], - const int64 paddings[NUM_BLOCK_DIMS * 2], + const int64_t block_shape[NUM_BLOCK_DIMS], + const int64_t paddings[NUM_BLOCK_DIMS * 2], typename TTypes::Tensor batch_tensor) { // Kernel execution fails if number of elements is zero. 
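// Illustrative sketch (not part of this patch): the per-dimension index
// mapping the S2B kernel above performs for one spatial dimension. A position
// in the batch tensor maps to block_size positions in the space tensor,
// shifted by the block offset and the leading padding; positions that land in
// the padding are skipped. Plain C++; the function name SpacePosition is an
// assumption for illustration.
#include <cstdint>
#include <optional>

// Maps (batch-tensor position, block offset) back to the position in the
// unpadded space tensor, or nullopt if it falls inside the padding.
std::optional<std::int32_t> SpacePosition(std::int32_t batch_pos,
                                          std::int32_t block_offset,
                                          std::int32_t block_size,
                                          std::int32_t pad_start,
                                          std::int32_t space_extent) {
  const std::int32_t space_pos =
      batch_pos * block_size + block_offset - pad_start;
  if (space_pos < 0 || space_pos >= space_extent) return std::nullopt;
  return space_pos;
}
// (end of sketch)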
if (batch_tensor.size() == 0) { - return OkStatus(); + return absl::OkStatus(); } S2BParameters args; args.space_tensor_batch = space_tensor.dimension(0); for (int block_dim = 0; block_dim < NUM_BLOCK_DIMS; ++block_dim) { - if (block_shape[block_dim] > std::numeric_limits::max()) { + if (block_shape[block_dim] > std::numeric_limits::max()) { return errors::InvalidArgument("block_shape value exceeds 2^32-1"); } args.block_shape[block_dim] = block_shape[block_dim]; if (space_tensor.dimension(block_dim + 1) > - std::numeric_limits::max()) { + std::numeric_limits::max()) { return errors::InvalidArgument("space_tensor dimension exceeds 2^32-1"); } args.space_tensor_spatial_shape[block_dim] = space_tensor.dimension(block_dim + 1); - if (paddings[block_dim * 2] > std::numeric_limits::max()) { + if (paddings[block_dim * 2] > std::numeric_limits::max()) { return errors::InvalidArgument("paddings/crops value exceeds 2^32-1"); } args.pad_start[block_dim] = paddings[block_dim * 2]; } - int64 total_count = 1; + int64_t total_count = 1; for (int dim = 0; dim < NUM_BLOCK_DIMS + 2; ++dim) { args.batch_tensor_shape[dim] = batch_tensor.dimension(dim); total_count *= args.batch_tensor_shape[dim]; } - if (total_count > std::numeric_limits::max()) { + if (total_count > std::numeric_limits::max()) { return errors::InvalidArgument( "number of batch_tensor elements exceeds 2^32-1"); } GpuLaunchConfig config = - GetGpuLaunchConfig(static_cast(total_count), d); + GetGpuLaunchConfig(static_cast(total_count), d); return GpuLaunchKernel(S2B, config.block_count, config.thread_per_block, 0, d.stream(), config.virtual_thread_count, diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index 8bb9474ca9b524..97acca5442890d 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -29,7 +29,7 @@ typedef Eigen::GpuDevice GPUDevice; // Space2Depth kernel for FORMAT_NHWC. // See 'spacetodepth_op.h' for a more detailed description. template -__global__ void S2D_NHWC(const int32 nthreads, +__global__ void S2D_NHWC(const int32_t nthreads, const dtype* __restrict__ input_ptr, const int block_size, const int batch_size, const int input_height, const int input_width, @@ -61,7 +61,7 @@ __global__ void S2D_NHWC(const int32 nthreads, // Space2Depth kernel for FORMAT_NCHW. // See 'spacetodepth_op.h' for a more detailed description. template -__global__ void S2D_NCHW(const int32 nthreads, +__global__ void S2D_NCHW(const int32_t nthreads, const dtype* __restrict__ input_ptr, const int block_size, const int output_width, const int input_depth_by_output_height, @@ -99,7 +99,7 @@ __global__ void S2D_NCHW(const int32 nthreads, // Space2Depth kernel for FORMAT_NCHW using a loop over block area. // See 'spacetodepth_op.h' for functional specification. 
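// Illustrative sketch (not part of this patch): the overflow guard used above
// before 64-bit shapes and element counts are narrowed into the int32_t
// fields of S2BParameters and into the GPU launch configuration. FitsInInt32
// is a hypothetical helper name used only for illustration.
#include <cstdint>
#include <limits>

bool FitsInInt32(std::int64_t value) {
  return value >= 0 && value <= std::numeric_limits<std::int32_t>::max();
}
// Usage pattern mirrored by the functor above: guard, then static_cast, e.g.
//   if (!FitsInInt32(block_shape[dim])) return <invalid-argument error>;
//   args.block_shape[dim] = static_cast<std::int32_t>(block_shape[dim]);
// (end of sketch)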
template -__global__ void S2D_NCHW_LOOP(const int32 nthreads, +__global__ void S2D_NCHW_LOOP(const int32_t nthreads, const dtype* __restrict__ input, const int output_width, const int input_width, const int input_depth_by_output_area, diff --git a/tensorflow/core/kernels/sparse/add_op.cc b/tensorflow/core/kernels/sparse/add_op.cc index c454241c1574c2..ef440aa870dfe3 100644 --- a/tensorflow/core/kernels/sparse/add_op.cc +++ b/tensorflow/core/kernels/sparse/add_op.cc @@ -93,19 +93,19 @@ class CSRSparseMatrixAddFunctor { Tensor c_batch_ptr_t(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); - auto c_batch_ptr = c_batch_ptr_t.vec(); + auto c_batch_ptr = c_batch_ptr_t.vec(); c_batch_ptr(0) = 0; Tensor c_row_ptr_t; TF_RETURN_IF_ERROR(ctx_->allocate_temp( DT_INT32, TensorShape({batch_size * (rows + 1)}), &c_row_ptr_t)); - auto c_row_ptr = c_row_ptr_t.vec(); + auto c_row_ptr = c_row_ptr_t.vec(); // Set the output row pointers to zero, in case we hit any empty // combinations of rows in a and b. - functor::SetZeroFunctor set_zero; + functor::SetZeroFunctor set_zero; const Device& d = ctx_->eigen_device(); - set_zero(d, c_row_ptr_t.flat()); + set_zero(d, c_row_ptr_t.flat()); size_t maxWorkspaceSize = 0; for (int i = 0; i < batch_size; ++i) { @@ -125,7 +125,7 @@ class CSRSparseMatrixAddFunctor { Tensor temp; TF_RETURN_IF_ERROR(ctx_->allocate_temp( DT_INT8, TensorShape({static_cast(maxWorkspaceSize)}), &temp)); - void* workspace = temp.flat().data(); + void* workspace = temp.flat().data(); for (int i = 0; i < batch_size; ++i) { // Calculate output sizes for all minibatch entries. @@ -138,8 +138,8 @@ class CSRSparseMatrixAddFunctor { a.values_vec(i), a_dense_shape}; ConstCSRComponent b_comp{b.row_pointers_vec(i), b.col_indices_vec(i), b.values_vec(i), b_dense_shape}; - TTypes::UnalignedVec c_row_ptr_i(&c_row_ptr(i * (rows + 1)), - rows + 1); + TTypes::UnalignedVec c_row_ptr_i(&c_row_ptr(i * (rows + 1)), + rows + 1); int c_nnz_i; TF_RETURN_IF_ERROR(csr_geam.GetOutputStructure( a_comp, b_comp, c_row_ptr_i, &c_nnz_i, workspace)); @@ -281,17 +281,18 @@ struct CSRSparseMatrixAdd beta_(beta), initialized_(false) {} - Status Initialize() { + absl::Status Initialize() { TF_RETURN_IF_ERROR(cuda_sparse_.Initialize()); TF_RETURN_IF_ERROR(descrA_.Initialize()); TF_RETURN_IF_ERROR(descrB_.Initialize()); TF_RETURN_IF_ERROR(descrC_.Initialize()); initialized_ = true; - return OkStatus(); + return absl::OkStatus(); } - Status GetWorkspaceSize(const ConstCSRComponent& a, - const ConstCSRComponent& b, size_t* bufferSize) { + absl::Status GetWorkspaceSize(const ConstCSRComponent& a, + const ConstCSRComponent& b, + size_t* bufferSize) { DCHECK(initialized_); const int m = a.row_ptr.size() - 1; @@ -313,13 +314,13 @@ struct CSRSparseMatrixAdd b.row_ptr.data(), b.col_ind.data(), descrC_.descr(), null_T, null_int, null_int, bufferSize)); - return OkStatus(); + return absl::OkStatus(); } - Status GetOutputStructure(const ConstCSRComponent& a, - const ConstCSRComponent& b, - TTypes::UnalignedVec c_row_ptr, - int* output_nnz, void* workspace) { + absl::Status GetOutputStructure(const ConstCSRComponent& a, + const ConstCSRComponent& b, + TTypes::UnalignedVec c_row_ptr, + int* output_nnz, void* workspace) { DCHECK(initialized_); const int m = a.row_ptr.size() - 1; @@ -343,11 +344,12 @@ struct CSRSparseMatrixAdd return errors::Internal( "CSRAdd: CsrgeamNnz returned nnzTotalDevHostPtr < 0: ", *output_nnz); } - return OkStatus(); + return absl::OkStatus(); } - Status Compute(const ConstCSRComponent& a, const 
ConstCSRComponent& b, - CSRComponent* c, void* workspace) { + absl::Status Compute(const ConstCSRComponent& a, + const ConstCSRComponent& b, CSRComponent* c, + void* workspace) { DCHECK(initialized_); const int m = a.row_ptr.size() - 1; @@ -368,7 +370,7 @@ struct CSRSparseMatrixAdd b.row_ptr.data(), b.col_ind.data(), descrC_.descr(), c->values.data(), c->row_ptr.data(), c->col_ind.data(), workspace)); - return OkStatus(); + return absl::OkStatus(); } private: diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc index 2991f7bad9af89..311469571aaf9f 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc @@ -73,9 +73,9 @@ class CSRSparseMatrixToDenseCPUOp : public OpKernel { const int64_t num_rows = dense_shape((rank == 2) ? 0 : 1); const int64_t num_cols = dense_shape((rank == 2) ? 1 : 2); - auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); - auto row_ptr = csr_sparse_matrix->row_pointers().vec(); - auto col_ind = csr_sparse_matrix->col_indices().vec(); + auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); + auto row_ptr = csr_sparse_matrix->row_pointers().vec(); + auto col_ind = csr_sparse_matrix->col_indices().vec(); auto values = csr_sparse_matrix->values().vec(); TensorShape dense_tensor_shape; @@ -159,14 +159,14 @@ class CSRSparseMatrixToDenseGPUOp : public OpKernel { functor::CSRSparseMatrixToCOOSparseMatrix csr_to_coo; auto indices = indices_t.matrix(); - auto csr_row_ptr = csr_sparse_matrix->row_pointers().vec(); - auto coo_col_ind = csr_sparse_matrix->col_indices().vec(); - auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); + auto csr_row_ptr = csr_sparse_matrix->row_pointers().vec(); + auto coo_col_ind = csr_sparse_matrix->col_indices().vec(); + auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); Tensor coo_row_ind_t; OP_REQUIRES_OK(c, c->allocate_temp(DT_INT32, TensorShape({total_nnz}), &coo_row_ind_t)); - auto coo_row_ind = coo_row_ind_t.vec(); + auto coo_row_ind = coo_row_ind_t.vec(); // TODO(ebrevdo): just write a custom kernel that converts from // csr to dense. @@ -176,9 +176,9 @@ class CSRSparseMatrixToDenseGPUOp : public OpKernel { // No copying required. Avoid failure case below. 
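// Illustrative sketch (not part of this patch): how the batched CSR storage
// manipulated above is sliced per batch. Row pointers for batch i occupy
// rows + 1 entries starting at i * (rows + 1); column indices and values for
// batch i start at batch_ptr[i] and span batch_ptr[i + 1] - batch_ptr[i]
// entries. The BatchView struct and SliceBatch function are stand-ins for
// illustration only (float values assumed).
#include <cstdint>
#include <vector>

struct BatchView {
  const std::int32_t* row_ptr;  // rows + 1 entries
  const std::int32_t* col_ind;  // nnz entries
  const float* values;          // nnz entries
  std::int32_t nnz;
};

BatchView SliceBatch(const std::vector<std::int32_t>& row_ptr,
                     const std::vector<std::int32_t>& col_ind,
                     const std::vector<float>& values,
                     const std::vector<std::int32_t>& batch_ptr,
                     std::int64_t rows, int batch) {
  const std::int32_t begin = batch_ptr[batch];
  const std::int32_t nnz = batch_ptr[batch + 1] - begin;
  return {row_ptr.data() + batch * (rows + 1), col_ind.data() + begin,
          values.data() + begin, nnz};
}
// (end of sketch)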
continue; } - const TTypes::UnalignedConstVec csr_row_ptr_i( + const TTypes::UnalignedConstVec csr_row_ptr_i( &csr_row_ptr((rows + 1) * i), rows + 1); - const TTypes::UnalignedVec coo_row_ind_i( + const TTypes::UnalignedVec coo_row_ind_i( &coo_row_ind(csr_sparse_matrix->batch_offset(i)), nnz_i); OP_REQUIRES_OK(c, csr_to_coo(c, csr_row_ptr_i, coo_row_ind_i)); } @@ -237,20 +237,20 @@ REGISTER_GPU(complex128) namespace functor { template <> struct COOSparseMatrixToSparseTensor { - Status operator()(OpKernelContext* ctx, - TTypes::ConstVec host_dense_shape, - TTypes::ConstVec host_batch_ptrs, - TTypes::Vec coo_row_ind, - TTypes::ConstVec coo_col_ind, - TTypes::Matrix indices); + absl::Status operator()(OpKernelContext* ctx, + TTypes::ConstVec host_dense_shape, + TTypes::ConstVec host_batch_ptrs, + TTypes::Vec coo_row_ind, + TTypes::ConstVec coo_col_ind, + TTypes::Matrix indices); }; extern template struct COOSparseMatrixToSparseTensor; template <> struct CSRSparseMatrixToCOOSparseMatrix { - Status operator()(OpKernelContext* c, - TTypes::UnalignedVec csr_row_ptr, - TTypes::UnalignedVec coo_row_ind); + absl::Status operator()(OpKernelContext* c, + TTypes::UnalignedVec csr_row_ptr, + TTypes::UnalignedVec coo_row_ind); }; extern template struct CSRSparseMatrixToCOOSparseMatrix; diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc index 403af12bb8fb52..07448230f398fb 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc @@ -91,9 +91,9 @@ class CSRSparseMatrixToSparseTensorCPUOp : public OpKernel { c, c->allocate_output(0, TensorShape({total_nnz, rank}), &indices)); auto indices_flat = indices->template flat(); - auto csr_row_ptr = csr_sparse_matrix->row_pointers().vec(); - auto csr_col_ind = csr_sparse_matrix->col_indices().vec(); - auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); + auto csr_row_ptr = csr_sparse_matrix->row_pointers().vec(); + auto csr_col_ind = csr_sparse_matrix->col_indices().vec(); + auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); // Process the individual batches in parallel using a threadpool. auto shard = [&](int64_t batch_begin, int64_t batch_end) { @@ -165,14 +165,14 @@ class CSRSparseMatrixToSparseTensorGPUOp : public OpKernel { functor::CSRSparseMatrixToCOOSparseMatrix csr_to_coo; auto indices = indices_t->matrix(); - auto csr_row_ptr = csr_sparse_matrix->row_pointers().vec(); - auto coo_col_ind = csr_sparse_matrix->col_indices().vec(); - auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); + auto csr_row_ptr = csr_sparse_matrix->row_pointers().vec(); + auto coo_col_ind = csr_sparse_matrix->col_indices().vec(); + auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); Tensor coo_row_ind_t; OP_REQUIRES_OK(c, c->allocate_temp(DT_INT32, TensorShape({total_nnz}), &coo_row_ind_t)); - auto coo_row_ind = coo_row_ind_t.vec(); + auto coo_row_ind = coo_row_ind_t.vec(); // TODO(ebrevdo): Convert to one or two single kernel calls, // where the kernels are batch-friendly. @@ -182,9 +182,9 @@ class CSRSparseMatrixToSparseTensorGPUOp : public OpKernel { // No copying required. Avoid failure case below. 
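// Illustrative sketch (not part of this patch): the shape of the
// Status -> absl::Status migration applied throughout these functor
// declarations. ExampleFunctor is a stand-in, not a TensorFlow type; it only
// shows the spelled-out absl::Status / absl::OkStatus() forms that replace
// the previously unqualified aliases.
#include "absl/status/status.h"

struct ExampleFunctor {
  absl::Status operator()(int nnz) const {
    if (nnz < 0) {
      return absl::InvalidArgumentError("nnz must be non-negative");
    }
    return absl::OkStatus();  // previously written as OkStatus()
  }
};
// (end of sketch)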
continue; } - const TTypes::UnalignedConstVec csr_row_ptr_i( + const TTypes::UnalignedConstVec csr_row_ptr_i( &csr_row_ptr((rows + 1) * i), rows + 1); - const TTypes::UnalignedVec coo_row_ind_i( + const TTypes::UnalignedVec coo_row_ind_i( &coo_row_ind(csr_sparse_matrix->batch_offset(i)), nnz_i); OP_REQUIRES_OK(c, csr_to_coo(c, csr_row_ptr_i, coo_row_ind_i)); } @@ -222,20 +222,20 @@ REGISTER_GPU(complex128) namespace functor { template <> struct COOSparseMatrixToSparseTensor { - Status operator()(OpKernelContext* ctx, - TTypes::ConstVec host_dense_shape, - TTypes::ConstVec host_batch_ptrs, - TTypes::Vec coo_row_ind, - TTypes::ConstVec coo_col_ind, - TTypes::Matrix indices); + absl::Status operator()(OpKernelContext* ctx, + TTypes::ConstVec host_dense_shape, + TTypes::ConstVec host_batch_ptrs, + TTypes::Vec coo_row_ind, + TTypes::ConstVec coo_col_ind, + TTypes::Matrix indices); }; extern template struct COOSparseMatrixToSparseTensor; template <> struct CSRSparseMatrixToCOOSparseMatrix { - Status operator()(OpKernelContext* c, - TTypes::UnalignedVec csr_row_ptr, - TTypes::UnalignedVec coo_row_ind); + absl::Status operator()(OpKernelContext* c, + TTypes::UnalignedVec csr_row_ptr, + TTypes::UnalignedVec coo_row_ind); }; extern template struct CSRSparseMatrixToCOOSparseMatrix; diff --git a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc index 6e635d140ad7df..eda72f21e674f9 100644 --- a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc @@ -99,15 +99,16 @@ class DenseToCSRSparseMatrixCPUOp : public OpKernel { TensorShape({(num_rows + 1) * batch_size})); // Fill the row pointers with zeros. - functor::SetZeroFunctor set_zero; - set_zero(ctx->eigen_device(), csr_row_ptr.flat()); + functor::SetZeroFunctor set_zero; + set_zero(ctx->eigen_device(), csr_row_ptr.flat()); // Convert from COO to CSR format. functor::SparseTensorToCSRSparseMatrixCPUFunctor coo_to_csr; OP_REQUIRES_OK( - ctx, coo_to_csr(batch_size, num_rows, num_cols, - indices.matrix(), batch_ptr.vec(), - csr_row_ptr.vec(), csr_col_ind.vec())); + ctx, + coo_to_csr(batch_size, num_rows, num_cols, indices.matrix(), + batch_ptr.vec(), csr_row_ptr.vec(), + csr_col_ind.vec())); CSRSparseMatrix output_csr_matrix; OP_REQUIRES_OK(ctx, CSRSparseMatrix::CreateCSRSparseMatrix( @@ -173,7 +174,7 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { const int64_t rows = dense_tensor_shape.dim_size((rank == 2) ? 0 : 1); const int64_t cols = dense_tensor_shape.dim_size((rank == 2) ? 
1 : 2); - ScratchSpace nnz_per_batch_host(c, batch_size, /*on_host*/ true); + ScratchSpace nnz_per_batch_host(c, batch_size, /*on_host*/ true); Tensor nnz_per_batch_device_t; if (rank == 2) { @@ -184,7 +185,7 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { c->allocate_temp(DT_INT32, TensorShape({batch_size}), &nnz_per_batch_device_t), done); - auto nnz_per_batch_device = nnz_per_batch_device_t.vec(); + auto nnz_per_batch_device = nnz_per_batch_device_t.vec(); functor::CalculateNNZPerBatchMatrixFromIndices calculate_nnz_from_indices; @@ -193,14 +194,14 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { c, calculate_nnz_from_indices(c, indices, nnz_per_batch_device), done); - stream_executor::DeviceMemoryBase nnz_per_batch_device_ptr( + stream_executor::DeviceAddressBase nnz_per_batch_device_ptr( static_cast(nnz_per_batch_device.data())); OP_REQUIRES_OK_ASYNC( c, stream->Memcpy(nnz_per_batch_host.mutable_data() /*host_dst*/, nnz_per_batch_device_ptr /*gpu_src*/, - batch_size * sizeof(int32) /*size*/), + batch_size * sizeof(int32_t) /*size*/), done); } @@ -215,7 +216,7 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { // tensor by the time we get here; we can unreference it. nnz_per_batch_device_ref.Unref(); - auto nnz_per_batch = nnz_per_batch_host.tensor().vec(); + auto nnz_per_batch = nnz_per_batch_host.tensor().vec(); { // Ensure that within the callback, the proper GPU settings are @@ -226,7 +227,7 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { // Extract out the values. Tensor temp_values_t; OP_REQUIRES_OK_ASYNC(c, - (functor::DoGatherNd( + (functor::DoGatherNd( c, params_t, indices_t, &temp_values_t)), done); const Tensor& values_t = const_cast(temp_values_t); @@ -248,7 +249,7 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { Tensor batch_ptr_t(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); - auto batch_ptr = batch_ptr_t.vec(); + auto batch_ptr = batch_ptr_t.vec(); auto indices = indices_t.matrix(); batch_ptr(0) = 0; @@ -285,9 +286,9 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { &csr_row_ptr_t), done); - auto coo_row_ind = coo_row_ind_t.vec(); - auto coo_col_ind = coo_col_ind_t.vec(); - auto csr_row_ptr = csr_row_ptr_t.vec(); + auto coo_row_ind = coo_row_ind_t.vec(); + auto coo_col_ind = coo_col_ind_t.vec(); + auto csr_row_ptr = csr_row_ptr_t.vec(); // Convert SparseTensor rep to coo row ind, coo col ind. if (total_nnz > 0) { @@ -301,8 +302,8 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { // a bug if you have empty coo rows. // TODO(ebrevdo): File bug w/ nvidia so coo2csr can handle // zero-element input coo rows. - functor::SetZeroFunctor set_zero; - set_zero(d, csr_row_ptr_t.flat()); + functor::SetZeroFunctor set_zero; + set_zero(d, csr_row_ptr_t.flat()); functor::COOSparseMatrixToCSRSparseMatrix coo_to_csr; for (int i = 0; i < batch_size; ++i) { @@ -312,9 +313,9 @@ class DenseToCSRSparseMatrixGPUOp : public AsyncOpKernel { // handled by the SetZero above. } else { // Convert coo to csr. 
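// Illustrative sketch (not part of this patch): a CPU reference for the
// coo_to_csr step invoked above. It counts entries per row and takes a
// cumulative sum so that csr_row_ptr[r] .. csr_row_ptr[r + 1] delimits row r.
// The GPU path delegates this to the sparse library (Coo2csr); this plain
// C++ version is only for illustration and assumes the COO entries are
// already grouped by row, as the surrounding code arranges.
#include <cstdint>
#include <vector>

std::vector<std::int32_t> CooRowsToCsrRowPtr(
    const std::vector<std::int32_t>& coo_row_ind, std::int32_t num_rows) {
  std::vector<std::int32_t> csr_row_ptr(num_rows + 1, 0);
  for (std::int32_t row : coo_row_ind) ++csr_row_ptr[row + 1];  // per-row count
  for (std::int32_t r = 0; r < num_rows; ++r) {
    csr_row_ptr[r + 1] += csr_row_ptr[r];  // running (exclusive) prefix sum
  }
  return csr_row_ptr;
}
// (end of sketch)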
- auto coo_row_ind_i = - TTypes::UnalignedVec(&coo_row_ind(batch_ptr(i)), nnz_i); - auto csr_row_ptr_i = TTypes::UnalignedVec( + auto coo_row_ind_i = TTypes::UnalignedVec( + &coo_row_ind(batch_ptr(i)), nnz_i); + auto csr_row_ptr_i = TTypes::UnalignedVec( &csr_row_ptr((rows + 1) * i), rows + 1); OP_REQUIRES_OK_ASYNC( c, coo_to_csr(c, rows, cols, coo_row_ind_i, csr_row_ptr_i), @@ -367,9 +368,9 @@ REGISTER_GPU(GPU, complex128) namespace functor { template <> -Status CalculateNNZPerBatchMatrixFromIndices::operator()( +absl::Status CalculateNNZPerBatchMatrixFromIndices::operator()( OpKernelContext* c, TTypes::ConstMatrix indices, - TTypes::Vec nnz_per_batch); + TTypes::Vec nnz_per_batch); extern template struct CalculateNNZPerBatchMatrixFromIndices; template <> @@ -383,9 +384,9 @@ extern template struct SparseTensorToCOOSparseMatrix; template <> struct COOSparseMatrixToCSRSparseMatrix { - Status operator()(OpKernelContext* c, const int rows, const int cols, - TTypes::UnalignedVec coo_row_ind, - TTypes::UnalignedVec csr_row_ptr) { + absl::Status operator()(OpKernelContext* c, const int rows, const int cols, + TTypes::UnalignedVec coo_row_ind, + TTypes::UnalignedVec csr_row_ptr) { GpuSparse cuda_sparse(c); TF_RETURN_IF_ERROR(cuda_sparse.Initialize()); return cuda_sparse.Coo2csr(coo_row_ind.data(), diff --git a/tensorflow/core/kernels/sparse/kernels.cc b/tensorflow/core/kernels/sparse/kernels.cc index ca7009f942112f..dd84b556e002ab 100644 --- a/tensorflow/core/kernels/sparse/kernels.cc +++ b/tensorflow/core/kernels/sparse/kernels.cc @@ -31,8 +31,8 @@ namespace functor { absl::Status SparseTensorToCSRSparseMatrixCPUFunctor::operator()( int64_t batch_size, int num_rows, int num_cols, - TTypes::ConstMatrix indices, TTypes::Vec batch_ptr, - TTypes::Vec csr_row_ptr, TTypes::Vec csr_col_ind) { + TTypes::ConstMatrix indices, TTypes::Vec batch_ptr, + TTypes::Vec csr_row_ptr, TTypes::Vec csr_col_ind) { // Validate inputs. 
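// Illustrative sketch (not part of this patch): the kind of per-entry
// validation the CPU functor performs before filling the CSR buffers, based
// on the error cases exercised in kernels_test.cc below (batch, row, and
// column indices must be in range; indices must have 2 or 3 columns). Plain
// C++ stand-in with a hypothetical name, not the TensorFlow implementation.
#include <cstdint>
#include <optional>
#include <string>

// Returns an error message, or nullopt if the (batch, row, col) triple is valid.
std::optional<std::string> ValidateCooEntry(
    std::int64_t batch, std::int64_t row, std::int64_t col,
    std::int64_t batch_size, std::int64_t num_rows, std::int64_t num_cols) {
  if (batch < 0 || batch >= batch_size)
    return "Batch index is outside of valid range";
  if (row < 0 || row >= num_rows) return "Row index is outside of valid range";
  if (col < 0 || col >= num_cols)
    return "Column index is outside of valid range";
  return std::nullopt;
}
// (end of sketch)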
if (batch_ptr.size() != batch_size + 1) { return errors::InvalidArgument( diff --git a/tensorflow/core/kernels/sparse/kernels.h b/tensorflow/core/kernels/sparse/kernels.h index aff14ca07910fa..14441de5d3cad3 100644 --- a/tensorflow/core/kernels/sparse/kernels.h +++ b/tensorflow/core/kernels/sparse/kernels.h @@ -42,7 +42,7 @@ template struct CalculateNNZPerBatchMatrixFromIndices { absl::Status operator()(OpKernelContext* c, TTypes::ConstMatrix indices, - TTypes::Vec nnz_per_batch); + TTypes::Vec nnz_per_batch); }; // Split a subset of a SparseTensors' indices into two vectors: @@ -63,8 +63,8 @@ template struct SparseTensorToCOOSparseMatrix { void operator()(const Device& d, TTypes::ConstVec host_dense_shape, TTypes::ConstMatrix indices, - TTypes::Vec coo_row_ind, - TTypes::Vec coo_col_ind); + TTypes::Vec coo_row_ind, + TTypes::Vec coo_col_ind); }; // Write coo batch, row, and column vectors to output matrix indices: @@ -89,9 +89,9 @@ template struct COOSparseMatrixToSparseTensor { absl::Status operator()(OpKernelContext* c, TTypes::ConstVec host_dense_shape, - TTypes::ConstVec host_batch_ptrs, - TTypes::Vec coo_row_ind, - TTypes::ConstVec coo_col_ind, + TTypes::ConstVec host_batch_ptrs, + TTypes::Vec coo_row_ind, + TTypes::ConstVec coo_col_ind, TTypes::Matrix indices); }; @@ -105,8 +105,8 @@ struct COOSparseMatrixToSparseTensor { template struct COOSparseMatrixToCSRSparseMatrix { absl::Status operator()(OpKernelContext* c, const int rows, const int cols, - TTypes::UnalignedVec coo_row_ind, - TTypes::UnalignedVec csr_row_ptr); + TTypes::UnalignedVec coo_row_ind, + TTypes::UnalignedVec csr_row_ptr); }; // Convert a matrix of (batched) coo row and column indices to CSR SparseMatrix @@ -126,9 +126,9 @@ struct COOSparseMatrixToCSRSparseMatrix { struct SparseTensorToCSRSparseMatrixCPUFunctor { absl::Status operator()(int64_t batch_size, int num_rows, int num_cols, TTypes::ConstMatrix indices, - TTypes::Vec batch_ptr, - TTypes::Vec csr_row_ptr, - TTypes::Vec csr_col_ind); + TTypes::Vec batch_ptr, + TTypes::Vec csr_row_ptr, + TTypes::Vec csr_col_ind); }; // Convert a vector of csr row pointers to coo row indices. 
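// Illustrative sketch (not part of this patch): a CPU reference for the
// csr-row-pointers-to-coo-row-indices direction described by the comment just
// above (CSRSparseMatrixToCOOSparseMatrix). Each row index r is repeated
// csr_row_ptr[r + 1] - csr_row_ptr[r] times. The GPU functor uses the sparse
// library instead; this version is only for illustration.
#include <cstdint>
#include <vector>

std::vector<std::int32_t> CsrRowPtrToCooRows(
    const std::vector<std::int32_t>& csr_row_ptr) {
  const std::int32_t num_rows =
      static_cast<std::int32_t>(csr_row_ptr.size()) - 1;
  std::vector<std::int32_t> coo_row_ind(csr_row_ptr.back());
  for (std::int32_t r = 0; r < num_rows; ++r) {
    for (std::int32_t i = csr_row_ptr[r]; i < csr_row_ptr[r + 1]; ++i) {
      coo_row_ind[i] = r;  // row r owns entries [csr_row_ptr[r], csr_row_ptr[r+1])
    }
  }
  return coo_row_ind;
}
// (end of sketch)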
@@ -141,8 +141,8 @@ struct SparseTensorToCSRSparseMatrixCPUFunctor { template struct CSRSparseMatrixToCOOSparseMatrix { absl::Status operator()(OpKernelContext* c, - TTypes::UnalignedConstVec csr_row_ptr, - TTypes::UnalignedVec coo_row_ind); + TTypes::UnalignedConstVec csr_row_ptr, + TTypes::UnalignedVec coo_row_ind); }; // Calculates C = matmul(A, B) or C = matmul(A, B)^T, where A is in CSR format @@ -176,10 +176,10 @@ struct CSRStructureModifyingFunctor { const ConstCSRComponent& b, size_t* bufferSize) = 0; - virtual absl::Status GetOutputStructure(const ConstCSRComponent& a, - const ConstCSRComponent& b, - TTypes::UnalignedVec c_row_ptr, - int* output_nnz, void* workspace) = 0; + virtual absl::Status GetOutputStructure( + const ConstCSRComponent& a, const ConstCSRComponent& b, + TTypes::UnalignedVec c_row_ptr, int* output_nnz, + void* workspace) = 0; virtual absl::Status Compute(const ConstCSRComponent& a, const ConstCSRComponent& b, diff --git a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc index 3427538ff98ba4..3c1c79a5f02d6b 100644 --- a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc +++ b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc @@ -37,22 +37,22 @@ namespace functor { namespace { struct StridedDataReader { - StridedDataReader(const int64* begin, int stride) + StridedDataReader(const int64_t* begin, int stride) : begin_(begin), stride_(stride) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator()(int idx) const { return static_cast(ldg(begin_ + idx * stride_)); } - const int64* begin_; + const int64_t* begin_; const int stride_; }; } // namespace template <> -Status CalculateNNZPerBatchMatrixFromIndices::operator()( +absl::Status CalculateNNZPerBatchMatrixFromIndices::operator()( OpKernelContext* c, TTypes::ConstMatrix indices, - TTypes::Vec nnz_per_batch) { + TTypes::Vec nnz_per_batch) { const auto& cu_stream = GetGpuStream(c); const int total_nnz = indices.dimension(0); @@ -96,9 +96,9 @@ Status CalculateNNZPerBatchMatrixFromIndices::operator()( TF_RETURN_IF_ERROR(c->allocate_temp( DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); - DCHECK_NE(temp_storage.flat().data(), nullptr); + DCHECK_NE(temp_storage.flat().data(), nullptr); auto second_success = gpuprim::DeviceHistogram::HistogramEven( - /*d_temp_storage*/ temp_storage.flat().data(), + /*d_temp_storage*/ temp_storage.flat().data(), /*temp_storage_bytes&*/ temp_storage_bytes, /*d_samples*/ indices_first_column, /*d_histogram*/ nnz_per_batch.data(), @@ -116,13 +116,13 @@ Status CalculateNNZPerBatchMatrixFromIndices::operator()( temp_storage_bytes, ", status: ", GpuGetErrorString(second_success)); } - return OkStatus(); + return absl::OkStatus(); } // TODO(ebrevdo): Write a custom batch-friendly impl of this to update // the SparseTensor indices directly. template <> -Status CSRSparseMatrixToCOOSparseMatrix::operator()( +absl::Status CSRSparseMatrixToCOOSparseMatrix::operator()( OpKernelContext* c, TTypes::UnalignedVec csr_row_ptr, TTypes::UnalignedVec coo_row_ind) { GpuSparse gpu_sparse(c); @@ -133,7 +133,7 @@ Status CSRSparseMatrixToCOOSparseMatrix::operator()( } template -__global__ void SparseTensorToCOOMatrixKernel(const int64* indices, +__global__ void SparseTensorToCOOMatrixKernel(const int64_t* indices, int* coo_rows_out, int* coo_cols_out, int size) { const int offset = (stride == 3) ? 
1 : 0; @@ -168,7 +168,8 @@ void SparseTensorToCOOSparseMatrix::operator()( __global__ void COOMatrixToSparseTensorKernel2D(const int* coo_rows, const int* coo_cols, - int64* indices_out, int size) { + int64_t* indices_out, + int size) { GPU_1D_KERNEL_LOOP(i, size) { indices_out[i * 2] = static_cast(ldg(coo_rows + i)); indices_out[i * 2 + 1] = static_cast(ldg(coo_cols + i)); @@ -191,7 +192,7 @@ __device__ inline int BinarySearchRange(int* range, int n, int x) { } __global__ void COOMatrixToSparseTensorKernel3D( - const int* coo_rows, const int* coo_cols, int64* indices_out, + const int* coo_rows, const int* coo_cols, int64_t* indices_out, GpuDeviceArrayStruct batch_ptr_s, const int batch_size, const int size) { // Step 1: access the batch ptrs and copy to shared memory. @@ -214,7 +215,7 @@ __global__ void COOMatrixToSparseTensorKernel3D( } template <> -Status COOSparseMatrixToSparseTensor::operator()( +absl::Status COOSparseMatrixToSparseTensor::operator()( OpKernelContext* c, TTypes::ConstVec host_dense_shape, TTypes::ConstVec host_batch_ptr, TTypes::Vec coo_row_ind, TTypes::ConstVec coo_col_ind, TTypes::Matrix indices) { @@ -234,7 +235,7 @@ Status COOSparseMatrixToSparseTensor::operator()( config.block_count, config.thread_per_block, 0, d.stream(), coo_row_ind.data(), coo_col_ind.data(), indices.data(), size)); - return OkStatus(); + return absl::OkStatus(); } else { const int batch_size = host_dense_shape(0); GpuDeviceArrayOnHost batch_ptr_copy(c, host_batch_ptr.size()); @@ -251,7 +252,7 @@ Status COOSparseMatrixToSparseTensor::operator()( config.thread_per_block, shared_memory_size, d.stream(), coo_row_ind.data(), coo_col_ind.data(), indices.data(), batch_ptr_copy.data(), batch_size, size)); - return OkStatus(); + return absl::OkStatus(); } } @@ -281,10 +282,10 @@ __global__ void CSRSparseMatrixBatchMulVecKernel3D( } template -Status CSRSparseMatrixBatchMulVecImpl(OpKernelContext* ctx, - const CSRSparseMatrix& a, - typename TTypes::ConstFlat b, - CSRSparseMatrix* c) { +absl::Status CSRSparseMatrixBatchMulVecImpl(OpKernelContext* ctx, + const CSRSparseMatrix& a, + typename TTypes::ConstFlat b, + CSRSparseMatrix* c) { DCHECK_EQ(a.dims(), 3); const int total_nnz = a.total_nnz(); Tensor c_values_t; @@ -321,7 +322,7 @@ Status CSRSparseMatrixBatchMulVecImpl(OpKernelContext* ctx, config.thread_per_block, shared_memory_size, d.stream(), a_values.data(), b.data(), c_values.data(), batch_ptr_copy.data(), batch_size, total_nnz)); - return OkStatus(); + return absl::OkStatus(); } #define DEFINE_SPARSE_MUL_VEC_GPU(T) \ @@ -416,12 +417,12 @@ __global__ void CSRSparseMatrixSoftmaxKernel3D( } template -Status CSRSparseMatrixSoftmaxGPUImpl(OpKernelContext* ctx, - const CSRSparseMatrix& logits, - typename TTypes::Vec softmax_values) { +absl::Status CSRSparseMatrixSoftmaxGPUImpl( + OpKernelContext* ctx, const CSRSparseMatrix& logits, + typename TTypes::Vec softmax_values) { auto host_dense_shape = logits.dense_shape().vec(); - auto host_batch_ptr = logits.batch_pointers().vec(); - auto row_ptr = logits.row_pointers().vec(); + auto host_batch_ptr = logits.batch_pointers().vec(); + auto row_ptr = logits.row_pointers().vec(); auto logits_values = logits.values().vec(); const int ndims = host_dense_shape.size(); @@ -459,7 +460,7 @@ Status CSRSparseMatrixSoftmaxGPUImpl(OpKernelContext* ctx, logits_values.data(), softmax_values.data())); } - return OkStatus(); + return absl::OkStatus(); } #define DEFINE_SOFTMAX_GPU(T) \ @@ -604,18 +605,19 @@ __global__ void CSRSparseMatrixSoftmaxGradKernel3D( } template -Status 
CSRSparseMatrixSoftmaxGradGPUImpl( +absl::Status CSRSparseMatrixSoftmaxGradGPUImpl( OpKernelContext* ctx, const CSRSparseMatrix& softmax, const CSRSparseMatrix& grad_softmax, typename TTypes::Vec gradient_values) { auto host_dense_shape = softmax.dense_shape().vec(); - auto softmax_host_batch_ptr = softmax.batch_pointers().vec(); - auto softmax_row_ptr = softmax.row_pointers().vec(); - auto softmax_col_ind = softmax.col_indices().vec(); + auto softmax_host_batch_ptr = softmax.batch_pointers().vec(); + auto softmax_row_ptr = softmax.row_pointers().vec(); + auto softmax_col_ind = softmax.col_indices().vec(); auto softmax_values = softmax.values().vec(); - auto grad_softmax_host_batch_ptr = grad_softmax.batch_pointers().vec(); - auto grad_softmax_row_ptr = grad_softmax.row_pointers().vec(); - auto grad_softmax_col_ind = grad_softmax.col_indices().vec(); + auto grad_softmax_host_batch_ptr = + grad_softmax.batch_pointers().vec(); + auto grad_softmax_row_ptr = grad_softmax.row_pointers().vec(); + auto grad_softmax_col_ind = grad_softmax.col_indices().vec(); auto grad_softmax_values = grad_softmax.values().vec(); const int ndims = host_dense_shape.size(); @@ -666,7 +668,7 @@ Status CSRSparseMatrixSoftmaxGradGPUImpl( grad_softmax_values.data(), gradient_values.data())); } - return OkStatus(); + return absl::OkStatus(); } #define DEFINE_SOFTMAX_GRAD_GPU(T) \ diff --git a/tensorflow/core/kernels/sparse/kernels_test.cc b/tensorflow/core/kernels/sparse/kernels_test.cc index 018b8b77a81e34..dc13ef62256357 100644 --- a/tensorflow/core/kernels/sparse/kernels_test.cc +++ b/tensorflow/core/kernels/sparse/kernels_test.cc @@ -38,19 +38,19 @@ TEST(SparseTensorToCSRSparseMatrix, SingleBatchConversion) { test::AsTensor({0, 0, 2, 3, 2, 4, 3, 0}, TensorShape({4, 2})); Tensor batch_ptr(DT_INT32, {2}); Tensor csr_col_ind(DT_INT32, {4}); - auto csr_row_ptr = test::AsTensor({0, 0, 0, 0, 0}); + auto csr_row_ptr = test::AsTensor({0, 0, 0, 0, 0}); functor::SparseTensorToCSRSparseMatrixCPUFunctor coo_to_csr; TF_EXPECT_OK(coo_to_csr(/*batch_size=*/1, /*num_rows=*/4, /*num_cols=*/5, indices.template matrix(), - batch_ptr.vec(), csr_row_ptr.vec(), - csr_col_ind.vec())); - - test::ExpectTensorEqual(batch_ptr, test::AsTensor({0, 4})); - test::ExpectTensorEqual(csr_row_ptr, - test::AsTensor({0, 1, 1, 3, 4})); - test::ExpectTensorEqual(csr_col_ind, - test::AsTensor({0, 3, 4, 0})); + batch_ptr.vec(), csr_row_ptr.vec(), + csr_col_ind.vec())); + + test::ExpectTensorEqual(batch_ptr, test::AsTensor({0, 4})); + test::ExpectTensorEqual(csr_row_ptr, + test::AsTensor({0, 1, 1, 3, 4})); + test::ExpectTensorEqual(csr_col_ind, + test::AsTensor({0, 3, 4, 0})); } TEST(SparseTensorToCSRSparseMatrix, BatchConversion) { @@ -63,21 +63,22 @@ TEST(SparseTensorToCSRSparseMatrix, BatchConversion) { Tensor csr_col_ind(DT_INT32, {3}); // row pointers have size = batch_size * (num_rows + 1) = 3 * 4 = 12 Tensor csr_row_ptr(DT_INT32, {12}); - test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); + test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); functor::SparseTensorToCSRSparseMatrixCPUFunctor coo_to_csr; TF_EXPECT_OK(coo_to_csr(/*batch_size=*/3, /*num_rows=*/3, /*num_cols=*/4, indices.template matrix(), - batch_ptr.vec(), csr_row_ptr.vec(), - csr_col_ind.vec())); - - test::ExpectTensorEqual(batch_ptr, - test::AsTensor({0, 2, 2, 3})); - test::ExpectTensorEqual(csr_row_ptr, - test::AsTensor({0, 1, 1, 2, // - 0, 0, 0, 0, // - 0, 1, 1, 1})); - test::ExpectTensorEqual(csr_col_ind, test::AsTensor({0, 3, 1})); + batch_ptr.vec(), csr_row_ptr.vec(), 
+ csr_col_ind.vec())); + + test::ExpectTensorEqual(batch_ptr, + test::AsTensor({0, 2, 2, 3})); + test::ExpectTensorEqual(csr_row_ptr, + test::AsTensor({0, 1, 1, 2, // + 0, 0, 0, 0, // + 0, 1, 1, 1})); + test::ExpectTensorEqual(csr_col_ind, + test::AsTensor({0, 3, 1})); } TEST(SparseTensorToCSRSparseMatrix, InvalidBatchThrowsIllegalArgument) { @@ -90,13 +91,13 @@ TEST(SparseTensorToCSRSparseMatrix, InvalidBatchThrowsIllegalArgument) { Tensor csr_col_ind(DT_INT32, {3}); // row pointers have size = batch_size * (num_rows + 1) = 3 * 4 = 12 Tensor csr_row_ptr(DT_INT32, {12}); - test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); + test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); functor::SparseTensorToCSRSparseMatrixCPUFunctor coo_to_csr; EXPECT_THAT( coo_to_csr(/*batch_size=*/3, /*num_rows=*/3, /*num_cols=*/4, - indices.template matrix(), batch_ptr.vec(), - csr_row_ptr.vec(), csr_col_ind.vec()), + indices.template matrix(), batch_ptr.vec(), + csr_row_ptr.vec(), csr_col_ind.vec()), absl_testing::StatusIs(tsl::error::Code::INVALID_ARGUMENT, ::testing::ContainsRegex( "Batch index .* is outside of valid range"))); @@ -111,13 +112,13 @@ TEST(SparseTensorToCSRSparseMatrix, InvalidRowThrowsIllegalArgument) { Tensor csr_col_ind(DT_INT32, {3}); // row pointers have size = batch_size * (num_rows + 1) = 3 * 4 = 12 Tensor csr_row_ptr(DT_INT32, {12}); - test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); + test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); functor::SparseTensorToCSRSparseMatrixCPUFunctor coo_to_csr; EXPECT_THAT( coo_to_csr(/*batch_size=*/3, /*num_rows=*/3, /*num_cols=*/4, - indices.template matrix(), batch_ptr.vec(), - csr_row_ptr.vec(), csr_col_ind.vec()), + indices.template matrix(), batch_ptr.vec(), + csr_row_ptr.vec(), csr_col_ind.vec()), absl_testing::StatusIs( tsl::error::Code::INVALID_ARGUMENT, ::testing::ContainsRegex("Row index .* is outside of valid range"))); @@ -132,13 +133,13 @@ TEST(SparseTensorToCSRSparseMatrix, InvalidColThrowsIllegalArgument) { Tensor csr_col_ind(DT_INT32, {3}); // row pointers have size = batch_size * (num_rows + 1) = 3 * 4 = 12 Tensor csr_row_ptr(DT_INT32, {12}); - test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); + test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); functor::SparseTensorToCSRSparseMatrixCPUFunctor coo_to_csr; EXPECT_THAT( coo_to_csr(/*batch_size=*/3, /*num_rows=*/3, /*num_cols=*/4, - indices.template matrix(), batch_ptr.vec(), - csr_row_ptr.vec(), csr_col_ind.vec()), + indices.template matrix(), batch_ptr.vec(), + csr_row_ptr.vec(), csr_col_ind.vec()), absl_testing::StatusIs(tsl::error::Code::INVALID_ARGUMENT, ::testing::ContainsRegex( "Column index .* is outside of valid range"))); @@ -154,13 +155,13 @@ TEST(SparseTensorToCSRSparseMatrix, InvalidRankIllegalArgument) { Tensor csr_col_ind(DT_INT32, {3}); // row pointers have size = batch_size * (num_rows + 1) = 3 * 4 = 12 Tensor csr_row_ptr(DT_INT32, {12}); - test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); + test::FillFn(&csr_row_ptr, [](int unused) { return 0; }); functor::SparseTensorToCSRSparseMatrixCPUFunctor coo_to_csr; EXPECT_THAT( coo_to_csr(/*batch_size=*/3, /*num_rows=*/3, /*num_cols=*/4, - indices.template matrix(), batch_ptr.vec(), - csr_row_ptr.vec(), csr_col_ind.vec()), + indices.template matrix(), batch_ptr.vec(), + csr_row_ptr.vec(), csr_col_ind.vec()), absl_testing::StatusIs(tsl::error::Code::INVALID_ARGUMENT, ::testing::ContainsRegex( "Indices must have either 2 or 3 columns."))); diff --git 
a/tensorflow/core/kernels/sparse/mat_mul_op.h b/tensorflow/core/kernels/sparse/mat_mul_op.h index 3e55cfbc38f201..5c9bfd8a805a54 100644 --- a/tensorflow/core/kernels/sparse/mat_mul_op.h +++ b/tensorflow/core/kernels/sparse/mat_mul_op.h @@ -276,7 +276,7 @@ class CSRMatMulCPUOp : public CSRMatMulOp { Eigen::Ref GetSparseMatrixRef( const CSRSparseMatrix& csr_matrix, const int batch_index, const int64_t row_begin, const int64_t num_shard_rows, - std::vector* row_ptrs) { + std::vector* row_ptrs) { // Compute the row pointers of the sparse sub-matrix. row_ptrs->resize(num_shard_rows + 1); const int64_t row_offset = @@ -325,7 +325,7 @@ class CSRMatMulCPUOp : public CSRMatMulOp { // Define an Eigen::SparseMatrix over the row range: // [row_begin, row_end) of the CSR SparseMatrix A. - std::vector row_ptrs; + std::vector row_ptrs; auto sparse_matrix = GetSparseMatrixRef( lhs, batch_idx, row_begin, num_shard_rows, &row_ptrs); @@ -396,7 +396,7 @@ class CSRMatMulCPUOp : public CSRMatMulOp { // Define a new sparse sub-matrix from the row range // [row_begin, row_end) of the sparse matrix A. - std::vector row_ptrs; + std::vector row_ptrs; auto sparse_matrix = GetSparseMatrixRef( lhs, batch_idx, row_begin, num_shard_rows, &row_ptrs); @@ -773,9 +773,9 @@ class CSRSparseMatrixMatMul { explicit CSRSparseMatrixMatMul(const bool transpose_output) : transpose_output_(transpose_output) {} - Status Compute(OpKernelContext* ctx, const ConstCSRComponent& a, - typename TTypes::UnalignedConstMatrix b, - typename TTypes::UnalignedMatrix c) { + absl::Status Compute(OpKernelContext* ctx, const ConstCSRComponent& a, + typename TTypes::UnalignedConstMatrix b, + typename TTypes::UnalignedMatrix c) { GpuSparse cuda_sparse(ctx); TF_RETURN_IF_ERROR(cuda_sparse.Initialize()); { @@ -859,11 +859,11 @@ class CSRSparseMatrixMatMul { Tensor buffer; TF_RETURN_IF_ERROR(ctx->allocate_temp( DT_INT8, TensorShape({static_cast(bufferSize)}), &buffer)); - DCHECK(buffer.flat().data() != nullptr); + DCHECK(buffer.flat().data() != nullptr); TF_RETURN_IF_ERROR(cuda_sparse.SpMM(transA, transB, &alpha, matA, matB, &beta, matC, algo, - buffer.flat().data())); + buffer.flat().data())); TF_RETURN_IF_GPUSPARSE_ERROR(cusparseDestroyDnMat(matB)); TF_RETURN_IF_GPUSPARSE_ERROR(cusparseDestroyDnMat(matC)); @@ -940,7 +940,7 @@ class CSRSparseMatrixMatMul { #endif // GOOGLE_CUDA && CUDA_VERSION >= 10020 } - return OkStatus(); + return absl::OkStatus(); } private: @@ -954,8 +954,8 @@ class CSRSparseMatrixMatVec { : transA_(TransposeAndConjugateToGpuSparseOp(transpose_a, conjugate_a, &status_)) {} - Status Compute(OpKernelContext* ctx, const ConstCSRComponent& a, - const T* x, T* y) { + absl::Status Compute(OpKernelContext* ctx, const ConstCSRComponent& a, + const T* x, T* y) { TF_RETURN_IF_ERROR(status_); GpuSparse cuda_sparse(ctx); TF_RETURN_IF_ERROR(cuda_sparse.Initialize()); @@ -1001,11 +1001,11 @@ class CSRSparseMatrixMatVec { #endif } - return OkStatus(); + return absl::OkStatus(); } private: - Status status_; + absl::Status status_; const gpusparseOperation_t transA_; }; diff --git a/tensorflow/core/kernels/sparse/mul_op.cc b/tensorflow/core/kernels/sparse/mul_op.cc index 37ce9a6feb51bf..1a68bcc34e9143 100644 --- a/tensorflow/core/kernels/sparse/mul_op.cc +++ b/tensorflow/core/kernels/sparse/mul_op.cc @@ -125,8 +125,8 @@ class CSRSparseMatrixMulScalar { public: explicit CSRSparseMatrixMulScalar() {} - Status Compute(OpKernelContext* ctx, const CSRSparseMatrix& a, - typename TTypes::ConstScalar b, CSRSparseMatrix* c) { + absl::Status 
Compute(OpKernelContext* ctx, const CSRSparseMatrix& a, + typename TTypes::ConstScalar b, CSRSparseMatrix* c) { const int total_nnz = a.total_nnz(); Tensor c_values_t; TF_RETURN_IF_ERROR(ctx->allocate_temp( @@ -146,7 +146,7 @@ class CSRSparseMatrixMulScalar { functor::BinaryFunctor, 1>().Right( d, c_values, a_values, b, error_ptr); - return OkStatus(); + return absl::OkStatus(); } }; diff --git a/tensorflow/core/kernels/sparse/nnz_op.cc b/tensorflow/core/kernels/sparse/nnz_op.cc index 2006abfe4459b1..ad8095b1cdc925 100644 --- a/tensorflow/core/kernels/sparse/nnz_op.cc +++ b/tensorflow/core/kernels/sparse/nnz_op.cc @@ -53,7 +53,7 @@ class CSRNNZOp : public OpKernel { c, nnz_shape.AddDimWithStatus(csr_sparse_matrix->batch_size())); } OP_REQUIRES_OK(c, c->allocate_output(0, nnz_shape, &nnz_t)); - auto nnz = nnz_t->flat(); + auto nnz = nnz_t->flat(); for (int i = 0; i < csr_sparse_matrix->batch_size(); ++i) { nnz(i) = csr_sparse_matrix->nnz(i); } diff --git a/tensorflow/core/kernels/sparse/sparse_cholesky_op.cc b/tensorflow/core/kernels/sparse/sparse_cholesky_op.cc index 51f867277c6a55..afe0a1322ba866 100644 --- a/tensorflow/core/kernels/sparse/sparse_cholesky_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_cholesky_op.cc @@ -93,7 +93,7 @@ class CSRSparseCholeskyCPUOp : public OpKernel { // Allocate batch pointers. Tensor batch_ptr(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); - auto batch_ptr_vec = batch_ptr.vec(); + auto batch_ptr_vec = batch_ptr.vec(); batch_ptr_vec(0) = 0; // Temporary vector of Eigen SparseMatrices to store the Sparse Cholesky @@ -130,7 +130,7 @@ class CSRSparseCholeskyCPUOp : public OpKernel { Eigen::NaturalOrdering> solver; auto permutation_indices_flat = - input_permutation_indices.flat().data(); + input_permutation_indices.flat().data(); // Invert the fill-in reducing ordering and apply it to the input // sparse matrix. @@ -183,8 +183,8 @@ class CSRSparseCholeskyCPUOp : public OpKernel { Tensor output_col_ind(cpu_allocator(), DT_INT32, TensorShape({total_nnz})); Tensor output_values(cpu_allocator(), DataTypeToEnum::value, TensorShape({total_nnz})); - auto output_row_ptr_ptr = output_row_ptr.flat().data(); - auto output_col_ind_ptr = output_col_ind.flat().data(); + auto output_row_ptr_ptr = output_row_ptr.flat().data(); + auto output_col_ind_ptr = output_col_ind.flat().data(); auto output_values_ptr = output_values.flat().data(); // Copy the output matrices from each batch into the CSRSparseMatrix diff --git a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc index c961ec282b4ed0..be11f9d81065a6 100644 --- a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc @@ -188,7 +188,7 @@ class CSRSparseMatMulCPUOp : public OpKernel { // Set batch pointers. Tensor batch_ptr(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); - auto batch_ptr_vec = batch_ptr.vec(); + auto batch_ptr_vec = batch_ptr.vec(); batch_ptr_vec(0) = 0; // Store intermediate matrix products for each batch. 
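// Illustrative sketch (not part of this patch): how a batch pointer vector
// like the one initialized above is filled. It has batch_size + 1 entries,
// starts at 0, and each entry adds the nnz of one batch, so
// batch_ptr[i + 1] - batch_ptr[i] is the nnz of batch i and batch_ptr.back()
// is the total nnz. Plain C++ for illustration only.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::int32_t> BuildBatchPointers(
    const std::vector<std::int32_t>& nnz_per_batch) {
  std::vector<std::int32_t> batch_ptr(nnz_per_batch.size() + 1, 0);
  for (std::size_t i = 0; i < nnz_per_batch.size(); ++i) {
    batch_ptr[i + 1] = batch_ptr[i] + nnz_per_batch[i];  // running total
  }
  return batch_ptr;
}
// (end of sketch)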
@@ -248,8 +248,8 @@ class CSRSparseMatMulCPUOp : public OpKernel { Tensor output_col_ind(cpu_allocator(), DT_INT32, TensorShape({total_nnz})); Tensor output_values(cpu_allocator(), DataTypeToEnum::value, TensorShape({total_nnz})); - auto output_row_ptr_ptr = output_row_ptr.flat().data(); - auto output_col_ind_ptr = output_col_ind.flat().data(); + auto output_row_ptr_ptr = output_row_ptr.flat().data(); + auto output_col_ind_ptr = output_col_ind.flat().data(); auto output_values_ptr = output_values.flat().data(); // Copy the output matrices from each batch into the CSRSparseMatrix @@ -411,14 +411,14 @@ class CSRSparseMatMulGPUOp : public OpKernel { Tensor c_batch_ptr_t(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); - auto c_batch_ptr = c_batch_ptr_t.vec(); + auto c_batch_ptr = c_batch_ptr_t.vec(); c_batch_ptr(0) = 0; Tensor c_row_ptr_t; OP_REQUIRES_OK(ctx, ctx->allocate_temp( DT_INT32, TensorShape({batch_size * (rows + 1)}), &c_row_ptr_t)); - auto c_row_ptr = c_row_ptr_t.vec(); + auto c_row_ptr = c_row_ptr_t.vec(); // Possibly transpose a. const CSRSparseMatrix* a_input_matrix; @@ -506,7 +506,7 @@ class CSRSparseMatMulGPUOp : public OpKernel { DT_INT8, TensorShape({static_cast(bufferSize1)}), &buffer1_t)); } - void* buffer1 = buffer1_t.flat().data(); + void* buffer1 = buffer1_t.flat().data(); // Do workEstimation using buffer1. // buffer1 implicitly captured in gemmDesc for use in the compute call. @@ -525,7 +525,7 @@ class CSRSparseMatMulGPUOp : public OpKernel { DT_INT8, TensorShape({static_cast(bufferSize2)}), &buffer2_t)); } - void* buffer2 = buffer2_t.flat().data(); + void* buffer2 = buffer2_t.flat().data(); // Compute the gemm. // Note that buffer1 is implicitly consumed here and buffer2 is implicitly @@ -552,7 +552,7 @@ class CSRSparseMatMulGPUOp : public OpKernel { // Copy product to final c_row_ptr and intermediate column and values // tensors. void* row_ptr = &c_row_ptr(i * (rows + 1)); - void* col_ptr = colidx_tmp.flat().data(); + void* col_ptr = colidx_tmp.flat().data(); void* val_ptr = values_tmp.flat().data(); cusparseStatus_t cusp_status = cusparseCsrSetPointers(matC.get(), row_ptr, col_ptr, val_ptr); @@ -643,8 +643,8 @@ class CSRSparseMatMulGPUOp : public OpKernel { b_input_matrix->values_vec(b_batch), b_input_dense_shape}; - TTypes::UnalignedVec c_row_ptr_i(&c_row_ptr(i * (rows + 1)), - rows + 1); + TTypes::UnalignedVec c_row_ptr_i(&c_row_ptr(i * (rows + 1)), + rows + 1); int c_nnz_i; OP_REQUIRES_OK(ctx, diff --git a/tensorflow/core/kernels/sparse/sparse_matrix.h b/tensorflow/core/kernels/sparse/sparse_matrix.h index 8e5ff45f57d30a..d3db1f29871d80 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix.h +++ b/tensorflow/core/kernels/sparse/sparse_matrix.h @@ -217,40 +217,40 @@ class CSRSparseMatrix { return dense_shape_; } - inline TTypes::UnalignedVec row_pointers_vec(int batch) { + inline TTypes::UnalignedVec row_pointers_vec(int batch) { DCHECK(valid()); DCHECK_LT(batch, batch_size()); const int64_t rows = dense_shape().vec()((dims() == 2) ? 0 : 1); const int offset = batch * (rows + 1); - return TTypes::UnalignedVec(row_pointers_vec_->data() + offset, - rows + 1); + return TTypes::UnalignedVec(row_pointers_vec_->data() + offset, + rows + 1); } - inline TTypes::UnalignedConstVec row_pointers_vec(int batch) const { + inline TTypes::UnalignedConstVec row_pointers_vec(int batch) const { DCHECK(valid()); DCHECK_LT(batch, batch_size()); const int64_t rows = dense_shape().vec()((dims() == 2) ? 
0 : 1); const int offset = batch * (rows + 1); - return TTypes::UnalignedConstVec(row_pointers_vec_->data() + offset, - rows + 1); + return TTypes::UnalignedConstVec( + row_pointers_vec_->data() + offset, rows + 1); } - inline TTypes::UnalignedVec col_indices_vec(int batch) { + inline TTypes::UnalignedVec col_indices_vec(int batch) { DCHECK(valid()); DCHECK_LT(batch, batch_size()); const int offset = (*batch_pointers_vec_)(batch); const int nnz_in_batch = nnz(batch); - return TTypes::UnalignedVec(col_indices_vec_->data() + offset, - nnz_in_batch); + return TTypes::UnalignedVec(col_indices_vec_->data() + offset, + nnz_in_batch); } - inline TTypes::UnalignedConstVec col_indices_vec(int batch) const { + inline TTypes::UnalignedConstVec col_indices_vec(int batch) const { DCHECK(valid()); DCHECK_LT(batch, batch_size()); const int offset = (*batch_pointers_vec_)(batch); const int nnz_in_batch = nnz(batch); - return TTypes::UnalignedConstVec(col_indices_vec_->data() + offset, - nnz_in_batch); + return TTypes::UnalignedConstVec(col_indices_vec_->data() + offset, + nnz_in_batch); } template @@ -411,9 +411,11 @@ class CSRSparseMatrix { void SetupVecs() { if (!metadata_.validated) return; batch_pointers_vec_.reset( - new TTypes::Vec(batch_pointers_.vec())); - row_pointers_vec_.reset(new TTypes::Vec(row_pointers_.vec())); - col_indices_vec_.reset(new TTypes::Vec(col_indices_.vec())); + new TTypes::Vec(batch_pointers_.vec())); + row_pointers_vec_.reset( + new TTypes::Vec(row_pointers_.vec())); + col_indices_vec_.reset( + new TTypes::Vec(col_indices_.vec())); } void ClearVecs() { @@ -537,9 +539,9 @@ class CSRSparseMatrix { Tensor row_pointers_; Tensor col_indices_; Tensor values_; - std::unique_ptr::Vec> batch_pointers_vec_; - std::unique_ptr::Vec> row_pointers_vec_; - std::unique_ptr::Vec> col_indices_vec_; + std::unique_ptr::Vec> batch_pointers_vec_; + std::unique_ptr::Vec> row_pointers_vec_; + std::unique_ptr::Vec> col_indices_vec_; }; // Call BinaryFunctor()(ctx, a, b, c) @@ -616,16 +618,16 @@ absl::Status CSRSparseMatrixUnaryHelper(OpKernelContext* ctx, template struct ConstCSRComponent { - TTypes::UnalignedConstVec row_ptr; - TTypes::UnalignedConstVec col_ind; + TTypes::UnalignedConstVec row_ptr; + TTypes::UnalignedConstVec col_ind; typename TTypes::UnalignedConstVec values; TTypes::ConstVec dense_shape_host; }; template struct CSRComponent { - TTypes::UnalignedVec row_ptr; - TTypes::UnalignedVec col_ind; + TTypes::UnalignedVec row_ptr; + TTypes::UnalignedVec col_ind; typename TTypes::UnalignedVec values; TTypes::Vec dense_shape_host; }; diff --git a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc index 5c1a0f007ed656..d25a86056b574b 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc @@ -57,7 +57,7 @@ class CSRSparseMatrixComponentsOp : public OpKernel { OP_REQUIRES(c, index_t.dims() == 0, errors::InvalidArgument("index should be a scalar, but saw: ", index_t.DebugString())); - int32_t index = index_t.scalar()(); + int32_t index = index_t.scalar()(); OP_REQUIRES(c, index >= 0 && index < csr_sparse_matrix->batch_size(), errors::InvalidArgument("index (", index, ") not in [0, ", csr_sparse_matrix->batch_size(), ")")); @@ -67,7 +67,7 @@ class CSRSparseMatrixComponentsOp : public OpKernel { c->set_output(1, csr_sparse_matrix->col_indices()); c->set_output(2, csr_sparse_matrix->values()); } else { - auto batch_ptrs = 
csr_sparse_matrix->batch_pointers().vec(); + auto batch_ptrs = csr_sparse_matrix->batch_pointers().vec(); auto dense_shape = csr_sparse_matrix->dense_shape().vec(); int64_t rows = dense_shape(1); int nnz = batch_ptrs(index + 1) - batch_ptrs(index); @@ -78,23 +78,23 @@ class CSRSparseMatrixComponentsOp : public OpKernel { c, c->allocate_output(0, TensorShape({rows + 1}), &row_ptrs_t)); OP_REQUIRES_OK(c, c->allocate_output(1, TensorShape({nnz}), &col_inds_t)); OP_REQUIRES_OK(c, c->allocate_output(2, TensorShape({nnz}), &values_t)); - auto row_ptrs = row_ptrs_t->vec(); - auto col_inds = col_inds_t->vec(); + auto row_ptrs = row_ptrs_t->vec(); + auto col_inds = col_inds_t->vec(); auto values = values_t->vec(); - functor::Slice slice_int; + functor::Slice slice_int; functor::Slice slice_t; typedef Eigen::DSizes EVec; const Device& d = c->eigen_device(); slice_int(d, /*output*/ row_ptrs, - /*input*/ csr_sparse_matrix->row_pointers().vec(), + /*input*/ csr_sparse_matrix->row_pointers().vec(), /*slice_indices*/ EVec{static_cast(index * (rows + 1))}, /*slice_sizes*/ EVec{static_cast(rows + 1)}); slice_int(d, /*output*/ col_inds, - /*input*/ csr_sparse_matrix->col_indices().vec(), + /*input*/ csr_sparse_matrix->col_indices().vec(), /*slice_indices*/ EVec{batch_ptrs(index)}, /*slice_sizes*/ EVec{nnz}); slice_t(d, @@ -137,7 +137,7 @@ namespace functor { const Eigen::DSizes& sizes); \ extern template struct Slice; -DECLARE_GPU_SPEC(int32); +DECLARE_GPU_SPEC(int32_t); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(double); DECLARE_GPU_SPEC(complex64); diff --git a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc index e93e2b0a018845..7d7bba8601da64 100644 --- a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc @@ -74,7 +74,7 @@ class SparseTensorToCSRSparseMatrixCPUOp : public OpKernel { const int64_t num_cols = dense_shape_vec((rank == 2) ? 1 : 2); const int64_t total_nnz = values.NumElements(); - static constexpr int64_t kInt32Max = std::numeric_limits::max(); + static constexpr int64_t kInt32Max = std::numeric_limits::max(); OP_REQUIRES( ctx, batch_size < kInt32Max, errors::InvalidArgument("dense_shape batch_size must be < Int32Max," @@ -106,16 +106,16 @@ class SparseTensorToCSRSparseMatrixCPUOp : public OpKernel { Tensor csr_row_ptr(cpu_allocator(), DT_INT32, csr_row_ind_shape); // Fill the row pointers with zeros. - functor::SetZeroFunctor set_zero; - set_zero(ctx->eigen_device(), csr_row_ptr.flat()); + functor::SetZeroFunctor set_zero; + set_zero(ctx->eigen_device(), csr_row_ptr.flat()); // Convert from COO to CSR format. functor::SparseTensorToCSRSparseMatrixCPUFunctor coo_to_csr; OP_REQUIRES_OK( ctx, coo_to_csr(batch_size, num_rows, num_cols, - indices.template matrix(), batch_ptr.vec(), - csr_row_ptr.vec(), csr_col_ind.vec())); + indices.template matrix(), batch_ptr.vec(), + csr_row_ptr.vec(), csr_col_ind.vec())); // Create the CSRSparseMatrix object from its component Tensors and prepare // the Variant output Tensor. @@ -166,7 +166,7 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { const int64_t rows = dense_shape((rank == 2) ? 0 : 1); const int64_t cols = dense_shape((rank == 2) ? 
1 : 2); - static constexpr int64_t kInt32Max = std::numeric_limits::max(); + static constexpr int64_t kInt32Max = std::numeric_limits::max(); OP_REQUIRES_ASYNC( c, batch_size < kInt32Max, errors::InvalidArgument("dense_shape batch_size must be < Int32Max," @@ -187,7 +187,7 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { (rows + 1) * batch_size), done); - ScratchSpace nnz_per_batch_host(c, batch_size, /*on_host*/ true); + ScratchSpace nnz_per_batch_host(c, batch_size, /*on_host*/ true); Tensor nnz_per_batch_device_t; if (rank == 2) { @@ -198,7 +198,7 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { c->allocate_temp(DT_INT32, TensorShape({batch_size}), &nnz_per_batch_device_t), done); - auto nnz_per_batch_device = nnz_per_batch_device_t.vec(); + auto nnz_per_batch_device = nnz_per_batch_device_t.vec(); functor::CalculateNNZPerBatchMatrixFromIndices calculate_nnz_from_indices; @@ -207,14 +207,14 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { c, calculate_nnz_from_indices(c, indices, nnz_per_batch_device), done); - stream_executor::DeviceMemoryBase nnz_per_batch_device_ptr( + stream_executor::DeviceAddressBase nnz_per_batch_device_ptr( static_cast(nnz_per_batch_device.data())); OP_REQUIRES_OK_ASYNC( c, stream->Memcpy(nnz_per_batch_host.mutable_data() /*host_dst*/, nnz_per_batch_device_ptr /*gpu_src*/, - batch_size * sizeof(int32) /*size*/), + batch_size * sizeof(int32_t) /*size*/), done); } @@ -227,7 +227,7 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { // tensor by the time we get here; we can unreference it. nnz_per_batch_device_ref.Unref(); - auto nnz_per_batch = nnz_per_batch_host.tensor().vec(); + auto nnz_per_batch = nnz_per_batch_host.tensor().vec(); // Ensure that within the callback, the proper GPU settings are // configured. @@ -237,7 +237,7 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { Tensor batch_ptr_t(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); - auto batch_ptr = batch_ptr_t.vec(); + auto batch_ptr = batch_ptr_t.vec(); auto indices = indices_t.matrix(); batch_ptr(0) = 0; @@ -274,9 +274,9 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { &csr_row_ptr_t), done); - auto coo_row_ind = coo_row_ind_t.vec(); - auto coo_col_ind = coo_col_ind_t.vec(); - auto csr_row_ptr = csr_row_ptr_t.vec(); + auto coo_row_ind = coo_row_ind_t.vec(); + auto coo_col_ind = coo_col_ind_t.vec(); + auto csr_row_ptr = csr_row_ptr_t.vec(); // Convert SparseTensor rep to coo row ind, coo col ind. if (total_nnz > 0) { @@ -290,8 +290,8 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { // a bug if you have empty coo rows. // TODO(ebrevdo): File bug w/ nvidia so coo2csr can handle // zero-element input coo rows. - functor::SetZeroFunctor set_zero; - set_zero(d, csr_row_ptr_t.flat()); + functor::SetZeroFunctor set_zero; + set_zero(d, csr_row_ptr_t.flat()); functor::COOSparseMatrixToCSRSparseMatrix coo_to_csr; for (int i = 0; i < batch_size; ++i) { @@ -301,9 +301,9 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { // handled by the SetZero above. } else { // Convert coo to csr. 
- auto coo_row_ind_i = - TTypes::UnalignedVec(&coo_row_ind(batch_ptr(i)), nnz_i); - auto csr_row_ptr_i = TTypes::UnalignedVec( + auto coo_row_ind_i = TTypes::UnalignedVec( + &coo_row_ind(batch_ptr(i)), nnz_i); + auto csr_row_ptr_i = TTypes::UnalignedVec( &csr_row_ptr((rows + 1) * i), rows + 1); OP_REQUIRES_OK_ASYNC( c, coo_to_csr(c, rows, cols, coo_row_ind_i, csr_row_ptr_i), @@ -345,9 +345,9 @@ class SparseTensorToCSRSparseMatrixGPUOp : public AsyncOpKernel { namespace functor { template <> -Status CalculateNNZPerBatchMatrixFromIndices::operator()( +absl::Status CalculateNNZPerBatchMatrixFromIndices::operator()( OpKernelContext* c, TTypes::ConstMatrix indices, - TTypes::Vec nnz_per_batch); + TTypes::Vec nnz_per_batch); extern template struct CalculateNNZPerBatchMatrixFromIndices; template <> @@ -361,9 +361,9 @@ extern template struct SparseTensorToCOOSparseMatrix; template <> struct COOSparseMatrixToCSRSparseMatrix { - Status operator()(OpKernelContext* c, const int rows, const int cols, - TTypes::UnalignedVec coo_row_ind, - TTypes::UnalignedVec csr_row_ptr) { + absl::Status operator()(OpKernelContext* c, const int rows, const int cols, + TTypes::UnalignedVec coo_row_ind, + TTypes::UnalignedVec csr_row_ptr) { GpuSparse cuda_sparse(c); TF_RETURN_IF_ERROR(cuda_sparse.Initialize()); return cuda_sparse.Coo2csr(coo_row_ind.data(), diff --git a/tensorflow/core/kernels/sparse/transpose_op.cc b/tensorflow/core/kernels/sparse/transpose_op.cc index 74e0b85f393e40..234b00e5749593 100644 --- a/tensorflow/core/kernels/sparse/transpose_op.cc +++ b/tensorflow/core/kernels/sparse/transpose_op.cc @@ -182,9 +182,9 @@ absl::Status CSRSparseMatrixTranspose::operator()( // Set the output row pointers to zero, in case we hit any empty // input batches. - functor::SetZeroFunctor set_zero; + functor::SetZeroFunctor set_zero; const Device& d = ctx->eigen_device(); - set_zero(d, output_row_ptr_t.flat()); + set_zero(d, output_row_ptr_t.flat()); functor::CSRSparseMatrixTransposeComponent transpose_component; for (int i = 0; i < batch_size; ++i) { @@ -255,8 +255,8 @@ struct CSRSparseMatrixTransposeComponent { template struct CSRSparseMatrixTransposeComponent { - Status operator()(OpKernelContext* ctx, const ConstCSRComponent& x, - CSRComponent* y) { + absl::Status operator()(OpKernelContext* ctx, const ConstCSRComponent& x, + CSRComponent* y) { TF_RETURN_IF_ERROR(ValidateTransposeInputs(x, *y)); GpuSparse cuda_sparse(ctx); TF_RETURN_IF_ERROR(cuda_sparse.Initialize()); @@ -277,7 +277,7 @@ struct CSRSparseMatrixTransposeComponent { x.col_ind.data() /*csrColInd*/, y->values.data() /*cscVal*/, y->col_ind.data() /*cscRowInd*/, y->row_ptr.data() /*cscColPtr*/, copyValues); - return OkStatus(); + return absl::OkStatus(); } }; #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/sparse/zeros_op.h b/tensorflow/core/kernels/sparse/zeros_op.h index 2a86089e04e62e..8f6c09fdb0fa68 100644 --- a/tensorflow/core/kernels/sparse/zeros_op.h +++ b/tensorflow/core/kernels/sparse/zeros_op.h @@ -54,7 +54,7 @@ struct CSRSparseMatrixZeros { Tensor batch_ptr_t(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); - batch_ptr_t.vec().setZero(); // On host. + batch_ptr_t.vec().setZero(); // On host. 
Allocator* allocator = c->device()->GetAllocator(AllocatorAttributes()); // An all-zeros CSR matrix is composed of an empty set of column @@ -66,10 +66,10 @@ struct CSRSparseMatrixZeros { Tensor coo_col_ind_t(allocator, DT_INT32, TensorShape({0})); Tensor csr_values_t(allocator, dtype, TensorShape({0})); const Device& d = c->eigen_device(); - functor::SetZeroFunctor set_zero; + functor::SetZeroFunctor set_zero; TF_RETURN_IF_ERROR(c->allocate_temp( DT_INT32, TensorShape({batch_size * (rows + 1)}), &csr_row_ptr_t)); - set_zero(d, csr_row_ptr_t.flat()); + set_zero(d, csr_row_ptr_t.flat()); TF_RETURN_IF_ERROR(CSRSparseMatrix::CreateCSRSparseMatrix( dtype, dense_shape_t, batch_ptr_t, csr_row_ptr_t, coo_col_ind_t, diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index bd2956c734a1b7..7bd5f5be719565 100644 --- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -836,24 +836,24 @@ TF_CALL_COMPLEX_TYPES(REGISTER_GPU); REGISTER_KERNEL_BUILDER( Name("TensorArrayGather") .Device(DEVICE_GPU) - .TypeConstraint("dtype") + .TypeConstraint("dtype") .HostMemory("indices") .HostMemory("handle"), - TensorArrayPackOrGatherOp); + TensorArrayPackOrGatherOp); REGISTER_KERNEL_BUILDER( Name("TensorArrayGatherV2") .Device(DEVICE_GPU) - .TypeConstraint("dtype") + .TypeConstraint("dtype") .HostMemory("indices") .HostMemory("handle"), - TensorArrayPackOrGatherOp); + TensorArrayPackOrGatherOp); REGISTER_KERNEL_BUILDER( Name("TensorArrayGatherV3") .Device(DEVICE_GPU) - .TypeConstraint("dtype") + .TypeConstraint("dtype") .HostMemory("indices") .HostMemory("handle"), - TensorArrayPackOrGatherOp); + TensorArrayPackOrGatherOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -1050,22 +1050,22 @@ TF_CALL_COMPLEX_TYPES(REGISTER_GPU); // registration requires all int32 inputs and outputs to be in host memory. REGISTER_KERNEL_BUILDER(Name("TensorArrayConcat") .Device(DEVICE_GPU) - .TypeConstraint("dtype") + .TypeConstraint("dtype") .HostMemory("lengths") .HostMemory("handle"), - TensorArrayConcatOp); + TensorArrayConcatOp); REGISTER_KERNEL_BUILDER(Name("TensorArrayConcatV2") .Device(DEVICE_GPU) - .TypeConstraint("dtype") + .TypeConstraint("dtype") .HostMemory("lengths") .HostMemory("handle"), - TensorArrayConcatOp); + TensorArrayConcatOp); REGISTER_KERNEL_BUILDER(Name("TensorArrayConcatV3") .Device(DEVICE_GPU) - .TypeConstraint("dtype") + .TypeConstraint("dtype") .HostMemory("lengths") .HostMemory("handle"), - TensorArrayConcatOp); + TensorArrayConcatOp); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc index b1207aeea7f674..b23b7a1b4d4e81 100644 --- a/tensorflow/core/kernels/unique_op.cc +++ b/tensorflow/core/kernels/unique_op.cc @@ -83,10 +83,10 @@ class UniqueOp : public OpKernel { // TODO(dga): Make unique polymorphic for returning int32 and int64 // vectors to support large tensors. 
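// Editorial aside (not part of the patch): the size guard below keeps the same
// numeric ceiling after this cleanup -- std::numeric_limits<int32_t>::max() is
// the 2,147,483,647-element limit the old TF int32 alias spelled; only the
// spelling moves to the standard fixed-width type.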
OP_REQUIRES(context, - input.NumElements() <= std::numeric_limits::max(), + input.NumElements() <= std::numeric_limits::max(), errors::InvalidArgument( "unique does not support input tensors larger than ", - std::numeric_limits::max(), " elements")); + std::numeric_limits::max(), " elements")); int64_t axis = 0; std::vector new_sizes{1, input.NumElements(), 1}; @@ -115,7 +115,7 @@ class UniqueOp : public OpKernel { "axis tensor should be int32 or int64, but got ", DataTypeString(axis_tensor.dtype()))); if (axis_tensor.dtype() == DT_INT32) { - axis = internal::SubtleMustCopy(axis_tensor.scalar()()); + axis = internal::SubtleMustCopy(axis_tensor.scalar()()); } else { axis = internal::SubtleMustCopy(axis_tensor.scalar()()); } diff --git a/tensorflow/core/kernels/unique_op_test.cc b/tensorflow/core/kernels/unique_op_test.cc index e21c0bfad6ae52..b870921666bd83 100644 --- a/tensorflow/core/kernels/unique_op_test.cc +++ b/tensorflow/core/kernels/unique_op_test.cc @@ -84,7 +84,7 @@ void BM_Unique_INT32(::testing::benchmark::State& state) { "SINGLE_THREADED_EXECUTOR", /*old_benchmark_api*/ false) .Run(state); state.SetBytesProcessed(static_cast(state.iterations()) * dim * - sizeof(int32)); + sizeof(int32_t)); } void BM_Unique_INT32_Repeat(::testing::benchmark::State& state) { @@ -108,7 +108,7 @@ void BM_Unique_INT32_Repeat(::testing::benchmark::State& state) { "SINGLE_THREADED_EXECUTOR", /*old_benchmark_api*/ false) .Run(state); state.SetBytesProcessed(static_cast(state.iterations()) * dim * 200 * - sizeof(int32)); + sizeof(int32_t)); } TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) { @@ -118,7 +118,7 @@ TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) { tensor_proto.mutable_tensor_shape()->set_unknown_rank(false); for (int i = 0; i < dim; ++i) { const int len = std::rand() % max_str_len + 1; - string rand_str; + std::string rand_str; rand_str.resize(len); for (int j = 0; j < len; ++j) { rand_str[j] = static_cast(j % 256); diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc index 25eb23de84e177..a43beaecc040be 100644 --- a/tensorflow/core/kernels/variable_ops.cc +++ b/tensorflow/core/kernels/variable_ops.cc @@ -29,8 +29,8 @@ namespace { // Makes a unique name for a temporary variable inside a while loop body, // because loop can be executed in multiple iterations in parallel. 
-string TemporaryVariableName(const string& var_name, - const FrameAndIter& control_frame) { +std::string TemporaryVariableName(const std::string& var_name, + const FrameAndIter& control_frame) { if (control_frame.frame_id != kIllegalFrameId && control_frame.iter_id != kIllegalIterId) { return strings::StrCat(var_name, "/frame:", control_frame.frame_id, @@ -53,7 +53,7 @@ class LegacyVar : public ResourceBase { mutex* mu() { return &mu_; } Tensor* tensor() { return &tensor_; } - string DebugString() const override { + std::string DebugString() const override { return absl::StrCat(DataTypeString(tensor_.dtype()), "/", tensor_.shape().DebugString()); } @@ -130,14 +130,14 @@ class TemporaryVariableOp : public OpKernel { struct TmpVar : public ResourceBase { mutex mu; Tensor val; - string name; - string DebugString() const override { return name; } + std::string name; + std::string DebugString() const override { return name; } ~TmpVar() override { VLOG(3) << "TmpVar " << name << " deleted"; } }; TensorShape shape_; DataType dtype_; - string var_name_; + std::string var_name_; }; class DestroyTemporaryVariableOp : public OpKernel { @@ -171,7 +171,7 @@ class DestroyTemporaryVariableOp : public OpKernel { } private: - string var_name_; + std::string var_name_; }; class IsVariableInitializedOp : public OpKernel { diff --git a/tensorflow/core/kernels/variable_ops_test.cc b/tensorflow/core/kernels/variable_ops_test.cc index 0a814aab1db9fe..6ed93a0e643f2f 100644 --- a/tensorflow/core/kernels/variable_ops_test.cc +++ b/tensorflow/core/kernels/variable_ops_test.cc @@ -31,7 +31,7 @@ namespace { void ManyManyVariablesHelper(int threads, int variables, ::testing::benchmark::State& state) { Graph g(OpRegistry::Global()); - std::vector targets; + std::vector targets; for (int i = 0; i < variables; ++i) { Node* v; TF_CHECK_OK( diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc index 1421e24cbb0fdd..42c89f61ff3f48 100644 --- a/tensorflow/core/kernels/where_op.cc +++ b/tensorflow/core/kernels/where_op.cc @@ -259,8 +259,8 @@ class WhereGPUOp : public AsyncOpKernel { const Tensor& input = context->input(0); const int input_dims = input.dims(); - if (input.NumElements() < std::numeric_limits::max()) { - ComputeAsyncType(input, input_dims, context, done); + if (input.NumElements() < std::numeric_limits::max()) { + ComputeAsyncType(input, input_dims, context, done); } else { ComputeAsyncType(input, input_dims, context, done); } @@ -282,7 +282,7 @@ class WhereGPUOp : public AsyncOpKernel { // Push kernel to stream to get number of true elements. 
const GPUDevice& d = context->eigen_device(); - Status s = functor::NumTrue::Compute( + absl::Status s = functor::NumTrue::Compute( context, d, input.flat(), num_true_t); OP_REQUIRES_OK_ASYNC(context, s, done); @@ -374,9 +374,9 @@ TF_CALL_WHERE_GPU_TYPES(REGISTER_GPU_WHERE_OP); REGISTER_KERNEL_BUILDER(Name("Where") .Device(DEVICE_DEFAULT) - .TypeConstraint("T") + .TypeConstraint("T") .HostMemory("input") .HostMemory("index"), - WhereCPUOp); + WhereCPUOp); } // namespace tensorflow diff --git a/tensorflow/core/kernels/while_op_test.cc b/tensorflow/core/kernels/while_op_test.cc index b7f5af047b8186..36c68d2b14e508 100644 --- a/tensorflow/core/kernels/while_op_test.cc +++ b/tensorflow/core/kernels/while_op_test.cc @@ -123,7 +123,8 @@ TEST_F(WhileOpTest, WhileOpCPUBuildWithPluggableDevice) { #if EIGEN_MAX_ALIGN_BYTES == 0 return malloc(size); #else - return tensorflow::port::AlignedMalloc(size, EIGEN_MAX_ALIGN_BYTES); + return tsl::port::AlignedMalloc( + size, static_cast(EIGEN_MAX_ALIGN_BYTES)); #endif }; se_.host_memory_deallocate = [](const SP_Device* const device, void* mem) { @@ -136,7 +137,8 @@ TEST_F(WhileOpTest, WhileOpCPUBuildWithPluggableDevice) { #if EIGEN_MAX_ALIGN_BYTES == 0 mem->opaque = malloc(size); #else - mem->opaque = tensorflow::port::AlignedMalloc(size, EIGEN_MAX_ALIGN_BYTES); + mem->opaque = tsl::port::AlignedMalloc( + size, static_cast(EIGEN_MAX_ALIGN_BYTES)); #endif mem->size = size; }; diff --git a/tensorflow/core/kernels/whole_file_read_ops.cc b/tensorflow/core/kernels/whole_file_read_ops.cc index c5c8e548a7592f..884c7725f43d6b 100644 --- a/tensorflow/core/kernels/whole_file_read_ops.cc +++ b/tensorflow/core/kernels/whole_file_read_ops.cc @@ -35,7 +35,7 @@ limitations under the License. namespace tensorflow { template -static absl::Status ReadEntireFile(Env* env, const string& filename, +static absl::Status ReadEntireFile(Env* env, const std::string& filename, T* contents) { std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewRandomAccessFile(filename, &file)); @@ -47,7 +47,7 @@ static absl::Status ReadEntireFile(Env* env, const string& filename, class WholeFileReader : public ReaderBase { public: - WholeFileReader(Env* env, const string& node_name) + WholeFileReader(Env* env, const std::string& node_name) : ReaderBase(absl::StrCat("WholeFileReader '", node_name, "'")), env_(env) {} @@ -136,8 +136,8 @@ class WriteFileOp : public OpKernel { errors::InvalidArgument( "Contents tensor must be scalar, but had shape: ", contents_input->shape().DebugString())); - const string& filename = filename_input->scalar()(); - const string dir(io::Dirname(filename)); + const std::string& filename = filename_input->scalar()(); + const std::string dir(io::Dirname(filename)); if (!context->env()->FileExists(dir).ok()) { OP_REQUIRES_OK(context, context->env()->RecursivelyCreateDir(dir)); } diff --git a/tensorflow/core/lib/core/arena.h b/tensorflow/core/lib/core/arena.h index 14d80422496bd2..d5f6c765e51dd0 100644 --- a/tensorflow/core/lib/core/arena.h +++ b/tensorflow/core/lib/core/arena.h @@ -62,7 +62,7 @@ class Arena { protected: bool SatisfyAlignment(const size_t alignment); - void MakeNewBlock(const uint32 alignment); + void MakeNewBlock(const uint32_t alignment); void* GetMemoryFallback(const size_t size, const int align); void* GetMemory(const size_t size, const int align) { assert(remaining_ <= block_size_); // an invariant @@ -88,7 +88,7 @@ class Arena { // The returned AllocatedBlock* is valid until the next call to AllocNewBlock // or Reset (i.e. 
anything that might affect overflow_blocks_). AllocatedBlock* AllocNewBlock(const size_t block_size, - const uint32 alignment); + const uint32_t alignment); const size_t block_size_; char* freestart_; // beginning of the free space in most recent block diff --git a/tensorflow/core/lib/core/coding_test.cc b/tensorflow/core/lib/core/coding_test.cc index 9efe3d8ec10f2c..4769cddaca0906 100644 --- a/tensorflow/core/lib/core/coding_test.cc +++ b/tensorflow/core/lib/core/coding_test.cc @@ -22,46 +22,46 @@ namespace tensorflow { namespace core { TEST(Coding, Fixed16) { - static const uint16 N = 50000; + static const uint16_t N = 50000; - string s; - for (uint16 v = 0; v < N; v++) { - char buf[sizeof(uint16)]; + std::string s; + for (uint16_t v = 0; v < N; v++) { + char buf[sizeof(uint16_t)]; EncodeFixed16(buf, v); s.append(buf, sizeof(buf)); } const char* p = s.data(); - for (uint16 v = 0; v < N; v++) { - uint16 actual = DecodeFixed16(p); + for (uint16_t v = 0; v < N; v++) { + uint16_t actual = DecodeFixed16(p); ASSERT_EQ(v, actual); - p += sizeof(uint16); + p += sizeof(uint16_t); } } TEST(Coding, Fixed32) { - static const uint32 N = 100000; + static const uint32_t N = 100000; - string s; - for (uint32 v = 0; v < N; v++) { - char buf[sizeof(uint32)]; + std::string s; + for (uint32_t v = 0; v < N; v++) { + char buf[sizeof(uint32_t)]; EncodeFixed32(buf, v); s.append(buf, sizeof(buf)); } const char* p = s.data(); - for (uint32 v = 0; v < N; v++) { - uint32 actual = DecodeFixed32(p); + for (uint32_t v = 0; v < N; v++) { + uint32_t actual = DecodeFixed32(p); ASSERT_EQ(v, actual); - p += sizeof(uint32); + p += sizeof(uint32_t); } } TEST(Coding, Fixed64) { - string s; + std::string s; for (int power = 0; power <= 63; power++) { - uint64 v = static_cast(1) << power; - char buf[sizeof(uint64)]; + uint64_t v = static_cast(1) << power; + char buf[sizeof(uint64_t)]; EncodeFixed64(buf, v - 1); s.append(buf, sizeof(buf)); EncodeFixed64(buf, v + 0); @@ -72,19 +72,19 @@ TEST(Coding, Fixed64) { const char* p = s.data(); for (int power = 0; power <= 63; power++) { - uint64 v = static_cast(1) << power; - uint64 actual; + uint64_t v = static_cast(1) << power; + uint64_t actual; actual = DecodeFixed64(p); ASSERT_EQ(v - 1, actual); - p += sizeof(uint64); + p += sizeof(uint64_t); actual = DecodeFixed64(p); ASSERT_EQ(v + 0, actual); - p += sizeof(uint64); + p += sizeof(uint64_t); actual = DecodeFixed64(p); ASSERT_EQ(v + 1, actual); - p += sizeof(uint64); + p += sizeof(uint64_t); } } @@ -113,17 +113,17 @@ TEST(Coding, EncodingOutput) { } TEST(Coding, Varint32) { - string s; - for (uint32 i = 0; i < (32 * 32); i++) { - uint32 v = (i / 32) << (i % 32); + std::string s; + for (uint32_t i = 0; i < (32 * 32); i++) { + uint32_t v = (i / 32) << (i % 32); PutVarint32(&s, v); } const char* p = s.data(); const char* limit = p + s.size(); - for (uint32 i = 0; i < (32 * 32); i++) { - uint32 expected = (i / 32) << (i % 32); - uint32 actual; + for (uint32_t i = 0; i < (32 * 32); i++) { + uint32_t expected = (i / 32) << (i % 32); + uint32_t actual; p = GetVarint32Ptr(p, limit, &actual); ASSERT_TRUE(p != nullptr); ASSERT_EQ(expected, actual); @@ -133,21 +133,21 @@ TEST(Coding, Varint32) { TEST(Coding, Varint64) { // Construct the list of values to check - std::vector values; + std::vector values; // Some special values values.push_back(0); values.push_back(100); - values.push_back(~static_cast(0)); - values.push_back(~static_cast(0) - 1); - for (uint32 k = 0; k < 64; k++) { + values.push_back(~static_cast(0)); + 
values.push_back(~static_cast(0) - 1); + for (uint32_t k = 0; k < 64; k++) { // Test values near powers of two - const uint64 power = 1ull << k; + const uint64_t power = 1ull << k; values.push_back(power); values.push_back(power - 1); values.push_back(power + 1); } - string s; + std::string s; for (size_t i = 0; i < values.size(); i++) { PutVarint64(&s, values[i]); } @@ -156,7 +156,7 @@ TEST(Coding, Varint64) { const char* limit = p + s.size(); for (size_t i = 0; i < values.size(); i++) { ASSERT_TRUE(p < limit); - uint64 actual; + uint64_t actual; p = GetVarint64Ptr(p, limit, &actual); ASSERT_TRUE(p != nullptr); ASSERT_EQ(values[i], actual); @@ -165,17 +165,17 @@ TEST(Coding, Varint64) { } TEST(Coding, Varint32Overflow) { - uint32 result; - string input("\x81\x82\x83\x84\x85\x11"); + uint32_t result; + std::string input("\x81\x82\x83\x84\x85\x11"); ASSERT_TRUE(GetVarint32Ptr(input.data(), input.data() + input.size(), &result) == nullptr); } TEST(Coding, Varint32Truncation) { - uint32 large_value = (1u << 31) + 100; - string s; + uint32_t large_value = (1u << 31) + 100; + std::string s; PutVarint32(&s, large_value); - uint32 result; + uint32_t result; for (size_t len = 0; len < s.size() - 1; len++) { ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + len, &result) == nullptr); } @@ -185,17 +185,17 @@ TEST(Coding, Varint32Truncation) { } TEST(Coding, Varint64Overflow) { - uint64 result; - string input("\x81\x82\x83\x84\x85\x81\x82\x83\x84\x85\x11"); + uint64_t result; + std::string input("\x81\x82\x83\x84\x85\x81\x82\x83\x84\x85\x11"); ASSERT_TRUE(GetVarint64Ptr(input.data(), input.data() + input.size(), &result) == nullptr); } TEST(Coding, Varint64Truncation) { - uint64 large_value = (1ull << 63) + 100ull; - string s; + uint64_t large_value = (1ull << 63) + 100ull; + std::string s; PutVarint64(&s, large_value); - uint64 result; + uint64_t result; for (size_t len = 0; len < s.size() - 1; len++) { ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + len, &result) == nullptr); } diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.cc b/tensorflow/core/lib/jpeg/jpeg_mem.cc index bb729bf99272cc..85c7ef3d268372 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc @@ -58,7 +58,7 @@ class FewerArgsForCompiler { public: FewerArgsForCompiler(int datasize, const UncompressFlags& flags, int64_t* nwarn, - std::function allocate_output) + std::function allocate_output) : datasize_(datasize), flags_(flags), pnwarn_(nwarn), @@ -72,7 +72,7 @@ class FewerArgsForCompiler { const int datasize_; const UncompressFlags flags_; int64_t* const pnwarn_; - std::function allocate_output_; + std::function allocate_output_; int height_read_; // number of scanline lines successfully read int height_; int stride_; @@ -95,7 +95,7 @@ bool IsCropWindowValid(const UncompressFlags& flags, int input_image_width, void no_print(j_common_ptr cinfo) {} #endif -uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { +uint8_t* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { // unpack the argball const int datasize = argball->datasize_; const auto& flags = argball->flags_; @@ -252,8 +252,8 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { target_output_height, components); } #else - uint8* dstdata = argball->allocate_output_(target_output_width, - target_output_height, components); + uint8_t* dstdata = argball->allocate_output_( + target_output_width, target_output_height, components); #endif if (dstdata == nullptr) { 
jpeg_destroy_decompress(&cinfo); @@ -509,12 +509,12 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { // associated libraries aren't good enough to guarantee that 7 // parameters won't get clobbered by the longjmp. So we help // it out a little. -uint8* Uncompress(const void* srcdata, int datasize, - const UncompressFlags& flags, int64_t* nwarn, - std::function allocate_output) { +uint8_t* Uncompress(const void* srcdata, int datasize, + const UncompressFlags& flags, int64_t* nwarn, + std::function allocate_output) { FewerArgsForCompiler argball(datasize, flags, nwarn, std::move(allocate_output)); - uint8* const dstdata = UncompressLow(srcdata, &argball); + uint8_t* const dstdata = UncompressLow(srcdata, &argball); const float fraction_read = argball.height_ == 0 @@ -530,7 +530,7 @@ uint8* Uncompress(const void* srcdata, int datasize, // set the unread pixels to black if (argball.height_read_ != argball.height_) { const int first_bad_line = argball.height_read_; - uint8* start = dstdata + first_bad_line * argball.stride_; + uint8_t* start = dstdata + first_bad_line * argball.stride_; const int nbytes = (argball.height_ - first_bad_line) * argball.stride_; memset(static_cast(start), 0, nbytes); } @@ -538,17 +538,17 @@ uint8* Uncompress(const void* srcdata, int datasize, return dstdata; } -uint8* Uncompress(const void* srcdata, int datasize, - const UncompressFlags& flags, int* pwidth, int* pheight, - int* pcomponents, int64_t* nwarn) { - uint8* buffer = nullptr; - uint8* result = +uint8_t* Uncompress(const void* srcdata, int datasize, + const UncompressFlags& flags, int* pwidth, int* pheight, + int* pcomponents, int64_t* nwarn) { + uint8_t* buffer = nullptr; + uint8_t* result = Uncompress(srcdata, datasize, flags, nwarn, [=, &buffer](int width, int height, int components) { if (pwidth != nullptr) *pwidth = width; if (pheight != nullptr) *pheight = height; if (pcomponents != nullptr) *pcomponents = components; - buffer = new uint8[height * width * components]; + buffer = new uint8_t[height * width * components]; return buffer; }); if (!result) delete[] buffer; @@ -599,7 +599,7 @@ bool GetImageInfo(const void* srcdata, int datasize, int* width, int* height, // Compression namespace { -bool CompressInternal(const uint8* srcdata, int width, int height, +bool CompressInternal(const uint8_t* srcdata, int width, int height, const CompressFlags& flags, tstring* output) { if (output == nullptr) { LOG(ERROR) << "Output buffer is null: "; @@ -711,7 +711,7 @@ bool CompressInternal(const uint8* srcdata, int width, int height, if (!flags.xmp_metadata.empty()) { // XMP metadata is embedded in the APP1 tag of JPEG and requires this // namespace header string (null-terminated) - const string name_space = "http://ns.adobe.com/xap/1.0/"; + const std::string name_space = "http://ns.adobe.com/xap/1.0/"; const int name_space_length = name_space.size(); const int metadata_length = flags.xmp_metadata.size(); const int packet_length = metadata_length + name_space_length + 1; @@ -736,8 +736,8 @@ bool CompressInternal(const uint8* srcdata, int width, int height, new JSAMPLE[width * cinfo.input_components]); while (cinfo.next_scanline < cinfo.image_height) { JSAMPROW row_pointer[1]; // pointer to JSAMPLE row[s] - const uint8* r = &srcdata[cinfo.next_scanline * in_stride]; - uint8* p = static_cast(row_temp.get()); + const uint8_t* r = &srcdata[cinfo.next_scanline * in_stride]; + uint8_t* p = static_cast(row_temp.get()); switch (flags.format) { case FORMAT_RGBA: { for (int i = 0; i < width; 
++i, p += 3, r += 4) { @@ -777,14 +777,14 @@ bool CompressInternal(const uint8* srcdata, int width, int height, bool Compress(const void* srcdata, int width, int height, const CompressFlags& flags, tstring* output) { - return CompressInternal(static_cast(srcdata), width, height, + return CompressInternal(static_cast(srcdata), width, height, flags, output); } tstring Compress(const void* srcdata, int width, int height, const CompressFlags& flags) { tstring temp; - CompressInternal(static_cast(srcdata), width, height, flags, + CompressInternal(static_cast(srcdata), width, height, flags, &temp); // If CompressInternal fails, temp will be empty. return temp; diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.h b/tensorflow/core/lib/jpeg/jpeg_mem.h index 859c4702fd09fa..569abb6b79bf74 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.h +++ b/tensorflow/core/lib/jpeg/jpeg_mem.h @@ -87,19 +87,19 @@ struct UncompressFlags { // The function returns a pointer to the raw uncompressed data or NULL if // there was an error. The caller of the function is responsible for // freeing the memory (using delete []). -uint8* Uncompress(const void* srcdata, int datasize, - const UncompressFlags& flags, int* width, int* height, - int* components, // Output only: useful with autodetect - int64_t* nwarn); +uint8_t* Uncompress(const void* srcdata, int datasize, + const UncompressFlags& flags, int* width, int* height, + int* components, // Output only: useful with autodetect + int64_t* nwarn); // Version of Uncompress that allocates memory via a callback. The callback // arguments are (width, height, components). If the size is known ahead of // time this function can return an existing buffer; passing a callback allows // the buffer to be shaped based on the JPEG header. The caller is responsible // for freeing the memory *even along error paths*. -uint8* Uncompress(const void* srcdata, int datasize, - const UncompressFlags& flags, int64_t* nwarn, - std::function allocate_output); +uint8_t* Uncompress(const void* srcdata, int datasize, + const UncompressFlags& flags, int64_t* nwarn, + std::function allocate_output); // Read jpeg header and get image information. Returns true on success. // The width, height, and components points may be null. 
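As context for the jpeg_mem.h change above, here is a minimal caller-side sketch of the callback-allocating Uncompress overload after this patch. It mirrors the pattern used in jpeg_mem_unittest.cc below; the helper name, buffer sizing, and error handling are illustrative assumptions, not code from this change.

// Illustrative sketch only -- assumes the post-change declarations in
// tensorflow/core/lib/jpeg/jpeg_mem.h; DecodeRGB itself is hypothetical.
#include <cstdint>
#include <memory>

#include "tensorflow/core/lib/jpeg/jpeg_mem.h"

std::unique_ptr<uint8_t[]> DecodeRGB(const void* jpeg_bytes, int jpeg_size,
                                     int* width, int* height) {
  tensorflow::jpeg::UncompressFlags flags;
  flags.components = 3;  // request RGB output
  uint8_t* buffer = nullptr;
  // The callback receives (width, height, components) and returns the
  // destination buffer; the caller stays responsible for freeing it,
  // even if decoding fails afterwards.
  uint8_t* result = tensorflow::jpeg::Uncompress(
      jpeg_bytes, jpeg_size, flags, /*nwarn=*/nullptr,
      [&](int w, int h, int c) {
        if (width != nullptr) *width = w;
        if (height != nullptr) *height = h;
        buffer = new uint8_t[static_cast<size_t>(w) * h * c];
        return buffer;
      });
  if (result == nullptr) {
    delete[] buffer;  // decode failed; release the caller-owned buffer
    return nullptr;
  }
  return std::unique_ptr<uint8_t[]>(buffer);
}

The three-output-pointer overload shown first in the header remains available for callers that prefer the library to allocate the buffer with new[].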
diff --git a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc index 0248a453d5586f..a8c5401bf52e01 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc @@ -41,12 +41,12 @@ namespace { const char kTestData[] = "tensorflow/core/lib/jpeg/testdata/"; -int ComputeSumAbsoluteDifference(const uint8* a, const uint8* b, int width, +int ComputeSumAbsoluteDifference(const uint8_t* a, const uint8_t* b, int width, int height, int a_stride, int b_stride) { int totalerr = 0; for (int i = 0; i < height; i++) { - const uint8* const pa = a + i * a_stride; - const uint8* const pb = b + i * b_stride; + const uint8_t* const pa = a + i * a_stride; + const uint8_t* const pb = b + i * b_stride; for (int j = 0; j < 3 * width; j++) { totalerr += abs(static_cast(pa[j]) - static_cast(pb[j])); } @@ -55,20 +55,21 @@ int ComputeSumAbsoluteDifference(const uint8* a, const uint8* b, int width, } // Reads the contents of the file into output -void ReadFileToStringOrDie(Env* env, const string& filename, string* output) { +void ReadFileToStringOrDie(Env* env, const std::string& filename, + std::string* output) { TF_CHECK_OK(ReadFileToString(env, filename, output)); } -void TestJPEG(Env* env, const string& jpegfile) { +void TestJPEG(Env* env, const std::string& jpegfile) { // Read the data from the jpeg file into memory - string jpeg; + std::string jpeg; ReadFileToStringOrDie(env, jpegfile, &jpeg); const int fsize = jpeg.size(); - const uint8* const temp = absl::bit_cast(jpeg.data()); + const uint8_t* const temp = absl::bit_cast(jpeg.data()); // Try partial decoding (half of the data) int w, h, c; - std::unique_ptr imgdata; + std::unique_ptr imgdata; UncompressFlags flags; flags.components = 3; @@ -91,7 +92,7 @@ void TestJPEG(Env* env, const string& jpegfile) { TEST(JpegMemTest, Jpeg) { Env* env = Env::Default(); - const string data_path = kTestData; + const std::string data_path = kTestData; // Name of a valid jpeg file on the disk TestJPEG(env, data_path + "jpeg_merge_test1.jpg"); @@ -100,16 +101,16 @@ TEST(JpegMemTest, Jpeg) { TestJPEG(env, data_path + "jpeg_merge_test1_cmyk.jpg"); } -void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, +void TestCropAndDecodeJpeg(Env* env, const std::string& jpegfile, const UncompressFlags& default_flags) { // Read the data from the jpeg file into memory - string jpeg; + std::string jpeg; ReadFileToStringOrDie(env, jpegfile, &jpeg); const int fsize = jpeg.size(); - const auto* temp = absl::bit_cast(jpeg.data()); + const auto* temp = absl::bit_cast(jpeg.data()); // Decode the whole image. - std::unique_ptr imgdata1; + std::unique_ptr imgdata1; int w1, h1, c1; { UncompressFlags flags = default_flags; @@ -119,13 +120,13 @@ void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, // If stride is not zero, the default allocator would fail because it // allocate w*h*c bytes, but the actual required bytes should be stride*h. // Therefore, we provide a specialized allocator here. 
- uint8* buffer = nullptr; + uint8_t* buffer = nullptr; imgdata1.reset(Uncompress(temp, fsize, flags, nullptr, [&](int width, int height, int components) { w1 = width; h1 = height; c1 = components; - buffer = new uint8[flags.stride * height]; + buffer = new uint8_t[flags.stride * height]; return buffer; })); } @@ -134,7 +135,7 @@ void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, auto check_crop_and_decode_func = [&](int crop_x, int crop_y, int crop_width, int crop_height) { - std::unique_ptr imgdata2; + std::unique_ptr imgdata2; int w, h, c; UncompressFlags flags = default_flags; flags.crop = true; @@ -145,13 +146,13 @@ void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, if (flags.stride == 0) { imgdata2.reset(Uncompress(temp, fsize, flags, &w, &h, &c, nullptr)); } else { - uint8* buffer = nullptr; + uint8_t* buffer = nullptr; imgdata2.reset(Uncompress(temp, fsize, flags, nullptr, [&](int width, int height, int components) { w = width; h = height; c = components; - buffer = new uint8[flags.stride * height]; + buffer = new uint8_t[flags.stride * height]; return buffer; })); } @@ -164,8 +165,8 @@ void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, const int stride1 = (flags.stride != 0) ? flags.stride : w1 * c; const int stride2 = (flags.stride != 0) ? flags.stride : w * c; for (int i = 0; i < crop_height; i++) { - const uint8* p1 = &imgdata1[(i + crop_y) * stride1 + crop_x * c]; - const uint8* p2 = &imgdata2[i * stride2]; + const uint8_t* p1 = &imgdata1[(i + crop_y) * stride1 + crop_x * c]; + const uint8_t* p2 = &imgdata2[i * stride2]; for (int j = 0; j < c * w; j++) { ASSERT_EQ(p1[j], p2[j]) @@ -185,7 +186,7 @@ void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, TEST(JpegMemTest, CropAndDecodeJpeg) { Env* env = Env::Default(); - const string data_path = kTestData; + const std::string data_path = kTestData; UncompressFlags flags; // Test basic flags for jpeg and cmyk jpeg. 
@@ -195,7 +196,7 @@ TEST(JpegMemTest, CropAndDecodeJpeg) { TEST(JpegMemTest, CropAndDecodeJpegWithRatio) { Env* env = Env::Default(); - const string data_path = kTestData; + const std::string data_path = kTestData; UncompressFlags flags; for (int ratio : {1, 2, 4, 8}) { flags.ratio = ratio; @@ -205,7 +206,7 @@ TEST(JpegMemTest, CropAndDecodeJpegWithRatio) { TEST(JpegMemTest, CropAndDecodeJpegWithComponents) { Env* env = Env::Default(); - const string data_path = kTestData; + const std::string data_path = kTestData; UncompressFlags flags; for (const int components : {0, 1, 3}) { flags.components = components; @@ -215,7 +216,7 @@ TEST(JpegMemTest, CropAndDecodeJpegWithComponents) { TEST(JpegMemTest, CropAndDecodeJpegWithUpScaling) { Env* env = Env::Default(); - const string data_path = kTestData; + const std::string data_path = kTestData; UncompressFlags flags; flags.fancy_upscaling = true; TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); @@ -223,13 +224,13 @@ TEST(JpegMemTest, CropAndDecodeJpegWithUpScaling) { TEST(JpegMemTest, CropAndDecodeJpegWithStride) { Env* env = Env::Default(); - const string data_path = kTestData; + const std::string data_path = kTestData; // Read the data from the jpeg file into memory - string jpeg; + std::string jpeg; ReadFileToStringOrDie(env, data_path + "jpeg_merge_test1.jpg", &jpeg); const int fsize = jpeg.size(); - const auto* temp = absl::bit_cast(jpeg.data()); + const auto* temp = absl::bit_cast(jpeg.data()); int w, h, c; ASSERT_TRUE(GetImageInfo(temp, fsize, &w, &h, &c)); @@ -244,9 +245,9 @@ TEST(JpegMemTest, CropAndDecodeJpegWithStride) { TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); } -void CheckInvalidCropWindowFailed(const uint8* const temp, int fsize, int x, +void CheckInvalidCropWindowFailed(const uint8_t* const temp, int fsize, int x, int y, int w, int h) { - std::unique_ptr imgdata; + std::unique_ptr imgdata; int ww, hh, cc; UncompressFlags flags; flags.components = 3; @@ -261,13 +262,13 @@ void CheckInvalidCropWindowFailed(const uint8* const temp, int fsize, int x, TEST(JpegMemTest, CropAndDecodeJpegWithInvalidCropWindow) { Env* env = Env::Default(); - const string data_path = kTestData; + const std::string data_path = kTestData; // Read the data from the jpeg file into memory - string jpeg; + std::string jpeg; ReadFileToStringOrDie(env, data_path + "jpeg_merge_test1.jpg", &jpeg); const int fsize = jpeg.size(); - const auto* temp = absl::bit_cast(jpeg.data()); + const auto* temp = absl::bit_cast(jpeg.data()); int w, h, c; ASSERT_TRUE(GetImageInfo(temp, fsize, &w, &h, &c)); @@ -293,27 +294,27 @@ TEST(JpegMemTest, Jpeg2) { const int in_w = 256; const int in_h = 256; const int stride1 = 3 * in_w; - const std::unique_ptr refdata1(new uint8[stride1 * in_h]); + const std::unique_ptr refdata1(new uint8_t[stride1 * in_h]); for (int i = 0; i < in_h; i++) { for (int j = 0; j < in_w; j++) { const int offset = i * stride1 + 3 * j; refdata1[offset + 0] = i; refdata1[offset + 1] = j; - refdata1[offset + 2] = static_cast((i + j) >> 1); + refdata1[offset + 2] = static_cast((i + j) >> 1); } } // duplicate with weird input stride const int stride2 = 3 * 357; - const std::unique_ptr refdata2(new uint8[stride2 * in_h]); + const std::unique_ptr refdata2(new uint8_t[stride2 * in_h]); for (int i = 0; i < in_h; i++) { memcpy(&refdata2[i * stride2], &refdata1[i * stride1], 3 * in_w); } // Test compression - string cpdata1, cpdata2; + std::string cpdata1, cpdata2; { - const string kXMP = "XMP_TEST_123"; + const std::string 
kXMP = "XMP_TEST_123"; // Compress it to JPEG CompressFlags flags; @@ -327,7 +328,7 @@ TEST(JpegMemTest, Jpeg2) { CHECK_EQ(cpdata1, cpdata2); // Verify valid XMP. - CHECK_NE(string::npos, cpdata1.find(kXMP)); + CHECK_NE(std::string::npos, cpdata1.find(kXMP)); // Test the other API, where a storage string is supplied tstring cptest; @@ -340,7 +341,7 @@ TEST(JpegMemTest, Jpeg2) { } // Uncompress twice: once with 3 components and once with autodetect. - std::unique_ptr imgdata1; + std::unique_ptr imgdata1; for (const int components : {0, 3}) { // Uncompress it UncompressFlags flags; @@ -366,7 +367,7 @@ TEST(JpegMemTest, Jpeg2) { { UncompressFlags flags; flags.stride = 3 * 411; - const std::unique_ptr imgdata2(new uint8[flags.stride * in_h]); + const std::unique_ptr imgdata2(new uint8_t[flags.stride * in_h]); CHECK(imgdata2.get() == Uncompress(cpdata2.c_str(), cpdata2.length(), flags, nullptr /* nwarn */, [=, &imgdata2](int w, int h, int c) { @@ -404,7 +405,7 @@ TEST(JpegMemTest, Jpeg2) { // Takes JPEG data and reads its headers to determine whether or not the JPEG // was chroma downsampled. -bool IsChromaDownsampled(const string& jpegdata) { +bool IsChromaDownsampled(const std::string& jpegdata) { // Initialize libjpeg structures to have a memory source // Modify the usual jpeg error manager to catch fatal errors. struct jpeg_decompress_struct cinfo; @@ -447,8 +448,8 @@ bool IsChromaDownsampled(const string& jpegdata) { TEST(JpegMemTest, ChromaDownsampling) { // Read the data from a test jpeg file into memory - const string jpegfile = string(kTestData) + "jpeg_merge_test1.jpg"; - string jpeg; + const std::string jpegfile = std::string(kTestData) + "jpeg_merge_test1.jpg"; + std::string jpeg; ReadFileToStringOrDie(Env::Default(), jpegfile, &jpeg); // Verify that compressing the JPEG with chroma downsampling works. 
@@ -458,7 +459,7 @@ TEST(JpegMemTest, ChromaDownsampling) { unflags.components = 3; int w, h, c; int64_t num_warnings; - std::unique_ptr uncompressed(Uncompress( + std::unique_ptr uncompressed(Uncompress( jpeg.c_str(), jpeg.size(), unflags, &w, &h, &c, &num_warnings)); CHECK(uncompressed != nullptr); CHECK_EQ(num_warnings, 0); @@ -476,10 +477,10 @@ TEST(JpegMemTest, ChromaDownsampling) { } } -void TestBadJPEG(Env* env, const string& bad_jpeg_file, int expected_width, - int expected_height, const string& reference_RGB_file, +void TestBadJPEG(Env* env, const std::string& bad_jpeg_file, int expected_width, + int expected_height, const std::string& reference_RGB_file, const bool try_recover_truncated_jpeg) { - string jpeg; + std::string jpeg; ReadFileToStringOrDie(env, bad_jpeg_file, &jpeg); UncompressFlags flags; @@ -487,7 +488,7 @@ void TestBadJPEG(Env* env, const string& bad_jpeg_file, int expected_width, flags.try_recover_truncated_jpeg = try_recover_truncated_jpeg; int width, height, components; - std::unique_ptr imgdata; + std::unique_ptr imgdata; imgdata.reset(Uncompress(jpeg.c_str(), jpeg.size(), flags, &width, &height, &components, nullptr)); if (expected_width > 0) { // we expect the file to decode into 'something' @@ -496,7 +497,7 @@ void TestBadJPEG(Env* env, const string& bad_jpeg_file, int expected_width, CHECK_EQ(components, 3); CHECK(imgdata.get()); if (!reference_RGB_file.empty()) { - string ref; + std::string ref; ReadFileToStringOrDie(env, reference_RGB_file, &ref); CHECK(!memcmp(ref.data(), imgdata.get(), ref.size())); } @@ -507,7 +508,7 @@ void TestBadJPEG(Env* env, const string& bad_jpeg_file, int expected_width, TEST(JpegMemTest, BadJpeg) { Env* env = Env::Default(); - const string data_path = kTestData; + const std::string data_path = kTestData; // Test corrupt file TestBadJPEG(env, data_path + "bad_huffman.jpg", 1024, 768, "", false); diff --git a/tensorflow/core/lib/png/png_io.cc b/tensorflow/core/lib/png/png_io.cc index b16584be2f3da5..82bff12556d89c 100644 --- a/tensorflow/core/lib/png/png_io.cc +++ b/tensorflow/core/lib/png/png_io.cc @@ -53,8 +53,8 @@ namespace { (del))) // Convert from 8 bit components to 16. This works in-place. -static void Convert8to16(const uint8* p8, int num_comps, int p8_row_bytes, - int width, int height_in, uint16* p16, +static void Convert8to16(const uint8_t* p8, int num_comps, int p8_row_bytes, + int width, int height_in, uint16_t* p16, int p16_row_bytes) { // Force height*row_bytes computations to use 64 bits. 
Height*width is // enforced to < 29 bits in decode_png_op.cc, but height*row_bytes is @@ -64,17 +64,18 @@ static void Convert8to16(const uint8* p8, int num_comps, int p8_row_bytes, // Adjust pointers to copy backwards width *= num_comps; - CPTR_INC(uint8, p8, (height - 1) * p8_row_bytes + (width - 1) * sizeof(*p8)); - PTR_INC(uint16, p16, + CPTR_INC(uint8_t, p8, + (height - 1) * p8_row_bytes + (width - 1) * sizeof(*p8)); + PTR_INC(uint16_t, p16, (height - 1) * p16_row_bytes + (width - 1) * sizeof(*p16)); int bump8 = width * sizeof(*p8) - p8_row_bytes; int bump16 = width * sizeof(*p16) - p16_row_bytes; for (; height-- != 0; - CPTR_INC(uint8, p8, bump8), PTR_INC(uint16, p16, bump16)) { + CPTR_INC(uint8_t, p8, bump8), PTR_INC(uint16_t, p16, bump16)) { for (int w = width; w-- != 0; --p8, --p16) { - uint32 pix = *p8; + uint32_t pix = *p8; pix |= pix << 8; - *p16 = static_cast(pix); + *p16 = static_cast(pix); } } } @@ -229,7 +230,7 @@ bool CommonInitDecode(absl::string_view png_string, int desired_channels, CommonFreeDecode(context); return false; } - context->data = absl::bit_cast(png_string.data()); + context->data = absl::bit_cast(png_string.data()); context->data_left = png_string.size(); png_set_read_fn(context->png_ptr, context, StringReader); png_read_info(context->png_ptr, context->info_ptr); @@ -342,9 +343,9 @@ bool CommonFinishDecode(png_bytep data, int row_bytes, DecodeContext* context) { // Synthesize 16 bits from 8 if requested. if (context->need_to_synthesize_16) - Convert8to16(absl::bit_cast(data), context->channels, row_bytes, - context->width, context->height, absl::bit_cast(data), - row_bytes); + Convert8to16(absl::bit_cast(data), context->channels, row_bytes, + context->width, context->height, + absl::bit_cast(data), row_bytes); return ok; } diff --git a/tensorflow/core/lib/png/png_io.h b/tensorflow/core/lib/png/png_io.h index a7fff84c1961ef..71d14546613328 100644 --- a/tensorflow/core/lib/png/png_io.h +++ b/tensorflow/core/lib/png/png_io.h @@ -45,7 +45,7 @@ namespace png { // Handy container for decoding information and struct pointers struct DecodeContext { - const uint8* data; + const uint8_t* data; int data_left; png_structp png_ptr; png_infop info_ptr; diff --git a/tensorflow/core/lib/strings/base64_test.cc b/tensorflow/core/lib/strings/base64_test.cc index df4a4bcf59353c..ce68ee2b4dea72 100644 --- a/tensorflow/core/lib/strings/base64_test.cc +++ b/tensorflow/core/lib/strings/base64_test.cc @@ -20,7 +20,7 @@ limitations under the License. namespace tensorflow { TEST(Base64, EncodeDecode) { - const string original = "a simple test message!"; + const std::string original = "a simple test message!"; tstring encoded; TF_EXPECT_OK(Base64Encode(original, &encoded)); EXPECT_EQ("YSBzaW1wbGUgdGVzdCBtZXNzYWdlIQ", encoded); diff --git a/tensorflow/core/lib/strings/ordered_code.cc b/tensorflow/core/lib/strings/ordered_code.cc index 31b08152f963e2..8e69dbe7fc6809 100644 --- a/tensorflow/core/lib/strings/ordered_code.cc +++ b/tensorflow/core/lib/strings/ordered_code.cc @@ -134,7 +134,7 @@ static const char kFFCharacter = '\000'; // Combined with kEscape2 static const char kEscape1_Separator[2] = {kEscape1, kSeparator}; // Append to "*dest" the "len" bytes starting from "*src". 
-inline static void AppendBytes(string* dest, const char* src, size_t len) { +inline static void AppendBytes(std::string* dest, const char* src, size_t len) { dest->append(src, len); } @@ -164,7 +164,8 @@ const char* OrderedCode::TEST_SkipToNextSpecialByte(const char* start, // Helper routine to encode "s" and append to "*dest", escaping special // characters. -inline static void EncodeStringFragment(string* dest, absl::string_view s) { +inline static void EncodeStringFragment(std::string* dest, + absl::string_view s) { const char* p = s.data(); const char* limit = p + s.size(); const char* copy_start = p; @@ -191,12 +192,12 @@ inline static void EncodeStringFragment(string* dest, absl::string_view s) { } } -void OrderedCode::WriteString(string* dest, absl::string_view s) { +void OrderedCode::WriteString(std::string* dest, absl::string_view s) { EncodeStringFragment(dest, s); AppendBytes(dest, kEscape1_Separator, 2); } -void OrderedCode::WriteNumIncreasing(string* dest, uint64 val) { +void OrderedCode::WriteNumIncreasing(std::string* dest, uint64_t val) { // Values are encoded with a single byte length prefix, followed // by the actual value in big-endian format with leading 0 bytes // dropped. @@ -216,7 +217,8 @@ void OrderedCode::WriteNumIncreasing(string* dest, uint64 val) { // If parse succeeds, return true, consume encoding from // "*src", and if result != NULL append the decoded string to "*result". // Otherwise, return false and leave both undefined. -inline static bool ReadStringInternal(absl::string_view* src, string* result) { +inline static bool ReadStringInternal(absl::string_view* src, + std::string* result) { const char* start = src->data(); const char* string_limit = src->data() + src->size(); @@ -271,11 +273,11 @@ inline static bool ReadStringInternal(absl::string_view* src, string* result) { return false; } -bool OrderedCode::ReadString(absl::string_view* src, string* result) { +bool OrderedCode::ReadString(absl::string_view* src, std::string* result) { return ReadStringInternal(src, result); } -bool OrderedCode::ReadNumIncreasing(absl::string_view* src, uint64* result) { +bool OrderedCode::ReadNumIncreasing(absl::string_view* src, uint64_t* result) { if (src->empty()) { return false; // Not enough bytes } @@ -294,7 +296,7 @@ bool OrderedCode::ReadNumIncreasing(absl::string_view* src, uint64* result) { } if (result) { - uint64 tmp = 0; + uint64_t tmp = 0; for (size_t i = 0; i < len; i++) { tmp <<= 8; tmp |= static_cast((*src)[1 + i]); @@ -305,7 +307,7 @@ bool OrderedCode::ReadNumIncreasing(absl::string_view* src, uint64* result) { return true; } -void OrderedCode::TEST_Corrupt(string* str, int k) { +void OrderedCode::TEST_Corrupt(std::string* str, int k) { int seen_seps = 0; for (size_t i = 0; i + 1 < str->size(); i++) { if ((*str)[i] == kEscape1 && (*str)[i + 1] == kSeparator) { @@ -389,7 +391,7 @@ static const char kLengthToHeaderBits[1 + kMaxSigned64Length][2] = { // This array maps encoding lengths to the header bits that overlap with // the payload and need fixing when reading. -static const uint64 kLengthToMask[1 + kMaxSigned64Length] = { +static const uint64_t kLengthToMask[1 + kMaxSigned64Length] = { 0ULL, 0x80ULL, 0xc000ULL, @@ -408,7 +410,7 @@ static const uint64 kLengthToMask[1 + kMaxSigned64Length] = { // bit position (the highest bit position in a positive int64 is 63). // For a negative number n, we count the bits in ~n. // That is, length = kBitsToLength[tsl::Log2Floor64(n < 0 ? ~n : n) + 1]. 
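// Worked example (editorial aside, not part of the patch): for val == 100,
// tsl::Log2Floor64(100) == 6, so kBitsToLength[6 + 1] == 2 and
// WriteSignedNumIncreasing further down emits a 2-byte encoding; values in
// [0, 64) take its single-byte (length == 1) fast path instead.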
-static const int8 kBitsToLength[1 + 63] = { +static const int8_t kBitsToLength[1 + 63] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10}; @@ -418,23 +420,23 @@ static inline int SignedEncodingLength(int64_t n) { return kBitsToLength[tsl::Log2Floor64(n < 0 ? ~n : n) + 1]; } -static void StoreBigEndian64(char* dst, uint64 v) { +static void StoreBigEndian64(char* dst, uint64_t v) { for (int i = 0; i < 8; i++) { dst[i] = (v >> (56 - 8 * i)) & 0xff; } } -static uint64 LoadBigEndian64(const char* src) { - uint64 result = 0; +static uint64_t LoadBigEndian64(const char* src) { + uint64_t result = 0; for (int i = 0; i < 8; i++) { unsigned char c = static_cast(src[i]); - result |= static_cast(c) << (56 - 8 * i); + result |= static_cast(c) << (56 - 8 * i); } return result; } -void OrderedCode::WriteSignedNumIncreasing(string* dest, int64_t val) { - const uint64 x = val < 0 ? ~val : val; +void OrderedCode::WriteSignedNumIncreasing(std::string* dest, int64_t val) { + const uint64_t x = val < 0 ? ~val : val; if (x < 64) { // fast path for encoding length == 1 *dest += kLengthToHeaderBits[1][0] ^ val; return; @@ -458,12 +460,12 @@ void OrderedCode::WriteSignedNumIncreasing(string* dest, int64_t val) { bool OrderedCode::ReadSignedNumIncreasing(absl::string_view* src, int64_t* result) { if (src->empty()) return false; - const uint64 xor_mask = (!((*src)[0] & 0x80)) ? ~0ULL : 0ULL; + const uint64_t xor_mask = (!((*src)[0] & 0x80)) ? ~0ULL : 0ULL; const unsigned char first_byte = (*src)[0] ^ (xor_mask & 0xff); // now calculate and test length, and set x to raw (unmasked) result int len; - uint64 x; + uint64_t x; if (first_byte != 0xff) { len = 7 - tsl::Log2Floor64(first_byte ^ 0xff); if (src->size() < static_cast(len)) return false; diff --git a/tensorflow/core/lib/strings/ordered_code.h b/tensorflow/core/lib/strings/ordered_code.h index e7485bd57f7e15..2d2811ac5af30f 100644 --- a/tensorflow/core/lib/strings/ordered_code.h +++ b/tensorflow/core/lib/strings/ordered_code.h @@ -54,9 +54,9 @@ class OrderedCode { // Encoding routines: each one of the following routines append // one item to "*dest" in an encoding where larger values are // ordered lexicographically after smaller values. - static void WriteString(string* dest, absl::string_view str); - static void WriteNumIncreasing(string* dest, uint64 num); - static void WriteSignedNumIncreasing(string* dest, int64_t num); + static void WriteString(std::string* dest, absl::string_view str); + static void WriteNumIncreasing(std::string* dest, uint64_t num); + static void WriteSignedNumIncreasing(std::string* dest, int64_t num); // ------------------------------------------------------------------- // Decoding routines: these extract an item earlier encoded using @@ -66,13 +66,13 @@ class OrderedCode { // result. In case of string result, the decoded string is appended to // "*result". Returns true if the next item was read successfully, false // otherwise. - static bool ReadString(absl::string_view* src, string* result); - static bool ReadNumIncreasing(absl::string_view* src, uint64* result); + static bool ReadString(absl::string_view* src, std::string* result); + static bool ReadNumIncreasing(absl::string_view* src, uint64_t* result); static bool ReadSignedNumIncreasing(absl::string_view* src, int64_t* result); // Helper for testing: corrupt "*str" by changing the kth item separator // in the string. 
- static void TEST_Corrupt(string* str, int k); + static void TEST_Corrupt(std::string* str, int k); // Helper for testing. // SkipToNextSpecialByte is an internal routine defined in the .cc file diff --git a/tensorflow/core/lib/strings/ordered_code_test.cc b/tensorflow/core/lib/strings/ordered_code_test.cc index 4717007fc27fc2..9ef3a8dafd2138 100644 --- a/tensorflow/core/lib/strings/ordered_code_test.cc +++ b/tensorflow/core/lib/strings/ordered_code_test.cc @@ -32,8 +32,8 @@ namespace tensorflow { namespace strings { namespace { -string RandomString(random::SimplePhilox* rnd, size_t len) { - string x; +std::string RandomString(random::SimplePhilox* rnd, size_t len) { + std::string x; for (size_t i = 0; i < len; i++) { x += rnd->Uniform(256); } @@ -45,33 +45,34 @@ string RandomString(random::SimplePhilox* rnd, size_t len) { // Read/WriteIncreasing are defined for string, uint64, int64 below. template -void OCWriteIncreasing(string* dest, const T& val); +void OCWriteIncreasing(std::string* dest, const T& val); template bool OCReadIncreasing(absl::string_view* src, T* result); // Read/WriteIncreasing template <> -void OCWriteIncreasing(string* dest, const string& val) { +void OCWriteIncreasing(std::string* dest, const std::string& val) { OrderedCode::WriteString(dest, val); } template <> -bool OCReadIncreasing(absl::string_view* src, string* result) { +bool OCReadIncreasing(absl::string_view* src, + std::string* result) { return OrderedCode::ReadString(src, result); } // Read/WriteIncreasing template <> -void OCWriteIncreasing(string* dest, const uint64& val) { +void OCWriteIncreasing(std::string* dest, const uint64_t& val) { OrderedCode::WriteNumIncreasing(dest, val); } template <> -bool OCReadIncreasing(absl::string_view* src, uint64* result) { +bool OCReadIncreasing(absl::string_view* src, uint64_t* result) { return OrderedCode::ReadNumIncreasing(src, result); } // Read/WriteIncreasing template <> -void OCWriteIncreasing(string* dest, const int64_t& val) { +void OCWriteIncreasing(std::string* dest, const int64_t& val) { OrderedCode::WriteSignedNumIncreasing(dest, val); } template <> @@ -80,14 +81,14 @@ bool OCReadIncreasing(absl::string_view* src, int64_t* result) { } template -string OCWrite(T val) { - string result; +std::string OCWrite(T val) { + std::string result; OCWriteIncreasing(&result, val); return result; } template -void OCWriteToString(string* result, T val) { +void OCWriteToString(std::string* result, T val) { OCWriteIncreasing(result, val); } @@ -100,7 +101,7 @@ bool OCRead(absl::string_view* s, T* val) { // Numbers template -T TestRead(const string& a) { +T TestRead(const std::string& a) { // gracefully reject any proper prefix of an encoding for (int i = 0; i < a.size() - 1; ++i) { absl::string_view s(a.data(), i); @@ -124,9 +125,9 @@ void TestWriteRead(T expected) { // output. 
template void TestWriteAppends(T first, U second) { - string encoded; + std::string encoded; OCWriteToString(&encoded, first); - string encoded_first_only = encoded; + std::string encoded_first_only = encoded; OCWriteToString(&encoded, second); EXPECT_NE(encoded, encoded_first_only); EXPECT_TRUE(absl::StartsWith(encoded, encoded_first_only)); @@ -149,7 +150,7 @@ void TestNumbers(T multiplier) { random::SimplePhilox rnd(&philox); for (int bits = 1; bits <= std::numeric_limits().digits; ++bits) { // test random non-negative numbers with given number of significant bits - const uint64 mask = (~0ULL) >> (64 - bits); + const uint64_t mask = (~0ULL) >> (64 - bits); for (int i = 0; i < 1000; i++) { T x = rnd.Rand64() & mask; TestWriteRead(multiplier * x); @@ -160,16 +161,18 @@ void TestNumbers(T multiplier) { } // Return true iff 'a' is "before" 'b' -bool CompareStrings(const string& a, const string& b) { return (a < b); } +bool CompareStrings(const std::string& a, const std::string& b) { + return (a < b); +} template void TestNumberOrdering() { // first the negative numbers (if T is signed, otherwise no-op) - string laststr = OCWrite(std::numeric_limits().min()); + std::string laststr = OCWrite(std::numeric_limits().min()); for (T num = std::numeric_limits().min() / 2; num != 0; num /= 2) { - string strminus1 = OCWrite(num - 1); - string str = OCWrite(num); - string strplus1 = OCWrite(num + 1); + std::string strminus1 = OCWrite(num - 1); + std::string str = OCWrite(num); + std::string strplus1 = OCWrite(num + 1); CHECK(CompareStrings(strminus1, str)); CHECK(CompareStrings(str, strplus1)); @@ -185,9 +188,9 @@ void TestNumberOrdering() { T num = 1; while (num < std::numeric_limits().max() / 2) { num *= 2; - string strminus1 = OCWrite(num - 1); - string str = OCWrite(num); - string strplus1 = OCWrite(num + 1); + std::string strminus1 = OCWrite(num - 1); + std::string str = OCWrite(num); + std::string strplus1 = OCWrite(num + 1); CHECK(CompareStrings(strminus1, str)); CHECK(CompareStrings(str, strplus1)); @@ -199,7 +202,7 @@ void TestNumberOrdering() { } // Helper routine for testing TEST_SkipToNextSpecialByte -size_t FindSpecial(const string& x) { +size_t FindSpecial(const std::string& x) { const char* p = x.data(); const char* limit = p + x.size(); const char* result = OrderedCode::TEST_SkipToNextSpecialByte(p, limit); @@ -209,15 +212,15 @@ size_t FindSpecial(const string& x) { // Helper function template to create strings from string literals (excluding // the terminal zero byte of the underlying character array). template -string ByteSequence(const char (&arr)[N]) { - return string(arr, N - 1); +std::string ByteSequence(const char (&arr)[N]) { + return std::string(arr, N - 1); } TEST(OrderedCode, SkipToNextSpecialByte) { for (size_t len = 0; len < 256; len++) { random::PhiloxRandom philox(301, 17); random::SimplePhilox rnd(&philox); - string x; + std::string x; while (x.size() < len) { char c = 1 + rnd.Uniform(254); ASSERT_NE(c, 0); @@ -228,7 +231,7 @@ TEST(OrderedCode, SkipToNextSpecialByte) { for (size_t special_pos = 0; special_pos < len; special_pos++) { for (size_t special_test = 0; special_test < 2; special_test++) { const char special_byte = (special_test == 0) ? 
0 : 255; - string y = x; + std::string y = x; y[special_pos] = special_byte; EXPECT_EQ(FindSpecial(y), special_pos); if (special_pos < 16) { @@ -283,9 +286,9 @@ TEST(OrderedCode, ExhaustiveFindSpecial) { EXPECT_EQ(count, 256 * 256 * 256 * 2); } -TEST(Uint64, EncodeDecode) { TestNumbers(1); } +TEST(Uint64, EncodeDecode) { TestNumbers(1); } -TEST(Uint64, Ordering) { TestNumberOrdering(); } +TEST(Uint64, Ordering) { TestNumberOrdering(); } TEST(Int64, EncodeDecode) { TestNumbers(1); @@ -295,15 +298,15 @@ TEST(Int64, EncodeDecode) { TEST(Int64, Ordering) { TestNumberOrdering(); } // Returns the bitwise complement of s. -inline string StrNot(const string& s) { - string result; - for (string::const_iterator it = s.begin(); it != s.end(); ++it) +inline std::string StrNot(const std::string& s) { + std::string result; + for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) result.push_back(~*it); return result; } template -void TestInvalidEncoding(const string& s) { +void TestInvalidEncoding(const std::string& s) { absl::string_view p(s); EXPECT_FALSE(OCRead(&p, nullptr)); EXPECT_EQ(s, p); @@ -311,11 +314,11 @@ void TestInvalidEncoding(const string& s) { TEST(OrderedCodeInvalidEncodingsTest, Overflow) { // 1U << 64, increasing and decreasing - const string k2xx64U = "\x09\x01" + string(8, 0); - TestInvalidEncoding(k2xx64U); + const std::string k2xx64U = "\x09\x01" + std::string(8, 0); + TestInvalidEncoding(k2xx64U); // 1 << 63 and ~(1 << 63), increasing and decreasing - const string k2xx63 = "\xff\xc0\x80" + string(7, 0); + const std::string k2xx63 = "\xff\xc0\x80" + std::string(7, 0); TestInvalidEncoding(k2xx63); TestInvalidEncoding(StrNot(k2xx63)); } @@ -332,11 +335,11 @@ TEST(OrderedCodeInvalidEncodingsDeathTest, NonCanonical) { for (int n = 2; n <= 9; ++n) { // The zero in non_minimal[1] is "redundant". - string non_minimal = - string(1, n - 1) + string(1, 0) + RandomString(&rnd, n - 2); + std::string non_minimal = + std::string(1, n - 1) + std::string(1, 0) + RandomString(&rnd, n - 2); EXPECT_EQ(n, non_minimal.length()); - EXPECT_NE(OCWrite(0), non_minimal); + EXPECT_NE(OCWrite(0), non_minimal); #ifndef NDEBUG absl::string_view s(non_minimal); EXPECT_DEATH(OrderedCode::ReadNumIncreasing(&s, nullptr), @@ -348,11 +351,12 @@ TEST(OrderedCodeInvalidEncodingsDeathTest, NonCanonical) { for (int n = 2; n <= 10; ++n) { // Header with 1 sign bit and n-1 size bits. - string header = string(n / 8, 0xff) + string(1, 0xff << (8 - (n % 8))); + std::string header = + std::string(n / 8, 0xff) + std::string(1, 0xff << (8 - (n % 8))); // There are more than 7 zero bits between header bits and "payload". - string non_minimal = header + - string(1, rnd.Uniform(256) & ~*header.rbegin()) + - RandomString(&rnd, n - header.length() - 1); + std::string non_minimal = + header + std::string(1, rnd.Uniform(256) & ~*header.rbegin()) + + RandomString(&rnd, n - header.length() - 1); EXPECT_EQ(n, non_minimal.length()); EXPECT_NE(OCWrite(0), non_minimal); @@ -369,7 +373,7 @@ TEST(OrderedCodeInvalidEncodingsDeathTest, NonCanonical) { // Returns random number with specified number of bits, // i.e., in the range [2^(bits-1),2^bits). -uint64 NextBits(random::SimplePhilox* rnd, int bits) { +uint64_t NextBits(random::SimplePhilox* rnd, int bits) { return (bits != 0) ? 
(rnd->Rand64() % (1LL << (bits - 1))) + (1LL << (bits - 1)) : 0; @@ -385,7 +389,7 @@ void BM_WriteNum(::testing::benchmark::State& state, T multiplier) { for (int i = 0; i < kValues; i++) { values[i] = NextBits(&rnd, state.max_iterations % 64) * multiplier; } - string result; + std::string result; int index = 0; for (auto i : state) { result.clear(); @@ -400,12 +404,12 @@ void BM_ReadNum(::testing::benchmark::State& state, T multiplier) { random::SimplePhilox rnd(&philox); // Use enough distinct values to confuse the branch predictor constexpr int kValues = 64; - string values[kValues]; + std::string values[kValues]; for (int i = 0; i < kValues; i++) { T val = NextBits(&rnd, i % 64) * multiplier; values[i] = OCWrite(val); } - uint32 index = 0; + uint32_t index = 0; for (auto i : state) { T val; absl::string_view s = values[index++ % kValues]; @@ -423,7 +427,7 @@ void BM_ReadNum(::testing::benchmark::State& state, T multiplier) { } \ BENCHMARK(BM_Read##name) -BENCHMARK_NUM(NumIncreasing, uint64, 1); +BENCHMARK_NUM(NumIncreasing, uint64_t, 1); BENCHMARK_NUM(SignedNum, int64_t, 1); BENCHMARK_NUM(SignedNumNegative, int64_t, -1); @@ -437,30 +441,30 @@ TEST(String, EncodeDecode) { random::SimplePhilox rnd(&philox); for (int len = 0; len < 256; len++) { - const string a = RandomString(&rnd, len); + const std::string a = RandomString(&rnd, len); TestWriteRead(a); for (int len2 = 0; len2 < 64; len2++) { - const string b = RandomString(&rnd, len2); + const std::string b = RandomString(&rnd, len2); TestWriteAppends(a, b); - string out; - OCWriteToString(&out, a); - OCWriteToString(&out, b); + std::string out; + OCWriteToString(&out, a); + OCWriteToString(&out, b); - string a2, b2, dummy; + std::string a2, b2, dummy; absl::string_view s = out; absl::string_view s2 = out; - CHECK(OCRead(&s, &a2)); - CHECK(OCRead(&s2, nullptr)); + CHECK(OCRead(&s, &a2)); + CHECK(OCRead(&s2, nullptr)); CHECK_EQ(s, s2); - CHECK(OCRead(&s, &b2)); - CHECK(OCRead(&s2, nullptr)); + CHECK(OCRead(&s, &b2)); + CHECK(OCRead(&s2, nullptr)); CHECK_EQ(s, s2); - CHECK(!OCRead(&s, &dummy)); - CHECK(!OCRead(&s2, nullptr)); + CHECK(!OCRead(&s, &dummy)); + CHECK(!OCRead(&s2, nullptr)); CHECK_EQ(a, a2); CHECK_EQ(b, b2); CHECK(s.empty()); @@ -472,8 +476,8 @@ TEST(String, EncodeDecode) { // 'str' is a string literal that may contain '\0'. 
#define STATIC_STR(str) StringPiece((str), sizeof(str) - 1) -string EncodeStringIncreasing(absl::string_view value) { - string encoded; +std::string EncodeStringIncreasing(absl::string_view value) { + std::string encoded; OrderedCode::WriteString(&encoded, value); return encoded; } @@ -515,19 +519,20 @@ TEST(String, Increasing) { } TEST(EncodingIsExpected, String) { - std::vector> data = { - {"", string("\x00\x01", 2)}, - {"foo", string("foo\x00\x01", 5)}, - {"hello", string("hello\x00\x01", 7)}, - {string("\x00\x01\xff", 3), string("\x00\xff\x01\xff\x00\x00\x01", 7)}, + std::vector> data = { + {"", std::string("\x00\x01", 2)}, + {"foo", std::string("foo\x00\x01", 5)}, + {"hello", std::string("hello\x00\x01", 7)}, + {std::string("\x00\x01\xff", 3), + std::string("\x00\xff\x01\xff\x00\x00\x01", 7)}, }; for (const auto& t : data) { - string result; + std::string result; OrderedCode::WriteString(&result, t.first); EXPECT_EQ(t.second, result); absl::string_view in = result; - string decoded; + std::string decoded; EXPECT_TRUE(OrderedCode::ReadString(&in, &decoded)); EXPECT_EQ(t.first, decoded); EXPECT_EQ("", in); @@ -535,7 +540,7 @@ TEST(EncodingIsExpected, String) { } TEST(EncodingIsExpected, Unsigned) { - std::vector> data = { + std::vector> data = { {0x0ull, ByteSequence("\000")}, {0x1ull, ByteSequence("\001\001")}, {0x2ull, ByteSequence("\001\002")}, @@ -753,13 +758,13 @@ TEST(EncodingIsExpected, Unsigned) { ByteSequence("\010\200\000\000\000\000\000\000\001")}, }; for (const auto& t : data) { - uint64 num = t.first; - string result; + uint64_t num = t.first; + std::string result; OrderedCode::WriteNumIncreasing(&result, num); EXPECT_EQ(t.second, result) << std::hex << num; absl::string_view in = result; - uint64 decoded; + uint64_t decoded; EXPECT_TRUE(OrderedCode::ReadNumIncreasing(&in, &decoded)); EXPECT_EQ(num, decoded); EXPECT_EQ("", in); @@ -767,7 +772,7 @@ TEST(EncodingIsExpected, Unsigned) { } TEST(EncodingIsExpected, Signed) { - std::vector> data = { + std::vector> data = { {0ll, ByteSequence("\200")}, {1ll, ByteSequence("\201")}, {2ll, ByteSequence("\202")}, @@ -1201,7 +1206,7 @@ TEST(EncodingIsExpected, Signed) { }; for (const auto& t : data) { int64_t num = t.first; - string result; + std::string result; OrderedCode::WriteSignedNumIncreasing(&result, num); EXPECT_EQ(t.second, result) << std::hex << num; @@ -1216,15 +1221,15 @@ TEST(EncodingIsExpected, Signed) { void BM_WriteString(::testing::benchmark::State& state, int len) { random::PhiloxRandom philox(301, 17); random::SimplePhilox rnd(&philox); - string x; + std::string x; for (int i = 0; i < len; i++) { x += rnd.Uniform(256); } - string y; + std::string y; for (auto s : state) { y.clear(); - OCWriteToString(&y, x); + OCWriteToString(&y, x); } state.SetBytesProcessed(state.iterations() * len); } @@ -1232,18 +1237,18 @@ void BM_WriteString(::testing::benchmark::State& state, int len) { void BM_ReadString(::testing::benchmark::State& state, int len) { random::PhiloxRandom philox(301, 17); random::SimplePhilox rnd(&philox); - string x; + std::string x; for (int i = 0; i < len; i++) { x += rnd.Uniform(256); } - string data; - OCWriteToString(&data, x); - string result; + std::string data; + OCWriteToString(&data, x); + std::string result; for (auto i : state) { result.clear(); absl::string_view s = data; - OCRead(&s, &result); + OCRead(&s, &result); } state.SetBytesProcessed(state.iterations() * len); } diff --git a/tensorflow/core/lib/strings/proto_serialization_test.cc b/tensorflow/core/lib/strings/proto_serialization_test.cc 
index 052a6dff016d25..fa2e2364d9f216 100644 --- a/tensorflow/core/lib/strings/proto_serialization_test.cc +++ b/tensorflow/core/lib/strings/proto_serialization_test.cc @@ -47,7 +47,7 @@ static void BM_ProtoSerializationToString(::testing::benchmark::State& state) { GraphDef graph_def = MakeGraphDef(num_nodes); for (auto i : state) { - string serialized; + std::string serialized; testing::DoNotOptimize( SerializeToStringDeterministic(graph_def, &serialized)); } diff --git a/tensorflow/core/lib/wav/wav_io.cc b/tensorflow/core/lib/wav/wav_io.cc index 41db93ae910a18..5edbfd28bdccbf 100644 --- a/tensorflow/core/lib/wav/wav_io.cc +++ b/tensorflow/core/lib/wav/wav_io.cc @@ -73,7 +73,7 @@ constexpr char kRiffType[] = "WAVE"; constexpr char kFormatChunkId[] = "fmt "; constexpr char kDataChunkId[] = "data"; -inline int16 FloatToInt16Sample(float data) { +inline int16_t FloatToInt16Sample(float data) { constexpr float kMultiplier = 1.0f * (1 << 15); return std::min(std::max(roundf(data * kMultiplier), std::numeric_limits::min()), @@ -212,7 +212,7 @@ absl::Status EncodeAudioAsS16LEWav(const float* audio, size_t sample_rate, for (size_t i = 0; i < num_samples; ++i) { int16_t sample = FloatToInt16Sample(audio[i]); core::EncodeFixed16(&data[i * kBytesPerSample], - static_cast(sample)); + static_cast(sample)); } return absl::OkStatus(); } @@ -230,13 +230,14 @@ template Status EncodeAudioAsS16LEWav(const float* audio, absl::Status DecodeLin16WaveAsFloatVector(const std::string& wav_string, std::vector* float_values, - uint32* sample_count, - uint16* channel_count, - uint32* sample_rate) { + uint32_t* sample_count, + uint16_t* channel_count, + uint32_t* sample_rate) { int offset = 0; TF_RETURN_IF_ERROR(ExpectText(wav_string, kRiffChunkId, &offset)); - uint32 total_file_size; - TF_RETURN_IF_ERROR(ReadValue(wav_string, &total_file_size, &offset)); + uint32_t total_file_size; + TF_RETURN_IF_ERROR( + ReadValue(wav_string, &total_file_size, &offset)); TF_RETURN_IF_ERROR(ExpectText(wav_string, kRiffType, &offset)); std::string found_text; TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &found_text, &offset)); @@ -252,57 +253,61 @@ absl::Status DecodeLin16WaveAsFloatVector(const std::string& wav_string, found_text != "link" && found_text != "axml") { return errors::InvalidArgument("Unexpected field ", found_text); } - uint32 size_of_chunk; - TF_RETURN_IF_ERROR(ReadValue(wav_string, &size_of_chunk, &offset)); + uint32_t size_of_chunk; + TF_RETURN_IF_ERROR( + ReadValue(wav_string, &size_of_chunk, &offset)); TF_RETURN_IF_ERROR( IncrementOffset(offset, size_of_chunk, wav_string.size(), &offset)); TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &found_text, &offset)); } - uint32 format_chunk_size; + uint32_t format_chunk_size; TF_RETURN_IF_ERROR( - ReadValue(wav_string, &format_chunk_size, &offset)); + ReadValue(wav_string, &format_chunk_size, &offset)); if ((format_chunk_size != 16) && (format_chunk_size != 18)) { return errors::InvalidArgument( "Bad format chunk size for WAV: Expected 16 or 18, but got", format_chunk_size); } - uint16 audio_format; - TF_RETURN_IF_ERROR(ReadValue(wav_string, &audio_format, &offset)); + uint16_t audio_format; + TF_RETURN_IF_ERROR(ReadValue(wav_string, &audio_format, &offset)); if (audio_format != 1) { return errors::InvalidArgument( "Bad audio format for WAV: Expected 1 (PCM), but got", audio_format); } - TF_RETURN_IF_ERROR(ReadValue(wav_string, channel_count, &offset)); + TF_RETURN_IF_ERROR(ReadValue(wav_string, channel_count, &offset)); if (*channel_count < 1) { return 
errors::InvalidArgument( "Bad number of channels for WAV: Expected at least 1, but got ", *channel_count); } - TF_RETURN_IF_ERROR(ReadValue(wav_string, sample_rate, &offset)); - uint32 bytes_per_second; - TF_RETURN_IF_ERROR(ReadValue(wav_string, &bytes_per_second, &offset)); - uint16 bytes_per_sample; - TF_RETURN_IF_ERROR(ReadValue(wav_string, &bytes_per_sample, &offset)); + TF_RETURN_IF_ERROR(ReadValue(wav_string, sample_rate, &offset)); + uint32_t bytes_per_second; + TF_RETURN_IF_ERROR( + ReadValue(wav_string, &bytes_per_second, &offset)); + uint16_t bytes_per_sample; + TF_RETURN_IF_ERROR( + ReadValue(wav_string, &bytes_per_sample, &offset)); // Confusingly, bits per sample is defined as holding the number of bits for // one channel, unlike the definition of sample used elsewhere in the WAV // spec. For example, bytes per sample is the memory needed for all channels // for one point in time. - uint16 bits_per_sample; - TF_RETURN_IF_ERROR(ReadValue(wav_string, &bits_per_sample, &offset)); + uint16_t bits_per_sample; + TF_RETURN_IF_ERROR( + ReadValue(wav_string, &bits_per_sample, &offset)); if (bits_per_sample != 16) { return errors::InvalidArgument( "Can only read 16-bit WAV files, but received ", bits_per_sample); } - const uint32 expected_bytes_per_sample = + const uint32_t expected_bytes_per_sample = ((bits_per_sample * *channel_count) + 7) / 8; if (bytes_per_sample != expected_bytes_per_sample) { return errors::InvalidArgument( "Bad bytes per sample in WAV header: Expected ", expected_bytes_per_sample, " but got ", bytes_per_sample); } - const uint64 expected_bytes_per_second = - static_cast(bytes_per_sample) * *sample_rate; - if (static_cast(bytes_per_second) != expected_bytes_per_second) { + const uint64_t expected_bytes_per_second = + static_cast(bytes_per_sample) * *sample_rate; + if (static_cast(bytes_per_second) != expected_bytes_per_second) { return errors::InvalidArgument( "Bad bytes per second in WAV header: Expected ", expected_bytes_per_second, " but got ", bytes_per_second, @@ -318,12 +323,12 @@ absl::Status DecodeLin16WaveAsFloatVector(const std::string& wav_string, while (offset < wav_string.size()) { std::string chunk_id; TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &chunk_id, &offset)); - uint32 chunk_size; - TF_RETURN_IF_ERROR(ReadValue(wav_string, &chunk_size, &offset)); - if (chunk_size > std::numeric_limits::max()) { + uint32_t chunk_size; + TF_RETURN_IF_ERROR(ReadValue(wav_string, &chunk_size, &offset)); + if (chunk_size > std::numeric_limits::max()) { return errors::InvalidArgument( "WAV data chunk '", chunk_id, "' is too large: ", chunk_size, - " bytes, but the limit is ", std::numeric_limits::max()); + " bytes, but the limit is ", std::numeric_limits::max()); } if (chunk_id == kDataChunkId) { if (was_data_found) { @@ -331,18 +336,18 @@ absl::Status DecodeLin16WaveAsFloatVector(const std::string& wav_string, } was_data_found = true; *sample_count = chunk_size / bytes_per_sample; - const uint32 data_count = *sample_count * *channel_count; + const uint32_t data_count = *sample_count * *channel_count; int unused_new_offset = 0; // Validate that the data exists before allocating space for it // (prevent easy OOM errors). 
- TF_RETURN_IF_ERROR(IncrementOffset(offset, sizeof(int16) * data_count, + TF_RETURN_IF_ERROR(IncrementOffset(offset, sizeof(int16_t) * data_count, wav_string.size(), &unused_new_offset)); float_values->resize(data_count); for (int i = 0; i < data_count; ++i) { int16_t single_channel_value = 0; TF_RETURN_IF_ERROR( - ReadValue(wav_string, &single_channel_value, &offset)); + ReadValue(wav_string, &single_channel_value, &offset)); (*float_values)[i] = Int16SampleToFloat(single_channel_value); } } else { diff --git a/tensorflow/core/lib/wav/wav_io.h b/tensorflow/core/lib/wav/wav_io.h index 99a3df5038e68b..4ffe789dd282d7 100644 --- a/tensorflow/core/lib/wav/wav_io.h +++ b/tensorflow/core/lib/wav/wav_io.h @@ -65,9 +65,9 @@ extern template Status EncodeAudioAsS16LEWav(const float* audio, // The results are output as floats within the range -1 to 1, absl::Status DecodeLin16WaveAsFloatVector(const std::string& wav_string, std::vector* float_values, - uint32* sample_count, - uint16* channel_count, - uint32* sample_rate); + uint32_t* sample_count, + uint16_t* channel_count, + uint32_t* sample_rate); // Everything below here is only exposed publicly for testing purposes. @@ -88,8 +88,8 @@ absl::Status ReadValue(const std::string& data, T* value, int* offset) { memcpy(value, data.data() + *offset, sizeof(T)); } else { *value = 0; - const uint8* data_buf = - reinterpret_cast(data.data() + *offset); + const uint8_t* data_buf = + reinterpret_cast(data.data() + *offset); int shift = 0; for (int i = 0; i < sizeof(T); ++i, shift += 8) { *value = *value | (data_buf[i] << shift); diff --git a/tensorflow/core/lib/wav/wav_io_test.cc b/tensorflow/core/lib/wav/wav_io_test.cc index dfc75257cc85f5..68d0c0fa35fbe7 100644 --- a/tensorflow/core/lib/wav/wav_io_test.cc +++ b/tensorflow/core/lib/wav/wav_io_test.cc @@ -31,10 +31,10 @@ namespace wav { // These are defined in wav_io.cc, and the signatures are here so we don't have // to expose them in the public header. 
-absl::Status ExpectText(const string& data, const string& expected_text, - int* offset); -absl::Status ReadString(const string& data, int expected_length, string* value, - int* offset); +absl::Status ExpectText(const std::string& data, + const std::string& expected_text, int* offset); +absl::Status ReadString(const std::string& data, int expected_length, + std::string* value, int* offset); TEST(WavIO, BadArguments) { float audio[] = {0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f}; @@ -76,7 +76,7 @@ TEST(WavIO, BadArguments) { TEST(WavIO, BasicEven) { float audio[] = {0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f}; - string result; + std::string result; TF_EXPECT_OK(EncodeAudioAsS16LEWav(audio, 44100, 2, 3, &result)); EXPECT_EQ(56, result.size()); TF_EXPECT_OK(EncodeAudioAsS16LEWav(audio, 22050, 1, 6, &result)); @@ -87,19 +87,19 @@ TEST(WavIO, BasicEven) { TEST(WavIO, BasicOdd) { float audio[] = {0.0f, 0.1f, 0.2f, 0.3f, 0.4f}; - string result; + std::string result; TF_EXPECT_OK(EncodeAudioAsS16LEWav(audio, 22050, 1, 5, &result)); EXPECT_EQ(54, result.size()); } TEST(WavIO, EncodeThenDecode) { float audio[] = {0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f}; - string wav_data; + std::string wav_data; TF_ASSERT_OK(EncodeAudioAsS16LEWav(audio, 44100, 2, 3, &wav_data)); std::vector decoded_audio; - uint32 decoded_sample_count; - uint16 decoded_channel_count; - uint32 decoded_sample_rate; + uint32_t decoded_sample_count; + uint16_t decoded_channel_count; + uint32_t decoded_sample_rate; TF_ASSERT_OK(DecodeLin16WaveAsFloatVector( wav_data, &decoded_audio, &decoded_sample_count, &decoded_channel_count, &decoded_sample_rate)); @@ -112,59 +112,129 @@ TEST(WavIO, EncodeThenDecode) { } TEST(WavIO, BasicMono) { - std::vector wav_data = { - 'R', 'I', 'F', 'F', // ChunkID - 44, 0, 0, 0, // ChunkSize: 36 + SubChunk2Size - 'W', 'A', 'V', 'E', // Format - 'f', 'm', 't', ' ', // Subchunk1ID - 16, 0, 0, 0, // Subchunk1Size - 1, 0, // AudioFormat: 1=PCM - 1, 0, // NumChannels - 0x44, 0xac, 0, 0, // SampleRate: 44100 - 0x88, 0x58, 0x1, 0, // BytesPerSecond: SampleRate * NumChannels * - // BitsPerSample/8 - 2, 0, // BytesPerSample: NumChannels * BitsPerSample/8 - 16, 0, // BitsPerSample - 'd', 'a', 't', 'a', // Subchunk2ID - 8, 0, 0, 0, // Subchunk2Size: NumSamples * NumChannels * - // BitsPerSample/8 - 0, 0, // Sample 1: 0 - 0xff, 0x7f, // Sample 2: 32767 (saturated) - 0, 0, // Sample 3: 0 - 0x00, 0x80, // Sample 4: -32768 (saturated) + std::vector wav_data = { + 'R', + 'I', + 'F', + 'F', // ChunkID + 44, + 0, + 0, + 0, // ChunkSize: 36 + SubChunk2Size + 'W', + 'A', + 'V', + 'E', // Format + 'f', + 'm', + 't', + ' ', // Subchunk1ID + 16, + 0, + 0, + 0, // Subchunk1Size + 1, + 0, // AudioFormat: 1=PCM + 1, + 0, // NumChannels + 0x44, + 0xac, + 0, + 0, // SampleRate: 44100 + 0x88, + 0x58, + 0x1, + 0, // BytesPerSecond: SampleRate * NumChannels * + // BitsPerSample/8 + 2, + 0, // BytesPerSample: NumChannels * BitsPerSample/8 + 16, + 0, // BitsPerSample + 'd', + 'a', + 't', + 'a', // Subchunk2ID + 8, + 0, + 0, + 0, // Subchunk2Size: NumSamples * NumChannels * + // BitsPerSample/8 + 0, + 0, // Sample 1: 0 + 0xff, + 0x7f, // Sample 2: 32767 (saturated) + 0, + 0, // Sample 3: 0 + 0x00, + 0x80, // Sample 4: -32768 (saturated) }; - string expected(wav_data.begin(), wav_data.end()); + std::string expected(wav_data.begin(), wav_data.end()); float audio[] = {0.0f, 1.0f, 0.0f, -1.0f}; - string result; + std::string result; TF_EXPECT_OK(EncodeAudioAsS16LEWav(audio, 44100, 1, 4, &result)); EXPECT_EQ(expected, result); } TEST(WavIO, BasicStereo) { - 
std::vector wav_data = { - 'R', 'I', 'F', 'F', // ChunkID - 44, 0, 0, 0, // ChunkSize: 36 + SubChunk2Size - 'W', 'A', 'V', 'E', // Format - 'f', 'm', 't', ' ', // Subchunk1ID - 16, 0, 0, 0, // Subchunk1Size - 1, 0, // AudioFormat: 1=PCM - 2, 0, // NumChannels - 0x44, 0xac, 0, 0, // SampleRate: 44100 - 0x10, 0xb1, 0x2, 0, // BytesPerSecond: SampleRate * NumChannels * - // BitsPerSample/8 - 4, 0, // BytesPerSample: NumChannels * BitsPerSample/8 - 16, 0, // BitsPerSample - 'd', 'a', 't', 'a', // Subchunk2ID - 8, 0, 0, 0, // Subchunk2Size: NumSamples * NumChannels * - // BitsPerSample/8 - 0, 0, // Sample 1: 0 - 0xff, 0x7f, // Sample 2: 32767 (saturated) - 0, 0, // Sample 3: 0 - 0x00, 0x80, // Sample 4: -32768 (saturated) + std::vector wav_data = { + 'R', + 'I', + 'F', + 'F', // ChunkID + 44, + 0, + 0, + 0, // ChunkSize: 36 + SubChunk2Size + 'W', + 'A', + 'V', + 'E', // Format + 'f', + 'm', + 't', + ' ', // Subchunk1ID + 16, + 0, + 0, + 0, // Subchunk1Size + 1, + 0, // AudioFormat: 1=PCM + 2, + 0, // NumChannels + 0x44, + 0xac, + 0, + 0, // SampleRate: 44100 + 0x10, + 0xb1, + 0x2, + 0, // BytesPerSecond: SampleRate * NumChannels * + // BitsPerSample/8 + 4, + 0, // BytesPerSample: NumChannels * BitsPerSample/8 + 16, + 0, // BitsPerSample + 'd', + 'a', + 't', + 'a', // Subchunk2ID + 8, + 0, + 0, + 0, // Subchunk2Size: NumSamples * NumChannels * + // BitsPerSample/8 + 0, + 0, // Sample 1: 0 + 0xff, + 0x7f, // Sample 2: 32767 (saturated) + 0, + 0, // Sample 3: 0 + 0x00, + 0x80, // Sample 4: -32768 (saturated) }; - string expected(wav_data.begin(), wav_data.end()); + std::string expected(wav_data.begin(), wav_data.end()); float audio[] = {0.0f, 1.0f, 0.0f, -1.0f}; - string result; + std::string result; TF_EXPECT_OK(EncodeAudioAsS16LEWav(audio, 44100, 2, 2, &result)); EXPECT_EQ(expected, result); } @@ -175,38 +245,83 @@ TEST(WavIO, BasicStereo) { // large WAV files are not common, and are unsupported by many readers. // See b/72655902. TEST(WavIO, ChunkSizeOverflow) { - std::vector wav_data = { - 'R', 'I', 'F', 'F', // ChunkID - 60, 0, 0, 0, // ChunkSize: 36 + SubChunk2Size - 'W', 'A', 'V', 'E', // Format - 'f', 'm', 't', ' ', // Subchunk1ID - 16, 0, 0, 0, // Subchunk1Size - 1, 0, // AudioFormat: 1=PCM - 1, 0, // NumChannels - 0x44, 0xac, 0, 0, // SampleRate: 44100 - 0x88, 0x58, 0x1, 0, // BytesPerSecond: SampleRate * NumChannels * - // BitsPerSample/8 - 2, 0, // BytesPerSample: NumChannels * BitsPerSample/8 - 16, 0, // BitsPerSample - 'd', 'a', 't', 'a', // Subchunk2ID - 8, 0, 0, 0, // Subchunk2Size: NumSamples * NumChannels * - // BitsPerSample/8 - 0, 0, // Sample 1: 0 - 0xff, 0x7f, // Sample 2: 32767 (saturated) - 0, 0, // Sample 3: 0 - 0x00, 0x80, // Sample 4: -32768 (saturated) - 'f', 'o', 'o', 'o', // Subchunk2ID - 0xff, 0xff, 0xff, 0xf8, // Chunk size that could cause an infinite loop. 
- 0, 0, // Sample 1: 0 - 0xff, 0x7f, // Sample 2: 32767 (saturated) - 0, 0, // Sample 3: 0 - 0x00, 0x80, // Sample 4: -32768 (saturated) + std::vector wav_data = { + 'R', + 'I', + 'F', + 'F', // ChunkID + 60, + 0, + 0, + 0, // ChunkSize: 36 + SubChunk2Size + 'W', + 'A', + 'V', + 'E', // Format + 'f', + 'm', + 't', + ' ', // Subchunk1ID + 16, + 0, + 0, + 0, // Subchunk1Size + 1, + 0, // AudioFormat: 1=PCM + 1, + 0, // NumChannels + 0x44, + 0xac, + 0, + 0, // SampleRate: 44100 + 0x88, + 0x58, + 0x1, + 0, // BytesPerSecond: SampleRate * NumChannels * + // BitsPerSample/8 + 2, + 0, // BytesPerSample: NumChannels * BitsPerSample/8 + 16, + 0, // BitsPerSample + 'd', + 'a', + 't', + 'a', // Subchunk2ID + 8, + 0, + 0, + 0, // Subchunk2Size: NumSamples * NumChannels * + // BitsPerSample/8 + 0, + 0, // Sample 1: 0 + 0xff, + 0x7f, // Sample 2: 32767 (saturated) + 0, + 0, // Sample 3: 0 + 0x00, + 0x80, // Sample 4: -32768 (saturated) + 'f', + 'o', + 'o', + 'o', // Subchunk2ID + 0xff, + 0xff, + 0xff, + 0xf8, // Chunk size that could cause an infinite loop. + 0, + 0, // Sample 1: 0 + 0xff, + 0x7f, // Sample 2: 32767 (saturated) + 0, + 0, // Sample 3: 0 + 0x00, + 0x80, // Sample 4: -32768 (saturated) }; - string wav_data_string(wav_data.begin(), wav_data.end()); + std::string wav_data_string(wav_data.begin(), wav_data.end()); std::vector decoded_audio; - uint32 decoded_sample_count; - uint16 decoded_channel_count; - uint32 decoded_sample_rate; + uint32_t decoded_sample_count; + uint16_t decoded_channel_count; + uint32_t decoded_sample_rate; absl::Status decode_status = DecodeLin16WaveAsFloatVector( wav_data_string, &decoded_audio, &decoded_sample_count, &decoded_channel_count, &decoded_sample_rate); @@ -244,10 +359,10 @@ TEST(WavIO, IncrementOffset) { } TEST(WavIO, ExpectText) { - std::vector test_data = { + std::vector test_data = { 'E', 'x', 'p', 'e', 'c', 't', 'e', 'd', }; - string test_string(test_data.begin(), test_data.end()); + std::string test_string(test_data.begin(), test_data.end()); int offset = 0; TF_EXPECT_OK(ExpectText(test_string, "Expected", &offset)); @@ -267,13 +382,13 @@ TEST(WavIO, ExpectText) { } TEST(WavIO, ReadString) { - std::vector test_data = { + std::vector test_data = { 'E', 'x', 'p', 'e', 'c', 't', 'e', 'd', }; - string test_string(test_data.begin(), test_data.end()); + std::string test_string(test_data.begin(), test_data.end()); int offset = 0; - string read_value; + std::string read_value; TF_EXPECT_OK(ReadString(test_string, 2, &read_value, &offset)); EXPECT_EQ("Ex", read_value); EXPECT_EQ(2, offset); @@ -287,8 +402,8 @@ TEST(WavIO, ReadString) { } TEST(WavIO, ReadValueInt8) { - std::vector test_data = {0x00, 0x05, 0xff, 0x80}; - string test_string(test_data.begin(), test_data.end()); + std::vector test_data = {0x00, 0x05, 0xff, 0x80}; + std::string test_string(test_data.begin(), test_data.end()); int offset = 0; int8_t read_value; @@ -313,11 +428,11 @@ TEST(WavIO, ReadValueInt8) { } TEST(WavIO, ReadValueUInt8) { - std::vector test_data = {0x00, 0x05, 0xff, 0x80}; - string test_string(test_data.begin(), test_data.end()); + std::vector test_data = {0x00, 0x05, 0xff, 0x80}; + std::string test_string(test_data.begin(), test_data.end()); int offset = 0; - uint8 read_value; + uint8_t read_value; TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); EXPECT_EQ(0, read_value); EXPECT_EQ(1, offset); @@ -339,14 +454,14 @@ TEST(WavIO, ReadValueUInt8) { } TEST(WavIO, ReadValueInt16) { - std::vector test_data = { + std::vector test_data = { 0x00, 0x00, // 0 0xff, 0x00, // 255 
0x00, 0x01, // 256 0xff, 0xff, // -1 0x00, 0x80, // -32768 }; - string test_string(test_data.begin(), test_data.end()); + std::string test_string(test_data.begin(), test_data.end()); int offset = 0; int16_t read_value; @@ -375,17 +490,17 @@ TEST(WavIO, ReadValueInt16) { } TEST(WavIO, ReadValueUInt16) { - std::vector test_data = { + std::vector test_data = { 0x00, 0x00, // 0 0xff, 0x00, // 255 0x00, 0x01, // 256 0xff, 0xff, // 65535 0x00, 0x80, // 32768 }; - string test_string(test_data.begin(), test_data.end()); + std::string test_string(test_data.begin(), test_data.end()); int offset = 0; - uint16 read_value; + uint16_t read_value; TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); EXPECT_EQ(0, read_value); EXPECT_EQ(2, offset); @@ -411,14 +526,14 @@ TEST(WavIO, ReadValueUInt16) { } TEST(WavIO, ReadValueInt32) { - std::vector test_data = { + std::vector test_data = { 0x00, 0x00, 0x00, 0x00, // 0 0xff, 0x00, 0x00, 0x00, // 255 0x00, 0xff, 0x00, 0x00, // 65280 0x00, 0x00, 0xff, 0x00, // 16,711,680 0xff, 0xff, 0xff, 0xff, // -1 }; - string test_string(test_data.begin(), test_data.end()); + std::string test_string(test_data.begin(), test_data.end()); int offset = 0; int32_t read_value; @@ -447,17 +562,17 @@ TEST(WavIO, ReadValueInt32) { } TEST(WavIO, ReadValueUInt32) { - std::vector test_data = { + std::vector test_data = { 0x00, 0x00, 0x00, 0x00, // 0 0xff, 0x00, 0x00, 0x00, // 255 0x00, 0xff, 0x00, 0x00, // 65280 0x00, 0x00, 0xff, 0x00, // 16,711,680 0xff, 0xff, 0xff, 0xff, // 4,294,967,295 }; - string test_string(test_data.begin(), test_data.end()); + std::string test_string(test_data.begin(), test_data.end()); int offset = 0; - uint32 read_value; + uint32_t read_value; TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); EXPECT_EQ(0, read_value); EXPECT_EQ(4, offset); diff --git a/tensorflow/core/nccl/BUILD b/tensorflow/core/nccl/BUILD index 9fdae56fb81d87..ec1ee113fcff2d 100644 --- a/tensorflow/core/nccl/BUILD +++ b/tensorflow/core/nccl/BUILD @@ -61,7 +61,6 @@ tf_cuda_cc_test( "multi_gpu", "no_oss", "notap", - "cuda-only", # flaky on CI as of 2022-05-30 ], deps = [ "//tensorflow/core:test", diff --git a/tensorflow/core/profiler/backends/gpu/BUILD b/tensorflow/core/profiler/backends/gpu/BUILD index fee9ede90965fe..feb6a553313283 100644 --- a/tensorflow/core/profiler/backends/gpu/BUILD +++ b/tensorflow/core/profiler/backends/gpu/BUILD @@ -18,7 +18,6 @@ tf_cuda_cc_test( tags = tf_cuda_tests_tags() + [ "gpu_cupti", "nomac", - "cuda-only", # flaky on CI ], deps = [ "//tensorflow/cc:cc_ops", @@ -43,11 +42,11 @@ tf_cuda_cc_test( "//tensorflow/core/profiler/utils:xplane_utils", "//tensorflow/core/profiler/utils:xplane_visitor", "@com_google_absl//absl/strings", - "@local_xla//xla/backends/profiler/gpu:cuda_test", - "@local_xla//xla/backends/profiler/gpu:cupti_collector", "@local_xla//xla/backends/profiler/gpu:device_tracer", "@local_xla//xla/tsl/profiler/utils:tf_xplane_visitor", - ] + if_cuda_is_configured([ + ] + if_cuda_is_configured([ + "@local_xla//xla/backends/profiler/gpu:cupti_collector", + "@local_xla//xla/backends/profiler/gpu:cuda_test", "@local_config_cuda//cuda:cuda_headers", "@local_config_cuda//cuda:cupti_headers", ]), diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 97ae6af69c56ae..964b014a3aa3f7 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -93,7 +93,7 @@ limitations under the License. 
#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 2434 // Updated: 2025/12/7 +#define TF_GRAPH_DEF_VERSION 2451 // Updated: 2025/12/24 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // diff --git a/tensorflow/core/runtime_fallback/kernel/attr_util.cc b/tensorflow/core/runtime_fallback/kernel/attr_util.cc index 82bb7ce1b89b57..3c319e09e0e137 100644 --- a/tensorflow/core/runtime_fallback/kernel/attr_util.cc +++ b/tensorflow/core/runtime_fallback/kernel/attr_util.cc @@ -72,7 +72,7 @@ absl::Status ParseValue(absl::string_view input, bool* value) { return absl::OkStatus(); } -absl::Status ParseValue(absl::string_view input, int32* value) { +absl::Status ParseValue(absl::string_view input, int32_t* value) { bool parse_result = absl::SimpleAtoi(input, value); if (!parse_result) { return errors::InvalidArgument("Could not parse int32 from ", input); @@ -90,7 +90,7 @@ absl::Status ParseValue(absl::string_view input, std::string* value) { return absl::OkStatus(); } -absl::Status ParseValue(absl::string_view input, std::vector* value) { +absl::Status ParseValue(absl::string_view input, std::vector* value) { std::vector parts = str_util::Split(input, ","); value->reserve(parts.size()); for (const auto& value_str : parts) { @@ -123,7 +123,7 @@ absl::Status AddOpAttr(const std::string& name, const std::string& attr_value, } else if (type == "i32") { int32_t val; s = ParseValue(value, &val); - opattrs->Set(name, val); + opattrs->Set(name, val); } else if (type == "string" || type == "padding") { std::string val; s = ParseValue(value, &val); @@ -133,9 +133,9 @@ absl::Status AddOpAttr(const std::string& name, const std::string& attr_value, s = ParseValue(value, &val); opattrs->Set(name, tfd::ConvertFromTfDataType(val)); } else if (type == "list(i32)") { - std::vector val; + std::vector val; s = ParseValue(value, &val); - opattrs->SetArray(name, val); + opattrs->SetArray(name, val); } return s; } diff --git a/tensorflow/core/runtime_fallback/kernel/attr_util.h b/tensorflow/core/runtime_fallback/kernel/attr_util.h index 4abbb4f8b31c58..41f0e657d6b1af 100644 --- a/tensorflow/core/runtime_fallback/kernel/attr_util.h +++ b/tensorflow/core/runtime_fallback/kernel/attr_util.h @@ -38,10 +38,10 @@ typedef llvm::StringMap AttrMap; // Parse value from the given string input. 
absl::Status ParseValue(absl::string_view input, bool* value); -absl::Status ParseValue(absl::string_view input, int32* value); +absl::Status ParseValue(absl::string_view input, int32_t* value); absl::Status ParseValue(absl::string_view input, DataType* value); absl::Status ParseValue(absl::string_view input, std::string* value); -absl::Status ParseValue(absl::string_view input, std::vector* value); +absl::Status ParseValue(absl::string_view input, std::vector* value); absl::Status ParseValue(absl::string_view input, Padding* value); absl::Status AddOpAttr(const std::string& name, const std::string& attr_value, diff --git a/tensorflow/core/runtime_fallback/kernel/attr_util_test.cc b/tensorflow/core/runtime_fallback/kernel/attr_util_test.cc index 79d80b13ff501a..e6975350c55da4 100644 --- a/tensorflow/core/runtime_fallback/kernel/attr_util_test.cc +++ b/tensorflow/core/runtime_fallback/kernel/attr_util_test.cc @@ -47,9 +47,9 @@ TEST(AttrUtilTest, TestGetIntAttr) { TF_ASSERT_OK(AddOpAttr("bar", "i32$0", &opattrs)); TF_ASSERT_OK(AddOpAttr("baz", "i32$123", &opattrs)); - ASSERT_EQ(opattrs.GetAsserting("foo"), -2); - ASSERT_EQ(opattrs.GetAsserting("bar"), 0); - ASSERT_EQ(opattrs.GetAsserting("baz"), 123); + ASSERT_EQ(opattrs.GetAsserting("foo"), -2); + ASSERT_EQ(opattrs.GetAsserting("bar"), 0); + ASSERT_EQ(opattrs.GetAsserting("baz"), 123); absl::Status s = AddOpAttr("invalid", "i32$4.5", &opattrs); ASSERT_FALSE(s.ok()); @@ -71,17 +71,17 @@ TEST(AttrUtilTest, TestGetIntListAttr) { TF_ASSERT_OK(AddOpAttr("baz", "list(i32)$1,2,3", &opattrs)); // std::vector v1, v2, v3; - ArrayRef v1, v2, v3; - std::vector expected_v1; - std::vector expected_v2 = {1}; - std::vector expected_v3 = {1, 2, 3}; - ArrayRef expected_v1_ref(expected_v1); - ArrayRef expected_v2_ref(expected_v2); - ArrayRef expected_v3_ref(expected_v3); - - ASSERT_TRUE(opattrs.GetArray("foo", &v1)); - ASSERT_TRUE(opattrs.GetArray("bar", &v2)); - ASSERT_TRUE(opattrs.GetArray("baz", &v3)); + ArrayRef v1, v2, v3; + std::vector expected_v1; + std::vector expected_v2 = {1}; + std::vector expected_v3 = {1, 2, 3}; + ArrayRef expected_v1_ref(expected_v1); + ArrayRef expected_v2_ref(expected_v2); + ArrayRef expected_v3_ref(expected_v3); + + ASSERT_TRUE(opattrs.GetArray("foo", &v1)); + ASSERT_TRUE(opattrs.GetArray("bar", &v2)); + ASSERT_TRUE(opattrs.GetArray("baz", &v3)); ASSERT_EQ(v1, expected_v1_ref); ASSERT_EQ(v2, expected_v2_ref); ASSERT_EQ(v3, expected_v3_ref); diff --git a/tensorflow/core/runtime_fallback/kernel/kernel_fallback_execute_compat.cc b/tensorflow/core/runtime_fallback/kernel/kernel_fallback_execute_compat.cc index b496e1924107d9..2bab64c6a02ac6 100644 --- a/tensorflow/core/runtime_fallback/kernel/kernel_fallback_execute_compat.cc +++ b/tensorflow/core/runtime_fallback/kernel/kernel_fallback_execute_compat.cc @@ -427,8 +427,9 @@ TF_ATTRIBUTE_ALWAYS_INLINE static void KernelFallbackExecuteOpInternal( [&]() { return GetTracingMetadata(args, exec_ctx, kernel_runner); }); if (fallback_request_state.log_device_placement() || VLOG_IS_ON(1)) { - string msg = absl::StrCat("Executing op ", frame.op_name().GetValue().str(), - " in device ", frame.device().GetValue().str()); + std::string msg = + absl::StrCat("Executing op ", frame.op_name().GetValue().str(), + " in device ", frame.device().GetValue().str()); if (!logging::LogToListeners(msg)) { LOG(INFO) << msg; } @@ -865,10 +866,10 @@ llvm::Expected Predicate( CASE(float); CASE(double); - CASE(uint8); - CASE(int8); - CASE(int16); - CASE(int32); + CASE(uint8_t); + CASE(int8_t); + CASE(int16_t); 
+ CASE(int32_t); CASE(int64_t); CASE(bool); #undef CASE diff --git a/tensorflow/core/runtime_fallback/kernel/kernel_fallback_kernels.cc b/tensorflow/core/runtime_fallback/kernel/kernel_fallback_kernels.cc index da93625c5111c2..b93902e576ddd6 100644 --- a/tensorflow/core/runtime_fallback/kernel/kernel_fallback_kernels.cc +++ b/tensorflow/core/runtime_fallback/kernel/kernel_fallback_kernels.cc @@ -77,7 +77,7 @@ static void TFDConstantTensor(tfrt::Argument value, // it causes a missing typeinfo error when using -fno-rtti. Investigate // if we can make it work with no-rtti. Tensor out(DT_INT32, TensorShape({})); - out.flat()(0) = value.get(); + out.flat()(0) = value.get(); tensor.Emplace(out); } diff --git a/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel.cc b/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel.cc index 41e7cfae0637e7..c26dae601b69fe 100644 --- a/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel.cc +++ b/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel.cc @@ -81,9 +81,9 @@ absl::Status TFRTOpKernelConstruction::GetAttr(absl::string_view attr_name, template <> absl::Status TFRTOpKernelConstruction::GetAttr( - absl::string_view attr_name, std::vector* value) const { - llvm::ArrayRef arrayref; - bool success = attributes_.GetArray( + absl::string_view attr_name, std::vector* value) const { + llvm::ArrayRef arrayref; + bool success = attributes_.GetArray( llvm::StringRef(attr_name.data(), attr_name.size()), &arrayref); if (!success) { return MissingAttributeError(attr_name); @@ -239,7 +239,7 @@ TFRTOpMetaBuilder& TFRTOpMetaBuilder::Attr(absl::string_view attr_spec) { return *this; } -const string& TFRTOpMetaBuilder::op_name() const { return op_name_; } +const std::string& TFRTOpMetaBuilder::op_name() const { return op_name_; } TFRTOpMeta TFRTOpMetaBuilder::BuildMeta() const { return TFRTOpMeta(output_types_); diff --git a/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel.h b/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel.h index e370fde54e23db..e06a0f13f3ec2b 100644 --- a/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel.h +++ b/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel.h @@ -100,8 +100,8 @@ absl::Status TFRTOpKernelConstruction::GetAttr(absl::string_view attr_name, Padding* value) const; template <> -absl::Status TFRTOpKernelConstruction::GetAttr(absl::string_view attr_name, - std::vector* value) const; +absl::Status TFRTOpKernelConstruction::GetAttr( + absl::string_view attr_name, std::vector* value) const; absl::Status MissingAttributeError(absl::string_view attr_name); @@ -207,11 +207,11 @@ class TFRTOpMetaBuilder { TFRTOpMetaBuilder& Input(absl::string_view input_spec); TFRTOpMetaBuilder& Attr(absl::string_view attr_spec); - const string& op_name() const; + const std::string& op_name() const; TFRTOpMeta BuildMeta() const; private: - string op_name_; + std::string op_name_; std::vector output_types_; }; diff --git a/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel_test.cc b/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel_test.cc index 5c99d39745c519..3b96ce59d9335d 100644 --- a/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel_test.cc +++ b/tensorflow/core/runtime_fallback/kernel/tfrt_op_kernel_test.cc @@ -56,7 +56,7 @@ TEST(TFRTOpKernelTest, TestGetBoolAttr) { TEST(TFRTOpKernelTest, TestGetIntAttr) { tfrt::OpAttrs attrs; - attrs.Set("foo", -2); + attrs.Set("foo", -2); tfrt::OpAttrsRef attrsref(attrs); TFRTOpKernelConstruction ctx(attrsref); @@ -68,18 +68,18 @@ TEST(TFRTOpKernelTest, TestGetIntAttr) { 
TEST(TFRTOpKernelTest, TestGetIntListAttr) { tfrt::OpAttrs attrs; - attrs.SetArray("foo", {}); - attrs.SetArray("bar", {1}); - attrs.SetArray("baz", {1, 2, 3}); + attrs.SetArray("foo", {}); + attrs.SetArray("bar", {1}); + attrs.SetArray("baz", {1, 2, 3}); attrs.SetString("bar", "test"); tfrt::OpAttrsRef attrsref(attrs); TFRTOpKernelConstruction ctx(attrsref); - std::vector v1, v2, v3; - std::vector expected_v1; - std::vector expected_v2 = {1}; - std::vector expected_v3 = {1, 2, 3}; + std::vector v1, v2, v3; + std::vector expected_v1; + std::vector expected_v2 = {1}; + std::vector expected_v3 = {1, 2, 3}; TF_ASSERT_OK(ctx.GetAttr("foo", &v1)); ASSERT_EQ(v1, expected_v1); TF_ASSERT_OK(ctx.GetAttr("bar", &v2)); @@ -217,7 +217,7 @@ TEST(TFRTOpKernelTest, TestAllocateTemp) { ASSERT_EQ(out.AllocatedBytes(), 0); TF_EXPECT_OK(ctx.allocate_temp(DT_INT32, {}, &out)); ASSERT_GT(out.AllocatedBytes(), 0); - out.scalar()() = 123; + out.scalar()() = 123; ASSERT_EQ(out.dtype(), DT_INT32); ASSERT_EQ(out.shape().dims(), 0); } diff --git a/tensorflow/core/runtime_fallback/runtime/fallback_batch_kernel.cc b/tensorflow/core/runtime_fallback/runtime/fallback_batch_kernel.cc index 04149fd2b397b4..a35e77ae99776f 100644 --- a/tensorflow/core/runtime_fallback/runtime/fallback_batch_kernel.cc +++ b/tensorflow/core/runtime_fallback/runtime/fallback_batch_kernel.cc @@ -50,8 +50,9 @@ constexpr char kBatchesToAverageOverAttr[] = "_batches_to_average_over"; } // namespace -int32 BatchFunctionFallbackKernelBase:: - NumBatchThreadsFromEnvironmentWithDefault(int default_num_batch_threads) { +int32_t +BatchFunctionFallbackKernelBase::NumBatchThreadsFromEnvironmentWithDefault( + int default_num_batch_threads) { int32_t num; const char* val = std::getenv("TF_NUM_BATCH_THREADS"); diff --git a/tensorflow/core/runtime_fallback/runtime/fallback_batch_kernel.h b/tensorflow/core/runtime_fallback/runtime/fallback_batch_kernel.h index f053704fd50dcb..3b26516602d4d2 100644 --- a/tensorflow/core/runtime_fallback/runtime/fallback_batch_kernel.h +++ b/tensorflow/core/runtime_fallback/runtime/fallback_batch_kernel.h @@ -67,7 +67,7 @@ class BatchFunctionFallbackKernelBase : public AsyncOpKernel { void SetAdaptiveBatchSchedulerOptions(OpKernelConstruction* c, int32_t num_batch_threads); - static int32 NumBatchThreadsFromEnvironmentWithDefault( + static int32_t NumBatchThreadsFromEnvironmentWithDefault( int default_num_batch_threads); static thread::ThreadPool* GetOrCreateBatchThreadsPool(); static constexpr int64_t kBatchThreadPoolSize = 128; @@ -80,10 +80,10 @@ class BatchFunctionFallbackKernelBase : public AsyncOpKernel { int32_t batch_timeout_micros_; int32_t max_enqueued_batches_; std::vector allowed_batch_sizes_; - int32 low_priority_max_batch_size_; - int32 low_priority_batch_timeout_micros_; - int32 low_priority_max_enqueued_batches_; - std::vector low_priority_allowed_batch_sizes_; + int32_t low_priority_max_batch_size_; + int32_t low_priority_batch_timeout_micros_; + int32_t low_priority_max_enqueued_batches_; + std::vector low_priority_allowed_batch_sizes_; std::string mixed_priority_policy_; bool enable_large_batch_splitting_; bool has_attribute_enable_large_batch_splitting_; @@ -100,10 +100,10 @@ class BatchFunctionFallbackKernelBase : public AsyncOpKernel { static constexpr int64_t kMaxInflightBatches = 64; bool enable_adaptive_batch_threads_ = false; struct AdaptiveBatchSchedulerOptions { - int32 min_in_flight_batches_limit = kMinInflightBatches; - int32 initial_in_flight_batches_limit = kInitialInflightBatches; - int32 
max_in_flight_batches_limit = kMaxInflightBatches; - int32 batches_to_average_over = kBatchesToAverageOver; + int32_t min_in_flight_batches_limit = kMinInflightBatches; + int32_t initial_in_flight_batches_limit = kInitialInflightBatches; + int32_t max_in_flight_batches_limit = kMaxInflightBatches; + int32_t batches_to_average_over = kBatchesToAverageOver; }; std::optional adaptive_batch_scheduler_options_ = std::nullopt; diff --git a/tensorflow/core/runtime_fallback/runtime/runtime_fallback_batch_tf_opkernels.cc b/tensorflow/core/runtime_fallback/runtime/runtime_fallback_batch_tf_opkernels.cc index 100290da8bff1e..016ccf6b1bf55c 100644 --- a/tensorflow/core/runtime_fallback/runtime/runtime_fallback_batch_tf_opkernels.cc +++ b/tensorflow/core/runtime_fallback/runtime/runtime_fallback_batch_tf_opkernels.cc @@ -193,7 +193,7 @@ class FallbackBatchResource : public tensorflow::serving::BatchResourceBase { return absl::OkStatus(); } - string DebugString() const final { return "FallbackBatchResource"; } + std::string DebugString() const final { return "FallbackBatchResource"; } const tsl::RCReference& batch_function() const { return bef_func_; @@ -407,6 +407,7 @@ REGISTER_KERNEL_BUILDER( // Identical to BatchFunction except it has 2 extra TFRT attributes and it does // not have `f` attribute. Users will not invoke this op directly. +// LINT.IfChange REGISTER_OP("_BatchFunctionFallback") .Input("in_tensors: Tin") .Input("captured_tensors: Tcaptured") @@ -467,6 +468,7 @@ REGISTER_OP("_BatchFunctionFallback") .Attr("opaque_function_handle: int") .SetShapeFn(shape_inference::UnknownShape); +// LINT.ThenChange(//tensorflow/core/tfrt/mlrt/kernel/batch_kernel.cc) } // namespace } // namespace tfrt_stub } // namespace tensorflow diff --git a/tensorflow/core/runtime_fallback/test/forwarding_test_kernels.cc b/tensorflow/core/runtime_fallback/test/forwarding_test_kernels.cc index 4454a04cc1ab34..758a9074637aa2 100644 --- a/tensorflow/core/runtime_fallback/test/forwarding_test_kernels.cc +++ b/tensorflow/core/runtime_fallback/test/forwarding_test_kernels.cc @@ -39,8 +39,8 @@ class ScalarAdd : public OpKernelT { const Tensor& input1 = ctx->input(1); Tensor output(input0); - output.scalar()() = - input0.scalar()() + input1.scalar()(); + output.scalar()() = + input0.scalar()() + input1.scalar()(); ctx->set_output(0, output); } @@ -54,7 +54,7 @@ REGISTER_OP("ScalarAdd") SCALAR_ADD_PROPERTIES; // When calling ScalarAdd from TF, use the standard OpKernel* types. 
REGISTER_KERNEL_BUILDER( - Name("ScalarAdd").Device(DEVICE_CPU).TypeConstraint("T"), + Name("ScalarAdd").Device(DEVICE_CPU).TypeConstraint("T"), ScalarAdd) #endif diff --git a/tensorflow/core/runtime_fallback/test/tfrt_forwarding_kernels.cc b/tensorflow/core/runtime_fallback/test/tfrt_forwarding_kernels.cc index 6d45437dae4625..5193167366ac35 100644 --- a/tensorflow/core/runtime_fallback/test/tfrt_forwarding_kernels.cc +++ b/tensorflow/core/runtime_fallback/test/tfrt_forwarding_kernels.cc @@ -28,7 +28,7 @@ namespace tensorflow { static void TFDConstantTensor5D(tfrt::Argument value, tfrt::Result tensor) { Tensor out(DT_INT32, TensorShape({1, 1, 1, 1, 1})); - out.flat()(0) = value.get(); + out.flat()(0) = value.get(); tensor.Emplace(out); } diff --git a/tensorflow/core/summary/BUILD b/tensorflow/core/summary/BUILD index 7b0981742dd5b4..8af924c1b40dfe 100644 --- a/tensorflow/core/summary/BUILD +++ b/tensorflow/core/summary/BUILD @@ -24,6 +24,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core/lib/db:sqlite", "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", ], ) @@ -55,6 +56,7 @@ cc_library( "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:string_view", "@local_xla//xla/tsl/protobuf:error_codes_proto_impl_cc", "@local_xla//xla/tsl/protobuf:histogram_proto_cc", ], @@ -74,6 +76,7 @@ tf_cc_test( "//tensorflow/core/lib/db:sqlite", "@com_google_absl//absl/log", "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", "@local_xla//xla/tsl/protobuf:histogram_proto_cc", ], ) @@ -128,6 +131,7 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/lib/png:png_io", "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/summary/loader.cc b/tensorflow/core/summary/loader.cc index 1443cffc4c6e6a..08e4ea469b106b 100644 --- a/tensorflow/core/summary/loader.cc +++ b/tensorflow/core/summary/loader.cc @@ -12,8 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include +#include #include #include #include diff --git a/tensorflow/core/summary/schema.cc b/tensorflow/core/summary/schema.cc index 2cd421afc59bff..3ba5db4037e419 100644 --- a/tensorflow/core/summary/schema.cc +++ b/tensorflow/core/summary/schema.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/summary/schema.h" #include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { diff --git a/tensorflow/core/summary/summary_converter.cc b/tensorflow/core/summary/summary_converter.cc index a5e3695e420103..449f851c74669f 100644 --- a/tensorflow/core/summary/summary_converter.cc +++ b/tensorflow/core/summary/summary_converter.cc @@ -21,8 +21,10 @@ limitations under the License. 
#include #include #include +#include #include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/framework/types.h" diff --git a/tensorflow/core/summary/summary_db_writer.cc b/tensorflow/core/summary/summary_db_writer.cc index 849fc9a6954c7e..2cc0a6b36a1863 100644 --- a/tensorflow/core/summary/summary_db_writer.cc +++ b/tensorflow/core/summary/summary_db_writer.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -28,6 +29,8 @@ limitations under the License. #include "absl/log/log.h" #include "absl/status/status.h" #include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "xla/tsl/protobuf/error_codes.pb.h" #include "xla/tsl/protobuf/histogram.pb.h" #include "tensorflow/core/framework/graph.pb.h" diff --git a/tensorflow/core/summary/summary_db_writer_test.cc b/tensorflow/core/summary/summary_db_writer_test.cc index 8c25da1823f057..b65349e935aa15 100644 --- a/tensorflow/core/summary/summary_db_writer_test.cc +++ b/tensorflow/core/summary/summary_db_writer_test.cc @@ -17,10 +17,12 @@ limitations under the License. #include #include #include +#include #include #include "absl/log/log.h" #include "absl/status/status.h" +#include "absl/strings/str_cat.h" #include "xla/tsl/protobuf/histogram.pb.h" #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/graph.pb.h" diff --git a/tensorflow/core/summary/summary_file_writer.cc b/tensorflow/core/summary/summary_file_writer.cc index dfb1bba4aecbe5..a77641f7e912e5 100644 --- a/tensorflow/core/summary/summary_file_writer.cc +++ b/tensorflow/core/summary/summary_file_writer.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include #include +#include #include #include diff --git a/tensorflow/core/summary/summary_file_writer_test.cc b/tensorflow/core/summary/summary_file_writer_test.cc index 94ca029774f40d..c0ef770435f05c 100644 --- a/tensorflow/core/summary/summary_file_writer_test.cc +++ b/tensorflow/core/summary/summary_file_writer_test.cc @@ -15,9 +15,11 @@ limitations under the License. #include "tensorflow/core/summary/summary_file_writer.h" #include +#include #include #include #include +#include #include #include diff --git a/tensorflow/core/summary/vacuum.cc b/tensorflow/core/summary/vacuum.cc index 29c459cca89f13..7db3633b4c21c0 100644 --- a/tensorflow/core/summary/vacuum.cc +++ b/tensorflow/core/summary/vacuum.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ #include +#include #include "absl/log/log.h" #include "tensorflow/core/lib/db/sqlite.h" diff --git a/tensorflow/core/tfrt/common/BUILD b/tensorflow/core/tfrt/common/BUILD index 5658c7db2ca6bb..571caba934dfe7 100644 --- a/tensorflow/core/tfrt/common/BUILD +++ b/tensorflow/core/tfrt/common/BUILD @@ -106,6 +106,7 @@ cc_library( "@com_google_absl//absl/synchronization", "@local_tsl//tsl/platform:statusor", "@local_xla//xla/client:local_client", + "@local_xla//xla/pjrt:host_memory_allocator", "@local_xla//xla/pjrt:local_device_state", "@local_xla//xla/pjrt:pjrt_client", "@local_xla//xla/pjrt:tf_pjrt_client", diff --git a/tensorflow/core/tfrt/common/async_value_tensor.h b/tensorflow/core/tfrt/common/async_value_tensor.h index 06e99f8f7bcc48..83d0efcb5cc63a 100644 --- a/tensorflow/core/tfrt/common/async_value_tensor.h +++ b/tensorflow/core/tfrt/common/async_value_tensor.h @@ -64,7 +64,7 @@ class AsyncValueAllocator : public Allocator { void DeallocateRaw(void* ptr) override; bool AllocatesOpaqueHandle() const override { return true; } - string Name() override { return "async-value"; } + std::string Name() override { return "async-value"; } }; } // namespace tensorflow diff --git a/tensorflow/core/tfrt/common/pjrt_state.cc b/tensorflow/core/tfrt/common/pjrt_state.cc index 9a6ec5bba211e5..e20e2ca0790586 100644 --- a/tensorflow/core/tfrt/common/pjrt_state.cc +++ b/tensorflow/core/tfrt/common/pjrt_state.cc @@ -99,6 +99,6 @@ PjRtGpuClientCreationInfo* PjRtState::GetPjRtGpuClientCreationInfo() { return pjrt_gpu_client_creation_info_.get(); } -string PjRtState::DebugString() const { return "PjRtState"; } +std::string PjRtState::DebugString() const { return "PjRtState"; } } // namespace tensorflow diff --git a/tensorflow/core/tfrt/common/pjrt_state.h b/tensorflow/core/tfrt/common/pjrt_state.h index 0c6f78cfd82ba8..e0e9f8657bb8a8 100644 --- a/tensorflow/core/tfrt/common/pjrt_state.h +++ b/tensorflow/core/tfrt/common/pjrt_state.h @@ -25,6 +25,7 @@ limitations under the License. #include "absl/status/statusor.h" #include "absl/synchronization/mutex.h" #include "xla/client/local_client.h" +#include "xla/pjrt/host_memory_allocator.h" #include "xla/pjrt/local_device_state.h" #include "xla/pjrt/pjrt_client.h" #include "xla/stream_executor/integrations/tf_allocator_adapter.h" @@ -44,7 +45,7 @@ using PjRtClientsMap = std::map>; struct PjRtGpuClientCreationInfo { std::set allowed_devices; std::unique_ptr allocator; - std::unique_ptr host_memory_allocator; + std::unique_ptr host_memory_allocator; std::map> local_device_states; xla::LocalClient* local_client; }; @@ -62,7 +63,7 @@ class PjRtState : public ResourceBase { // Moves PJRT client to `unused_`. The PJRT client moved to `unused_` will not // be returned by `GetPjRtClient`. absl::Status MovePjRtClientToUnused(const DeviceType& device_type); - string DebugString() const override; + std::string DebugString() const override; // Saves information needed to create a PJRT client (to enable creating a // client with remote devices). 
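Editorial note (not part of the patch): several hunks above change virtual overrides such as Allocator::Name() and ResourceBase::DebugString() from the unqualified string typedef to std::string. For reference, a minimal tensorflow::Allocator subclass using the updated signature might look like the sketch below; the class and its "scratch" name are hypothetical illustrations, not code from this patch, and error handling is omitted.

  #include <string>

  #include "tensorflow/core/framework/allocator.h"
  #include "tensorflow/core/platform/mem.h"

  // Minimal allocator: the three virtual methods mirror the overrides touched
  // in the diff above (Name now returns std::string).
  class ScratchAllocator : public tensorflow::Allocator {
   public:
    std::string Name() override { return "scratch"; }
    void* AllocateRaw(size_t alignment, size_t num_bytes) override {
      // AlignedMalloc takes (size, minimum_alignment).
      return tensorflow::port::AlignedMalloc(num_bytes, alignment);
    }
    void DeallocateRaw(void* ptr) override {
      tensorflow::port::AlignedFree(ptr);
    }
  };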
diff --git a/tensorflow/core/tfrt/fallback/fallback_state.cc b/tensorflow/core/tfrt/fallback/fallback_state.cc index c7f12aed50daa3..c500f862e1e706 100644 --- a/tensorflow/core/tfrt/fallback/fallback_state.cc +++ b/tensorflow/core/tfrt/fallback/fallback_state.cc @@ -51,8 +51,9 @@ namespace tfrt_stub { namespace { -string DeviceName(absl::string_view name_prefix, absl::string_view device_type, - int32_t task_id, size_t device_id) { +std::string DeviceName(absl::string_view name_prefix, + absl::string_view device_type, int32_t task_id, + size_t device_id) { return strings::StrCat(absl::StripSuffix(name_prefix, "0"), task_id, "/device:", device_type, ":", device_id); } diff --git a/tensorflow/core/tfrt/ifrt/BUILD b/tensorflow/core/tfrt/ifrt/BUILD index 4533b3c2e102de..7d0d005b0474dc 100644 --- a/tensorflow/core/tfrt/ifrt/BUILD +++ b/tensorflow/core/tfrt/ifrt/BUILD @@ -120,7 +120,6 @@ cc_library( ":ifrt_persistent_compilation_cache", ":ifrt_restore_tensor_registry", ":ifrt_serving_core_selector", - ":ifrt_tensor_utils", ":sharding_utils", ":tf_host_callback", "//tensorflow/compiler/mlir/tensorflow", diff --git a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc index d8c2064e5f6f81..bc376e94d09962 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc +++ b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.cc @@ -97,7 +97,6 @@ limitations under the License. #include "tensorflow/core/tfrt/ifrt/ifrt_persistent_compilation_cache.h" #include "tensorflow/core/tfrt/ifrt/ifrt_restore_tensor_registry.h" #include "tensorflow/core/tfrt/ifrt/ifrt_serving_core_selector.h" -#include "tensorflow/core/tfrt/ifrt/ifrt_tensor_utils.h" #include "tensorflow/core/tfrt/ifrt/sharding_utils.h" #include "tensorflow/core/tfrt/ifrt/tf_host_callback.h" #include "tsl/platform/tstring.h" @@ -504,7 +503,7 @@ IfrtServingExecutable::CreateExecutableSynchronously( compile_metadata.use_shardy_partitioner()); xla_compile_options.parameter_is_tupled_arguments = false; // Use portable execution for single device + core selection. - if (UsePortableExecution(compile_metadata)) { + if (UsePortableExecution()) { xla_compile_options.compile_portable_executable = true; } else { TF_ASSIGN_OR_RETURN( @@ -555,9 +554,8 @@ IfrtServingExecutable::CreateExecutableSynchronously( return executable_bundle; } -tsl::Future +absl::StatusOr> IfrtServingExecutable::LookUpOrCreateExecutable( - const tensorflow::tpu::TPUCompileMetadataProto& compile_metadata, absl::Span dtypes_and_shapes, absl::Span variable_arg_indices) { std::vector input_shapes; @@ -597,7 +595,18 @@ IfrtServingExecutable::LookUpOrCreateExecutable( // compilation. module_copy = mlir::OwningOpRef(module_->clone()); } + tensorflow::tpu::TPUCompileMetadataProto compile_metadata = + original_compile_metadata_; + + // b/469105465: Add test coverage for core selection in execution. + if (UsePortableExecution()) { + // Clear device_assignment because portable execution doesn't allow device + // assignment. + compile_metadata.clear_device_assignment(); + } + TF_RETURN_IF_ERROR( + UpdateCompileMetadata(compile_metadata, dtypes_and_shapes)); LOG(INFO) << "Cache missed. 
Building executable"; absl::StatusOr executable_bundle = CreateExecutableSynchronously(std::move(module_copy), compile_metadata, @@ -613,11 +622,11 @@ void IfrtServingExecutable::Freeze() { module_ = nullptr; } -bool IfrtServingExecutable::UsePortableExecution( - const tensorflow::tpu::TPUCompileMetadataProto& compile_metadata) { +bool IfrtServingExecutable::UsePortableExecution() { // TODO(b/335247101) Add a check that the core selector must be non-null if // it is a single-device program after core selection in Ifrt is stable. - return IsSingleDevice(compile_metadata) && ifrt_serving_core_selector_; + return IsSingleDevice(original_compile_metadata_) && + ifrt_serving_core_selector_; } absl::StatusOr> IfrtServingExecutable::Execute( @@ -657,20 +666,12 @@ absl::StatusOr> IfrtServingExecutable::Execute( BuildDtypeAndShape(inputs, variable_arg_indices, ifrt_restore_tensor_registry_)); - tensorflow::tpu::TPUCompileMetadataProto compile_metadata = - original_compile_metadata_; - TF_RETURN_IF_ERROR( - UpdateCompileMetadata(compile_metadata, dtypes_and_shapes)); - // `device_reservation` should be alive before the end of the execution. tsl::DeviceReservation device_reservation(kNoCoreSelectedIndex, nullptr); xla::ifrt::DeviceListRef device_list; - if (UsePortableExecution(compile_metadata)) { + if (UsePortableExecution()) { device_reservation = ifrt_serving_core_selector_->ReserveDevice(program_id_); - // Clear device_assignment because portable execution doesn't allow device - // assignment. - compile_metadata.clear_device_assignment(); TF_ASSIGN_OR_RETURN(xla::ifrt::Device * device, ifrt_client_->LookupDevice(xla::ifrt::DeviceId( device_reservation.device_index()))); @@ -679,10 +680,10 @@ absl::StatusOr> IfrtServingExecutable::Execute( device_list = assigned_device_list_; } TF_ASSIGN_OR_RETURN( - SharedCachedExecutableBundle executable_bundle, - LookUpOrCreateExecutable(compile_metadata, dtypes_and_shapes, - variable_arg_indices) - .Await()); + tsl::Future executable_bundle_future, + LookUpOrCreateExecutable(dtypes_and_shapes, variable_arg_indices)); + TF_ASSIGN_OR_RETURN(SharedCachedExecutableBundle executable_bundle, + executable_bundle_future.Await()); if (executable_bundle->compile_metadata.args().size() != dtypes_and_shapes.size()) { @@ -694,7 +695,7 @@ absl::StatusOr> IfrtServingExecutable::Execute( { tsl::profiler::TraceMe traceme("AsyncRestoreVariables"); absl::ReaderMutexLock lock(mutex_); - if (!is_frozen_) { + if (!is_frozen_ && !tf_to_hlo_compiler_->IsXlaCompilationDisabled()) { // Asynchronously load the restored variable tensors to Ifrt array. 
TF_RETURN_IF_ERROR(AsyncLoadIfrtArray(inputs, variable_arg_indices, *executable_bundle, device_list)); @@ -775,7 +776,7 @@ absl::StatusOr> IfrtServingExecutable::Execute( VLOG(2) << "Start Execution"; std::optional execution_device_list; - if (UsePortableExecution(compile_metadata)) { + if (UsePortableExecution()) { execution_device_list = device_list; } diff --git a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h index 8e29544fd01d78..ac772ae89d89be 100644 --- a/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h +++ b/tensorflow/core/tfrt/ifrt/ifrt_serving_executable.h @@ -230,10 +230,9 @@ class IfrtServingExecutable { const CachedExecutableBundle& executable_bundle, const xla::ifrt::DeviceListRef& devices); - tsl::Future LookUpOrCreateExecutable( - const tensorflow::tpu::TPUCompileMetadataProto& compile_metadata, - absl::Span dtypes_and_shapes, - absl::Span variable_arg_indices); + absl::StatusOr> + LookUpOrCreateExecutable(absl::Span dtypes_and_shapes, + absl::Span variable_arg_indices); absl::StatusOr CreateExecutableSynchronously( mlir::OwningOpRef module_copy, @@ -248,8 +247,7 @@ class IfrtServingExecutable { std::vector GetArgShape( int arg_index, const CachedExecutableBundle& entry); - bool UsePortableExecution( - const tensorflow::tpu::TPUCompileMetadataProto& compile_metadata); + bool UsePortableExecution(); }; } // namespace ifrt_serving diff --git a/tensorflow/core/tfrt/mlrt/kernel/batch_kernel.cc b/tensorflow/core/tfrt/mlrt/kernel/batch_kernel.cc index 17243e2e3b0bc6..b260fc6a492833 100644 --- a/tensorflow/core/tfrt/mlrt/kernel/batch_kernel.cc +++ b/tensorflow/core/tfrt/mlrt/kernel/batch_kernel.cc @@ -244,7 +244,7 @@ class MlrtBatchResource : public tensorflow::serving::BatchResourceBase { return absl::OkStatus(); } - string DebugString() const final { return "MlrtBatchResource"; } + std::string DebugString() const final { return "MlrtBatchResource"; } mlrt::bc::Function batch_function() const { return batch_function_; } @@ -461,6 +461,7 @@ REGISTER_KERNEL_BUILDER( Name(kMlrtBatchFunctionName).Device(DEVICE_GPU), tfrt_stub::BatchFunctionFallbackKernel); +// LINT.IfChange // Identical to BatchFunction except it has 2 extra TFRT attributes and it does // not have `f` attribute. Users will not invoke this op directly. REGISTER_OP(kMlrtBatchFunctionName) @@ -475,6 +476,43 @@ REGISTER_OP(kMlrtBatchFunctionName) .Attr("container: string = ''") .Attr("shared_name: string = ''") .Attr("batching_queue: string = ''") + // A separate set of batch options for the low priority requests, which is + // used for priority queue batching. + .Attr("low_priority_max_batch_size: int = 0") + .Attr("low_priority_batch_timeout_micros: int = 0") + .Attr("low_priority_allowed_batch_sizes: list(int) = []") + .Attr("low_priority_max_enqueued_batches: int = 0") + // Policy that determines the mixed priority batching behavior when low + // priority batch parameters are present. + // + // low_priority_padding_with_next_allowed_batch_size: If high priority + // batches time out without reaching the max batch size, low priority inputs + // pad the high priority batches up to the next allowed batch size. A low + // priority only batch gets scheduled only when the low priority input times + // out or reaches the max batch size while there is no high priority input + // waiting to be processed. + // low_priority_padding_with_max_batch_size: Same as above but pad up to the + // max batch size.
+ // priority_isolation: High priority and low priority inputs never share the + // same batch, i.e., no low priority input padding high priority batches. + // Low priority inputs get scheduled only as part of low priority only + // batches as described above. + // priority_merge: High and low priority inputs are queued separately but + // when a batch needs to be scheduled, the two queues are treated as one + // merged flat list of inputs with high priority inputs at the front of the + // list of tasks to use for the next batch. If all inputs are of the same + // priority, the behavior is the same as disabling prioritization. + .Attr( + "mixed_priority_policy: " + "{'low_priority_padding_with_max_batch_size', " + "'low_priority_padding_with_next_allowed_batch_size', " + "'priority_isolation', 'priority_merge'} = " + "'low_priority_padding_with_max_batch_size'") + // See the description of the batch_padding_policy attribute of + // BatchFunction in core/ops/batch_ops.cc. + .Attr( + "batch_padding_policy: " + "{'PAD_UP', 'BATCH_DOWN', 'MINIMIZE_TPU_COST_PER_REQUEST'} = 'PAD_UP'") .Attr("Tin: list(type)") .Attr("Tcaptured: list(type) >= 0") .Attr("Tout: list(type)") @@ -485,6 +523,8 @@ REGISTER_OP(kMlrtBatchFunctionName) .Attr("opaque_function_handle: int") .SetShapeFn(shape_inference::UnknownShape); +// LINT.ThenChange(//tensorflow/core/runtime_fallback/runtime/runtime_fallback_batch_tf_opkernels.cc) + } // namespace // TODO(rohitju, chky): This additional Register is not ideal but unavoidable diff --git a/tensorflow/core/tfrt/tfrt_session/tfrt_session.cc b/tensorflow/core/tfrt/tfrt_session/tfrt_session.cc index 0fc8f06b2b5e53..9956e74011d7ed 100644 --- a/tensorflow/core/tfrt/tfrt_session/tfrt_session.cc +++ b/tensorflow/core/tfrt/tfrt_session/tfrt_session.cc @@ -173,7 +173,7 @@ class TfrtSession : public tensorflow::Session { } absl::Status Create(GraphDef&& graph) override { - absl::MutexLock lock(&session_state_lock_); + absl::MutexLock lock(session_state_lock_); return CreateLocked(std::move(graph)); } @@ -279,7 +279,7 @@ class TfrtSession : public tensorflow::Session { } absl::Status Extend(GraphDef&& graph) override { - absl::MutexLock lock(&session_state_lock_); + absl::MutexLock lock(session_state_lock_); return ExtendLocked(std::move(graph)); } @@ -299,7 +299,7 @@ class TfrtSession : public tensorflow::Session { std::vector* outputs, const thread::ThreadPoolOptions& thread_pool_options) { { - absl::MutexLock lock(&session_state_lock_); + absl::MutexLock lock(session_state_lock_); if (session_state_ == SessionState::kInitialized) { return errors::Unavailable("Session not created yet."); } @@ -401,7 +401,7 @@ class TfrtSession : public tensorflow::Session { // NOTE: This API is still experimental and may change. absl::Status MakeCallable(const CallableOptions& callable_options, CallableHandle* out_handle) override { - absl::MutexLock lock(&callables_lock_); + absl::MutexLock lock(callables_lock_); *out_handle = next_callable_handle_++; assert(callables_.find(*out_handle) == callables_.end()); callables_[*out_handle] = {callable_options}; @@ -436,7 +436,7 @@ class TfrtSession : public tensorflow::Session { const thread::ThreadPoolOptions& thread_pool_options) override { Callable callable; { - absl::MutexLock lock(&callables_lock_); + absl::MutexLock lock(callables_lock_); auto it = callables_.find(handle); if (it == callables_.end()) return errors::InvalidArgument("No such callable handle: ", handle); @@ -466,7 +466,7 @@ class TfrtSession : public tensorflow::Session { /// session. 
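// [Editor's sketch; not the BatchResourceBase scheduler.] A toy model of the
// four mixed_priority_policy values documented above: given a high-priority
// batch that has timed out with high_count tasks, how many waiting
// low-priority tasks may be used to pad it. All names and thresholds here are
// illustrative; the real logic lives in tensorflow::serving.
#include <algorithm>

enum class MixedPriorityPolicy {
  kLowPriorityPaddingWithNextAllowedBatchSize,
  kLowPriorityPaddingWithMaxBatchSize,
  kPriorityIsolation,
  kPriorityMerge,
};

int LowPriorityPadding(MixedPriorityPolicy policy, int high_count,
                       int next_allowed_batch_size, int max_batch_size,
                       int low_waiting) {
  switch (policy) {
    case MixedPriorityPolicy::kLowPriorityPaddingWithNextAllowedBatchSize:
      // Pad only up to the next allowed batch size.
      return std::min(low_waiting,
                      std::max(0, next_allowed_batch_size - high_count));
    case MixedPriorityPolicy::kLowPriorityPaddingWithMaxBatchSize:
    case MixedPriorityPolicy::kPriorityMerge:
      // Pad (or merge the two queues) up to the max batch size; with a single
      // priority level this degenerates to ordinary batching.
      return std::min(low_waiting, std::max(0, max_batch_size - high_count));
    case MixedPriorityPolicy::kPriorityIsolation:
      // High- and low-priority tasks never share a batch.
      return 0;
  }
  return 0;
}

// e.g. with high_count = 3, next_allowed = 4, max = 8, low_waiting = 10:
// next_allowed policy -> 1, max / merge policies -> 5, isolation -> 0.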
/// NOTE: This API is still experimental and may change. absl::Status ReleaseCallable(CallableHandle handle) override { - absl::MutexLock lock(&callables_lock_); + absl::MutexLock lock(callables_lock_); auto it = callables_.find(handle); if (it == callables_.end()) return errors::InvalidArgument("No such callable handle: ", handle); @@ -475,7 +475,7 @@ class TfrtSession : public tensorflow::Session { } absl::Status Close() override { - absl::MutexLock lock(&session_state_lock_); + absl::MutexLock lock(session_state_lock_); session_state_ = SessionState::kClosed; return absl::OkStatus(); } @@ -721,7 +721,7 @@ class TfrtSessionFactory::ThreadPoolManager { "TFRT session does not yet support session local thread pool"); } - absl::MutexLock lock(&mutex_); + absl::MutexLock lock(mutex_); auto it = named_thread_pools_.find(name); // The thread pool with the given name already exists. @@ -842,7 +842,7 @@ absl::Status TfrtSessionFactory::NewSession(const SessionOptions& options, *out_session = nullptr; - absl::MutexLock lock(&mutex_); + absl::MutexLock lock(mutex_); std::vector> devices; TF_RETURN_IF_ERROR(DeviceFactory::AddDevices( options, "/job:localhost/replica:0/task:0", &devices)); @@ -873,13 +873,13 @@ static TfrtSessionFactory* session_factory = nullptr; tfrt_stub::Runtime* TfrtSessionFactory::GetRuntime() { DCHECK(session_factory != nullptr); - absl::MutexLock lock(&session_factory->mutex_); + absl::MutexLock lock(session_factory->mutex_); return session_factory->runtime_; } absl::Status InitializeTfrtSession(const TfrtSessionOptions& options) { DCHECK(session_factory != nullptr); - absl::MutexLock lock(&session_factory->mutex_); + absl::MutexLock lock(session_factory->mutex_); DCHECK(!session_factory->IsInitialized()); return UpdateTfrtSessionOptionsLocked(options); } diff --git a/tensorflow/core/tfrt/utils/graph_partition.cc b/tensorflow/core/tfrt/utils/graph_partition.cc index 08f5dce6d5734d..ddf50ab8c7ef4d 100644 --- a/tensorflow/core/tfrt/utils/graph_partition.cc +++ b/tensorflow/core/tfrt/utils/graph_partition.cc @@ -436,7 +436,7 @@ absl::StatusOr> InsertTransferOps( auto new_graph = std::make_unique(graph->flib_def()); FunctionDefLibrary flib = graph->flib_def().ToProto(); - std::unordered_map> partitions; + std::unordered_map> partitions; TF_RETURN_IF_ERROR( PartitionFunctionGraph(device_set, std::move(graph), &partitions)); @@ -447,7 +447,7 @@ absl::StatusOr> InsertTransferOps( std::map device_to_output_info_map; for (auto& partition : partitions) { - const string& device = partition.first; + const std::string& device = partition.first; VLOG(1) << "Process the partitioin on device: " << device; Graph* subgraph = partition.second.get(); diff --git a/tensorflow/core/tpu/kernels/image_resize_ops.cc b/tensorflow/core/tpu/kernels/image_resize_ops.cc index 7e255bab054550..dfc4077e8f10a1 100644 --- a/tensorflow/core/tpu/kernels/image_resize_ops.cc +++ b/tensorflow/core/tpu/kernels/image_resize_ops.cc @@ -57,7 +57,7 @@ class TpuCustomResizeOp : public XlaOpKernel { return output_shape; } - string OpaqueField() const { + std::string OpaqueField() const { return absl::StrCat("\"", align_corners_, half_pixel_centers_, "\""); } diff --git a/tensorflow/core/tpu/kernels/infeed_ops.cc b/tensorflow/core/tpu/kernels/infeed_ops.cc index d59c6c4b6d4683..2d13813db101cf 100644 --- a/tensorflow/core/tpu/kernels/infeed_ops.cc +++ b/tensorflow/core/tpu/kernels/infeed_ops.cc @@ -188,7 +188,9 @@ struct LinearizedBuffersWrapper { ~LinearizedBuffersWrapper() = default; // These functions are 
tensorflow::Variant requirements. - string TypeName() const { return "(anonymous)::LinearizedBuffersWrapper"; } + std::string TypeName() const { + return "(anonymous)::LinearizedBuffersWrapper"; + } void Encode(tensorflow::VariantTensorData* data) const { LOG(ERROR) << "Encode() is not implemented for LinearizedBuffersWrapper " "objects."; diff --git a/tensorflow/core/tpu/kernels/sparse_core_ops_utils.cc b/tensorflow/core/tpu/kernels/sparse_core_ops_utils.cc index 182f5bf29ca32b..2fa5972f29af46 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_ops_utils.cc +++ b/tensorflow/core/tpu/kernels/sparse_core_ops_utils.cc @@ -44,10 +44,10 @@ limitations under the License. namespace tensorflow { -std::vector ConvertBinarySplitsToBucketSplits(int64 split, +std::vector ConvertBinarySplitsToBucketSplits(int64_t split, int max_division_level) { std::vector bucket_splits; - uint32 current_index = 0; + uint32_t current_index = 0; while (split > 0) { if (split % 2 == 1) { int split_level = absl::bit_width(current_index + 1) - 1; @@ -62,9 +62,9 @@ std::vector ConvertBinarySplitsToBucketSplits(int64 split, return bucket_splits; } -int64 ConvertBucketSplitsToBinarySplits(std::vector bucket_splits, - int max_division_level) { - int64 binary_splits = 0; +int64_t ConvertBucketSplitsToBinarySplits(std::vector bucket_splits, + int max_division_level) { + int64_t binary_splits = 0; for (auto& bucket_split : bucket_splits) { int split_level = max_division_level - 1; while (bucket_split > 0 && bucket_split % 2 == 0) { diff --git a/tensorflow/core/tpu/kernels/sparse_core_ops_utils.h b/tensorflow/core/tpu/kernels/sparse_core_ops_utils.h index 72419504760aa6..cd958fc5d2218d 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_ops_utils.h +++ b/tensorflow/core/tpu/kernels/sparse_core_ops_utils.h @@ -33,11 +33,11 @@ namespace tensorflow { // Pad value used for SparseCore mini batching logic. 
const int32_t kXlaPadValue = std::numeric_limits::max(); -std::vector ConvertBinarySplitsToBucketSplits(int64 split, +std::vector ConvertBinarySplitsToBucketSplits(int64_t split, int max_division_level); -int64 ConvertBucketSplitsToBinarySplits(std::vector bucket_splits, - int max_division_level); +int64_t ConvertBucketSplitsToBinarySplits(std::vector bucket_splits, + int max_division_level); absl::Status ValidateInputCombiner(const std::string& combiner); diff --git a/tensorflow/core/tpu/kernels/sparse_core_ops_utils_test.cc b/tensorflow/core/tpu/kernels/sparse_core_ops_utils_test.cc index 9af20e1f2a540d..6a241cdb3a3795 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_ops_utils_test.cc +++ b/tensorflow/core/tpu/kernels/sparse_core_ops_utils_test.cc @@ -25,11 +25,11 @@ namespace { TEST(ConvertSplitsAndBackTest, Split0) { const int max_division_level = 6; - int64 original_split = 0; + int64_t original_split = 0; std::vector actual_buckets = ConvertBinarySplitsToBucketSplits(original_split, max_division_level); std::vector expected_buckets = {}; - int64 re_split = + int64_t re_split = ConvertBucketSplitsToBinarySplits(expected_buckets, max_division_level); ASSERT_EQ(re_split, original_split); } @@ -37,11 +37,11 @@ TEST(ConvertSplitsAndBackTest, Split0) { TEST(ConvertSplitsAndBackTest, Split2) { const int max_division_level = 6; - int64 original_split = 2; + int64_t original_split = 2; std::vector actual_buckets = ConvertBinarySplitsToBucketSplits(original_split, max_division_level); std::vector expected_buckets = {16}; - int64 re_split = + int64_t re_split = ConvertBucketSplitsToBinarySplits(expected_buckets, max_division_level); ASSERT_EQ(re_split, original_split); } @@ -49,11 +49,11 @@ TEST(ConvertSplitsAndBackTest, Split2) { TEST(ConvertSplitsAndBackTest, Split3) { const int max_division_level = 6; - int64 original_split = 3; + int64_t original_split = 3; std::vector actual_buckets = ConvertBinarySplitsToBucketSplits(original_split, max_division_level); std::vector expected_buckets = {16, 32}; - int64 re_split = + int64_t re_split = ConvertBucketSplitsToBinarySplits(expected_buckets, max_division_level); ASSERT_EQ(re_split, original_split); } diff --git a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc index 0815f742b4e9e5..ddd47e0d53c701 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc +++ b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.cc @@ -118,9 +118,9 @@ absl::Status ValidateInputs(const Tensor& indices_or_row_splits, } absl::Status ComputeRowIdsBeforePadding(const Tensor& indices_or_row_splits, - const int32 total_id_count, - const int32 sample_count, - int32* row_ids_before_padding, + const int32_t total_id_count, + const int32_t sample_count, + int32_t* row_ids_before_padding, std::vector shape_strides) { // The only difference between dense tensor, sparse tensor and ragged tensor // is the row ids output. @@ -129,7 +129,7 @@ absl::Status ComputeRowIdsBeforePadding(const Tensor& indices_or_row_splits, // Row ids are just the index ids. // Note: this path is also taken when the input is a ragged/sparse tensor // with 0 elements. In that case, the row_ids will just be empty as well. 
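// [Editor's sketch; a stand-alone re-derivation, not the library code.] The
// Split2/Split3 tests above (max_division_level = 6, buckets {16} and
// {16, 32}) pin down the encoding behind ConvertBinarySplitsToBucketSplits /
// ConvertBucketSplitsToBinarySplits: bit i of the split word names node i of
// an implicit binary tree over the bucket range [0, 2^max_division_level),
// and the node at level k, section j is the split point
// (2*j + 1) * 2^(max_division_level - k - 1).
#include <cassert>
#include <cstdint>
#include <vector>

#include "absl/numeric/bits.h"

std::vector<int> BinarySplitsToBuckets(int64_t split, int max_division_level) {
  std::vector<int> buckets;
  uint32_t index = 0;  // Flat node index; level k occupies [2^k - 1, 2^(k+1) - 2].
  while (split > 0) {
    if (split % 2 == 1) {
      const int level = absl::bit_width(index + 1) - 1;
      const int section = index - ((1u << level) - 1);
      buckets.push_back((2 * section + 1) << (max_division_level - level - 1));
    }
    split >>= 1;
    ++index;
  }
  return buckets;
}

int64_t BucketsToBinarySplits(const std::vector<int>& buckets,
                              int max_division_level) {
  int64_t split = 0;
  for (int b : buckets) {
    int level = max_division_level - 1;
    while (b > 0 && b % 2 == 0) {  // Strip trailing zeros to find the level.
      b >>= 1;
      --level;
    }
    split |= int64_t{1} << (((1u << level) - 1) + b / 2);
  }
  return split;
}

int main() {
  assert(BinarySplitsToBuckets(0, 6).empty());      // Mirrors the Split0 test.
  assert(BucketsToBinarySplits({16}, 6) == 2);      // Mirrors the Split2 test.
  assert(BucketsToBinarySplits({16, 32}, 6) == 3);  // Mirrors the Split3 test.
  return 0;
}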
- for (int32 i = 0; i < total_id_count; ++i) { + for (int32_t i = 0; i < total_id_count; ++i) { *(row_ids_before_padding + i) = i; } } else if (indices_or_row_splits.dims() == 2 && @@ -140,12 +140,12 @@ absl::Status ComputeRowIdsBeforePadding(const Tensor& indices_or_row_splits, // For 2D sparse tensor, as we always combine on the last dimension. // The row ids are just the sample ids which is the first dim of the // indices. - auto indices_matrix = indices_or_row_splits.matrix(); + auto indices_matrix = indices_or_row_splits.matrix(); // TODO(b/432045101): remove this once the bug is fixed. if (indices_matrix.dimension(1) == 2) { - int32 previous_row_id = -1; - for (int32 i = 0; i < total_id_count; ++i) { - int32 current_row_id = indices_matrix(i, 0); + int32_t previous_row_id = -1; + for (int32_t i = 0; i < total_id_count; ++i) { + int32_t current_row_id = indices_matrix(i, 0); if (current_row_id < previous_row_id) { return absl::InvalidArgumentError( "Invalid indices_or_row_splits input, indices of SparseTensor " @@ -173,7 +173,7 @@ absl::Status ComputeRowIdsBeforePadding(const Tensor& indices_or_row_splits, "Invalid shape_strides input, expected non-empty shape_strides for " "SparseTensor with rank > 2."); } - int32 previous_row_id = -1; + int32_t previous_row_id = -1; int32_t rank = indices_matrix.dimension(1) - 1; for (int32_t i = 0; i < total_id_count; ++i) { int32_t current_row_id = 0; @@ -205,10 +205,10 @@ absl::Status ComputeRowIdsBeforePadding(const Tensor& indices_or_row_splits, } else if (indices_or_row_splits.dims() == 1 && indices_or_row_splits.NumElements() > 0) { // Ragged tensor to COO format. - const int32* indices_or_row_splits_ptr = - indices_or_row_splits.flat().data(); - int32 current_row_id = -1; - for (int32 i = 0; i < total_id_count; ++i) { + const int32_t* indices_or_row_splits_ptr = + indices_or_row_splits.flat().data(); + int32_t current_row_id = -1; + for (int32_t i = 0; i < total_id_count; ++i) { while (i == *(indices_or_row_splits_ptr + 1 + current_row_id)) { current_row_id += 1; } @@ -308,7 +308,7 @@ absl::Status SortDedupAndCountStatsOfCooTensor( uint32_t previous_id_array_index = 0; for (int32_t index = 0; index < total_id_count; ++index) { uint64_t item = per_feature_col_ids_index_list[index]; - int32 col_id = item >> 32; + int32_t col_id = item >> 32; uint32_t id_array_index = item & 0xffffffff; int32_t row_id = *(row_ids_ptr + id_array_index); // If the row ids and col ids are both same as the previous one, @@ -362,9 +362,9 @@ class ConvertToCooTensorOp : public OpKernel { OP_REQUIRES_OK(ctx, ValidateInputs(*indices_or_row_splits, *values, *weights, sample_count_)); - const int32 total_id_count = values->NumElements(); + const int32_t total_id_count = values->NumElements(); - auto row_ids_before_dedup = std::make_unique(total_id_count); + auto row_ids_before_dedup = std::make_unique(total_id_count); OP_REQUIRES_OK(ctx, ComputeRowIdsBeforePadding( *indices_or_row_splits, total_id_count, @@ -382,14 +382,14 @@ class ConvertToCooTensorOp : public OpKernel { auto combiner_scale_transform_fn = GetCombinerScaleTransformFunction(combiner_); - const int32* row_ids_before_dedup_ptr = row_ids_before_dedup.get(); - const int32* values_ptr = values->flat().data(); + const int32_t* row_ids_before_dedup_ptr = row_ids_before_dedup.get(); + const int32_t* values_ptr = values->flat().data(); const float* weights_ptr = weights->flat().data(); // Dedup the ids within one sample by just checking the adjacent ids. This // will NOT result in a full deduplication. 
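// [Editor's sketch; not the TF kernel.] For the ragged-tensor branch of
// ComputeRowIdsBeforePadding above, the 1-D row_splits input is expanded into
// one row id per value: row_splits {0, 2, 2, 5} (three samples, five values)
// becomes row ids {0, 0, 2, 2, 2}, with the empty middle sample skipped.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<int32_t> RowSplitsToRowIds(const std::vector<int32_t>& row_splits) {
  std::vector<int32_t> row_ids;
  if (row_splits.empty()) return row_ids;
  const int32_t total_id_count = row_splits.back();
  row_ids.reserve(total_id_count);
  const int32_t num_rows = static_cast<int32_t>(row_splits.size()) - 1;
  int32_t row = 0;
  for (int32_t i = 0; i < total_id_count; ++i) {
    // Advance past every sample whose end offset has already been reached.
    while (row + 1 < num_rows && i >= row_splits[row + 1]) ++row;
    row_ids.push_back(row);
  }
  return row_ids;
}

int main() {
  assert((RowSplitsToRowIds({0, 2, 2, 5}) ==
          std::vector<int32_t>{0, 0, 2, 2, 2}));
  return 0;
}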
- std::vector row_ids; - std::vector col_ids; + std::vector row_ids; + std::vector col_ids; std::vector gains; row_ids.reserve(total_id_count); col_ids.reserve(total_id_count); @@ -400,8 +400,8 @@ class ConvertToCooTensorOp : public OpKernel { const float gain = *weights_ptr; const float rescaled_gain = combiner_scale_contribution_fn(gain); for (int token_id = 0; token_id < total_id_count; ++token_id) { - const int32 row_id = *(row_ids_before_dedup_ptr + token_id); - const int32 col_id = *(values_ptr + token_id); + const int32_t row_id = *(row_ids_before_dedup_ptr + token_id); + const int32_t col_id = *(values_ptr + token_id); if (gains_rescale.has_value()) { // Compute the gain rescale before doing the dedup. (*gains_rescale)[row_id] += rescaled_gain; @@ -417,8 +417,8 @@ class ConvertToCooTensorOp : public OpKernel { } } else { for (int token_id = 0; token_id < total_id_count; ++token_id) { - const int32 row_id = *(row_ids_before_dedup_ptr + token_id); - const int32 col_id = *(values_ptr + token_id); + const int32_t row_id = *(row_ids_before_dedup_ptr + token_id); + const int32_t col_id = *(values_ptr + token_id); const float gain = *(weights_ptr + token_id); if (gains_rescale.has_value()) { // Compute the gain rescale before doing the dedup. @@ -435,7 +435,7 @@ class ConvertToCooTensorOp : public OpKernel { } } - const int32 output_id_count = row_ids.size(); + const int32_t output_id_count = row_ids.size(); Tensor* gains_tensor; OP_REQUIRES_OK(ctx, @@ -450,8 +450,8 @@ class ConvertToCooTensorOp : public OpKernel { ctx, ctx->allocate_output("col_ids", TensorShape({output_id_count}), &col_ids_tensor)); - int32* row_ids_tensor_ptr = row_ids_tensor->flat().data(); - int32* col_ids_tensor_ptr = col_ids_tensor->flat().data(); + int32_t* row_ids_tensor_ptr = row_ids_tensor->flat().data(); + int32_t* col_ids_tensor_ptr = col_ids_tensor->flat().data(); float* gains_tensor_ptr = gains_tensor->flat().data(); if (gains_rescale.has_value()) { @@ -535,11 +535,11 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { feature_width_, &max_ids_per_partition, &max_unique_ids_per_partition)); - const int32* row_ids_tensor_ptr = row_ids->flat().data(); - const int32* col_ids_tensor_ptr = col_ids->flat().data(); + const int32_t* row_ids_tensor_ptr = row_ids->flat().data(); + const int32_t* col_ids_tensor_ptr = col_ids->flat().data(); const float* gains_tensor_ptr = gains->flat().data(); - const int64* splits_tensor_ptr = splits->flat().data(); - const int32* id_counts_tensor_ptr = id_counts->flat().data(); + const int64_t* splits_tensor_ptr = splits->flat().data(); + const int32_t* id_counts_tensor_ptr = id_counts->flat().data(); const int32_t total_id_count = row_ids->NumElements(); @@ -556,9 +556,9 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { const int max_division_level = GetMinibatchMaxDivisionLevel(); - const int32 kMaxDivisions = 1 << max_division_level; + const int32_t kMaxDivisions = 1 << max_division_level; - int64 binary_splits = 0; + int64_t binary_splits = 0; for (int i = 0; i < splits->NumElements(); ++i) { binary_splits |= *(splits_tensor_ptr + i); } @@ -566,7 +566,7 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { std::vector bucket_splits = ConvertBinarySplitsToBucketSplits(binary_splits, max_division_level); - const int32 num_minibatch_per_sc = bucket_splits.size() + 1; + const int32_t num_minibatch_per_sc = bucket_splits.size() + 1; 
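// [Editor's sketch; illustrative only.] The loop above dedups only *adjacent*
// duplicates: walking the (row, col, gain) triples in order, a triple whose
// row and col both match the previous entry folds its gain into that entry
// instead of emitting a new id. A stand-alone version of that idea:
#include <cassert>
#include <cstdint>
#include <vector>

struct CooTensor {
  std::vector<int32_t> row_ids;
  std::vector<int32_t> col_ids;
  std::vector<float> gains;
};

CooTensor DedupAdjacent(const std::vector<int32_t>& rows,
                        const std::vector<int32_t>& cols,
                        const std::vector<float>& gains) {
  CooTensor out;
  for (size_t i = 0; i < rows.size(); ++i) {
    if (!out.row_ids.empty() && out.row_ids.back() == rows[i] &&
        out.col_ids.back() == cols[i]) {
      out.gains.back() += gains[i];  // Same id as the previous token: merge.
    } else {
      out.row_ids.push_back(rows[i]);
      out.col_ids.push_back(cols[i]);
      out.gains.push_back(gains[i]);
    }
  }
  return out;
}

int main() {
  // Sample 0 looks up id 7 twice in a row; the repeat only accumulates gain.
  const CooTensor coo = DedupAdjacent({0, 0, 1}, {7, 7, 7}, {1.f, 1.f, 1.f});
  assert(coo.row_ids.size() == 2);
  assert(coo.gains[0] == 2.f);
  return 0;
}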
sparse_core_ops_stats_handler_->Record(StatsType::NUM_MINIBATCHES_PER_SC, num_minibatch_per_sc, device_name_, table_name_); @@ -588,7 +588,7 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { bucket_splits.insert(bucket_splits.begin(), 0); bucket_splits.push_back(kMaxDivisions); - const int32 max_ids_per_chip = max_ids_per_chip_per_sample_ * sample_count_; + const int32_t max_ids_per_chip = max_ids_per_chip_per_sample_ * sample_count_; OP_REQUIRES( ctx, max_ids_per_chip % xla_pad_size == 0, @@ -596,8 +596,8 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { "The max_ids_per_chip is set to be ", max_ids_per_chip, " which is not divisible by the xla_pad_size ", xla_pad_size, " ."))); - const int32 padded_row_pointers_size_per_sc = - xla::RoundUpTo(num_physical_replica, xla_pad_size); + const int32_t padded_row_pointers_size_per_sc = + xla::RoundUpTo(num_physical_replica, xla_pad_size); Tensor* row_pointers_tensor; OP_REQUIRES_OK(ctx, @@ -619,11 +619,12 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK( ctx, ctx->allocate_output("sorted_gains", TensorShape({max_ids_per_chip}), &sorted_gains_tensor)); - int32* row_pointers_tensor_ptr = row_pointers_tensor->flat().data(); - int32* sorted_sample_ids_tensor_ptr = - sorted_sample_ids_tensor->flat().data(); - int32* sorted_token_ids_tensor_ptr = - sorted_token_ids_tensor->flat().data(); + int32_t* row_pointers_tensor_ptr = + row_pointers_tensor->flat().data(); + int32_t* sorted_sample_ids_tensor_ptr = + sorted_sample_ids_tensor->flat().data(); + int32_t* sorted_token_ids_tensor_ptr = + sorted_token_ids_tensor->flat().data(); float* sorted_gains_tensor_ptr = sorted_gains_tensor->flat().data(); // This packed id count is used to track how many ids we have packed into @@ -631,8 +632,8 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { // dropped. int32_t packed_id_count = 0; - int32 global_index = 0; - int32 row_pointers_index = 0; + int32_t global_index = 0; + int32_t row_pointers_index = 0; for (int sc_id = 0; sc_id < num_sc_per_chip_; ++sc_id) { for (int i = 1; i < bucket_splits.size(); ++i) { for (int replica_id = 0; replica_id < num_physical_replica; @@ -686,8 +687,8 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { } *(row_pointers_tensor_ptr + row_pointers_index) = global_index; - int32 num_ids_to_pad_per_replica = - xla::RoundUpTo(global_index, xla_pad_size) - global_index; + int32_t num_ids_to_pad_per_replica = + xla::RoundUpTo(global_index, xla_pad_size) - global_index; std::fill_n(sorted_token_ids_tensor_ptr + global_index, num_ids_to_pad_per_replica, kXlaPadValue); std::fill_n(sorted_sample_ids_tensor_ptr + global_index, @@ -698,8 +699,8 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { ++row_pointers_index; } // Pad the row_pointers to be memory aligned. - int32 num_row_pointers_to_pad = - xla::RoundUpTo(row_pointers_index, xla_pad_size) - + int32_t num_row_pointers_to_pad = + xla::RoundUpTo(row_pointers_index, xla_pad_size) - row_pointers_index; std::fill_n(row_pointers_tensor_ptr + row_pointers_index, num_row_pointers_to_pad, global_index); @@ -718,7 +719,7 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { << " . 
This could potentially impact the model quality."; } - int32 row_pointers_unpadded_size = + int32_t row_pointers_unpadded_size = total_num_minibatch * padded_row_pointers_size_per_sc; Tensor* num_minibatches_per_physical_sparse_core_tensor; @@ -736,11 +737,11 @@ void GetMinibatchesInCsrWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, ctx->allocate_output("ids_unpadded_size", TensorShape({}), &ids_unpadded_size_tensor)); - num_minibatches_per_physical_sparse_core_tensor->flat()(0) = + num_minibatches_per_physical_sparse_core_tensor->flat()(0) = num_minibatch_per_sc; - row_pointers_unpadded_size_tensor->flat()(0) = + row_pointers_unpadded_size_tensor->flat()(0) = row_pointers_unpadded_size; - ids_unpadded_size_tensor->flat()(0) = ids_unpadded_size; + ids_unpadded_size_tensor->flat()(0) = ids_unpadded_size; } #ifdef LIBTPU_ON_GCE @@ -778,7 +779,7 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, ctx->input("program_key", &program_key_t)); tstring program_key = program_key_t->vec()(0); - int32 per_sc_sample_count = sample_count_ / num_sc_per_chip_; + int32_t per_sc_sample_count = sample_count_ / num_sc_per_chip_; int64_t max_ids_per_partition = -1; int64_t max_unique_ids_per_partition = -1; @@ -802,10 +803,10 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { const Tensor* gains; OP_REQUIRES_OK(ctx, ctx->input("gains", &gains)); - const int32 total_id_count = row_ids->NumElements(); + const int32_t total_id_count = row_ids->NumElements(); - const int32* row_ids_ptr = row_ids->flat().data(); - const int32* col_ids_ptr = col_ids->flat().data(); + const int32_t* row_ids_ptr = row_ids->flat().data(); + const int32_t* col_ids_ptr = col_ids->flat().data(); const float* gains_ptr = gains->flat().data(); #ifndef NDEBUG @@ -829,7 +830,7 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { const int max_division_level = GetMinibatchMaxDivisionLevel(); - const int32 kMaxDivisions = 1 << max_division_level; + const int32_t kMaxDivisions = 1 << max_division_level; // The id counts tensor is the running sum of the number of ids for all // buckets for all the replicas on each SparseCore. @@ -842,7 +843,7 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { TensorShape( {kMaxDivisions * num_sc_per_chip_ * num_physical_replica + 1}), &id_counts_tensor)); - int32* id_counts_tensor_ptr = id_counts_tensor->flat().data(); + int32_t* id_counts_tensor_ptr = id_counts_tensor->flat().data(); *id_counts_tensor_ptr = 0; const int32_t division_size = @@ -855,8 +856,8 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { // 0001011 -> 0001 01 1 // which mean split at level 0 section 0, level 1 section 0 and level // 2 section 0. the split points are [128, 256, 512]. - int64 pre_merge_splits = 0; - int64 after_merge_splits = 0; + int64_t pre_merge_splits = 0; + int64_t after_merge_splits = 0; // Vector of uint64_t storing the col ids in the upper 32 bit and the index // to the original id array in the lower 32 bit. 
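// [Editor's sketch; not the kernel code.] The packing described just above
// (col id in the upper 32 bits, original array index in the lower 32 bits)
// lets one std::sort order ids by column while remembering where each id came
// from, so row ids and gains can still be looked up after sorting:
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<uint64_t> PackAndSortByColId(const std::vector<int32_t>& col_ids) {
  std::vector<uint64_t> packed;
  packed.reserve(col_ids.size());
  for (uint32_t i = 0; i < col_ids.size(); ++i) {
    const uint64_t col = static_cast<uint32_t>(col_ids[i]);
    packed.push_back((col << 32) | i);
  }
  std::sort(packed.begin(), packed.end());
  return packed;
}

int main() {
  const std::vector<uint64_t> packed = PackAndSortByColId({42, 7, 42});
  // Smallest col id first (7), which originally sat at index 1.
  assert(static_cast<int32_t>(packed[0] >> 32) == 7);
  assert(static_cast<uint32_t>(packed[0] & 0xffffffff) == 1);
  return 0;
}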
std::vector> col_ids_index_list( @@ -926,7 +927,7 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { int32_t previous_row_id = -1; uint32_t previous_id_array_index = 0; for (uint64_t item : col_ids_index_list[sc_id]) { - int32 col_id = item >> 32; + int32_t col_id = item >> 32; uint32_t id_array_index = item & 0xffffffff; int32_t row_id = *(row_ids_ptr + id_array_index); // If the row ids and col ids are both same as the previous one, @@ -1027,9 +1028,9 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { if (level > 0 && (pre_merge_splits & (1LL << (pre_start_bit_pos + (section >> 1)))) == 0) continue; - int32 id_count = id_counter[(section + 1) * section_size] - - id_counter[section * section_size]; - int32 unique_id_count = + int32_t id_count = id_counter[(section + 1) * section_size] - + id_counter[section * section_size]; + int32_t unique_id_count = unique_id_counter[(section + 1) * section_size] - unique_id_counter[section * section_size]; // If the number of ids or unique ids exceeds the limit, We need to @@ -1155,17 +1156,17 @@ void GetMinibatchSplitsWithPhysicalReplicaOp::Compute(OpKernelContext* ctx) { Tensor* splits_tensor; OP_REQUIRES_OK( ctx, ctx->allocate_output("splits", TensorShape({}), &splits_tensor)); - splits_tensor->flat()(0) = after_merge_splits; + splits_tensor->flat()(0) = after_merge_splits; Tensor* max_ids_tensor; OP_REQUIRES_OK( ctx, ctx->allocate_output("max_ids", TensorShape({}), &max_ids_tensor)); - max_ids_tensor->flat()(0) = this_max_ids; + max_ids_tensor->flat()(0) = this_max_ids; Tensor* max_uniques_tensor; OP_REQUIRES_OK(ctx, ctx->allocate_output("max_uniques", TensorShape({}), &max_uniques_tensor)); - max_uniques_tensor->flat()(0) = this_max_uniques; + max_uniques_tensor->flat()(0) = this_max_uniques; } #ifdef LIBTPU_ON_GCE @@ -1197,12 +1198,12 @@ void StoreMinibatchStatisticsInFdoOp::Compute(OpKernelContext* ctx) { const Tensor* max_ids_t; OP_REQUIRES_OK(ctx, ctx->input("max_ids", &max_ids_t)); - int64_t max_ids = max_ids_t->scalar()(); + int64_t max_ids = max_ids_t->scalar()(); const Tensor* max_uniques_t; OP_REQUIRES_OK(ctx, ctx->input("max_uniques", &max_uniques_t)); - int64_t max_uniques = max_uniques_t->scalar()(); + int64_t max_uniques = max_uniques_t->scalar()(); - int32 per_sc_sample_count = sample_count_ / num_sc_per_chip_; + int32_t per_sc_sample_count = sample_count_ / num_sc_per_chip_; int64_t max_ids_per_partition = -1; int64_t max_unique_ids_per_partition = -1; @@ -1264,10 +1265,10 @@ void ConvertToListOfSparseCoreCooTensorsOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, ValidateInputs(*indices_or_row_splits, *values, *weights, sample_count_)); - const int32 total_id_count = values->NumElements(); + const int32_t total_id_count = values->NumElements(); - auto row_ids_before_dedup = std::unique_ptr( - new std::remove_extent_t[total_id_count]); + auto row_ids_before_dedup = std::unique_ptr( + new std::remove_extent_t[total_id_count]); OP_REQUIRES_OK(ctx, ComputeRowIdsBeforePadding(*indices_or_row_splits, total_id_count, sample_count_, @@ -1285,14 +1286,14 @@ void ConvertToListOfSparseCoreCooTensorsOp::Compute(OpKernelContext* ctx) { auto combiner_scale_transform_fn = GetCombinerScaleTransformFunction(combiner_); - const int32* row_ids_before_dedup_ptr = row_ids_before_dedup.get(); - const int32* values_ptr = values->flat().data(); + const int32_t* row_ids_before_dedup_ptr = row_ids_before_dedup.get(); + const int32_t* values_ptr = values->flat().data(); const float* 
weights_ptr = weights->flat().data(); // Dedup the ids within one sample by just checking the adjacent ids. This // will NOT result in a full deduplication. - std::vector row_ids; - std::vector col_ids; + std::vector row_ids; + std::vector col_ids; std::vector gains; row_ids.reserve(total_id_count); col_ids.reserve(total_id_count); @@ -1306,8 +1307,8 @@ void ConvertToListOfSparseCoreCooTensorsOp::Compute(OpKernelContext* ctx) { const float gain = *weights_ptr; const float rescaled_gain = combiner_scale_contribution_fn(gain); for (int token_id = 0; token_id < total_id_count; ++token_id) { - const int32 row_id = *(row_ids_before_dedup_ptr + token_id); - const int32 col_id = *(values_ptr + token_id); + const int32_t row_id = *(row_ids_before_dedup_ptr + token_id); + const int32_t col_id = *(values_ptr + token_id); if (gains_rescale.has_value()) { // Compute the gain rescale before doing the dedup. (*gains_rescale)[row_id] += rescaled_gain; @@ -1324,8 +1325,8 @@ void ConvertToListOfSparseCoreCooTensorsOp::Compute(OpKernelContext* ctx) { } } else { for (int token_id = 0; token_id < total_id_count; ++token_id) { - const int32 row_id = *(row_ids_before_dedup_ptr + token_id); - const int32 col_id = *(values_ptr + token_id); + const int32_t row_id = *(row_ids_before_dedup_ptr + token_id); + const int32_t col_id = *(values_ptr + token_id); const float gain = *(weights_ptr + token_id); if (gains_rescale.has_value()) { // Compute the gain rescale before doing the dedup. @@ -1371,8 +1372,8 @@ void ConvertToListOfSparseCoreCooTensorsOp::Compute(OpKernelContext* ctx) { ctx, col_ids_output_list.allocate( i, TensorShape({per_sc_token_count[i]}), &col_ids_tensor)); - int32* row_ids_tensor_ptr = row_ids_tensor->flat().data(); - int32* col_ids_tensor_ptr = col_ids_tensor->flat().data(); + int32_t* row_ids_tensor_ptr = row_ids_tensor->flat().data(); + int32_t* col_ids_tensor_ptr = col_ids_tensor->flat().data(); float* gains_tensor_ptr = gains_tensor->flat().data(); WriteToOutputTensor( @@ -1384,10 +1385,10 @@ void ConvertToListOfSparseCoreCooTensorsOp::Compute(OpKernelContext* ctx) { } void ConvertToListOfSparseCoreCooTensorsOp::WriteToOutputTensor( - int32* row_ids, int32* col_ids, float* gains, int32* row_ids_tensor_ptr, - int32* col_ids_tensor_ptr, float* gains_tensor_ptr, int32_t begin_index, - int32_t end_index, int32_t sc_id, - std::optional> gains_rescale) { + int32_t* row_ids, int32_t* col_ids, float* gains, + int32_t* row_ids_tensor_ptr, int32_t* col_ids_tensor_ptr, + float* gains_tensor_ptr, int32_t begin_index, int32_t end_index, + int32_t sc_id, std::optional> gains_rescale) { tsl::profiler::TraceMe traceme( "ConvertToListOfSparseCoreCooTensorsOp::WriteToOutputTensor"); if (gains_rescale.has_value()) { @@ -1407,12 +1408,13 @@ void ConvertToListOfSparseCoreCooTensorsOp::WriteToOutputTensor( } } else { std::transform(row_ids + begin_index, row_ids + end_index, - row_ids_tensor_ptr, [this, &sc_id](int32 row_id) -> int32 { + row_ids_tensor_ptr, + [this, &sc_id](int32_t row_id) -> int32_t { return row_id % per_sc_sample_count_ + per_sc_row_offset_ + per_sc_stacked_table_sample_count_ * sc_id; }); std::transform(col_ids + begin_index, col_ids + end_index, - col_ids_tensor_ptr, [this](int32 col_id) -> int32 { + col_ids_tensor_ptr, [this](int32_t col_id) -> int32_t { return ((col_id + col_shift_) & num_sc_shards_bit_mod_) + (col_id & num_sc_shards_bit_mod_inv_) + col_offset_; }); @@ -1804,7 +1806,7 @@ void ConvertToSparseCoreCsrWrappedCooTensorOp::Compute(OpKernelContext* ctx) { } *(row_pointers_tensor_ptr 
+ row_pointers_index) = global_index; - int32 num_ids_to_pad_per_replica = + int32_t num_ids_to_pad_per_replica = xla::RoundUpTo(global_index, xla_pad_size) - global_index; std::fill_n(sorted_token_ids_tensor_ptr + global_index, @@ -1818,8 +1820,8 @@ void ConvertToSparseCoreCsrWrappedCooTensorOp::Compute(OpKernelContext* ctx) { ++row_pointers_index; } // Pad the row_pointers to be memory aligned. - int32 num_row_pointers_to_pad = - xla::RoundUpTo(row_pointers_index, xla_pad_size) - + int32_t num_row_pointers_to_pad = + xla::RoundUpTo(row_pointers_index, xla_pad_size) - row_pointers_index; std::fill_n(row_pointers_tensor_ptr + row_pointers_index, num_row_pointers_to_pad, global_index); @@ -1838,7 +1840,7 @@ void ConvertToSparseCoreCsrWrappedCooTensorOp::Compute(OpKernelContext* ctx) { << " . This could potentially impact the model quality."; } - int32 row_pointers_unpadded_size = + int32_t row_pointers_unpadded_size = total_num_minibatch * padded_row_pointers_size_per_sc; Tensor* num_minibatches_per_sc_tensor; @@ -1855,10 +1857,10 @@ void ConvertToSparseCoreCsrWrappedCooTensorOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, ctx->allocate_output("ids_unpadded_size", TensorShape({}), &ids_unpadded_size_tensor)); - num_minibatches_per_sc_tensor->flat()(0) = num_minibatch_per_sc; - row_pointers_unpadded_size_tensor->flat()(0) = + num_minibatches_per_sc_tensor->flat()(0) = num_minibatch_per_sc; + row_pointers_unpadded_size_tensor->flat()(0) = row_pointers_unpadded_size; - ids_unpadded_size_tensor->flat()(0) = ids_unpadded_size; + ids_unpadded_size_tensor->flat()(0) = ids_unpadded_size; } REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.h b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.h index 05bc79e416de8a..706622ae1dfbe4 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.h +++ b/tensorflow/core/tpu/kernels/sparse_core_preprocess_ops.h @@ -34,15 +34,15 @@ namespace tensorflow { // Struct to describe an embedding lookup input data. struct EmbeddingLookupInput { // Which replica it belongs. - int32 replica_id; + int32_t replica_id; // Token id. - int32 token_id; + int32_t token_id; // Sample id. - int32 sample_id; + int32_t sample_id; // Gain. float gain; - EmbeddingLookupInput(int32 replica_id, int32 token_id, int32 sample_id, + EmbeddingLookupInput(int32_t replica_id, int32_t token_id, int32_t sample_id, float gain) : replica_id(replica_id), token_id(token_id), @@ -56,9 +56,9 @@ absl::Status ValidateInputs(const Tensor& indices_or_row_splits, // Compute the row id list before padding. 
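// [Editor's sketch; illustrative only.] The padding above rounds each id count
// up to the XLA pad size with xla::RoundUpTo and fills the tail with
// kXlaPadValue (INT32_MAX) so every SparseCore input gets a static, aligned
// shape. The arithmetic amounts to:
#include <cassert>
#include <cstdint>
#include <limits>
#include <vector>

constexpr int32_t kPadValue = std::numeric_limits<int32_t>::max();

// Same contract as xla::RoundUpTo for non-negative values.
int32_t RoundUpTo(int32_t value, int32_t multiple) {
  return (value + multiple - 1) / multiple * multiple;
}

void PadToAlignedSize(std::vector<int32_t>& ids, int32_t pad_size) {
  ids.resize(RoundUpTo(static_cast<int32_t>(ids.size()), pad_size), kPadValue);
}

int main() {
  std::vector<int32_t> ids = {3, 1, 4, 1, 5};
  PadToAlignedSize(ids, /*pad_size=*/8);
  assert(ids.size() == 8 && ids.back() == kPadValue);
  return 0;
}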
absl::Status ComputeRowIdsBeforePadding(const Tensor& indices_or_row_splits, - int32 total_id_count, - int32 sample_count, - int32* row_ids_before_padding, + int32_t total_id_count, + int32_t sample_count, + int32_t* row_ids_before_padding, std::vector shape_strides = {}); class GetMinibatchesInCsrWithPhysicalReplicaOp : public OpKernel { @@ -101,7 +101,7 @@ class GetMinibatchSplitsWithPhysicalReplicaOp : public OpKernel { void Compute(OpKernelContext* ctx) override; protected: - virtual void CalculateHeadroom(int32 this_max_ids, int32 this_max_uniques, + virtual void CalculateHeadroom(int32_t this_max_ids, int32_t this_max_uniques, tstring program_key, int64_t max_ids_per_partition, int64_t max_unique_ids_per_partition, @@ -138,7 +138,7 @@ class StoreMinibatchStatisticsInFdoOp : public OpKernel { void Compute(OpKernelContext* ctx) override; protected: - virtual void CalculateHeadroom(int32 this_max_ids, int32 this_max_uniques, + virtual void CalculateHeadroom(int32_t this_max_ids, int32_t this_max_uniques, tstring program_key, int64_t max_ids_per_partition, int64_t max_unique_ids_per_partition) {} @@ -165,10 +165,11 @@ class ConvertToListOfSparseCoreCooTensorsOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: - void WriteToOutputTensor(int32* row_ids, int32* col_ids, float* gains, - int32* row_ids_tensor_ptr, int32* col_ids_tensor_ptr, - float* gains_tensor_ptr, int32_t begin_index, - int32_t end_index, int32_t sc_id, + void WriteToOutputTensor(int32_t* row_ids, int32_t* col_ids, float* gains, + int32_t* row_ids_tensor_ptr, + int32_t* col_ids_tensor_ptr, float* gains_tensor_ptr, + int32_t begin_index, int32_t end_index, + int32_t sc_id, std::optional> gains_rescale); int sample_count_; int num_sc_per_chip_; diff --git a/tensorflow/core/tpu/kernels/sparse_core_xla_ops.cc b/tensorflow/core/tpu/kernels/sparse_core_xla_ops.cc index 50e86ba0198602..f3576628d048bc 100644 --- a/tensorflow/core/tpu/kernels/sparse_core_xla_ops.cc +++ b/tensorflow/core/tpu/kernels/sparse_core_xla_ops.cc @@ -143,7 +143,7 @@ class XlaSparseDenseMatmulOp : public XlaOpKernel { void Compile(XlaOpKernelContext* ctx) override { xla::XlaBuilder* builder = ctx->builder(); - const int32 num_physical_replica = + const int32_t num_physical_replica = stream_executor::tpu::OpsApiFn()->TpuTopology_AvailableCoreCountFn( /*mesh_state=*/nullptr, /*tpu_core_type=*/TpuCoreTypeEnum::kEmbeddingV2); @@ -662,7 +662,7 @@ class XlaSparseDenseMatmulGradWithCsrInputBase : public XlaOpKernel { errors::InvalidArgument( "activations input has non static or non-rank 2 shape: ", activation_shape.ToString())); - int64 num_samples_per_chip = activation_shape.dimensions(0); + int64_t num_samples_per_chip = activation_shape.dimensions(0); OP_REQUIRES(ctx, num_samples_per_chip % num_sparsecores_per_device_ == 0, errors::InvalidArgument( "num_samples_per_chip ", num_samples_per_chip, diff --git a/tensorflow/core/tpu/kernels/topk_ops.cc b/tensorflow/core/tpu/kernels/topk_ops.cc index 16334632946c25..22d18e39220146 100644 --- a/tensorflow/core/tpu/kernels/topk_ops.cc +++ b/tensorflow/core/tpu/kernels/topk_ops.cc @@ -51,21 +51,21 @@ xla::XlaOp CreateKthOrderStatisticComputation(xla::XlaBuilder* builder, const int64_t width = input_shape.dim_size(1); xla::XlaOp input_sm32 = xla::BitcastConvertType(input, xla::S32); - xla::XlaOp zero_r0 = xla::ConstantR0(builder, 0); + xla::XlaOp zero_r0 = xla::ConstantR0(builder, 0); xla::XlaOp zero_r1 = xla::Broadcast(zero_r0, {height}); xla::XlaOp zero_r2 = xla::Broadcast(zero_r0, {height, 
width}); - xla::XlaOp max_r0 = xla::ConstantR0(builder, 0x7FFFFFFF); + xla::XlaOp max_r0 = xla::ConstantR0(builder, 0x7FFFFFFF); xla::XlaOp max_r1 = xla::Broadcast(max_r0, {height}); // Start at positive zero, so that pivot is always less than top. - xla::XlaOp negative_zero_r0 = xla::ConstantR0(builder, 0x80000000); + xla::XlaOp negative_zero_r0 = xla::ConstantR0(builder, 0x80000000); xla::XlaOp negative_zero_r1 = xla::Broadcast(negative_zero_r0, {height}); xla::XlaOp top_r1 = zero_r1; - for (uint32 mask = 1U << 31; mask; mask >>= 1) { + for (uint32_t mask = 1U << 31; mask; mask >>= 1) { xla::XlaOp broadcast_mask_r1 = - xla::Broadcast(xla::ConstantR0(builder, mask), {height}); + xla::Broadcast(xla::ConstantR0(builder, mask), {height}); // The first iteration of the loop determines if the kth element // is positive or negative. If the kth element is negative, we @@ -111,14 +111,14 @@ class KthOrderStatistic : public XlaOpKernel { ctx, input_shape.dims() == 2, InvalidArgument("input must be rank-2: ", input_shape.DebugString())); - xla::XlaOp k = xla::ConstantR0(builder, k_); + xla::XlaOp k = xla::ConstantR0(builder, k_); xla::XlaOp kth_order_statistics = CreateKthOrderStatisticComputation(builder, input_shape, input, k); ctx->SetOutput(0, kth_order_statistics); } private: - int32 k_; + int32_t k_; }; REGISTER_XLA_OP(Name("KthOrderStatistic"), KthOrderStatistic); @@ -269,21 +269,21 @@ xla::XlaOp CreateMakeUnique(xla::XlaBuilder* builder, const xla::XlaOp input, // count_mask is used to mask away the low order bits to ensure // that every element is distinct. uint32_t next_power_of_two = absl::bit_ceil(width); - uint32 count_mask = ~(next_power_of_two - 1); + uint32_t count_mask = ~(next_power_of_two - 1); xla::XlaOp count_mask_r0 = xla::ConstantR0(builder, count_mask); xla::XlaOp count_mask_r2 = xla::Broadcast(count_mask_r0, {height, width}); // smallest_normal is the bit representation of the smallest // positive normal floating point number. The sign is zero, // exponent is one, and the fraction is zero. - uint32 smallest_normal = 1U << 23; + uint32_t smallest_normal = 1U << 23; xla::XlaOp smallest_normal_r0 = xla::ConstantR0(builder, smallest_normal); xla::XlaOp smallest_normal_r2 = xla::Broadcast(smallest_normal_r0, {height, width}); // Used to mask away the sign bit when computing the absolute // value. 
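// [Editor's note, rendered as a sketch; not the XLA computation above.]
// KthOrderStatistic bitcasts f32 values to S32 and binary-searches over the
// bits from the sign bit down. That works because IEEE-754 floats are nearly
// radix-sortable: for non-negative values the raw bit pattern already orders
// like the number, and only the sign bit needs special handling (which the
// first loop iteration above provides). The classic monotonic transform below
// shows that property in isolation:
#include <cassert>
#include <cstdint>
#include <cstring>

// Maps a finite float to a uint32_t whose unsigned order matches the float's
// numeric order; negative numbers get all bits flipped, non-negative numbers
// get the sign bit set.
uint32_t OrderedBits(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return (bits & 0x80000000u) ? ~bits : (bits | 0x80000000u);
}

int main() {
  assert(OrderedBits(-2.0f) < OrderedBits(-1.0f));
  assert(OrderedBits(-1.0f) < OrderedBits(0.0f));
  assert(OrderedBits(0.0f) < OrderedBits(1.5f));
  assert(OrderedBits(1.5f) < OrderedBits(2.0f));
  return 0;
}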
- uint32 low_bit_mask = ~(1U << 31); + uint32_t low_bit_mask = ~(1U << 31); xla::XlaOp low_bit_mask_r0 = xla::ConstantR0(builder, low_bit_mask); xla::XlaOp low_bit_mask_r2 = xla::Broadcast(low_bit_mask_r0, {height, width}); diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_rpc_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_rpc_lookup.h index e8666ec63e171a..06fde06bdcac84 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_rpc_lookup.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_rpc_lookup.h @@ -37,11 +37,11 @@ class TpuCompilationCacheRpcLookup : public TpuCompilationCacheLookup { public: using StubType = tpu::grpc::TpuCompilationCacheService::Stub; - TpuCompilationCacheRpcLookup(const string& server_address, + TpuCompilationCacheRpcLookup(const std::string& server_address, int64_t max_cache_size); ~TpuCompilationCacheRpcLookup() override = default; - absl::Status Lookup(const string& proto_key, + absl::Status Lookup(const std::string& proto_key, std::unique_ptr* entry, tpu::CompilationCacheFetchTarget fetch_target) override; @@ -49,11 +49,11 @@ class TpuCompilationCacheRpcLookup : public TpuCompilationCacheLookup { std::unique_ptr* entry, tpu::CompilationCacheFetchTarget fetch_target) override; - string DebugString() const override; + std::string DebugString() const override; private: // Helper method to make the RPC request to the central cache. - absl::Status RemoteLookupLocked(const string& local_proto_key, + absl::Status RemoteLookupLocked(const std::string& local_proto_key, const tpu::GetTpuProgramRequest& request, std::shared_ptr* cache_entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index a456a473c1a836..4f7af33e8c1c35 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -167,7 +167,7 @@ void TpuCompileOpKernelCommon::Compute(OpKernelContext* ctx) { }); absl::Status compile_status = ComputeInternal(ctx); - string status_payload; + std::string status_payload; // Construct payload if compile_status is not ok and there's no payload for // compilation yet. if (!compile_status.ok() && diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h index 66f7b02e6bc04d..56e2130495750c 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h @@ -178,7 +178,7 @@ class TpuCompileOpKernelCommon { std::string mlir_module_; // Fingerprint of the MLIR Module created once on construction to avoid paying // the cost on each invocation. - uint64 mlir_module_fingerprint_ = 0; + uint64_t mlir_module_fingerprint_ = 0; // Number of different programs to compile. This maps to number of cores in // each replica. 
@@ -198,7 +198,7 @@ class TpuCompileOpKernelCommon { absl::Status RegisterXLAFingerprints( const std::vector& arg_shapes, - TpuProgramGroupInterface* tpu_program_group, uint64 fingerprint); + TpuProgramGroupInterface* tpu_program_group, uint64_t fingerprint); }; } // namespace tpu diff --git a/tensorflow/core/tpu/kernels/tpu_embedding_engine_state_interface.h b/tensorflow/core/tpu/kernels/tpu_embedding_engine_state_interface.h index 73b0a492b3551c..a6bf93239dc3d4 100644 --- a/tensorflow/core/tpu/kernels/tpu_embedding_engine_state_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_embedding_engine_state_interface.h @@ -61,7 +61,7 @@ class TpuEmbeddingEngineStateInterface : public ResourceBase { return new TpuEmbeddingEngineStateInterface(state); } - string DebugString() const override { + std::string DebugString() const override { return "TpuEmbeddingEngineStateInterface"; } diff --git a/tensorflow/core/tpu/kernels/tpu_embedding_enqueue_ops.h b/tensorflow/core/tpu/kernels/tpu_embedding_enqueue_ops.h index e06c02c99b6cbb..46981718facdb4 100644 --- a/tensorflow/core/tpu/kernels/tpu_embedding_enqueue_ops.h +++ b/tensorflow/core/tpu/kernels/tpu_embedding_enqueue_ops.h @@ -31,7 +31,8 @@ absl::Status ValidateCombiners(absl::Span combiners); // Validates the `mode_override` input of the TPUEnqueue* ops, and, if correct, // sets the `mode` to pass on to the TPU Embedding manager. absl::Status GetValidatedModeOverride( - const string& mode_override, tpu::TPUEmbeddingConfiguration::Mode* mode); + const std::string& mode_override, + tpu::TPUEmbeddingConfiguration::Mode* mode); } // namespace tensorflow #endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_EMBEDDING_ENQUEUE_OPS_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_functional_ops.h b/tensorflow/core/tpu/kernels/tpu_functional_ops.h index 1d9e5cd57697ec..45c5fb52e1d9c9 100644 --- a/tensorflow/core/tpu/kernels/tpu_functional_ops.h +++ b/tensorflow/core/tpu/kernels/tpu_functional_ops.h @@ -84,18 +84,19 @@ GroupedEdges GroupTensorsForOutputPacking(Graph* graph, GraphShapeInfo* shape_info); absl::Status CreateConcatAndSplitNodesForInputTensor( - Graph* graph, const string& cluster_name, EdgeShapes* tpu_input_shapes, + Graph* graph, const std::string& cluster_name, EdgeShapes* tpu_input_shapes, const absl::flat_hash_map>& grouped_input_edges, int32_t minimum_input_tensors_packing, bool xla_spmd_input_sharded, const XlaShardingInfoMap& xla_sharding_info, const TpuReplicatedInputInfoMap& tpu_replicated_input_info); absl::Status CreateConcatAndSplitNodesForOutputTensor( - Graph* graph, const string& cluster_name, EdgeShapes* tpu_output_shapes, - GraphShapeInfo* tpu_inferred_info, GroupedEdges shape_to_output, - int32_t minimum_output_tensors_packing); + Graph* graph, const std::string& cluster_name, + EdgeShapes* tpu_output_shapes, GraphShapeInfo* tpu_inferred_info, + GroupedEdges shape_to_output, int32_t minimum_output_tensors_packing); -absl::Status InsertReshapeNodePairs(Graph* graph, const string& cluster_name, +absl::Status InsertReshapeNodePairs(Graph* graph, + const std::string& cluster_name, EdgeShapes* tpu_input_shapes, int num_cores_per_replica); @@ -172,7 +173,7 @@ class TPUPartitionedCallOp : public AsyncOpKernel { }; // This method is thread-safe. 
- absl::Status GetTpuCoreOrdinal(OpKernelContext* ctx, uint64 input_hash, + absl::Status GetTpuCoreOrdinal(OpKernelContext* ctx, uint64_t input_hash, int64_t* ordinal_selector_req_id, int32_t* core_ordinal); @@ -196,11 +197,10 @@ class TPUPartitionedCallOp : public AsyncOpKernel { // device_ordinal: The index of the TPU core that is scheduled to run // the computation. In the case of XLA SPMD, it is the "primary" core, which // is the smallest index of all the cores. - absl::Status InitializeShardedVarOnTPU(OpKernelContext* ctx, - const core::RefCountPtr& var, - std::vector& ndefs, - int split_dim, - const std::vector& tpu_devices) + absl::Status InitializeShardedVarOnTPU( + OpKernelContext* ctx, const core::RefCountPtr& var, + std::vector& ndefs, int split_dim, + const std::vector& tpu_devices) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Check if any of the immediate successors of node has attribute @@ -250,7 +250,7 @@ class TPUPartitionedCallOp : public AsyncOpKernel { absl::Status PlacementHelper( const DeviceSet& device_set, const GraphOptimizationPassOptions& optimization_options, - const string& function_name); + const std::string& function_name); // Partitions `graph`, populates `subgraphs` with the partitions, and runs // the post-partitioning graph optimization passes. absl::Status PartitionHelper( @@ -263,15 +263,15 @@ class TPUPartitionedCallOp : public AsyncOpKernel { // If `out_flib_def` is not null, it will be set to a copy of `flib_def_` and // used for instantiation. absl::Status InstantiatePartition( - const Graph& graph, const string& function_name, - const string& target_device, FHandle* handle, + const Graph& graph, const std::string& function_name, + const std::string& target_device, FHandle* handle, std::unique_ptr* out_flib_def) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Adds and instantiates functions for each subgraph in `subgraphs` after // rewriting nodes' `device_ordinal` attributes to match `replica_id` when // num_cores_per_replica == 1. absl::Status InstantiateFunctionsFromSubgraphs( - const DeviceSet& device_set, int replica_id, uint64 cache_hash, + const DeviceSet& device_set, int replica_id, uint64_t cache_hash, int num_cores_per_replica, std::unordered_map> subgraphs) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); @@ -344,7 +344,7 @@ class TPUPartitionedCallOp : public AsyncOpKernel { const std::string local_device_name_; // Maps from cache key to their corresponding functions, which are // represented as (device, handle) pairs. - gtl::FlatMap> partition_cache_ + gtl::FlatMap> partition_cache_ ABSL_GUARDED_BY(mu_); // A set contains seen ordinals. Used by variable initialization on TPU. @@ -362,7 +362,7 @@ class TPUPartitionedCallOp : public AsyncOpKernel { FunctionLibraryRuntime* library_runtime_; // Used to uniquify function names in `flib_def_`. - uint32 suffix_ = 0; + uint32_t suffix_ = 0; // Minimum number of run steps (batches) necessary to trigger xla autotuner. int autotuner_thresh_ = 0; @@ -371,7 +371,7 @@ class TPUPartitionedCallOp : public AsyncOpKernel { std::shared_ptr ordinal_selector_; // Maps input hash to TF fingerprint. 
- absl::flat_hash_map inputs_to_fingerprint_; + absl::flat_hash_map inputs_to_fingerprint_; // List of TPU devices std::vector tpu_devices_; diff --git a/tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h b/tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h index 6e84dde261bb24..1d50e75bb804b3 100644 --- a/tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h @@ -75,7 +75,7 @@ class TpuMeshStateInterface : public tensorflow::ResourceBase { mesh_state_, tpu_core_type); } - string DebugString() const override { return "TpuMeshStateInterface"; } + std::string DebugString() const override { return "TpuMeshStateInterface"; } private: XLA_TpuMeshState* mesh_state_; diff --git a/tensorflow/core/tpu/kernels/tpu_op_util.cc b/tensorflow/core/tpu/kernels/tpu_op_util.cc index 8d1d4861b6fcca..6da81d1ffefabe 100644 --- a/tensorflow/core/tpu/kernels/tpu_op_util.cc +++ b/tensorflow/core/tpu/kernels/tpu_op_util.cc @@ -73,8 +73,8 @@ std::string CreateConfigPrefix(const TPUCompileMetadataProto& metadata) { } } // namespace -uint64 CreateFingerprintWithNameAndShapes( - uint64 name, const std::vector& shapes) { +uint64_t CreateFingerprintWithNameAndShapes( + uint64_t name, const std::vector& shapes) { std::string shape_prefix = CreateShapePrefix(shapes); VLOG(2) << "CreateFingerprintWithNameAndShapes, name: " << name << ", shape_prefix: " << shape_prefix; @@ -85,7 +85,7 @@ uint64 CreateFingerprintWithNameAndShapes( // Return fingerprint_in_metadata if it's not empty; otherwise read input tensor // data to compute the fingerprint. std::string GuaranteedConstFingerprint( - const string& fingerprint_in_metadata, + const std::string& fingerprint_in_metadata, const OpInputList& guaranteed_constants) { if (fingerprint_in_metadata.empty()) { uint64_t fingerprint = 0; @@ -104,8 +104,8 @@ std::string GuaranteedConstFingerprint( // The `guaranteed_constants` must be passed as reference due to the lazy // evaluation of `guaranteed_const_fingerprint()` callback. TpuCompilationCacheKey CreateCompilationCacheKey( - absl::string_view function_name, uint64 function_library_fingerprint, - uint64 mlir_module_fingerprint, const OpInputList& guaranteed_constants, + absl::string_view function_name, uint64_t function_library_fingerprint, + uint64_t mlir_module_fingerprint, const OpInputList& guaranteed_constants, const std::vector& dynamic_shapes, const TPUCompileMetadataProto& metadata, const TpuMeshStateInterface& mesh_state, uint64_t session_id, @@ -151,7 +151,7 @@ TpuCompilationCacheKey CreateCompilationCacheKey( // reference based on the assumption that these variables lifetime is // managed through the `TPUCompileOpKernelImpl` that outlives the // lifetime of the compilation cache lookups. - string fingerprint; + std::string fingerprint; key.guaranteed_const_fingerprint = [&metadata, &guaranteed_constants, fingerprint]() mutable { if (fingerprint.empty()) { diff --git a/tensorflow/core/tpu/kernels/tpu_op_util.h b/tensorflow/core/tpu/kernels/tpu_op_util.h index d0ca805fec4757..df68fdaaff39e5 100644 --- a/tensorflow/core/tpu/kernels/tpu_op_util.h +++ b/tensorflow/core/tpu/kernels/tpu_op_util.h @@ -28,13 +28,13 @@ limitations under the License. namespace tensorflow { namespace tpu { // Creates a fingerprint given the name and the vector of shapes. 
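// [Editor's sketch; stand-in hash and invented prefix format.] The helper
// declared here combines a precomputed 64-bit fingerprint of the function name
// with a textual encoding of the argument shapes, so the same function
// compiled for different input shapes yields a different compilation-cache
// key. A toy version, using FNV-1a where TensorFlow uses Fingerprint64:
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

uint64_t Fnv1a64(const std::string& s, uint64_t seed) {
  uint64_t h = seed;
  for (unsigned char c : s) {
    h ^= c;
    h *= 1099511628211ull;
  }
  return h;
}

uint64_t FingerprintNameAndShapes(
    uint64_t name_fingerprint,
    const std::vector<std::vector<int64_t>>& shapes) {
  std::string shape_prefix;
  for (const auto& shape : shapes) {
    for (int64_t d : shape) shape_prefix += std::to_string(d) + ",";
    shape_prefix += ";";
  }
  // Fold the shape description into the name fingerprint.
  return Fnv1a64(shape_prefix, /*seed=*/name_fingerprint);
}

int main() {
  const uint64_t a = FingerprintNameAndShapes(42, {{2, 3}, {3}});
  const uint64_t b = FingerprintNameAndShapes(42, {{2, 4}, {3}});
  std::cout << "distinct keys: " << (a != b) << "\n";
  return 0;
}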
-uint64 CreateFingerprintWithNameAndShapes( - uint64 name, const std::vector& shapes); +uint64_t CreateFingerprintWithNameAndShapes( + uint64_t name, const std::vector& shapes); // Creates a unique compilation cache `key`. TpuCompilationCacheKey CreateCompilationCacheKey( - absl::string_view function_name, uint64 function_library_fingerprint, - uint64 mlir_module_fingerprint, const OpInputList& guaranteed_constants, + absl::string_view function_name, uint64_t function_library_fingerprint, + uint64_t mlir_module_fingerprint, const OpInputList& guaranteed_constants, const std::vector& dynamic_shapes, const TPUCompileMetadataProto& metadata, const TpuMeshStateInterface& mesh_state, uint64_t session_id = 0, diff --git a/tensorflow/core/tpu/kernels/tpu_ordinal_selector.h b/tensorflow/core/tpu/kernels/tpu_ordinal_selector.h index 9ea689b317f551..3bf1bfac3fe0bb 100644 --- a/tensorflow/core/tpu/kernels/tpu_ordinal_selector.h +++ b/tensorflow/core/tpu/kernels/tpu_ordinal_selector.h @@ -39,7 +39,7 @@ class TPUOrdinalSelector : TPUOrdinalSelectorInterface { stream_executor::tpu::OpsApiFn()->TfTpuOrdinalSelector_DestroyFn( ordinal_selector_); } - int64_t GetOrdinal(std::optional key, int64_t* req_id) override { + int64_t GetOrdinal(std::optional key, int64_t* req_id) override { int64_t ordinal; stream_executor::tpu::OpsApiFn()->TfTpuOrdinalSelector_GetOrdinalFn( ordinal_selector_, key, req_id, &ordinal); diff --git a/tensorflow/core/tpu/kernels/tpu_ordinal_selector_interface.h b/tensorflow/core/tpu/kernels/tpu_ordinal_selector_interface.h index 040959d592a1bd..21ce7b393d6195 100644 --- a/tensorflow/core/tpu/kernels/tpu_ordinal_selector_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_ordinal_selector_interface.h @@ -26,7 +26,7 @@ namespace tpu { class TPUOrdinalSelectorInterface { public: virtual ~TPUOrdinalSelectorInterface() = default; - virtual int64_t GetOrdinal(std::optional key, int64_t* req_id) = 0; + virtual int64_t GetOrdinal(std::optional key, int64_t* req_id) = 0; virtual void DequeueFromCoreSelector(int32_t device_ordinal, int64_t req_id) = 0; }; diff --git a/tensorflow/core/tpu/kernels/tpu_pod_state.cc b/tensorflow/core/tpu/kernels/tpu_pod_state.cc index 1457ceac9b790b..73acdd65ef166c 100644 --- a/tensorflow/core/tpu/kernels/tpu_pod_state.cc +++ b/tensorflow/core/tpu/kernels/tpu_pod_state.cc @@ -123,7 +123,7 @@ TpuPodState::~TpuPodState() { VLOG(1) << "Shutting down Compilation Cache Service done."; } -string TpuPodState::DebugString() const { +std::string TpuPodState::DebugString() const { return "Wrapper for distributed TPU state"; } diff --git a/tensorflow/core/tpu/kernels/tpu_pod_state.h b/tensorflow/core/tpu/kernels/tpu_pod_state.h index b24a512d341cbe..99e2cff3e1f948 100644 --- a/tensorflow/core/tpu/kernels/tpu_pod_state.h +++ b/tensorflow/core/tpu/kernels/tpu_pod_state.h @@ -38,7 +38,7 @@ class TpuPodState : public ResourceBase { ~TpuPodState() override; - string DebugString() const override; + std::string DebugString() const override; private: std::unique_ptr cache_service_; diff --git a/tensorflow/core/tpu/kernels/tpu_reshard_variables_op_util.cc b/tensorflow/core/tpu/kernels/tpu_reshard_variables_op_util.cc index f50652f8b5e81c..73214c817eaf04 100644 --- a/tensorflow/core/tpu/kernels/tpu_reshard_variables_op_util.cc +++ b/tensorflow/core/tpu/kernels/tpu_reshard_variables_op_util.cc @@ -99,7 +99,7 @@ absl::Status GetComputationCacheEntry( } // Builds an InputBuffers object that describes the inputs to the computation. 
-absl::StatusOr> BuildInputBuffers( +absl::StatusOr> BuildInputBuffers( OpKernelContext* context, const std::vector& variables, const xla::Shape& input_host_shape, xla::Backend* backend, int device_ordinal, se::Stream* stream) { @@ -150,10 +150,11 @@ absl::StatusOr> BuildInputBuffers( validate_shape(variables[i].index(), *variables[i].var()->tensor())); } - se::DeviceMemoryAllocator* const allocator = backend->memory_allocator(); + stream_executor::DeviceAddressAllocator* const allocator = + backend->memory_allocator(); xla::TransferManager* const transfer_manager = backend->transfer_manager(); - xla::ShapeTree input_buffers( + xla::ShapeTree input_buffers( transfer_manager->HostShapeToDeviceShape(input_host_shape)); // Allocates a buffer for the root tuple. @@ -165,15 +166,17 @@ absl::StatusOr> BuildInputBuffers( auto set_input_buffers_helper = [&](int arg_index, xla::ShapedBuffer* buffers, bool owning = false) { buffers->buffers().ForEachMutableElement( - [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { + [&](const xla::ShapeIndex& index, + stream_executor::DeviceAddressBase* buffer) { xla::ShapeIndex in_index = {arg_index}; for (int64_t j : index) { in_index.push_back(j); } if (owning) { *input_buffers.mutable_element(in_index) = - se::OwningDeviceMemory(*buffer, device_ordinal, allocator); - *buffer = se::DeviceMemoryBase(); + stream_executor::ScopedDeviceAddress( + *buffer, device_ordinal, allocator); + *buffer = stream_executor::DeviceAddressBase(); } else { *input_buffers.mutable_element(in_index) = *buffer; } @@ -268,7 +271,8 @@ absl::Status UpdateOutputVariables( TF_RET_CHECK(result_buffers.on_host_shape().IsTuple()); TF_RET_CHECK(!xla::ShapeUtil::IsNestedTuple(result_buffers.on_host_shape())); - se::DeviceMemoryAllocator* const allocator = backend->memory_allocator(); + stream_executor::DeviceAddressAllocator* const allocator = + backend->memory_allocator(); auto output_buffers = result_buffers.release(); const xla::Shape& output_host_shape = output_buffers.on_host_shape(); @@ -285,7 +289,8 @@ absl::Status UpdateOutputVariables( xla::ScopedShapedBuffer shaped_buffer(host_shape, device_shape, allocator, device_ordinal); shaped_buffer.buffers().ForEachMutableElement( - [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { + [&](const xla::ShapeIndex& index, + stream_executor::DeviceAddressBase* buffer) { xla::ShapeIndex out_index = {i}; for (int64_t j : index) { out_index.push_back(j); diff --git a/tensorflow/core/tpu/kernels/tpu_reshard_variables_op_util.h b/tensorflow/core/tpu/kernels/tpu_reshard_variables_op_util.h index c731cc10ec70ce..ab44f7788fbf50 100644 --- a/tensorflow/core/tpu/kernels/tpu_reshard_variables_op_util.h +++ b/tensorflow/core/tpu/kernels/tpu_reshard_variables_op_util.h @@ -42,7 +42,7 @@ absl::Status GetComputationCacheEntry( std::unique_ptr* entry, tpu::CompilationCacheFetchTarget fetch_target); -absl::StatusOr> BuildInputBuffers( +absl::StatusOr> BuildInputBuffers( OpKernelContext* context, const std::vector& variables, const xla::Shape& input_host_shape, xla::Backend* backend, int device_ordinal, se::Stream* stream); diff --git a/tensorflow/core/tpu/kernels/tpu_util.cc b/tensorflow/core/tpu/kernels/tpu_util.cc index 14223164d1e1b2..25e57e71da8dbf 100644 --- a/tensorflow/core/tpu/kernels/tpu_util.cc +++ b/tensorflow/core/tpu/kernels/tpu_util.cc @@ -48,7 +48,7 @@ absl::StatusOr ParseCompilationCacheKey( TpuCompilationCacheKey parsed_key(splits.at(0)); parsed_key.has_guaranteed_const = true; parsed_key.session_handle = splits.at(1); 
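The kernel and op hunks above and below replace TensorFlow's legacy integer aliases (uint64, uint32, int32) with the <cstdint> fixed-width types. A minimal sketch of why the swap is size-preserving, assuming the legacy aliases come from tensorflow/core/platform/types.h:

#include <cstdint>
#include "tensorflow/core/platform/types.h"  // assumed home of the legacy aliases

// Same width on every platform TensorFlow supports, so the rename is mechanical.
static_assert(sizeof(tensorflow::uint64) == sizeof(std::uint64_t),
              "uint64 alias must match uint64_t");
static_assert(sizeof(tensorflow::int32) == sizeof(std::int32_t),
              "int32 alias must match int32_t");
static_assert(sizeof(tensorflow::uint32) == sizeof(std::uint32_t),
              "uint32 alias must match uint32_t");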
- const string fingerprint = splits.at(2); + const std::string fingerprint = splits.at(2); parsed_key.guaranteed_const_fingerprint = [fingerprint] { return fingerprint; }; diff --git a/tensorflow/core/tpu/kernels/transfer_ops.cc b/tensorflow/core/tpu/kernels/transfer_ops.cc index 703dc3e7589134..1610d807411cdb 100644 --- a/tensorflow/core/tpu/kernels/transfer_ops.cc +++ b/tensorflow/core/tpu/kernels/transfer_ops.cc @@ -51,7 +51,7 @@ limitations under the License. namespace tensorflow { TpuTransferAsyncOpKernelBase::TpuTransferAsyncOpKernelBase( - OpKernelConstruction* ctx, const string& transfer_type, + OpKernelConstruction* ctx, const std::string& transfer_type, int number_of_threads, std::unique_ptr transfer_op) : AsyncOpKernel(ctx), transfer_type_(transfer_type), @@ -113,7 +113,7 @@ absl::Status TpuTransferAsyncOpKernelBase::RunTransferWithOrdinal( } TpuTransferAsyncOpKernel::TpuTransferAsyncOpKernel( - OpKernelConstruction* ctx, const string& transfer_type, + OpKernelConstruction* ctx, const std::string& transfer_type, int number_of_threads, std::unique_ptr transfer_op) : TpuTransferAsyncOpKernelBase(ctx, transfer_type, number_of_threads, std::move(transfer_op)) { @@ -132,7 +132,7 @@ absl::Status TpuTransferAsyncOpKernel::RunTransfer(OpKernelContext* ctx) { } TpuTransferAsyncDynamicOrdinalOpKernel::TpuTransferAsyncDynamicOrdinalOpKernel( - OpKernelConstruction* ctx, const string& transfer_type, + OpKernelConstruction* ctx, const std::string& transfer_type, int number_of_threads, std::unique_ptr transfer_op) : TpuTransferAsyncOpKernelBase(ctx, transfer_type, number_of_threads, std::move(transfer_op)) {} @@ -140,7 +140,7 @@ TpuTransferAsyncDynamicOrdinalOpKernel::TpuTransferAsyncDynamicOrdinalOpKernel( absl::Status TpuTransferAsyncDynamicOrdinalOpKernel::RunTransfer( OpKernelContext* ctx) { const Tensor& device_ordinal_tensor = ctx->input(0); - const int device_ordinal = device_ordinal_tensor.scalar()(); + const int device_ordinal = device_ordinal_tensor.scalar()(); XlaDevice* xla_device = dynamic_cast(ctx->device()->UnderlyingDevice()); if (((xla_device == nullptr) || (xla_device->device_type() == DEVICE_CPU)) && diff --git a/tensorflow/core/tpu/ops/sparse_core_preprocess_ops.cc b/tensorflow/core/tpu/ops/sparse_core_preprocess_ops.cc index 4985cea9558993..ed7ff78c77da57 100644 --- a/tensorflow/core/tpu/ops/sparse_core_preprocess_ops.cc +++ b/tensorflow/core/tpu/ops/sparse_core_preprocess_ops.cc @@ -73,14 +73,14 @@ REGISTER_OP("GetMinibatchesInCsrWithPhysicalReplica") for (int i = 0; i < c->num_inputs(); ++i) { TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &rank)); } - int32 max_minibatches_per_sc; + int32_t max_minibatches_per_sc; TF_RETURN_IF_ERROR( c->GetAttr("max_minibatches_per_sc", &max_minibatches_per_sc)); - int32 num_replica; + int32_t num_replica; TF_RETURN_IF_ERROR(c->GetAttr("num_replica", &num_replica)); - int32 sample_count; + int32_t sample_count; TF_RETURN_IF_ERROR(c->GetAttr("sample_count", &sample_count)); - int32 max_ids_per_chip_per_sample; + int32_t max_ids_per_chip_per_sample; TF_RETURN_IF_ERROR(c->GetAttr("max_ids_per_chip_per_sample", &max_ids_per_chip_per_sample)); @@ -88,7 +88,7 @@ REGISTER_OP("GetMinibatchesInCsrWithPhysicalReplica") // will be run as part of the graph generation which might not have the // tpu system available. 
const int xla_pad_size = 8; - int32 num_sc_per_chip; + int32_t num_sc_per_chip; TF_RETURN_IF_ERROR(c->GetAttr("num_sc_per_chip", &num_sc_per_chip)); const int num_physical_replica = num_replica * num_sc_per_chip; @@ -253,22 +253,22 @@ REGISTER_OP("ConvertToSparseCoreCsrWrappedCooTensor") .Attr("table_name: string") .Attr("allow_id_dropping: bool") .SetShapeFn([](shape_inference::InferenceContext* c) { - int32 max_minibatches_per_sc; + int32_t max_minibatches_per_sc; TF_RETURN_IF_ERROR( c->GetAttr("max_minibatches_per_sc", &max_minibatches_per_sc)); - int32 num_replica; + int32_t num_replica; TF_RETURN_IF_ERROR(c->GetAttr("num_replica", &num_replica)); - int32 sample_count_per_sc; + int32_t sample_count_per_sc; TF_RETURN_IF_ERROR( c->GetAttr("sample_count_per_sc", &sample_count_per_sc)); - int32 max_ids_per_chip_per_sample; + int32_t max_ids_per_chip_per_sample; TF_RETURN_IF_ERROR(c->GetAttr("max_ids_per_chip_per_sample", &max_ids_per_chip_per_sample)); // We can't get this number programmatically since the shape inference // will be run as part of the graph generation which might not have the // tpu system available. const int xla_pad_size = 8; - int32 num_sc_per_chip; + int32_t num_sc_per_chip; TF_RETURN_IF_ERROR(c->GetAttr("num_sc_per_chip", &num_sc_per_chip)); const int num_physical_replica = num_replica * num_sc_per_chip; diff --git a/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc index 587d6341527a20..95044439f5b894 100644 --- a/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc +++ b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc @@ -256,7 +256,7 @@ absl::Status UseGradientAccumulation(const OptimizationParameters& params, } case GradientAccumulationSupport::kNotSupported: { if (raw_gradient_accumulation_status) { - return errors::InvalidArgument(strings::Printf( + return errors::InvalidArgument(absl::StrFormat( "Optimization algorithm %s does not support gradient accumulation " "but parameters specify it.", GetOptimizationAlgorithmName(params.parameters_case()).c_str())); diff --git a/tensorflow/core/tpu/tpu_execute.cc b/tensorflow/core/tpu/tpu_execute.cc index 865683dcb430cf..251cde239bcf6c 100644 --- a/tensorflow/core/tpu/tpu_execute.cc +++ b/tensorflow/core/tpu/tpu_execute.cc @@ -115,16 +115,16 @@ absl::Status FixTupleTableAsync(se::Stream* stream, if (!element_shape.IsTuple()) { return absl::OkStatus(); } - std::vector elements; + std::vector elements; xla::ShapeIndex element_index = index; element_index.push_back(0); for (int i = 0; i < element_shape.tuple_shapes().size(); ++i) { // Gather all children of the tuple element. element_index.back() = i; - elements.push_back(mem->Buffer(element_index).AsDeviceMemoryBase()); + elements.push_back(mem->Buffer(element_index).AsDeviceAddress()); } - se::DeviceMemoryBase tuple_table_addr = - mem->Buffer(index).AsDeviceMemoryBase(); + stream_executor::DeviceAddressBase tuple_table_addr = + mem->Buffer(index).AsDeviceAddress(); return transfer_manager->WriteSingleTupleIndexTable( stream, elements, element_shape, &tuple_table_addr); }); @@ -160,7 +160,7 @@ bool DynamicShapeIsCompatible(const xla::Shape& dynamic_shape, // Metadata contains the sizes of shape without padding, eventually // representing the size of valid data. 
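The tpu_embedding_optimization_parameters_utils.cc hunk above swaps strings::Printf for absl::StrFormat. A standalone sketch of the replacement call, with an illustrative algorithm name:

#include <iostream>
#include <string>
#include "absl/strings/str_format.h"

int main() {
  const std::string algorithm = "Adagrad";  // illustrative only
  // Unlike Printf, absl::StrFormat checks the format string at compile time
  // and accepts std::string arguments for %s directly.
  const std::string msg = absl::StrFormat(
      "Optimization algorithm %s does not support gradient accumulation "
      "but parameters specify it.",
      algorithm);
  std::cout << msg << "\n";
}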
absl::Status UpdateDynamicInputs( - se::Stream* stream, se::DeviceMemoryAllocator* allocator, + se::Stream* stream, stream_executor::DeviceAddressAllocator* allocator, std::vector* runtime_inputs, const std::vector& compile_time_shapes) { TF_RET_CHECK(runtime_inputs->size() == compile_time_shapes.size()); @@ -193,14 +193,15 @@ absl::Status UpdateDynamicInputs( TF_RET_CHECK( DynamicShapeIsCompatible(runtime_shape, compile_time_shape)); - xla::MaybeOwningDeviceMemory* mutable_input_mem = + xla::MaybeOwningDeviceAddress* mutable_input_mem = runtime_input.MutableBuffer(index); auto padded_data = std::make_shared>( ShapeSizeCompact(compile_time_shape), -1); auto raw_input_runtime = std::make_shared>( ShapeSizeCompact(runtime_shape) / sizeof(uint32_t)); TF_RETURN_IF_ERROR(stream->MemcpyD2H( - se::DeviceMemory(mutable_input_mem->AsDeviceMemoryBase()), + stream_executor::DeviceAddress( + mutable_input_mem->AsDeviceAddress()), absl::MakeSpan(absl::bit_cast(raw_input_runtime->data()), ShapeSizeCompactRaw(runtime_shape)))); TF_RETURN_IF_ERROR(stream->DoHostCallbackWithStatus( @@ -239,7 +240,7 @@ absl::Status UpdateDynamicInputs( allocator->Allocate(stream->parent()->device_ordinal(), ShapeSizeCompact(compile_time_shape))); auto typed_new_input_memory = - se::DeviceMemory(new_input.cref()); + stream_executor::DeviceAddress(new_input.cref()); TF_RETURN_IF_ERROR( stream->MemcpyH2D(*padded_data, &typed_new_input_memory)); @@ -249,7 +250,7 @@ absl::Status UpdateDynamicInputs( // Modify the memory location in the input shape tree to point to the // new input. *mutable_input_mem = - xla::MaybeOwningDeviceMemory(std::move(new_input)); + xla::MaybeOwningDeviceAddress(std::move(new_input)); element_modified = true; return absl::OkStatus(); })); @@ -474,7 +475,7 @@ absl::StatusOr TPUExecute( VLOG(1) << "TPUExecute: Updating TPUEmbedding memory addresses on " << device_ordinal; - SE_DeviceMemoryBase* device_memory_addrs = nullptr; + SE_DeviceAddressBase* device_memory_addrs = nullptr; size_t device_memory_addrs_count; auto device_memory_cleanup = absl::MakeCleanup([device_memory_addrs, device_ordinal]() { @@ -499,9 +500,9 @@ absl::StatusOr TPUExecute( VLOG(1) << "TPUExecute: Adding " << device_memory_addrs_count << " TPUEmbedding memory addresses to HLO parameters."; for (int i = 0; i < device_memory_addrs_count; ++i) { - xla::ShapeTree tree( + xla::ShapeTree tree( xla::ShapeUtil::MakeOpaqueShape()); - const SE_DeviceMemoryBase& addr = device_memory_addrs[i]; + const SE_DeviceAddressBase& addr = device_memory_addrs[i]; VLOG(2) << absl::StrFormat("Device memory addr[%i] = {%p, %llu, %llu}", i, addr.opaque, addr.size, addr.payload); *tree.mutable_element({}) = ApiConverter::FromC(addr); diff --git a/tensorflow/core/transforms/eliminate_passthrough_iter_args/pass.cc b/tensorflow/core/transforms/eliminate_passthrough_iter_args/pass.cc index c7a943533ba8ec..e58e58aec7f9c0 100644 --- a/tensorflow/core/transforms/eliminate_passthrough_iter_args/pass.cc +++ b/tensorflow/core/transforms/eliminate_passthrough_iter_args/pass.cc @@ -133,8 +133,8 @@ struct EliminateForPassthroughIterArgs static ForRegionOp RebuildOp(const llvm::BitVector &indices, ForRegionOp op, IRRewriter &rewriter) { rewriter.setInsertionPoint(op); - auto new_op = rewriter.create( - op.getLoc(), FilterByIndex(op.getOuts().getTypes(), indices), + auto new_op = ForRegionOp::create( + rewriter, op.getLoc(), FilterByIndex(op.getOuts().getTypes(), indices), op.getCtl().getType(), op.getStart(), op.getLimit(), op.getDelta(), FilterByIndex(op.getInit(), 
indices), op.getCtls(), op.getBodyAttrsAttr(), op.getRegionAttrsAttr()); @@ -163,8 +163,8 @@ struct EliminateWhileLikePassthroughIterArgs WhileLikeRegionOp op, IRRewriter &rewriter) { rewriter.setInsertionPoint(op); - auto new_op = rewriter.create( - op.getLoc(), FilterByIndex(op.getOuts().getTypes(), indices), + auto new_op = WhileLikeRegionOp::create( + rewriter, op.getLoc(), FilterByIndex(op.getOuts().getTypes(), indices), op.getCtl().getType(), FilterByIndex(op.getInit(), indices), op.getCtls(), op.getParallelIterationsAttr(), op.getCondAttrsAttr(), op.getBodyAttrsAttr(), op.getCondRegionAttrsAttr(), diff --git a/tensorflow/core/transforms/func_to_graph/func_to_graph.cc b/tensorflow/core/transforms/func_to_graph/func_to_graph.cc index 1c2941bd8da120..5be91dbb286d92 100644 --- a/tensorflow/core/transforms/func_to_graph/func_to_graph.cc +++ b/tensorflow/core/transforms/func_to_graph/func_to_graph.cc @@ -102,7 +102,7 @@ absl::Status FuncToGraph(GraphFuncOp func) { } OpBuilder builder(func); - auto graph = builder.create(func.getLoc(), version); + auto graph = GraphOp::create(builder, func.getLoc(), version); // Remove the terminator. func.SingleBlock::getBody()->getTerminator()->erase(); diff --git a/tensorflow/core/transforms/functional_to_region/impl.cc b/tensorflow/core/transforms/functional_to_region/impl.cc index f5bdd163ed1007..aaf67332ae2d48 100644 --- a/tensorflow/core/transforms/functional_to_region/impl.cc +++ b/tensorflow/core/transforms/functional_to_region/impl.cc @@ -322,8 +322,8 @@ LogicalResult ConvertIfLikeOp::matchAndRewrite( // Create the region-based op, passing in the required attributes. ValueRange args, ctls; std::tie(args, ctls) = this->SplitControl(op.getArgs()); - auto region_op = rewriter.create( - op.getLoc(), op.getResultTypes(), op.getCond(), ctls, + auto region_op = IfLikeRegionOp::create( + rewriter, op.getLoc(), op.getResultTypes(), op.getCond(), ctls, op.getThenBranch().getAttrs(), op.getElseBranch().getAttrs(), PreserveAttributes(then_func, /*drop_args=*/true), PreserveAttributes(else_func, /*drop_args=*/true)); @@ -390,8 +390,8 @@ LogicalResult ConvertCaseLikeOp::matchAndRewrite( // Create the region-based op, passing in the required attributes. ValueRange args, ctls; std::tie(args, ctls) = this->SplitControl(op.getArgs()); - auto region_op = rewriter.create( - op.getLoc(), op.getResultTypes(), op.getBranchIndex(), ctls, + auto region_op = CaseLikeRegionOp::create( + rewriter, op.getLoc(), op.getResultTypes(), op.getBranchIndex(), ctls, rewriter.getArrayAttr(branch_attrs), region_attrs, op.getBranches().size()); util::ForwardNonIntrinsicAttributes(op, region_op); @@ -440,8 +440,8 @@ ConvertWhileLikeOp::matchAndRewrite( // TODO(jeffniu): Change this to call the infer return types builder. ValueRange init, ctls; std::tie(init, ctls) = this->SplitControl(op.getArgs()); - auto region_op = rewriter.create( - op.getLoc(), op.getResultTypes(), init, ctls, + auto region_op = WhileLikeRegionOp::create( + rewriter, op.getLoc(), op.getResultTypes(), init, ctls, op.getParallelIterationsAttr(), op.getCond().getAttrs(), op.getBody().getAttrs(), PreserveAttributes(cond_func), PreserveAttributes(body_func)); @@ -482,8 +482,8 @@ LogicalResult ConvertForOp::matchAndRewrite(tfg::ForOp op, // `ForRegion` does. We will need to insert casts. 
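The transform hunks in this file and in the graph_to_func and region_to_functional files below all apply one mechanical change: rewriter.create<OpT>(loc, ...) becomes OpT::create(rewriter, loc, ...), with the builder passed as the first argument. A standalone sketch of the pattern using hypothetical stand-in types rather than MLIR itself:

#include <iostream>
#include <string>
#include <utility>

struct Rewriter;  // stand-in for mlir::IRRewriter

struct MyOp {
  std::string label;
  // New-style entry point: the builder comes first, then the usual arguments.
  static MyOp create(Rewriter& /*rewriter*/, const std::string& loc) {
    return MyOp{"MyOp@" + loc};
  }
};

struct Rewriter {
  // Old-style entry point: a member template that forwards to the op's create.
  template <typename OpT, typename... Args>
  OpT create(Args&&... args) {
    return OpT::create(*this, std::forward<Args>(args)...);
  }
};

int main() {
  Rewriter rewriter;
  MyOp before = rewriter.create<MyOp>("loc");  // old spelling
  MyOp after = MyOp::create(rewriter, "loc");  // spelling used by this patch
  std::cout << before.label << " " << after.label << "\n";
}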
ValueRange init, ctls; std::tie(init, ctls) = SplitControl(op.getArgs()); - auto region_op = rewriter.create( - op.getLoc(), op.getResultTypes(), op.getStart(), op.getLimit(), + auto region_op = ForRegionOp::create( + rewriter, op.getLoc(), op.getResultTypes(), op.getStart(), op.getLimit(), op.getDelta(), init, ctls, op.getBody().getAttrs(), PreserveAttributes(body_func)); util::ForwardNonIntrinsicAttributes(op, region_op); diff --git a/tensorflow/core/transforms/graph_to_func/graph_to_func.cc b/tensorflow/core/transforms/graph_to_func/graph_to_func.cc index d3769db8bcdf00..ae9e8d48c6a17a 100644 --- a/tensorflow/core/transforms/graph_to_func/graph_to_func.cc +++ b/tensorflow/core/transforms/graph_to_func/graph_to_func.cc @@ -66,8 +66,8 @@ absl::Status GraphToFunc(GraphOp graph, ArrayRef feeds, FunctionType func_type = builder.getFunctionType(arg_types, ret_types); auto loc = graph.getLoc(); - auto func_op = builder.create(loc, func_name, func_type, - /*generic=*/false); + auto func_op = GraphFuncOp::create(builder, loc, func_name, func_type, + /*generic=*/false); func_op->setAttr("tfg.lifted_graph_version", graph.getVersion()); func_op.getRegion().takeBody(graph.getRegion()); @@ -75,7 +75,7 @@ absl::Status GraphToFunc(GraphOp graph, ArrayRef feeds, // fetches, the fetch value will be replaced with feed argument. OpBuilder body_builder = OpBuilder::atBlockEnd(func_op.SingleBlock::getBody()); - body_builder.create(loc, fetches, control_rets); + ReturnOp::create(body_builder, loc, fetches, control_rets); StringAttr tfg_name = dialect->getTfgNameAttrIdentifier(); StringAttr lifted_value_name = builder.getStringAttr("tfg.lifted_value_attr"); diff --git a/tensorflow/core/transforms/region_to_functional/impl.cc b/tensorflow/core/transforms/region_to_functional/impl.cc index 65c37b8b468825..9fce62a74a1173 100644 --- a/tensorflow/core/transforms/region_to_functional/impl.cc +++ b/tensorflow/core/transforms/region_to_functional/impl.cc @@ -16,7 +16,6 @@ limitations under the License. 
#include "tensorflow/core/transforms/region_to_functional/impl.h" #include -#include #include #include #include @@ -233,8 +232,8 @@ struct ConvertIfLikeRegionOpToExplicitCapture IfLikeRegionOp RebuildWith(IfLikeRegionOp op, ValueRange added, PatternRewriter &rewriter) const override { - return rewriter.create( - op.getLoc(), op.getResultTypes(), op.getCond(), op.getCtls(), + return IfLikeRegionOp::create( + rewriter, op.getLoc(), op.getResultTypes(), op.getCond(), op.getCtls(), op.getThenAttrsAttr(), op.getElseAttrsAttr(), op.getThenRegionAttrsAttr(), op.getElseRegionAttrsAttr()); } @@ -247,9 +246,9 @@ struct ConvertCaseLikeRegionOpToExplicitCapture CaseLikeRegionOp RebuildWith(CaseLikeRegionOp op, ValueRange added, PatternRewriter &rewriter) const override { - return rewriter.create( - op.getLoc(), op.getResultTypes(), op.getBranchIndex(), op.getCtls(), - op.getBranchAttrsAttr(), op.getRegionAttrsAttr(), + return CaseLikeRegionOp::create( + rewriter, op.getLoc(), op.getResultTypes(), op.getBranchIndex(), + op.getCtls(), op.getBranchAttrsAttr(), op.getRegionAttrsAttr(), op.getBranches().size()); } }; @@ -295,9 +294,9 @@ struct ConvertWhileLikeRegionOpToExplicitCapture util::LoopRegionResultAdded(op.getBodyRegion(), added.size()); rewriter.setInsertionPoint(op); - return rewriter.create( - op.getLoc(), results, op.getCtl().getType(), operands, op.getCtls(), - op.getParallelIterationsAttr(), op.getCondAttrsAttr(), + return WhileLikeRegionOp::create( + rewriter, op.getLoc(), results, op.getCtl().getType(), operands, + op.getCtls(), op.getParallelIterationsAttr(), op.getCondAttrsAttr(), op.getBodyAttrsAttr(), op.getCondRegionAttrsAttr(), op.getBodyRegionAttrsAttr()); } @@ -324,8 +323,8 @@ struct ConvertForRegionOpToExplicitCapture util::LoopRegionResultAdded(op.getBodyRegion(), added.size()); rewriter.setInsertionPoint(op); - return rewriter.create( - op.getLoc(), results, op.getCtl().getType(), op.getStart(), + return ForRegionOp::create( + rewriter, op.getLoc(), results, op.getCtl().getType(), op.getStart(), op.getLimit(), op.getDelta(), operands, op.getCtls(), op.getBodyAttrsAttr(), op.getRegionAttrsAttr()); } @@ -870,8 +869,8 @@ LogicalResult ConvertIfLikeOp::matchAndRewrite( rewriter.setInsertionPoint(op); auto func_op = - rewriter.create(op.getLoc(), op.getResultTypes(), op.getCond(), - operands, branches[0], branches[1]); + IfLikeOp::create(rewriter, op.getLoc(), op.getResultTypes(), op.getCond(), + operands, branches[0], branches[1]); util::ForwardNonIntrinsicAttributes(op, func_op); rewriter.replaceOp(op, func_op.getResults()); return success(); @@ -924,9 +923,9 @@ LogicalResult ConvertCaseLikeOp::matchAndRewrite( llvm::append_range(operands, op.getCtls()); rewriter.setInsertionPoint(op); - auto func_op = rewriter.create(op.getLoc(), op.getResultTypes(), - op.getBranchIndex(), operands, - rewriter.getArrayAttr(branches)); + auto func_op = CaseLikeOp::create(rewriter, op.getLoc(), op.getResultTypes(), + op.getBranchIndex(), operands, + rewriter.getArrayAttr(branches)); util::ForwardNonIntrinsicAttributes(op, func_op); rewriter.replaceOp(op, func_op.getResults()); return success(); @@ -1000,9 +999,9 @@ ConvertWhileLikeOp::matchAndRewrite( llvm::append_range(operands, op.getCtls()); rewriter.setInsertionPoint(op); - auto func_op = rewriter.create(op.getLoc(), op.getResultTypes(), - operands, cond_ref, body_ref, - op.getParallelIterationsAttr()); + auto func_op = + WhileLikeOp::create(rewriter, op.getLoc(), op.getResultTypes(), operands, + cond_ref, body_ref, 
op.getParallelIterationsAttr()); util::ForwardNonIntrinsicAttributes(op, func_op); rewriter.replaceOp(op, func_op.getResults()); return success(); @@ -1038,9 +1037,9 @@ LogicalResult ConvertForOp::matchAndRewrite(ForRegionOp op, llvm::append_range(operands, op.getCtls()); rewriter.setInsertionPoint(op); - auto func_op = rewriter.create( - op.getLoc(), op.getResultTypes(), op.getStart(), op.getLimit(), - op.getDelta(), operands, body_ref[0]); + auto func_op = tfg::ForOp::create(rewriter, op.getLoc(), op.getResultTypes(), + op.getStart(), op.getLimit(), op.getDelta(), + operands, body_ref[0]); util::ForwardNonIntrinsicAttributes(op, func_op); rewriter.replaceOp(op, func_op.getResults()); return success(); diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index 3acd07c02fadf8..05ae29c1619d87 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -298,6 +298,7 @@ filegroup( "mkl_heuristics.h", "mkl_util.h", "onednn_env_vars.h", + "@com_google_absl//absl/container:flat_hash_map", "@local_xla//xla/tsl/util:onednn_util_hdrs", ], visibility = ["//tensorflow/core:__pkg__"], @@ -952,38 +953,39 @@ tf_proto_library( name = "test_log_proto", srcs = ["test_log.proto"], make_default_target_header_only = True, - protodeps = ["@local_xla//xla/tsl/protobuf:test_log_proto"], # Not to be used outside of tensorflow/core. visibility = ["//tensorflow/core:__pkg__"], exports = ["@local_xla//xla/tsl/protobuf:test_log_proto"], + deps = ["@local_xla//xla/tsl/protobuf:test_log_proto"], ) tf_proto_library( name = "protos_test", srcs = ["example_proto_fast_parsing_test.proto"], - protodeps = ["//tensorflow/core:protos_all"], visibility = ["//visibility:public"], + deps = ["//tensorflow/core:protos_all"], ) tf_proto_library( name = "event_proto", srcs = ["event.proto"], make_default_target_header_only = True, - protodeps = [ - "//tensorflow/core/framework:summary_proto", + visibility = ["//visibility:public"], + deps = [ "//tensorflow/core/framework:resource_handle_proto", + "//tensorflow/core/framework:summary_proto", "//tensorflow/core/framework:tensor_proto", "//tensorflow/core/framework:tensor_shape_proto", "//tensorflow/core/framework:types_proto", ], - visibility = ["//visibility:public"], ) tf_proto_library( name = "saved_tensor_slice_proto", srcs = ["saved_tensor_slice.proto"], make_default_target_header_only = True, - protodeps = [ + visibility = ["//visibility:public"], + deps = [ "//tensorflow/core/framework:resource_handle_proto", "//tensorflow/core/framework:tensor_proto", "//tensorflow/core/framework:tensor_shape_proto", @@ -991,7 +993,6 @@ tf_proto_library( "//tensorflow/core/framework:types_proto", "//tensorflow/core/framework:versions_proto", ], - visibility = ["//visibility:public"], ) tf_proto_library( @@ -1004,16 +1005,16 @@ tf_proto_library( tf_proto_library( name = "protos_all", make_default_target_header_only = True, - protodeps = [ - ":event_proto", - ":saved_tensor_slice_proto", - ":memmapped_file_system_proto", - "//tensorflow/core/util/quantization:uniform_quant_ops_attr_proto", - ], tags = [ "alt_dep=//third_party/tensorflow/core:protos_all", ], visibility = ["//tensorflow/core:__subpackages__"], + deps = [ + ":event_proto", + ":memmapped_file_system_proto", + ":saved_tensor_slice_proto", + "//tensorflow/core/util/quantization:uniform_quant_ops_attr_proto", + ], ) cc_library( diff --git a/tensorflow/core/util/autotune_maps/BUILD b/tensorflow/core/util/autotune_maps/BUILD index 3868d4971b8035..a5de2c3ba00baa 100644 --- 
a/tensorflow/core/util/autotune_maps/BUILD +++ b/tensorflow/core/util/autotune_maps/BUILD @@ -193,7 +193,6 @@ tf_cuda_only_cc_test( size = "small", srcs = ["autotune_serialize_test.cc"], features = ["-layering_check"], - tags = ["cuda-only"], deps = [ ":autotune_serialize", ":conv_autotune_maps", diff --git a/tensorflow/core/util/autotune_maps/conv_autotune_maps.h b/tensorflow/core/util/autotune_maps/conv_autotune_maps.h index 7c00348adfe1ba..ebf542b2afbd75 100644 --- a/tensorflow/core/util/autotune_maps/conv_autotune_maps.h +++ b/tensorflow/core/util/autotune_maps/conv_autotune_maps.h @@ -39,7 +39,7 @@ namespace tensorflow { // A dummy type to group forward convolution autotune results together. struct ConvAutotuneGroup { - static string name() { return "Conv"; } + static std::string name() { return "Conv"; } }; using ConvAutotuneMap = AutotuneSingletonproto_, other.proto_); } -string ConvParameters::ToString() const { return proto_.DebugString(); } +std::string ConvParameters::ToString() const { return proto_.DebugString(); } MatmulParameters::MatmulParameters( se::StreamExecutor* stream_exec, DataType ab_dtype, DataType c_dtype, @@ -137,7 +137,7 @@ bool MatmulParameters::operator==(const MatmulParameters& other) const { MessageDifferencer::Equals(this->proto_, other.proto_); } -string MatmulParameters::ToString() const { return proto_.DebugString(); } +std::string MatmulParameters::ToString() const { return proto_.DebugString(); } } // namespace tensorflow diff --git a/tensorflow/core/util/autotune_maps/conv_parameters.h b/tensorflow/core/util/autotune_maps/conv_parameters.h index b213dba9298dd3..12da493f4a59f4 100644 --- a/tensorflow/core/util/autotune_maps/conv_parameters.h +++ b/tensorflow/core/util/autotune_maps/conv_parameters.h @@ -90,16 +90,16 @@ class ConvParameters { bool operator!=(const ConvParameters& other) const { return !(*this == other); } - uint64 hash() const { return hash_code_; } + uint64_t hash() const { return hash_code_; } - string ToString() const; + std::string ToString() const; const ConvParametersProto& proto() const { return proto_; } private: int device_id_; ConvParametersProto proto_; - uint64 hash_code_; + uint64_t hash_code_; }; class MatmulParameters { @@ -127,16 +127,16 @@ class MatmulParameters { bool operator!=(const MatmulParameters& other) const { return !(*this == other); } - uint64 hash() const { return hash_code_; } + uint64_t hash() const { return hash_code_; } - string ToString() const; + std::string ToString() const; const MatmulParametersProto& proto() const { return proto_; } private: int device_id_; MatmulParametersProto proto_; - uint64 hash_code_; + uint64_t hash_code_; }; } // namespace tensorflow diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index d15ec3034a93c9..fd1762482b4340 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -26,6 +26,7 @@ limitations under the License. #include "oneapi/dnnl/dnnl.hpp" #include "oneapi/dnnl/dnnl_threadpool.hpp" +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" @@ -1963,7 +1964,7 @@ class LRUCache { size_t capacity_; // The cache, a map from string key to a LRU entry. - std::unordered_map cache_; + absl::flat_hash_map cache_; // The LRU list of entries. 
// The front of the list contains the key of the most recently accessed diff --git a/tensorflow/core/util/proto/decode.h b/tensorflow/core/util/proto/decode.h index 7d43e34b35ce50..a3a5c5a72c2f01 100644 --- a/tensorflow/core/util/proto/decode.h +++ b/tensorflow/core/util/proto/decode.h @@ -42,7 +42,7 @@ using tensorflow::protobuf::io::StringOutputStream; // Converts an uint64 to an int64 without loss of information. // Unsigned values greater than INT64_MAX are represented as // negative numbers by wrapping (same as twos-complement bit equivalence). -inline int64_t WrapUnsignedAsSigned64(uint64 unsigned_value) { +inline int64_t WrapUnsignedAsSigned64(uint64_t unsigned_value) { // For a detailed explanation of why this works to wrap unsigned ints, see // http://stackoverflow.com/questions/13150449/efficient-unsigned-to-signed-cast-avoiding-implementation-defined-behavior // Both if tests should be optimized out. @@ -59,16 +59,16 @@ inline int64_t WrapUnsignedAsSigned64(uint64 unsigned_value) { // Converts an uint32 to an int32 without loss of information. // Unsigned values greater than INT_MAX are represented as // negative numbers by wrapping (same as twos-complement bit equivalence). -inline int32 WrapUnsignedAsSigned32(uint32 unsigned_value) { +inline int32_t WrapUnsignedAsSigned32(uint32_t unsigned_value) { // For a detailed explanation of why this works to wrap unsigned ints, see // http://stackoverflow.com/questions/13150449/efficient-unsigned-to-signed-cast-avoiding-implementation-defined-behavior // Both if tests should be optimized out. if (unsigned_value <= INT_MAX) { - return static_cast(unsigned_value); + return static_cast(unsigned_value); } // The C++ spec allows an architecture where this test is required. if (unsigned_value >= INT_MIN) { - return static_cast(unsigned_value - INT_MIN) + INT_MIN; + return static_cast(unsigned_value - INT_MIN) + INT_MIN; } return 0; // This should never occur. } @@ -78,8 +78,8 @@ inline int32 WrapUnsignedAsSigned32(uint32 unsigned_value) { // space in the buffer. // The ok value will be set to false if the buffer does not contain // a valid varint. -inline const uint8* ReadVarint64FromArray(const uint8* buffer, bool* ok, - uint64* value); +inline const uint8_t* ReadVarint64FromArray(const uint8_t* buffer, bool* ok, + uint64_t* value); // Reads a single varint32 from a byte array. // It is the caller's responsibility to ensure that there is enough @@ -89,10 +89,10 @@ inline const uint8* ReadVarint64FromArray(const uint8* buffer, bool* ok, // This is slightly less efficient than the private version in // coded_stream.cc but we duplicate less code by calling // the 64 bit version instead of copying the code. -inline const uint8* ReadVarint32FromArray(const uint8* buffer, bool* ok, - uint32* value) { - uint64 tmp = 0; - const uint8* buf = ReadVarint64FromArray(buffer, ok, &tmp); +inline const uint8_t* ReadVarint32FromArray(const uint8_t* buffer, bool* ok, + uint32_t* value) { + uint64_t tmp = 0; + const uint8_t* buf = ReadVarint64FromArray(buffer, ok, &tmp); *value = tmp & 0xffffffff; return buf; } @@ -101,12 +101,12 @@ inline const uint8* ReadVarint32FromArray(const uint8* buffer, bool* ok, // The array is part of a Tensor that was allocated by the caller // with type TensorType, while DeclaredType is the proto field type. 
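WrapUnsignedAsSigned64 in the decode.h hunk above converts uint64_t to int64_t by two's-complement wrapping, so values above INT64_MAX come out negative with the bit pattern preserved. A standalone sketch mirroring that behaviour:

#include <cstdint>
#include <iostream>

int64_t WrapUnsignedAsSigned64(uint64_t v) {
  if (v <= static_cast<uint64_t>(INT64_MAX)) return static_cast<int64_t>(v);
  // v is in [2^63, 2^64): shift it down into the signed range, cast, shift back.
  return static_cast<int64_t>(v - static_cast<uint64_t>(INT64_MIN)) + INT64_MIN;
}

int main() {
  std::cout << WrapUnsignedAsSigned64(3) << "\n";           // 3
  std::cout << WrapUnsignedAsSigned64(UINT64_MAX) << "\n";  // -1 (all bits set)
}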
template -const uint8* ReadFromArray(const uint8* buf, TensorType* value); +const uint8_t* ReadFromArray(const uint8_t* buf, TensorType* value); template <> -inline const uint8* ReadFromArray( - const uint8* buf, int64_t* value) { - uint32 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int64_t* value) { + uint32_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. buf = ReadVarint32FromArray(buf, &unused_ok, &temp); *value = static_cast(temp); @@ -114,19 +114,19 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int32* value) { - uint32 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int32_t* value) { + uint32_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. buf = ReadVarint32FromArray(buf, &unused_ok, &temp); - *value = static_cast(temp); + *value = static_cast(temp); return buf; } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int64_t* value) { - uint64 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int64_t* value) { + uint64_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. buf = ReadVarint64FromArray(buf, &unused_ok, &temp); *value = WrapUnsignedAsSigned64(temp); @@ -134,9 +134,9 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, uint64* value) { - uint32 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, uint64_t* value) { + uint32_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. buf = ReadVarint32FromArray(buf, &unused_ok, &temp); *value = temp; @@ -144,23 +144,23 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, uint32* value) { +inline const uint8_t* ReadFromArray( + const uint8_t* buf, uint32_t* value) { bool unused_ok; // The Counting pass would have failed if this were corrupt. return ReadVarint32FromArray(buf, &unused_ok, value); } template <> -inline const uint8* ReadFromArray( - const uint8* buf, uint64* value) { +inline const uint8_t* ReadFromArray( + const uint8_t* buf, uint64_t* value) { bool unused_ok; // The Counting pass would have failed if this were corrupt. return ReadVarint64FromArray(buf, &unused_ok, value); } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int64_t* value) { - uint64 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int64_t* value) { + uint64_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. buf = ReadVarint64FromArray(buf, &unused_ok, &temp); *value = WireFormatLite::ZigZagDecode32(temp); @@ -168,9 +168,9 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int32* value) { - uint32 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int32_t* value) { + uint32_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. 
buf = ReadVarint32FromArray(buf, &unused_ok, &temp); *value = WireFormatLite::ZigZagDecode32(temp); @@ -178,9 +178,9 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int64_t* value) { - uint64 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int64_t* value) { + uint64_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. buf = ReadVarint64FromArray(buf, &unused_ok, &temp); *value = WireFormatLite::ZigZagDecode64(temp); @@ -188,10 +188,10 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, uint64* value) { - uint32 temp; - buf = WireFormatLite::ReadPrimitiveFromArray( + const uint8_t* buf, uint64_t* value) { + uint32_t temp; + buf = WireFormatLite::ReadPrimitiveFromArray( buf, &temp); *value = temp; @@ -199,10 +199,10 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, uint32* value) { - uint32 temp; - buf = WireFormatLite::ReadPrimitiveFromArray( + const uint8_t* buf, uint32_t* value) { + uint32_t temp; + buf = WireFormatLite::ReadPrimitiveFromArray( buf, &temp); *value = WrapUnsignedAsSigned32(temp); @@ -210,8 +210,8 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, uint64* value) { +inline const uint8_t* ReadFromArray( + const uint8_t* buf, uint64_t* value) { protobuf_uint64 temp; buf = WireFormatLite::ReadPrimitiveFromArray( @@ -221,10 +221,10 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int64_t* value) { +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int64_t* value) { int32_t temp; - buf = WireFormatLite::ReadPrimitiveFromArray( buf, &temp); *value = temp; @@ -232,16 +232,16 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int32* value) { - return WireFormatLite::ReadPrimitiveFromArray( + const uint8_t* buf, int32_t* value) { + return WireFormatLite::ReadPrimitiveFromArray( buf, value); } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int64_t* value) { +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int64_t* value) { protobuf_int64 temp; buf = WireFormatLite::ReadPrimitiveFromArray( @@ -251,16 +251,16 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, float* value) { +inline const uint8_t* ReadFromArray( + const uint8_t* buf, float* value) { return WireFormatLite::ReadPrimitiveFromArray( buf, value); } template <> -inline const uint8* ReadFromArray( - const uint8* buf, double* value) { +inline const uint8_t* ReadFromArray( + const uint8_t* buf, double* value) { float temp; buf = WireFormatLite::ReadPrimitiveFromArray( @@ -270,17 +270,17 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, double* value) { +inline const uint8_t* ReadFromArray( + const uint8_t* buf, double* value) { return WireFormatLite::ReadPrimitiveFromArray( buf, value); } template <> -inline const uint8* ReadFromArray( - const uint8* buf, bool* value) { - uint64 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, bool* value) { + uint64_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. 
buf = ReadVarint64FromArray(buf, &unused_ok, &temp); *value = temp != 0; @@ -288,9 +288,9 @@ inline const uint8* ReadFromArray( } template <> -inline const uint8* ReadFromArray( - const uint8* buf, int* value) { - uint32 temp = 0; +inline const uint8_t* ReadFromArray( + const uint8_t* buf, int* value) { + uint32_t temp = 0; bool unused_ok; // The Counting pass would have failed if this were corrupt. buf = ReadVarint32FromArray(buf, &unused_ok, &temp); *value = static_cast(temp); @@ -304,8 +304,8 @@ template inline int ReadPackedPrimitives(const void* bufp, const size_t len, const int index, const int stride, void* datap) { - const uint8* buf = reinterpret_cast(bufp); - const uint8* bound = buf + len; + const uint8_t* buf = reinterpret_cast(bufp); + const uint8_t* bound = buf + len; TensorType* data = reinterpret_cast(datap) + index; int count; @@ -340,7 +340,7 @@ inline absl::Status ReadPrimitive(CodedInputStream* input, int index, inline absl::Status ReadBytes(CodedInputStream* input, int index, void* datap) { tstring* data = reinterpret_cast(datap) + index; - uint32 length; + uint32_t length; if (!input->ReadVarint32(&length)) { return errors::DataLoss("Failed reading bytes"); } @@ -370,7 +370,7 @@ inline absl::Status ReadGroupBytes(CodedInputStream* input, int field_number, // TYPE_GROUP is deprecated and currently no tests in // tensorflow/python/kernel_tests/proto:decode_proto_op_test target a // TYPE_GROUP tag, we use std::string as a read buffer. - string buf; + std::string buf; StringOutputStream string_stream(&buf); { CodedOutputStream out(&string_stream); @@ -412,31 +412,33 @@ inline absl::Status ReadValue(CodedInputStream* input, return ReadPrimitive( input, index, datap); case WireFormatLite::TYPE_UINT64: - return ReadPrimitive(input, index, datap); case WireFormatLite::TYPE_INT32: switch (dtype) { case DataType::DT_INT64: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); case DataType::DT_INT32: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); default: return errors::DataLoss("Failed reading TYPE_INT32 for ", DataTypeString(dtype)); } case WireFormatLite::TYPE_FIXED64: - return ReadPrimitive(input, index, datap); case WireFormatLite::TYPE_FIXED32: switch (dtype) { case DataType::DT_UINT64: - return ReadPrimitive( - input, index, datap); + return ReadPrimitive(input, index, + datap); case DataType::DT_UINT32: - return ReadPrimitive( - input, index, datap); + return ReadPrimitive(input, index, + datap); default: return errors::DataLoss("Failed reading TYPE_FIXED32 for ", DataTypeString(dtype)); @@ -455,25 +457,25 @@ inline absl::Status ReadValue(CodedInputStream* input, case WireFormatLite::TYPE_UINT32: switch (dtype) { case DataType::DT_UINT64: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); case DataType::DT_UINT32: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); default: return errors::DataLoss("Failed reading TYPE_UINT32 for ", DataTypeString(dtype)); } case WireFormatLite::TYPE_ENUM: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); case WireFormatLite::TYPE_SFIXED32: switch (dtype) { case DataType::DT_INT64: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); case DataType::DT_INT32: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); default: return errors::DataLoss("Failed reading TYPE_SFIXED32 for ", @@ -485,10 +487,10 @@ inline absl::Status ReadValue(CodedInputStream* input, case WireFormatLite::TYPE_SINT32: switch (dtype) { case 
DataType::DT_INT64: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); case DataType::DT_INT32: - return ReadPrimitive( + return ReadPrimitive( input, index, datap); default: return errors::DataLoss("Failed reading TYPE_SINT32 for ", @@ -533,7 +535,7 @@ inline absl::Status ReadPackedFromArray( buf, buf_size, *index, stride, data); return absl::OkStatus(); case WireFormatLite::TYPE_UINT64: - *index += ReadPackedPrimitives( + *index += ReadPackedPrimitives( buf, buf_size, *index, stride, data); return absl::OkStatus(); case WireFormatLite::TYPE_INT32: @@ -543,7 +545,7 @@ inline absl::Status ReadPackedFromArray( buf, buf_size, *index, stride, data); return absl::OkStatus(); case DataType::DT_INT32: - *index += ReadPackedPrimitives( + *index += ReadPackedPrimitives( buf, buf_size, *index, stride, data); return absl::OkStatus(); default: @@ -551,18 +553,20 @@ inline absl::Status ReadPackedFromArray( DataTypeString(dtype)); } case WireFormatLite::TYPE_FIXED64: - *index += ReadPackedPrimitives( + *index += ReadPackedPrimitives( buf, buf_size, *index, stride, data); return absl::OkStatus(); case WireFormatLite::TYPE_FIXED32: switch (dtype) { case DataType::DT_UINT64: - *index += ReadPackedPrimitives( - buf, buf_size, *index, stride, data); + *index += + ReadPackedPrimitives( + buf, buf_size, *index, stride, data); return absl::OkStatus(); case DataType::DT_UINT32: - *index += ReadPackedPrimitives( - buf, buf_size, *index, stride, data); + *index += + ReadPackedPrimitives( + buf, buf_size, *index, stride, data); return absl::OkStatus(); default: return errors::DataLoss("Failed reading TYPE_FIXED32 for ", @@ -580,11 +584,11 @@ inline absl::Status ReadPackedFromArray( case WireFormatLite::TYPE_UINT32: switch (dtype) { case DataType::DT_UINT64: - *index += ReadPackedPrimitives( + *index += ReadPackedPrimitives( buf, buf_size, *index, stride, data); return absl::OkStatus(); case DataType::DT_UINT32: - *index += ReadPackedPrimitives( + *index += ReadPackedPrimitives( buf, buf_size, *index, stride, data); return absl::OkStatus(); default: @@ -592,7 +596,7 @@ inline absl::Status ReadPackedFromArray( DataTypeString(dtype)); } case WireFormatLite::TYPE_ENUM: - *index += ReadPackedPrimitives( + *index += ReadPackedPrimitives( buf, buf_size, *index, stride, data); return absl::OkStatus(); case WireFormatLite::TYPE_SFIXED32: @@ -603,8 +607,9 @@ inline absl::Status ReadPackedFromArray( buf, buf_size, *index, stride, data); return absl::OkStatus(); case DataType::DT_INT32: - *index += ReadPackedPrimitives( - buf, buf_size, *index, stride, data); + *index += + ReadPackedPrimitives( + buf, buf_size, *index, stride, data); return absl::OkStatus(); default: return errors::DataLoss("Failed reading TYPE_INT32 for ", @@ -622,7 +627,7 @@ inline absl::Status ReadPackedFromArray( buf, buf_size, *index, stride, data); return absl::OkStatus(); case DataType::DT_INT32: - *index += ReadPackedPrimitives( + *index += ReadPackedPrimitives( buf, buf_size, *index, stride, data); return absl::OkStatus(); default: @@ -645,14 +650,14 @@ inline absl::Status ReadPackedFromArray( // Important: This routine may read as much as kMaxVarintBytes from // the buffer. It is the caller's responsibility to make sure that there is // enough space in the buffer. 
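ReadVarint64FromArray in the next hunk accumulates the varint in three 32-bit pieces (28 + 28 + 8 payload bits) before assembling the 64-bit result. A compact standalone decoder of the same wire format, written as a plain loop rather than the split-accumulator version used there:

#include <cstdint>
#include <iostream>
#include <vector>

bool DecodeVarint64(const std::vector<uint8_t>& bytes, uint64_t* value) {
  uint64_t result = 0;
  int shift = 0;
  for (uint8_t b : bytes) {
    result |= static_cast<uint64_t>(b & 0x7f) << shift;
    if ((b & 0x80) == 0) {  // high bit clear: this was the last byte
      *value = result;
      return true;
    }
    shift += 7;
    if (shift >= 64) return false;  // malformed: more than 10 bytes
  }
  return false;  // ran out of input before the terminating byte
}

int main() {
  uint64_t v = 0;
  // 300 encodes as 0xAC 0x02 on the wire.
  if (DecodeVarint64({0xAC, 0x02}, &v)) std::cout << v << "\n";  // prints 300
}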
-inline const uint8* ReadVarint64FromArray(const uint8* buffer, bool* ok, - uint64* value) { - const uint8* ptr = buffer; - uint32 b; +inline const uint8_t* ReadVarint64FromArray(const uint8_t* buffer, bool* ok, + uint64_t* value) { + const uint8_t* ptr = buffer; + uint32_t b; // Splitting into 32-bit pieces gives better performance on 32-bit // processors. - uint32 part0 = 0, part1 = 0, part2 = 0; + uint32_t part0 = 0, part1 = 0, part2 = 0; b = *(ptr++); part0 = b; @@ -702,8 +707,9 @@ inline const uint8* ReadVarint64FromArray(const uint8* buffer, bool* ok, done: *ok = true; - *value = (static_cast(part0)) | (static_cast(part1) << 28) | - (static_cast(part2) << 56); + *value = (static_cast(part0)) | + (static_cast(part1) << 28) | + (static_cast(part2) << 56); return ptr; } diff --git a/tensorflow/core/util/proto/descriptor_pool_registry.cc b/tensorflow/core/util/proto/descriptor_pool_registry.cc index 5f0423f76b74c2..e8184f6b2fabfc 100644 --- a/tensorflow/core/util/proto/descriptor_pool_registry.cc +++ b/tensorflow/core/util/proto/descriptor_pool_registry.cc @@ -27,19 +27,19 @@ DescriptorPoolRegistry* DescriptorPoolRegistry::Global() { } DescriptorPoolRegistry::DescriptorPoolFn* DescriptorPoolRegistry::Get( - const string& source) { + const std::string& source) { auto found = fns_.find(source); if (found == fns_.end()) return nullptr; return &found->second; } void DescriptorPoolRegistry::Register( - const string& source, + const std::string& source, const DescriptorPoolRegistry::DescriptorPoolFn& pool_fn) { auto existing = Get(source); CHECK_EQ(existing, nullptr) << "descriptor pool for source: " << source << " already registered"; - fns_.insert(std::pair(source, pool_fn)); + fns_.insert(std::pair(source, pool_fn)); } } // namespace tensorflow diff --git a/tensorflow/core/util/proto/descriptor_pool_registry.h b/tensorflow/core/util/proto/descriptor_pool_registry.h index 59c709ea150e87..5718243c15cbab 100644 --- a/tensorflow/core/util/proto/descriptor_pool_registry.h +++ b/tensorflow/core/util/proto/descriptor_pool_registry.h @@ -39,13 +39,13 @@ class DescriptorPoolRegistry { static DescriptorPoolRegistry* Global(); // Returns a pointer to a descriptor pool function for the given source. - DescriptorPoolFn* Get(const string& source); + DescriptorPoolFn* Get(const std::string& source); // Registers a descriptor pool factory. - void Register(const string& source, const DescriptorPoolFn& pool_fn); + void Register(const std::string& source, const DescriptorPoolFn& pool_fn); private: - std::map fns_; + std::map fns_; }; namespace descriptor_pool_registration { @@ -53,7 +53,7 @@ namespace descriptor_pool_registration { class DescriptorPoolRegistration { public: DescriptorPoolRegistration( - const string& source, + const std::string& source, const DescriptorPoolRegistry::DescriptorPoolFn& pool_fn) { DescriptorPoolRegistry::Global()->Register(source, pool_fn); } diff --git a/tensorflow/core/util/proto/descriptors.cc b/tensorflow/core/util/proto/descriptors.cc index 31942145fe32fa..e485499c94d5f7 100644 --- a/tensorflow/core/util/proto/descriptors.cc +++ b/tensorflow/core/util/proto/descriptors.cc @@ -45,7 +45,7 @@ absl::Status CreatePoolFromSet( // The file must contain a serialized `FileDescriptorSet`. See // `GetDescriptorPool()` for more information. 
absl::Status GetDescriptorPoolFromFile( - tensorflow::Env* env, const string& filename, + tensorflow::Env* env, const std::string& filename, std::unique_ptr* owned_desc_pool) { absl::Status st = env->FileExists(filename); if (!st.ok()) { @@ -66,7 +66,7 @@ absl::Status GetDescriptorPoolFromFile( } absl::Status GetDescriptorPoolFromBinary( - const string& source, + const std::string& source, std::unique_ptr* owned_desc_pool) { if (!absl::StartsWith(source, "bytes://")) { return errors::InvalidArgument(absl::StrCat( @@ -76,7 +76,7 @@ absl::Status GetDescriptorPoolFromBinary( } // Parse the FileDescriptorSet. protobuf::FileDescriptorSet proto; - if (!proto.ParseFromString(string(absl::StripPrefix(source, "bytes://")))) { + if (!proto.ParseFromString(absl::StripPrefix(source, "bytes://"))) { return errors::InvalidArgument(absl::StrCat( "Source does not represent serialized file descriptor set proto. ", "This may be due to a missing dependency on the file containing ", @@ -88,7 +88,7 @@ absl::Status GetDescriptorPoolFromBinary( } // namespace absl::Status GetDescriptorPool( - Env* env, string const& descriptor_source, + Env* env, const std::string& descriptor_source, protobuf::DescriptorPool const** desc_pool, std::unique_ptr* owned_desc_pool) { // Attempt to lookup the pool in the registry. diff --git a/tensorflow/core/util/proto/descriptors.h b/tensorflow/core/util/proto/descriptors.h index 3402ed0504410e..7b6ce3b97b5053 100644 --- a/tensorflow/core/util/proto/descriptors.h +++ b/tensorflow/core/util/proto/descriptors.h @@ -46,7 +46,7 @@ using tsl::Env; // Custom schemas can be supported by registering a handler with the // `DescriptorPoolRegistry`. absl::Status GetDescriptorPool( - Env* env, string const& descriptor_source, + Env* env, const std::string& descriptor_source, protobuf::DescriptorPool const** desc_pool, std::unique_ptr* owned_desc_pool); diff --git a/tensorflow/core/util/proto/proto_utils.cc b/tensorflow/core/util/proto/proto_utils.cc index 0833352bf431d7..f0a103eaa2823c 100644 --- a/tensorflow/core/util/proto/proto_utils.cc +++ b/tensorflow/core/util/proto/proto_utils.cc @@ -79,20 +79,20 @@ absl::Status ParseTextFormatFromString(absl::string_view input, return absl::Status(absl::StatusCode::kInvalidArgument, "output must be non NULL"); } - string err; + std::string err; StringErrorCollector err_collector(&err, /*one-indexing=*/true); protobuf::TextFormat::Parser parser; parser.RecordErrorsTo(&err_collector); - if (!parser.ParseFromString(string(input), output)) { + if (!parser.ParseFromString(input, output)) { return absl::Status(absl::StatusCode::kInvalidArgument, err); } return absl::OkStatus(); } -StringErrorCollector::StringErrorCollector(string* error_text) +StringErrorCollector::StringErrorCollector(std::string* error_text) : StringErrorCollector(error_text, false) {} -StringErrorCollector::StringErrorCollector(string* error_text, +StringErrorCollector::StringErrorCollector(std::string* error_text, bool one_indexing) : error_text_(error_text), index_offset_(one_indexing ? 1 : 0) { DCHECK(error_text_ != nullptr) << "error_text must be non NULL"; diff --git a/tensorflow/core/util/proto/proto_utils.h b/tensorflow/core/util/proto/proto_utils.h index 8a94a832fec58c..65c73e35c15f8b 100644 --- a/tensorflow/core/util/proto/proto_utils.h +++ b/tensorflow/core/util/proto/proto_utils.h @@ -44,11 +44,11 @@ class StringErrorCollector : public protobuf::io::ErrorCollector { public: // String error_text is unowned and must remain valid during the use of // StringErrorCollector. 
- explicit StringErrorCollector(string* error_text); + explicit StringErrorCollector(std::string* error_text); // If one_indexing is set to true, all line and column numbers will be // increased by one for cases when provided indices are 0-indexed and // 1-indexed error messages are desired - StringErrorCollector(string* error_text, bool one_indexing); + StringErrorCollector(std::string* error_text, bool one_indexing); StringErrorCollector(const StringErrorCollector&) = delete; StringErrorCollector& operator=(const StringErrorCollector&) = delete; @@ -61,7 +61,7 @@ class StringErrorCollector : public protobuf::io::ErrorCollector { absl::string_view message) override; private: - string* const error_text_; + std::string* const error_text_; const int index_offset_; }; diff --git a/tensorflow/core/util/proto/proto_utils_test.cc b/tensorflow/core/util/proto/proto_utils_test.cc index 8632c2a5e29d52..460e41ad770c31 100644 --- a/tensorflow/core/util/proto/proto_utils_test.cc +++ b/tensorflow/core/util/proto/proto_utils_test.cc @@ -61,21 +61,21 @@ TEST(ParseTextFormatFromStringTest, DiesOnNullOutputPointer) { } TEST(StringErrorCollectorTest, AppendsError) { - string err; + std::string err; StringErrorCollector collector(&err); collector.RecordError(1, 2, "foo"); EXPECT_EQ("1(2): foo\n", err); } TEST(StringErrorCollectorTest, AppendsWarning) { - string err; + std::string err; StringErrorCollector collector(&err); collector.RecordWarning(1, 2, "foo"); EXPECT_EQ("1(2): foo\n", err); } TEST(StringErrorCollectorTest, AppendsMultipleError) { - string err; + std::string err; StringErrorCollector collector(&err); collector.RecordError(1, 2, "foo"); collector.RecordError(3, 4, "bar"); @@ -83,7 +83,7 @@ TEST(StringErrorCollectorTest, AppendsMultipleError) { } TEST(StringErrorCollectorTest, AppendsMultipleWarning) { - string err; + std::string err; StringErrorCollector collector(&err); collector.RecordWarning(1, 2, "foo"); collector.RecordWarning(3, 4, "bar"); @@ -91,7 +91,7 @@ TEST(StringErrorCollectorTest, AppendsMultipleWarning) { } TEST(StringErrorCollectorTest, OffsetWorks) { - string err; + std::string err; StringErrorCollector collector(&err, true); collector.RecordError(1, 2, "foo"); collector.RecordWarning(3, 4, "bar"); diff --git a/tensorflow/core/util/stat_summarizer.cc b/tensorflow/core/util/stat_summarizer.cc index 26a06bbb6ff129..a9f1675544a2f2 100644 --- a/tensorflow/core/util/stat_summarizer.cc +++ b/tensorflow/core/util/stat_summarizer.cc @@ -82,7 +82,7 @@ void StatSummarizer::Validate(const std::vector* outputs, } void StatSummarizer::PrintStepStats() const { - string output = GetOutputString(); + std::string output = GetOutputString(); std::istringstream iss(output); for (std::string line; std::getline(iss, line);) { LOG(INFO) << line; diff --git a/tensorflow/core/util/stat_summarizer.h b/tensorflow/core/util/stat_summarizer.h index 3eae427f548475..62d192ab5193d2 100644 --- a/tensorflow/core/util/stat_summarizer.h +++ b/tensorflow/core/util/stat_summarizer.h @@ -89,7 +89,7 @@ class StatSummarizer { return stats_calculator_->GetStatsByNodeType(); } - std::string GetStatsByMetric(const string& title, + std::string GetStatsByMetric(const std::string& title, StatsCalculator::SortingMetric sorting_metric, int num_stats) const { return stats_calculator_->GetStatsByMetric(title, sorting_metric, diff --git a/tensorflow/core/util/stream_executor_util.h b/tensorflow/core/util/stream_executor_util.h index 4787bcf6ded5d2..63d6a3f1a9f146 100644 --- a/tensorflow/core/util/stream_executor_util.h +++ 
b/tensorflow/core/util/stream_executor_util.h @@ -32,7 +32,8 @@ class StreamExecutorUtil { template static se::DeviceMemory AsDeviceMemory(const Tensor& t) { T* ptr = reinterpret_cast(const_cast(t.tensor_data().data())); - return se::DeviceMemory(se::DeviceMemoryBase(ptr, t.TotalBytes())); + return se::DeviceMemory( + stream_executor::DeviceAddressBase(ptr, t.TotalBytes())); } }; diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index 93c5a7e9818ae2..3984d78e1b90bc 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -33,13 +33,13 @@ constexpr int32_t kShrinkAxis = -1, kNewAxis = -2; // if one does foo[3:5, ..., -3], this will have 3 length tensors struct StridedSliceSparseSpec { int64_t dims; - int32 num_add_axis_after_ellipsis; + int32_t num_add_axis_after_ellipsis; const Tensor* begin_tensor; const Tensor* end_tensor; const Tensor& strides_tensor; - const int32 begin_mask, end_mask; - int32 ellipsis_mask; - const int32 new_axis_mask, shrink_axis_mask; + const int32_t begin_mask, end_mask; + int32_t ellipsis_mask; + const int32_t new_axis_mask, shrink_axis_mask; }; // Dense slicing specification @@ -49,8 +49,8 @@ struct StridedSliceSparseSpec { // sparse had 3 length tensors. struct StridedSliceDenseSpec { const int64_t dims; - int32 begin_mask; - int32 end_mask; + int32_t begin_mask; + int32_t end_mask; bool begin_valid; bool end_valid; absl::InlinedVector& begin; @@ -62,18 +62,18 @@ struct StridedSliceDenseSpec { // entries. If an index in this array is positive, the size of the dimension // is obtained from canonical end-begin. Otherwise, if it is a kNewAxis, // it will be 1. A shrunk dimension is skipped. - absl::InlinedVector final_shape_gather_indices; + absl::InlinedVector final_shape_gather_indices; // This vector has the same size as final_shape_gather_indices, but it // remembers the sparse index that a dimension comes from, instead of dense // index. A -1 in this vector means there the index is not from the sparse // input. - absl::InlinedVector final_shape_gather_indices_sparse; - absl::InlinedVector input_shape_gather_indices_sparse; + absl::InlinedVector final_shape_gather_indices_sparse; + absl::InlinedVector input_shape_gather_indices_sparse; // The dense indexed shrink mask is which processing dimensions // should be shrunk. For example, if foo.shape = (10,10,10,10) // foo[3, ..., 5] has sparse_shrink_axis_mask of 0x5 and // dense_shrink_axis_mask of 0x9, yielding a final shape (10,10). - int32 shrink_axis_mask; + int32_t shrink_axis_mask; }; } // namespace @@ -281,7 +281,7 @@ absl::Status ValidateStridedSliceOp( *strides}; if (strides_tensor.dtype() == DT_INT32) { - TF_RETURN_IF_ERROR(BuildDenseSpec(sparse_spec, &dense_spec)); + TF_RETURN_IF_ERROR(BuildDenseSpec(sparse_spec, &dense_spec)); } else if (strides_tensor.dtype() == DT_INT64) { TF_RETURN_IF_ERROR(BuildDenseSpec(sparse_spec, &dense_spec)); } else if (strides_tensor.dtype() == DT_INT16) { diff --git a/tensorflow/core/util/tensor_bundle/naming.cc b/tensorflow/core/util/tensor_bundle/naming.cc index d59f12cd856148..fc5ab0b624754e 100644 --- a/tensorflow/core/util/tensor_bundle/naming.cc +++ b/tensorflow/core/util/tensor_bundle/naming.cc @@ -24,16 +24,16 @@ limitations under the License. 
namespace tensorflow { -string MetaFilename(absl::string_view prefix) { - return strings::Printf("%.*s.index", static_cast(prefix.size()), +std::string MetaFilename(absl::string_view prefix) { + return absl::StrFormat("%.*s.index", static_cast(prefix.size()), prefix.data()); } -string DataFilename(absl::string_view prefix, int32_t shard_id, - int32_t num_shards) { +std::string DataFilename(absl::string_view prefix, int32_t shard_id, + int32_t num_shards) { DCHECK_GT(num_shards, 0); DCHECK_LT(shard_id, num_shards); - return strings::Printf("%.*s.data-%05d-of-%05d", + return absl::StrFormat("%.*s.data-%05d-of-%05d", static_cast(prefix.size()), prefix.data(), shard_id, num_shards); } diff --git a/tensorflow/core/util/tensor_bundle/naming.h b/tensorflow/core/util/tensor_bundle/naming.h index c98abac755102a..3acd5dcdd9bbe8 100644 --- a/tensorflow/core/util/tensor_bundle/naming.h +++ b/tensorflow/core/util/tensor_bundle/naming.h @@ -40,9 +40,9 @@ limitations under the License. namespace tensorflow { -string MetaFilename(absl::string_view prefix); -string DataFilename(absl::string_view prefix, int32_t shard_id, - int32_t num_shards); +std::string MetaFilename(absl::string_view prefix); +std::string DataFilename(absl::string_view prefix, int32_t shard_id, + int32_t num_shards); } // namespace tensorflow diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc index afa764a2e15227..1037ffd542b668 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc @@ -237,7 +237,7 @@ tstring* GetStringBackingBuffer(const Tensor& val) { absl::Status ParseEntryProto(absl::string_view key, absl::string_view value, protobuf::MessageLite* out) { - if (!out->ParseFromArray(value.data(), value.size())) { + if (!out->ParseFromString(value)) { return errors::DataLoss("Entry for key ", key, " not parseable."); } return absl::OkStatus(); @@ -1225,7 +1225,7 @@ string BundleReader::DebugString() { BundleEntryProto entry; Seek(kHeaderEntryKey); for (Next(); Valid(); Next()) { - CHECK(entry.ParseFromArray(value().data(), value().size())); + CHECK(entry.ParseFromString(value())); if (entry.slices_size() > 0) continue; // Slice of some partitioned var. strings::StrAppend(&shape_str, key(), " (", DataType_Name(entry.dtype()), diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc index d25c6018e5beb9..592583c1acb2de 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc @@ -50,13 +50,13 @@ using ::testing::ElementsAre; namespace { // Prepend the current test case's working temporary directory to -string Prefix(const string& prefix) { +std::string Prefix(const std::string& prefix) { return absl::StrCat(testing::TmpDir(), "/", prefix); } // Construct a data input directory by prepending the test data root // directory to -string TestdataPrefix(const string& prefix) { +std::string TestdataPrefix(const std::string& prefix) { return absl::StrCat(testing::TensorFlowSrcRoot(), "/core/util/tensor_bundle/testdata/", prefix); } @@ -87,7 +87,7 @@ Tensor ByteSwap(Tensor t) { // Assert that has a tensor under matching in // terms of both shape, dtype, and value template -void Expect(BundleReader* reader, const string& key, +void Expect(BundleReader* reader, const std::string& key, const Tensor& expected_val) { // Tests for Contains(). 
EXPECT_TRUE(reader->Contains(key)); @@ -104,7 +104,7 @@ void Expect(BundleReader* reader, const string& key, } template -void ExpectVariant(BundleReader* reader, const string& key, +void ExpectVariant(BundleReader* reader, const std::string& key, const Tensor& expected_t) { // Tests for Contains(). EXPECT_TRUE(reader->Contains(key)); @@ -137,8 +137,8 @@ void ExpectNext(BundleReader* reader, const Tensor& expected_val) { test::ExpectTensorEqual(val, expected_val); } -std::vector AllTensorKeys(BundleReader* reader) { - std::vector ret; +std::vector AllTensorKeys(BundleReader* reader) { + std::vector ret; reader->Seek(kHeaderEntryKey); reader->Next(); for (; reader->Valid(); reader->Next()) { @@ -149,9 +149,9 @@ std::vector AllTensorKeys(BundleReader* reader) { // Writes out the metadata file of a bundle again, with the endianness marker // bit flipped. -absl::Status FlipEndiannessBit(const string& prefix) { +absl::Status FlipEndiannessBit(const std::string& prefix) { Env* env = Env::Default(); - const string metadata_tmp_path = Prefix("some_tmp_path"); + const std::string metadata_tmp_path = Prefix("some_tmp_path"); std::unique_ptr metadata_file; TF_RETURN_IF_ERROR(env->NewWritableFile(metadata_tmp_path, &metadata_file)); // We create the builder lazily in case we run into an exception earlier, in @@ -161,8 +161,8 @@ absl::Status FlipEndiannessBit(const string& prefix) { // Reads the existing metadata file, and fills the builder. { - const string filename = MetaFilename(prefix); - uint64 file_size; + const std::string filename = MetaFilename(prefix); + uint64_t file_size; TF_RETURN_IF_ERROR(env->GetFileSize(filename, &file_size)); std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewRandomAccessFile(filename, &file)); @@ -213,7 +213,7 @@ void TestBasic() { TF_ASSERT_OK(reader.status()); EXPECT_EQ( AllTensorKeys(&reader), - std::vector({"foo_000", "foo_001", "foo_002", "foo_003"})); + std::vector({"foo_000", "foo_001", "foo_002", "foo_003"})); Expect(&reader, "foo_000", Constant_2x3(T(0))); Expect(&reader, "foo_001", Constant_2x3(T(1))); Expect(&reader, "foo_002", Constant_2x3(T(2))); @@ -243,7 +243,7 @@ void TestBasic() { TF_ASSERT_OK(reader.status()); EXPECT_EQ( AllTensorKeys(&reader), - std::vector({"bar_000", "bar_001", "bar_002", "bar_003"})); + std::vector({"bar_000", "bar_001", "bar_002", "bar_003"})); Expect(&reader, "bar_003", Constant_2x3(T(3))); Expect(&reader, "bar_002", Constant_2x3(T(2))); Expect(&reader, "bar_001", Constant_2x3(T(1))); @@ -267,8 +267,8 @@ void TestBasic() { TF_ASSERT_OK(reader.status()); EXPECT_EQ( AllTensorKeys(&reader), - std::vector({"bar_000", "bar_001", "bar_002", "bar_003", - "foo_000", "foo_001", "foo_002", "foo_003"})); + std::vector({"bar_000", "bar_001", "bar_002", "bar_003", + "foo_000", "foo_001", "foo_002", "foo_003"})); Expect(&reader, "bar_000", Constant_2x3(T(0))); Expect(&reader, "bar_001", Constant_2x3(T(1))); Expect(&reader, "bar_002", Constant_2x3(T(2))); @@ -361,8 +361,8 @@ TEST(TensorBundleTest, SwapBytes) { // 64-bit types // Cast to uint64*/int64* to make DataTypeToEnum happy - TestByteSwap(reinterpret_cast(forward_64), - reinterpret_cast(swapped_64), arr_len_64); + TestByteSwap(reinterpret_cast(forward_64), + reinterpret_cast(swapped_64), arr_len_64); TestByteSwap(reinterpret_cast(forward_64), reinterpret_cast(swapped_64), arr_len_64); TestByteSwap(reinterpret_cast(forward_64), @@ -413,7 +413,7 @@ void TestEndianness() { TF_ASSERT_OK(reader.status()); EXPECT_EQ( AllTensorKeys(&reader), - std::vector({"foo_000", "foo_001", "foo_002", 
"foo_003"})); + std::vector({"foo_000", "foo_001", "foo_002", "foo_003"})); Expect(&reader, "foo_000", Constant_2x3(T(0))); Expect(&reader, "foo_001", Constant_2x3(T(1))); Expect(&reader, "foo_002", Constant_2x3(T(2))); @@ -444,7 +444,7 @@ void TestEndianness() { TF_ASSERT_OK(reader.status()); EXPECT_EQ( AllTensorKeys(&reader), - std::vector({"bar_000", "bar_001", "bar_002", "bar_003"})); + std::vector({"bar_000", "bar_001", "bar_002", "bar_003"})); Expect(&reader, "bar_003", Constant_2x3(T(3))); Expect(&reader, "bar_002", Constant_2x3(T(2))); Expect(&reader, "bar_001", Constant_2x3(T(1))); @@ -468,8 +468,8 @@ void TestEndianness() { TF_ASSERT_OK(reader.status()); EXPECT_EQ( AllTensorKeys(&reader), - std::vector({"bar_000", "bar_001", "bar_002", "bar_003", - "foo_000", "foo_001", "foo_002", "foo_003"})); + std::vector({"bar_000", "bar_001", "bar_002", "bar_003", + "foo_000", "foo_001", "foo_002", "foo_003"})); Expect(&reader, "bar_000", Constant_2x3(T(0))); Expect(&reader, "bar_001", Constant_2x3(T(1))); Expect(&reader, "bar_002", Constant_2x3(T(2))); @@ -519,7 +519,7 @@ void TestNonStandardShapes() { // Writes a bundle to disk with a bad "version"; checks for "expected_error". void VersionTest(const VersionDef& version, absl::string_view expected_error) { - const string path = Prefix("version_test"); + const std::string path = Prefix("version_test"); { // Prepare an empty bundle with the given version information. BundleHeaderProto header; @@ -543,10 +543,10 @@ void VersionTest(const VersionDef& version, absl::string_view expected_error) { TEST(TensorBundleTest, Basic) { TestBasic(); TestBasic(); - TestBasic(); - TestBasic(); - TestBasic(); - TestBasic(); + TestBasic(); + TestBasic(); + TestBasic(); + TestBasic(); TestBasic(); TestBasic(); TestBasic(); @@ -560,10 +560,10 @@ TEST(TensorBundleTest, Basic) { TEST(TensorBundleTest, Endianness) { TestEndianness(); TestEndianness(); - TestEndianness(); - TestEndianness(); - TestEndianness(); - TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); + TestEndianness(); TestEndianness(); TestEndianness(); TestEndianness(); @@ -704,10 +704,10 @@ TEST(TensorBundleTest, EquivalentSliceTest) { TEST(TensorBundleTest, NonStandardShapes) { TestNonStandardShapes(); TestNonStandardShapes(); - TestNonStandardShapes(); - TestNonStandardShapes(); - TestNonStandardShapes(); - TestNonStandardShapes(); + TestNonStandardShapes(); + TestNonStandardShapes(); + TestNonStandardShapes(); + TestNonStandardShapes(); TestNonStandardShapes(); TestNonStandardShapes(); TestNonStandardShapes(); @@ -723,15 +723,16 @@ TEST(TensorBundleTest, StringTensorsOldFormat) { // varint32s to store string lengths (we now use varint64s). BundleReader reader(Env::Default(), TestdataPrefix("old_string_tensors/foo")); TF_ASSERT_OK(reader.status()); - EXPECT_EQ(AllTensorKeys(&reader), - std::vector({"floats", "scalar", "string_tensor", "strs"})); + EXPECT_EQ( + AllTensorKeys(&reader), + std::vector({"floats", "scalar", "string_tensor", "strs"})); Expect(&reader, "string_tensor", Tensor(DT_STRING, TensorShape({1}))); Expect(&reader, "scalar", test::AsTensor({"hello"})); Expect( &reader, "strs", - test::AsTensor({"hello", "", "x01", string(1 << 10, 'c')})); + test::AsTensor({"hello", "", "x01", std::string(1 << 10, 'c')})); Expect(&reader, "floats", Constant_2x3(16.18)); } @@ -758,8 +759,8 @@ TEST(TensorBundleTest, StringTensors) { Tensor(DT_STRING, TensorShape({1})))); // Empty. 
TF_EXPECT_OK(writer.Add("scalar", test::AsTensor({"hello"}))); TF_EXPECT_OK(writer.Add( - "strs", - test::AsTensor({"hello", "", "x01", string(1 << 25, 'c')}))); + "strs", test::AsTensor( + {"hello", "", "x01", std::string(1 << 25, 'c')}))); // Requires a 64-bit length. tstring* backing_string = long_string_tensor.flat().data(); @@ -775,15 +776,15 @@ TEST(TensorBundleTest, StringTensors) { BundleReader reader(Env::Default(), Prefix("foo")); TF_ASSERT_OK(reader.status()); EXPECT_EQ(AllTensorKeys(&reader), - std::vector({"floats", "long_scalar", "scalar", - "string_tensor", "strs"})); + std::vector({"floats", "long_scalar", "scalar", + "string_tensor", "strs"})); Expect(&reader, "string_tensor", Tensor(DT_STRING, TensorShape({1}))); Expect(&reader, "scalar", test::AsTensor({"hello"})); - Expect( - &reader, "strs", - test::AsTensor({"hello", "", "x01", string(1 << 25, 'c')})); + Expect(&reader, "strs", + test::AsTensor( + {"hello", "", "x01", std::string(1 << 25, 'c')})); Expect(&reader, "floats", Constant_2x3(16.18)); @@ -825,10 +826,10 @@ TEST(TensorBundleTest, StringTensors) { class VariantObject { public: VariantObject() {} - VariantObject(const string& metadata, int64_t value) + VariantObject(const std::string& metadata, int64_t value) : metadata_(metadata), value_(value) {} - string TypeName() const { return "TEST VariantObject"; } + std::string TypeName() const { return "TEST VariantObject"; } void Encode(VariantTensorData* data) const { data->set_type_name(TypeName()); data->set_metadata(metadata_); @@ -846,7 +847,7 @@ class VariantObject { bool operator==(const VariantObject other) const { return metadata_ == other.metadata_ && value_ == other.value_; } - string metadata_; + std::string metadata_; int64_t value_; }; @@ -874,8 +875,8 @@ TEST(TensorBundleTest, VariantTensors) { TEST(TensorBundleTest, DirectoryStructure) { Env* env = Env::Default(); // Writes two bundles. - const std::vector kBundlePrefixes = {Prefix("worker0"), - Prefix("worker1")}; + const std::vector kBundlePrefixes = {Prefix("worker0"), + Prefix("worker1")}; for (int i = 0; i < 2; ++i) { BundleWriter writer(env, kBundlePrefixes[i]); TF_EXPECT_OK( @@ -884,10 +885,10 @@ TEST(TensorBundleTest, DirectoryStructure) { } // Ensures we have the expected files. - auto CheckDirFiles = [env](const string& bundle_prefix, - absl::Span expected_files) { + auto CheckDirFiles = [env](const std::string& bundle_prefix, + absl::Span expected_files) { absl::string_view dir = io::Dirname(bundle_prefix); - for (const string& expected_file : expected_files) { + for (const std::string& expected_file : expected_files) { TF_EXPECT_OK(env->FileExists(io::JoinPath(dir, expected_file))); } }; @@ -901,7 +902,7 @@ TEST(TensorBundleTest, DirectoryStructure) { {"worker1.index", "worker1.data-00000-of-00001"}); // Trivially "merge" one bundle to some other location (i.e., a renaming). 
- const string kAnotherPrefix = Prefix("another"); + const std::string kAnotherPrefix = Prefix("another"); TF_ASSERT_OK(MergeBundles(env, {kBundlePrefixes[0]}, kAnotherPrefix)); CheckDirFiles(kAnotherPrefix, {"another.index", "another.data-00000-of-00001"}); @@ -910,7 +911,7 @@ TEST(TensorBundleTest, DirectoryStructure) { // merged.index // merged.data-00000-of-00002 // merged.data-00001-of-00002 - const string kMerged = Prefix("merged"); + const std::string kMerged = Prefix("merged"); TF_ASSERT_OK( MergeBundles(env, {kAnotherPrefix, kBundlePrefixes[1]}, kMerged)); CheckDirFiles(kMerged, {"merged.index", "merged.data-00000-of-00002", @@ -919,8 +920,8 @@ TEST(TensorBundleTest, DirectoryStructure) { TEST(TensorBundleTest, SortForSequentialAccess) { Env* env = Env::Default(); - const std::vector kBundlePrefixes = {Prefix("worker0"), - Prefix("worker1")}; + const std::vector kBundlePrefixes = {Prefix("worker0"), + Prefix("worker1")}; BundleWriter writer0(env, kBundlePrefixes[0]); for (int i = 0; i < 3; ++i) { TF_EXPECT_OK( @@ -935,7 +936,7 @@ TEST(TensorBundleTest, SortForSequentialAccess) { } TF_ASSERT_OK(writer1.Finish()); - const string kMerged = Prefix("merged"); + const std::string kMerged = Prefix("merged"); TF_ASSERT_OK( MergeBundles(env, {kBundlePrefixes[0], kBundlePrefixes[1]}, kMerged)); @@ -945,10 +946,11 @@ TEST(TensorBundleTest, SortForSequentialAccess) { BundleReader reader(env, kMerged); TF_ASSERT_OK(reader.status()); - std::vector tensor_names = {"tensor-1-0", "tensor-0-1", "tensor-1-2", - "tensor-0-0", "tensor-1-1", "tensor-0-2"}; - TF_ASSERT_OK(reader.SortForSequentialAccess( - tensor_names, [](const string& element) { return element; })); + std::vector tensor_names = {"tensor-1-0", "tensor-0-1", + "tensor-1-2", "tensor-0-0", + "tensor-1-1", "tensor-0-2"}; + TF_ASSERT_OK(reader.SortForSequentialAccess( + tensor_names, [](const std::string& element) { return element; })); EXPECT_THAT(tensor_names, ElementsAre("tensor-0-0", "tensor-0-1", "tensor-0-2", "tensor-1-2", "tensor-1-1", "tensor-1-0")); @@ -976,11 +978,11 @@ TEST(TensorBundleTest, Error) { TEST(TensorBundleTest, Checksum) { // Randomly flips a byte in [pos_lhs, end of data file), or exactly byte // pos_lhs if exact_pos == True. - auto FlipByte = [](const string& prefix, int pos_lhs, + auto FlipByte = [](const std::string& prefix, int pos_lhs, bool exact_pos = false) { DCHECK_GE(pos_lhs, 0); - const string& datafile = DataFilename(Prefix(prefix), 0, 1); - string data; + const std::string& datafile = DataFilename(Prefix(prefix), 0, 1); + std::string data; TF_ASSERT_OK(ReadFileToString(Env::Default(), datafile, &data)); int byte_pos = 0; @@ -995,8 +997,8 @@ TEST(TensorBundleTest, Checksum) { TF_ASSERT_OK(WriteStringToFile(Env::Default(), datafile, data)); }; // The lookup should fail with a checksum-related message. - auto ExpectLookupFails = [](const string& prefix, const string& key, - const string& expected_msg, Tensor& val) { + auto ExpectLookupFails = [](const std::string& prefix, const std::string& key, + const std::string& expected_msg, Tensor& val) { BundleReader reader(Env::Default(), Prefix(prefix)); absl::Status status = reader.Lookup(key, &val); EXPECT_TRUE(absl::IsDataLoss(status)); @@ -1048,8 +1050,8 @@ TEST(TensorBundleTest, TruncatedTensorContents) { TF_ASSERT_OK(writer.Finish()); // Truncates the data file by one byte, so that we hit EOF. 
- const string datafile = DataFilename(Prefix("end"), 0, 1); - string data; + const std::string datafile = DataFilename(Prefix("end"), 0, 1); + std::string data; TF_ASSERT_OK(ReadFileToString(env, datafile, &data)); ASSERT_TRUE(!data.empty()); TF_ASSERT_OK(WriteStringToFile( @@ -1143,7 +1145,7 @@ TEST(TensorBundleTest, LargeVariableLoadingTest) { TF_ASSERT_OK(reader.status()); EXPECT_EQ( AllTensorKeys(&reader), - std::vector({"foo_000", "foo_001", "foo_002", "foo_003"})); + std::vector({"foo_000", "foo_001", "foo_002", "foo_003"})); Expect(&reader, "foo_000", Constant_100x100(0)); Expect(&reader, "foo_001", Constant_100x100(1)); Expect(&reader, "foo_002", Constant_100x100(2)); @@ -1220,7 +1222,8 @@ TEST(BundleCacheTest, ConcurrentGetFile) { class TensorBundleAlignmentTest : public ::testing::Test { protected: template - void ExpectAlignment(BundleReader* reader, const string& key, int alignment) { + void ExpectAlignment(BundleReader* reader, const std::string& key, + int alignment) { BundleEntryProto full_tensor_entry; TF_ASSERT_OK(reader->GetBundleEntryProto(key, &full_tensor_entry)); EXPECT_EQ(0, full_tensor_entry.offset() % alignment); @@ -1243,7 +1246,7 @@ TEST_F(TensorBundleAlignmentTest, AlignmentTest) { TF_ASSERT_OK(reader.status()); EXPECT_EQ( AllTensorKeys(&reader), - std::vector({"foo_000", "foo_001", "foo_002", "foo_003"})); + std::vector({"foo_000", "foo_001", "foo_002", "foo_003"})); Expect(&reader, "foo_000", Constant_2x3(0)); Expect(&reader, "foo_001", Constant_2x3(1)); Expect(&reader, "foo_002", Constant_2x3(2)); @@ -1298,7 +1301,7 @@ BENCHMARK(BM_BundleAlignment)->ArgPair(4096, 1048576); static void BM_BundleWriterSmallTensor(::testing::benchmark::State& state) { const int64_t bytes = state.range(0); - Tensor t = Constant(static_cast('a'), TensorShape{bytes}); + Tensor t = Constant(static_cast('a'), TensorShape{bytes}); BundleWriter writer(Env::Default(), Prefix("foo")); int suffix = 0; for (auto s : state) { @@ -1311,7 +1314,7 @@ BENCHMARK(BM_BundleWriterSmallTensor)->Range(1, 1 << 20); static void BM_BundleWriterLargeTensor(::testing::benchmark::State& state) { const int mb = state.range(0); const int64_t bytes = static_cast(mb) * (1 << 20); - Tensor t = Constant(static_cast('a'), TensorShape{bytes}); + Tensor t = Constant(static_cast('a'), TensorShape{bytes}); for (auto s : state) { BundleWriter writer(Env::Default(), Prefix("foo")); TF_CHECK_OK(writer.Add("big", t)); diff --git a/tensorflow/dtensor/cc/BUILD b/tensorflow/dtensor/cc/BUILD index 1705ba2425577c..ccdf73f79f15b1 100644 --- a/tensorflow/dtensor/cc/BUILD +++ b/tensorflow/dtensor/cc/BUILD @@ -225,7 +225,6 @@ tf_kernel_library( "dtensor_tpu_kernels.cc", ], tags = [ - "cuda-only", "tpu", ], # Disable building of TPU kernels on non-TPU platforms. 
deps = [ diff --git a/tensorflow/dtensor/cc/save_restore_util.cc b/tensorflow/dtensor/cc/save_restore_util.cc index dcaf41baf5f1e6..2f8d75cca43fa9 100644 --- a/tensorflow/dtensor/cc/save_restore_util.cc +++ b/tensorflow/dtensor/cc/save_restore_util.cc @@ -156,13 +156,12 @@ SaveOpSpecs BuildPerDeviceSave( shape_and_slice_specs.push_back({}); mlir::Value new_prefix = - builder - .create( - prefix.getLoc(), - mlir::dyn_cast(prefix.getType()), - prefix, - StringScalarConst(builder, prefix.getLoc(), - DeviceSuffix(device_id, total_devices))) + mlir::TF::AddOp::create( + builder, prefix.getLoc(), + mlir::dyn_cast(prefix.getType()), + prefix, + StringScalarConst(builder, prefix.getLoc(), + DeviceSuffix(device_id, total_devices))) .getZ(); // Generate new prefix based on device_id and save op index, only when // we need a new save_op. diff --git a/tensorflow/dtensor/cc/xla_spmd/layout_to_xla_sharding.cc b/tensorflow/dtensor/cc/xla_spmd/layout_to_xla_sharding.cc index 027f53cc3fc3e2..1c612a8f28a4ff 100644 --- a/tensorflow/dtensor/cc/xla_spmd/layout_to_xla_sharding.cc +++ b/tensorflow/dtensor/cc/xla_spmd/layout_to_xla_sharding.cc @@ -133,8 +133,8 @@ StatusOr<::xla::OpSharding> ConvertLayoutToXlaOpSharding(const Layout& layout) { { // Set Tile Assignment Dimensions by handling both partially sharded and // fully sharded. - int32 product_of_sharded_dimensions = 1; - for (int32 dim_size : layout.num_shards()) { + int32_t product_of_sharded_dimensions = 1; + for (int32_t dim_size : layout.num_shards()) { product_of_sharded_dimensions *= dim_size; xla_sharding.add_tile_assignment_dimensions(dim_size); } diff --git a/tensorflow/dtensor/mlir/cluster_function_conversion.cc b/tensorflow/dtensor/mlir/cluster_function_conversion.cc index 2f725e3e9a383f..51107b7adf544c 100644 --- a/tensorflow/dtensor/mlir/cluster_function_conversion.cc +++ b/tensorflow/dtensor/mlir/cluster_function_conversion.cc @@ -142,8 +142,8 @@ mlir::LogicalResult ReplaceClusterWithPartitionCallOp( llvm::StringRef function_name = cluster_func.getFunc(); builder->setInsertionPoint(cluster_func); - auto call_op = builder->create( - cluster_func.getLoc(), output_types, cluster_func.getOperands(), + auto call_op = mlir::TF::StatefulPartitionedCallOp::create( + *builder, cluster_func.getLoc(), output_types, cluster_func.getOperands(), /*args_attrs=*/nullptr, /*res_attrs=*/nullptr, function_name, mesh_attr, /*config_proto=*/builder->getStringAttr(""), /*executor_type=*/builder->getStringAttr("")); diff --git a/tensorflow/dtensor/mlir/collectives.cc b/tensorflow/dtensor/mlir/collectives.cc index b82304c6fd1749..ca4f5b6e8febda 100644 --- a/tensorflow/dtensor/mlir/collectives.cc +++ b/tensorflow/dtensor/mlir/collectives.cc @@ -105,8 +105,8 @@ StatusOr EmitAllGather( mlir::Location loc = DT_LOC2(input.getLoc(), "DTensorAllGatherOp"); mlir::TF::DTensorAllGatherOp all_gather = - builder.create( - loc, output_type, input, + mlir::TF::DTensorAllGatherOp::create( + builder, loc, output_type, input, mlir::dtensor::LayoutAttr::get(builder.getContext(), src_layout), mlir::dtensor::LayoutAttr::get(builder.getContext(), tgt_layout)); SetSingleLayoutOnOp(all_gather, tgt_layout); @@ -153,8 +153,8 @@ StatusOr EmitAllScatter( mlir::Location loc = DT_LOC2(original_value.getLoc(), "DTensorAllScatterOp"); mlir::TF::DTensorAllScatterOp all_scatter = - builder.create( - loc, output_type, original_value, + mlir::TF::DTensorAllScatterOp::create( + builder, loc, output_type, original_value, mlir::dtensor::LayoutAttr::get(builder.getContext(), original_layout), 
mlir::dtensor::LayoutAttr::get(builder.getContext(), desired_layout)); SetSingleLayoutOnOp(all_scatter, desired_layout); @@ -224,11 +224,10 @@ StatusOr EmitAllToAll( LocalTypeFromGlobalType(tgt_layout, global_type)); mlir::Location loc = DT_LOC2(input.getLoc(), "DTensorAllToAllOp"); - mlir::TF::DTensorAllToAllOp all_to_all = - builder.create( - loc, output_type, input, - mlir::dtensor::LayoutAttr::get(builder.getContext(), src_layout), - mlir::dtensor::LayoutAttr::get(builder.getContext(), tgt_layout)); + mlir::TF::DTensorAllToAllOp all_to_all = mlir::TF::DTensorAllToAllOp::create( + builder, loc, output_type, input, + mlir::dtensor::LayoutAttr::get(builder.getContext(), src_layout), + mlir::dtensor::LayoutAttr::get(builder.getContext(), tgt_layout)); SetSingleLayoutOnOp(all_to_all, tgt_layout); if (newly_created_ops != nullptr) newly_created_ops->insert(all_to_all); @@ -247,20 +246,21 @@ StatusOr EmitDenseToSparseToDense( // values tensor = tf.gather_nd(input, indices) // shape tensor = tf.shape(input) mlir::TF::ZerosLikeOp zeros_like = - builder.create(input.getLoc(), input); - mlir::TF::NotEqualOp not_equal = builder.create( - zeros_like.getLoc(), input, zeros_like, builder.getBoolAttr(false)); + mlir::TF::ZerosLikeOp::create(builder, input.getLoc(), input); + mlir::TF::NotEqualOp not_equal = + mlir::TF::NotEqualOp::create(builder, zeros_like.getLoc(), input, + zeros_like, builder.getBoolAttr(false)); - mlir::TF::WhereOp indices = builder.create( - not_equal.getLoc(), + mlir::TF::WhereOp indices = mlir::TF::WhereOp::create( + builder, not_equal.getLoc(), mlir::RankedTensorType::get(GetShapeOfValue(not_equal).value(), builder.getI64Type()), not_equal); - mlir::TF::GatherNdOp values = builder.create( - input.getLoc(), input.getType(), input, indices); - auto shape = builder.create(input.getLoc(), input, - builder.getBoolAttr(false)); + mlir::TF::GatherNdOp values = mlir::TF::GatherNdOp::create( + builder, input.getLoc(), input.getType(), input, indices); + auto shape = mlir::TF::ShapeOp::create(builder, input.getLoc(), input, + builder.getBoolAttr(false)); // Emit a SparseToDenseOp and replace the SparseTensor with the result of // this new op. @@ -270,8 +270,8 @@ StatusOr EmitDenseToSparseToDense( builder, input.getLoc(), mlir::cast(input.getType()).getElementType())); - auto dense = builder.create( - input.getLoc(), input.getType(), + auto dense = mlir::TF::SparseToDenseOp::create( + builder, input.getLoc(), input.getType(), mlir::ValueRange({indices, shape, values, zero_scalar})); if (newly_created_ops != nullptr) { @@ -310,8 +310,8 @@ StatusOr EmitRelayout( // If two layouts are the same, or the only difference is layout type, then // there is no need to actually relayout data. 
if (src_layout.IsEquivalentIgnoringType(tgt_layout)) { - mlir::TF::IdentityOp op = builder.create( - input.getLoc(), input.getType(), input); + mlir::TF::IdentityOp op = mlir::TF::IdentityOp::create( + builder, input.getLoc(), input.getType(), input); if (newly_created_ops != nullptr) newly_created_ops->insert(op); return op.getOutput(); } @@ -405,7 +405,7 @@ mlir::Operation* EmitTransposeOp(mlir::OpBuilder& builder, auto constant_attr = builder.getI64TensorAttr(perm_arr); auto perm_op = - builder.create(loc, perm_type, constant_attr); + mlir::TF::ConstOp::create(builder, loc, perm_type, constant_attr); std::vector transposed_shape(shape.begin(), shape.end()); for (int i = 0; i < shape.size(); i++) { @@ -414,8 +414,8 @@ mlir::Operation* EmitTransposeOp(mlir::OpBuilder& builder, auto transposed_type = mlir::RankedTensorType::get( transposed_shape, tr_input_type.getElementType()); - return builder.create(loc, transposed_type, input, - perm_op); + return mlir::TF::TransposeOp::create(builder, loc, transposed_type, input, + perm_op); } StatusOr EmitBarrierWithConstValue(mlir::OpBuilder& builder, @@ -470,10 +470,10 @@ StatusOr EmitAllReduce( DeviceTypeFromMesh(output_layout.mesh())); mlir::Location loc = DT_LOC2(input->getLoc(), "DTensorAllReduceOp"); - auto all_reduce = builder.create( - loc, input->getResultTypes()[0], input->getOpResult(0), - builder.create(DT_LOC2(loc, "group_assignment"), - group_assignment), + auto all_reduce = mlir::TF::DTensorAllReduceOp::create( + builder, loc, input->getResultTypes()[0], input->getOpResult(0), + mlir::TF::ConstOp::create(builder, DT_LOC2(loc, "group_assignment"), + group_assignment), builder.getStringAttr(std::string(reduce_op)), builder.getStringAttr(device_type)); SetSingleLayoutOnOp(all_reduce, output_layout); @@ -575,7 +575,7 @@ StatusOr CreateConstSrcTargetPair(const Mesh& mesh, auto src_target_attr = mlir::DenseIntElementsAttr::get(shaped_type, src_target_pair_flat); mlir::Value src_target_pair_tensor = - builder.create(location, src_target_attr); + mlir::TF::ConstOp::create(builder, location, src_target_attr); return src_target_pair_tensor; } @@ -636,13 +636,14 @@ StatusOr EmitHaloExchange(mlir::OpBuilder& builder, int halo_size, // // For example, if mesh dimension splits the input tensor by its height // dimension, then `left` actually means tensor to pad on the top side. - mlir::Value is_on_left_edge = builder.create( - location, CreateIntScalarConst(0, builder, location, /*use_int64=*/false), + mlir::Value is_on_left_edge = mlir::TF::EqualOp::create( + builder, location, + CreateIntScalarConst(0, builder, location, /*use_int64=*/false), scalar_mesh_coordinate, builder.getBoolAttr(true)); TF_ASSIGN_OR_RETURN(const int mesh_dim_size, mesh.dim_size(mesh_dim)); - mlir::Value is_on_right_edge = builder.create( - location, + mlir::Value is_on_right_edge = mlir::TF::EqualOp::create( + builder, location, CreateIntScalarConst(mesh_dim_size - 1, builder, location, /*use_int64=*/false), scalar_mesh_coordinate, builder.getBoolAttr(true)); @@ -663,7 +664,7 @@ StatusOr EmitHaloExchange(mlir::OpBuilder& builder, int halo_size, } mlir::Value ghost_tensor_left = - builder.create(location, const_attr).getResult(); + mlir::TF::ConstOp::create(builder, location, const_attr).getResult(); // Get the right side slice of the input tensor to pad on left side. 
llvm::SmallVector begin_left(layout.rank(), 0); @@ -676,11 +677,13 @@ StatusOr EmitHaloExchange(mlir::OpBuilder& builder, int halo_size, size[split_dim_index] = halo_size; mlir::Value size_tensor_left = ops_util::GetR1Const(size, builder, location); - mlir::Value sliced_tensor_left = builder.create( - location, halo_type, tensor, begin_tensor_left, size_tensor_left); + mlir::Value sliced_tensor_left = + mlir::TF::SliceOp::create(builder, location, halo_type, tensor, + begin_tensor_left, size_tensor_left); - mlir::Value halo_tensor_left = builder.create( - location, is_on_right_edge, ghost_tensor_left, sliced_tensor_left); + mlir::Value halo_tensor_left = + mlir::TF::SelectV2Op::create(builder, location, is_on_right_edge, + ghost_tensor_left, sliced_tensor_left); // Invoke collective permute to receive the tensor from neighboring processor. // Halo slices from the left neighbor are received on each processor (they @@ -690,12 +693,12 @@ StatusOr EmitHaloExchange(mlir::OpBuilder& builder, int halo_size, CreateConstSrcTargetPair(mesh, mesh_dim, /*shift_left=*/false, location, builder)); - mlir::Value left_concat_value = builder.create( - location, sliced_tensor_left.getType(), halo_tensor_left, + mlir::Value left_concat_value = mlir::TF::CollectivePermuteOp::create( + builder, location, sliced_tensor_left.getType(), halo_tensor_left, src_target_pair_left); mlir::Value ghost_tensor_right = - builder.create(location, const_attr).getResult(); + mlir::TF::ConstOp::create(builder, location, const_attr).getResult(); // Else, values to pad is tensor from different processor. We use collective // permute to access tensor slice from another device. @@ -704,13 +707,15 @@ StatusOr EmitHaloExchange(mlir::OpBuilder& builder, int halo_size, mlir::Value begin_tensor_right = ops_util::GetR1Const(begin_right, builder, location); mlir::Value size_tensor_right = ops_util::GetR1Const(size, builder, location); - mlir::Value sliced_tensor_right = builder.create( - location, halo_type, tensor, begin_tensor_right, size_tensor_right); + mlir::Value sliced_tensor_right = + mlir::TF::SliceOp::create(builder, location, halo_type, tensor, + begin_tensor_right, size_tensor_right); // Find the halo tensor value to pad on the `right` side. // If input block is on the right edge, we use zero ghost tensor instead. - mlir::Value halo_tensor_right = builder.create( - location, is_on_left_edge, ghost_tensor_right, sliced_tensor_right); + mlir::Value halo_tensor_right = + mlir::TF::SelectV2Op::create(builder, location, is_on_left_edge, + ghost_tensor_right, sliced_tensor_right); // Invoke collective permute to receive the tensor from neighboring processor. // Halo slices from the right neighbor are received on each processor (they @@ -719,10 +724,9 @@ StatusOr EmitHaloExchange(mlir::OpBuilder& builder, int halo_size, mlir::Value src_target_pair_right, CreateConstSrcTargetPair(mesh, mesh_dim, /*shift_left=*/true, location, builder)); - mlir::Value right_concat_value = - builder.create( - location, sliced_tensor_right.getType(), halo_tensor_right, - src_target_pair_right); + mlir::Value right_concat_value = mlir::TF::CollectivePermuteOp::create( + builder, location, sliced_tensor_right.getType(), halo_tensor_right, + src_target_pair_right); // Final halo exchanged value is concatenated value of left_concat_value, // tensor, and right_concat_value in the mesh_dimension. 
@@ -734,8 +738,8 @@ StatusOr EmitHaloExchange(mlir::OpBuilder& builder, int halo_size, final_shape, input_tensor_type.getElementType()); mlir::Value concat_axis = CreateIntScalarConst(split_dim_index, builder, location); - mlir::Value final_value = builder.create( - location, final_type, + mlir::Value final_value = mlir::TF::ConcatV2Op::create( + builder, location, final_type, llvm::SmallVector{left_concat_value, tensor, right_concat_value}, concat_axis); diff --git a/tensorflow/dtensor/mlir/collectives.h b/tensorflow/dtensor/mlir/collectives.h index fc0f8f0203d68a..101e944b84d813 100644 --- a/tensorflow/dtensor/mlir/collectives.h +++ b/tensorflow/dtensor/mlir/collectives.h @@ -84,7 +84,7 @@ StatusOr EmitAllReduce( StatusOr EmitBarrierWithConstValue(mlir::OpBuilder& builder, mlir::Location loc, const Mesh& mesh, - int32 value); + int32_t value); // Given input `tensor` that is sharded across spatial dimensions, conduct // halo exchange such that each spatially sharded input blocks exchange diff --git a/tensorflow/dtensor/mlir/collectives_common.cc b/tensorflow/dtensor/mlir/collectives_common.cc index fcda6c26d51988..37bdd53366af82 100644 --- a/tensorflow/dtensor/mlir/collectives_common.cc +++ b/tensorflow/dtensor/mlir/collectives_common.cc @@ -38,7 +38,7 @@ namespace dtensor { // a multi-host cluster will generate the same grouping, and therefore the same // XLA program fingerprint, independently. std::map guarantees the same // iteration order. -using AllReducePartitions = std::map>; +using AllReducePartitions = std::map>; // Computes AllReduce partitions using reduced mesh dimension names. // @@ -60,11 +60,11 @@ StatusOr GetAllReducePartitionsFromReducedDims( const dtensor::Layout& output_layout, const absl::flat_hash_set& reduced_dims) { AllReducePartitions partitions; - for (int64 device = 0; device < output_layout.num_devices(); ++device) { + for (int64_t device = 0; device < output_layout.num_devices(); ++device) { TF_ASSIGN_OR_RETURN(const DeviceLocation device_loc, output_layout.mesh().device_location(device)); DeviceLocation kept_dims; - for (int64 dim_idx = 0; dim_idx < device_loc.size(); ++dim_idx) { + for (int64_t dim_idx = 0; dim_idx < device_loc.size(); ++dim_idx) { if (!reduced_dims.contains(output_layout.mesh().dim_name(dim_idx))) { kept_dims.push_back(device_loc[dim_idx]); } diff --git a/tensorflow/dtensor/mlir/collectives_common.h b/tensorflow/dtensor/mlir/collectives_common.h index 6041eb4501de3f..fe8688ebc673af 100644 --- a/tensorflow/dtensor/mlir/collectives_common.h +++ b/tensorflow/dtensor/mlir/collectives_common.h @@ -29,7 +29,7 @@ namespace tensorflow { namespace dtensor { // Computes AllReduce partitions using reduced mesh dimension names. 
-StatusOr>> +StatusOr>> GetAllReducePartitionsFromReducedDims( const dtensor::Layout& output_layout, const absl::flat_hash_set& reduced_dims); diff --git a/tensorflow/dtensor/mlir/device_mesh_cluster_coarsening.cc b/tensorflow/dtensor/mlir/device_mesh_cluster_coarsening.cc index 4c45da0110c7b0..3b0e959ee32979 100644 --- a/tensorflow/dtensor/mlir/device_mesh_cluster_coarsening.cc +++ b/tensorflow/dtensor/mlir/device_mesh_cluster_coarsening.cc @@ -194,8 +194,8 @@ mlir::LogicalResult CreateMergedMeshCluster( output_values_to_replace.emplace_back(std::get<1>(cluster_return_value)); } - *merged_cluster = builder->create( - current_cluster.getLoc(), merged_cluster_output_types); + *merged_cluster = mlir::tf_device::ClusterOp::create( + *builder, current_cluster.getLoc(), merged_cluster_output_types); auto mesh_attr = current_cluster->getAttrOfType(kMeshAttr); if (!mesh_attr) return current_cluster.emitOpError(kMissingMeshAttributeErrorMessage); @@ -206,8 +206,8 @@ mlir::LogicalResult CreateMergedMeshCluster( // `current_cluster` and `merging_cluster`. merged_cluster->getBody().push_back(new mlir::Block); builder->setInsertionPointToEnd(&merged_cluster->GetBody()); - builder->create(merged_cluster->getLoc(), - merged_cluster_output_values); + mlir::tf_device::ReturnOp::create(*builder, merged_cluster->getLoc(), + merged_cluster_output_values); // Make sure to replace usages of tf_device.cluster ops to be merged-away with // newly created tf_device.cluster op. diff --git a/tensorflow/dtensor/mlir/dtensor_allreduce_combine_optimization.cc b/tensorflow/dtensor/mlir/dtensor_allreduce_combine_optimization.cc index 9261255c304033..09b53ae4b72895 100644 --- a/tensorflow/dtensor/mlir/dtensor_allreduce_combine_optimization.cc +++ b/tensorflow/dtensor/mlir/dtensor_allreduce_combine_optimization.cc @@ -72,7 +72,7 @@ namespace ops_util = ::mlir::TF::collection_ops_util; // Pad the merged tensor shape to multiples of 1024B, so delinearization // skipping optimization in XLA can get activated. -constexpr int32 kAllReducePadding = 1024; +constexpr int32_t kAllReducePadding = 1024; // Returns true if `successor` depends on `predecessor`. 
// TODO(jiawenhao): Repeatedly computing dependency sets for a large cluster can @@ -151,10 +151,10 @@ mlir::LogicalResult MergeAllReduceGroup( mlir::Location loc = all_reduce_group[0].getLoc(); mlir::Type elem_type = all_reduce_group[0].getType().getElementType(); auto zero_scalar = ops_util::CreateScalarConst(0, builder, loc); - auto zero_scalar_elem_type = builder.create( - loc, mlir::RankedTensorType::get({}, elem_type), zero_scalar); - auto merged = builder.create( - loc, ops_util::GetR1Const({total_num_elements}, builder, loc), + auto zero_scalar_elem_type = mlir::TF::CastOp::create( + builder, loc, mlir::RankedTensorType::get({}, elem_type), zero_scalar); + auto merged = mlir::TF::FillOp::create( + builder, loc, ops_util::GetR1Const({total_num_elements}, builder, loc), zero_scalar_elem_type); // Store every all-reduce's input at an offset location in the merged tensor, @@ -175,23 +175,23 @@ mlir::LogicalResult MergeAllReduceGroup( } int num_elements = all_reduce_ranked_type.getNumElements(); - auto flattened = builder.create( - DT_LOC2(loc, "CombinedReduceFlatten"), all_reduce.getInput(), + auto flattened = mlir::TF::ReshapeOp::create( + builder, DT_LOC2(loc, "CombinedReduceFlatten"), all_reduce.getInput(), ops_util::GetR1Const({num_elements}, builder, loc)); flattened_types.push_back(flattened.getType()); auto indices = ops_util::GetR1Const({offset_num_elements}, builder, loc); if (all_reduce.getDeviceType().contains("TPU")) { - updated = builder.create( - DT_LOC2(loc, "CombinedReduceUpdateSlice"), merged.getType(), + updated = mlir::TF::XlaDynamicUpdateSliceOp::create( + builder, DT_LOC2(loc, "CombinedReduceUpdateSlice"), merged.getType(), /*input=*/i == 0 ? merged.getResult() : updated, /*update=*/flattened, indices); } else { auto end = ops_util::GetR1Const({offset_num_elements + num_elements}, builder, loc); auto strides = ops_util::GetR1Const({1}, builder, loc); - updated = builder.create( - DT_LOC2(loc, "CombinedReduceUpdateSlice"), merged.getType(), + updated = mlir::TF::TensorStridedSliceUpdateOp::create( + builder, DT_LOC2(loc, "CombinedReduceUpdateSlice"), merged.getType(), /*input=*/i == 0 ? merged.getResult() : updated, indices, end, strides, /*value=*/flattened); @@ -200,8 +200,8 @@ mlir::LogicalResult MergeAllReduceGroup( } // All-reduce the updated merged tensor. 
- auto merged_all_reduce = builder.create( - all_reduce_group[0].getLoc(), updated.getType(), updated, + auto merged_all_reduce = mlir::TF::DTensorAllReduceOp::create( + builder, all_reduce_group[0].getLoc(), updated.getType(), updated, all_reduce_group[0].getGroupAssignment(), all_reduce_group[0].getReduceOp(), all_reduce_group[0].getDeviceType()); SetSingleLayoutOnOp( @@ -223,13 +223,13 @@ mlir::LogicalResult MergeAllReduceGroup( all_reduce_ranked_type)); } int num_elements = all_reduce_ranked_type.getNumElements(); - auto slice = builder.create( - DT_LOC2(loc, "PostCombinedReduceSlice"), flattened_types[i], + auto slice = mlir::TF::SliceOp::create( + builder, DT_LOC2(loc, "PostCombinedReduceSlice"), flattened_types[i], /*input=*/merged_all_reduce, /*begin=*/ops_util::GetR1Const({offset_num_elements}, builder, loc), /*size=*/ops_util::GetR1Const({num_elements}, builder, loc)); - auto replacement = builder.create( - DT_LOC2(loc, "PostCombinedReduceReshape"), slice.getResult(), + auto replacement = mlir::TF::ReshapeOp::create( + builder, DT_LOC2(loc, "PostCombinedReduceReshape"), slice.getResult(), ops_util::GetR1Const(all_reduce_shapes[i], builder, loc)); replacements.push_back(replacement); offset_num_elements += num_elements; diff --git a/tensorflow/dtensor/mlir/dtensor_allreduce_scatter_optimization.cc b/tensorflow/dtensor/mlir/dtensor_allreduce_scatter_optimization.cc index 682af5ae92b021..5721d03ce2c343 100644 --- a/tensorflow/dtensor/mlir/dtensor_allreduce_scatter_optimization.cc +++ b/tensorflow/dtensor/mlir/dtensor_allreduce_scatter_optimization.cc @@ -64,16 +64,16 @@ mlir::DenseIntElementsAttr GetScatterGroupAssignment( auto partitions = GetAllReducePartitionsFromReducedDims(original_layout, scattered_dims) .value(); - const int32 num_partitions = partitions.size(); + const int32_t num_partitions = partitions.size(); // Construct a flattened list of scatter partitions. 
- std::vector partitions_flat; + std::vector partitions_flat; for (auto& p : partitions) { partitions_flat.insert(partitions_flat.end(), p.second.begin(), p.second.end()); } - int32 partition_size = partitions.begin()->second.size(); + int32_t partition_size = partitions.begin()->second.size(); mlir::OpBuilder builder(all_scatter); auto group_shaped_type = mlir::RankedTensorType::get( {num_partitions, partition_size}, @@ -137,14 +137,14 @@ mlir::LogicalResult ApplyOptimization(mlir::func::FuncOp function) { VLOG(2) << "Fuse reduce scatter with scatter_dim: " << scatter_dim; mlir::OpBuilder builder(all_reduce); - auto scatter_dim_const_op = builder.create( - all_reduce.getLoc(), + auto scatter_dim_const_op = mlir::TF::ConstOp::create( + builder, all_reduce.getLoc(), mlir::DenseIntElementsAttr::get( mlir::RankedTensorType::get({}, builder.getI32Type()), {scatter_dim})); - auto reduce_scatter = builder.create( - all_reduce.getLoc(), all_scatter->getResultTypes(), + auto reduce_scatter = mlir::TF::DTensorReduceScatterOp::create( + builder, all_reduce.getLoc(), all_scatter->getResultTypes(), all_reduce.getOperand(0), all_reduce.getGroupAssignment(), scatter_dim_const_op, all_reduce.getReduceOp(), all_reduce.getDeviceType()); diff --git a/tensorflow/dtensor/mlir/dtensor_allreduce_sum_optimization.cc b/tensorflow/dtensor/mlir/dtensor_allreduce_sum_optimization.cc index 0a7a232290b8a7..e8a2fde042ae62 100644 --- a/tensorflow/dtensor/mlir/dtensor_allreduce_sum_optimization.cc +++ b/tensorflow/dtensor/mlir/dtensor_allreduce_sum_optimization.cc @@ -160,8 +160,8 @@ mlir::LogicalResult OptimizeAllReduceAndSum(mlir::Operation* op, mlir::OpBuilder builder(op); builder.setInsertionPointAfterValue(op->getResult(0)); mlir::TF::DTensorAllReduceOp all_reduce = - builder.create( - op->getLoc(), op->getResult(0).getType(), op->getResult(0), + mlir::TF::DTensorAllReduceOp::create( + builder, op->getLoc(), op->getResult(0).getType(), op->getResult(0), group_assignment, builder.getStringAttr(std::string(kReduceOpAdd)), builder.getStringAttr(first_reduction_op.getDeviceType())); @@ -394,8 +394,8 @@ mlir::LogicalResult ExtractAllReduceFromWhileOp( // Create a singe reduction operation that reduces the result of the locally // added tensor. - auto new_all_reduce = builder.create( - all_reduce.getLoc(), while_output.getType(), while_output, + auto new_all_reduce = mlir::TF::DTensorAllReduceOp::create( + builder, all_reduce.getLoc(), while_output.getType(), while_output, cloned_group_assignment->getResult(0), builder.getStringAttr(std::string(kReduceOpAdd)), builder.getStringAttr(all_reduce.getDeviceType())); diff --git a/tensorflow/dtensor/mlir/dtensor_layout_to_xla_sharding_op.cc b/tensorflow/dtensor/mlir/dtensor_layout_to_xla_sharding_op.cc index 6cc0a14cb1eefd..457cec03a0e1ca 100644 --- a/tensorflow/dtensor/mlir/dtensor_layout_to_xla_sharding_op.cc +++ b/tensorflow/dtensor/mlir/dtensor_layout_to_xla_sharding_op.cc @@ -112,8 +112,8 @@ void DTensorLayoutToXlaShardingOpPass::runOnOperation() { // the V1 sharding attr, so set V2 sharding to "" here. It may be better // to set the V2 sharding attr here and then removed it when V1 is // removed. - auto sharding_op = builder.create( - layout_op.getLoc(), layout_op.getOutput().getType(), + auto sharding_op = mlir::TF::XlaShardingOp::create( + builder, layout_op.getLoc(), layout_op.getOutput().getType(), layout_op.getInput(), /*sharding=*/builder.getStringAttr(""), // Not used by tf2xla. 
/*_xlaSharding=*/sharding_attr, diff --git a/tensorflow/dtensor/mlir/dtensor_mixed_precision_reduce.cc b/tensorflow/dtensor/mlir/dtensor_mixed_precision_reduce.cc index f563dceb065671..c0f066483451fe 100644 --- a/tensorflow/dtensor/mlir/dtensor_mixed_precision_reduce.cc +++ b/tensorflow/dtensor/mlir/dtensor_mixed_precision_reduce.cc @@ -44,7 +44,7 @@ namespace { // the list of devices that are a part of the same reduction group. template mlir::LogicalResult GetAllReduceGroupSize(ReduceOpType reduce_op, - int32* group_size) { + int32_t* group_size) { mlir::DenseIntElementsAttr group_assignment_attr; if (!matchPattern(reduce_op.getGroupAssignment(), m_Constant(&group_assignment_attr))) @@ -80,7 +80,7 @@ mlir::LogicalResult MaybeUpcastForReduction(ReduceOpType reduce_op, mlir::OpBuilder builder(reduce_op); const mlir::Location loc = reduce_op.getLoc(); - int32 group_size; + int32_t group_size; if (mlir::failed(GetAllReduceGroupSize(reduce_op, &group_size))) return mlir::failure(); if (group_size <= ReduceInBfloat16MaxGroupSize()) @@ -98,16 +98,16 @@ mlir::LogicalResult MaybeUpcastForReduction(ReduceOpType reduce_op, const mlir::RankedTensorType& output_type = mlir::dyn_cast(reduce_op.getOutput().getType()); - mlir::TF::CastOp upcast = builder.create( - loc, + mlir::TF::CastOp upcast = mlir::TF::CastOp::create( + builder, loc, mlir::RankedTensorType::get(input_type.getShape(), builder.getF32Type()), reduce_op.getInput()); reduce_op->setOperand(0, upcast.getY()); reduce_op.getOutput().setType(upcast.getY().getType()); builder.setInsertionPointAfter(reduce_op); - mlir::TF::CastOp downcast = builder.create( - loc, + mlir::TF::CastOp downcast = mlir::TF::CastOp::create( + builder, loc, mlir::RankedTensorType::get(output_type.getShape(), output_type.getElementType()), reduce_op); diff --git a/tensorflow/dtensor/mlir/dtensor_replace_relayout_with_identity.cc b/tensorflow/dtensor/mlir/dtensor_replace_relayout_with_identity.cc index d5e957e19050d2..1b320bcfc100ab 100644 --- a/tensorflow/dtensor/mlir/dtensor_replace_relayout_with_identity.cc +++ b/tensorflow/dtensor/mlir/dtensor_replace_relayout_with_identity.cc @@ -37,9 +37,9 @@ class DTensorReplaceRelayoutWithIdentityPass mlir::OpBuilder builder(relayout_op); // Inserts an IdentityOp at the position of the relayout_op with the same // attributes as the relayout_op. - auto new_identity = builder.create( - relayout_op->getLoc(), relayout_op.getType(), relayout_op.getInput(), - relayout_op->getAttrs()); + auto new_identity = mlir::TF::IdentityOp::create( + builder, relayout_op->getLoc(), relayout_op.getType(), + relayout_op.getInput(), relayout_op->getAttrs()); relayout_op.getOutput().replaceAllUsesWith(new_identity.getOutput()); relayout_op.erase(); }); diff --git a/tensorflow/dtensor/mlir/dtensor_send_recv.cc b/tensorflow/dtensor/mlir/dtensor_send_recv.cc index c728725dbaf073..fa6d2bd041189f 100644 --- a/tensorflow/dtensor/mlir/dtensor_send_recv.cc +++ b/tensorflow/dtensor/mlir/dtensor_send_recv.cc @@ -85,8 +85,8 @@ mlir::Value GetOrCreateCompilationKey(mlir::Operation* op) { auto result_type = mlir::RankedTensorType::get({3}, builder.getType()); auto new_compilation_key = - builder.create( - cluster.getLoc(), /*program=*/result_type, + mlir::TF::_XlaCompileMlirPlaceholderProgramKeyOp::create( + builder, cluster.getLoc(), /*program=*/result_type, llvm::ArrayRef{}); return new_compilation_key.getProgram(); } @@ -107,8 +107,8 @@ StatusOr GetDeviceOrdinal(const Mesh& mesh, } // Slice out the device ordinal using the device ID as index. 
   TF_ASSIGN_OR_RETURN(mlir::Value device_id, DeviceId(function));
-  mlir::TF::SliceOp device_ordinal = builder->create<mlir::TF::SliceOp>(
-      loc,
+  mlir::TF::SliceOp device_ordinal = mlir::TF::SliceOp::create(
+      *builder, loc,
       /*output=*/EffectivelyScalarR1Type(builder->getIntegerType(32)),
       /*input=*/IntConst(*builder, loc, device_id_to_ordinal),
       /*begin=*/
@@ -118,8 +118,8 @@ StatusOr<mlir::Value> GetDeviceOrdinal(const Mesh& mesh,
   mlir::Value device_ordinal_scalar =
       ReshapeSizeTypeToScalar(*builder, loc, device_ordinal);
   if (return_int64_type) {
-    device_ordinal_scalar = builder->create<mlir::TF::CastOp>(
-        loc, mlir::RankedTensorType::get({}, builder->getI64Type()),
+    device_ordinal_scalar = mlir::TF::CastOp::create(
+        *builder, loc, mlir::RankedTensorType::get({}, builder->getI64Type()),
         device_ordinal_scalar);
   }
   return device_ordinal_scalar;
@@ -138,8 +138,8 @@ StatusOr<mlir::Operation*> LowerDTensorSendToTFOp(
   absl::Span<const std::string> receiving_devices = target_mesh.local_devices();
   mlir::Operation* lowered_send_op;
-  lowered_send_op = builder.create<mlir::TF::_HostSendOp>(
-      send_input.getLoc(), send_input, tensor_name, sending_devices[0],
+  lowered_send_op = mlir::TF::_HostSendOp::create(
+      builder, send_input.getLoc(), send_input, tensor_name, sending_devices[0],
       /*send_device_incarnation=*/0, receiving_devices[0],
       /*client_terminated=*/false);
@@ -184,12 +184,13 @@ StatusOr<mlir::Operation*> LowerDTensorSendToXlaOp(
           GetDeviceOrdinal(send_input_layout.mesh(), loc, send_func, &builder));
     }
     // Create XlaSendFromHostV2 op
-    lowered_send_op = builder.create<mlir::TF::_XlaSendFromHostV2Op>(
-        loc, value_to_send, program_key, device_ordinal, dtensor_send.getKey());
+    lowered_send_op = mlir::TF::_XlaSendFromHostV2Op::create(
+        builder, loc, value_to_send, program_key, device_ordinal,
+        dtensor_send.getKey());
   } else {
     // Note that for ops running in XLA/TPU, device ordinal input is not needed.
-    lowered_send_op = builder.create<mlir::TF::XlaSendToHostOp>(
-        loc, send_input, dtensor_send.getKey());
+    lowered_send_op = mlir::TF::XlaSendToHostOp::create(
+        builder, loc, send_input, dtensor_send.getKey());
   }
   dtensor_send.erase();
@@ -246,16 +247,16 @@ StatusOr<mlir::Operation*> LowerDTensorRecvToXlaOp(
     auto program_key = GetOrCreateCompilationKey(dtensor_recv);
     builder.setInsertionPoint(dtensor_recv);
-    recv_xla_op = builder.create<mlir::TF::_XlaRecvAtHostV2Op>(
-        dtensor_recv.getLoc(), output_types,
+    recv_xla_op = mlir::TF::_XlaRecvAtHostV2Op::create(
+        builder, dtensor_recv.getLoc(), output_types,
         /*dynamic_key=*/program_key, device_ordinal, dtensor_recv.getKeyAttr());
   } else {
     TF_ASSIGN_OR_RETURN(auto local_shape_attr,
                         GetDTensorRecvLocalShapeAttr(dtensor_recv));
     // Create XlaRecvFromHost op.
-    recv_xla_op = builder.create<mlir::TF::XlaRecvFromHostOp>(
-        dtensor_recv.getLoc(), output_type, local_shape_attr,
+    recv_xla_op = mlir::TF::XlaRecvFromHostOp::create(
+        builder, dtensor_recv.getLoc(), output_type, local_shape_attr,
         dtensor_recv.getKeyAttr());
   }
@@ -299,8 +300,8 @@ StatusOr<mlir::Operation*> LowerDTensorSendFromCPUToTFOp(
   mlir::Operation* lowered_send_op;
   for (size_t i = 0; i < receiving_devices.size(); ++i)
-    lowered_send_op = builder.create<mlir::TF::_HostSendOp>(
-        send_input.getLoc(), dtensor_send.getInput(), tensor_name,
+    lowered_send_op = mlir::TF::_HostSendOp::create(
+        builder, send_input.getLoc(), dtensor_send.getInput(), tensor_name,
         sending_devices[0],
         /*send_device_incarnation=*/0, receiving_devices[i]);
@@ -326,8 +327,8 @@ StatusOr<mlir::Operation*> LowerDTensorRecvFromCPUToTFOp(
   mlir::Operation* lowered_recv_op;
   mlir::Location loc = dtensor_recv.getLoc();
   for (size_t i = 0; i < receiving_devices.size(); ++i)
-    lowered_recv_op = builder.create<mlir::TF::_HostRecvOp>(
-        loc, dtensor_recv.getType(), tensor_name, sending_devices[0],
+    lowered_recv_op = mlir::TF::_HostRecvOp::create(
+        builder, loc, dtensor_recv.getType(), tensor_name, sending_devices[0],
         /*send_device_incarnation=*/0, receiving_devices[i]);
   // Replace dtensor_recv with newly created recv op and remove DTensorRecv op.
@@ -351,8 +352,8 @@ StatusOr<mlir::Operation*> LowerDTensorRecvToTFOp(
   absl::Span<const std::string> receiving_devices = recv_mesh.local_devices();
   mlir::Location loc = dtensor_recv.getLoc();
-  mlir::Operation* lowered_recv_op = builder.create<mlir::TF::_HostRecvOp>(
-      loc, output_type, tensor_name, sending_devices[0],
+  mlir::Operation* lowered_recv_op = mlir::TF::_HostRecvOp::create(
+      builder, loc, output_type, tensor_name, sending_devices[0],
       /*send_device_incarnation=*/0, receiving_devices[0]);
   return lowered_recv_op;
@@ -385,7 +386,7 @@ llvm::SmallVector GenerateBranches(
                ? func_op.getArgument(0)
                : mlir::BlockArgument{};
     auto branch_op = fn(fn_builder, location, arg, it.value());
-    fn_builder.create<mlir::func::ReturnOp>(location, branch_op->getResults());
+    mlir::func::ReturnOp::create(fn_builder, location, branch_op->getResults());
     branches.push_back(mlir::SymbolRefAttr::get(func_op));
   }
@@ -429,25 +430,24 @@ StatusOr<mlir::Operation*> LowerOneToOneDTensorSendToTFHostSend(
         mlir::Value val = arg;
         if (i32_copy) {
           auto val_type = mlir::cast(val.getType());
-          val = op_builder
-                    .create<mlir::TF::CastOp>(
-                        loc,
-                        mlir::RankedTensorType::get(
-                            val_type.getShape(), op_builder.getIntegerType(64)),
-                        val)
+          val = mlir::TF::CastOp::create(
+                    op_builder, loc,
+                    mlir::RankedTensorType::get(val_type.getShape(),
+                                                op_builder.getIntegerType(64)),
+                    val)
                     ->getResult(0);
         }
-        return op_builder.create<mlir::TF::_HostSendOp>(
-            loc, val, tensor_name, std::get<0>(device_pair),
+        return mlir::TF::_HostSendOp::create(
+            op_builder, loc, val, tensor_name, std::get<0>(device_pair),
            /*send_device_incarnation=*/0, std::get<1>(device_pair));
       });
-  mlir::Operation* case_op = builder.create<mlir::TF::CaseOp>(
-      dtensor_send.getLoc(),
-      /*output=*/llvm::ArrayRef{},
-      /*branch_index=*/device_ordinal,
-      /*input=*/dtensor_send->getOperands(),
-      /*branches=*/builder.getArrayAttr(branches),
-      /*is_stateless=*/builder.getBoolAttr(false));
+  mlir::Operation* case_op =
+      mlir::TF::CaseOp::create(builder, dtensor_send.getLoc(),
+                               /*output=*/llvm::ArrayRef{},
+                               /*branch_index=*/device_ordinal,
+                               /*input=*/dtensor_send->getOperands(),
+                               /*branches=*/builder.getArrayAttr(branches),
+                               /*is_stateless=*/builder.getBoolAttr(false));
   // erase the send op here iff targeting a gpu
   // otherwise there will be 'op not within cluster' error(s)
@@ -494,14 +494,15 @@ StatusOr<mlir::Operation*> LowerOneToOneDTensorRecvToTFHostRecv(
       "{0}_receive_{1}_{2}", device_pairs,
       [&](mlir::OpBuilder& op_builder, auto& loc, auto _,
          auto device_pair) -> mlir::Operation* {
-        auto recv_op = op_builder.create<mlir::TF::_HostRecvOp>(
-            loc, local_output_type, tensor_name, std::get<0>(device_pair),
+        auto recv_op = mlir::TF::_HostRecvOp::create(
+            op_builder, loc, local_output_type, tensor_name,
+            std::get<0>(device_pair),
            /*send_device_incarnation=*/0, std::get<1>(device_pair));
         SetSingleLayoutOnOp(recv_op, recv_layout);
         return recv_op;
       });
-  mlir::Operation* case_op = builder.create<mlir::TF::CaseOp>(
-      dtensor_recv.getLoc(),
+  mlir::Operation* case_op = mlir::TF::CaseOp::create(
+      builder, dtensor_recv.getLoc(),
       /*output=*/llvm::ArrayRef{local_output_type},
       /*branch_index=*/device_ordinal,
       /*input=*/dtensor_recv->getOperands(),
@@ -510,8 +511,8 @@ StatusOr<mlir::Operation*> LowerOneToOneDTensorRecvToTFHostRecv(
   mlir::Operation* lowered_recv;
   if (i32_copy) {
-    lowered_recv = builder.create<mlir::TF::CastOp>(
-        dtensor_recv.getLoc(), local_recv_type, case_op->getResult(0));
+    lowered_recv = mlir::TF::CastOp::create(
+        builder, dtensor_recv.getLoc(), local_recv_type, case_op->getResult(0));
   } else {
     lowered_recv = case_op;
   }
@@ -639,12 +640,12 @@ StatusOr<mlir::Operation*> LowerDTensorSend(mlir::Operation* send_op,
       GetDeviceOrdinal(*mesh, loc, send_cluster->getParentOfType(), &builder));
-  mlir::Value predicate = builder.create<mlir::TF::EqualOp>(
-      loc, device_ordinal, CreateIntScalarConst(0, builder, loc),
+  mlir::Value predicate = mlir::TF::EqualOp::create(
+      builder, loc, device_ordinal, CreateIntScalarConst(0, builder, loc),
      /*incompatible_shape_error=*/builder.getBoolAttr(true));
-  auto send_if = builder.create<mlir::TF::IfRegionOp>(
-      loc, llvm::SmallVector{}, predicate,
+  auto send_if = mlir::TF::IfRegionOp::create(
+      builder, loc, llvm::SmallVector{}, predicate,
      /*is_stateless=*/builder.getBoolAttr(true),
      GetUniqueControlflowFnName("copy_to_mesh_send_if_then", builder),
      GetUniqueControlflowFnName("copy_to_mesh_send_if_else", builder));
@@ -653,16 +654,15 @@ StatusOr<mlir::Operation*> LowerDTensorSend(mlir::Operation* send_op,
   auto& else_branch = send_if.getElseBranch();
   else_branch.push_back(new mlir::Block);
   builder.setInsertionPointToEnd(&else_branch.front());
-  builder.create<mlir::TF::YieldOp>(
-      loc,
-      /*operands=*/llvm::ArrayRef{});
+  mlir::TF::YieldOp::create(builder, loc,
+                            /*operands=*/llvm::ArrayRef{});
   // Create then branch region with DTensorSend op.
   auto& then_branch = send_if.getThenBranch();
   then_branch.push_back(new mlir::Block);
   builder.setInsertionPointToEnd(&then_branch.front());
-  auto yield = builder.create<mlir::TF::YieldOp>(
-      loc, /*operands=*/llvm::ArrayRef{});
+  auto yield = mlir::TF::YieldOp::create(
+      builder, loc, /*operands=*/llvm::ArrayRef{});
   dtensor_send->moveBefore(yield);
   // Lower DTensorSend op to actual TF op.
@@ -684,8 +684,8 @@ StatusOr<mlir::Operation*> LowerDTensorSend(mlir::Operation* send_op,
   if (!recv_mesh.is_cpu_mesh() &&
       send_type.getElementType().isInteger(32)) {
     builder.setInsertionPointAfter(send_input.getDefiningOp());
-    auto cast_to_int64 = builder.create<mlir::TF::CastOp>(
-        send_input.getLoc(),
+    auto cast_to_int64 = mlir::TF::CastOp::create(
+        builder, send_input.getLoc(),
         mlir::RankedTensorType::get(send_type.getShape(),
                                     builder.getIntegerType(64)),
         send_input);
@@ -781,8 +781,8 @@ StatusOr<mlir::Operation*> LowerDTensorRecv(mlir::Operation* send_op,
       GetDeviceOrdinal(recv_mesh, loc,
                        recv_cluster->getParentOfType(), &builder));
-  mlir::Value predicate = builder.create<mlir::TF::EqualOp>(
-      loc, device_ordinal, CreateIntScalarConst(0, builder, loc),
+  mlir::Value predicate = mlir::TF::EqualOp::create(
+      builder, loc, device_ordinal, CreateIntScalarConst(0, builder, loc),
      /*incompatible_shape_error=*/builder.getBoolAttr(true));
   mlir::TensorType recv_type = dtensor_recv.getType();
@@ -795,8 +795,8 @@ StatusOr<mlir::Operation*> LowerDTensorRecv(mlir::Operation* send_op,
                                       builder.getIntegerType(64))
           : recv_type;
-  auto recv_if = builder.create<mlir::TF::IfRegionOp>(
-      loc, llvm::SmallVector{output_type}, predicate,
+  auto recv_if = mlir::TF::IfRegionOp::create(
+      builder, loc, llvm::SmallVector{output_type}, predicate,
      /*is_stateless=*/builder.getBoolAttr(true),
      GetUniqueControlflowFnName("copy_to_mesh_recv_if_then", builder),
      GetUniqueControlflowFnName("copy_to_mesh_recv_if_else", builder));
@@ -831,9 +831,9 @@ StatusOr<mlir::Operation*> LowerDTensorRecv(mlir::Operation* send_op,
     return absl::InvalidArgumentError("unsupported output type");
   }
-  mlir::Value zeros = builder.create<mlir::TF::ConstOp>(loc, const_attr);
-  builder.create<mlir::TF::YieldOp>(
-      loc, /*operands=*/llvm::ArrayRef{zeros});
+  mlir::Value zeros = mlir::TF::ConstOp::create(builder, loc, const_attr);
+  mlir::TF::YieldOp::create(builder, loc,
+                            /*operands=*/llvm::ArrayRef{zeros});
   // Create then branch region with DTensorRecv op.
   auto& then_branch = recv_if.getThenBranch();
@@ -843,8 +843,8 @@ StatusOr<mlir::Operation*> LowerDTensorRecv(mlir::Operation* send_op,
   TF_ASSIGN_OR_RETURN(mlir::Operation * xla_recv,
                       lower_fn(send_mesh, dtensor_recv, output_type));
-  builder.create<mlir::TF::YieldOp>(
-      loc,
+  mlir::TF::YieldOp::create(
+      builder, loc,
      /*operands=*/llvm::ArrayRef{xla_recv->getResult(0)});
   // Broadcast the received output to all GPU/TPU devices.
@@ -859,8 +859,8 @@ StatusOr<mlir::Operation*> LowerDTensorRecv(mlir::Operation* send_op,
                                         kReduceOpAdd));
   if (need_i32_to_i64_upcast) {
-    lowered_recv = builder.create<mlir::TF::CastOp>(
-        loc, recv_type, lowered_recv->getResult(0));
+    lowered_recv = mlir::TF::CastOp::create(builder, loc, recv_type,
+                                            lowered_recv->getResult(0));
   }
   // Replaces usages of DTensorRecv op with the broadcasted value.
diff --git a/tensorflow/dtensor/mlir/expansions/argmax_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/argmax_spmd_expander.cc index e0bbc25792cd66..10b6296d5638b6 100644 --- a/tensorflow/dtensor/mlir/expansions/argmax_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/argmax_spmd_expander.cc @@ -106,9 +106,9 @@ StatusOr ArgMaxSPMDExpander::ExpandOp(mlir::Operation* op) { } } - auto new_argmax = builder.create( - argmax_op.getLoc(), argmax_op.getResult().getType(), input, - argmax_op.getDimension()); + auto new_argmax = mlir::TF::ArgMaxOp::create(builder, argmax_op.getLoc(), + argmax_op.getResult().getType(), + input, argmax_op.getDimension()); op->getResult(0).replaceAllUsesWith(new_argmax.getOutput()); op->erase(); diff --git a/tensorflow/dtensor/mlir/expansions/dataparallel_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/dataparallel_spmd_expander.cc index f53f3b2a188945..6fb9cb790910ed 100644 --- a/tensorflow/dtensor/mlir/expansions/dataparallel_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/dataparallel_spmd_expander.cc @@ -257,8 +257,8 @@ StatusOr DataparallelSPMDExpander::RelayoutOperandsAndOutputs( builder.setInsertionPointAfter(last_op_after_splitting); // Tie all outputs together with identity_n - auto identity_op = builder.create( - op->getLoc(), generated_types, generated_outputs); + auto identity_op = mlir::TF::IdentityNOp::create( + builder, op->getLoc(), generated_types, generated_outputs); newly_created_ops.insert(identity_op); for (int i = 0; i < output_layouts.size(); ++i) { op->getOpResult(i).replaceAllUsesExcept(identity_op.getResult(i), diff --git a/tensorflow/dtensor/mlir/expansions/fft_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/fft_spmd_expander.cc index 4ff627046e47a7..b5957d99dee649 100644 --- a/tensorflow/dtensor/mlir/expansions/fft_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/fft_spmd_expander.cc @@ -281,7 +281,7 @@ StatusOr ExpandFFTN(mlir::Operation* fft_op, } else { TF_ASSIGN_OR_RETURN(auto fft_length_vec, ExtractFFTLengthFromOp(fft_op)); mlir::Value fft_length = IntConst( - builder, location, (int32)fft_length_vec[num_transform_axes - 1]); + builder, location, (int32_t)fft_length_vec[num_transform_axes - 1]); llvm::ArrayRef rfft_shape = mlir::dyn_cast(intermediate.getType()).getShape(); std::vector rfft_shape_vec = rfft_shape.vec(); @@ -380,7 +380,7 @@ StatusOr ExpandIFFTN(mlir::Operation* ifft_op, ExtractFFTLengthFromOp(ifft_op)); mlir::Value ifft_length = IntConst(builder, location, - (int32)complex_fft_length_vec[num_transform_axes - 1]); + (int32_t)complex_fft_length_vec[num_transform_axes - 1]); // IRFFT for the last axis. mlir::TF::IRFFTOp irfft_output_op = mlir::TF::IRFFTOp::create( builder, location, ifft_op->getResult(0).getType(), transposed_output, diff --git a/tensorflow/dtensor/mlir/expansions/io_op_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/io_op_spmd_expander.cc index 0242ebbb0544e3..7de31a8bb7e5f1 100644 --- a/tensorflow/dtensor/mlir/expansions/io_op_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/io_op_spmd_expander.cc @@ -83,8 +83,8 @@ StatusOr Expand(mlir::Operation* op) { mlir::Block* then_fn_block = then_func.addEntryBlock(); mlir::OpBuilder then_fn_builder = mlir::OpBuilder::atBlockBegin(then_fn_block); - then_fn_builder.create(location); - then_fn_builder.create(location); + mlir::TF::NoOp::create(then_fn_builder, location); + mlir::func::ReturnOp::create(then_fn_builder, location); // Build else_func that is the branch of device_id == 0. 
// The else func is just the original op. @@ -100,9 +100,9 @@ StatusOr Expand(mlir::Operation* op) { mlir::OpBuilder else_fn_builder = mlir::OpBuilder::atBlockBegin(else_fn_block); - else_fn_builder.create(location, op->getResultTypes(), - else_fn_block->getArguments()); - else_fn_builder.create(location); + T::create(else_fn_builder, location, op->getResultTypes(), + else_fn_block->getArguments()); + mlir::func::ReturnOp::create(else_fn_builder, location); symbol_table.insert(then_func); symbol_table.insert(else_func); @@ -115,12 +115,12 @@ StatusOr Expand(mlir::Operation* op) { builder, location, mlir::cast(device_id.getType()).getElementType())); - mlir::TF::NotEqualOp not_equal = builder.create( - location, device_id, zero_scalar, + mlir::TF::NotEqualOp not_equal = mlir::TF::NotEqualOp::create( + builder, location, device_id, zero_scalar, /*incompatible_shape_error=*/builder.getBoolAttr(false)); - mlir::Operation* if_op = builder.create( - location, then_func.getFunctionType().getResults(), + mlir::Operation* if_op = mlir::TF::IfOp::create( + builder, location, then_func.getFunctionType().getResults(), /*cond=*/not_equal.getResult(), /*input=*/op->getOperands(), /*then_branch=*/then_func.getSymName(), diff --git a/tensorflow/dtensor/mlir/expansions/iterator_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/iterator_spmd_expander.cc index 8a442d727aa19f..0bd4da477d2205 100644 --- a/tensorflow/dtensor/mlir/expansions/iterator_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/iterator_spmd_expander.cc @@ -62,8 +62,8 @@ StatusOr IteratorGetNextSPMDExpander::ExpandOp( local_shape, global_output_type.getElementType()); } - auto new_op = builder.create( - DT_LOC(op->getLoc()), local_types, original_op->getOperand(0)); + auto new_op = mlir::TF::IteratorGetNextOp::create( + builder, DT_LOC(op->getLoc()), local_types, original_op->getOperand(0)); for (int i = 0; i < original_op->getNumResults(); ++i) { original_op.getResult(i).replaceAllUsesWith(new_op.getResult(i)); diff --git a/tensorflow/dtensor/mlir/expansions/meta_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/meta_spmd_expander.cc index e18d3edf44d913..b2d6ca37777281 100644 --- a/tensorflow/dtensor/mlir/expansions/meta_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/meta_spmd_expander.cc @@ -791,7 +791,7 @@ StatusOr ReshapeSPMDExpander::ExpandOp(mlir::Operation* op) { auto const_attr = mlir::DenseIntElementsAttr::get(new_shape, local_reshape_const); auto new_reshape_const_op = - builder.create(DT_LOC(op), const_attr); + mlir::TF::ConstOp::create(builder, DT_LOC(op), const_attr); mlir::TF::ReshapeOp new_reshape_op = mlir::TF::ReshapeOp::create( builder, op->getLoc(), new_input, new_reshape_const_op); diff --git a/tensorflow/dtensor/mlir/expansions/optional_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/optional_spmd_expander.cc index 3c146a06a48558..a45a2df40a32e4 100644 --- a/tensorflow/dtensor/mlir/expansions/optional_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/optional_spmd_expander.cc @@ -54,8 +54,8 @@ StatusOr OptionalGetValueSPMDExpander::ExpandOp( local_types[i] = local_type; } - auto new_op = builder.create( - DT_LOC(op->getLoc()), local_types, original_op->getOperand(0)); + auto new_op = mlir::TF::OptionalGetValueOp::create( + builder, DT_LOC(op->getLoc()), local_types, original_op->getOperand(0)); for (int i = 0; i < original_op->getNumResults(); ++i) { original_op.getResult(i).replaceAllUsesWith(new_op.getResult(i)); diff --git 
a/tensorflow/dtensor/mlir/expansions/random_op_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/random_op_spmd_expander.cc index b6e1c316cef6f2..6175e133710f7a 100644 --- a/tensorflow/dtensor/mlir/expansions/random_op_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/random_op_spmd_expander.cc @@ -151,26 +151,26 @@ StatusOr GetDeviceSeed(const Layout& layout, mlir::Operation* op) { mlir::Attribute const_attr = mlir::DenseIntElementsAttr::get(const_type, multipliers); mlir::Value multiplier = - builder.create(cluster.getLoc(), const_attr) + mlir::TF::ConstOp::create(builder, cluster.getLoc(), const_attr) .getOutput(); const mlir::RankedTensorType one_by_one = mlir::RankedTensorType::get({1, 1}, builder.getIntegerType(32)); - mlir::Value seed = builder.create( - cluster.getLoc(), one_by_one, mesh_coordinates, multiplier); + mlir::Value seed = mlir::TF::MatMulOp::create( + builder, cluster.getLoc(), one_by_one, mesh_coordinates, multiplier); // Largest prime in 16 bits. mlir::Value prime = CreateIntScalarConst( /*value=*/65521, builder, cluster.getLoc(), /*use_int64=*/false); mlir::Value seed_plus_prime = - builder - .create(cluster.getLoc(), one_by_one, seed, prime) + mlir::TF::AddV2Op::create(builder, cluster.getLoc(), one_by_one, seed, + prime) .getZ(); - mlir::TF::SqueezeOp squeeze = builder.create( - cluster.getLoc(), + mlir::TF::SqueezeOp squeeze = mlir::TF::SqueezeOp::create( + builder, cluster.getLoc(), mlir::RankedTensorType::get({}, builder.getIntegerType(32)), seed_plus_prime, builder.getI64ArrayAttr({0, 1})); @@ -207,11 +207,12 @@ StatusOr ComputeNewSeed(mlir::OpBuilder& builder, mlir::Type seed_type = mlir::cast(op_seed.getType()).getElementType(); - device_id_seed = builder.create( - location, mlir::RankedTensorType::get({}, seed_type), device_id_seed); + device_id_seed = mlir::TF::CastOp::create( + builder, location, mlir::RankedTensorType::get({}, seed_type), + device_id_seed); - mlir::Value seed_xor = - builder.create(location, op_seed, device_id_seed); + mlir::Value seed_xor = mlir::TF::BitwiseXorOp::create( + builder, location, op_seed, device_id_seed); return seed_xor; } @@ -240,8 +241,8 @@ StatusOr CreatedShardedLocalRandomOpV1(const Layout& layout, auto new_shape_value = Int64Const(builder, location, new_random_shape); // TODO(zhonglinhan) : check different input for StatelessRandomUniformInt - auto local_random = builder.create(location, new_random_type, - new_shape_value, seed_xor); + auto local_random = RandomOp::create(builder, location, new_random_type, + new_shape_value, seed_xor); op->getResult(0).replaceAllUsesWith(local_random.getOutput()); op->erase(); return local_random.getOperation(); @@ -272,9 +273,9 @@ StatusOr CreatedShardedLocalRandomOpV2(const Layout& layout, auto new_shape_value = Int64Const(builder, location, new_random_shape); - auto local_random = builder.create( - location, new_random_type, new_shape_value, seed_xor, - random_op.getCounter(), random_op.getAlg()); + auto local_random = + RandomOp::create(builder, location, new_random_type, new_shape_value, + seed_xor, random_op.getCounter(), random_op.getAlg()); op->getResult(0).replaceAllUsesWith(local_random.getOutput()); op->erase(); return local_random.getOperation(); @@ -305,10 +306,10 @@ StatusOr CreatedShardedLocalRandomOpV2Range( auto new_shape_value = Int64Const(builder, location, new_random_shape); - auto local_random = builder.create( - location, new_random_type, new_shape_value, seed_xor, - random_op.getCounter(), random_op.getAlg(), random_op.getMinval(), - 
random_op.getMaxval()); + auto local_random = + RandomOp::create(builder, location, new_random_type, new_shape_value, + seed_xor, random_op.getCounter(), random_op.getAlg(), + random_op.getMinval(), random_op.getMaxval()); op->getResult(0).replaceAllUsesWith(local_random.getOutput()); op->erase(); return local_random.getOperation(); diff --git a/tensorflow/dtensor/mlir/expansions/replicated_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/replicated_spmd_expander.cc index feb5b9eda74a01..f55d62efa81501 100644 --- a/tensorflow/dtensor/mlir/expansions/replicated_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/replicated_spmd_expander.cc @@ -82,8 +82,8 @@ ReplicatedOpSPMDExpander::ReplicatedRelayoutOperandsAndOutputs( builder.setInsertionPointAfter(last_op_after_splitting); // Tie all outputs together with identity_n - auto identity_op = builder.create( - op->getLoc(), generated_types, generated_outputs); + auto identity_op = mlir::TF::IdentityNOp::create( + builder, op->getLoc(), generated_types, generated_outputs); newly_created_ops.insert(identity_op); for (int i = 0; i < output_layouts.size(); ++i) { op->getOpResult(i).replaceAllUsesExcept(identity_op.getResult(i), diff --git a/tensorflow/dtensor/mlir/expansions/segmentation_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/segmentation_spmd_expander.cc index c0aa768d9a5d03..c2fc958965ec33 100644 --- a/tensorflow/dtensor/mlir/expansions/segmentation_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/segmentation_spmd_expander.cc @@ -122,9 +122,9 @@ StatusOr UnsortedSegmentSumSPMDExpander::ExpandOp( EmitRelayout(segment_ids, segment_ids_layout, new_segment_ids_layout)); mlir::OpBuilder builder(op); - mlir::Operation* new_sum_op = builder.create( - op->getLoc(), sum_op.getOutput().getType(), data, new_segment_ids, - sum_op.getNumSegments()); + mlir::Operation* new_sum_op = mlir::TF::UnsortedSegmentSumOp::create( + builder, op->getLoc(), sum_op.getOutput().getType(), data, + new_segment_ids, sum_op.getNumSegments()); InferSPMDExpandedLocalShape(new_sum_op); diff --git a/tensorflow/dtensor/mlir/expansions/slice_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/slice_spmd_expander.cc index fc082290109260..4cf10413879cbf 100644 --- a/tensorflow/dtensor/mlir/expansions/slice_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/slice_spmd_expander.cc @@ -187,11 +187,10 @@ StatusOr SliceSPMDExpander::ExpandOp(mlir::Operation* op) { else new_size = Int64Const(builder, loc, sizes); - auto new_op = builder - .create( - loc, slice_op.getOutput().getType(), relayout_input, - slice_op.getBegin(), new_size) - .getOperation(); + auto new_op = + mlir::TF::SliceOp::create(builder, loc, slice_op.getOutput().getType(), + relayout_input, slice_op.getBegin(), new_size) + .getOperation(); new_op = InferSPMDExpandedLocalShape(new_op); TF_ASSIGN_OR_RETURN(auto relayout_output, diff --git a/tensorflow/dtensor/mlir/expansions/softmax_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/softmax_spmd_expander.cc index 196e3702b1c843..62fc9413e78307 100644 --- a/tensorflow/dtensor/mlir/expansions/softmax_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/softmax_spmd_expander.cc @@ -70,12 +70,12 @@ StatusOr ComputeGlobalReduce( // First compute a local reduce if (reduce_op == kReduceOpAdd) { - local_reduce = builder.create( - input.getLoc(), input, reduction_indices, + local_reduce = mlir::TF::SumOp::create( + builder, input.getLoc(), input, reduction_indices, /*keep_dims=*/builder.getBoolAttr(true)); } else if (reduce_op == 
kReduceOpMax) { - local_reduce = builder.create( - input.getLoc(), input, reduction_indices, + local_reduce = mlir::TF::MaxOp::create( + builder, input.getLoc(), input, reduction_indices, /*keep_dims=*/builder.getBoolAttr(true)); } else { return errors::Unimplemented("reduction ", reduce_op, " not implemented"); @@ -107,8 +107,8 @@ StatusOr ComputeGlobalReduce( // dimension attribute type. Everything else is OK with int32_t dimensions. std::vector reduce_dim_array_64(reduced_dims.begin(), reduced_dims.end()); - global_reduce = builder.create( - input.getLoc(), new_type, global_reduce->getResult(0), + global_reduce = mlir::TF::SqueezeOp::create( + builder, input.getLoc(), new_type, global_reduce->getResult(0), builder.getI64ArrayAttr(reduce_dim_array_64)); } return global_reduce->getResult(0); @@ -143,9 +143,9 @@ absl::Status ComputeExpAndSum(mlir::OpBuilder& builder, // Subtract max from local copy of logits. shifted_logits = - builder.create(loc, logits, max_logits).getResult(); + mlir::TF::SubOp::create(builder, loc, logits, max_logits).getResult(); exp_of_shifted_logits = - builder.create(loc, shifted_logits).getResult(); + mlir::TF::ExpOp::create(builder, loc, shifted_logits).getResult(); // Sum the exponential. TF_ASSIGN_OR_RETURN( @@ -162,8 +162,9 @@ mlir::Value ComputeSoftmax(mlir::OpBuilder& builder, const mlir::Value& exp_of_shifted_logits, const mlir::Value& sum_of_exp) { // For Softmax, we compute exp(shifted_logits)/sum(exp(shifted_logits)) - auto softmax = builder.create( - exp_of_shifted_logits.getLoc(), exp_of_shifted_logits, sum_of_exp); + auto softmax = + mlir::TF::DivOp::create(builder, exp_of_shifted_logits.getLoc(), + exp_of_shifted_logits, sum_of_exp); return softmax.getResult(); } @@ -174,9 +175,9 @@ mlir::Value ComputeLogSoftmax(mlir::OpBuilder& builder, const mlir::Value& sum_of_exp) { // For LogSoftmax, we compute shifted_logs - log(sum(exp(shifted_logits))) auto log_of_sum = - builder.create(shifted_logits.getLoc(), sum_of_exp); - auto log_softmax = builder.create( - shifted_logits.getLoc(), shifted_logits, log_of_sum.getResult()); + mlir::TF::LogOp::create(builder, shifted_logits.getLoc(), sum_of_exp); + auto log_softmax = mlir::TF::SubOp::create( + builder, shifted_logits.getLoc(), shifted_logits, log_of_sum.getResult()); return log_softmax.getResult(); } @@ -223,12 +224,11 @@ StatusOr GetFPConstOfType(mlir::OpBuilder& builder, const mlir::Value& input, float value) { if (mlir::TensorType type = mlir::dyn_cast(input.getType())) { - return builder - .create( - input.getLoc(), - mlir::DenseFPElementsAttr::get( - mlir::RankedTensorType::get({}, type.getElementType()), - {value})) + return mlir::TF::ConstOp::create( + builder, input.getLoc(), + mlir::DenseFPElementsAttr::get( + mlir::RankedTensorType::get({}, type.getElementType()), + {value})) .getOutput(); } else { return errors::Unimplemented("non tensor type for labels is not supported"); @@ -290,23 +290,23 @@ StatusOr ComputeOneHot(mlir::OpBuilder& builder, // Slice out the [1,1] for mesh_dim_index. 
mlir::Value shard_id = - builder - .create( - loc, mlir::RankedTensorType::get({1, 1}, builder.getI32Type()), - mesh_coordinates, - IntConst(builder, input.getLoc(), {0, mesh_dim_index}), - IntConst(builder, input.getLoc(), {1, 1})) + mlir::TF::SliceOp::create( + builder, loc, + mlir::RankedTensorType::get({1, 1}, builder.getI32Type()), + mesh_coordinates, + IntConst(builder, input.getLoc(), {0, mesh_dim_index}), + IntConst(builder, input.getLoc(), {1, 1})) .getOutput(); - shard_id = builder - .create( - loc, mlir::RankedTensorType::get({}, builder.getI32Type()), - shard_id, builder.getI64ArrayAttr({0, 1})) - .getOutput(); + shard_id = + mlir::TF::SqueezeOp::create( + builder, loc, mlir::RankedTensorType::get({}, builder.getI32Type()), + shard_id, builder.getI64ArrayAttr({0, 1})) + .getOutput(); // `new_indices` = `input` - `shard_id` * (classes/num_shards) mlir::Value id_offset = - builder.create(loc, shard_id, depth).getZ(); + mlir::TF::MulOp::create(builder, loc, shard_id, depth).getZ(); // Note that the type of id_offset (int32) may not match the type of input. // So we insert a cast in this case. @@ -314,25 +314,23 @@ StatusOr ComputeOneHot(mlir::OpBuilder& builder, mlir::dyn_cast(input.getType()); if (!input_type) return errors::InvalidArgument("input is not a TensorType"); if (!input_type.getElementType().isInteger(32)) - id_offset = - builder - .create( - loc, - mlir::RankedTensorType::get({}, input_type.getElementType()), - id_offset) - .getY(); + id_offset = mlir::TF::CastOp::create(builder, loc, + mlir::RankedTensorType::get( + {}, input_type.getElementType()), + id_offset) + .getY(); mlir::Value indices = - builder.create(loc, input, id_offset).getZ(); + mlir::TF::SubOp::create(builder, loc, input, id_offset).getZ(); TF_ASSIGN_OR_RETURN(mlir::Value on_value, GetFPConstOfType(builder, features, 1.0)); TF_ASSIGN_OR_RETURN(mlir::Value off_value, GetFPConstOfType(builder, features, 0.0)); - return builder - .create(input.getLoc(), indices, depth, on_value, - off_value, builder.getI64IntegerAttr(1)) + return mlir::TF::OneHotOp::create(builder, input.getLoc(), indices, depth, + on_value, off_value, + builder.getI64IntegerAttr(1)) .getOutput(); } @@ -530,7 +528,7 @@ StatusOr SoftmaxLossOpSPMDExpander::MaybeRelayoutOutputs( llvm::SmallVector values = {new_loss, new_backprop}; mlir::TF::IdentityNOp identity_op = - builder.create(loss.getLoc(), types, values); + mlir::TF::IdentityNOp::create(builder, loss.getLoc(), types, values); newly_created_ops.insert(identity_op); @@ -627,17 +625,15 @@ StatusOr SoftmaxLossOpSPMDExpander::ExpandOp( GetFPConstOfType(builder, labels, 0.0)); const mlir::Value is_labels_zero = - builder - .create(op->getLoc(), labels, labels_zero, - builder.getBoolAttr(true)) + mlir::TF::EqualOp::create(builder, op->getLoc(), labels, labels_zero, + builder.getBoolAttr(true)) .getZ(); const mlir::Value safe_softmax = - builder - .create(op->getLoc(), is_labels_zero, - features_zero, log_softmax) + mlir::TF::SelectV2Op::create(builder, op->getLoc(), is_labels_zero, + features_zero, log_softmax) .getOutput(); const mlir::Value prod = - builder.create(op->getLoc(), labels, safe_softmax) + mlir::TF::MulOp::create(builder, op->getLoc(), labels, safe_softmax) .getZ(); // Compute the reduce sum @@ -648,10 +644,10 @@ StatusOr SoftmaxLossOpSPMDExpander::ExpandOp( builder.setInsertionPointAfterValue(positive_loss); mlir::Value loss = - builder.create(op->getLoc(), positive_loss).getY(); + mlir::TF::NegOp::create(builder, op->getLoc(), positive_loss).getY(); mlir::Value backprop = - 
builder.create(op->getLoc(), softmax, labels); + mlir::TF::SubOp::create(builder, op->getLoc(), softmax, labels); return MaybeRelayoutOutputs(op, loss, backprop, internal_layout, output_layouts[0], output_layouts[1]); diff --git a/tensorflow/dtensor/mlir/expansions/tensorlist_reserve_spmd_expander.cc b/tensorflow/dtensor/mlir/expansions/tensorlist_reserve_spmd_expander.cc index 7dc4ae56d0ed71..8e2fa02dcc9f44 100644 --- a/tensorflow/dtensor/mlir/expansions/tensorlist_reserve_spmd_expander.cc +++ b/tensorflow/dtensor/mlir/expansions/tensorlist_reserve_spmd_expander.cc @@ -58,10 +58,9 @@ StatusOr TensorListReserveSPMDExpander::ExpandOp( mlir::RankedTensorType::get(local_shape, element_type), builder.getContext())); mlir::Value new_shape_value = Int64Const(builder, DT_LOC(op), local_shape); - mlir::TF::TensorListReserveOp new_op = - builder.create( - DT_LOC(op), new_output_type, new_shape_value, - tensorlist_op.getNumElements()); + mlir::TF::TensorListReserveOp new_op = mlir::TF::TensorListReserveOp::create( + builder, DT_LOC(op), new_output_type, new_shape_value, + tensorlist_op.getNumElements()); op->getResult(0).replaceAllUsesWith(new_op.getResult()); op->erase(); diff --git a/tensorflow/dtensor/mlir/handle_cross_cluster_dependencies.cc b/tensorflow/dtensor/mlir/handle_cross_cluster_dependencies.cc index a0c137cb83dc4d..f1e3a60f8a2d21 100644 --- a/tensorflow/dtensor/mlir/handle_cross_cluster_dependencies.cc +++ b/tensorflow/dtensor/mlir/handle_cross_cluster_dependencies.cc @@ -206,8 +206,8 @@ mlir::LogicalResult HandleCopyToMeshWithinCluster( } } mlir::OpBuilder builder(op); - auto identity_op = builder.create( - op.getLoc(), input.getType(), input); + auto identity_op = mlir::TF::IdentityOp::create(builder, op.getLoc(), + input.getType(), input); op->getResult(0).replaceAllUsesWith(identity_op.getOutput()); op->erase(); return mlir::WalkResult::advance(); @@ -246,8 +246,9 @@ mlir::LogicalResult LowerToSendRecv(mlir::TF::CopyToMeshOp copy_to_mesh, // Create send op that sends data from input cluster to target cluster. const Mesh& target_mesh = mesh_or_status.value(); - builder.create( - copy_to_mesh.getLoc(), value_to_send, builder.getStringAttr(op_key), + mlir::TF::DTensorSend::create( + builder, copy_to_mesh.getLoc(), value_to_send, + builder.getStringAttr(op_key), mlir::dtensor::MeshAttr::get(context, target_mesh)); // Create recv op that recvs data from send op. 
@@ -258,8 +259,8 @@ mlir::LogicalResult LowerToSendRecv(mlir::TF::CopyToMeshOp copy_to_mesh, "CopyToMesh op must have static shape."); builder.setInsertionPoint(copy_to_mesh); - auto recv_op = builder.create( - copy_to_mesh.getLoc(), value_to_send.getType(), + auto recv_op = mlir::TF::DTensorRecv::create( + builder, copy_to_mesh.getLoc(), value_to_send.getType(), builder.getStringAttr(op_key), mlir::TF::ShapeAttr::get(context, tensor_type), mlir::dtensor::MeshAttr::get(context, target_mesh)); @@ -396,8 +397,9 @@ mlir::LogicalResult InsertCopyToMesh(mlir::tf_device::ClusterOp cluster) { if (input_mesh == mesh) continue; mlir::OpBuilder builder(op); - auto new_op = builder.create( - op->getLoc(), op->getResult(0).getType(), input, mesh.ToString()); + auto new_op = mlir::TF::CopyToMeshOp::create(builder, op->getLoc(), + op->getResult(0).getType(), + input, mesh.ToString()); op->replaceUsesOfWith(input, new_op.getResult()); } return mlir::success(); diff --git a/tensorflow/dtensor/mlir/layout_propagation_v2.cc b/tensorflow/dtensor/mlir/layout_propagation_v2.cc index c8dd29135e96ca..49ede9025b4310 100644 --- a/tensorflow/dtensor/mlir/layout_propagation_v2.cc +++ b/tensorflow/dtensor/mlir/layout_propagation_v2.cc @@ -733,9 +733,9 @@ mlir::LogicalResult InsertDTensorLayoutOps( mlir::Type value_type = GetSubtypeOrSelf(merged_layout.first); if (auto type = mlir::dyn_cast(value_type)) { - auto layout_op = builder.create( - merged_layout.first.getLoc(), merged_layout.first, layout_attr, - mlir::TF::ShapeAttr::get(builder.getContext(), type)); + auto layout_op = mlir::TF::DTensorLayout::create( + builder, merged_layout.first.getLoc(), merged_layout.first, + layout_attr, mlir::TF::ShapeAttr::get(builder.getContext(), type)); llvm::SmallPtrSet exception{layout_op}; merged_layout.first.replaceAllUsesExcept(layout_op.getOutput(), exception); @@ -1234,30 +1234,26 @@ mlir::LogicalResult InsertRelayoutForWhileLoops( mlir::TF::ShapeAttr global_shape = mlir::TF::ShapeAttr::get( builder.getContext(), mlir::cast(yield_op->getOperand(i).getType())); - mlir::TF::RelayoutOp first_relayout = - builder.create( - op.getLoc(), yield_op->getOperand(i).getType(), - yield_op->getOperand(i), input_layout.ToString()); - mlir::TF::DTensorLayout first_layout_op = - builder.create( - op.getLoc(), first_relayout.getOutput(), - mlir::dtensor::LayoutAttr::get(builder.getContext(), - input_layout), - global_shape); + mlir::TF::RelayoutOp first_relayout = mlir::TF::RelayoutOp::create( + builder, op.getLoc(), yield_op->getOperand(i).getType(), + yield_op->getOperand(i), input_layout.ToString()); + mlir::TF::DTensorLayout first_layout_op = mlir::TF::DTensorLayout::create( + builder, op.getLoc(), first_relayout.getOutput(), + mlir::dtensor::LayoutAttr::get(builder.getContext(), input_layout), + global_shape); yield_op->setOperand(i, first_layout_op.getOutput()); // Insert the second relayout op after the loop itself. 
builder.setInsertionPointAfter(op); mlir::TF::DTensorLayout second_layout_op = - builder.create( - op.getLoc(), op->getResult(i), + mlir::TF::DTensorLayout::create( + builder, op.getLoc(), op->getResult(i), mlir::dtensor::LayoutAttr::get(builder.getContext(), input_layout), global_shape); - mlir::TF::RelayoutOp second_relayout = - builder.create( - op.getLoc(), second_layout_op.getOutput().getType(), - second_layout_op.getOutput(), output_layout.ToString()); + mlir::TF::RelayoutOp second_relayout = mlir::TF::RelayoutOp::create( + builder, op.getLoc(), second_layout_op.getOutput().getType(), + second_layout_op.getOutput(), output_layout.ToString()); op->getResult(i).replaceAllUsesExcept( second_relayout.getOutput(), llvm::SmallPtrSet{ second_layout_op.getOperation()}); diff --git a/tensorflow/dtensor/mlir/lower_send_recv.cc b/tensorflow/dtensor/mlir/lower_send_recv.cc index 0cbcdd61abd7c4..142932afbee7da 100644 --- a/tensorflow/dtensor/mlir/lower_send_recv.cc +++ b/tensorflow/dtensor/mlir/lower_send_recv.cc @@ -90,8 +90,8 @@ void PropagateDeviceIdToClusters(mlir::ModuleOp module) { module.walk([&](mlir::tf_device::ClusterOp op) { mlir::OpBuilder builder(&op.GetBody().front()); - builder.create(main_func.getLoc(), - device_id->getType(), *device_id); + mlir::TF::IdentityOp::create(builder, main_func.getLoc(), + device_id->getType(), *device_id); }); } diff --git a/tensorflow/dtensor/mlir/merge_clusters.cc b/tensorflow/dtensor/mlir/merge_clusters.cc index 0e88ca55057a26..81a856aa1a0c9c 100644 --- a/tensorflow/dtensor/mlir/merge_clusters.cc +++ b/tensorflow/dtensor/mlir/merge_clusters.cc @@ -288,31 +288,31 @@ void CloneEmptyIfWithPredicate(mlir::TF::IfRegionOp if_region, const Mesh& mesh, absl::StrCat(kSendRecvKeyPrefix, *num_send_recvs); *num_send_recvs += 1; - builder.create( - if_region.getLoc(), if_region.getCond(), - builder.getStringAttr(send_recv_key), - mlir::dtensor::MeshAttr::get(context, mesh)); + mlir::TF::DTensorSend::create(builder, if_region.getLoc(), + if_region.getCond(), + builder.getStringAttr(send_recv_key), + mlir::dtensor::MeshAttr::get(context, mesh)); // Create new cluster op that contains cloned if operation. - auto new_cluster = builder.create( - if_region.getLoc(), llvm::SmallVector{}); + auto new_cluster = mlir::tf_device::ClusterOp::create( + builder, if_region.getLoc(), llvm::SmallVector{}); new_cluster.getBody().push_back(new mlir::Block); builder.setInsertionPointToEnd(&new_cluster.GetBody()); - auto return_op = builder.create( - if_region.getLoc(), llvm::SmallVector{}); + auto return_op = mlir::tf_device::ReturnOp::create( + builder, if_region.getLoc(), llvm::SmallVector{}); // Add DTensorRecv op inside new cluster that receives the cluster. builder.setInsertionPoint(return_op); - auto recv_op = builder.create( - if_region.getLoc(), predicate_tensor_type, + auto recv_op = mlir::TF::DTensorRecv::create( + builder, if_region.getLoc(), predicate_tensor_type, builder.getStringAttr(send_recv_key), mlir::TF::ShapeAttr::get(context, predicate_tensor_type), mlir::dtensor::MeshAttr::get(context, mesh)); // Clone tf.IfRegion op inside newly created cluster and make sure // that the predicate tensor is from DTensorRecv op created above. 
- auto host_side_if = builder.create( - if_region.getLoc(), llvm::SmallVector{}, + auto host_side_if = mlir::TF::IfRegionOp::create( + builder, if_region.getLoc(), llvm::SmallVector{}, recv_op.getOutput(), if_region.getIsStateless(), GetUniqueControlflowFnName("cloned_if_then", builder), GetUniqueControlflowFnName("cloned_if_else", builder)); @@ -322,15 +322,15 @@ void CloneEmptyIfWithPredicate(mlir::TF::IfRegionOp if_region, const Mesh& mesh, auto& then_branch = host_side_if.getThenBranch(); then_branch.push_back(new mlir::Block); builder.setInsertionPointToEnd(&then_branch.front()); - builder.create(if_region.getLoc(), - /*operands=*/llvm::ArrayRef{}); + mlir::TF::YieldOp::create(builder, if_region.getLoc(), + /*operands=*/llvm::ArrayRef{}); // Create empty else branch region. auto& else_branch = host_side_if.getElseBranch(); else_branch.push_back(new mlir::Block); builder.setInsertionPointToEnd(&else_branch.front()); - builder.create(if_region.getLoc(), - /*operands=*/llvm::ArrayRef{}); + mlir::TF::YieldOp::create(builder, if_region.getLoc(), + /*operands=*/llvm::ArrayRef{}); new_cluster->setAttr(kMeshAttr, builder.getStringAttr(mesh.ToString())); } @@ -550,8 +550,8 @@ mlir::LogicalResult MergeClusters(mlir::ModuleOp module) { // Create a single cluster op contains merged computations for `mesh`. builder.setInsertionPoint(&func_block.front()); - auto new_cluster = builder.create( - module.getLoc(), merged_return_types); + auto new_cluster = mlir::tf_device::ClusterOp::create( + builder, module.getLoc(), merged_return_types); new_cluster.getBody().push_back(new mlir::Block); new_cluster->setAttr(kMeshAttr, builder.getStringAttr(mesh.ToString())); @@ -578,8 +578,8 @@ mlir::LogicalResult MergeClusters(mlir::ModuleOp module) { } builder.setInsertionPointToEnd(&new_cluster.GetBody()); - builder.create(new_cluster.getLoc(), - merged_return_values); + mlir::tf_device::ReturnOp::create(builder, new_cluster.getLoc(), + merged_return_values); // Replace return value usages. 
for (auto it : diff --git a/tensorflow/dtensor/mlir/move_compilation_to_host.cc b/tensorflow/dtensor/mlir/move_compilation_to_host.cc index 053913f4844606..894b1bacbe72ee 100644 --- a/tensorflow/dtensor/mlir/move_compilation_to_host.cc +++ b/tensorflow/dtensor/mlir/move_compilation_to_host.cc @@ -117,8 +117,8 @@ mlir::LogicalResult CreateSendRecvOpsToTransferProgramKey( builder.setInsertionPointAfter(compile_op); for (int i = 0; i < num_tpu_devices; ++i) { const std::string& tensor_name = device_key_map[i]; - auto send = builder.create( - compile_op->getLoc(), compilation_key, tensor_name, + auto send = mlir::TF::_HostSendOp::create( + builder, compile_op->getLoc(), compilation_key, tensor_name, compile_op_launch.getDevice(), /*send_device_incarnation=*/0, local_devices[i]); send->setAttr("device", compile_op_launch.getDeviceAttr()); @@ -148,15 +148,15 @@ mlir::LogicalResult CreateSendRecvOpsToTransferProgramKey( mlir::Block* fn_block = recv_select_fn.addEntryBlock(); mlir::OpBuilder fn_builder = mlir::OpBuilder::atBlockEnd(fn_block); - auto recv = fn_builder.create( - compile_op->getLoc(), + auto recv = mlir::TF::_HostRecvOp::create( + fn_builder, compile_op->getLoc(), mlir::cast(compilation_key.getType()), device_key_map[i], compile_op_launch.getDevice(), /*send_device_incarnation=*/0, local_devices[i]); recv->setAttr("device", builder.getStringAttr(local_devices[i])); - fn_builder.create(recv_select_fn.getLoc(), - recv.getTensor()); + mlir::func::ReturnOp::create(fn_builder, recv_select_fn.getLoc(), + recv.getTensor()); compilation_key_functions.emplace_back(recv_select_fn); } @@ -172,8 +172,8 @@ mlir::LogicalResult CreateSendRecvOpsToTransferProgramKey( symbols.push_back(mlir::SymbolRefAttr::get(func)); // Create a TF::Case op that selects `values` based on `id`. - auto program_key = builder.create( - compile_op.getLoc(), + auto program_key = mlir::TF::CaseOp::create( + builder, compile_op.getLoc(), /*output=*/llvm::SmallVector{compilation_key.getType()}, /*branch_index=*/*device_id, /*input=*/llvm::ArrayRef{}, @@ -288,15 +288,16 @@ mlir::LogicalResult HandleCompilationOps( llvm::formatv("error while creating TPU compilation logic. {0}", device_ordinal_host.status().message())); - mlir::Value predicate_host = builder.create( - compile_op.getLoc(), *device_ordinal_host, + mlir::Value predicate_host = mlir::TF::EqualOp::create( + builder, compile_op.getLoc(), *device_ordinal_host, CreateIntScalarConst(0, builder, compile_op.getLoc()), /*incompatible_shape_error=*/builder.getBoolAttr(true)); // If op here contains send/recv and TPUCompile op that should not be pruned // away. Therefore, we explicitly set the op to be stateful. - auto if_host = builder.create( - compile_op.getLoc(), llvm::SmallVector{}, predicate_host, + auto if_host = mlir::TF::IfRegionOp::create( + builder, compile_op.getLoc(), llvm::SmallVector{}, + predicate_host, /*is_stateless=*/builder.getBoolAttr(false), GetUniqueControlflowFnName("compilation_host_then", builder), GetUniqueControlflowFnName("compilation_host_else", builder)); @@ -305,18 +306,17 @@ mlir::LogicalResult HandleCompilationOps( auto& host_else_branch = if_host.getElseBranch(); host_else_branch.push_back(new mlir::Block); builder.setInsertionPointToEnd(&host_else_branch.front()); - builder.create( - compile_op.getLoc(), - /*operands=*/llvm::ArrayRef{}); + mlir::TF::YieldOp::create(builder, compile_op.getLoc(), + /*operands=*/llvm::ArrayRef{}); // Create then branch region with logic to compile TPU program and send // program key to all TPU devices. 
auto& host_then_branch = if_host.getThenBranch(); host_then_branch.push_back(new mlir::Block); builder.setInsertionPointToEnd(&host_then_branch.front()); - auto yield = builder.create( - compile_op.getLoc(), - /*operands=*/llvm::ArrayRef{}); + auto yield = + mlir::TF::YieldOp::create(builder, compile_op.getLoc(), + /*operands=*/llvm::ArrayRef{}); compilation_move_before = yield; builder.setInsertionPointAfter(if_host); diff --git a/tensorflow/dtensor/mlir/op_to_device_cluster.cc b/tensorflow/dtensor/mlir/op_to_device_cluster.cc index df3aa89dd9bd35..89c351b0f71ccc 100644 --- a/tensorflow/dtensor/mlir/op_to_device_cluster.cc +++ b/tensorflow/dtensor/mlir/op_to_device_cluster.cc @@ -51,8 +51,8 @@ mlir::LogicalResult WrapDeviceCluster(mlir::OpBuilder *builder, mlir::Operation *op) { // Create new tf_device.cluster op wrapping a single operation. builder->setInsertionPoint(op); - auto cluster = builder->create( - op->getLoc(), op->getResultTypes()); + auto cluster = mlir::tf_device::ClusterOp::create(*builder, op->getLoc(), + op->getResultTypes()); if (auto layout_op = llvm::dyn_cast(op)) { cluster->setAttr(kMeshAttr, builder->getStringAttr( layout_op.getLayout().mesh().ToString())); @@ -89,7 +89,7 @@ mlir::LogicalResult WrapDeviceCluster(mlir::OpBuilder *builder, cluster.getBody().push_back(new mlir::Block); builder->setInsertionPointToEnd(&cluster.GetBody()); - builder->create(op->getLoc(), op->getResults()); + mlir::tf_device::ReturnOp::create(*builder, op->getLoc(), op->getResults()); // Move `op` inside newly created `ClusterOp`. op->moveBefore(cluster.GetBody().getTerminator()); diff --git a/tensorflow/dtensor/mlir/op_utils.cc b/tensorflow/dtensor/mlir/op_utils.cc index 08aa8f95612104..4b7a776ea2cd2c 100644 --- a/tensorflow/dtensor/mlir/op_utils.cc +++ b/tensorflow/dtensor/mlir/op_utils.cc @@ -116,8 +116,8 @@ mlir::LogicalResult ReplaceAuxiliaryDTensorLayoutOpsWithIdentity( // Replace DTensorLayout op with identity op. 
mlir::OpBuilder builder(input_layout_op); - auto new_identity = builder.create( - input_layout_op->getLoc(), input_layout_op.getType(), + auto new_identity = mlir::TF::IdentityOp::create( + builder, input_layout_op->getLoc(), input_layout_op.getType(), input_layout_op.getInput()); input_layout_op.getOutput().replaceAllUsesWith(new_identity.getOutput()); input_layout_op.erase(); diff --git a/tensorflow/dtensor/mlir/propagate_default_layout.cc b/tensorflow/dtensor/mlir/propagate_default_layout.cc index 6b0b35283fdca5..7be77a3f624ff4 100644 --- a/tensorflow/dtensor/mlir/propagate_default_layout.cc +++ b/tensorflow/dtensor/mlir/propagate_default_layout.cc @@ -53,8 +53,8 @@ void CreateDTensorLayoutOp(const Layout& layout, mlir::Value input, mlir::MLIRContext* context) { if (layout.IsEmpty()) return; - auto layout_op = builder->create( - loc, input, mlir::dtensor::LayoutAttr::get(context, layout), + auto layout_op = mlir::TF::DTensorLayout::create( + *builder, loc, input, mlir::dtensor::LayoutAttr::get(context, layout), mlir::TF::ShapeAttr::get(context, type)); if (arg_index != nullptr) { layout_op->setAttr(kFromArgIndex, arg_index); diff --git a/tensorflow/dtensor/mlir/propagate_device_id_to_function_args.cc b/tensorflow/dtensor/mlir/propagate_device_id_to_function_args.cc index 0417e392b4b28d..7381e3628e25d9 100644 --- a/tensorflow/dtensor/mlir/propagate_device_id_to_function_args.cc +++ b/tensorflow/dtensor/mlir/propagate_device_id_to_function_args.cc @@ -125,8 +125,8 @@ mlir::LogicalResult PrependDeviceIdToCallsites(mlir::OpBuilder* builder, mlir::Operation* new_call = nullptr; if (auto stateful_partitioned_call = llvm::dyn_cast(op)) { - new_call = builder->create( - op->getLoc(), op->getResultTypes(), new_operands, + new_call = mlir::TF::StatefulPartitionedCallOp::create( + *builder, op->getLoc(), op->getResultTypes(), new_operands, /*args_attrs=*/nullptr, /*res_attrs=*/nullptr, stateful_partitioned_call.getF(), stateful_partitioned_call.getConfig(), @@ -134,8 +134,8 @@ mlir::LogicalResult PrependDeviceIdToCallsites(mlir::OpBuilder* builder, stateful_partitioned_call.getExecutorType()); } else { auto partitioned_call = llvm::cast(op); - new_call = builder->create( - op->getLoc(), op->getResultTypes(), new_operands, + new_call = mlir::TF::PartitionedCallOp::create( + *builder, op->getLoc(), op->getResultTypes(), new_operands, /*args_attrs=*/nullptr, /*res_attrs=*/nullptr, partitioned_call.getF(), partitioned_call.getConfig(), partitioned_call.getConfigProto(), diff --git a/tensorflow/dtensor/mlir/restore_shape_inference.cc b/tensorflow/dtensor/mlir/restore_shape_inference.cc index ab327153634786..3be8637314be97 100644 --- a/tensorflow/dtensor/mlir/restore_shape_inference.cc +++ b/tensorflow/dtensor/mlir/restore_shape_inference.cc @@ -85,8 +85,8 @@ mlir::LogicalResult BackwardShapeInferenceToRestoreOp(mlir::ModuleOp module, // O(N). 
value.setType(type); } else if (auto cast_op = llvm::dyn_cast_or_null(op)) { - auto new_cast_op = builder->create(cast_op.getLoc(), type, - cast_op.getOperand()); + auto new_cast_op = mlir::TF::CastOp::create(*builder, cast_op.getLoc(), + type, cast_op.getOperand()); cast_op.replaceAllUsesWith(new_cast_op.getResult()); cast_op.erase(); @@ -103,8 +103,8 @@ mlir::LogicalResult BackwardShapeInferenceToRestoreOp(mlir::ModuleOp module, module, builder, new_cast_op.getOperand(), new_type); } else if (auto identity_op = llvm::dyn_cast_or_null(op)) { - auto new_identity_op = builder->create( - identity_op.getLoc(), type, identity_op.getInput()); + auto new_identity_op = mlir::TF::IdentityOp::create( + *builder, identity_op.getLoc(), type, identity_op.getInput()); identity_op.getOutput().replaceAllUsesWith(new_identity_op.getOutput()); identity_op.erase(); @@ -128,8 +128,9 @@ mlir::LogicalResult BackwardShapeInferenceToRestoreOp(mlir::ModuleOp module, // RestoreV2Op we want to fix is on the mesh of the corresponding // DTensorSend. Set shape of this DTensorRecv first and go to the // corresponding DTensorSend. - auto new_recv_op = builder->create( - recv_op.getLoc(), type, builder->getStringAttr(recv_op.getKey()), + auto new_recv_op = mlir::TF::DTensorRecv::create( + *builder, recv_op.getLoc(), type, + builder->getStringAttr(recv_op.getKey()), mlir::TF::ShapeAttr::get(builder->getContext(), mlir::dyn_cast(type)), mlir::dtensor::MeshAttr::get(builder->getContext(), recv_op.getMesh())); diff --git a/tensorflow/dtensor/mlir/sparse_expansions/dynamic_enqueue_sparse_expander.cc b/tensorflow/dtensor/mlir/sparse_expansions/dynamic_enqueue_sparse_expander.cc index f08908eff9395e..e695320769ecc4 100644 --- a/tensorflow/dtensor/mlir/sparse_expansions/dynamic_enqueue_sparse_expander.cc +++ b/tensorflow/dtensor/mlir/sparse_expansions/dynamic_enqueue_sparse_expander.cc @@ -53,14 +53,14 @@ StatusOr ExpandIndices(mlir::OpBuilder& builder, .getElementType()); // Little trick to make a rank-2 tensor of [[0,0], [0,1]] using rank 1 // constants. - mlir::Value indices_padding = builder.create( - loc, + mlir::Value indices_padding = mlir::TF::ReshapeOp::create( + builder, loc, mlir::TF::collection_ops_util::GetR1Const({0, 0, 0, 1}, builder, loc), mlir::TF::collection_ops_util::GetR1Const({2, 2}, builder, loc)); mlir::Value indices_padded = - builder.create(loc, indices_padded_type, - /*input=*/indices, - /*paddings=*/indices_padding); + mlir::TF::PadOp::create(builder, loc, indices_padded_type, + /*input=*/indices, + /*paddings=*/indices_padding); return indices_padded; } @@ -98,16 +98,15 @@ StatusOr DynamicEnqueueSparseExpander::ExpandOp( // This op does not have a return value so we do not need to replace any // consumers. 
mlir::Operation* sparse_enqueue_op = - builder - .create( - location, - /*sample_indices_or_row_splits_list=*/indices, - /*embedding_indices=*/values, - /*aggregation_weights=*/dense_enqueue_op.getAggregationWeights(), - /*mode_override=*/ - dense_enqueue_op.getModeOverride(), - /*device_ordinal=*/dense_enqueue_op.getDeviceOrdinal(), - /*combiners=*/dense_enqueue_op.getCombiners()); + mlir::TF::DynamicEnqueueTPUEmbeddingArbitraryTensorBatchOp::create( + builder, location, + /*sample_indices_or_row_splits_list=*/indices, + /*embedding_indices=*/values, + /*aggregation_weights=*/dense_enqueue_op.getAggregationWeights(), + /*mode_override=*/ + dense_enqueue_op.getModeOverride(), + /*device_ordinal=*/dense_enqueue_op.getDeviceOrdinal(), + /*combiners=*/dense_enqueue_op.getCombiners()); dense_enqueue_op.erase(); return sparse_enqueue_op; } diff --git a/tensorflow/dtensor/mlir/sparse_expansions/matmul_sparse_expander.cc b/tensorflow/dtensor/mlir/sparse_expansions/matmul_sparse_expander.cc index 7ed10e42dfe186..5056b89ca9ae32 100644 --- a/tensorflow/dtensor/mlir/sparse_expansions/matmul_sparse_expander.cc +++ b/tensorflow/dtensor/mlir/sparse_expansions/matmul_sparse_expander.cc @@ -38,8 +38,8 @@ StatusOr MatMulSparseExpander::ExpandOp(mlir::Operation* op) { // Since operand 0 is a SparseValue, we don't need to check that // the indices, values, and dense_shapes exist. mlir::TF::SparseTensorDenseMatMulOp new_op = - builder.create( - op->getLoc(), op->getResultTypes(), + mlir::TF::SparseTensorDenseMatMulOp::create( + builder, op->getLoc(), op->getResultTypes(), mlir::ValueRange{ GetIndicesFromSparseTensor(op->getOperand(0)).value(), GetValuesFromSparseTensor(op->getOperand(0)).value(), diff --git a/tensorflow/dtensor/mlir/spmd_expander_common.cc b/tensorflow/dtensor/mlir/spmd_expander_common.cc index 91eab6f8438dc2..9fd3af1af33c07 100644 --- a/tensorflow/dtensor/mlir/spmd_expander_common.cc +++ b/tensorflow/dtensor/mlir/spmd_expander_common.cc @@ -125,8 +125,8 @@ absl::Status CreateSplitOp(const int num_split, const int split_dimension, mlir::RankedTensorType::get({}, builder->getIntegerType(32)); auto split_dimension_attr = mlir::DenseElementsAttr::get(split_dim_type, split_dimension); - auto split_dimension_op = builder->create( - location, split_dim_type, split_dimension_attr); + auto split_dimension_op = mlir::TF::ConstOp::create( + *builder, location, split_dim_type, split_dimension_attr); // Correctly set output shapes of split op output if input shape is statically // known. @@ -157,8 +157,9 @@ absl::Status CreateSplitOp(const int num_split, const int split_dimension, // Creates a split op that splits |src_input| along |split_dimension|. 
llvm::SmallVector output_types(num_split, output_type); - *split_op = builder->create( - location, output_types, split_dimension_op.getOutput(), src_input); + *split_op = + mlir::TF::SplitOp::create(*builder, location, output_types, + split_dimension_op.getOutput(), src_input); return absl::OkStatus(); } diff --git a/tensorflow/dtensor/mlir/tpu_integration.cc b/tensorflow/dtensor/mlir/tpu_integration.cc index 67c6e0d9fbed23..e7fffe44a1f520 100644 --- a/tensorflow/dtensor/mlir/tpu_integration.cc +++ b/tensorflow/dtensor/mlir/tpu_integration.cc @@ -110,8 +110,8 @@ mlir::LogicalResult CreateTPUCluster( auto& function_block = function->getCallableRegion()->front(); builder->setInsertionPointToStart(&function_block); - auto cluster = builder->create( - tpu_call.getLoc(), function->getResultTypes()); + auto cluster = mlir::tf_device::ClusterOp::create(*builder, tpu_call.getLoc(), + function->getResultTypes()); cluster.getBody().push_back(new mlir::Block); auto& function_body = function_block.getOperations(); @@ -121,8 +121,8 @@ mlir::LogicalResult CreateTPUCluster( builder->setInsertionPointToEnd(&cluster.GetBody()); mlir::Operation* function_block_terminator = function_block.getTerminator(); - builder->create( - tpu_call.getLoc(), function_block_terminator->getOperands()); + mlir::tf_device::ReturnOp::create(*builder, tpu_call.getLoc(), + function_block_terminator->getOperands()); function_block_terminator->setOperands(cluster.getResults()); diff --git a/tensorflow/dtensor/mlir/utils/collective_lowering.cc b/tensorflow/dtensor/mlir/utils/collective_lowering.cc index df52a5ddde934b..7858b3430d33ef 100644 --- a/tensorflow/dtensor/mlir/utils/collective_lowering.cc +++ b/tensorflow/dtensor/mlir/utils/collective_lowering.cc @@ -122,8 +122,8 @@ mlir::LogicalResult EmitAllReduceForXla( constexpr char kCrossReplica[] = "CrossReplica"; // For TPUs, lower to XlaAllReduce straightforwardly. - *final_op = builder.create( - all_reduce.getLoc(), all_reduce.getResult().getType(), + *final_op = mlir::TF::XlaAllReduceOp::create( + builder, all_reduce.getLoc(), all_reduce.getResult().getType(), all_reduce.getInput(), all_reduce.getGroupAssignment(), all_reduce.getReduceOpAttr(), builder.getStringAttr(kCrossReplica)); return mlir::success(); @@ -198,7 +198,7 @@ mlir::Value GetRelativeDeviceId(mlir::Operation* op, ops_util::ReshapeScalarToSizeType(builder, DeviceId(op).value(), loc); mlir::Value start_device_id = ops_util::GetR1Const( {output_layout.mesh().min_global_device_id()}, builder, loc); - return builder.create(loc, device_id, start_device_id); + return mlir::TF::SubOp::create(builder, loc, device_id, start_device_id); } void CreateGroupAndInstanceKey( @@ -219,13 +219,14 @@ void CreateGroupAndInstanceKey( // Create a scalar group key by slicing device_id_to_group_key with // device_id. 
auto group_key_loc = DT_LOC2(loc, "group_key"); - auto group_key_slice = builder.create( - group_key_loc, EffectivelyScalarR1Type(builder.getIntegerType(32)), + auto group_key_slice = mlir::TF::SliceOp::create( + builder, group_key_loc, + EffectivelyScalarR1Type(builder.getIntegerType(32)), /*input=*/IntConst(builder, loc, device_id_to_group_key), /*begin=*/device_id, /*size=*/IntConst(builder, loc, {1})); - auto group_key_reshape = builder.create( - group_key_loc, /*tensor=*/group_key_slice.getResult(), + auto group_key_reshape = mlir::TF::ReshapeOp::create( + builder, group_key_loc, /*tensor=*/group_key_slice.getResult(), /*shape=*/ops_util::GetR1Const({}, builder, loc)); *group_key_scalar = group_key_reshape.getResult(); @@ -257,8 +258,8 @@ mlir::Operation* EmitCollectiveReduce( const bool is_mean_op = reduce_op_str == kReduceOpMean; mlir::Value group_size_scalar = ops_util::CreateScalarConst( host_group_size, builder, DT_LOC2(loc, "group_size")); - auto collective_reduce = builder.create( - loc, /*output_type=*/input.getType(), input, group_size_scalar, + auto collective_reduce = mlir::TF::CollectiveReduceV2Op::create( + builder, loc, /*output_type=*/input.getType(), input, group_size_scalar, group_key_scalar, instance_key_scalar, /*ordering_token=*/mlir::ValueRange({}), /*merge_op=*/builder.getStringAttr(is_mean_op ? "Add" : reduce_op_str), @@ -312,19 +313,21 @@ mlir::Operation* EmitCollectiveReduceScatter( const bool is_mean_op = reduce_op_str == kReduceOpMean; mlir::Value group_size_scalar = ops_util::CreateScalarConst( host_group_size, builder, DT_LOC2(loc, "group_size")); - auto collective_reduce_scatter = builder.create< - mlir::TF::CollectiveReduceScatterV2Op>( - loc, output_type, input, group_size_scalar, group_key_scalar, - instance_key_scalar, - /*ordering_token=*/mlir::ValueRange({}), - /*merge_op=*/builder.getStringAttr(is_mean_op ? "Add" : reduce_op_str), - /*final_op=*/builder.getStringAttr(is_mean_op ? "Div" : "Id"), - /*communication_hint=*/builder.getStringAttr("nccl"), // TODO(tmorris): - // this shouldn't - // be needed - /*timeout_seconds=*/builder.getF32FloatAttr(0.), - /*is_stateless=*/builder.getBoolAttr(false), - /*max_subdivs_per_device=*/builder.getI64IntegerAttr(16)); + auto collective_reduce_scatter = + mlir::TF::CollectiveReduceScatterV2Op::create( + builder, loc, output_type, input, group_size_scalar, group_key_scalar, + instance_key_scalar, + /*ordering_token=*/mlir::ValueRange({}), + /*merge_op=*/ + builder.getStringAttr(is_mean_op ? "Add" : reduce_op_str), + /*final_op=*/builder.getStringAttr(is_mean_op ? 
"Div" : "Id"), + /*communication_hint=*/ + builder.getStringAttr("nccl"), // TODO(tmorris): + // this shouldn't + // be needed + /*timeout_seconds=*/builder.getF32FloatAttr(0.), + /*is_stateless=*/builder.getBoolAttr(false), + /*max_subdivs_per_device=*/builder.getI64IntegerAttr(16)); SetSingleLayoutOnOp(collective_reduce_scatter, Layout::Empty()); if (need_transpose) { return EmitTransposeOp(builder, loc, @@ -394,8 +397,8 @@ mlir::Operation* EmitCollectiveAllToAll( new_shape.push_back(input_shape[i]); } } - auto reshape_op = builder.create( - loc, data, ops_util::GetR1Const(new_shape, builder, loc)); + auto reshape_op = mlir::TF::ReshapeOp::create( + builder, loc, data, ops_util::GetR1Const(new_shape, builder, loc)); std::vector perm_for_permute_transpose; perm_for_permute_transpose.reserve(input_shape.size() + 1); @@ -420,8 +423,8 @@ mlir::Operation* EmitCollectiveAllToAll( 1LL, std::multiplies()); std::vector flatten_shape = {host_group_size, num_elements / host_group_size}; - auto flatten_reshape_op = builder.create( - loc, input, ops_util::GetR1Const(flatten_shape, builder, loc)); + auto flatten_reshape_op = mlir::TF::ReshapeOp::create( + builder, loc, input, ops_util::GetR1Const(flatten_shape, builder, loc)); mlir::TensorType output_type = mlir::RankedTensorType::get(flatten_shape, input_type.getElementType()); @@ -432,9 +435,10 @@ mlir::Operation* EmitCollectiveAllToAll( &group_key_scalar, &instance_key_scalar); mlir::Value group_size_scalar = ops_util::CreateScalarConst(host_group_size, builder, loc); - auto collective_alltoall = builder.create( - loc, /*output_type=*/output_type, flatten_reshape_op->getResult(0), - group_size_scalar, group_key_scalar, instance_key_scalar, + auto collective_alltoall = mlir::TF::CollectiveAllToAllV2Op::create( + builder, loc, /*output_type=*/output_type, + flatten_reshape_op->getResult(0), group_size_scalar, group_key_scalar, + instance_key_scalar, /*ordering_token=*/mlir::ValueRange({}), /*communication_hint=*/builder.getStringAttr(""), /*timeout_seconds=*/builder.getF32FloatAttr(0.), @@ -444,8 +448,9 @@ mlir::Operation* EmitCollectiveAllToAll( if (requires_transpose) { // Unflatten after all-to-all. - auto reshape_op = builder.create( - loc, prev_op, ops_util::GetR1Const(transposed_shape, builder, loc)); + auto reshape_op = mlir::TF::ReshapeOp::create( + builder, loc, prev_op, + ops_util::GetR1Const(transposed_shape, builder, loc)); // Undo earlier transpose which moved split or concat dim to rank 0. 
std::vector perm_for_transpose; perm_for_transpose.reserve(input_shape.size()); @@ -473,8 +478,8 @@ mlir::Operation* EmitCollectiveAllToAll( std::vector output_shape(input_shape.begin(), input_shape.end()); output_shape[concat_dimension] *= host_group_size; output_shape[split_dimension] /= host_group_size; - auto post_reshape_op = builder.create( - loc, prev_op, ops_util::GetR1Const(output_shape, builder, loc)); + auto post_reshape_op = mlir::TF::ReshapeOp::create( + builder, loc, prev_op, ops_util::GetR1Const(output_shape, builder, loc)); return post_reshape_op; } @@ -503,8 +508,8 @@ mlir::Operation* EmitCollectiveGather( mlir::Value group_size_scalar = ops_util::CreateScalarConst(host_group_size, builder, loc); - auto collective_gather = builder.create( - loc, /*output_type=*/input.getType(), input, group_size_scalar, + auto collective_gather = mlir::TF::CollectiveGatherV2Op::create( + builder, loc, /*output_type=*/input.getType(), input, group_size_scalar, group_key_scalar, instance_key_scalar, /*ordering_token=*/mlir::ValueRange({}), /*communication_hint=*/builder.getStringAttr(""), @@ -606,12 +611,10 @@ mlir::LogicalResult LowerReduceScatterOp( mlir::OpBuilder builder(reduce_scatter); if (reduce_scatter.getDeviceType().ends_with("TPU")) { // For TPUs, lower to XlaReduceScatter straightforwardly. - mlir::Operation* xla_reduce_scatter = - builder.create( - loc, reduce_scatter.getResult().getType(), - reduce_scatter.getInput(), reduce_scatter.getGroupAssignment(), - reduce_scatter.getScatterDimension(), - reduce_scatter.getReduceOpAttr()); + mlir::Operation* xla_reduce_scatter = mlir::TF::XlaReduceScatterOp::create( + builder, loc, reduce_scatter.getResult().getType(), + reduce_scatter.getInput(), reduce_scatter.getGroupAssignment(), + reduce_scatter.getScatterDimension(), reduce_scatter.getReduceOpAttr()); SetSingleLayoutOnOp(xla_reduce_scatter, *output_layout); reduce_scatter.replaceAllUsesWith(xla_reduce_scatter); } else if (reduce_scatter.getDeviceType().ends_with("GPU") && @@ -653,16 +656,17 @@ mlir::LogicalResult LowerReduceScatterOp( return reduce_scatter.emitOpError(input_layout.status().message()); } - auto dtensor_allreduce = builder.create( - reduce_scatter.getLoc(), reduce_scatter.getOperand(0).getType(), - reduce_scatter.getOperand(0), reduce_scatter.getGroupAssignment(), - reduce_scatter.getReduceOp(), reduce_scatter.getDeviceType()); + auto dtensor_allreduce = mlir::TF::DTensorAllReduceOp::create( + builder, reduce_scatter.getLoc(), + reduce_scatter.getOperand(0).getType(), reduce_scatter.getOperand(0), + reduce_scatter.getGroupAssignment(), reduce_scatter.getReduceOp(), + reduce_scatter.getDeviceType()); SetSingleLayoutOnOp(dtensor_allreduce, *input_layout); mlir::Operation* dtensor_all_scatter = - builder.create( - reduce_scatter.getLoc(), reduce_scatter.getResult().getType(), - dtensor_allreduce.getResult(), + mlir::TF::DTensorAllScatterOp::create( + builder, reduce_scatter.getLoc(), + reduce_scatter.getResult().getType(), dtensor_allreduce.getResult(), mlir::dtensor::LayoutAttr::get(builder.getContext(), *input_layout), mlir::dtensor::LayoutAttr::get(builder.getContext(), *output_layout)); @@ -676,8 +680,9 @@ mlir::LogicalResult LowerReduceScatterOp( mlir::Value CreateZeroScalar(mlir::OpBuilder& builder, mlir::Location loc, mlir::RankedTensorType type) { const mlir::Value zero_scalar = ops_util::CreateScalarConst(0, builder, loc); - return builder.create( - loc, mlir::RankedTensorType::get({}, type.getElementType()), zero_scalar); + return mlir::TF::CastOp::create( + 
builder, loc, mlir::RankedTensorType::get({}, type.getElementType()), + zero_scalar); } // device_id is the relative device_id in a mesh (device id - mesh's 1st device @@ -691,15 +696,15 @@ mlir::Value SelectElementsBasedOnId( ops_util::GetR1Const(candidates_flat, builder, loc); const mlir::Value candidates_shape = ops_util::GetR1Const({num_devices, output_shape_size}, builder, loc); - const mlir::Value candidates = builder.create( - loc, candidates_flat_const, candidates_shape); + const mlir::Value candidates = mlir::TF::ReshapeOp::create( + builder, loc, candidates_flat_const, candidates_shape); // Add a zero after the only value in the 1x1 device_id tensor. - const mlir::Value device_id_paddings = builder.create( - loc, ops_util::GetR1Const({0, 1}, builder, loc), + const mlir::Value device_id_paddings = mlir::TF::ReshapeOp::create( + builder, loc, ops_util::GetR1Const({0, 1}, builder, loc), ops_util::GetR1Const({1, 2}, builder, loc)); - const mlir::Value device_id_padded = builder.create( - loc, candidates_shape.getType(), /*input=*/device_id, + const mlir::Value device_id_padded = mlir::TF::PadOp::create( + builder, loc, candidates_shape.getType(), /*input=*/device_id, /*paddings=*/device_id_paddings); // Slice a vertical vector out of the 2D candidates matrix. @@ -707,13 +712,15 @@ mlir::Value SelectElementsBasedOnId( {1, output_shape_size}, builder.getIntegerType(32)); const mlir::Value chosen_shape_const = ops_util::GetR1Const(chosen_shape_type.getShape(), builder, loc); - const mlir::Value chosen = builder.create( - loc, chosen_shape_type, /*input=*/candidates, /*begin=*/device_id_padded, + const mlir::Value chosen = mlir::TF::SliceOp::create( + builder, loc, chosen_shape_type, /*input=*/candidates, + /*begin=*/device_id_padded, /*size=*/chosen_shape_const); // Remove the leading dimension of size 1 before returning the result. 
- return builder.create( - loc, chosen, ops_util::GetR1Const({output_shape_size}, builder, loc)); + return mlir::TF::ReshapeOp::create( + builder, loc, chosen, + ops_util::GetR1Const({output_shape_size}, builder, loc)); } StatusOr GetGroupAssignment( @@ -841,8 +848,8 @@ mlir::LogicalResult LowerAllGatherOpToCollective( new_shape.push_back(input_shape_after_tr[j]); } - auto reshape_op = builder.create( - loc, /*tensor=*/collective_op->getResult(0), + auto reshape_op = mlir::TF::ReshapeOp::create( + builder, loc, /*tensor=*/collective_op->getResult(0), /*shape=*/ops_util::GetR1Const(new_shape, builder, loc)); prev_op_result = reshape_op->getResult(0); @@ -877,8 +884,8 @@ mlir::LogicalResult LowerAllGatherOpToCollective( prev_op_result = post_transpose_op->getResult(0); } - auto output_reshape_op = builder.create( - loc, /*tensor=*/prev_op_result, + auto output_reshape_op = mlir::TF::ReshapeOp::create( + builder, loc, /*tensor=*/prev_op_result, /*shape=*/ops_util::GetR1Const(output_shape, builder, loc)); SetSingleLayoutOnOp(output_reshape_op, tgt_layout); all_gather.replaceAllUsesWith(output_reshape_op->getResult(0)); @@ -900,8 +907,8 @@ mlir::LogicalResult LowerAllGatherOp(mlir::TF::DTensorAllGatherOp all_gather) { builder.setInsertionPointAfter(all_gather); if (concat_dims.empty()) { - mlir::TF::IdentityOp identity = builder.create( - all_gather.getLoc(), all_gather.getInput().getType(), + mlir::TF::IdentityOp identity = mlir::TF::IdentityOp::create( + builder, all_gather.getLoc(), all_gather.getInput().getType(), all_gather.getInput()); SetSingleLayoutOnOp(identity, tgt_layout); @@ -942,7 +949,7 @@ mlir::LogicalResult LowerAllGatherOp(mlir::TF::DTensorAllGatherOp all_gather) { const mlir::Value output_shape_const = Int64Const(builder, loc, output_shape); const mlir::Value zero_scalar = CreateZeroScalar(builder, loc, input_type); const mlir::Value zeros = - builder.create(loc, output_shape_const, zero_scalar); + mlir::TF::FillOp::create(builder, loc, output_shape_const, zero_scalar); // For every possible device ID, generate its strided slice ranges. 
Store all // ranges---num_devices * output_shape_size * (begin, end, stride)---as three @@ -1001,12 +1008,12 @@ mlir::LogicalResult LowerAllGatherOp(mlir::TF::DTensorAllGatherOp all_gather) { if (!tgt_layout.mesh().is_tpu_mesh()) return all_gather.emitOpError() << "source and target layout are not both on tpu"; - update_result = builder.create( - loc, zeros.getType(), /*input=*/zeros, + update_result = mlir::TF::XlaDynamicUpdateSliceOp::create( + builder, loc, zeros.getType(), /*input=*/zeros, /*update=*/all_gather.getInput(), /*indices=*/begin); } else { - update_result = builder.create( - loc, zeros.getType(), + update_result = mlir::TF::TensorStridedSliceUpdateOp::create( + builder, loc, zeros.getType(), /*input=*/zeros, begin, end, strides, /*value=*/all_gather.getInput()); } @@ -1062,9 +1069,9 @@ mlir::LogicalResult LowerAllGatherOp(mlir::TF::DTensorAllGatherOp all_gather) { absl::string_view reduce_type = kReduceOpAdd; if (type && type.getElementType().isInteger(1)) reduce_type = kReduceOpAny; mlir::TF::DTensorAllReduceOp all_reduce = - builder.create( - loc, update_result.getType(), update_result, - builder.create(loc, group_assignment), + mlir::TF::DTensorAllReduceOp::create( + builder, loc, update_result.getType(), update_result, + mlir::TF::ConstOp::create(builder, loc, group_assignment), builder.getStringAttr(std::string(reduce_type)), builder.getStringAttr(device_type)); SetSingleLayoutOnOp(all_reduce, tgt_layout); @@ -1146,12 +1153,12 @@ mlir::LogicalResult LowerAllScatterOp( mlir::Attribute matrix_attr = mlir::DenseIntElementsAttr::get(matrix_type, matrix); mlir::Value matrix_value = - builder.create(all_scatter.getLoc(), matrix_attr) + mlir::TF::ConstOp::create(builder, all_scatter.getLoc(), matrix_attr) .getResult(); // Compute the offset from mult_matrix_value and mesh_coordinates. - mlir::TF::MatMulOp offset = builder.create( - all_scatter.getLoc(), + mlir::TF::MatMulOp offset = mlir::TF::MatMulOp::create( + builder, all_scatter.getLoc(), mlir::RankedTensorType::get({1, original_layout.rank()}, builder.getIntegerType(32)), mesh_coordinates, matrix_value); @@ -1164,14 +1171,14 @@ mlir::LogicalResult LowerAllScatterOp( } // Input to slice needs to be rank 1, so we need to squeeze it. - mlir::TF::SqueezeOp offset_squeezed = builder.create( - all_scatter.getLoc(), + mlir::TF::SqueezeOp offset_squeezed = mlir::TF::SqueezeOp::create( + builder, all_scatter.getLoc(), mlir::RankedTensorType::get({original_layout.rank()}, builder.getIntegerType(32)), offset.getProduct(), builder.getI64ArrayAttr({0})); - auto result = builder.create( - all_scatter.getLoc(), output_type, all_scatter.getInput(), + auto result = mlir::TF::SliceOp::create( + builder, all_scatter.getLoc(), output_type, all_scatter.getInput(), offset_squeezed.getOutput(), slice_shape_value); SetSingleLayoutOnOp(result, desired_layout); @@ -1231,9 +1238,9 @@ mlir::LogicalResult LowerAllToAllOp(mlir::TF::DTensorAllToAllOp all_to_all) { if (mlir::StringRef(device_type).ends_with("TPU")) { // For TPUs, lower to XlaAllToAll. 
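The DTensor lowering hunks above all apply the same mechanical migration: op construction moves from the builder-templated form builder.create<OpTy>(loc, ...) to the static form OpTy::create(builder, loc, ...), with the builder passed as the first argument and every operand and attribute left unchanged. A minimal sketch of the two equivalent spellings, using mlir::TF::ReshapeOp as in the hunks above (the function name and header set are illustrative assumptions, not part of this patch):

#include "mlir/IR/Builders.h"                                // mlir::OpBuilder, mlir::Location
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"   // mlir::TF::ReshapeOp

// Hypothetical helper showing the before/after spelling of op creation.
mlir::Value ReshapeSketch(mlir::OpBuilder& builder, mlir::Location loc,
                          mlir::Value tensor, mlir::Value shape) {
  // Old spelling (removed by this patch): the op type is a template argument.
  //   auto reshape = builder.create<mlir::TF::ReshapeOp>(loc, tensor, shape);
  // New spelling (added by this patch): static create() taking the builder as
  // its first parameter; operands and attributes are passed identically.
  auto reshape = mlir::TF::ReshapeOp::create(builder, loc, tensor, shape);
  return reshape.getResult();
}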
- mlir::Operation* xla_all_to_all = builder.create( - loc, all_to_all.getResult().getType(), all_to_all.getInput(), - builder.create(loc, group_assignment), + mlir::Operation* xla_all_to_all = mlir::TF::AllToAllOp::create( + builder, loc, all_to_all.getResult().getType(), all_to_all.getInput(), + mlir::TF::ConstOp::create(builder, loc, group_assignment), concat_dimension, split_dimension, group_size); SetSingleLayoutOnOp(xla_all_to_all, tgt_layout); all_to_all.replaceAllUsesWith(xla_all_to_all); diff --git a/tensorflow/dtensor/mlir/utils/update_tpu_metadata.cc b/tensorflow/dtensor/mlir/utils/update_tpu_metadata.cc index 6eaeacee29f611..d8a7bcd9705521 100644 --- a/tensorflow/dtensor/mlir/utils/update_tpu_metadata.cc +++ b/tensorflow/dtensor/mlir/utils/update_tpu_metadata.cc @@ -254,8 +254,8 @@ mlir::LogicalResult UpdateTPUCompileMetadata(const Mesh& mesh_config, if (mesh_config.use_xla_spmd()) { // Create a new compile op with the appropriate new number of operands. builder->setInsertionPointAfter(compile); - auto new_compile_op = builder->create( - compile.getLoc(), compile.getCompilationStatus().getType(), + auto new_compile_op = mlir::TF::_TPUCompileMlirOp::create( + *builder, compile.getLoc(), compile.getCompilationStatus().getType(), /*program=*/ llvm::SmallVector( mesh_config.num_devices(), diff --git a/tensorflow/dtensor/mlir/value_utils.cc b/tensorflow/dtensor/mlir/value_utils.cc index e9240996904fd0..9ecdfa424ac723 100644 --- a/tensorflow/dtensor/mlir/value_utils.cc +++ b/tensorflow/dtensor/mlir/value_utils.cc @@ -103,18 +103,18 @@ mlir::Value ReshapeSizeTypeToScalar(mlir::OpBuilder builder, mlir::Location loc, mlir::RankedTensorType::get({}, builder.getIntegerType(32)); mlir::Value scalar_shape = ops_util::GetR1Const(scalar_type.getShape(), builder, loc); - return builder.create( - loc, mlir::ArrayRef{scalar_type}, + return mlir::TF::ReshapeOp::create( + builder, loc, mlir::ArrayRef{scalar_type}, mlir::ArrayRef{tensor, scalar_shape}); } mlir::Value IntConst(mlir::OpBuilder& builder, mlir::Location loc, - llvm::ArrayRef values) { + llvm::ArrayRef values) { auto const_type = mlir::RankedTensorType::get( {static_cast(values.size())}, builder.getIntegerType(32)); mlir::Attribute const_attr = mlir::DenseIntElementsAttr::get(const_type, values); - return builder.create(loc, const_attr).getResult(); + return mlir::TF::ConstOp::create(builder, loc, const_attr).getResult(); } StatusOr> GetTFShapeFromType(mlir::Type type) { @@ -133,7 +133,7 @@ mlir::Value Int64Const(mlir::OpBuilder& builder, mlir::Location loc, {static_cast(values.size())}, builder.getIntegerType(64)); mlir::Attribute const_attr = mlir::DenseIntElementsAttr::get(const_type, values); - return builder.create(loc, const_attr).getResult(); + return mlir::TF::ConstOp::create(builder, loc, const_attr).getResult(); } mlir::Value FloatConst(mlir::OpBuilder& builder, mlir::Location loc, @@ -142,16 +142,17 @@ mlir::Value FloatConst(mlir::OpBuilder& builder, mlir::Location loc, {static_cast(values.size())}, builder.getF32Type()); mlir::Attribute const_attr = mlir::DenseFPElementsAttr::get(const_type, values); - return builder.create(loc, const_attr).getResult(); + return mlir::TF::ConstOp::create(builder, loc, const_attr).getResult(); } mlir::Value StringScalarConst(mlir::OpBuilder& builder, mlir::Location loc, llvm::StringRef value) { - return builder.create( - loc, mlir::DenseStringElementsAttr::get( - mlir::RankedTensorType::get( - {}, builder.getType()), - value)); + return mlir::TF::ConstOp::create( + builder, loc, + 
mlir::DenseStringElementsAttr::get( + mlir::RankedTensorType::get({}, + builder.getType()), + value)); } mlir::Value StringConst(mlir::OpBuilder& builder, mlir::Location loc, @@ -161,7 +162,7 @@ mlir::Value StringConst(mlir::OpBuilder& builder, mlir::Location loc, builder.getType()); mlir::Attribute const_attr = mlir::DenseStringElementsAttr::get(const_type, values); - return builder.create(loc, const_attr).getResult(); + return mlir::TF::ConstOp::create(builder, loc, const_attr).getResult(); } mlir::Value IntConstWithMatchingType(mlir::OpBuilder& builder, @@ -171,7 +172,7 @@ mlir::Value IntConstWithMatchingType(mlir::OpBuilder& builder, if (llvm::cast(type).getElementType().isInteger(64)) { return Int64Const(builder, loc, values); } else { - llvm::SmallVector values32(values.begin(), values.end()); + llvm::SmallVector values32(values.begin(), values.end()); return IntConst(builder, loc, values32); } } @@ -213,14 +214,16 @@ absl::Status ExtractConstVectorFromValue( mlir::Value CreateIntScalarConst(const int64_t value, mlir::OpBuilder builder, mlir::Location loc, bool use_int64) { if (use_int64) { - return builder.create( - loc, mlir::DenseIntElementsAttr::get( - mlir::RankedTensorType::get({}, builder.getI64Type()), value)); + return mlir::TF::ConstOp::create( + builder, loc, + mlir::DenseIntElementsAttr::get( + mlir::RankedTensorType::get({}, builder.getI64Type()), value)); } else { - return builder.create( - loc, mlir::DenseIntElementsAttr::get( - mlir::RankedTensorType::get({}, builder.getI32Type()), - static_cast(value))); + return mlir::TF::ConstOp::create( + builder, loc, + mlir::DenseIntElementsAttr::get( + mlir::RankedTensorType::get({}, builder.getI32Type()), + static_cast(value))); } } @@ -228,32 +231,32 @@ StatusOr CreateZeroScalarConst(mlir::OpBuilder& builder, mlir::Location loc, mlir::Type type) { if (type.isF64()) { - return builder - .create( - loc, mlir::DenseFPElementsAttr::get( - mlir::RankedTensorType::get({}, builder.getF64Type()), - static_cast(0.))) + return mlir::TF::ConstOp::create( + builder, loc, + mlir::DenseFPElementsAttr::get( + mlir::RankedTensorType::get({}, builder.getF64Type()), + static_cast(0.))) .getResult(); } else if (type.isF32()) { - return builder - .create( - loc, mlir::DenseFPElementsAttr::get( - mlir::RankedTensorType::get({}, builder.getF32Type()), - static_cast(0.f))) + return mlir::TF::ConstOp::create( + builder, loc, + mlir::DenseFPElementsAttr::get( + mlir::RankedTensorType::get({}, builder.getF32Type()), + static_cast(0.f))) .getResult(); } else if (type.isInteger(32)) { - return builder - .create( - loc, mlir::DenseIntElementsAttr::get( - mlir::RankedTensorType::get({}, builder.getI32Type()), - static_cast(0))) + return mlir::TF::ConstOp::create( + builder, loc, + mlir::DenseIntElementsAttr::get( + mlir::RankedTensorType::get({}, builder.getI32Type()), + static_cast(0))) .getResult(); } else if (type.isInteger(64)) { - return builder - .create( - loc, mlir::DenseIntElementsAttr::get( - mlir::RankedTensorType::get({}, builder.getI64Type()), - static_cast(0))) + return mlir::TF::ConstOp::create( + builder, loc, + mlir::DenseIntElementsAttr::get( + mlir::RankedTensorType::get({}, builder.getI64Type()), + static_cast(0))) .getResult(); } else { return errors::InvalidArgument( @@ -270,8 +273,9 @@ StatusOr SelectScalarValueFromArray(mlir::OpBuilder& builder, return errors::InvalidArgument("Input array must have shape [1, N]."); } - mlir::TF::SliceOp sliced_value = builder.create( - location, mlir::RankedTensorType::get({1, 1}, 
arrayType.getElementType()), + mlir::TF::SliceOp sliced_value = mlir::TF::SliceOp::create( + builder, location, + mlir::RankedTensorType::get({1, 1}, arrayType.getElementType()), /*input=*/array, /*begin=*/IntConst(builder, location, {0, index}), /*size=*/IntConst(builder, location, {1, 1})); @@ -281,8 +285,8 @@ StatusOr SelectScalarValueFromArray(mlir::OpBuilder& builder, mlir::RankedTensorType::get({}, builder.getIntegerType(32)); mlir::Value scalar_shape = mlir::TF::collection_ops_util::GetR1Const( scalar_size_type.getShape(), builder, location); - mlir::Value scalar_sliced_value = builder.create( - location, mlir::ArrayRef{scalar_size_type}, + mlir::Value scalar_sliced_value = mlir::TF::ReshapeOp::create( + builder, location, mlir::ArrayRef{scalar_size_type}, mlir::ArrayRef{sliced_value.getOutput(), scalar_shape}, mlir::ArrayRef{}); return scalar_sliced_value; diff --git a/tensorflow/dtensor/mlir/value_utils.h b/tensorflow/dtensor/mlir/value_utils.h index 804683bc56a2cc..9775f57c79db11 100644 --- a/tensorflow/dtensor/mlir/value_utils.h +++ b/tensorflow/dtensor/mlir/value_utils.h @@ -48,7 +48,7 @@ StatusOr> GetTFShapeFromType(mlir::Type type); // Return a 1-D int32 constant array with the given values. mlir::Value IntConst(mlir::OpBuilder& builder, mlir::Location loc, - llvm::ArrayRef values); + llvm::ArrayRef values); // Return a 1-D int64 constant array with the given values. mlir::Value Int64Const(mlir::OpBuilder& builder, mlir::Location loc, llvm::ArrayRef values); diff --git a/tensorflow/dtensor/python/tests/BUILD b/tensorflow/dtensor/python/tests/BUILD index 9b38fcdeb48bb0..38c84bc127ef90 100644 --- a/tensorflow/dtensor/python/tests/BUILD +++ b/tensorflow/dtensor/python/tests/BUILD @@ -746,9 +746,6 @@ dtensor_test( "tpu": 10, TPU_V3_DONUT_BACKEND: 32, }, - tags = [ - "cuda-only", - ], deps = [ ":test_util", "//tensorflow/dtensor/python:api", @@ -802,7 +799,6 @@ dtensor_test( }, tags = [ "no_oss_py38", # TODO(b/267017937) - "cuda-only", ], deps = [ ":test_util", diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt index 1249b5c01e321f..7e877cd92f1cd1 100644 --- a/tensorflow/lite/CMakeLists.txt +++ b/tensorflow/lite/CMakeLists.txt @@ -749,9 +749,11 @@ add_library(tensorflow-lite set(_ALL_TFLITE_HDRS ${_ALL_TFLITE_SRCS}) list(FILTER _ALL_TFLITE_HDRS INCLUDE REGEX ".*\\.h$") target_include_directories(tensorflow-lite - PUBLIC $ $ - PUBLIC ${CMAKE_CURRENT_BINARY_DIR} - PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/.. + PUBLIC + $ + $ + $ + $ ) target_link_libraries(tensorflow-lite PUBLIC @@ -879,7 +881,9 @@ target_compile_options(_pywrap_tensorflow_interpreter_wrapper PRIVATE ${TFLITE_TARGET_PRIVATE_OPTIONS} ) -target_compile_options(xnnpack-delegate - PUBLIC ${TFLITE_TARGET_PUBLIC_OPTIONS} - PRIVATE ${TFLITE_TARGET_PRIVATE_OPTIONS} -) \ No newline at end of file +if(TFLITE_ENABLE_XNNPACK) + target_compile_options(xnnpack-delegate + PUBLIC ${TFLITE_TARGET_PUBLIC_OPTIONS} + PRIVATE ${TFLITE_TARGET_PRIVATE_OPTIONS} + ) +endif() \ No newline at end of file diff --git a/tensorflow/lite/core/c/common.h b/tensorflow/lite/core/c/common.h index 765c2bc12f2d7d..2c2e703735ef07 100644 --- a/tensorflow/lite/core/c/common.h +++ b/tensorflow/lite/core/c/common.h @@ -1352,7 +1352,15 @@ typedef enum TfLiteDelegateFlags { /// operator information using `Profiler::EventType::OPERATOR_INVOKE_EVENT` /// and the results will appear in the operator-wise Profiling section and not /// in the Delegate internal section. 
- kTfLiteDelegateFlagsPerOperatorProfiling = 4 + kTfLiteDelegateFlagsPerOperatorProfiling = 4, + + // This flag can be used by callers to hint that the delegate is likely to + // delegate the entire graph to a single delegate so certain allocations can + // be skipped. + // This is an ADVANCED feature and should only be used if the caller has + // prior knowledge that the delegate will fully delegate all subgraphs + // to a single delegate. + kTfLiteDelegateFlagsHintFullyDelegatedToSingleDelegate = 8, } TfLiteDelegateFlags; /// WARNING: This is an experimental interface that is subject to change. diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index 996d36b7e9725f..4d28de5a21ca2a 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -2489,9 +2489,11 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegateImpl(TfLiteDelegate* delegate) { // Restore delegation state if applicable. TF_LITE_ENSURE_STATUS(RedoAllDelegates()); + int64_t delegate_flags = TfLiteDelegateGetFlagsInternal(delegate); const bool delegate_supports_dynamic_shapes = - TfLiteDelegateGetFlagsInternal(delegate) & - kTfLiteDelegateFlagsAllowDynamicTensors; + delegate_flags & kTfLiteDelegateFlagsAllowDynamicTensors; + const bool hint_fully_delegated_to_single_delegate = + delegate_flags & kTfLiteDelegateFlagsHintFullyDelegatedToSingleDelegate; const auto pre_delegation_state = state_; if (state_ == kStateInvokableAndImmutable) { @@ -2500,7 +2502,8 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegateImpl(TfLiteDelegate* delegate) { // tensors. // Reset the state to force tensor/op reallocation. state_ = kStateUninvokable; - } else if (!delegate_supports_dynamic_shapes) { + } else if (!delegate_supports_dynamic_shapes && + !hint_fully_delegated_to_single_delegate) { // Check if graph has dynamic tensors by preparing ops. int last_execution_plan_index_prepared; TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt( @@ -2533,15 +2536,25 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegateImpl(TfLiteDelegate* delegate) { SwitchToKernelContext(); TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(status)); + if (hint_fully_delegated_to_single_delegate && !IsFullyDelegated()) { + ReportError( + "Hint fully delegated to single delegate is set, but the graph is not " + "fully delegated."); + return kTfLiteApplicationError; + } + // STEP 3: Leave graph in consistent state based on delegate & previous state. // =========================================================================== if (!delegate_supports_dynamic_shapes) { // CASE 1: Current delegate does not support dynamic shapes. // Reset the state to force tensor/op reallocation. - state_ = kStateUninvokable; - TF_LITE_ENSURE_STATUS( - reset_delegation_if_not_ok(EnsureMemoryAllocations())); + if (!hint_fully_delegated_to_single_delegate) { + state_ = kStateUninvokable; + TF_LITE_ENSURE_STATUS( + reset_delegation_if_not_ok(EnsureMemoryAllocations())); + } + // After using a delegate which doesn't support dynamic tensors, make the // entire graph immutable. 
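The new kTfLiteDelegateFlagsHintFullyDelegatedToSingleDelegate flag (value 8) lets a caller promise that a single delegate will claim every node; the subgraph.cc hunk above then skips the pre-delegation dynamic-shape probe and the extra EnsureMemoryAllocations() pass, and reports kTfLiteApplicationError if the promise is broken. A hedged usage sketch (only the flag wiring is shown; the Prepare callback and the rest of the delegate implementation are elided):

#include "tensorflow/lite/core/c/common.h"

// Sketch: build a delegate that opts into the new hint. The delegate's
// Prepare callback must still claim all nodes; otherwise
// ModifyGraphWithDelegate() fails with kTfLiteApplicationError per the check
// added above.
TfLiteDelegate MakeFullyDelegatingDelegate() {
  TfLiteDelegate delegate = TfLiteDelegateCreate();  // zero-initialized struct
  delegate.flags |= kTfLiteDelegateFlagsHintFullyDelegatedToSingleDelegate;
  // delegate.Prepare = MyDelegatePrepare;  // hypothetical callback, not shown
  return delegate;
}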
state_ = kStateInvokableAndImmutable; diff --git a/tensorflow/lite/delegates/serialization.cc b/tensorflow/lite/delegates/serialization.cc index 0c26589e19bf96..fec19eb6ac34a2 100644 --- a/tensorflow/lite/delegates/serialization.cc +++ b/tensorflow/lite/delegates/serialization.cc @@ -47,6 +47,12 @@ namespace { static const char kDelegatedNodesSuffix[] = "_dnodes"; +#if defined(_WIN32) +static const char kPathSeparator = '\\'; +#else +static const char kPathSeparator = '/'; +#endif // defined(_WIN32) + // Farmhash Fingerprint inline uint64_t CombineFingerprints(uint64_t l, uint64_t h) { // Murmur-inspired hashing. @@ -63,7 +69,8 @@ inline uint64_t CombineFingerprints(uint64_t l, uint64_t h) { inline std::string JoinPath(const std::string& path1, const std::string& path2) { - return (path1.back() == '/') ? (path1 + path2) : (path1 + "/" + path2); + return (path1.back() == kPathSeparator) ? (path1 + path2) + : (path1 + kPathSeparator + path2); } inline std::string GetFilePath(const std::string& cache_dir, diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD index a2715c519798a5..02d51f21d4fa4e 100644 --- a/tensorflow/lite/delegates/xnnpack/BUILD +++ b/tensorflow/lite/delegates/xnnpack/BUILD @@ -333,6 +333,16 @@ cc_library( ], ) +cc_library( + name = "macros", + hdrs = ["macros.h"], + compatible_with = get_compatible_with_portable(), + copts = tflite_copts(), + deps = [ + "//tensorflow/lite:minimal_logging", + ], +) + flatbuffer_cc_library( name = "weight_cache_schema", srcs = ["weight_cache_schema.fbs"], @@ -350,8 +360,10 @@ cc_library( compatible_with = get_compatible_with_portable(), deps = [ ":file_util", + ":macros", ":mmap_handle", ":weight_cache_schema", + "//tensorflow/lite:logger", "//tensorflow/lite:minimal_logging", "//tensorflow/lite/c:common", "@XNNPACK", @@ -380,6 +392,21 @@ cc_library( ], ) +cc_library( + name = "fingerprint_test_helpers", + testonly = True, + hdrs = ["fingerprint_test_helpers.h"], + compatible_with = get_compatible_with_portable(), + deps = [ + ":weight_cache", + ":weight_cache_test_helpers", + ":xnnpack_delegate_hdrs_only", + "//tensorflow/lite/c:common", + "@XNNPACK", + "@com_google_googletest//:gtest", + ], +) + cc_library( name = "mmap_handle", srcs = ["mmap_handle.cc"], @@ -387,8 +414,8 @@ cc_library( compatible_with = get_compatible_with_portable(), deps = [ ":file_util", + ":macros", ":windows_util", - "//tensorflow/lite:minimal_logging", ], ) @@ -409,6 +436,7 @@ cc_library( hdrs = ["file_util.h"], compatible_with = get_compatible_with_portable(), deps = [ + ":macros", "//tensorflow/lite:minimal_logging", ], ) @@ -1334,6 +1362,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_conv_2d_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -1350,6 +1379,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_depthwise_conv_2d_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -1384,6 +1414,7 @@ cc_test( }), deps = [ ":conv_2d_tester", + ":fingerprint_test_helpers", ":test_main", ":xnnpack_delegate_test_mode", "//tensorflow/lite/c:c_api_types", @@ -1433,6 +1464,7 @@ cc_test( }), deps = [ ":depthwise_conv_2d_tester", + ":fingerprint_test_helpers", ":test_main", ":xnnpack_delegate_test_mode", "//tensorflow/lite/c:c_api_types", @@ -1452,6 +1484,7 @@ cc_test( tags = ["notap"], deps = [ ":dynamically_quantized_fully_connected_tester", + ":fingerprint_test_helpers", ":test_main", ":xnnpack_delegate_test_mode", 
"//tensorflow/lite/c:c_api_types", @@ -1468,6 +1501,7 @@ cc_test( }), deps = [ ":dynamically_quantized_conv_2d_tester", + ":fingerprint_test_helpers", ":test_main", ":xnnpack_delegate_test_mode", "//tensorflow/lite/c:c_api_types", @@ -1484,6 +1518,7 @@ cc_test( }), deps = [ ":dynamically_quantized_transpose_conv_tester", + ":fingerprint_test_helpers", ":test_main", ":xnnpack_delegate_test_mode", "//tensorflow/lite/c:c_api_types", @@ -1500,10 +1535,14 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":fully_connected_tester", ":test_main", + ":weight_cache", + ":weight_cache_test_helpers", ":xnnpack_delegate_test_mode", "//tensorflow/lite/c:c_api_types", + "@XNNPACK", "@com_google_googletest//:gtest", ], ) @@ -1851,6 +1890,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_conv_2d_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -1867,6 +1907,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_depthwise_conv_2d_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -1917,6 +1958,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_fully_connected_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -2150,6 +2192,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_transpose_conv_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -2294,6 +2337,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":test_main", ":transpose_conv_tester", ":xnnpack_delegate_test_mode", @@ -2373,6 +2417,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_conv_2d_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -2388,6 +2433,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_depthwise_conv_2d_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -2418,6 +2464,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_fully_connected_tester", ":test_main", ":xnnpack_delegate_test_mode", @@ -2628,6 +2675,7 @@ cc_test( "//conditions:default": [], }), deps = [ + ":fingerprint_test_helpers", ":quantized_transpose_conv_tester", ":test_main", ":xnnpack_delegate_test_mode", diff --git a/tensorflow/lite/delegates/xnnpack/channelwise_quantized_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/channelwise_quantized_conv_2d_test.cc index 92293e08227593..d195d4f25435e8 100644 --- a/tensorflow/lite/delegates/xnnpack/channelwise_quantized_conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/channelwise_quantized_conv_2d_test.cc @@ -24,17 +24,16 @@ limitations under the License. 
#include #include "tensorflow/lite/c/c_api_types.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(ChannelwiseQuantizedConv2D, 1x1) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct ChannelwiseQuantizedConv2D : DelegateTest {}; +TEST_F(ChannelwiseQuantizedConv2D, 1x1) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -71,11 +70,7 @@ TEST(ChannelwiseQuantizedConv2D, 1x1) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, 3x3) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -112,11 +107,7 @@ TEST(ChannelwiseQuantizedConv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -155,11 +146,7 @@ TEST(ChannelwiseQuantizedConv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -198,11 +185,7 @@ TEST(ChannelwiseQuantizedConv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -241,11 +224,7 @@ TEST(ChannelwiseQuantizedConv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -288,11 +267,7 @@ TEST(ChannelwiseQuantizedConv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -335,11 +310,7 @@ TEST(ChannelwiseQuantizedConv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, DilationWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - 
+TEST_F(ChannelwiseQuantizedConv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -382,11 +353,7 @@ TEST(ChannelwiseQuantizedConv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, DilationWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -429,11 +396,7 @@ TEST(ChannelwiseQuantizedConv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -476,11 +439,7 @@ TEST(ChannelwiseQuantizedConv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -523,11 +482,7 @@ TEST(ChannelwiseQuantizedConv2D, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedConv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -570,13 +525,11 @@ TEST(ChannelwiseQuantizedConv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, MultiThreading) { +TEST_F(ChannelwiseQuantizedConv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -619,7 +572,7 @@ TEST(ChannelwiseQuantizedConv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, WeightsCache) { +TEST_F(ChannelwiseQuantizedConv2D, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -673,15 +624,13 @@ TEST(ChannelwiseQuantizedConv2D, WeightsCache) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedConv2D, TransientIndirectionBuffer) { +TEST_F(ChannelwiseQuantizedConv2D, TransientIndirectionBuffer) { TfLiteXNNPackDelegateOptions xnnpack_options = TfLiteXNNPackDelegateOptionsDefault(); xnnpack_options.num_threads = 2; xnnpack_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - 
TfLiteXNNPackDelegateDelete); + UseCustomDelegate(xnnpack_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/channelwise_quantized_depthwise_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/channelwise_quantized_depthwise_conv_2d_test.cc index 25dada01896c34..0c6de84e9a8d2f 100644 --- a/tensorflow/lite/delegates/xnnpack/channelwise_quantized_depthwise_conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/channelwise_quantized_depthwise_conv_2d_test.cc @@ -23,18 +23,16 @@ limitations under the License. #include #include -#include "tensorflow/lite/c/c_api_types.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_depthwise_conv_2d_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(ChannelwiseQuantizedDepthwiseConv2D, 1x1) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct ChannelwiseQuantizedDepthwiseConv2D : DelegateTest {}; +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, 1x1) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -66,11 +64,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, 1x1) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, 2x2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, 2x2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -103,11 +97,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, 2x2) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, 3x3) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -140,11 +130,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -179,11 +165,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, 5x5) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, 5x5) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -216,11 +198,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, 5x5) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, 5x5Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, 5x5Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -255,11 +233,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, 5x5Stride2) { .Test(xnnpack_delegate.get()); } 
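The XNNPack test hunks in this patch replace the per-test unique_ptr delegate boilerplate with TEST_F on a shared DelegateTest fixture pulled in from the new fingerprint_test_helpers target; the fixture owns xnnpack_delegate and exposes UseCustomDelegate() for tests that need non-default options. fingerprint_test_helpers.h is not part of this excerpt, so the sketch below is only a plausible reconstruction of its minimal shape, inferred from how the converted tests use it:

#include <memory>

#include <gtest/gtest.h>
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

// Assumed shape of the fixture used by the TEST_F conversions above: it owns
// an XNNPack delegate built with default options, and UseCustomDelegate()
// rebuilds it from caller-supplied options. The real helper presumably also
// performs the fingerprint/weight-cache setup implied by its name.
class DelegateTest : public ::testing::Test {
 protected:
  void UseCustomDelegate(const TfLiteXNNPackDelegateOptions& options) {
    xnnpack_delegate.reset(TfLiteXNNPackDelegateCreate(&options));
  }

  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
      xnnpack_delegate{TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete};
};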
-TEST(ChannelwiseQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -297,11 +271,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -339,11 +309,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -385,11 +351,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -431,11 +393,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, DilationWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -477,11 +435,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, DilationWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -523,11 +477,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, DepthMultiplier) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, DepthMultiplier) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -573,11 +523,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, DepthMultiplier) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, 
ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -619,11 +565,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -665,11 +607,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto scale_rng = std::bind( @@ -711,13 +649,11 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, MultiThreading) { +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -759,7 +695,7 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, WeightsCache) { +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -812,15 +746,13 @@ TEST(ChannelwiseQuantizedDepthwiseConv2D, WeightsCache) { .Test(xnnpack_delegate.get()); } -TEST(ChannelwiseQuantizedDepthwiseConv2D, TransientIndirectionBuffer) { +TEST_F(ChannelwiseQuantizedDepthwiseConv2D, TransientIndirectionBuffer) { TfLiteXNNPackDelegateOptions xnnpack_options = TfLiteXNNPackDelegateOptionsDefault(); xnnpack_options.num_threads = 2; xnnpack_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(xnnpack_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc index 25090bbaf2b5cf..e1b5a674946b73 100644 --- a/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc @@ -19,18 +19,16 @@ limitations under the License. 
#include #include -#include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/delegates/xnnpack/conv_2d_tester.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(Conv2D, 1x1) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct Conv2D : DelegateTest {}; +TEST_F(Conv2D, 1x1) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -52,11 +50,7 @@ TEST(Conv2D, 1x1) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, 3x3) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -78,11 +72,7 @@ TEST(Conv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -106,11 +96,7 @@ TEST(Conv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, Grouped) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, Grouped) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -136,11 +122,7 @@ TEST(Conv2D, Grouped) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -164,11 +146,7 @@ TEST(Conv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -192,11 +170,7 @@ TEST(Conv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -224,11 +198,7 @@ TEST(Conv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -256,11 +226,7 @@ TEST(Conv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, DilationWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -288,11 +254,7 @@ TEST(Conv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } 
-TEST(Conv2D, DilationWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -320,11 +282,7 @@ TEST(Conv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, FP16Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, FP16Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -352,11 +310,7 @@ TEST(Conv2D, FP16Weights) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, TensorWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, TensorWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -384,11 +338,7 @@ TEST(Conv2D, TensorWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, ChannelWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, ChannelWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -416,11 +366,7 @@ TEST(Conv2D, ChannelWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, SparseWeights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, SparseWeights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -448,11 +394,7 @@ TEST(Conv2D, SparseWeights) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, SparseFP16Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, SparseFP16Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -481,11 +423,7 @@ TEST(Conv2D, SparseFP16Weights) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, SparseTensorWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, SparseTensorWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -514,11 +452,7 @@ TEST(Conv2D, SparseTensorWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, SparseChannelWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, SparseChannelWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -547,11 +481,7 @@ TEST(Conv2D, SparseChannelWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -579,11 +509,7 @@ TEST(Conv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - 
TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -611,11 +537,7 @@ TEST(Conv2D, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -643,11 +565,7 @@ TEST(Conv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, DISABLED_TanhActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, DISABLED_TanhActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -675,11 +593,7 @@ TEST(Conv2D, DISABLED_TanhActivation) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, DISABLED_SignBitActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(Conv2D, DISABLED_SignBitActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -707,13 +621,11 @@ TEST(Conv2D, DISABLED_SignBitActivation) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, MultiThreading) { +TEST_F(Conv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -741,7 +653,7 @@ TEST(Conv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, WeightsCache) { +TEST_F(Conv2D, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -781,15 +690,13 @@ TEST(Conv2D, WeightsCache) { .Test(xnnpack_delegate.get()); } -TEST(Conv2D, TransientIndirectionBuffer) { - TfLiteXNNPackDelegateOptions xnnpack_options = +TEST_F(Conv2D, TransientIndirectionBuffer) { + TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); - xnnpack_options.num_threads = 2; - xnnpack_options.flags |= + delegate_options.num_threads = 2; + delegate_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_test.cc index e894bcdc2bc46a..931fff88178dfb 100644 --- a/tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_test.cc @@ -19,18 +19,16 @@ limitations under the License. 
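For tests that need non-default options, the converted pattern (see the Conv2D MultiThreading, WeightsCache, and TransientIndirectionBuffer hunks above) is to fill in TfLiteXNNPackDelegateOptions and hand it to the fixture instead of constructing the delegate inline. A short usage sketch with a hypothetical test name (the tester body is elided):

TEST_F(Conv2D, TransientIndirectionBufferSketch) {
  TfLiteXNNPackDelegateOptions delegate_options =
      TfLiteXNNPackDelegateOptionsDefault();
  delegate_options.num_threads = 2;
  delegate_options.flags |=
      TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER;
  // Replaces the previous inline TfLiteXNNPackDelegateCreate(&options) call;
  // the fixture now owns the resulting delegate.
  UseCustomDelegate(delegate_options);

  // ... build the Conv2DTester as in the existing tests and run it against
  // the fixture-owned delegate:
  //   Conv2DTester()/* ...params elided... */.Test(xnnpack_delegate.get());
}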
#include #include -#include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/delegates/xnnpack/depthwise_conv_2d_tester.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(DepthwiseConv2D, 1x1) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct DepthwiseConv2D : DelegateTest {}; +TEST_F(DepthwiseConv2D, 1x1) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto input_rng = @@ -47,11 +45,7 @@ TEST(DepthwiseConv2D, 1x1) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, 2x2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, 2x2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto input_rng = @@ -69,11 +63,7 @@ TEST(DepthwiseConv2D, 2x2) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, 3x3) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto input_rng = @@ -91,11 +81,7 @@ TEST(DepthwiseConv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto input_rng = @@ -115,11 +101,7 @@ TEST(DepthwiseConv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, 5x5) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, 5x5) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto input_rng = @@ -137,11 +119,7 @@ TEST(DepthwiseConv2D, 5x5) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, 5x5Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, 5x5Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto input_rng = @@ -161,11 +139,7 @@ TEST(DepthwiseConv2D, 5x5Stride2) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -188,11 +162,7 @@ TEST(DepthwiseConv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -215,11 +185,7 @@ TEST(DepthwiseConv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, StrideWithSamePadding) { std::random_device random_device; auto rng 
= std::mt19937(random_device()); auto batch_rng = @@ -246,11 +212,7 @@ TEST(DepthwiseConv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -277,11 +239,7 @@ TEST(DepthwiseConv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, DilationWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -308,11 +266,7 @@ TEST(DepthwiseConv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, DilationWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -339,11 +293,7 @@ TEST(DepthwiseConv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, DepthMultiplier) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, DepthMultiplier) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -372,11 +322,7 @@ TEST(DepthwiseConv2D, DepthMultiplier) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, FP16Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, FP16Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -403,11 +349,7 @@ TEST(DepthwiseConv2D, FP16Weights) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, TensorWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, TensorWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -434,11 +376,7 @@ TEST(DepthwiseConv2D, TensorWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, ChannelWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, ChannelWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -465,11 +403,7 @@ TEST(DepthwiseConv2D, ChannelWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, SparseWeights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, SparseWeights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -496,11 +430,7 @@ TEST(DepthwiseConv2D, SparseWeights) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, SparseFP16Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, 
SparseFP16Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -528,11 +458,7 @@ TEST(DepthwiseConv2D, SparseFP16Weights) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, SparseTensorWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, SparseTensorWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -560,11 +486,7 @@ TEST(DepthwiseConv2D, SparseTensorWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, SparseChannelWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, SparseChannelWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -592,11 +514,7 @@ TEST(DepthwiseConv2D, SparseChannelWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -623,11 +541,7 @@ TEST(DepthwiseConv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -654,11 +568,7 @@ TEST(DepthwiseConv2D, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -685,11 +595,7 @@ TEST(DepthwiseConv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, DISABLED_TanhActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, DISABLED_TanhActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -716,11 +622,7 @@ TEST(DepthwiseConv2D, DISABLED_TanhActivation) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, DISABLED_SignBitActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DepthwiseConv2D, DISABLED_SignBitActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -747,13 +649,11 @@ TEST(DepthwiseConv2D, DISABLED_SignBitActivation) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, MultiThreading) { +TEST_F(DepthwiseConv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -780,7 +680,7 
@@ TEST(DepthwiseConv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, WeightsCache) { +TEST_F(DepthwiseConv2D, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -818,15 +716,13 @@ TEST(DepthwiseConv2D, WeightsCache) { .Test(xnnpack_delegate.get()); } -TEST(DepthwiseConv2D, TransientIndirectionBuffer) { +TEST_F(DepthwiseConv2D, TransientIndirectionBuffer) { TfLiteXNNPackDelegateOptions xnnpack_options = TfLiteXNNPackDelegateOptionsDefault(); xnnpack_options.num_threads = 2; xnnpack_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(xnnpack_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/dynamically_quantized_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/dynamically_quantized_conv_2d_test.cc index 59507269580cbd..52e8333db4fd04 100644 --- a/tensorflow/lite/delegates/xnnpack/dynamically_quantized_conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/dynamically_quantized_conv_2d_test.cc @@ -19,22 +19,16 @@ limitations under the License. #include #include -#include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/delegates/xnnpack/dynamically_quantized_conv_2d_tester.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(DynamicallyQuantizedConv2D, 3x3) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); +struct DynamicallyQuantizedConv2D : DelegateTest {}; +TEST_F(DynamicallyQuantizedConv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -56,15 +50,7 @@ TEST(DynamicallyQuantizedConv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, 3x3Stride2) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -88,15 +74,7 @@ TEST(DynamicallyQuantizedConv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, Grouped) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, Grouped) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -122,15 +100,7 @@ 
TEST(DynamicallyQuantizedConv2D, Grouped) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, SmallKernelWithSamePadding) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -154,15 +124,7 @@ TEST(DynamicallyQuantizedConv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, SmallKernelWithValidPadding) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -186,14 +148,7 @@ TEST(DynamicallyQuantizedConv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, StrideWithSamePadding) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); +TEST_F(DynamicallyQuantizedConv2D, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -221,15 +176,7 @@ TEST(DynamicallyQuantizedConv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, StrideWithValidPadding) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -257,15 +204,7 @@ TEST(DynamicallyQuantizedConv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, DilationWithSamePadding) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -293,15 +232,7 @@ TEST(DynamicallyQuantizedConv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, DilationWithValidPadding) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - 
TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -329,15 +260,7 @@ TEST(DynamicallyQuantizedConv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, TensorWiseQuantizedInt8Weights) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, TensorWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -364,15 +287,7 @@ TEST(DynamicallyQuantizedConv2D, TensorWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, ChannelWiseQuantizedInt8Weights) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, ChannelWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -399,15 +314,7 @@ TEST(DynamicallyQuantizedConv2D, ChannelWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, ReluActivation) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -435,15 +342,7 @@ TEST(DynamicallyQuantizedConv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, Relu6Activation) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -471,15 +370,7 @@ TEST(DynamicallyQuantizedConv2D, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, ReluMinus1To1Activation) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -507,15 +398,7 @@ TEST(DynamicallyQuantizedConv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, TanhActivation) { - TfLiteXNNPackDelegateOptions delegate_options = - 
TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, TanhActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -543,15 +426,7 @@ TEST(DynamicallyQuantizedConv2D, TanhActivation) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, SignBitActivation) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedConv2D, SignBitActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -579,15 +454,13 @@ TEST(DynamicallyQuantizedConv2D, SignBitActivation) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, MultiThreading) { +TEST_F(DynamicallyQuantizedConv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; delegate_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -615,7 +488,7 @@ TEST(DynamicallyQuantizedConv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, WeightsCache) { +TEST_F(DynamicallyQuantizedConv2D, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -656,16 +527,14 @@ TEST(DynamicallyQuantizedConv2D, WeightsCache) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedConv2D, TransientIndirectionBuffer) { +TEST_F(DynamicallyQuantizedConv2D, TransientIndirectionBuffer) { TfLiteXNNPackDelegateOptions xnnpack_options = TfLiteXNNPackDelegateOptionsDefault(); xnnpack_options.num_threads = 2; xnnpack_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; xnnpack_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(xnnpack_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/dynamically_quantized_fully_connected_test.cc b/tensorflow/lite/delegates/xnnpack/dynamically_quantized_fully_connected_test.cc index 2f198a95195f11..2d2febcb21ab66 100644 --- a/tensorflow/lite/delegates/xnnpack/dynamically_quantized_fully_connected_test.cc +++ b/tensorflow/lite/delegates/xnnpack/dynamically_quantized_fully_connected_test.cc @@ -21,8 +21,8 @@ limitations under the License. 
#include #include -#include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/delegates/xnnpack/dynamically_quantized_fully_connected_tester.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { @@ -30,9 +30,10 @@ namespace xnnpack { // Dummy class to use with parameterized test. class DynamicallyQuantizedFullyConnectedTest - : public testing::TestWithParam {}; + : public testing::WithParamInterface, + public DelegateTest {}; -int GenInputChannels(const std::function &rng, +int GenInputChannels(const std::function& rng, WeightsType weights_type) { switch (weights_type) { case WeightsType::kChannelWiseQuantizedInt8: @@ -45,14 +46,6 @@ int GenInputChannels(const std::function &rng, } TEST_P(DynamicallyQuantizedFullyConnectedTest, 1D) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto channels_rng = @@ -71,14 +64,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, 1D) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, 2D) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -99,14 +84,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, 2D) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, 2DKeepDims) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -128,13 +105,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, 2DKeepDims) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, 3D) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -156,14 +126,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, 3D) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, 3DReshape) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -184,14 +146,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, 3DReshape) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, 3DKeepDims) { - TfLiteXNNPackDelegateOptions delegate_options = - 
TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -214,14 +168,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, 3DKeepDims) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, 4D) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -244,14 +190,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, 4D) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, 4DKeepDims) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -275,14 +213,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, 4DKeepDims) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, NoBias) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -304,14 +234,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, NoBias) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, ReluActivation) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -333,14 +255,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, ReluActivation) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, Relu6Activation) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -362,14 +276,6 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, Relu6Activation) { } TEST_P(DynamicallyQuantizedFullyConnectedTest, ReluMinus1To1Activation) { - TfLiteXNNPackDelegateOptions delegate_options = - TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -393,13 +299,8 @@ 
TEST_P(DynamicallyQuantizedFullyConnectedTest, ReluMinus1To1Activation) { TEST_P(DynamicallyQuantizedFullyConnectedTest, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); - delegate_options.flags |= - TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); - + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -429,9 +330,7 @@ TEST_P(DynamicallyQuantizedFullyConnectedTest, WeightsCache) { weights_cache(TfLiteXNNPackDelegateWeightsCacheCreate(), TfLiteXNNPackDelegateWeightsCacheDelete); delegate_options.weights_cache = weights_cache.get(); - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = diff --git a/tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_test.cc b/tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_test.cc index de863e4f1e2125..4a40e56852b56c 100644 --- a/tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_test.cc +++ b/tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_test.cc @@ -19,18 +19,16 @@ limitations under the License. #include #include -#include "tensorflow/lite/core/c/common.h" #include "tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_tester.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(DynamicallyQuantizedTransposeConvTest, 2x2Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct DynamicallyQuantizedTransposeConvTest : DelegateTest {}; +TEST_F(DynamicallyQuantizedTransposeConvTest, 2x2Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -51,10 +49,7 @@ TEST(DynamicallyQuantizedTransposeConvTest, 2x2Stride2) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +TEST_F(DynamicallyQuantizedTransposeConvTest, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -75,11 +70,7 @@ TEST(DynamicallyQuantizedTransposeConvTest, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, 4x4Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedTransposeConvTest, 4x4Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -100,11 +91,7 @@ TEST(DynamicallyQuantizedTransposeConvTest, 4x4Stride2) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, 4x4Stride4) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedTransposeConvTest, 4x4Stride4) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -125,11 +112,7 @@ 
TEST(DynamicallyQuantizedTransposeConvTest, 4x4Stride4) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedTransposeConvTest, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -153,10 +136,7 @@ TEST(DynamicallyQuantizedTransposeConvTest, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +TEST_F(DynamicallyQuantizedTransposeConvTest, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -180,11 +160,7 @@ TEST(DynamicallyQuantizedTransposeConvTest, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedTransposeConvTest, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -212,11 +188,7 @@ TEST(DynamicallyQuantizedTransposeConvTest, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(DynamicallyQuantizedTransposeConvTest, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -244,13 +216,11 @@ TEST(DynamicallyQuantizedTransposeConvTest, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, MultiThreading) { +TEST_F(DynamicallyQuantizedTransposeConvTest, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -279,7 +249,7 @@ TEST(DynamicallyQuantizedTransposeConvTest, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(DynamicallyQuantizedTransposeConvTest, WeightsCache) { +TEST_F(DynamicallyQuantizedTransposeConvTest, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_tester.cc b/tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_tester.cc index 3bdcd343373bac..abfd76c12a14f9 100644 --- a/tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_tester.cc +++ b/tensorflow/lite/delegates/xnnpack/dynamically_quantized_transpose_conv_tester.cc @@ -55,10 +55,12 @@ void DynamicallyQuantizedTransposeConvTester::Test( const Model* model = GetModel(buffer.data()); 
   std::unique_ptr<Interpreter> delegate_interpreter;
-  ASSERT_EQ(InterpreterBuilder(
-                model, ::tflite::ops::builtin::BuiltinOpResolverWithXNNPACK())(
-                &delegate_interpreter),
-            kTfLiteOk);
+  ASSERT_EQ(
+      InterpreterBuilder(
+          model,
+          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
+          &delegate_interpreter),
+      kTfLiteOk);
   std::unique_ptr<Interpreter> default_interpreter;
   ASSERT_EQ(
       InterpreterBuilder(
diff --git a/tensorflow/lite/delegates/xnnpack/file_util.cc b/tensorflow/lite/delegates/xnnpack/file_util.cc
index b475080480ecb4..7fbb917c850e4e 100644
--- a/tensorflow/lite/delegates/xnnpack/file_util.cc
+++ b/tensorflow/lite/delegates/xnnpack/file_util.cc
@@ -39,7 +39,13 @@ limitations under the License.
 #endif  // TFLITE_XNNPACK_IN_MEMORY_FILE_ENABLED
 #endif  // defined(__linux__) || defined(__ANDROID__)
 
+#include
+
+#include
 #include
+#include
+
+#include "tensorflow/lite/delegates/xnnpack/macros.h"
 
 #if !TFLITE_XNNPACK_IN_MEMORY_FILE_ENABLED
 #include "tensorflow/lite/logger.h"
@@ -57,7 +63,7 @@ FileDescriptor FileDescriptor::Duplicate() const {
   if (!IsValid()) {
     return FileDescriptor(-1);
   }
-  return FileDescriptor(dup(fd_));
+  return FileDescriptor::Duplicate(fd_);
 }
 
 void FileDescriptor::Reset(int new_fd) {
@@ -90,6 +96,9 @@ FileDescriptor::Offset FileDescriptorView::MovePos(
 }
 
 FileDescriptor FileDescriptor::Open(const char* path, int flags, mode_t mode) {
+  if (!path) {
+    return {};
+  }
 #if defined(_WIN32)
   if (!(flags & O_TEXT)) {
     flags |= O_BINARY;
@@ -154,5 +163,22 @@ FileDescriptor CreateInMemoryFileDescriptor(const char* path) {
 #endif
 }
 
+bool IsFileEmpty(const char* path, const FileDescriptor& fd) {
+#if defined(_WIN32)
+  struct _stat64 file_stats{};
+  const int res = fd.IsValid() ? _fstat64(fd.Value(), &file_stats)
+                               : _stat64(path, &file_stats);
+#else
+  struct stat file_stats{};
+  const int res =
+      fd.IsValid() ? fstat(fd.Value(), &file_stats) : stat(path, &file_stats);
+#endif
+  XNNPACK_RETURN_CHECK(
+      res == 0 || errno == ENOENT,
+      "could not access file descriptor %d stats to get size ('%s'): %s.",
+      fd.Value(), path, strerror(errno));
+  return file_stats.st_size == 0;
+}
+
 }  // namespace xnnpack
 }  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/file_util.h b/tensorflow/lite/delegates/xnnpack/file_util.h
index cddc0a4c615f06..9817c74d9f7ee6 100644
--- a/tensorflow/lite/delegates/xnnpack/file_util.h
+++ b/tensorflow/lite/delegates/xnnpack/file_util.h
@@ -76,6 +76,14 @@ class FileDescriptorView {
   // WARNING: the file descriptor must be valid and the file must be opened.
   Offset MovePos(Offset offset) const;
 
+  // Returns the size of the file.
+  Offset Size() const {
+    Offset pos = GetPos();
+    Offset size = SetPosFromEnd(0);
+    SetPos(pos);
+    return size;
+  }
+
   // Reads `count` bytes from the file at the current position to `dst`.
   //
   // Returns true if all the data available in the file was read to the buffer
@@ -167,6 +175,11 @@ class FileDescriptor : public FileDescriptorView {
 // descriptor.
 bool InMemoryFileDescriptorAvailable();
 
+// Returns true if the file is empty (the file may exist)
+//
+// Note: if `fd` is valid, then `path` is ignored.
+bool IsFileEmpty(const char* path, const FileDescriptor& fd);
+
 // Creates a new file descriptor that isn't backed by a file system. The file
 // will be automatically cleaned up when the last file descriptor pointing to it
 // is closed.
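The file_util change above adds two small helpers: FileDescriptorView::Size(), which computes the file size by seeking to the end and restoring the previous position, and IsFileEmpty(path, fd), which prefers a valid file descriptor over the path and treats a missing file (ENOENT) as empty rather than as an error. A minimal caller sketch under those assumptions follows; the function name CacheNeedsRebuild and the cache-path argument are illustrative only and are not part of this patch:

#include <fcntl.h>

#include "tensorflow/lite/delegates/xnnpack/file_util.h"

// Hypothetical caller: an empty or missing weight-cache file means the cache
// must be (re)built from scratch.
bool CacheNeedsRebuild(const char* cache_path) {
  tflite::xnnpack::FileDescriptor fd =
      tflite::xnnpack::FileDescriptor::Open(cache_path, O_RDONLY, 0644);
  // A valid descriptor takes precedence over `cache_path`; if the file does
  // not exist, IsFileEmpty reports it as empty instead of failing.
  return tflite::xnnpack::IsFileEmpty(cache_path, fd);
}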
diff --git a/tensorflow/lite/delegates/xnnpack/file_util_test.cc b/tensorflow/lite/delegates/xnnpack/file_util_test.cc index 69196fefa28f52..9a1ce5e50aa5f2 100644 --- a/tensorflow/lite/delegates/xnnpack/file_util_test.cc +++ b/tensorflow/lite/delegates/xnnpack/file_util_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include +#include #include #include #include @@ -25,6 +26,14 @@ limitations under the License. namespace tflite::xnnpack { namespace { +// Returns a path for a temporary file. +// +// Each call will return a new path. +std::string NewTempFilePath() { + static std::atomic i = 0; + return testing::TempDir() + "test_file_" + std::to_string(i++); +} + TEST(FileDescriptorTest, DefaultConstructedIsInvalid) { FileDescriptor fd; EXPECT_FALSE(fd.IsValid()); @@ -54,7 +63,7 @@ TEST(FileDescriptorTest, OpenNullFileFails) { } TEST(FileDescriptorTest, OpenWriteRewindAndReadWorks) { - const std::string tmp_file = testing::TempDir() + __FUNCTION__; + const std::string tmp_file = NewTempFilePath(); FileDescriptor fd = FileDescriptor::Open(tmp_file.c_str(), O_CREAT | O_TRUNC | O_RDWR, 0644); ASSERT_TRUE(fd.IsValid()); @@ -67,7 +76,7 @@ TEST(FileDescriptorTest, OpenWriteRewindAndReadWorks) { } TEST(FileDescriptorTest, WriteFailureReturnsFalse) { - const std::string tmp_file = testing::TempDir() + __FUNCTION__; + const std::string tmp_file = NewTempFilePath(); FileDescriptor fd = FileDescriptor::Open(tmp_file.c_str(), O_CREAT | O_TRUNC | O_RDONLY, 0644); ASSERT_TRUE(fd.IsValid()); @@ -76,7 +85,7 @@ TEST(FileDescriptorTest, WriteFailureReturnsFalse) { } TEST(FileDescriptorTest, ReadFailureReturnsFalse) { - const std::string tmp_file = testing::TempDir() + __FUNCTION__; + const std::string tmp_file = NewTempFilePath(); FileDescriptor fd = FileDescriptor::Open(tmp_file.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644); ASSERT_TRUE(fd.IsValid()); @@ -84,5 +93,50 @@ TEST(FileDescriptorTest, ReadFailureReturnsFalse) { EXPECT_FALSE(fd.Read(dst_data.data(), dst_data.size())); } +TEST(FileDescriptorTest, IsFileEmptyReturnTrueForAnEmptyFileThatExists) { + const std::string tmp_file = NewTempFilePath(); + FileDescriptor fd = FileDescriptor::Open(tmp_file.c_str(), + O_CREAT | O_TRUNC | O_WRONLY, 0644); + fd.Close(); + EXPECT_TRUE(IsFileEmpty(tmp_file.c_str(), FileDescriptor())); +} + +TEST(FileDescriptorTest, IsFileEmptyReturnTrueForAnNonExistingFile) { + const std::string tmp_file = NewTempFilePath(); + EXPECT_TRUE(IsFileEmpty(tmp_file.c_str(), FileDescriptor())); +} + +TEST(FileDescriptorTest, + IsFileEmptyReturnTrueForAnNonExistingFileWithFileDescriptor) { + const std::string tmp_file = NewTempFilePath(); + FileDescriptor fd = FileDescriptor::Open(tmp_file.c_str(), + O_CREAT | O_TRUNC | O_WRONLY, 0644); + EXPECT_TRUE(IsFileEmpty("asdfasdf", FileDescriptor())); +} + +TEST(FileDescriptorTest, IsFileEmptyReturnFalseForAFileThatHasContents) { + const std::string tmp_file = NewTempFilePath(); + FileDescriptor fd = FileDescriptor::Open(tmp_file.c_str(), + O_CREAT | O_TRUNC | O_WRONLY, 0644); + const std::string src_data = "The quick brown fox jumps over the lazy dog."; + EXPECT_TRUE(fd.Write(src_data.data(), src_data.size())); + EXPECT_FALSE(IsFileEmpty(tmp_file.c_str(), fd)); +} + +TEST(FileDescriptorTest, IsFileEmptyPrioritizesTheFileDescriptor) { + // We open 2 files, put some data only in one and then pass the file name of + // the one that has data and the file descriptor of the empty one. 
+ const std::string tmp_file = NewTempFilePath(); + const std::string tmp_file2 = NewTempFilePath(); + FileDescriptor fd = FileDescriptor::Open(tmp_file.c_str(), + O_CREAT | O_TRUNC | O_WRONLY, 0644); + FileDescriptor fd2 = FileDescriptor::Open(tmp_file2.c_str(), + O_CREAT | O_TRUNC | O_WRONLY, 0644); + const std::string src_data = "The quick brown fox jumps over the lazy dog."; + EXPECT_TRUE(fd.Write(src_data.data(), src_data.size())); + fd.Close(); + EXPECT_TRUE(IsFileEmpty(tmp_file.c_str(), fd2)); +} + } // namespace } // namespace tflite::xnnpack diff --git a/tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h b/tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h new file mode 100644 index 00000000000000..29edbe5a35c841 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h @@ -0,0 +1,112 @@ +/* Copyright 2025 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_FINGERPRINT_TEST_HELPERS_H_ +#define TENSORFLOW_LITE_DELEGATES_XNNPACK_FINGERPRINT_TEST_HELPERS_H_ + +#include + +#include +#include +#include "experimental.h" // from @XNNPACK +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/delegates/xnnpack/weight_cache.h" +#include "tensorflow/lite/delegates/xnnpack/weight_cache_test_helpers.h" +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" + +namespace tflite::xnnpack { + +struct TfLiteDelegateDeleter { + void operator()(TfLiteDelegate* delegate) { + TfLiteXNNPackDelegateDelete(delegate); + } +}; + +using TfLiteDelegatePtr = + std::unique_ptr; + +struct DelegateTest : public virtual testing::Test { + void SetUp() override { + TfLiteXNNPackDelegateOptions delegate_options = + TfLiteXNNPackDelegateOptionsDefault(); + + // By default, we try to setup a file weight cache to also check fingerprint + // generation. If the test system doesn't support a file system, then the + // cache file will be invalid. + if (cache_file.IsValid()) { + xnn_clear_fingerprints(); + delegate_options.weight_cache_file_path = cache_file.GetCPath(); + delegate_options.weight_cache_file_descriptor = + cache_file.Duplicate().Release(); + delegate_options.flags |= + TFLITE_XNNPACK_DELEGATE_FLAG_ENABLE_LATEST_OPERATORS; + check_for_cache_fingerprints = true; + } + + xnnpack_delegate = + TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&delegate_options)); + ASSERT_THAT(xnnpack_delegate, testing::NotNull()); + } + + void TearDown() override { + if (check_for_cache_fingerprints) { + ASSERT_TRUE(cache_file.IsValid()); + EXPECT_TRUE(IsCompatibleCacheFile(cache_file)); + if (AlterXNNPackFingerprints()) { + EXPECT_FALSE(IsCompatibleCacheFile(cache_file)); + } + } + } + + // Artificially change fingerprint values. + // + // This allows us to check that changing a fingerprint value will make the + // cache file incompatible. + // + // Returns the current number of fingerprints. 
+ int AlterXNNPackFingerprints() { + int i = 0; + int modified = 0; + for (const xnn_fingerprint* fingerprint = xnn_get_fingerprint_by_idx(i); + fingerprint != nullptr; + fingerprint = xnn_get_fingerprint_by_idx(++i)) { + xnn_fingerprint new_fingerprint = *fingerprint; + ++new_fingerprint.value; + xnn_set_fingerprint(new_fingerprint); + ++modified; + } + return modified; + } + + // Replaces the xnnpack delegate with a custom one. + void UseCustomDelegate(const TfLiteXNNPackDelegateOptions& delegate_options) { + check_for_cache_fingerprints = false; + xnnpack_delegate = + TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&delegate_options)); + ASSERT_THAT(xnnpack_delegate, testing::NotNull()); + } + + // Replaces the xnnpack delegate with one that sets up a file backed weight + // cache. + void UseDelegateWithFileWeightCache() {} + + // The default delegate is created in a generic way. + TfLiteDelegatePtr xnnpack_delegate; + tflite::xnnpack::TempFileDesc cache_file; + bool check_for_cache_fingerprints = false; +}; + +} // namespace tflite::xnnpack + +#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_FINGERPRINT_TEST_HELPERS_H_ diff --git a/tensorflow/lite/delegates/xnnpack/fully_connected_test.cc b/tensorflow/lite/delegates/xnnpack/fully_connected_test.cc index 92a6074c464f85..6701d0bc1c8f59 100644 --- a/tensorflow/lite/delegates/xnnpack/fully_connected_test.cc +++ b/tensorflow/lite/delegates/xnnpack/fully_connected_test.cc @@ -19,18 +19,16 @@ limitations under the License. #include #include -#include "tensorflow/lite/c/c_api_types.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/fully_connected_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(FullyConnected, 1D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct FullyConnectedTest : public DelegateTest {}; +TEST_F(FullyConnectedTest, 1D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto channels_rng = @@ -45,11 +43,7 @@ TEST(FullyConnected, 1D) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, 1DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, 1DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto channels_rng = @@ -65,11 +59,7 @@ TEST(FullyConnected, 1DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, 2D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, 2D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -87,11 +77,7 @@ TEST(FullyConnected, 2D) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, 2DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, 2DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -110,11 +96,7 @@ TEST(FullyConnected, 2DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, 3D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, 3D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -133,11 
+115,7 @@ TEST(FullyConnected, 3D) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, 3DReshape) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, 3DReshape) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -156,11 +134,7 @@ TEST(FullyConnected, 3DReshape) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, 3DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, 3DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -180,11 +154,7 @@ TEST(FullyConnected, 3DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, 4D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, 4D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -204,11 +174,7 @@ TEST(FullyConnected, 4D) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, 4DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, 4DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto shape_rng = @@ -229,11 +195,7 @@ TEST(FullyConnected, 4DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -252,11 +214,7 @@ TEST(FullyConnected, NoBias) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, FP16Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, FP16Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -275,11 +233,7 @@ TEST(FullyConnected, FP16Weights) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, FP16WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, FP16WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -299,11 +253,7 @@ TEST(FullyConnected, FP16WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, DynamicWeights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, DynamicWeights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -322,11 +272,7 @@ TEST(FullyConnected, DynamicWeights) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, DynamicWeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, DynamicWeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -346,11 +292,7 @@ TEST(FullyConnected, DynamicWeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, DynamicBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - 
TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, DynamicBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -369,11 +311,7 @@ TEST(FullyConnected, DynamicBias) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, DynamicWeightsAndBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, DynamicWeightsAndBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -393,11 +331,7 @@ TEST(FullyConnected, DynamicWeightsAndBias) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, TensorWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, TensorWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -416,11 +350,7 @@ TEST(FullyConnected, TensorWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, TensorWiseQuantizedInt8WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, TensorWiseQuantizedInt8WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -440,11 +370,7 @@ TEST(FullyConnected, TensorWiseQuantizedInt8WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, ChannelWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, ChannelWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -463,11 +389,7 @@ TEST(FullyConnected, ChannelWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, ChannelWiseQuantizedInt8WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, ChannelWiseQuantizedInt8WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -487,11 +409,7 @@ TEST(FullyConnected, ChannelWiseQuantizedInt8WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -510,11 +428,7 @@ TEST(FullyConnected, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -533,11 +447,7 @@ TEST(FullyConnected, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(FullyConnectedTest, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -556,13 +466,11 @@ TEST(FullyConnected, 
ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, MultiThreading) { +TEST_F(FullyConnectedTest, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -581,7 +489,7 @@ TEST(FullyConnected, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(FullyConnected, WeightsCache) { +TEST_F(FullyConnectedTest, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/macros.h b/tensorflow/lite/delegates/xnnpack/macros.h new file mode 100644 index 00000000000000..ef2218ec621107 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/macros.h @@ -0,0 +1,48 @@ +/* Copyright 2025 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_MACROS_H_ +#define TENSORFLOW_LITE_DELEGATES_XNNPACK_MACROS_H_ + +#include + +#include "tensorflow/lite/minimal_logging.h" + +#define XNNPACK_LOG_LIMIT 4048 + +#define XNNPACK_ABORT_CHECK(TEST, ...) \ + if (!(TEST)) { \ + char msg[XNNPACK_LOG_LIMIT] = {0}; \ + int bytes = \ + snprintf(msg, XNNPACK_LOG_LIMIT, "%s:%d: ", __FILE__, __LINE__); \ + snprintf(msg + bytes, XNNPACK_LOG_LIMIT - bytes, "" __VA_ARGS__); \ + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, msg); \ + std::abort(); \ + } + +#define XNNPACK_VAR_ARG_HEAD(FIRST, ...) FIRST + +#define XNNPACK_RETURN_CHECK(TEST, ...) \ + if (!(TEST)) { \ + if (sizeof(XNNPACK_VAR_ARG_HEAD("" __VA_ARGS__)) > sizeof("")) { \ + char msg[XNNPACK_LOG_LIMIT] = {0}; \ + int bytes = \ + snprintf(msg, XNNPACK_LOG_LIMIT, "%s:%d: ", __FILE__, __LINE__); \ + snprintf(msg + bytes, XNNPACK_LOG_LIMIT - bytes, "" __VA_ARGS__); \ + TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, msg); \ + } \ + return false; \ + } + +#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_MACROS_H_ diff --git a/tensorflow/lite/delegates/xnnpack/mmap_handle.cc b/tensorflow/lite/delegates/xnnpack/mmap_handle.cc index 169e284de47f46..92caf07fac811e 100644 --- a/tensorflow/lite/delegates/xnnpack/mmap_handle.cc +++ b/tensorflow/lite/delegates/xnnpack/mmap_handle.cc @@ -32,20 +32,8 @@ limitations under the License. 
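// ---------------------------------------------------------------------------
// A minimal usage sketch of the new check macros defined in macros.h above.
// The LoadCache() helper and its arguments are hypothetical and used only for
// illustration; they are not part of this patch:
//
//   bool LoadCache(const char* path, int fd) {
//     // On failure, logs "<file>:<line>: " plus the formatted message via
//     // TFLITE_LOG_PROD and makes the enclosing function return false.
//     // With no message arguments it silently returns false.
//     XNNPACK_RETURN_CHECK(fd >= 0, "invalid fd %d ('%s')", fd, path);
//     // Fatal invariant: logs the formatted message, then calls std::abort().
//     XNNPACK_ABORT_CHECK(path != nullptr, "path must not be null");
//     return true;
//   }
//
// Both macros expand to a plain if statement rather than a do { } while (0)
// block, so an else placed immediately after an invocation would bind to the
// macro's if; callers such as mmap_handle.cc below use them as standalone
// statements.
// ---------------------------------------------------------------------------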
#include #include "tensorflow/lite/delegates/xnnpack/file_util.h" +#include "tensorflow/lite/delegates/xnnpack/macros.h" #include "tensorflow/lite/delegates/xnnpack/windows_util.h" -#include "tensorflow/lite/logger.h" -#include "tensorflow/lite/minimal_logging.h" - -#define XNNPACK_VAR_ARG_HEAD(FIRST, ...) FIRST - -#define XNNPACK_RETURN_CHECK(TEST, ...) \ - if (!(TEST)) { \ - if (sizeof(XNNPACK_VAR_ARG_HEAD("" __VA_ARGS__)) > sizeof("")) { \ - TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, \ - "XNNPack weight cache: " __VA_ARGS__); \ - } \ - return false; \ - } namespace tflite::xnnpack { @@ -100,9 +88,10 @@ bool MMapHandle::Map(const FileDescriptorView& fd, const size_t offset, safe_path, strerror(errno)); #else struct stat file_stats; - XNNPACK_RETURN_CHECK(fstat(fd.Value(), &file_stats) == 0, - "could not access file stats to get size ('%s'): %s.", - safe_path, strerror(errno)); + XNNPACK_RETURN_CHECK( + fstat(fd.Value(), &file_stats) == 0, + "could not access file descriptor %d stats to get size ('%s'): %s.", + fd.Value(), safe_path, strerror(errno)); #endif // This will reset data_ and size_ on return until it is deactivated. @@ -149,8 +138,9 @@ bool MMapHandle::Map(const FileDescriptorView& fd, const size_t offset, data_ = static_cast( mmap(/*addr=*/nullptr, size_ + offset_page_adjustment_, PROT_READ, MAP_SHARED, fd.Value(), offset_ - offset_page_adjustment_)); - XNNPACK_RETURN_CHECK(data_ != MAP_FAILED, "could not mmap file (%s): %s.", - safe_path, strerror(errno)); + XNNPACK_RETURN_CHECK(data_ != MAP_FAILED, + "could not mmap file descriptor %d (%s): %s.", + fd.Value(), safe_path, strerror(errno)); #endif unmap_on_error.Deactivate(); return true; diff --git a/tensorflow/lite/delegates/xnnpack/signed_quantized_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/signed_quantized_conv_2d_test.cc index f67ba714b01cc8..06daba0d9bada7 100644 --- a/tensorflow/lite/delegates/xnnpack/signed_quantized_conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/signed_quantized_conv_2d_test.cc @@ -21,17 +21,16 @@ limitations under the License. 
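// ---------------------------------------------------------------------------
// The test conversions in this patch replace the per-test xnnpack_delegate
// smart-pointer boilerplate with a shared googletest fixture (DelegateTest,
// declared in fingerprint_test_helpers.h, which is not shown in this
// section). A rough sketch of what such a fixture could look like; this is an
// assumption for illustration, not the actual helper:
//
//   class DelegateTest : public ::testing::Test {
//    protected:
//     // Rebuilds the delegate with caller-supplied options, e.g. for the
//     // MultiThreading and TransientIndirectionBuffer tests.
//     void UseCustomDelegate(const TfLiteXNNPackDelegateOptions& options) {
//       xnnpack_delegate.reset(TfLiteXNNPackDelegateCreate(&options));
//     }
//     // Delegate built with default options, shared by most tests.
//     std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
//         xnnpack_delegate{TfLiteXNNPackDelegateCreate(nullptr),
//                          TfLiteXNNPackDelegateDelete};
//   };
//
// Individual tests keep calling .Test(xnnpack_delegate.get()) unchanged.
// ---------------------------------------------------------------------------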
#include #include "tensorflow/lite/c/c_api_types.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(SignedQuantizedConv2D, 1x1) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct SignedQuantizedConv2D : DelegateTest {}; +TEST_F(SignedQuantizedConv2D, 1x1) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -59,11 +58,7 @@ TEST(SignedQuantizedConv2D, 1x1) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, 3x3) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -91,11 +86,7 @@ TEST(SignedQuantizedConv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -125,11 +116,7 @@ TEST(SignedQuantizedConv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, Grouped) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, Grouped) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -163,11 +150,7 @@ TEST(SignedQuantizedConv2D, Grouped) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -197,11 +180,7 @@ TEST(SignedQuantizedConv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -231,11 +210,7 @@ TEST(SignedQuantizedConv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -269,11 +244,7 @@ TEST(SignedQuantizedConv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, StrideWithValidPadding) { - std::unique_ptr - 
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -307,11 +278,7 @@ TEST(SignedQuantizedConv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, DilationWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -345,11 +312,7 @@ TEST(SignedQuantizedConv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, DilationWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -383,11 +346,7 @@ TEST(SignedQuantizedConv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -421,11 +380,7 @@ TEST(SignedQuantizedConv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -459,11 +414,7 @@ TEST(SignedQuantizedConv2D, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedConv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -497,13 +448,11 @@ TEST(SignedQuantizedConv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, MultiThreading) { +TEST_F(SignedQuantizedConv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -537,15 +486,13 @@ TEST(SignedQuantizedConv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedConv2D, TransientIndirectionBuffer) { - TfLiteXNNPackDelegateOptions xnnpack_options = +TEST_F(SignedQuantizedConv2D, TransientIndirectionBuffer) { + TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); - xnnpack_options.num_threads = 2; - 
xnnpack_options.flags |= + delegate_options.num_threads = 2; + delegate_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/signed_quantized_depthwise_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/signed_quantized_depthwise_conv_2d_test.cc index 3acfbaaf34778e..c409b18002ef51 100644 --- a/tensorflow/lite/delegates/xnnpack/signed_quantized_depthwise_conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/signed_quantized_depthwise_conv_2d_test.cc @@ -20,18 +20,16 @@ limitations under the License. #include #include -#include "tensorflow/lite/c/c_api_types.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_depthwise_conv_2d_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(SignedQuantizedDepthwiseConv2D, 1x1) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct SignedQuantizedDepthwiseConv2D : DelegateTest {}; +TEST_F(SignedQuantizedDepthwiseConv2D, 1x1) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -54,11 +52,7 @@ TEST(SignedQuantizedDepthwiseConv2D, 1x1) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, 2x2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, 2x2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -82,11 +76,7 @@ TEST(SignedQuantizedDepthwiseConv2D, 2x2) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, 3x3) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -110,11 +100,7 @@ TEST(SignedQuantizedDepthwiseConv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -140,11 +126,7 @@ TEST(SignedQuantizedDepthwiseConv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, 5x5) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, 5x5) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -168,11 +150,7 @@ TEST(SignedQuantizedDepthwiseConv2D, 5x5) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, 5x5Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - 
+TEST_F(SignedQuantizedDepthwiseConv2D, 5x5Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -198,11 +176,7 @@ TEST(SignedQuantizedDepthwiseConv2D, 5x5Stride2) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -231,11 +205,7 @@ TEST(SignedQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -264,11 +234,7 @@ TEST(SignedQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -301,11 +267,7 @@ TEST(SignedQuantizedDepthwiseConv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -338,11 +300,7 @@ TEST(SignedQuantizedDepthwiseConv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, DilationWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -375,11 +333,7 @@ TEST(SignedQuantizedDepthwiseConv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, DilationWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -412,11 +366,7 @@ TEST(SignedQuantizedDepthwiseConv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, DepthMultiplier) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, DepthMultiplier) { std::random_device 
random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -451,11 +401,7 @@ TEST(SignedQuantizedDepthwiseConv2D, DepthMultiplier) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -488,11 +434,7 @@ TEST(SignedQuantizedDepthwiseConv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -525,11 +467,7 @@ TEST(SignedQuantizedDepthwiseConv2D, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -562,13 +500,11 @@ TEST(SignedQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, MultiThreading) { +TEST_F(SignedQuantizedDepthwiseConv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -601,7 +537,7 @@ TEST(SignedQuantizedDepthwiseConv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, WeightsCache) { +TEST_F(SignedQuantizedDepthwiseConv2D, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -645,15 +579,13 @@ TEST(SignedQuantizedDepthwiseConv2D, WeightsCache) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedDepthwiseConv2D, TransientIndirectionBuffer) { - TfLiteXNNPackDelegateOptions xnnpack_options = +TEST_F(SignedQuantizedDepthwiseConv2D, TransientIndirectionBuffer) { + TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); - xnnpack_options.num_threads = 2; - xnnpack_options.flags |= + delegate_options.num_threads = 2; + delegate_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git 
a/tensorflow/lite/delegates/xnnpack/signed_quantized_fully_connected_test.cc b/tensorflow/lite/delegates/xnnpack/signed_quantized_fully_connected_test.cc index 3097d314a3a6ab..5a7a9dfd77b24e 100644 --- a/tensorflow/lite/delegates/xnnpack/signed_quantized_fully_connected_test.cc +++ b/tensorflow/lite/delegates/xnnpack/signed_quantized_fully_connected_test.cc @@ -21,17 +21,16 @@ limitations under the License. #include #include "tensorflow/lite/c/c_api_types.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(SignedQuantizedFullyConnected, 1D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct SignedQuantizedFullyConnected : DelegateTest {}; +TEST_F(SignedQuantizedFullyConnected, 1D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -52,11 +51,7 @@ TEST(SignedQuantizedFullyConnected, 1D) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, 1DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, 1DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -78,11 +73,7 @@ TEST(SignedQuantizedFullyConnected, 1DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, 2D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, 2D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -106,11 +97,7 @@ TEST(SignedQuantizedFullyConnected, 2D) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, 2DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, 2DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -135,11 +122,7 @@ TEST(SignedQuantizedFullyConnected, 2DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, 3D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, 3D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -164,11 +147,7 @@ TEST(SignedQuantizedFullyConnected, 3D) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, 3DReshape) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, 3DReshape) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -193,11 +172,7 @@ TEST(SignedQuantizedFullyConnected, 3DReshape) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, 3DKeepDims) { - std::unique_ptr - 
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, 3DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -223,11 +198,7 @@ TEST(SignedQuantizedFullyConnected, 3DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, 4D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, 4D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -253,11 +224,7 @@ TEST(SignedQuantizedFullyConnected, 4D) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, 4DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, 4DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -284,11 +251,7 @@ TEST(SignedQuantizedFullyConnected, 4DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -313,11 +276,7 @@ TEST(SignedQuantizedFullyConnected, NoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -342,11 +301,7 @@ TEST(SignedQuantizedFullyConnected, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -371,11 +326,7 @@ TEST(SignedQuantizedFullyConnected, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedFullyConnected, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -400,13 +351,11 @@ TEST(SignedQuantizedFullyConnected, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, MultiThreading) { +TEST_F(SignedQuantizedFullyConnected, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); 
std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -431,7 +380,7 @@ TEST(SignedQuantizedFullyConnected, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedFullyConnected, WeightsCache) { +TEST_F(SignedQuantizedFullyConnected, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/signed_quantized_transpose_conv_test.cc b/tensorflow/lite/delegates/xnnpack/signed_quantized_transpose_conv_test.cc index 7daae13ebdea16..d4dceb9077ff26 100644 --- a/tensorflow/lite/delegates/xnnpack/signed_quantized_transpose_conv_test.cc +++ b/tensorflow/lite/delegates/xnnpack/signed_quantized_transpose_conv_test.cc @@ -20,17 +20,16 @@ limitations under the License. #include #include "tensorflow/lite/c/c_api_types.h" +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_transpose_conv_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(SignedQuantizedTransposeConvTest, 2x2Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct SignedQuantizedTransposeConvTest : DelegateTest {}; +TEST_F(SignedQuantizedTransposeConvTest, 2x2Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -52,11 +51,7 @@ TEST(SignedQuantizedTransposeConvTest, 2x2Stride2) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, 2x2Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, 2x2Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -79,11 +74,7 @@ TEST(SignedQuantizedTransposeConvTest, 2x2Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -105,11 +96,7 @@ TEST(SignedQuantizedTransposeConvTest, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, 3x3Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, 3x3Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -132,11 +119,7 @@ TEST(SignedQuantizedTransposeConvTest, 3x3Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, 4x4Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, 4x4Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -158,11 +141,7 @@ TEST(SignedQuantizedTransposeConvTest, 4x4Stride2) { .Test(xnnpack_delegate.get()); } 
-TEST(SignedQuantizedTransposeConvTest, 4x4Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, 4x4Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -185,11 +164,7 @@ TEST(SignedQuantizedTransposeConvTest, 4x4Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, 4x4Stride4) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, 4x4Stride4) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -211,11 +186,7 @@ TEST(SignedQuantizedTransposeConvTest, 4x4Stride4) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, 4x4Stride4NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, 4x4Stride4NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -238,11 +209,7 @@ TEST(SignedQuantizedTransposeConvTest, 4x4Stride4NoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -267,11 +234,7 @@ TEST(SignedQuantizedTransposeConvTest, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, SmallKernelWithSamePaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, SmallKernelWithSamePaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -297,11 +260,7 @@ TEST(SignedQuantizedTransposeConvTest, SmallKernelWithSamePaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -326,11 +285,7 @@ TEST(SignedQuantizedTransposeConvTest, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, SmallKernelWithValidPaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, SmallKernelWithValidPaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -356,11 +311,7 @@ TEST(SignedQuantizedTransposeConvTest, SmallKernelWithValidPaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -389,11 
+340,7 @@ TEST(SignedQuantizedTransposeConvTest, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, StrideWithSamePaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, StrideWithSamePaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -423,11 +370,7 @@ TEST(SignedQuantizedTransposeConvTest, StrideWithSamePaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -456,11 +399,7 @@ TEST(SignedQuantizedTransposeConvTest, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, StrideWithValidPaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, StrideWithValidPaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -490,11 +429,7 @@ TEST(SignedQuantizedTransposeConvTest, StrideWithValidPaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, SparseWeights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, SparseWeights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -524,11 +459,7 @@ TEST(SignedQuantizedTransposeConvTest, SparseWeights) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, SparseWeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(SignedQuantizedTransposeConvTest, SparseWeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -559,13 +490,11 @@ TEST(SignedQuantizedTransposeConvTest, SparseWeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, MultiThreading) { +TEST_F(SignedQuantizedTransposeConvTest, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -595,13 +524,11 @@ TEST(SignedQuantizedTransposeConvTest, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, MultiThreadingNoBias) { +TEST_F(SignedQuantizedTransposeConvTest, MultiThreadingNoBias) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -632,7 +559,7 @@ TEST(SignedQuantizedTransposeConvTest, MultiThreadingNoBias) { 
.Test(xnnpack_delegate.get()); } -TEST(SignedQuantizedTransposeConvTest, WeightsCache) { +TEST_F(SignedQuantizedTransposeConvTest, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/transpose_conv_test.cc b/tensorflow/lite/delegates/xnnpack/transpose_conv_test.cc index 260fd87e282a63..d37317c34f545a 100644 --- a/tensorflow/lite/delegates/xnnpack/transpose_conv_test.cc +++ b/tensorflow/lite/delegates/xnnpack/transpose_conv_test.cc @@ -19,17 +19,16 @@ limitations under the License. #include #include +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/transpose_conv_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(TransposeConvTest, 2x2Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct TransposeConvTest : DelegateTest {}; +TEST_F(TransposeConvTest, 2x2Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -50,11 +49,7 @@ TEST(TransposeConvTest, 2x2Stride2) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, 2x2Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, 2x2Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -76,11 +71,7 @@ TEST(TransposeConvTest, 2x2Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -101,11 +92,7 @@ TEST(TransposeConvTest, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, 3x3Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, 3x3Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -127,11 +114,7 @@ TEST(TransposeConvTest, 3x3Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, 4x4Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, 4x4Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -152,11 +135,7 @@ TEST(TransposeConvTest, 4x4Stride2) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, 4x4Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, 4x4Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -178,11 +157,7 @@ TEST(TransposeConvTest, 4x4Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, 4x4Stride4) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - 
TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, 4x4Stride4) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -203,11 +178,7 @@ TEST(TransposeConvTest, 4x4Stride4) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, 4x4Stride4NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, 4x4Stride4NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -229,11 +200,7 @@ TEST(TransposeConvTest, 4x4Stride4NoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -257,11 +224,7 @@ TEST(TransposeConvTest, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SmallKernelWithSamePaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SmallKernelWithSamePaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -286,11 +249,7 @@ TEST(TransposeConvTest, SmallKernelWithSamePaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -314,11 +273,7 @@ TEST(TransposeConvTest, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SmallKernelWithValidPaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SmallKernelWithValidPaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -343,11 +298,7 @@ TEST(TransposeConvTest, SmallKernelWithValidPaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -375,11 +326,7 @@ TEST(TransposeConvTest, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, StrideWithSamePaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, StrideWithSamePaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -408,11 +355,7 @@ TEST(TransposeConvTest, StrideWithSamePaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -440,11 +383,7 @@ 
TEST(TransposeConvTest, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, StrideWithValidPaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, StrideWithValidPaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -473,11 +412,7 @@ TEST(TransposeConvTest, StrideWithValidPaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, FP16Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, FP16Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -506,11 +441,7 @@ TEST(TransposeConvTest, FP16Weights) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, FP16WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, FP16WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -540,11 +471,7 @@ TEST(TransposeConvTest, FP16WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, TensorWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, TensorWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -573,11 +500,7 @@ TEST(TransposeConvTest, TensorWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, TensorWiseQuantizedInt8WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, TensorWiseQuantizedInt8WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -607,11 +530,7 @@ TEST(TransposeConvTest, TensorWiseQuantizedInt8WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, ChannelWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, ChannelWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -640,11 +559,7 @@ TEST(TransposeConvTest, ChannelWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, ChannelWiseQuantizedInt8WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, ChannelWiseQuantizedInt8WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -674,11 +589,7 @@ TEST(TransposeConvTest, ChannelWiseQuantizedInt8WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SparseWeights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SparseWeights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -707,11 +618,7 @@ TEST(TransposeConvTest, SparseWeights) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SparseWeightsNoBias) { - std::unique_ptr - 
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SparseWeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -741,11 +648,7 @@ TEST(TransposeConvTest, SparseWeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SparseFP16Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SparseFP16Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -775,11 +678,7 @@ TEST(TransposeConvTest, SparseFP16Weights) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SparseFP16WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SparseFP16WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -810,11 +709,7 @@ TEST(TransposeConvTest, SparseFP16WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SparseTensorWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SparseTensorWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -844,11 +739,7 @@ TEST(TransposeConvTest, SparseTensorWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SparseTensorWiseQuantizedInt8WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SparseTensorWiseQuantizedInt8WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -879,11 +770,7 @@ TEST(TransposeConvTest, SparseTensorWiseQuantizedInt8WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SparseChannelWiseQuantizedInt8Weights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SparseChannelWiseQuantizedInt8Weights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -913,11 +800,7 @@ TEST(TransposeConvTest, SparseChannelWiseQuantizedInt8Weights) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, SparseChannelWiseQuantizedInt8WeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(TransposeConvTest, SparseChannelWiseQuantizedInt8WeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -948,13 +831,11 @@ TEST(TransposeConvTest, SparseChannelWiseQuantizedInt8WeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, MultiThreading) { +TEST_F(TransposeConvTest, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -983,13 +864,11 @@ TEST(TransposeConvTest, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, 
MultiThreadingNoBias) { +TEST_F(TransposeConvTest, MultiThreadingNoBias) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -1019,7 +898,7 @@ TEST(TransposeConvTest, MultiThreadingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(TransposeConvTest, WeightsCache) { +TEST_F(TransposeConvTest, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_conv_2d_test.cc index 6660fc5af75ebe..b8c9d48f4f05a2 100644 --- a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_conv_2d_test.cc @@ -20,17 +20,16 @@ limitations under the License. #include #include +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(UnsignedQuantizedConv2D, 1x1) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct UnsignedQuantizedConv2D : DelegateTest {}; +TEST_F(UnsignedQuantizedConv2D, 1x1) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -61,11 +60,7 @@ TEST(UnsignedQuantizedConv2D, 1x1) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, 3x3) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -96,11 +91,7 @@ TEST(UnsignedQuantizedConv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -137,11 +128,7 @@ TEST(UnsignedQuantizedConv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, Grouped) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, Grouped) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -174,11 +161,7 @@ TEST(UnsignedQuantizedConv2D, Grouped) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, 
SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -211,11 +194,7 @@ TEST(UnsignedQuantizedConv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -248,11 +227,7 @@ TEST(UnsignedQuantizedConv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -289,11 +264,7 @@ TEST(UnsignedQuantizedConv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -330,11 +301,7 @@ TEST(UnsignedQuantizedConv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, DilationWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -371,11 +338,7 @@ TEST(UnsignedQuantizedConv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, DilationWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -412,11 +375,7 @@ TEST(UnsignedQuantizedConv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -453,11 +412,7 @@ TEST(UnsignedQuantizedConv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -494,11 +449,7 @@ TEST(UnsignedQuantizedConv2D, Relu6Activation) { 
.Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedConv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -535,13 +486,11 @@ TEST(UnsignedQuantizedConv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, MultiThreading) { +TEST_F(UnsignedQuantizedConv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -578,15 +527,13 @@ TEST(UnsignedQuantizedConv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedConv2D, TransientIndirectionBuffer) { - TfLiteXNNPackDelegateOptions xnnpack_options = +TEST_F(UnsignedQuantizedConv2D, TransientIndirectionBuffer) { + TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); - xnnpack_options.num_threads = 2; - xnnpack_options.flags |= + delegate_options.num_threads = 2; + delegate_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_depthwise_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_depthwise_conv_2d_test.cc index 7facb9787338c7..a269343dafc512 100644 --- a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_depthwise_conv_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_depthwise_conv_2d_test.cc @@ -20,17 +20,16 @@ limitations under the License. 
#include #include +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_depthwise_conv_2d_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(UnsignedQuantizedDepthwiseConv2D, 1x1) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct UnsignedQuantizedDepthwiseConv2D : DelegateTest {}; +TEST_F(UnsignedQuantizedDepthwiseConv2D, 1x1) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -56,11 +55,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, 1x1) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, 2x2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, 2x2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -87,11 +82,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, 2x2) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, 3x3) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, 3x3) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -118,11 +109,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, 3x3) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -151,11 +138,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, 5x5) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, 5x5) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -182,11 +165,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, 5x5) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, 5x5Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, 5x5Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -215,11 +194,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, 5x5Stride2) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -251,11 +226,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } 
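All of these delegate test files follow the same refactor: each TEST that used to construct its own std::unique_ptr delegate inline becomes a TEST_F on a DelegateTest fixture (for example `struct UnsignedQuantizedDepthwiseConv2D : DelegateTest {};`) pulled in through the newly included fingerprint_test_helpers.h, and tests that need non-default options call UseCustomDelegate(delegate_options) instead of creating the delegate locally. That helper header is not part of these hunks, so the following is only a sketch of its plausible shape, inferred from the call sites: the fixture has to own the `xnnpack_delegate` smart pointer that the unchanged `.Test(xnnpack_delegate.get())` lines keep using, and, given the header's name, it presumably also registers a default test fingerprint in SetUp, which is omitted below.

// Minimal sketch of a DelegateTest fixture, assuming its shape from the call
// sites in the hunks above; member names follow those call sites, everything
// else is an assumption rather than the actual fingerprint_test_helpers.h.
#include <memory>

#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

namespace tflite {
namespace xnnpack {

struct DelegateTest : testing::Test {
 protected:
  // Rebuilds the delegate from caller-supplied options; MultiThreading,
  // WeightsCache and TransientIndirectionBuffer tests call this first.
  void UseCustomDelegate(const TfLiteXNNPackDelegateOptions& options) {
    xnnpack_delegate.reset(TfLiteXNNPackDelegateCreate(&options));
  }

  // Default delegate owned by the fixture; test bodies keep passing
  // xnnpack_delegate.get() to the per-op testers.
  std::unique_ptr<TfLiteDelegate, decltype(TfLiteXNNPackDelegateDelete)*>
      xnnpack_delegate{TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete};
};

}  // namespace xnnpack
}  // namespace tflite

Under such a fixture a test body reduces to the tester configuration alone, which is exactly what the surviving + lines in these hunks show.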
-TEST(UnsignedQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -287,11 +258,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -327,11 +294,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, StrideWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -367,11 +330,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, DilationWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, DilationWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -407,11 +366,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, DilationWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, DilationWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, DilationWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -447,11 +402,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, DilationWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, DepthMultiplier) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, DepthMultiplier) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -489,11 +440,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, DepthMultiplier) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -529,11 +476,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, Relu6Activation) { - 
std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -569,11 +512,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -609,13 +548,11 @@ TEST(UnsignedQuantizedDepthwiseConv2D, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, MultiThreading) { +TEST_F(UnsignedQuantizedDepthwiseConv2D, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -651,7 +588,7 @@ TEST(UnsignedQuantizedDepthwiseConv2D, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, WeightsCache) { +TEST_F(UnsignedQuantizedDepthwiseConv2D, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -698,15 +633,13 @@ TEST(UnsignedQuantizedDepthwiseConv2D, WeightsCache) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedDepthwiseConv2D, TransientIndirectionBuffer) { - TfLiteXNNPackDelegateOptions xnnpack_options = +TEST_F(UnsignedQuantizedDepthwiseConv2D, TransientIndirectionBuffer) { + TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); - xnnpack_options.num_threads = 2; - xnnpack_options.flags |= + delegate_options.num_threads = 2; + delegate_options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_TRANSIENT_INDIRECTION_BUFFER; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&xnnpack_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_fully_connected_test.cc b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_fully_connected_test.cc index 90df47c884d042..25aabd2a559413 100644 --- a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_fully_connected_test.cc +++ b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_fully_connected_test.cc @@ -20,17 +20,16 @@ limitations under the License. 
#include #include +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(UnsignedQuantizedFullyConnected, 1D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct UnsignedQuantizedFullyConnected : DelegateTest {}; +TEST_F(UnsignedQuantizedFullyConnected, 1D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -54,11 +53,7 @@ TEST(UnsignedQuantizedFullyConnected, 1D) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, 1DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, 1DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -83,11 +78,7 @@ TEST(UnsignedQuantizedFullyConnected, 1DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, 2D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, 2D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -114,11 +105,7 @@ TEST(UnsignedQuantizedFullyConnected, 2D) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, 2DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, 2DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -146,11 +133,7 @@ TEST(UnsignedQuantizedFullyConnected, 2DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, 3D) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, 3D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -178,11 +161,7 @@ TEST(UnsignedQuantizedFullyConnected, 3D) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, 3DReshape) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, 3DReshape) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -210,11 +189,7 @@ TEST(UnsignedQuantizedFullyConnected, 3DReshape) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, 3DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, 3DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -243,11 +218,7 @@ TEST(UnsignedQuantizedFullyConnected, 3DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, 4D) { - std::unique_ptr - 
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, 4D) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -276,11 +247,7 @@ TEST(UnsignedQuantizedFullyConnected, 4D) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, 4DKeepDims) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, 4DKeepDims) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -310,11 +277,7 @@ TEST(UnsignedQuantizedFullyConnected, 4DKeepDims) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -342,11 +305,7 @@ TEST(UnsignedQuantizedFullyConnected, NoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, ReluActivation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, ReluActivation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -374,11 +333,7 @@ TEST(UnsignedQuantizedFullyConnected, ReluActivation) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, Relu6Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, Relu6Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -406,11 +361,7 @@ TEST(UnsignedQuantizedFullyConnected, Relu6Activation) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, ReluMinus1To1Activation) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedFullyConnected, ReluMinus1To1Activation) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto zero_point_rng = std::bind(std::uniform_int_distribution( @@ -438,13 +389,11 @@ TEST(UnsignedQuantizedFullyConnected, ReluMinus1To1Activation) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedFullyConnected, MultiThreading) { +TEST_F(UnsignedQuantizedFullyConnected, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_transpose_conv_test.cc b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_transpose_conv_test.cc index 8e6a779a1979f9..5167d18443ac30 100644 --- a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_transpose_conv_test.cc +++ 
b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_transpose_conv_test.cc @@ -19,17 +19,16 @@ limitations under the License. #include #include +#include "tensorflow/lite/delegates/xnnpack/fingerprint_test_helpers.h" #include "tensorflow/lite/delegates/xnnpack/quantized_transpose_conv_tester.h" #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" namespace tflite { namespace xnnpack { -TEST(UnsignedQuantizedTransposeConvTest, 2x2Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); +struct UnsignedQuantizedTransposeConvTest : DelegateTest {}; +TEST_F(UnsignedQuantizedTransposeConvTest, 2x2Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -51,11 +50,7 @@ TEST(UnsignedQuantizedTransposeConvTest, 2x2Stride2) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, 2x2Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, 2x2Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -78,11 +73,7 @@ TEST(UnsignedQuantizedTransposeConvTest, 2x2Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, 3x3Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, 3x3Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -104,11 +95,7 @@ TEST(UnsignedQuantizedTransposeConvTest, 3x3Stride2) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, 3x3Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, 3x3Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -131,11 +118,7 @@ TEST(UnsignedQuantizedTransposeConvTest, 3x3Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, 4x4Stride2) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, 4x4Stride2) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -157,11 +140,7 @@ TEST(UnsignedQuantizedTransposeConvTest, 4x4Stride2) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, 4x4Stride2NoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, 4x4Stride2NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -184,11 +163,7 @@ TEST(UnsignedQuantizedTransposeConvTest, 4x4Stride2NoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, 4x4Stride4) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, 4x4Stride4) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -210,11 +185,7 @@ TEST(UnsignedQuantizedTransposeConvTest, 4x4Stride4) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, 4x4Stride4NoBias) { 
- std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, 4x4Stride4NoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto output_rng = @@ -237,11 +208,7 @@ TEST(UnsignedQuantizedTransposeConvTest, 4x4Stride4NoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, SmallKernelWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, SmallKernelWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -266,11 +233,7 @@ TEST(UnsignedQuantizedTransposeConvTest, SmallKernelWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, SmallKernelWithSamePaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, SmallKernelWithSamePaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -296,11 +259,7 @@ TEST(UnsignedQuantizedTransposeConvTest, SmallKernelWithSamePaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, SmallKernelWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, SmallKernelWithValidPadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -325,11 +284,7 @@ TEST(UnsignedQuantizedTransposeConvTest, SmallKernelWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, SmallKernelWithValidPaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, SmallKernelWithValidPaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -355,11 +310,7 @@ TEST(UnsignedQuantizedTransposeConvTest, SmallKernelWithValidPaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, StrideWithSamePadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, StrideWithSamePadding) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -388,11 +339,7 @@ TEST(UnsignedQuantizedTransposeConvTest, StrideWithSamePadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, StrideWithSamePaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, StrideWithSamePaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -422,11 +369,7 @@ TEST(UnsignedQuantizedTransposeConvTest, StrideWithSamePaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, StrideWithValidPadding) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, StrideWithValidPadding) { std::random_device random_device; auto rng = 
std::mt19937(random_device()); auto batch_rng = @@ -455,11 +398,7 @@ TEST(UnsignedQuantizedTransposeConvTest, StrideWithValidPadding) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, StrideWithValidPaddingNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, StrideWithValidPaddingNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -489,11 +428,7 @@ TEST(UnsignedQuantizedTransposeConvTest, StrideWithValidPaddingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, SparseWeights) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, SparseWeights) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -523,11 +458,7 @@ TEST(UnsignedQuantizedTransposeConvTest, SparseWeights) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, SparseWeightsNoBias) { - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), - TfLiteXNNPackDelegateDelete); - +TEST_F(UnsignedQuantizedTransposeConvTest, SparseWeightsNoBias) { std::random_device random_device; auto rng = std::mt19937(random_device()); auto batch_rng = @@ -558,13 +489,11 @@ TEST(UnsignedQuantizedTransposeConvTest, SparseWeightsNoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, MultiThreading) { +TEST_F(UnsignedQuantizedTransposeConvTest, MultiThreading) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -594,13 +523,11 @@ TEST(UnsignedQuantizedTransposeConvTest, MultiThreading) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, MultiThreadingNoBias) { +TEST_F(UnsignedQuantizedTransposeConvTest, MultiThreadingNoBias) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); delegate_options.num_threads = 2; - std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); @@ -631,7 +558,7 @@ TEST(UnsignedQuantizedTransposeConvTest, MultiThreadingNoBias) { .Test(xnnpack_delegate.get()); } -TEST(UnsignedQuantizedTransposeConvTest, WeightsCache) { +TEST_F(UnsignedQuantizedTransposeConvTest, WeightsCache) { TfLiteXNNPackDelegateOptions delegate_options = TfLiteXNNPackDelegateOptionsDefault(); std::unique_ptr - xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), - TfLiteXNNPackDelegateDelete); + UseCustomDelegate(delegate_options); std::random_device random_device; auto rng = std::mt19937(random_device()); diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache.cc b/tensorflow/lite/delegates/xnnpack/weight_cache.cc index e9ccdbfd8eedd9..9aaf497700f87f 100644 --- a/tensorflow/lite/delegates/xnnpack/weight_cache.cc +++ b/tensorflow/lite/delegates/xnnpack/weight_cache.cc @@ -15,6 +15,9 @@ limitations under the License. 
#include "tensorflow/lite/delegates/xnnpack/weight_cache.h" #include + +#include "tensorflow/lite/logger.h" +#include "tensorflow/lite/minimal_logging.h" #if defined(_MSC_VER) #include #define F_OK 0 @@ -22,7 +25,9 @@ limitations under the License. #include #endif +#include #include // IWYU pragma: keep +#include #include #include #include @@ -33,32 +38,15 @@ limitations under the License. #include #include +#include "experimental.h" // from @XNNPACK #include "xnnpack.h" // from @XNNPACK #include "flatbuffers/flatbuffer_builder.h" // from @flatbuffers #include "flatbuffers/verifier.h" // from @flatbuffers #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/xnnpack/file_util.h" +#include "tensorflow/lite/delegates/xnnpack/macros.h" #include "tensorflow/lite/delegates/xnnpack/mmap_handle.h" #include "tensorflow/lite/delegates/xnnpack/weight_cache_schema_generated.h" -#include "tensorflow/lite/logger.h" -#include "tensorflow/lite/minimal_logging.h" - -#define XNNPACK_ABORT_CHECK(TEST, ...) \ - if (!(TEST)) { \ - TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, __VA_ARGS__); \ - std::abort(); \ - } - -#define XNNPACK_VAR_ARG_HEAD(FIRST, ...) FIRST - -#define XNNPACK_RETURN_CHECK(TEST, ...) \ - if (!(TEST)) { \ - if (sizeof(XNNPACK_VAR_ARG_HEAD("" __VA_ARGS__)) > sizeof("")) { \ - TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, \ - "XNNPack weight cache: " __VA_ARGS__); \ - } \ - return false; \ - } namespace tflite::xnnpack { @@ -92,6 +80,20 @@ bool FileExists(const char* path) { return access(path, F_OK) != -1; } +bool CheckFingerprints(const cache::schema::BufferList* buffer_list) { + if (buffer_list->fingerprints()) { + for (uint64_t cache_fingerprint : *buffer_list->fingerprints()) { + xnn_fingerprint fingerprint; + static_assert(sizeof(fingerprint) == sizeof(cache_fingerprint)); + std::memcpy(&fingerprint, &cache_fingerprint, sizeof(fingerprint)); + XNNPACK_RETURN_CHECK( + xnn_check_fingerprint(fingerprint) == xnn_status_success, + "fingerprint (id: 0x%x) could not be matched", fingerprint.id); + } + } + return true; +} + } // namespace #define XNN_MOVE_CONSTRUCT_MEMBER(x) x(std::move(other.x)) @@ -134,12 +136,17 @@ bool WeightCacheBuilder::Start(const char* path, const FileDescriptor& fd) { XNNPackCacheHeader header{XNNPackCacheHeader::kInvalidHeader}; header.buffer_list_offset = sizeof(header); - XNNPACK_RETURN_CHECK(fd_.Truncate(0), "could not truncate weight cache"); + XNNPACK_RETURN_CHECK(fd_.Truncate(0), "could not truncate weight cache."); + XNNPACK_RETURN_CHECK(fd_.SetPos(0) == 0, "couldn't move to file start."); XNNPACK_RETURN_CHECK(fd_.Write(&header, sizeof(header)), "could not write initial cache header in %s: %s.", file_path_.c_str(), strerror(errno)); schema_.base_offset = Align(sizeof(header), kMinAlignment); + + XNNPACK_RETURN_CHECK(StartBuildStep(), "failed to start initial write step."); + XNNPACK_RETURN_CHECK(StopBuildStep(), "failed to write initial step."); + return true; } @@ -191,7 +198,8 @@ void* WeightCacheBuilder::Reserve(size_t size) { } BufferLocation WeightCacheBuilder::Append(PackIdentifier pack_id, - const void* data, uint64_t size) { + const void* data, uint64_t size, + int32_t fingerprint_id) { XNNPACK_ABORT_CHECK(is_build_step_, "cannot append data to an unstarted builder."); // Add some padding so that the cache file can be mmaped and the buffer @@ -210,6 +218,34 @@ BufferLocation WeightCacheBuilder::Append(PackIdentifier pack_id, buffer.size = loc.size; schema_.buffers.push_back(std::make_unique(buffer)); + // Not passing a fingerprint id is a 
logic error on XNNPack's side. If we + // don't have a fingerprint for an operation, we have no way of ensuring that + // the generation of the cached data hasn't changed when reloading the cache. + // + // If we just log this and continue on with the work. This run will build a + // cache with cached data that can't be checked in the future. This will lead, + // in future runs that reuse the cache, to crashes that are impossible to + // debug or outputs that are nonsensical without any chance of linking this + // back to this error. + // + // We abort because we have no way of making that failure bubble up to the + // calling code to handle it gracefully... + XNNPACK_ABORT_CHECK(fingerprint_id != 0, + "XNNPack weight cache: no fingerprint identifier was set " + "when appending a buffer to the cache file."); + const xnn_fingerprint* fingerprint = xnn_get_fingerprint(fingerprint_id); + XNNPACK_ABORT_CHECK(fingerprint, + "XNNPack weight cache: could not find a fingerprint with " + "id 0x%x when appending a buffer to the cache file.", + fingerprint_id); + uint64_t fingerprint_value; + static_assert(sizeof(fingerprint_value) == sizeof(*fingerprint)); + std::memcpy(&fingerprint_value, fingerprint, sizeof(*fingerprint)); + if (std::find(schema_.fingerprints.begin(), schema_.fingerprints.end(), + fingerprint_value) == schema_.fingerprints.end()) { + schema_.fingerprints.push_back(fingerprint_value); + } + if (!fd_.Write(data, size)) { TFLITE_LOG_PROD(tflite::TFLITE_LOG_ERROR, "XNNPack weight cache: cannot append buffer to cache file"); @@ -242,16 +278,7 @@ bool WeightCacheBuilder::StopBuildStep() { XNNPACK_RETURN_CHECK(fd_.SetPos(layout_offset) != -1, "could not move in the file: %s", strerror(errno)); - XNNPACK_RETURN_CHECK( - sizeof(XNNPackCacheHeader::xnnpack_build_identifier) == - xnn_experimental_get_build_identifier_size(), - "cache file ('%s') header cannot hold XNNPack's build identifier: %s.", - file_path_.c_str(), strerror(errno)); - XNNPackCacheHeader header{XNNPackCacheHeader::kVersion}; - memcpy(header.xnnpack_build_identifier, - xnn_experimental_get_build_identifier_data(), - xnn_experimental_get_build_identifier_size()); header.buffer_list_offset = fd_.GetPos(); header.buffer_list_size = builder.GetSize(); @@ -339,7 +366,8 @@ bool MMapWeightCacheProvider::LoadOrStartBuild(const char* path, } const char* const safe_path = Sanitize(path); FileDescriptor build_fd = fd.Duplicate(); - if (!IsInMemoryCachePath(safe_path) && Load(safe_path, std::move(fd))) { + if (!IsInMemoryCachePath(safe_path) && !IsFileEmpty(safe_path, fd) && + Load(safe_path, std::move(fd))) { TFLITE_LOG_PROD(tflite::TFLITE_LOG_VERBOSE, "XNNPack weight cache loaded from '%s'.", safe_path); return true; @@ -409,16 +437,10 @@ bool MMapWeightCacheProvider::Load() { }(); XNNPACK_RETURN_CHECK(header.version == XNNPackCacheHeader::kVersion, - "incompatible header version. Got %zd, expected %zd. " - "Cache needs to be built again.", + "incompatible header version. Got %" PRIu64 + ", expected %" PRIu64 ". Cache needs to be built again.", header.version, XNNPackCacheHeader::kVersion); - XNNPACK_RETURN_CHECK(xnn_experimental_check_build_identifier( - header.xnnpack_build_identifier, - sizeof(header.xnnpack_build_identifier)), - "XNNPack weight cache: incompatible XNNPack version. 
" - "Cache needs to be built again."); - XNNPACK_RETURN_CHECK(header.buffer_list_offset < mmap_handle.size(), "invalid offset for buffer list descriptor."); @@ -438,6 +460,8 @@ bool MMapWeightCacheProvider::Load() { XNNPACK_RETURN_CHECK(buffer_list, "could not get packed weights from flatbuffer."); + XNNPACK_RETURN_CHECK(CheckFingerprints(buffer_list)); + mmap_buffer_base_offset_ = buffer_list->base_offset(); if (const auto buffers = buffer_list->buffers(); buffers) { for (auto* buffer : *buffers) { @@ -592,7 +616,8 @@ size_t MMapWeightCacheProvider::LookUpOrInsert( return offset_it->second.offset; } - const BufferLocation location = builder_.Append(pack_id, ptr, size); + const BufferLocation location = + builder_.Append(pack_id, ptr, size, cache_key->fingerprint_id); XNNPACK_ABORT_CHECK(!location.IsInvalid(), "Inserting data in the cache failed."); cache_key_to_offset_.emplace(pack_id, location); @@ -682,17 +707,39 @@ bool IsCompatibleCacheFile(const char* path) { FileDescriptor fd = FileDescriptor::Open(path, O_RDONLY); XNNPACK_RETURN_CHECK(fd.IsValid(), "Could not open file: %s: %s.", path, strerror(errno)); + return IsCompatibleCacheFile(std::move(fd)); +} + +bool IsCompatibleCacheFile(FileDescriptorView fd) { + XNNPACK_RETURN_CHECK(fd.IsValid(), "Invalid file descriptor: %d.", + fd.Value()); + const size_t current_pos = fd.GetPos(); + ScopeGuard reset_pos_on_return( + [current_pos, &fd] { fd.SetPos(current_pos); }); + XNNPACK_RETURN_CHECK(fd.SetPos(0) != -1, + "Couldn't move to the start of the file."); + XNNPackCacheHeader header; XNNPACK_RETURN_CHECK(fd.Read(&header, sizeof(header)), "Couldn't read file header."); - XNNPACK_RETURN_CHECK( - header.version == XNNPackCacheHeader::kVersion, - "Cache header version is incompatible. Expected %llu, got %llu.", - XNNPackCacheHeader::kVersion, header.version); - XNNPACK_RETURN_CHECK(xnn_experimental_check_build_identifier( - header.xnnpack_build_identifier, - sizeof(header.xnnpack_build_identifier)), - "Cache header build identifier is different."); + XNNPACK_RETURN_CHECK(header.version == XNNPackCacheHeader::kVersion, + "Cache header version is incompatible. Expected %" PRIu64 + ", got %" PRIu64 ".", + XNNPackCacheHeader::kVersion, header.version); + + fd.SetPos(header.buffer_list_offset); + auto buffer = std::make_unique(header.buffer_list_size); + XNNPACK_RETURN_CHECK(fd.Read(buffer.get(), header.buffer_list_size)); + + flatbuffers::Verifier verifier(buffer.get(), header.buffer_list_size); + XNNPACK_RETURN_CHECK(cache::schema::VerifyBufferListBuffer(verifier), + "buffer list validation failed."); + + const cache::schema::BufferList* buffer_list = + cache::schema::GetBufferList(buffer.get()); + XNNPACK_RETURN_CHECK(buffer_list, + "could not get packed weights from flatbuffer."); + XNNPACK_RETURN_CHECK(CheckFingerprints(buffer_list)); return true; } diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache.h b/tensorflow/lite/delegates/xnnpack/weight_cache.h index 7dd04a20f2095f..781422b4bec662 100644 --- a/tensorflow/lite/delegates/xnnpack/weight_cache.h +++ b/tensorflow/lite/delegates/xnnpack/weight_cache.h @@ -56,15 +56,25 @@ inline constexpr char kInMemoryCachePath[] = ":memory"; // When reading a cache file, the cache should be rejected if `version` // doesn't match `kVersion`. 
struct XNNPackCacheHeader { - enum : uint64_t { kInvalidHeader = 0, kVersion = 1 }; + enum : uint64_t { kInvalidHeader = 0, kVersion = 2 }; uint64_t version; - uint8_t xnnpack_build_identifier[32]; uint64_t buffer_list_offset; uint64_t buffer_list_size; }; +// Checks if the file at the given path is compatible with the current XNNPack +// weight cache. bool IsCompatibleCacheFile(const char* path); +// Checks if the opened file is compatible with the current XNNPack weight +// cache. +// +// Position in the file may be changed during the function execution but is +// restored upon exiting. +// +// Note: the file descriptor must be open and valid. +bool IsCompatibleCacheFile(FileDescriptorView fd); + struct PackIdentifier { enum { kNoId = SIZE_MAX }; uint64_t pack_algorithm_id = kNoId; @@ -150,8 +160,8 @@ class WeightCacheBuilder { // The buffer space must have been reserved before using `Reserve`. If not, a // new call to `Reserve` will be done and the data will be copied over. [[nodiscard /*The location to the appended data should be saved.*/]] - BufferLocation Append(PackIdentifier pack_id, const void* data, - uint64_t size); + BufferLocation Append(PackIdentifier pack_id, const void* data, uint64_t size, + int fingerprint_id); // Writes the flatbuffer to disk. [[nodiscard /*Writing the weight cache can fail.*/]] diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache_schema.fbs b/tensorflow/lite/delegates/xnnpack/weight_cache_schema.fbs index 33566b8be2208a..37f19612010709 100644 --- a/tensorflow/lite/delegates/xnnpack/weight_cache_schema.fbs +++ b/tensorflow/lite/delegates/xnnpack/weight_cache_schema.fbs @@ -32,11 +32,14 @@ table Buffer { } table BufferList { + /// A list of packing fingerprints. All of these need to be checked when + /// loading the cache to ensure that it is compatible. + fingerprints: [uint64]; /// A list of buffers. buffers: [Buffer]; /// Defines the base offset for the data in the file. That offset /// may be needed to guarantee data alignment. - base_offset:uint64; + base_offset: uint64; } root_type BufferList; diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc b/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc index a74e40018e1eba..c1e4071ff4a353 100644 --- a/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc +++ b/tensorflow/lite/delegates/xnnpack/weight_cache_test.cc @@ -35,6 +35,7 @@ limitations under the License. 
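Taken together, the weight_cache.cc, weight_cache.h and weight_cache_schema.fbs hunks above replace the old all-or-nothing build-identifier check with per-packing-routine fingerprints: WeightCacheBuilder::Append now records the xnn_fingerprint registered for each buffer's fingerprint_id into the new BufferList.fingerprints column (deduplicated and flattened to uint64), the cache header drops xnnpack_build_identifier and bumps kVersion to 2, and both Load() and the new IsCompatibleCacheFile(FileDescriptorView) overload reject a file if any stored fingerprint no longer passes xnn_check_fingerprint. The sketch below restates that round-trip in isolation; the helper names are illustrative, and the exact header declaring the experimental fingerprint API is assumed from the includes added in this diff.

#include <cstdint>
#include <cstring>

#include "experimental.h"  // from @XNNPACK, assumed to declare the fingerprint API
#include "xnnpack.h"       // from @XNNPACK

// Build side: flatten the fingerprint registered for `fingerprint_id` into
// the uint64 that ends up in BufferList.fingerprints. The real Append aborts
// when no fingerprint is registered; this sketch just reports failure.
inline bool FlattenFingerprintForCache(int32_t fingerprint_id,
                                       uint64_t& flattened) {
  const xnn_fingerprint* fingerprint = xnn_get_fingerprint(fingerprint_id);
  if (fingerprint == nullptr) return false;
  static_assert(sizeof(flattened) == sizeof(*fingerprint));
  std::memcpy(&flattened, fingerprint, sizeof(*fingerprint));
  return true;
}

// Load side: a cached buffer is only trusted if every stored fingerprint
// still matches what the currently running XNNPack build reports.
inline bool CachedFingerprintStillValid(uint64_t flattened) {
  xnn_fingerprint fingerprint;
  std::memcpy(&fingerprint, &flattened, sizeof(fingerprint));
  return xnn_check_fingerprint(fingerprint) == xnn_status_success;
}

This is also why xnn_weights_cache_look_up_key now carries a fingerprint_id, and why the tests that follow seed a fingerprint with xnn_set_fingerprint before exercising the cache.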
#include #include +#include "experimental.h" // from @XNNPACK #include "xnnpack.h" // from @XNNPACK #include "flatbuffers/verifier.h" // from @flatbuffers #include "tensorflow/lite/c/common.h" @@ -56,7 +57,13 @@ namespace { using testing::ElementsAreArray; -TEST(WeightCacheBuilderTest, ReserveAppendWriteWorks) { +static xnn_fingerprint kDefaultFingerprint{/*id=*/0xf00d, /*value=*/0xb33f}; + +struct WeightCacheBuilderTest : testing::Test { + void SetUp() override { xnn_set_fingerprint(kDefaultFingerprint); } +}; + +TEST_F(WeightCacheBuilderTest, ReserveAppendWriteWorks) { using std::size; const std::string payload = "This is some data in the file."; @@ -72,7 +79,8 @@ TEST(WeightCacheBuilderTest, ReserveAppendWriteWorks) { const size_t payload_size = size(payload); void* buffer = builder.Reserve(payload_size); std::memcpy(buffer, payload.c_str(), payload_size); - auto loc = builder.Append(dummy_id, buffer, payload_size); + auto loc = + builder.Append(dummy_id, buffer, payload_size, kDefaultFingerprint.id); EXPECT_EQ(loc.size, payload_size); EXPECT_GE(builder.capacity(), payload_size); @@ -123,7 +131,7 @@ TEST(WeightCacheBuilderTest, ReserveAppendWriteWorks) { EXPECT_THAT(cache_data, ElementsAreArray(payload)); } -TEST(WeightCacheBuilderTest, AppendWithoutReserveWriteWorks) { +TEST_F(WeightCacheBuilderTest, AppendWithoutReserveWriteWorks) { using std::size; const std::string payload = "This is some data in the file."; @@ -137,7 +145,8 @@ TEST(WeightCacheBuilderTest, AppendWithoutReserveWriteWorks) { ASSERT_TRUE(builder.StartBuildStep()); const size_t payload_size = size(payload); - auto loc = builder.Append(dummy_id, payload.c_str(), payload_size); + auto loc = builder.Append(dummy_id, payload.c_str(), payload_size, + kDefaultFingerprint.id); EXPECT_EQ(loc.size, payload_size); @@ -186,7 +195,7 @@ TEST(WeightCacheBuilderTest, AppendWithoutReserveWriteWorks) { EXPECT_THAT(cache_data, ElementsAreArray(payload)); } -TEST(WeightCacheBuilderTest, CorruptBufferListFailsGracefully) { +TEST_F(WeightCacheBuilderTest, CorruptBufferListFailsGracefully) { const std::string cache_path = testing::TempDir() + "/cache"; const std::string payload = "This is some data in the file."; const PackIdentifier dummy_id{1, 2, 3}; @@ -198,7 +207,8 @@ TEST(WeightCacheBuilderTest, CorruptBufferListFailsGracefully) { ASSERT_TRUE(builder.StartBuildStep()); const size_t payload_size = size(payload); - auto loc = builder.Append(dummy_id, payload.c_str(), payload_size); + auto loc = builder.Append(dummy_id, payload.c_str(), payload_size, + kDefaultFingerprint.id); EXPECT_EQ(loc.size, payload_size); ASSERT_TRUE(builder.StopBuildStep()); @@ -218,13 +228,13 @@ TEST(WeightCacheBuilderTest, CorruptBufferListFailsGracefully) { EXPECT_FALSE(builder.StartBuildStep()); } -TEST(WeightCacheBuilderTest, InvalidFileDescriptorFails) { +TEST_F(WeightCacheBuilderTest, InvalidFileDescriptorFails) { WeightCacheBuilder builder; EXPECT_FALSE(builder.Start("", FileDescriptor())); EXPECT_FALSE(builder.Start("/seldf/sedsft", FileDescriptor())); } -TEST(WeightCacheBuilderTest, InMemoryCacheCanBeBuilt) { +TEST_F(WeightCacheBuilderTest, InMemoryCacheCanBeBuilt) { if (!TfLiteXNNPackDelegateCanUseInMemoryWeightCacheProvider()) { GTEST_SKIP() << "In-memory weight cache isn't enabled for this build or " "isn't supported by the current system, skipping test."; @@ -239,7 +249,7 @@ TEST(WeightCacheBuilderTest, InMemoryCacheCanBeBuilt) { EXPECT_EQ(errno, ENOENT); } -TEST(WeightCacheBuilderTest, MultipleStepBuild) { +TEST_F(WeightCacheBuilderTest, 
MultipleStepBuild) { using std::size; const std::string payload1 = "This is some data in the file."; @@ -262,7 +272,8 @@ TEST(WeightCacheBuilderTest, MultipleStepBuild) { const size_t payload_size = size(payload1); void* buffer = builder.Reserve(payload_size); std::memcpy(buffer, payload1.c_str(), payload_size); - const auto loc = builder.Append(dummy_id1, buffer, payload_size); + const auto loc = + builder.Append(dummy_id1, buffer, payload_size, kDefaultFingerprint.id); EXPECT_EQ(loc.size, payload_size); EXPECT_GE(builder.capacity(), payload_size); } @@ -270,7 +281,8 @@ TEST(WeightCacheBuilderTest, MultipleStepBuild) { const size_t payload_size = size(payload3); void* buffer = builder.Reserve(payload_size); std::memcpy(buffer, payload3.c_str(), payload_size); - const auto loc = builder.Append(dummy_id3, buffer, payload_size); + const auto loc = + builder.Append(dummy_id3, buffer, payload_size, kDefaultFingerprint.id); (void)loc; } @@ -284,7 +296,8 @@ TEST(WeightCacheBuilderTest, MultipleStepBuild) { const size_t payload_size = size(payload2); void* buffer = builder.Reserve(payload_size); std::memcpy(buffer, payload2.c_str(), payload_size); - const auto loc = builder.Append(dummy_id2, buffer, payload_size); + const auto loc = + builder.Append(dummy_id2, buffer, payload_size, kDefaultFingerprint.id); EXPECT_EQ(loc.size, payload_size); EXPECT_GE(builder.capacity(), payload_size); } @@ -389,7 +402,8 @@ struct FakeContext { const int weights_index) const { return {.seed = algorithm_seed, .kernel = buffers[weights_index].data(), - .bias = nullptr}; + .bias = nullptr, + .fingerprint_id = kDefaultFingerprint.id}; } // Creates a look up key for the XNNPack weight provider C interface. @@ -398,7 +412,8 @@ struct FakeContext { const int bias_index) const { return {.seed = algorithm_seed, .kernel = buffers[weights_index].data(), - .bias = buffers[bias_index].data()}; + .bias = buffers[bias_index].data(), + .fingerprint_id = kDefaultFingerprint.id}; } // Helps creating fake packed data. @@ -505,6 +520,7 @@ struct BuildMMapWeightCacheProviderTest : testing::TestWithParam { GTEST_SKIP() << "In-memory weight cache isn't enabled for this build or " "isn't supported by the current system, skipping test."; } + xnn_set_fingerprint(kDefaultFingerprint); AddTensors(); EndSetup(); } @@ -723,6 +739,7 @@ struct MMapWeightCacheProviderTest : testing::TestWithParam { GTEST_SKIP() << "In-memory weight cache isn't enabled for this build or " "isn't supported by the current system, skipping test."; } + xnn_set_fingerprint(kDefaultFingerprint); } bool use_explicit_fd = GetParam().use_explicit_fd; const char* const explicit_fd_path = GetParam().explicit_fd_path; @@ -783,12 +800,14 @@ TEST_P(MMapWeightCacheProviderTest, XnnpackCApiJourney) { const xnn_weights_cache_look_up_key look_up_key_1{ .seed = fake_packing_algo_seed, .kernel = tensors[0].data.data, - .bias = tensors[1].data.data}; + .bias = tensors[1].data.data, + .fingerprint_id = kDefaultFingerprint.id}; const xnn_weights_cache_look_up_key look_up_key_3{ .seed = fake_packing_algo_seed, .kernel = tensors[3].data.data, - .bias = tensors[4].data.data}; + .bias = tensors[4].data.data, + .fingerprint_id = kDefaultFingerprint.id}; // Lookup non-packed tensor. 
ASSERT_EQ(cache->look_up(cache, &look_up_key_1), SIZE_MAX); @@ -829,7 +848,8 @@ TEST_P(MMapWeightCacheProviderTest, XnnpackCApiJourney) { const xnn_weights_cache_look_up_key look_up_key_2{ .seed = fake_packing_algo_seed, .kernel = tensors[2].data.data, - .bias = tensors[3].data.data}; + .bias = tensors[3].data.data, + .fingerprint_id = kDefaultFingerprint.id}; const size_t build_offset_2 = cache->look_up_or_insert( cache, &look_up_key_2, (void*)packed_data_ref_2, @@ -904,17 +924,20 @@ TEST_P(MMapWeightCacheProviderTest, XnnpackCApiJourney) { const xnn_weights_cache_look_up_key look_up_key_1{ .seed = fake_packing_algo_seed, .kernel = tensors[0].data.data, - .bias = tensors[1].data.data}; + .bias = tensors[1].data.data, + .fingerprint_id = kDefaultFingerprint.id}; const xnn_weights_cache_look_up_key look_up_key_2{ .seed = fake_packing_algo_seed, .kernel = tensors[2].data.data, - .bias = tensors[3].data.data}; + .bias = tensors[3].data.data, + .fingerprint_id = kDefaultFingerprint.id}; const xnn_weights_cache_look_up_key look_up_key_3{ .seed = fake_packing_algo_seed, .kernel = tensors[3].data.data, - .bias = tensors[4].data.data}; + .bias = tensors[4].data.data, + .fingerprint_id = kDefaultFingerprint.id}; ASSERT_TRUE(cache->is_finalized(cache)); @@ -945,65 +968,149 @@ TEST_P(MMapWeightCacheProviderTest, XnnpackCApiJourney) { } } -TEST_P(MMapWeightCacheProviderTest, XnnpackRebuildOnVersionMismatch) { +TEST_P(MMapWeightCacheProviderTest, CacheIsRebuiltOnFingerprintMismatch) { + if (use_in_memory_cache) { + GTEST_SUCCEED() << "In-memory cache is never reloaded."; + return; + } TempFileDesc temp_fd; const char* temp_fd_cpath = explicit_fd_path; - FileDescriptor temp_fd_value = temp_fd.Duplicate(); - { // Set bad build identifier - XNNPackCacheHeader header{.version = XNNPackCacheHeader::kVersion}; - header.xnnpack_build_identifier[0] += 1; - ASSERT_TRUE(temp_fd_value.Write(&header, sizeof(header))); + xnn_fingerprint test_fingeprint{0x7357, 0xF33D}; + { // Build a cache file with a specific fingerprint. + // Clear fingerprints and add a test fingerprint to XNNPack. + xnn_clear_fingerprints(); + xnn_set_fingerprint(test_fingeprint); + + // Build a cache file. + MMapWeightCacheProvider cache_provider; + + const char kernel[] = "Fake data."; + TfLiteTensor tensor; + tensor.data.data = (void*)kernel; + cache_provider.MapTensorIdentifiers( + &tensor, /*size=*/1, /*tensor_index_to_identifier=*/{{0, 1}}); + ASSERT_TRUE( + cache_provider.LoadOrStartBuild(temp_fd_cpath, temp_fd.Duplicate())); + ASSERT_TRUE(cache_provider.StartBuildStep()); + const xnn_weights_cache_look_up_key look_up_key_1{ + .seed = 1234, + .kernel = kernel, + .bias = nullptr, + .fingerprint_id = test_fingeprint.id}; + xnn_weights_cache_t cache = &cache_provider.GetCacheProvider(); + const size_t build_offset_1 = cache->look_up_or_insert( + cache, &look_up_key_1, + const_cast(reinterpret_cast(kernel)), + sizeof(kernel)); + (void)build_offset_1; + ASSERT_TRUE(cache_provider.StopBuildStep()); } if (!use_explicit_fd) { temp_fd.Close(); temp_fd_cpath = temp_fd.GetCPath(); - temp_fd_value.Close(); - if (use_in_memory_cache) { - temp_fd_cpath = kInMemoryCachePath; - } } + // Change the test fingerprint value. + test_fingeprint.value = 0xdeadb33f; + xnn_set_fingerprint(test_fingeprint); + + // Reload the file. 
auto build_cache_provider = std::make_unique(); MMapWeightCacheProvider& cache_provider = *build_cache_provider; - ASSERT_TRUE(cache_provider.LoadOrStartBuild(temp_fd_cpath, - temp_fd_value.Duplicate())); + ASSERT_TRUE( + cache_provider.LoadOrStartBuild(temp_fd_cpath, temp_fd.Duplicate())); ASSERT_TRUE(cache_provider.StartBuildStep()); } -class IsCompatibleCacheFileTest : public testing::Test { +enum class IsCompatibleCacheFileTestOverload { kPath, kDescriptor }; + +class IsCompatibleCacheFileTest + : public testing::TestWithParam { public: + using Param = IsCompatibleCacheFileTestOverload; + void SetUp() override { - header_.version = XNNPackCacheHeader::kVersion; - memcpy(header_.xnnpack_build_identifier, - xnn_experimental_get_build_identifier_data(), - xnn_experimental_get_build_identifier_size()); + xnn_clear_fingerprints(); + xnn_set_fingerprint(kDefaultFingerprint); + + // Build a cache file. + MMapWeightCacheProvider cache_provider; + + const char kernel[] = "Fake data."; + TfLiteTensor tensor; + tensor.data.data = (void*)kernel; + cache_provider.MapTensorIdentifiers( + &tensor, /*size=*/1, /*tensor_index_to_identifier=*/{{0, 1}}); + ASSERT_TRUE( + cache_provider.LoadOrStartBuild(fd_.GetCPath(), fd_.Duplicate())); + ASSERT_TRUE(cache_provider.StartBuildStep()); + const xnn_weights_cache_look_up_key look_up_key_1{ + .seed = 1234, + .kernel = kernel, + .bias = nullptr, + .fingerprint_id = kDefaultFingerprint.id}; + xnn_weights_cache_t cache = &cache_provider.GetCacheProvider(); + const size_t build_offset_1 = cache->look_up_or_insert( + cache, &look_up_key_1, + const_cast(reinterpret_cast(kernel)), + sizeof(kernel)); + (void)build_offset_1; + ASSERT_TRUE(cache_provider.StopBuildStep()); } - bool WriteHeaderAndReturnIsCompatibleCacheFile() { - const bool res = fd_.Write(&header_, sizeof(header_)); - fd_.Close(); - return res && IsCompatibleCacheFile(fd_.GetCPath()); + void ChangeRuntimeFingerprintValue() { + xnn_set_fingerprint( + {kDefaultFingerprint.id, kDefaultFingerprint.value + 1}); + } + + bool CallIsCompatibleCacheFile() { + switch (GetParam()) { + case Param::kPath: + fd_.Close(); + return IsCompatibleCacheFile(fd_.GetCPath()); + case Param::kDescriptor: { + const auto pos = fd_.GetPos(); + EXPECT_NE(pos, 0); // We test with a non zero position. 
+ return IsCompatibleCacheFile(fd_); + EXPECT_EQ(fd_.GetPos(), pos); + } + } } - XNNPackCacheHeader header_{}; TempFileDesc fd_; }; -TEST_F(IsCompatibleCacheFileTest, ReturnsTrueForACorrectHeader) { - EXPECT_TRUE(WriteHeaderAndReturnIsCompatibleCacheFile()); +std::string Name( + const testing::TestParamInfo& info) { + switch (info.param) { + case IsCompatibleCacheFileTestOverload::kPath: + return "WithPathOverload"; + case IsCompatibleCacheFileTestOverload::kDescriptor: + return "WithFileDescriptorOverload"; + } } -TEST_F(IsCompatibleCacheFileTest, ReturnsFalseForWrongHeaderVersion) { - header_.version += 1; - EXPECT_FALSE(WriteHeaderAndReturnIsCompatibleCacheFile()); +TEST_P(IsCompatibleCacheFileTest, ReturnsTrueWhenFingerprintMatches) { + EXPECT_TRUE(CallIsCompatibleCacheFile()); } -TEST_F(IsCompatibleCacheFileTest, ReturnsFalseForWrongBuildIdentifier) { - header_.xnnpack_build_identifier[0] += 1; - EXPECT_FALSE(WriteHeaderAndReturnIsCompatibleCacheFile()); +TEST_P(IsCompatibleCacheFileTest, ReturnsFalseWhenFingerprintMismatches) { + ChangeRuntimeFingerprintValue(); + EXPECT_FALSE(CallIsCompatibleCacheFile()); } +TEST_P(IsCompatibleCacheFileTest, ReturnsFalseWhenFingerprintIsNotFound) { + xnn_clear_fingerprints(); + EXPECT_FALSE(CallIsCompatibleCacheFile()); +} + +INSTANTIATE_TEST_SUITE_P( + Test, IsCompatibleCacheFileTest, + testing::Values(IsCompatibleCacheFileTest::Param::kPath, + IsCompatibleCacheFileTest::Param::kDescriptor), + Name); + } // namespace } // namespace tflite::xnnpack diff --git a/tensorflow/lite/delegates/xnnpack/weight_cache_test_helpers.h b/tensorflow/lite/delegates/xnnpack/weight_cache_test_helpers.h index 365f94dc6ce885..ab29545730664d 100644 --- a/tensorflow/lite/delegates/xnnpack/weight_cache_test_helpers.h +++ b/tensorflow/lite/delegates/xnnpack/weight_cache_test_helpers.h @@ -86,14 +86,14 @@ class TempFileDesc : public FileDescriptor { errno_t err = tmpnam_s(filename, L_tmpnam_s); if (err) { fprintf(stderr, "Could not create temporary filename.\n"); - std::abort(); + return; } path_ = filename; FileDescriptor fd = FileDescriptor::Open(path_.c_str(), _O_CREAT | _O_EXCL | _O_RDWR, 0644); if (!fd.IsValid()) { fprintf(stderr, "Could not create temporary file.\n"); - std::abort(); + return; } Reset(fd.Release()); } diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc index a869ca74a4cc5e..f390b8065caac2 100644 --- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc +++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc @@ -211,7 +211,7 @@ bool CheckZeroPointForPerChannelQuantization( // be 8. 
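// The operator change below tightens the zero-point check: with '&&', any
// non-zero zero point on a kTfLiteInt4 tensor slipped through; with '||',
// only kTfLiteInt4 with zero_point == 8 (or any type with zero_point == 0)
// is accepted. Worked cases for the new predicate
//   reject = (zp != 0) && (type != kTfLiteInt4 || zp != 8):
//     zp == 0, any type     -> accept (unchanged)
//     zp == 8, kTfLiteInt4  -> accept (unchanged)
//     zp == 5, kTfLiteInt4  -> reject (previously accepted)
//     zp == 8, kTfLiteInt8  -> reject (previously accepted)
//     zp == 5, kTfLiteInt8  -> reject (unchanged)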
for (int c = 0; c < quantization_zero_point.size; c++) { const int zero_point = quantization_zero_point.data[c]; - if (zero_point != 0 && (tensor.type != kTfLiteInt4 && zero_point != 8)) { + if (zero_point != 0 && (tensor.type != kTfLiteInt4 || zero_point != 8)) { TF_LITE_KERNEL_LOG(context, "unsupported zero-point value (%d) in channel %d of " "%s tensor %d in XNNPACK delegate", @@ -268,7 +268,8 @@ xnn_datatype GetXNNPackDatatype(TfLiteContext* context, return xnn_datatype_quint8; } case kTfLiteInt8: - case kTfLiteInt4: { + case kTfLiteInt4: + case kTfLiteInt2: { switch (tensor.quantization.type) { case kTfLiteAffineQuantization: { const auto quantization_params = @@ -320,6 +321,8 @@ xnn_datatype GetXNNPackDatatype(TfLiteContext* context, return xnn_datatype_qcint8; case kTfLiteInt4: return xnn_datatype_qcint4; + case kTfLiteInt2: + return xnn_datatype_qcint2; default: // Outermost switch prevents this TFL_UNREACHABLE(); @@ -528,6 +531,22 @@ TfLiteStatus DefineXNNPACKValue(TfLiteContext* context, xnn_subgraph_t subgraph, dims.size(), dims.data(), data, XNN_INVALID_VALUE_ID, flags, xnnpack_id); } break; + case xnn_datatype_qcint2: { + status = xnn_define_channelwise_quantized_tensor_value_v3( + subgraph, datatype, + static_cast( + tensor.quantization.params) + ->zero_point->data[0], + static_cast( + tensor.quantization.params) + ->scale->data, + dims.size(), + static_cast( + tensor.quantization.params) + ->quantized_dimension, + dims.data(), data, XNN_INVALID_VALUE_ID, flags, xnnpack_id, + /*channelwise_zero_point=*/nullptr); + } break; case xnn_datatype_qcint4: case xnn_datatype_qcint8: case xnn_datatype_qcint32: @@ -2228,18 +2247,21 @@ class Subgraph { return kTfLiteError; } - static TfLiteStatus CheckTensorFloat32OrFloat16OrQCInt4OrQCInt8Type( - const Delegate& delegate, TfLiteContext* context, - const TfLiteTensor& tensor, int expected_quantized_dimension, - int tensor_index, int node_index) { + static TfLiteStatus CheckTensorFilterType(const Delegate& delegate, + TfLiteContext* context, + const TfLiteTensor& tensor, + int expected_quantized_dimension, + int tensor_index, int node_index) { switch (tensor.type) { case kTfLiteFloat32: case kTfLiteFloat16: return kTfLiteOk; + case kTfLiteInt2: case kTfLiteInt4: case kTfLiteInt8: if (delegate.support_signed_8bit_quantization() && - (kTfLiteInt8 == tensor.type || kTfLiteInt4 == tensor.type)) { + (kTfLiteInt8 == tensor.type || kTfLiteInt4 == tensor.type || + kTfLiteInt2 == tensor.type)) { switch (tensor.quantization.type) { case kTfLiteAffineQuantization: { const TfLiteAffineQuantization* quantization_params = @@ -2277,6 +2299,20 @@ class Subgraph { quantization_params->quantized_dimension, tensor_index, node_index); return kTfLiteError; + } else if (tensor.type == kTfLiteInt2 && + quantization_params->scale->size != + SizeOfDimension( + &tensor, + quantization_params->quantized_dimension)) { + // Only per channel quantized 2 bit weights are supported. + TF_LITE_MAYBE_KERNEL_LOG( + context, + "2 bit weights must be per channel and not per tensor " + "quantized in channel #%" PRId32 + " in tensor #%d in node #%d", + quantization_params->quantized_dimension, tensor_index, + node_index); + return kTfLiteError; } break; } @@ -4489,7 +4525,7 @@ class Subgraph { // Dynamic filter is supported, but only for FP32. 
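// Context for the kTfLiteInt2 paths added above and below: 2-bit weights are
// only accepted on the per-channel (qcint2) path, and the FULLY_CONNECTED
// check below additionally requires the input-channel count to be a multiple
// of 4. That constraint is consistent with four 2-bit values packing into one
// byte; an illustrative packing routine (not the delegate's actual code,
// values assumed to be in [-2, 1]):
#include <cstddef>
#include <cstdint>
#include <vector>
inline std::vector<uint8_t> PackInt2(const std::vector<int8_t>& values) {
  std::vector<uint8_t> packed((values.size() + 3) / 4, 0);
  for (size_t i = 0; i < values.size(); ++i) {
    // Keep the low two bits of each value and place four values per byte.
    packed[i / 4] |= (static_cast<uint8_t>(values[i]) & 0x3) << (2 * (i % 4));
  }
  return packed;
}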
if (!(delegate.support_dynamic_fully_connected_operator() && filter_tensor.type == kTfLiteFloat32)) { - TF_LITE_ENSURE_STATUS(CheckTensorFloat32OrFloat16OrQCInt4OrQCInt8Type( + TF_LITE_ENSURE_STATUS(CheckTensorFilterType( delegate, logging_context, filter_tensor, /*expected_quantized_dimension=*/0, filter_tensor_id, node_index)); if (quasi_static_tensors.count(filter_tensor_id) == 0) { @@ -4543,10 +4579,12 @@ class Subgraph { bool dynamically_quantized = (!delegate.disable_dynamically_quantized_ops() && (input_tensor.type == kTfLiteFloat32 && - (filter_tensor.type == kTfLiteInt4 || + (filter_tensor.type == kTfLiteInt2 || + filter_tensor.type == kTfLiteInt4 || filter_tensor.type == kTfLiteInt8))); bool supported_srq = (input_tensor.type == kTfLiteInt8 && - (filter_tensor.type == kTfLiteInt4 || + (filter_tensor.type == kTfLiteInt2 || + filter_tensor.type == kTfLiteInt4 || filter_tensor.type == kTfLiteInt8)); if (input_tensor.type != output_tensor.type || ((input_tensor.type != filter_tensor.type) && @@ -4567,6 +4605,15 @@ class Subgraph { return kTfLiteError; } + if (filter_tensor.type == kTfLiteInt2 && input_channels % 4 != 0) { + TF_LITE_MAYBE_KERNEL_LOG( + logging_context, + "unsupported non-multiple of 4 number of inputs channels (%d) in" + " FULLY_CONNECTED operator #%d", + input_channels, node_index); + return kTfLiteError; + } + float output_min = -std::numeric_limits::infinity(); float output_max = +std::numeric_limits::infinity(); TF_LITE_ENSURE_STATUS(ConvertActivationToOutputRange( @@ -4644,6 +4691,16 @@ class Subgraph { &filter_tensor.dims->data[NumDimensions(&filter_tensor)]); uint32_t kernel_id = XNN_INVALID_VALUE_ID; switch (filter_datatype) { + case xnn_datatype_qcint2: { + int32_t zero_point_value = filter_params->zero_point->data[0]; + status = xnn_define_channelwise_quantized_tensor_value_v3( + subgraph, filter_datatype, zero_point_value, + filter_params->scale->data, filter_dims.size(), + /*channel_dim=*/0, filter_dims.data(), + GetTensorData(&filter_tensor), XNN_INVALID_VALUE_ID, + /*flags=*/0, &kernel_id, /*channelwise_zero_point=*/nullptr); + break; + } case xnn_datatype_qcint4: case xnn_datatype_qcint8: { int32_t zero_point_value = filter_params->zero_point->data[0]; diff --git a/tensorflow/lite/experimental/microfrontend/ops/audio_microfrontend_op.cc b/tensorflow/lite/experimental/microfrontend/ops/audio_microfrontend_op.cc index 8cba5779565223..56692cbcaeecb7 100644 --- a/tensorflow/lite/experimental/microfrontend/ops/audio_microfrontend_op.cc +++ b/tensorflow/lite/experimental/microfrontend/ops/audio_microfrontend_op.cc @@ -292,8 +292,8 @@ class AudioMicrofrontendOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("AudioMicrofrontend") .Device(tensorflow::DEVICE_CPU) - .TypeConstraint("out_type"), - AudioMicrofrontendOp); + .TypeConstraint("out_type"), + AudioMicrofrontendOp); REGISTER_KERNEL_BUILDER(Name("AudioMicrofrontend") .Device(tensorflow::DEVICE_CPU) .TypeConstraint("out_type"), diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index 3249969563db1d..00fdb0c9b77f18 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -39,6 +39,21 @@ exports_files([ "tflite_version_script.lds", ]) +exports_files([ + # go/keep-sorted start + "src/main/java/org/tensorflow/lite/DataType.java", + "src/main/java/org/tensorflow/lite/DataTypeUtils.java", + "src/main/java/org/tensorflow/lite/InterpreterFactory.java", + "src/main/java/org/tensorflow/lite/NativeInterpreterWrapperExperimental.java", + 
"src/main/java/org/tensorflow/lite/NativeSignatureRunnerWrapper.java", + "src/main/java/org/tensorflow/lite/RuntimeFlavor.java", + "src/main/java/org/tensorflow/lite/Tensor.java", + "src/main/java/org/tensorflow/lite/TensorImpl.java", + "src/main/java/org/tensorflow/lite/annotations/UsedByReflection.java", + "src/main/java/org/tensorflow/lite/package-info.java", + # go/keep-sorted end +]) + #----------------------------------------------------------------------------- # Filegroup targets. @@ -928,6 +943,17 @@ filegroup( visibility = ["//visibility:public"], ) +filegroup( + name = "portable_tests_for_litert", + srcs = [ + "src/test/java/org/tensorflow/lite/InterpreterMobileNetTest.java", + "src/test/java/org/tensorflow/lite/SupportedFeatures.java", + "src/test/java/org/tensorflow/lite/TensorFlowLiteTest.java", + "src/test/java/org/tensorflow/lite/TestInit.java", + ], + visibility = ["//visibility:public"], +) + # portable_flex_tests includes files for testing interpreter with Flex delegate. filegroup( name = "portable_flex_tests", diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index 6a3ec9f57e2a02..5a47ace22d912b 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -167,7 +167,7 @@ cc_library( "//tensorflow/lite/tools/optimize:quantization_utils", "//tensorflow/lite/tools/serialization:writer_lib", "//tensorflow/lite/tools/versioning", - "@FP16", + "//tensorflow/lite/types:half", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", @@ -574,6 +574,7 @@ cc_test( "//tensorflow/lite:array", "//tensorflow/lite:util", "//tensorflow/lite/core/c:common", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest_main", ], ) @@ -833,6 +834,7 @@ cc_library( "@ruy//ruy/profiler:instrumentation", "//tensorflow/lite/c:c_api_types", "//tensorflow/lite/c:common", + "//tensorflow/lite/types:half", "//tensorflow/lite:array", "//tensorflow/lite:builtin_ops", "//tensorflow/lite:cc_api_stable", @@ -1118,6 +1120,7 @@ cc_test( "//tensorflow/lite/core:framework_stable", "//tensorflow/lite/core/api", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_absl//absl/memory", "@com_google_googletest//:gtest", "@flatbuffers", @@ -1499,6 +1502,7 @@ cc_test( "//tensorflow/lite/core/c:c_api_types", "//tensorflow/lite/kernels/internal:tensor_utils_no_eigen", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_absl//absl/random", "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest", @@ -1515,8 +1519,8 @@ cc_test( ":test_main", ":test_util", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", - "@eigen_archive//:eigen3", "@flatbuffers", ], ) @@ -1709,6 +1713,7 @@ cc_test( ":test_main", ":test_util", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", ], ) @@ -1739,6 +1744,7 @@ cc_test( ":test_main", ":test_util", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", "@eigen_archive//:eigen3", ], @@ -1853,6 +1859,7 @@ cc_test( ":test_main", ":test_util", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", "@eigen_archive//:eigen3", ], @@ -1913,8 +1920,8 @@ cc_test( ":test_util", "//tensorflow/lite/core/c:common", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", 
"@com_google_googletest//:gtest", - "@flatbuffers", ], ) @@ -1972,6 +1979,7 @@ cc_test( "//tensorflow/lite:framework_stable", "//tensorflow/lite/core:framework_stable", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", "@flatbuffers", ], @@ -2082,12 +2090,12 @@ cc_test( deps = [ ":test_main", ":test_util", - "//tensorflow/lite:string", "//tensorflow/lite/core/c:common", "//tensorflow/lite/kernels/internal:tensor_utils", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", - "@flatbuffers", + "@eigen_archive//:eigen3", ], ) @@ -2101,7 +2109,9 @@ cc_test( "//tensorflow/lite:string", "//tensorflow/lite/core/c:common", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", + "@eigen_archive//:eigen3", "@flatbuffers", ], ) @@ -2485,7 +2495,9 @@ cc_test( ":test_main", ":test_util", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", + "@eigen_archive//:eigen3", ], ) @@ -2530,6 +2542,7 @@ cc_test( ":test_util", "//tensorflow/lite:string", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", "@flatbuffers", ], @@ -2544,6 +2557,7 @@ cc_test( ":test_main", ":test_util", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", "@flatbuffers", ], @@ -2581,8 +2595,8 @@ cc_test( "//tensorflow/lite/kernels/internal:tensor_ctypes", "//tensorflow/lite/kernels/internal:tensor_utils_no_eigen", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", - "@eigen_archive//:eigen3", ], ) @@ -2881,6 +2895,7 @@ cc_test( ":test_util", "//tensorflow/lite:string", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", "@eigen_archive//:eigen3", ], @@ -2905,6 +2920,7 @@ cc_test( ":test_main", ":test_util", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", ], ) @@ -3173,12 +3189,13 @@ cc_test( size = "small", srcs = ["dynamic_update_slice_test.cc"], deps = [ + ":subgraph_test_util", ":test_main", ":test_util", "//tensorflow/lite:framework_stable", "//tensorflow/lite/core:framework_stable", - "//tensorflow/lite/kernels:subgraph_test_util", "//tensorflow/lite/schema:schema_fbs", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest", "@eigen_archive//:eigen3", "@flatbuffers", diff --git a/tensorflow/lite/kernels/activations_test.cc b/tensorflow/lite/kernels/activations_test.cc index 42747a87e61b2a..96bb22ed76c431 100644 --- a/tensorflow/lite/kernels/activations_test.cc +++ b/tensorflow/lite/kernels/activations_test.cc @@ -35,6 +35,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/string_type.h" +#include "tensorflow/lite/types/half.h" namespace tflite { @@ -574,18 +575,17 @@ TEST_P(TanhOpTest, Tanh) { } TEST_P(TanhOpTest, TanhFloat16) { - FloatActivationsOpModel m( - GetRegistration(), BuiltinOperator_TANH, - /*input=*/{TensorType_FLOAT16, {1, 2, 4, 1}}); + FloatActivationsOpModel m(GetRegistration(), BuiltinOperator_TANH, + /*input=*/{TensorType_FLOAT16, {1, 2, 4, 1}}); m.SetInput({ - Eigen::half(0), - Eigen::half(-6), - Eigen::half(2), - Eigen::half(4), - Eigen::half(3), - Eigen::half(-2), - Eigen::half(10), - Eigen::half(1), + half(0), + half(-6), + half(2), + half(4), + half(3), + half(-2), + half(10), + half(1), }); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( @@ -1210,18 +1210,17 @@ TEST_P(LogisticOpTest, SigmoidFloat32) { } TEST_P(LogisticOpTest, SigmoidFloat16) { - FloatActivationsOpModel m( - GetRegistration(), BuiltinOperator_LOGISTIC, - /*input=*/{TensorType_FLOAT16, {1, 2, 4, 1}}); + FloatActivationsOpModel m(GetRegistration(), BuiltinOperator_LOGISTIC, + /*input=*/{TensorType_FLOAT16, {1, 2, 4, 1}}); m.SetInput({ - Eigen::half{-1.2f}, - Eigen::half{-6.0f}, - Eigen::half{2.0f}, - Eigen::half{4.0f}, - Eigen::half{3.0f}, - Eigen::half{-2.0f}, - Eigen::half{10.0f}, - Eigen::half{1.0f}, + half{-1.2f}, + half{-6.0f}, + half{2.0f}, + half{4.0f}, + half{3.0f}, + half{-2.0f}, + half{10.0f}, + half{1.0f}, }); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( diff --git a/tensorflow/lite/kernels/atan2_test.cc b/tensorflow/lite/kernels/atan2_test.cc index 309ba79f284f3f..0c3839361570a6 100644 --- a/tensorflow/lite/kernels/atan2_test.cc +++ b/tensorflow/lite/kernels/atan2_test.cc @@ -17,6 +17,7 @@ #include #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -34,7 +35,7 @@ tflite::TensorType GetTTEnum() { } template <> -tflite::TensorType GetTTEnum() { +tflite::TensorType GetTTEnum() { return tflite::TensorType_FLOAT16; } @@ -74,7 +75,7 @@ class Atan2Test : public ::testing::Test { using FloatType = Float; }; -using TestTypes = ::testing::Types; +using TestTypes = ::testing::Types; TYPED_TEST_SUITE(Atan2Test, TestTypes); @@ -85,15 +86,15 @@ TYPED_TEST(Atan2Test, TestScalar) { tflite::TensorData output = {GetTTEnum(), {}}; Atan2Model m(y, x, output); - auto got = m.GetOutput({Float(0.0)}, {Float(0.0)}); + auto got = m.GetOutput({Float(0.0f)}, {Float(0.0f)}); ASSERT_EQ(got.size(), 1); EXPECT_FLOAT_EQ(got[0], 0.0); - ASSERT_FLOAT_EQ(m.GetOutput({Float(1.0)}, {Float(0.0)})[0], - Float(M_PI / 2)); - ASSERT_FLOAT_EQ(m.GetOutput({Float(0.0)}, {Float(1.0)})[0], - Float(0.0)); - ASSERT_FLOAT_EQ(m.GetOutput({Float(-1.0)}, {Float(0.0)})[0], - Float(-M_PI / 2)); + ASSERT_FLOAT_EQ(m.GetOutput({Float(1.0f)}, {Float(0.0f)})[0], + Float(static_cast(M_PI / 2))); + ASSERT_FLOAT_EQ(m.GetOutput({Float(0.0f)}, {Float(1.0f)})[0], + Float(0.0f)); + ASSERT_FLOAT_EQ(m.GetOutput({Float(-1.0f)}, {Float(0.0f)})[0], + Float(-static_cast(M_PI / 2))); } TYPED_TEST(Atan2Test, TestBatch) { @@ -102,10 +103,12 @@ TYPED_TEST(Atan2Test, TestBatch) { tflite::TensorData x = {GetTTEnum(), {4, 2, 1}}; tflite::TensorData output = {GetTTEnum(), {4, 2, 1}}; Atan2Model m(y, x, output); - std::vector y_data = {Float(0.1), Float(0.2), Float(0.3), Float(0.4), - Float(0.5), 
Float(0.6), Float(0.7), Float(0.8)}; - std::vector x_data = {Float(0.8), Float(0.7), Float(0.6), Float(0.5), - Float(0.4), Float(0.3), Float(0.2), Float(0.1)}; + std::vector y_data = {Float(0.1f), Float(0.2f), Float(0.3f), + Float(0.4f), Float(0.5f), Float(0.6f), + Float(0.7f), Float(0.8f)}; + std::vector x_data = {Float(0.8f), Float(0.7f), Float(0.6f), + Float(0.5f), Float(0.4f), Float(0.3f), + Float(0.2f), Float(0.1f)}; auto got = m.GetOutput(y_data, x_data); ASSERT_EQ(got.size(), 8); for (int i = 0; i < 8; ++i) { diff --git a/tensorflow/lite/kernels/cast.cc b/tensorflow/lite/kernels/cast.cc index 192a552bca4ea2..3560c21e5d498a 100644 --- a/tensorflow/lite/kernels/cast.cc +++ b/tensorflow/lite/kernels/cast.cc @@ -29,6 +29,8 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/types/fp16.h" +#include "tensorflow/lite/types/half.h" #ifdef __ARM_NEON #include @@ -99,17 +101,9 @@ void copyCast(const std::complex* in, std::complex* out, } template -void copyCast(const Eigen::half* in, ToT* out, int num_elements) { - std::transform(in, in + num_elements, out, [](Eigen::half a) { - return static_cast(Eigen::half_impl::half_to_float(a)); - }); -} - -template <> -void copyCast(const Eigen::half* in, std::complex* out, - int num_elements) { - std::transform(in, in + num_elements, out, [](Eigen::half a) { - return std::complex(Eigen::half_impl::half_to_float(a)); +void copyCast(const half* in, ToT* out, int num_elements) { + std::transform(in, in + num_elements, out, [](half a) { + return static_cast(fp16_ieee_to_fp32_value(a)); }); } @@ -122,33 +116,26 @@ void copyCast(const Eigen::bfloat16* in, std::complex* out, } template -void copyCastToFloat16(const FromT* in, Eigen::half* out, int num_elements) { +void copyCastToFloat16(const FromT* in, half* out, int num_elements) { std::transform(in, in + num_elements, out, [](FromT a) { - return Eigen::half_impl::float_to_half_rtne(static_cast(a)); + return half::from_bits(fp16_ieee_from_fp32_value(static_cast(a))); }); } template <> -void copyCastToFloat16(const std::complex* in, Eigen::half* out, +void copyCastToFloat16(const std::complex* in, half* out, int num_elements) { std::transform(in, in + num_elements, out, [](std::complex a) { - return Eigen::half_impl::float_to_half_rtne(std::real(a)); + return half::from_bits(fp16_ieee_from_fp32_value(std::real(a))); }); } template <> -void copyCastToFloat16(const Eigen::half* in, Eigen::half* out, - int num_elements) { - std::transform(in, in + num_elements, out, [](Eigen::half a) { return a; }); -} - -template <> -void copyCastToFloat16(const Eigen::bfloat16* in, Eigen::half* out, - int num_elements) { +void copyCastToFloat16(const Eigen::bfloat16* in, half* out, int num_elements) { // bfloat16 -> float -> half (fp16) std::transform(in, in + num_elements, out, [](Eigen::bfloat16 a) { - return Eigen::half_impl::float_to_half_rtne( - Eigen::bfloat16_impl::bfloat16_to_float(a)); + return half::from_bits( + fp16_ieee_from_fp32_value(Eigen::bfloat16_impl::bfloat16_to_float(a))); }); } @@ -310,7 +297,7 @@ TfLiteStatus copyToTensor(TfLiteContext* context, const FromT* in, copyCast(in, out->data.int8, num_elements); break; case kTfLiteFloat16: - copyCastToFloat16(in, reinterpret_cast(out->data.f16), + copyCastToFloat16(in, reinterpret_cast(out->data.f16), num_elements); break; case kTfLiteBFloat16: diff --git a/tensorflow/lite/kernels/cast_test.cc 
b/tensorflow/lite/kernels/cast_test.cc index 77cc2f3442b1c2..09cc8fbfbda37c 100644 --- a/tensorflow/lite/kernels/cast_test.cc +++ b/tensorflow/lite/kernels/cast_test.cc @@ -23,7 +23,6 @@ limitations under the License. #include #include "absl/random/random.h" #include "absl/types/span.h" -#include "Eigen/Core" // from @eigen_archive #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/c/c_api_types.h" #include "tensorflow/lite/kernels/cast_test_common.h" @@ -31,6 +30,7 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -413,11 +413,10 @@ TEST(CastOpModel, CastFloatToFloat16) { m.PopulateTensor(m.input(), {100.f, 1.0f, 0.f, 0.4f, 1.999f, 1.1f}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT( - m.ExtractVector(m.output()), - ElementsAreArray( - {static_cast(100.f), static_cast(1.0f), - static_cast(0.f), static_cast(0.4f), - static_cast(1.999f), static_cast(1.1)})); + m.ExtractVector(m.output()), + ElementsAreArray({static_cast(100.f), static_cast(1.0f), + static_cast(0.f), static_cast(0.4f), + static_cast(1.999f), static_cast(1.1f)})); } TEST(CastOpModel, CastFloatToBFloat16) { @@ -435,11 +434,10 @@ TEST(CastOpModel, CastFloatToBFloat16) { TEST(CastOpModel, CastFloat16ToFloat) { CastOpModel m({TensorType_FLOAT16, {3, 2}}, {TensorType_FLOAT32, {3, 2}}); - m.PopulateTensor( - m.input(), - {static_cast(100.f), static_cast(1.0f), - static_cast(0.f), static_cast(0.4f), - static_cast(1.999f), static_cast(1.1f)}); + m.PopulateTensor(m.input(), + {static_cast(100.f), static_cast(1.0f), + static_cast(0.f), static_cast(0.4f), + static_cast(1.999f), static_cast(1.1f)}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT(m.ExtractVector(m.output()), ElementsAreArray(ArrayFloatNear( @@ -462,6 +460,61 @@ TEST(CastOpModel, CastBFloat16ToFloat) { /*max_abs_err=*/0.05f))); } +TEST(CastOpModel, CastFloat16ToInt32) { + CastOpModel m({TensorType_FLOAT16, {1, 6}}, {TensorType_INT32, {1, 6}}); + m.PopulateTensor(m.input(), + {static_cast(100.f), static_cast(20.f), + static_cast(3.f), static_cast(0.4f), + static_cast(0.999f), static_cast(1.1f)}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({100, 20, 3, 0, 0, 1})); +} + +TEST(CastOpModel, CastInt32ToFloat16) { + CastOpModel m({TensorType_INT32, {1, 6}}, {TensorType_FLOAT16, {1, 6}}); + m.PopulateTensor(m.input(), {100, 20, 3, 0, 1, -1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT( + m.ExtractVector(m.output()), + ElementsAreArray({static_cast(100.f), static_cast(20.f), + static_cast(3.f), static_cast(0.f), + static_cast(1.f), static_cast(-1.f)})); +} + +TEST(CastOpModel, CastFloat16ToBFloat16) { + CastOpModel m({TensorType_FLOAT16, {1, 6}}, {TensorType_BFLOAT16, {1, 6}}); + m.PopulateTensor(m.input(), + {static_cast(100.f), static_cast(20.f), + static_cast(3.f), static_cast(0.4f), + static_cast(0.999f), static_cast(1.1f)}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({static_cast(100.f), + static_cast(20.f), + static_cast(3.f), + static_cast(0.4f), + static_cast(0.999f), + static_cast(1.1f)})); +} + +TEST(CastOpModel, CastBFloat16ToFloat16) { + CastOpModel m({TensorType_BFLOAT16, {1, 6}}, {TensorType_FLOAT16, {1, 6}}); + m.PopulateTensor( + m.input(), + {static_cast(100.f), static_cast(20.f), + static_cast(3.f), static_cast(0.4f), + 
static_cast(0.999f), + static_cast(1.1f)}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray(ArrayFloatNear( + {static_cast(100.f), static_cast(20.f), + static_cast(3.f), static_cast(0.4f), + static_cast(0.999f), static_cast(1.1f)}, + /*max_abs_err=*/0.05f))); +} + TEST(CastOpModel, CastConstInputCachingWorks) { // This tests the implementation of a performance optimization. If that // optimization is changed, this test will likely break/need to be updated. diff --git a/tensorflow/lite/kernels/comparisons_test.cc b/tensorflow/lite/kernels/comparisons_test.cc index 10226bb60a8ed8..bc2091aa823832 100644 --- a/tensorflow/lite/kernels/comparisons_test.cc +++ b/tensorflow/lite/kernels/comparisons_test.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/string_type.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -396,12 +397,10 @@ TEST(ComparisonsTest, LessFloat) { TEST(ComparisonsTest, LessFloat16) { ComparisonOpModel model({1, 1, 1, 4}, {1, 1, 1, 4}, TensorType_FLOAT16, BuiltinOperator_LESS); - model.PopulateTensor( - model.input1(), - {Eigen::half(0.1), Eigen::half(0.9), Eigen::half(0.7), Eigen::half(0.3)}); - model.PopulateTensor( - model.input2(), - {Eigen::half(0.1), Eigen::half(0.2), Eigen::half(0.6), Eigen::half(0.5)}); + model.PopulateTensor(model.input1(), + {half(0.1f), half(0.9f), half(0.7f), half(0.3f)}); + model.PopulateTensor(model.input2(), + {half(0.1f), half(0.2f), half(0.6f), half(0.5f)}); ASSERT_EQ(model.Invoke(), kTfLiteOk); EXPECT_THAT(model.GetOutput(), ElementsAre(false, false, false, true)); diff --git a/tensorflow/lite/kernels/concatenation_test.cc b/tensorflow/lite/kernels/concatenation_test.cc index 28692ae1528dd3..f9c765375cc20f 100644 --- a/tensorflow/lite/kernels/concatenation_test.cc +++ b/tensorflow/lite/kernels/concatenation_test.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "flatbuffers/flatbuffers.h" // from @flatbuffers #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -121,12 +122,11 @@ TEST(ConcatenationOpTest, ThreeDimensionalOneInputBFloat16) { } TEST(ConcatenationOpTest, ThreeDimensionalOneInputFloat16) { - ConcatenationOpModel m({TensorType_FLOAT16, {2, 1, 2}}, - /*axis=*/1, - /*num_inputs=*/1); - m.SetInput(0, - {static_cast(1.0f), static_cast(3.0f), - static_cast(4.0f), static_cast(7.0f)}); + ConcatenationOpModel m({TensorType_FLOAT16, {2, 1, 2}}, + /*axis=*/1, + /*num_inputs=*/1); + m.SetInput(0, {static_cast(1.0f), static_cast(3.0f), + static_cast(4.0f), static_cast(7.0f)}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 3, 4, 7})); } @@ -206,23 +206,21 @@ TEST(ConcatenationOpTest, FiveDimensionalTwoInputBFloat16) { } TEST(ConcatenationOpTest, FiveDimensionalTwoInputFloat16) { - ConcatenationOpModel m({TensorType_FLOAT16, {2, 1, 2, 1, 3}}, - /*axis=*/0, - /*num_inputs=*/2); - m.SetInput( - 0, {static_cast(1.0f), static_cast(2.0f), - static_cast(3.0f), static_cast(4.0f), - static_cast(5.0f), static_cast(6.0f), - static_cast(7.0f), Eigen::half{8.0f}, - static_cast(9.0f), static_cast(10.0f), - static_cast(11.0f), static_cast(12.0f)}); - m.SetInput( - 1, {static_cast(13.0f), static_cast(14.0f), - Eigen::half{15.0f}, static_cast(16.0f), - Eigen::half{17.0f}, static_cast(18.0f), - static_cast(19.0f), static_cast(20.0f), - static_cast(21.0f), static_cast(22.0f), - static_cast(23.0f), static_cast(24.0f)}); + ConcatenationOpModel m({TensorType_FLOAT16, {2, 1, 2, 1, 3}}, + /*axis=*/0, + /*num_inputs=*/2); + m.SetInput(0, {static_cast(1.0f), static_cast(2.0f), + static_cast(3.0f), static_cast(4.0f), + static_cast(5.0f), static_cast(6.0f), + static_cast(7.0f), half{8.0f}, static_cast(9.0f), + static_cast(10.0f), static_cast(11.0f), + static_cast(12.0f)}); + m.SetInput(1, + {static_cast(13.0f), static_cast(14.0f), half{15.0f}, + static_cast(16.0f), half{17.0f}, static_cast(18.0f), + static_cast(19.0f), static_cast(20.0f), + static_cast(21.0f), static_cast(22.0f), + static_cast(23.0f), static_cast(24.0f)}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT( m.GetOutput(), diff --git a/tensorflow/lite/kernels/dynamic_update_slice_test.cc b/tensorflow/lite/kernels/dynamic_update_slice_test.cc index 373a719d5ac412..99aa637a068d23 100644 --- a/tensorflow/lite/kernels/dynamic_update_slice_test.cc +++ b/tensorflow/lite/kernels/dynamic_update_slice_test.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/subgraph_test_util.h" #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -112,10 +113,9 @@ TEST(DynamicUpdateSliceOpTest, SimpleTestF16InPlaceInput) { DynamicUpdateSliceOpModel m({TensorType_FLOAT16, {3, 3}}, {TensorType_FLOAT16, {2, 1}}, {TensorType_INT32, {2}}); - m.SetInput({Eigen::half(1), Eigen::half(2), Eigen::half(3), - Eigen::half(4), Eigen::half(5), Eigen::half(6), - Eigen::half(7), Eigen::half(8), Eigen::half(9)}); - m.SetUpdate({Eigen::half(-1), Eigen::half(-2)}); + m.SetInput({half(1), half(2), half(3), half(4), half(5), half(6), + half(7), half(8), half(9)}); + m.SetUpdate({half(-1), half(-2)}); m.SetStartIndices({1, 1}); const int kInplaceInputTensorIdx = 0; const int kInplaceOutputTensorIdx = 0; @@ -123,11 +123,10 @@ TEST(DynamicUpdateSliceOpTest, SimpleTestF16InPlaceInput) { TfLiteTensor* output_tensor = m.GetOutputTensor(kInplaceOutputTensorIdx); output_tensor->data.data = input_tensor->data.data; ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), - ElementsAreArray(ArrayFloatNear( - {Eigen::half(1), Eigen::half(2), Eigen::half(3), - Eigen::half(4), Eigen::half(-1), Eigen::half(6), - Eigen::half(7), Eigen::half(-2), Eigen::half(9)}))); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray( + ArrayFloatNear({half(1), half(2), half(3), half(4), half(-1), + half(6), half(7), half(-2), half(9)}))); EXPECT_EQ(output_tensor->data.data, input_tensor->data.data); } diff --git a/tensorflow/lite/kernels/fill_test.cc b/tensorflow/lite/kernels/fill_test.cc index 028623e3a0a321..a8e9815f30bc61 100644 --- a/tensorflow/lite/kernels/fill_test.cc +++ b/tensorflow/lite/kernels/fill_test.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/string_type.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -139,8 +140,8 @@ TEST_P(FillOpTest, FillFloat) { } TEST_P(FillOpTest, FillFloat16) { - FillOpModel m(TensorType_INT64, {3}, {2, 2, 2}, - Eigen::half(4.0f), GetParam()); + FillOpModel m(TensorType_INT64, {3}, {2, 2, 2}, half(4.0f), + GetParam()); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT( m.GetOutput(), diff --git a/tensorflow/lite/kernels/floor_test.cc b/tensorflow/lite/kernels/floor_test.cc index 86ea68ad39e599..13154175e334cc 100644 --- a/tensorflow/lite/kernels/floor_test.cc +++ b/tensorflow/lite/kernels/floor_test.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -79,28 +80,28 @@ TEST(FloorOpTest, MultiDims) { TEST(FloorOpTest, SingleDimFloat16) { FloorOpModel model({2}, TensorType_FLOAT16); - model.PopulateTensor<>(model.input(), {Eigen::half(8.5), Eigen::half(0.0)}); + model.PopulateTensor<>(model.input(), {half(8.5f), half(0.0f)}); ASSERT_EQ(model.Invoke(), kTfLiteOk); - EXPECT_THAT(model.GetOutput(), ElementsAreArray({8, 0})); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({8, 0})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); } TEST(FloorOpTest, MultiDimsFloat16) { FloorOpModel model({2, 1, 1, 5}, TensorType_FLOAT16); - model.PopulateTensor(model.input(), { - Eigen::half(0.75), - Eigen::half(8.25), - Eigen::half(0.49), - Eigen::half(9.99), - Eigen::half(0.5), - Eigen::half(-0.25), - Eigen::half(-8.75), - Eigen::half(-0.99), - Eigen::half(-9.49), - Eigen::half(-0.5), - }); + model.PopulateTensor(model.input(), { + half(0.75f), + half(8.25f), + half(0.49f), + half(9.99f), + half(0.5f), + half(-0.25f), + half(-8.75f), + half(-0.99f), + half(-9.49f), + half(-0.5f), + }); ASSERT_EQ(model.Invoke(), kTfLiteOk); - EXPECT_THAT(model.GetOutput(), + EXPECT_THAT(model.GetOutput(), ElementsAreArray({0, 8, 0, 9, 0, -1, -9, -1, -10, -1})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); } diff --git a/tensorflow/lite/kernels/gather_nd_test.cc b/tensorflow/lite/kernels/gather_nd_test.cc index 2bd9a0235ebe2c..f4b9f65711fbdc 100644 --- a/tensorflow/lite/kernels/gather_nd_test.cc +++ b/tensorflow/lite/kernels/gather_nd_test.cc @@ -20,10 +20,12 @@ limitations under the License. #include #include +#include "Eigen/Core" // from @eigen_archive #include "flatbuffers/flatbuffers.h" // from @flatbuffers #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/string_type.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -244,21 +246,19 @@ TEST(GatherNdOpTest, BFloat16Int32) { TEST(GatherNdOpTest, Float16Int32) { GatherNdOpModel m({TensorType_FLOAT16, {3, 2, 3}}, {TensorType_INT32, {2, 2}}); - m.SetInput( - {Eigen::half(1.1), Eigen::half(-1.2), Eigen::half(1.3), Eigen::half(-2.1), - Eigen::half(2.2), Eigen::half(2.3), // - Eigen::half(3.1), Eigen::half(3.2), Eigen::half(-3.3), Eigen::half(-4.1), - Eigen::half(-4.2), Eigen::half(4.3), // - Eigen::half(5.1), Eigen::half(-5.2), Eigen::half(5.3), Eigen::half(6.1), - Eigen::half(-6.2), Eigen::half(6.3)}); + m.SetInput({half(1.1f), half(-1.2f), half(1.3f), half(-2.1f), + half(2.2f), half(2.3f), // + half(3.1f), half(3.2f), half(-3.3f), half(-4.1f), + half(-4.2f), half(4.3f), // + half(5.1f), half(-5.2f), half(5.3f), half(6.1f), + half(-6.2f), half(6.3f)}); m.SetPositions({0, 1, 1, 0}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT( - m.GetOutput(), - Pointwise(FloatingPointEq(), - {Eigen::half(-2.1), Eigen::half(2.2), Eigen::half(2.3), - Eigen::half(3.1), Eigen::half(3.2), Eigen::half(-3.3)})); + m.GetOutput(), + Pointwise(FloatingPointEq(), {half(-2.1f), half(2.2f), half(2.3f), + half(3.1f), half(3.2f), half(-3.3f)})); } TEST(GatherNdOpTest, Float32Int32) { @@ -297,21 +297,19 @@ TEST(GatherNdOpTest, BFloat16Int64) { TEST(GatherNdOpTest, Float16Int64) { GatherNdOpModel m({TensorType_FLOAT16, {3, 2, 3}}, {TensorType_INT64, {2, 2}}); - m.SetInput( - {Eigen::half(1.1), Eigen::half(-1.2), Eigen::half(1.3), 
Eigen::half(-2.1), - Eigen::half(2.2), Eigen::half(2.3), // - Eigen::half(3.1), Eigen::half(3.2), Eigen::half(-3.3), Eigen::half(-4.1), - Eigen::half(-4.2), Eigen::half(4.3), // - Eigen::half(5.1), Eigen::half(-5.2), Eigen::half(5.3), Eigen::half(6.1), - Eigen::half(-6.2), Eigen::half(6.3)}); + m.SetInput({half(1.1f), half(-1.2f), half(1.3f), half(-2.1f), + half(2.2f), half(2.3f), // + half(3.1f), half(3.2f), half(-3.3f), half(-4.1f), + half(-4.2f), half(4.3f), // + half(5.1f), half(-5.2f), half(5.3f), half(6.1f), + half(-6.2f), half(6.3f)}); m.SetPositions({0LL, 1LL, 1LL, 0LL}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT( - m.GetOutput(), - Pointwise(FloatingPointEq(), - {Eigen::half(-2.1), Eigen::half(2.2), Eigen::half(2.3), - Eigen::half(3.1), Eigen::half(3.2), Eigen::half(-3.3)})); + m.GetOutput(), + Pointwise(FloatingPointEq(), {half(-2.1f), half(2.2f), half(2.3f), + half(3.1f), half(3.2f), half(-3.3f)})); } TEST(GatherNdOpTest, Float32Int64) { @@ -462,21 +460,19 @@ TEST(GatherNdOpTest, BFloat16Int16) { TEST(GatherNdOpTest, Float16Int16) { GatherNdOpModel m({TensorType_FLOAT16, {3, 2, 3}}, {TensorType_INT16, {2, 2}}); - m.SetInput( - {Eigen::half(1.1), Eigen::half(-1.2), Eigen::half(1.3), Eigen::half(-2.1), - Eigen::half(2.2), Eigen::half(2.3), // - Eigen::half(3.1), Eigen::half(3.2), Eigen::half(-3.3), Eigen::half(-4.1), - Eigen::half(-4.2), Eigen::half(4.3), // - Eigen::half(5.1), Eigen::half(-5.2), Eigen::half(5.3), Eigen::half(6.1), - Eigen::half(-6.2), Eigen::half(6.3)}); + m.SetInput({half(1.1f), half(-1.2f), half(1.3f), half(-2.1f), + half(2.2f), half(2.3f), // + half(3.1f), half(3.2f), half(-3.3f), half(-4.1f), + half(-4.2f), half(4.3f), // + half(5.1f), half(-5.2f), half(5.3f), half(6.1f), + half(-6.2f), half(6.3f)}); m.SetPositions({0, 1, 1, 0}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT( - m.GetOutput(), - Pointwise(FloatingPointEq(), - {Eigen::half(-2.1), Eigen::half(2.2), Eigen::half(2.3), - Eigen::half(3.1), Eigen::half(3.2), Eigen::half(-3.3)})); + m.GetOutput(), + Pointwise(FloatingPointEq(), {half(-2.1f), half(2.2f), half(2.3f), + half(3.1f), half(3.2f), half(-3.3f)})); } TEST(GatherNdOpTest, Float32Int16) { diff --git a/tensorflow/lite/kernels/gather_test.cc b/tensorflow/lite/kernels/gather_test.cc index 23e30eb7867774..61ca1b654f6160 100644 --- a/tensorflow/lite/kernels/gather_test.cc +++ b/tensorflow/lite/kernels/gather_test.cc @@ -20,9 +20,11 @@ limitations under the License. #include #include +#include "Eigen/Core" // from @eigen_archive #include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -252,7 +254,7 @@ TEST_P(GatherOpTest, LastAxis0DIndex) { } using TestTypes = testing::Types; + float, half, Eigen::bfloat16>; template struct TypedGatherOpTest : public testing::Test {}; diff --git a/tensorflow/lite/kernels/maximum_minimum_test.cc b/tensorflow/lite/kernels/maximum_minimum_test.cc index babdb4f69fad03..00e25ee9b86500 100644 --- a/tensorflow/lite/kernels/maximum_minimum_test.cc +++ b/tensorflow/lite/kernels/maximum_minimum_test.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -247,24 +248,20 @@ TEST(MaximumOpTest, Int32WithBroadcastTest5D) { } TEST(MaximumOpTest, Float16Test) { - std::initializer_list data1 = { - Eigen::half(1.0), Eigen::half(0.0), Eigen::half(-1.0), - Eigen::half(11.0), Eigen::half(-2.0), Eigen::half(-1.44)}; - std::initializer_list data2 = { - Eigen::half(-1.0), Eigen::half(0.0), Eigen::half(1.0), - Eigen::half(12.0), Eigen::half(-3.0), Eigen::half(-1.43)}; - TestModel( - BuiltinOperator_MAXIMUM, {TensorType_FLOAT16, {3, 1, 2}}, - {TensorType_FLOAT16, {3, 1, 2}}, {TensorType_FLOAT16, {3, 1, 2}}, data1, - data2, - {Eigen::half(1.0), Eigen::half(0.0), Eigen::half(1.0), Eigen::half(12.0), - Eigen::half(-2.0), Eigen::half(-1.43)}); - TestModel( - BuiltinOperator_MINIMUM, {TensorType_FLOAT16, {3, 1, 2}}, - {TensorType_FLOAT16, {3, 1, 2}}, {TensorType_FLOAT16, {3, 1, 2}}, data1, - data2, - {Eigen::half(-1.0), Eigen::half(0.0), Eigen::half(-1.0), - Eigen::half(11.0), Eigen::half(-3.0), Eigen::half(-1.44)}); + std::initializer_list data1 = {half(1.0f), half(0.0f), half(-1.0f), + half(11.0f), half(-2.0f), half(-1.44f)}; + std::initializer_list data2 = {half(-1.0f), half(0.0f), half(1.0f), + half(12.0f), half(-3.0f), half(-1.43f)}; + TestModel(BuiltinOperator_MAXIMUM, {TensorType_FLOAT16, {3, 1, 2}}, + {TensorType_FLOAT16, {3, 1, 2}}, + {TensorType_FLOAT16, {3, 1, 2}}, data1, data2, + {half(1.0f), half(0.0f), half(1.0f), half(12.0f), half(-2.0f), + half(-1.43f)}); + TestModel(BuiltinOperator_MINIMUM, {TensorType_FLOAT16, {3, 1, 2}}, + {TensorType_FLOAT16, {3, 1, 2}}, + {TensorType_FLOAT16, {3, 1, 2}}, data1, data2, + {half(-1.0f), half(0.0f), half(-1.0f), half(11.0f), + half(-3.0f), half(-1.44f)}); } TEST(MaximumOpTest, BFloat16Test) { @@ -308,42 +305,39 @@ TEST(MaximumOpTest, BFloat16WithBroadcastTest5DScalarY) { } TEST(MaximumOpTest, Float16WithBroadcastTest5DScalarY) { - std::initializer_list data1 = { - Eigen::half(1.0), Eigen::half(0.0), Eigen::half(-1.0), - Eigen::half(-2.0), Eigen::half(3.0), Eigen::half(11.0)}; - std::initializer_list data2 = {Eigen::half(2.0)}; - TestModel( - BuiltinOperator_MAXIMUM, {TensorType_FLOAT16, {3, 1, 2, 1, 1}}, - {TensorType_FLOAT16, {1}}, {TensorType_FLOAT16, {3, 1, 2, 1, 1}}, data1, - data2, - {Eigen::half(2.0), Eigen::half(2.0), Eigen::half(2.0), Eigen::half(2.0), - Eigen::half(3.0), Eigen::half(11.0)}); - TestModel( - BuiltinOperator_MINIMUM, {TensorType_FLOAT16, {3, 1, 2, 1, 1}}, - {TensorType_FLOAT16, {1}}, {TensorType_FLOAT16, {3, 1, 2, 1, 1}}, data1, - data2, - {Eigen::half(1.0), Eigen::half(0.0), Eigen::half(-1.0), Eigen::half(-2.0), - Eigen::half(2.0), Eigen::half(2.0)}); + std::initializer_list data1 = {half(1.0f), half(0.0f), half(-1.0f), + half(-2.0f), half(3.0f), half(11.0f)}; + std::initializer_list data2 = {half(2.0f)}; + TestModel(BuiltinOperator_MAXIMUM, + {TensorType_FLOAT16, {3, 1, 2, 1, 1}}, + {TensorType_FLOAT16, {1}}, + {TensorType_FLOAT16, {3, 1, 2, 1, 1}}, data1, data2, + {half(2.0f), half(2.0f), half(2.0f), half(2.0f), half(3.0f), + half(11.0f)}); + TestModel(BuiltinOperator_MINIMUM, + {TensorType_FLOAT16, {3, 1, 2, 1, 1}}, + {TensorType_FLOAT16, {1}}, + {TensorType_FLOAT16, {3, 1, 2, 1, 1}}, data1, data2, + {half(1.0f), half(0.0f), half(-1.0f), half(-2.0f), half(2.0f), + half(2.0f)}); } TEST(MaximumOpTest, Float16WithBroadcastTest5D) { - std::initializer_list data1 = { - Eigen::half(1.0), Eigen::half(0.0), 
Eigen::half(-1.0), - Eigen::half(-2.0), Eigen::half(-1.44), Eigen::half(11.0)}; - std::initializer_list data2 = {Eigen::half(0.5), - Eigen::half(2.0)}; - TestModel( - BuiltinOperator_MAXIMUM, {TensorType_FLOAT16, {3, 1, 1, 1, 2}}, - {TensorType_FLOAT16, {2}}, {TensorType_FLOAT16, {3, 1, 1, 1, 2}}, data1, - data2, - {Eigen::half(1.0), Eigen::half(2.0), Eigen::half(0.5), Eigen::half(2.0), - Eigen::half(0.5), Eigen::half(11.0)}); - TestModel( - BuiltinOperator_MINIMUM, {TensorType_FLOAT16, {3, 1, 1, 1, 2}}, - {TensorType_FLOAT16, {2}}, {TensorType_FLOAT16, {3, 1, 1, 1, 2}}, data1, - data2, - {Eigen::half(0.5), Eigen::half(0.0), Eigen::half(-1.0), Eigen::half(-2.0), - Eigen::half(-1.44), Eigen::half(2.0)}); + std::initializer_list data1 = {half(1.0f), half(0.0f), half(-1.0f), + half(-2.0f), half(-1.44f), half(11.0f)}; + std::initializer_list data2 = {half(0.5f), half(2.0f)}; + TestModel(BuiltinOperator_MAXIMUM, + {TensorType_FLOAT16, {3, 1, 1, 1, 2}}, + {TensorType_FLOAT16, {2}}, + {TensorType_FLOAT16, {3, 1, 1, 1, 2}}, data1, data2, + {half(1.0f), half(2.0f), half(0.5f), half(2.0f), half(0.5f), + half(11.0f)}); + TestModel(BuiltinOperator_MINIMUM, + {TensorType_FLOAT16, {3, 1, 1, 1, 2}}, + {TensorType_FLOAT16, {2}}, + {TensorType_FLOAT16, {3, 1, 1, 1, 2}}, data1, data2, + {half(0.5f), half(0.0f), half(-1.0f), half(-2.0f), + half(-1.44f), half(2.0f)}); } TEST(MaximumOpTest, BFloat16WithBroadcastTest5D) { diff --git a/tensorflow/lite/kernels/neg_test.cc b/tensorflow/lite/kernels/neg_test.cc index fe9cc68bdf8a4d..883f9182758412 100644 --- a/tensorflow/lite/kernels/neg_test.cc +++ b/tensorflow/lite/kernels/neg_test.cc @@ -22,6 +22,7 @@ limitations under the License. #include "flatbuffers/flatbuffers.h" // from @flatbuffers #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -67,14 +68,12 @@ TEST(NegOpModel, NegFloat32) { TEST(NegOpModel, NegFloat16) { NegOpModel m({TensorType_FLOAT16, {6}}, {TensorType_FLOAT16, {6}}); - m.SetInput({Eigen::half(-2.0f), Eigen::half(-1.0f), - Eigen::half(0.f), Eigen::half(1.0f), - Eigen::half(2.0f), Eigen::half(3.0f)}); + m.SetInput({half(-2.0f), half(-1.0f), half(0.f), half(1.0f), half(2.0f), + half(3.0f)}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), - ElementsAreArray({Eigen::half(2.0f), Eigen::half(1.0f), - Eigen::half(0.f), Eigen::half(-1.0f), - Eigen::half(-2.0f), Eigen::half(-3.0f)})); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({half(2.0f), half(1.0f), half(0.f), half(-1.0f), + half(-2.0f), half(-3.0f)})); } TEST(NegOpModel, NegBfloat16) { diff --git a/tensorflow/lite/kernels/pad_test.cc b/tensorflow/lite/kernels/pad_test.cc index 971be96a915b4b..b985abccddcee7 100644 --- a/tensorflow/lite/kernels/pad_test.cc +++ b/tensorflow/lite/kernels/pad_test.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/lite/core/interpreter.h" #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -927,19 +928,16 @@ TEST_F(PadV2OpTest, Int16PaddingSimpleConstFloat32ValuedTestInt8) { template void SimpleConstFloat16ValuedTest() { - PadV2OpConstModel m( + PadV2OpConstModel m( {TensorType_FLOAT16, {1, 2, 2, 1}}, {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, - Eigen::half{4.0f}, {TensorType_FLOAT16}); - m.SetInput({Eigen::half{1.5f}, Eigen::half{2.5f}, Eigen::half{3.5f}, - Eigen::half{4.5}}); + half{4.0f}, {TensorType_FLOAT16}); + m.SetInput({half{1.5f}, half{2.5f}, half{3.5f}, half{4.5f}}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT( - m.GetOutput(), - ElementsAreArray(ArrayFloatNear( - {Eigen::half{4}, Eigen::half{4}, Eigen::half{4}, Eigen::half{4}, - Eigen::half{4}, Eigen::half{1.5}, Eigen::half{2.5}, Eigen::half{4}, - Eigen::half{4}, Eigen::half{3.5}, Eigen::half{4.5}, Eigen::half{4}, - Eigen::half{4}, Eigen::half{4}, Eigen::half{4}, Eigen::half{4}}))); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {half{4}, half{4}, half{4}, half{4}, half{4}, half{1.5f}, + half{2.5f}, half{4}, half{4}, half{3.5f}, half{4.5f}, + half{4}, half{4}, half{4}, half{4}, half{4}}))); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); } @@ -1050,12 +1048,15 @@ TEST_F(PadV2OpTest, Int16PaddingSimple4DConstFloat32ValuedTest) { template void Simple4DConstFloat16ValuedTest() { - PadV2OpConstModel m( + PadV2OpConstModel m( {TensorType_FLOAT16, {1, 1, 2, 1}}, {4, 2}, {0, 1, 0, 0, 0, 0, 0, 1}, - Eigen::half{7.0}, {TensorType_FLOAT16}); - m.SetInput({Eigen::half{3.0f}, Eigen::half{6.0f}}); + half{7.0f}, {TensorType_FLOAT16}); + m.SetInput({half{3.0f}, half{6.0f}}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 7, 6, 7, 7, 7, 7, 7})); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {half{3.0f}, half{7.0f}, half{6.0f}, half{7.0f}, half{7.0f}, + half{7.0f}, half{7.0f}, half{7.0f}}))); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 1, 2, 2})); } @@ -1167,15 +1168,18 @@ TEST_F(PadV2OpTest, Int16PaddingSimpleDynamicTest) { template void SimpleDynamicTestV2Float16() { - PadV2OpDynamicModel m( - {TensorType_FLOAT16, {1, 2, 2, 1}}, {4, 2}, Eigen::half{0.0}, + PadV2OpDynamicModel m( + {TensorType_FLOAT16, {1, 2, 2, 1}}, {4, 2}, half{0.0f}, {TensorType_FLOAT16}); - m.SetInput({Eigen::half{1.0f}, Eigen::half{2.0f}, Eigen::half{3.0f}, - Eigen::half{4.0f}}); + m.SetInput({half{1.0f}, half{2.0f}, half{3.0f}, half{4.0f}}); m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); ASSERT_EQ(m.Invoke(), kTfLiteOk); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, - 0, 0, 0, 0, 0})); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {half{0.0f}, half{0.0f}, half{0.0f}, half{0.0f}, half{0.0f}, + half{1.0f}, half{2.0f}, half{0.0f}, half{0.0f}, half{3.0f}, + half{4.0f}, half{0.0f}, half{0.0f}, half{0.0f}, half{0.0f}, + half{0.0f}}))); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); } diff --git a/tensorflow/lite/kernels/reverse_test.cc b/tensorflow/lite/kernels/reverse_test.cc index 4301b0120f53c3..7e2d3df543ba28 100644 --- a/tensorflow/lite/kernels/reverse_test.cc +++ b/tensorflow/lite/kernels/reverse_test.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -354,45 +355,38 @@ TEST(ReverseOpTest, Int16MultiDimensions) { // float16 tests. TEST(ReverseOpTest, Float16OneDimension) { - ReverseOpModel model({TensorType_FLOAT16, {4}}, - {TensorType_INT32, {1}}); - model.PopulateTensor( - model.input(), - {Eigen::half(1), Eigen::half(2), Eigen::half(3), Eigen::half(4)}); + ReverseOpModel model({TensorType_FLOAT16, {4}}, + {TensorType_INT32, {1}}); + model.PopulateTensor(model.input(), + {half(1), half(2), half(3), half(4)}); model.PopulateTensor(model.axis(), {0}); ASSERT_EQ(model.Invoke(), kTfLiteOk); EXPECT_THAT(model.GetOutputShape(), ElementsAre(4)); EXPECT_THAT(model.GetOutput(), - ElementsAreArray({Eigen::half(4), Eigen::half(3), Eigen::half(2), - Eigen::half(1)})); + ElementsAreArray({half(4), half(3), half(2), half(1)})); } TEST(ReverseOpTest, Float16MultiDimensions) { - ReverseOpModel model({TensorType_FLOAT16, {4, 3, 2}}, - {TensorType_INT32, {1}}); - model.PopulateTensor( + ReverseOpModel model({TensorType_FLOAT16, {4, 3, 2}}, + {TensorType_INT32, {1}}); + model.PopulateTensor( model.input(), - {Eigen::half(1), Eigen::half(2), Eigen::half(3), Eigen::half(4), - Eigen::half(5), Eigen::half(6), Eigen::half(7), Eigen::half(8), - Eigen::half(9), Eigen::half(10), Eigen::half(11), Eigen::half(12), - Eigen::half(13), Eigen::half(14), Eigen::half(15), Eigen::half(16), - Eigen::half(17), Eigen::half(18), Eigen::half(19), Eigen::half(20), - Eigen::half(21), Eigen::half(22), Eigen::half(23), Eigen::half(24)}); + {half(1), half(2), half(3), half(4), half(5), half(6), + half(7), half(8), half(9), half(10), half(11), half(12), + half(13), half(14), half(15), half(16), half(17), half(18), + half(19), half(20), half(21), half(22), half(23), half(24)}); model.PopulateTensor(model.axis(), {1}); ASSERT_EQ(model.Invoke(), kTfLiteOk); EXPECT_THAT(model.GetOutputShape(), ElementsAre(4, 3, 2)); EXPECT_THAT( model.GetOutput(), - ElementsAreArray({Eigen::half(5), Eigen::half(6), Eigen::half(3), - Eigen::half(4), Eigen::half(1), Eigen::half(2), - Eigen::half(11), Eigen::half(12), Eigen::half(9), - Eigen::half(10), Eigen::half(7), Eigen::half(8), - Eigen::half(17), Eigen::half(18), Eigen::half(15), - Eigen::half(16), Eigen::half(13), Eigen::half(14), - Eigen::half(23), Eigen::half(24), Eigen::half(21), - Eigen::half(22), Eigen::half(19), Eigen::half(20)})); + ElementsAreArray({half(5), half(6), half(3), half(4), half(1), + half(2), half(11), half(12), half(9), half(10), + half(7), half(8), half(17), half(18), half(15), + half(16), half(13), half(14), half(23), half(24), + half(21), half(22), half(19), half(20)})); } // bfloat16 tests. diff --git a/tensorflow/lite/kernels/round_test.cc b/tensorflow/lite/kernels/round_test.cc index c3752827f3e61c..e3fccf888c9815 100644 --- a/tensorflow/lite/kernels/round_test.cc +++ b/tensorflow/lite/kernels/round_test.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -68,33 +69,29 @@ TEST(RoundOpTest, MultiDims) { } TEST(RoundOpTest, Float16SingleDim) { - RoundOpModel model({6}); - model.PopulateTensor( - model.input(), {Eigen::half(8.5), Eigen::half(0.0), Eigen::half(3.5), - Eigen::half(4.2), Eigen::half(-3.5), Eigen::half(-4.5)}); + RoundOpModel model({6}); + model.PopulateTensor(model.input(), + {half(8.5f), half(0.0f), half(3.5f), half(4.2f), + half(-3.5f), half(-4.5f)}); ASSERT_EQ(model.Invoke(), kTfLiteOk); - EXPECT_THAT( - model.GetOutput(), - ElementsAreArray({Eigen::half(8), Eigen::half(0), Eigen::half(4), - Eigen::half(4), Eigen::half(-4), Eigen::half(-4)})); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray( + {half(8), half(0), half(4), half(4), half(-4), half(-4)})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({6})); } TEST(RoundOpTest, Float16MultiDims) { - RoundOpModel model({2, 1, 1, 6}); - model.PopulateTensor( + RoundOpModel model({2, 1, 1, 6}); + model.PopulateTensor( model.input(), - {Eigen::half(0.0001), Eigen::half(8.0001), Eigen::half(0.9999), - Eigen::half(9.9999), Eigen::half(0.5), Eigen::half(-0.0001), - Eigen::half(-8.0001), Eigen::half(-0.9999), Eigen::half(-9.9999), - Eigen::half(-0.5), Eigen::half(-2.5), Eigen::half(1.5)}); + {half(0.0001f), half(8.0001f), half(0.9999f), half(9.9999f), half(0.5f), + half(-0.0001f), half(-8.0001f), half(-0.9999f), half(-9.9999f), + half(-0.5f), half(-2.5f), half(1.5f)}); ASSERT_EQ(model.Invoke(), kTfLiteOk); - EXPECT_THAT( - model.GetOutput(), - ElementsAreArray({Eigen::half(0), Eigen::half(8), Eigen::half(1), - Eigen::half(10), Eigen::half(0), Eigen::half(0), - Eigen::half(-8), Eigen::half(-1), Eigen::half(-10), - Eigen::half(-0), Eigen::half(-2), Eigen::half(2)})); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({half(0), half(8), half(1), half(10), half(0), + half(0), half(-8), half(-1), half(-10), + half(-0), half(-2), half(2)})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 6})); } diff --git a/tensorflow/lite/kernels/slice_test.cc b/tensorflow/lite/kernels/slice_test.cc index feb02c48d2f3aa..2f3430770f7b68 100644 --- a/tensorflow/lite/kernels/slice_test.cc +++ b/tensorflow/lite/kernels/slice_test.cc @@ -19,7 +19,6 @@ limitations under the License. #include #include -#include "Eigen/Core" #include #include #include "tensorflow/lite/core/c/common.h" @@ -29,6 +28,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/string_type.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -338,20 +338,16 @@ TEST_P(SliceOpTest, SliceBool) { } TEST_P(SliceOpTest, SliceFloat16) { - SliceOpModel m({3, 2, 3, 1}, {4}, {1, 0, 0, 0}, {4}, - {2, 1, -1, 1}, TensorType_INT32, - TensorType_FLOAT16, GetParam()); - m.SetInput({Eigen::half(1), Eigen::half(1), Eigen::half(1), Eigen::half(2), - Eigen::half(2), Eigen::half(2), Eigen::half(3), Eigen::half(3), - Eigen::half(3), Eigen::half(4), Eigen::half(4), Eigen::half(4), - Eigen::half(5), Eigen::half(5), Eigen::half(5), Eigen::half(6), - Eigen::half(6), Eigen::half(6)}); + SliceOpModel m({3, 2, 3, 1}, {4}, {1, 0, 0, 0}, {4}, + {2, 1, -1, 1}, TensorType_INT32, + TensorType_FLOAT16, GetParam()); + m.SetInput({half(1), half(1), half(1), half(2), half(2), half(2), half(3), + half(3), half(3), half(4), half(4), half(4), half(5), half(5), + half(5), half(6), half(6), half(6)}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 1, 3, 1})); - EXPECT_THAT( - m.GetOutput(), - ElementsAreArray({Eigen::half(3), Eigen::half(3), Eigen::half(3), - Eigen::half(5), Eigen::half(5), Eigen::half(5)})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({half(3), half(3), half(3), + half(5), half(5), half(5)})); } TEST_P(SliceOpTest, SliceBFloat16) { @@ -373,19 +369,16 @@ TEST_P(SliceOpTest, SliceBFloat16) { } TEST_P(SliceOpTest, BeginNonZeroSizeMinus1Axis1Float16) { - SliceOpModel m({3, 3, 2, 1}, {4}, {1, 1, 0, 0}, {4}, - {2, -1, 1, 1}, TensorType_INT32, - TensorType_FLOAT16, GetParam()); - m.SetInput({Eigen::half(1), Eigen::half(1), Eigen::half(2), Eigen::half(2), - Eigen::half(3), Eigen::half(3), Eigen::half(4), Eigen::half(4), - Eigen::half(5), Eigen::half(5), Eigen::half(6), Eigen::half(6), - Eigen::half(7), Eigen::half(7), Eigen::half(8), Eigen::half(8), - Eigen::half(9), Eigen::half(9)}); + SliceOpModel m({3, 3, 2, 1}, {4}, {1, 1, 0, 0}, {4}, + {2, -1, 1, 1}, TensorType_INT32, + TensorType_FLOAT16, GetParam()); + m.SetInput({half(1), half(1), half(2), half(2), half(3), half(3), half(4), + half(4), half(5), half(5), half(6), half(6), half(7), half(7), + half(8), half(8), half(9), half(9)}); ASSERT_EQ(m.Invoke(), kTfLiteOk); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 1, 1})); EXPECT_THAT(m.GetOutput(), - ElementsAreArray({Eigen::half(5), Eigen::half(6), Eigen::half(8), - Eigen::half(9)})); + ElementsAreArray({half(5), half(6), half(8), half(9)})); } TEST_P(SliceOpTest, BeginNonZeroSizeMinus1Axis1BFloat16) { diff --git a/tensorflow/lite/kernels/strided_slice_test.cc b/tensorflow/lite/kernels/strided_slice_test.cc index 6ba4ef3b78977f..f7c79680576fe1 100644 --- a/tensorflow/lite/kernels/strided_slice_test.cc +++ b/tensorflow/lite/kernels/strided_slice_test.cc @@ -22,8 +22,10 @@ limitations under the License. 
#include #include +#include "Eigen/Core" // from @eigen_archive // IWYU pragma: keep #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace { @@ -152,7 +154,7 @@ class StridedSliceOpModel : public SingleOpModel { template class StridedSliceOpTest : public ::testing::Test {}; -using DataTypes = ::testing::Types; TYPED_TEST_SUITE(StridedSliceOpTest, DataTypes); @@ -347,7 +349,9 @@ TYPED_TEST(StridedSliceOpTest, In1D_Int32End) { continue; } std::vector values(32768); - std::iota(values.begin(), values.end(), TypeParam(0)); + for (int i = 0; i < 32768; ++i) { + values[i] = static_cast(i); + } StridedSliceOpModel m({32768}, {1}, {1}, {1}, values, {0}, {32768}, {1}, 0, 0, 0, 0, 0, diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc index 2ebeb4a9457280..f792bd31529582 100644 --- a/tensorflow/lite/kernels/test_util.cc +++ b/tensorflow/lite/kernels/test_util.cc @@ -59,6 +59,7 @@ limitations under the License. #include "tensorflow/lite/tools/logging.h" #include "tensorflow/lite/tools/serialization/writer_lib.h" #include "tensorflow/lite/tools/versioning/op_version.h" +#include "tensorflow/lite/types/fp16.h" // IWYU pragma: keep #include "tensorflow/lite/version.h" #include "tsl/platform/logging.h" diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h index cbdb74d29d04aa..69053a598785a2 100644 --- a/tensorflow/lite/kernels/test_util.h +++ b/tensorflow/lite/kernels/test_util.h @@ -38,7 +38,6 @@ limitations under the License. #include #include -#include "fp16/fp16.h" // from @FP16 #include "absl/algorithm/container.h" #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" @@ -57,6 +56,8 @@ limitations under the License. #include "tensorflow/lite/string_util.h" #include "tensorflow/lite/testing/util.h" // IWYU pragma: keep #include "tensorflow/lite/tools/optimize/quantization_utils.h" +#include "tensorflow/lite/types/fp16.h" +#include "tensorflow/lite/types/half.h" #include "tensorflow/lite/util.h" #include "tsl/platform/logging.h" @@ -134,7 +135,7 @@ inline std::vector Dequantize(const std::vector& data, float scale, } template <> -constexpr TfLiteType typeToTfLiteType() { +constexpr TfLiteType typeToTfLiteType() { return kTfLiteFloat16; } @@ -1362,7 +1363,7 @@ TFLITE_TENSOR_TYPE_ASSOC(uint16_t, TensorType_UINT16); TFLITE_TENSOR_TYPE_ASSOC(uint32_t, TensorType_UINT32); TFLITE_TENSOR_TYPE_ASSOC(uint64_t, TensorType_UINT64); TFLITE_TENSOR_TYPE_ASSOC(TfLiteFloat16, TensorType_FLOAT16); -TFLITE_TENSOR_TYPE_ASSOC(Eigen::half, TensorType_FLOAT16); +TFLITE_TENSOR_TYPE_ASSOC(half, TensorType_FLOAT16); TFLITE_TENSOR_TYPE_ASSOC(TfLiteBFloat16, TensorType_BFLOAT16); TFLITE_TENSOR_TYPE_ASSOC(Eigen::bfloat16, TensorType_BFLOAT16); TFLITE_TENSOR_TYPE_ASSOC(float, TensorType_FLOAT32); @@ -1461,13 +1462,13 @@ struct TypeUnion { }; template <> -struct TypeUnion { +struct TypeUnion { public: // NOLINTNEXTLINE static constexpr TensorType tensor_type = TensorType::TensorType_FLOAT16; // NOLINTNEXTLINE static constexpr TfLiteType tflite_type = TfLiteType::kTfLiteFloat16; - typedef Eigen::half ScalarType; + typedef half ScalarType; }; template <> diff --git a/tensorflow/lite/kernels/test_util_test.cc b/tensorflow/lite/kernels/test_util_test.cc index ed9a679b4e4d33..01f514692b0616 100644 --- a/tensorflow/lite/kernels/test_util_test.cc +++ b/tensorflow/lite/kernels/test_util_test.cc @@ -27,6 +27,7 @@ limitations under the License. 
#include "tensorflow/lite/array.h" #include "tensorflow/lite/core/c/common.h" #include "tensorflow/lite/kernels/test_delegate_providers.h" +#include "tensorflow/lite/types/half.h" #include "tensorflow/lite/util.h" namespace tflite { @@ -197,6 +198,14 @@ TEST(TestUtilTest, QuantizeVectorScalingUp) { EXPECT_THAT(q_data, ElementsAreArray(expected)); } +TEST(TestUtilTest, DequantizeVectorFp16) { + std::vector data = {half(-1.0f), half(-0.5f), half(0.0f), half(0.5f), + half(1.0f)}; + auto f_data = Dequantize(data, /*scale=*/0.1f, /*zero_point=*/0); + std::vector expected = {-0.1f, -0.05f, 0.0f, 0.05f, 0.1f}; + EXPECT_THAT(f_data, ElementsAreArray(tflite::ArrayFloatNear(expected, 1e-7))); +} + TEST(DimsAreMatcherTestTensor, ValidOneD) { TensorUniquePtr t = BuildTfLiteTensor(kTfLiteInt32, {2}, kTfLiteDynamic); EXPECT_THAT(t.get(), DimsAre({2})); diff --git a/tensorflow/lite/profiling/proto/CMakeLists.txt b/tensorflow/lite/profiling/proto/CMakeLists.txt index 0bfa81a41476f3..5738c992fc2839 100644 --- a/tensorflow/lite/profiling/proto/CMakeLists.txt +++ b/tensorflow/lite/profiling/proto/CMakeLists.txt @@ -17,8 +17,8 @@ find_package(Protobuf REQUIRED) add_library(profiling_info_proto profiling_info.proto) list(APPEND profiling_info_generated_files - ${CMAKE_BINARY_DIR}/tflite/profiling/proto/profiling_info.pb.cc - ${CMAKE_BINARY_DIR}/tflite/profiling/proto/profiling_info.pb.h) + ${CMAKE_BINARY_DIR}/tensorflow/lite/profiling/proto/profiling_info.pb.cc + ${CMAKE_BINARY_DIR}/tensorflow/lite/profiling/proto/profiling_info.pb.h) # Generate profiling_info.pb.cc and profiling_info.pb.h from # profiling_info.proto using protoc. Once the protobuf package version is @@ -26,7 +26,7 @@ list(APPEND profiling_info_generated_files add_custom_command( OUTPUT ${profiling_info_generated_files} COMMAND ${Protobuf_PROTOC_EXECUTABLE} - ARGS --cpp_out=${CMAKE_BINARY_DIR} --proto_path=${CMAKE_CURRENT_SOURCE_DIR}/../../.. tflite/profiling/proto/profiling_info.proto + ARGS --cpp_out=${CMAKE_BINARY_DIR} --proto_path=${TENSORFLOW_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/profiling_info.proto DEPENDS ${Protobuf_PROTOC_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/profiling_info.proto ) @@ -37,8 +37,8 @@ target_include_directories(profiling_info_proto PUBLIC ${CMAKE_BINARY_DIR}) add_library(model_runtime_info_proto model_runtime_info.proto) list(APPEND model_runtime_info_generated_files - ${CMAKE_BINARY_DIR}/tflite/profiling/proto/model_runtime_info.pb.cc - ${CMAKE_BINARY_DIR}/tflite/profiling/proto/model_runtime_info.pb.h + ${CMAKE_BINARY_DIR}/tensorflow/lite/profiling/proto/model_runtime_info.pb.cc + ${CMAKE_BINARY_DIR}/tensorflow/lite/profiling/proto/model_runtime_info.pb.h ) # Generate model_runtime_info.pb.cc and model_runtime_info.pb.h from @@ -47,7 +47,7 @@ list(APPEND model_runtime_info_generated_files add_custom_command( OUTPUT ${model_runtime_info_generated_files} COMMAND ${Protobuf_PROTOC_EXECUTABLE} - ARGS --cpp_out=${CMAKE_BINARY_DIR} --proto_path=${CMAKE_CURRENT_SOURCE_DIR}/../../.. 
tflite/profiling/proto/model_runtime_info.proto + ARGS --cpp_out=${CMAKE_BINARY_DIR} --proto_path=${TENSORFLOW_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/model_runtime_info.proto DEPENDS ${Protobuf_PROTOC_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/model_runtime_info.proto ${profiling_info_generated_files} ) diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index 3880f6461ed74b..3babca6c653022 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -12,6 +12,7 @@ package( "//tensorflow:__subpackages__", "//tensorflow:internal", "//third_party/odml/infra/genai/conversion:__subpackages__", + "//third_party/odml/litert/litert/python:__subpackages__", "//third_party/odml/model_customization/quantization:__subpackages__", "//third_party/py/ai_edge_torch:__subpackages__", "//third_party/py/tensorflow_federated:__subpackages__", diff --git a/tensorflow/lite/python/testdata/double_op.cc b/tensorflow/lite/python/testdata/double_op.cc index a6f8c542cd3b19..9d227a1e83e8ea 100644 --- a/tensorflow/lite/python/testdata/double_op.cc +++ b/tensorflow/lite/python/testdata/double_op.cc @@ -52,8 +52,8 @@ class DoubleOp : public OpKernel { }; REGISTER_KERNEL_BUILDER( - Name("Double").Device(DEVICE_CPU).TypeConstraint("T"), - DoubleOp); + Name("Double").Device(DEVICE_CPU).TypeConstraint("T"), + DoubleOp); REGISTER_KERNEL_BUILDER( Name("Double").Device(DEVICE_CPU).TypeConstraint("T"), DoubleOp); diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index 958928db4663d8..ba3ce1c9e0b9e8 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -275,6 +275,7 @@ cc_library( hdrs = ["split.h"], deps = [ "//tensorflow/lite:string", + "//tensorflow/lite/types:half", "@eigen_archive//:eigen3", ], ) @@ -286,7 +287,9 @@ cc_test( deps = [ ":split", "//tensorflow/lite:string", + "//tensorflow/lite/types:half", "@com_google_googletest//:gtest_main", + "@eigen_archive//:eigen3", ], ) @@ -333,6 +336,7 @@ cc_library( "//tensorflow/lite/tools:logging", "//tensorflow/lite/tools/delegates:delegate_provider_hdr", "//tensorflow/lite/tools/evaluation:utils", + "//tensorflow/lite/types:half", "@com_google_absl//absl/strings", "@eigen_archive//:eigen3", ] + select({ diff --git a/tensorflow/lite/testing/split.h b/tensorflow/lite/testing/split.h index ec932a8de8d68f..5431bccf1a72f6 100644 --- a/tensorflow/lite/testing/split.h +++ b/tensorflow/lite/testing/split.h @@ -25,6 +25,7 @@ limitations under the License. #include "Eigen/Core" // from @eigen_archive #include "tensorflow/lite/string_type.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace testing { @@ -199,12 +200,10 @@ inline std::vector> Split(const string& s, } template <> -inline std::vector Split(const string& s, - const string& delimiter) { - std::vector fields; +inline std::vector Split(const string& s, const string& delimiter) { + std::vector fields; for (const auto& p : SplitToPos(s, delimiter)) { - fields.push_back(Eigen::half_impl::float_to_half_rtne( - strtof(s.data() + p.first, nullptr))); + fields.push_back(static_cast(strtof(s.data() + p.first, nullptr))); } return fields; } diff --git a/tensorflow/lite/testing/split_test.cc b/tensorflow/lite/testing/split_test.cc index c8824395ea97dc..90b8276b3ed654 100644 --- a/tensorflow/lite/testing/split_test.cc +++ b/tensorflow/lite/testing/split_test.cc @@ -16,7 +16,9 @@ limitations under the License. 
#include #include +#include "Eigen/Core" // from @eigen_archive #include "tensorflow/lite/string_type.h" +#include "tensorflow/lite/types/half.h" namespace tflite { namespace testing { @@ -45,6 +47,17 @@ TEST(SplitTest, SplitFloat) { EXPECT_THAT(Split("1.0 B 1e-5", " "), ElementsAre(1.0, 0.0, 1e-5)); } +TEST(SplitTest, SplitHalf) { + EXPECT_THAT(Split("1.0 2.5 1e-2", " "), + ElementsAre(half(1.0f), half(2.5f), half(0.01f))); +} + +TEST(SplitTest, SplitBfloat16) { + EXPECT_THAT(Split("1.0 2.5 1e-2", " "), + ElementsAre(Eigen::bfloat16(1.0f), Eigen::bfloat16(2.5f), + Eigen::bfloat16(0.01f))); +} + TEST(SplitTest, SplitInt) { EXPECT_THAT(Split("1,-1,258", ","), ElementsAre(1, -1, 258)); } diff --git a/tensorflow/lite/testing/tflite_driver.cc b/tensorflow/lite/testing/tflite_driver.cc index 89fed23bb7d2a8..5b15e6a6ed0ee5 100644 --- a/tensorflow/lite/testing/tflite_driver.cc +++ b/tensorflow/lite/testing/tflite_driver.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/lite/testing/result_expectations.h" #include "tensorflow/lite/tools/delegates/delegate_provider.h" #include "tensorflow/lite/tools/logging.h" +#include "tensorflow/lite/types/half.h" #if !defined(__APPLE__) #include "tensorflow/lite/delegates/flex/delegate.h" #endif @@ -405,11 +406,11 @@ void TfLiteDriver::SetInput(const std::string& name, break; } case kTfLiteFloat16: { - const auto& values = testing::Split(csv_values, ","); + const auto& values = testing::Split(csv_values, ","); for (auto k : values) { TFLITE_LOG(INFO) << "input" << k; } - if (!CheckSizes(tensor->bytes, values.size())) return; + if (!CheckSizes(tensor->bytes, values.size())) return; SetTensorData(values, tensor->data.raw); break; } @@ -500,7 +501,7 @@ void TfLiteDriver::SetExpectation(const std::string& name, expected_output_[id]->SetData>(csv_values); break; case kTfLiteFloat16: - expected_output_[id]->SetData(csv_values); + expected_output_[id]->SetData(csv_values); break; case kTfLiteBFloat16: expected_output_[id]->SetData(csv_values); diff --git a/tensorflow/lite/tools/benchmark/proto/CMakeLists.txt b/tensorflow/lite/tools/benchmark/proto/CMakeLists.txt index 12a7e577bd3277..6a39f06e03c32b 100644 --- a/tensorflow/lite/tools/benchmark/proto/CMakeLists.txt +++ b/tensorflow/lite/tools/benchmark/proto/CMakeLists.txt @@ -17,8 +17,8 @@ find_package(Protobuf REQUIRED) add_library(benchmark_result_proto benchmark_result.proto) list(APPEND benchmark_result_generated_files - ${CMAKE_BINARY_DIR}/tflite/tools/benchmark/proto/benchmark_result.pb.cc - ${CMAKE_BINARY_DIR}/tflite/tools/benchmark/proto/benchmark_result.pb.h) + ${CMAKE_BINARY_DIR}/tensorflow/lite/tools/benchmark/proto/benchmark_result.pb.cc + ${CMAKE_BINARY_DIR}/tensorflow/lite/tools/benchmark/proto/benchmark_result.pb.h) # Generate benchmark_result.pb.cc and benchmark_result.pb.h from # benchmark_result.proto using protoc. Once the protobuf package version is @@ -26,7 +26,7 @@ list(APPEND benchmark_result_generated_files add_custom_command( OUTPUT ${benchmark_result_generated_files} COMMAND ${Protobuf_PROTOC_EXECUTABLE} - ARGS --cpp_out=${CMAKE_BINARY_DIR} --proto_path=${CMAKE_CURRENT_SOURCE_DIR}/../../../.. 
tflite/tools/benchmark/proto/benchmark_result.proto + ARGS --cpp_out=${CMAKE_BINARY_DIR} --proto_path=${TENSORFLOW_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_result.proto DEPENDS ${Protobuf_PROTOC_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_result.proto ) diff --git a/tensorflow/lite/tools/cmake/modules/ml_dtypes/CMakeLists.txt b/tensorflow/lite/tools/cmake/modules/ml_dtypes/CMakeLists.txt index 91e893ee377048..8be897f54d728f 100644 --- a/tensorflow/lite/tools/cmake/modules/ml_dtypes/CMakeLists.txt +++ b/tensorflow/lite/tools/cmake/modules/ml_dtypes/CMakeLists.txt @@ -24,8 +24,9 @@ endif() add_library(ml_dtypes INTERFACE) target_include_directories(ml_dtypes INTERFACE - "${ML_DTYPES_SOURCE_DIR}" - "${ML_DTYPES_SOURCE_DIR}/ml_dtypes") + "$" + "$" + "$") file(GLOB ML_DTYPES_PUBLIC_HEADERS ${ML_DTYPES_SOURCE_DIR}/ml_dtypes/include/*.h) set_target_properties(ml_dtypes PROPERTIES diff --git a/tensorflow/lite/tools/cmake/modules/xnnpack.cmake b/tensorflow/lite/tools/cmake/modules/xnnpack.cmake index c781a2fc18d86a..14e4370cbbd929 100644 --- a/tensorflow/lite/tools/cmake/modules/xnnpack.cmake +++ b/tensorflow/lite/tools/cmake/modules/xnnpack.cmake @@ -23,7 +23,7 @@ OverridableFetchContent_Declare( xnnpack GIT_REPOSITORY https://github.com/google/XNNPACK # Sync with tensorflow/workspace2.bzl - GIT_TAG 1b918df9d1744ae40725254f4baa592ed05c912e + GIT_TAG 183297df5c945236cbc4bb1f625f9f2008bfc564 GIT_PROGRESS TRUE PREFIX "${CMAKE_BINARY_DIR}" SOURCE_DIR "${CMAKE_BINARY_DIR}/xnnpack" @@ -49,5 +49,6 @@ include_directories( "${PTHREADPOOL_SOURCE_DIR}/include" "${FP16_SOURCE_DIR}/include" "${XNNPACK_SOURCE_DIR}/include" + "${XNNPACK_SOURCE_DIR}" "${CPUINFO_SOURCE_DIR}/" ) diff --git a/tensorflow/lite/tools/optimize/calibration/calibrator_test.cc b/tensorflow/lite/tools/optimize/calibration/calibrator_test.cc index f73bbfa1288754..d80391b3967130 100644 --- a/tensorflow/lite/tools/optimize/calibration/calibrator_test.cc +++ b/tensorflow/lite/tools/optimize/calibration/calibrator_test.cc @@ -39,7 +39,7 @@ limitations under the License. #include "tensorflow/lite/tools/optimize/calibration/calibration_reader.h" namespace { -tensorflow::string* g_test_model_dir = nullptr; +std::string* g_test_model_dir = nullptr; } // namespace namespace tflite { @@ -716,7 +716,7 @@ TEST(CalibratorTest, CalibrationWithCallOnce) { } // namespace tflite int main(int argc, char** argv) { - tensorflow::string model_file; + std::string model_file; const std::vector flag_list = { tensorflow::Flag("test_model_file", &model_file, "Path to test tflite model file."), @@ -727,8 +727,7 @@ int main(int argc, char** argv) { std::cerr << "Required test_model_file\n"; std::abort(); } - g_test_model_dir = - new tensorflow::string(tensorflow::io::Dirname(model_file)); + g_test_model_dir = new std::string(tensorflow::io::Dirname(model_file)); ::tensorflow::port::InitMain(argv[0], &argc, &argv); return RUN_ALL_TESTS(); } diff --git a/tensorflow/lite/tools/optimize/quantization_utils_test.cc b/tensorflow/lite/tools/optimize/quantization_utils_test.cc index 33f62f0c850363..e1494788dc45b4 100644 --- a/tensorflow/lite/tools/optimize/quantization_utils_test.cc +++ b/tensorflow/lite/tools/optimize/quantization_utils_test.cc @@ -36,7 +36,7 @@ limitations under the License. 
#include "tensorflow/lite/testing/util.h" namespace { -tensorflow::string* g_test_model_dir = nullptr; +std::string* g_test_model_dir = nullptr; } // namespace namespace tflite { @@ -901,7 +901,7 @@ TEST_F(QuantizationUtilsTest, ExtendToPowerOfTwo) { } // namespace tflite int main(int argc, char** argv) { - tensorflow::string model_file; + std::string model_file; const std::vector flag_list = { tensorflow::Flag("test_model_file", &model_file, "Path to test tflite model file."), @@ -912,8 +912,7 @@ int main(int argc, char** argv) { std::cerr << "Required test_model_file\n"; std::abort(); } - g_test_model_dir = - new tensorflow::string(tensorflow::io::Dirname(model_file)); + g_test_model_dir = new std::string(tensorflow::io::Dirname(model_file)); ::tensorflow::port::InitMain(argv[0], &argc, &argv); return RUN_ALL_TESTS(); } diff --git a/tensorflow/lite/tools/optimize/quantize_model_test.cc b/tensorflow/lite/tools/optimize/quantize_model_test.cc index 8a0013b09e6851..319da9523aea7e 100644 --- a/tensorflow/lite/tools/optimize/quantize_model_test.cc +++ b/tensorflow/lite/tools/optimize/quantize_model_test.cc @@ -39,7 +39,7 @@ limitations under the License. // Note: More rigorous model tests can be found in subgraph_quantizer_test.cc namespace { -tensorflow::string* g_test_model_dir = nullptr; +std::string* g_test_model_dir = nullptr; } // namespace namespace tflite { @@ -2309,7 +2309,7 @@ TEST_P(BiasInputTest, QuantizationSucceeds) { } // namespace tflite int main(int argc, char** argv) { - tensorflow::string model_file; + std::string model_file; const std::vector flag_list = { tensorflow::Flag("test_model_file", &model_file, "Path to test tflite model file."), @@ -2320,8 +2320,7 @@ int main(int argc, char** argv) { std::cerr << "Required test_model_file\n"; std::abort(); } - g_test_model_dir = - new tensorflow::string(tensorflow::io::Dirname(model_file)); + g_test_model_dir = new std::string(tensorflow::io::Dirname(model_file)); ::tensorflow::port::InitMain(argv[0], &argc, &argv); return RUN_ALL_TESTS(); } diff --git a/tensorflow/lite/tools/utils.cc b/tensorflow/lite/tools/utils.cc index 6173ec1b112203..96b8bf8689e610 100644 --- a/tensorflow/lite/tools/utils.cc +++ b/tensorflow/lite/tools/utils.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include #include +#include #include "absl/types/span.h" #include "Eigen/Core" // from @eigen_archive diff --git a/tensorflow/lite/types/BUILD b/tensorflow/lite/types/BUILD index c00aadb6ae46e9..0bc596f7782e2a 100644 --- a/tensorflow/lite/types/BUILD +++ b/tensorflow/lite/types/BUILD @@ -28,4 +28,8 @@ cc_library( "fp16.h", "half.h", ], + # copybara:uncomment_begin(google-only) + # compatible_with = ["//buildenv/target:non_prod"], + # copybara:uncomment_end + deps = ["@FP16"], ) diff --git a/tensorflow/lite/types/fp16.h b/tensorflow/lite/types/fp16.h index cc63fe7d21fbd8..94484350f68bcd 100644 --- a/tensorflow/lite/types/fp16.h +++ b/tensorflow/lite/types/fp16.h @@ -27,6 +27,13 @@ limitations under the License. // - https://github.com/google/XNNPACK/issues/6989 // We also don't need a lot of the functionality in the upstream library. +// If building with a library that uses //third_party/FP16, that library +// provides its own fp16 conversion functions. Avoid redefining them here to +// prevent build errors. +// FP16_H and FP16_BITCASTS_H are defined by //third_party/FP16/fp16.h and +// //third_party/FP16/bitcasts.h respectively. 
+#if !defined(FP16_H) && !defined(FP16_BITCASTS_H) + static inline float fp32_from_bits(uint32_t w) { union { uint32_t as_bits; @@ -216,4 +223,6 @@ static inline uint16_t fp16_ieee_from_fp32_value(float f) { (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); } +#endif // !defined(FP16_H) && !defined(FP16_BITCASTS_H) + #endif // TENSORFLOW_LITE_TYPES_FP16_H_ diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 77eb63a7551ed6..79c85e589caa13 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -29,7 +29,7 @@ # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2025, 12, 7) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2025, 12, 24) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None diff --git a/tensorflow/python/compiler/tensorrt/test/BUILD b/tensorflow/python/compiler/tensorrt/test/BUILD index 388140b04fac1d..26582e8aac4f51 100644 --- a/tensorflow/python/compiler/tensorrt/test/BUILD +++ b/tensorflow/python/compiler/tensorrt/test/BUILD @@ -74,7 +74,6 @@ filegroup( base_tags = [ "no_cuda_on_cpu_tap", - "cuda-only", "no_windows", "nomac", # TODO(b/303453873): Re-enable tests once TensorRT has been updated diff --git a/tensorflow/python/debug/lib/BUILD b/tensorflow/python/debug/lib/BUILD index f30a5a8c6668ec..0b3860dbaa9934 100644 --- a/tensorflow/python/debug/lib/BUILD +++ b/tensorflow/python/debug/lib/BUILD @@ -331,7 +331,6 @@ cuda_py_strict_test( shard_count = 4, tags = [ "no_windows", # TODO(b/142475891): Enable this test on Windows. - "cuda-only", #TODO(ROCm) Re-enable after issue is fixed. ], xla_enable_strict_auto_jit = False, # Node names are different with autojit deps = [ @@ -363,7 +362,6 @@ cuda_py_strict_test( python_version = "PY3", tags = [ "no_windows_gpu", - "cuda-only", #TODO(ROCm) Re-enable after issue is fixed. ], deps = [ ":debug_events_reader", diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 7b5f6a94506487..cf156d75a4380d 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -712,7 +712,6 @@ distribute_py_strict_test( "multi_and_single_gpu", "no_cuda_asan", # b/213388775 "no_oss", # b/241013307 - "cuda-only", "notap", # Flaky; TODO(b/289970206) ], tpu_tags = [ @@ -2502,7 +2501,6 @@ distribute_py_strict_test( "multi_and_single_gpu", "nomac", # TODO(b/201788023): Attempt MultiProcessCluster to fix this. 
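The compat.py hunk above only bumps the forward-compatibility horizon date, but the surrounding comment describes how that date is consumed. A minimal, hedged Python sketch of that consumption pattern follows; compat.forward_compatible is the existing TensorFlow helper, while dispatch_example is an illustrative name, not code from this patch.

from tensorflow.python.compat import compat

def dispatch_example():
  # Illustrative only: guard a new graph-construction path behind the horizon.
  # forward_compatible() returns True once the ambient date (plus any
  # TF_FORWARD_COMPATIBILITY_DELTA_DAYS offset) passes the given date.
  if compat.forward_compatible(2025, 12, 24):
    return "new behavior"
  return "compatible fallback"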
"notpu", - "cuda-only", #times out ], deps = [ ":distribute_lib", diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index 7e905c3d51b0c1..1c28e3b8cc706c 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -164,7 +164,6 @@ tf_py_strict_test( "no_cuda_on_cpu_tap", "no_oss", # TODO(b/206860622): Broken with numpy 1.20+ "no_pip", - "cuda-only", "no_windows", ], deps = [ @@ -209,7 +208,6 @@ tf_py_strict_test( "no_cuda_on_cpu_tap", "no_oss", # TODO(b/206860622): Broken with numpy 1.20+ "no_pip", - "cuda-only", "no_windows", ], deps = [":feature_column_v2_test_main_lib"], diff --git a/tensorflow/python/framework/BUILD b/tensorflow/python/framework/BUILD index ba216815b3a623..e3ebab0c442106 100644 --- a/tensorflow/python/framework/BUILD +++ b/tensorflow/python/framework/BUILD @@ -1502,7 +1502,6 @@ cuda_py_strict_test( srcs = ["config_test.py"], tags = [ "no_pip", # test_ops are not available in pip - "cuda-only", ], deps = [ ":config", diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index b44ef77a7e901d..0aaa5add6081a9 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -1281,6 +1281,12 @@ def is_tf_type(x): # pylint: disable=invalid-name Returns: `True` if `x` is a TensorFlow-native type. """ + # ObjectProxy is a special type of object that is used by wrapt to wrap + # objects. It is not a Tensor. + if (type(x).__name__ == "ObjectProxy"): + return False + if (type(x).__name__ == "_DictWrapper"): + return False return isinstance(x, tf_type_classes) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 9369ffa456392a..4206ef9f882ffc 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -83,6 +83,9 @@ # pylint: disable=g-import-not-at-top try: + # Disable loading HDF5 plugins from a default path and prevent ZDI-CAN-25480. + # Importing h5py prior to importing tensorflow will restore the old behavior. + os.environ['HDF5_PLUGIN_PATH'] = 'disable' import h5py except ImportError: h5py = None diff --git a/tensorflow/python/keras/keras_parameterized.py b/tensorflow/python/keras/keras_parameterized.py index 054df939e8e59a..1a44e6b76f3276 100644 --- a/tensorflow/python/keras/keras_parameterized.py +++ b/tensorflow/python/keras/keras_parameterized.py @@ -17,6 +17,7 @@ import collections import functools import itertools +import os import unittest from absl.testing import parameterized @@ -30,6 +31,9 @@ from tensorflow.python.util import nest try: + # Disable loading HDF5 plugins from a default path and prevent ZDI-CAN-25480. + # Importing h5py prior to importing tensorflow will restore the old behavior. + os.environ['HDF5_PLUGIN_PATH'] = 'disable' import h5py # pylint:disable=g-import-not-at-top except ImportError: h5py = None diff --git a/tensorflow/python/keras/saving/hdf5_format.py b/tensorflow/python/keras/saving/hdf5_format.py index 1f6bbc43320d0a..05a2c9282909a2 100644 --- a/tensorflow/python/keras/saving/hdf5_format.py +++ b/tensorflow/python/keras/saving/hdf5_format.py @@ -34,6 +34,9 @@ # pylint: disable=g-import-not-at-top try: + # Disable loading HDF5 plugins from a default path and prevent ZDI-CAN-25480. + # Importing h5py prior to importing tensorflow will restore the old behavior. 
+ os.environ['HDF5_PLUGIN_PATH'] = 'disable' import h5py HDF5_OBJECT_HEADER_LIMIT = 64512 except ImportError: diff --git a/tensorflow/python/keras/saving/save.py b/tensorflow/python/keras/saving/save.py index eee859233e5eba..b9ba0bc20d0ba0 100644 --- a/tensorflow/python/keras/saving/save.py +++ b/tensorflow/python/keras/saving/save.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== """Keras model saving code.""" - +import os from tensorflow.python import tf2 from tensorflow.python.keras.saving import hdf5_format from tensorflow.python.keras.saving import saving_utils @@ -25,6 +25,9 @@ # pylint: disable=g-import-not-at-top try: + # Disable loading HDF5 plugins from a default path and prevent ZDI-CAN-25480. + # Importing h5py prior to importing tensorflow will restore the old behavior. + os.environ['HDF5_PLUGIN_PATH'] = 'disable' import h5py except ImportError: h5py = None diff --git a/tensorflow/python/kernel_tests/image_ops/BUILD b/tensorflow/python/kernel_tests/image_ops/BUILD index b7c79c74dae5f9..640edf8f97a629 100644 --- a/tensorflow/python/kernel_tests/image_ops/BUILD +++ b/tensorflow/python/kernel_tests/image_ops/BUILD @@ -141,7 +141,6 @@ cuda_py_strict_test( shard_count = 15, tags = [ "no_oss", # b/241024908 - "cuda-only", "nomac", # b/181799478 "notap", # b/31080670 ], diff --git a/tensorflow/python/kernel_tests/nn_ops/BUILD b/tensorflow/python/kernel_tests/nn_ops/BUILD index 507d00c15d196c..df5b780a6e7367 100644 --- a/tensorflow/python/kernel_tests/nn_ops/BUILD +++ b/tensorflow/python/kernel_tests/nn_ops/BUILD @@ -296,7 +296,6 @@ cuda_py_strict_test( shard_count = 4, tags = [ "no_mac_arm64", - "cuda-only", "optonly", # times out ], deps = [ @@ -438,7 +437,6 @@ cuda_py_strict_test( size = "medium", # http://b/30603882 timeout = "long", srcs = ["depthwise_conv_op_d9m_test.py"], - tags = ["cuda-only"], shard_count = 8, deps = [ ":depthwise_conv_op_base", diff --git a/tensorflow/python/kernel_tests/sparse_ops/BUILD b/tensorflow/python/kernel_tests/sparse_ops/BUILD index 37b8518b3c1ebe..20fe7ab1adfb98 100644 --- a/tensorflow/python/kernel_tests/sparse_ops/BUILD +++ b/tensorflow/python/kernel_tests/sparse_ops/BUILD @@ -108,7 +108,6 @@ cuda_py_strict_test( shard_count = 5, tags = [ "optonly", # b/77589990 - "cuda-only" ], deps = [ "//tensorflow/python/eager:def_function", diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc index c33014cc3ae5b2..1d897c4b67e512 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.cc +++ b/tensorflow/python/lib/core/ndarray_tensor.cc @@ -100,7 +100,7 @@ absl::Status PyArrayDescr_to_TF_DataType(PyArray_Descr* descr, if (!key_string) { return errors::Internal("Corrupt numpy type descriptor"); } - tensorflow::string key = key_string; + std::string key = key_string; // The typenames here should match the field names in the custom struct // types constructed in test_util.py. 
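For readers skimming the Keras hunks above (training.py, keras_parameterized.py, hdf5_format.py, save.py): they all apply the same guarded-import pattern. A self-contained sketch of that pattern, assuming only os and an optional h5py install:

import os

try:
  # Disable loading HDF5 plugins from a default path (mitigation for
  # ZDI-CAN-25480). Importing h5py before this point restores the old behavior.
  os.environ['HDF5_PLUGIN_PATH'] = 'disable'
  import h5py
except ImportError:
  h5py = None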
// TODO(mrry,keveman): Investigate Numpy type registration to replace this @@ -320,7 +320,8 @@ absl::Status EncodePyBytesArray(PyArrayObject* array, int64_t nelems, return absl::OkStatus(); } -absl::Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, uint64 nelems, +absl::Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, + uint64_t nelems, PyArrayObject* dst) { const void* tensor_data = TF_TensorData(src); DCHECK(tensor_data != nullptr); diff --git a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc index 703bab0f65a7b8..fbb1f10c855b15 100644 --- a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc +++ b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc @@ -253,10 +253,10 @@ absl::Status ArrayFromMemory(int dim_size, npy_intp* dims, void* data, auto* np_array = reinterpret_cast( PyArray_SimpleNewFromData(dim_size, dims, type_num, data)); if (np_array == nullptr) { - string shape_str = absl::StrJoin( + std::string shape_str = absl::StrJoin( absl::Span{dims, static_cast(dim_size)}, ", "); if (PyErr_Occurred()) { - string exception_str = PyExceptionFetch(); + std::string exception_str = PyExceptionFetch(); PyErr_Clear(); return errors::InvalidArgument( "Failed to create numpy array from tensor of shape [", shape_str, diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index 16ba9db74ba764..54178667bfddaa 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -63,7 +63,7 @@ PyObject* GetPyTrampoline() { struct PyCall { // Passed to python runtime to call the python function registered // with this "token". - string token; + std::string token; // The device on which Tensors are stored; only used for EagerPyFunc. Device* device = nullptr; @@ -164,7 +164,8 @@ absl::Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor, TF_RETURN_IF_ERROR(handle->Tensor(output_tensor)); // actual_device may be nullptr, which implies local CPU. if (expected_device == actual_device) return absl::OkStatus(); - const string& expected_device_name = expected_device->attributes().name(); + const std::string& expected_device_name = + expected_device->attributes().name(); if (actual_device == nullptr) { if (!IsCPUDevice(expected_device)) { return errors::Internal( @@ -380,7 +381,8 @@ class PyFuncOp : public OpKernel { return; } - OP_REQUIRES(ctx, static_cast(call.out.size()) == ctx->num_outputs(), + OP_REQUIRES(ctx, + static_cast(call.out.size()) == ctx->num_outputs(), errors::InvalidArgument(token_, " returns ", call.out.size(), " values, but expects to see ", ctx->num_outputs(), " values.")); @@ -396,7 +398,7 @@ class PyFuncOp : public OpKernel { } private: - string token_; + std::string token_; // True if and only if this op should execute the python function eagerly, // i.e., if and only if the eager attribute is set. 
diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc index 3547cd4a8ddc81..6b2b6be8cf53e7 100644 --- a/tensorflow/python/lib/core/py_seq_tensor.cc +++ b/tensorflow/python/lib/core/py_seq_tensor.cc @@ -360,8 +360,9 @@ struct ConverterTraits { typedef Converter Int64Converter; template <> -struct ConverterTraits { - static AbstractTensorInterface* CreateScalar(TFE_Context* ctx, uint64 value) { +struct ConverterTraits { + static AbstractTensorInterface* CreateScalar(TFE_Context* ctx, + uint64_t value) { return tensorflow::unwrap(ctx)->CreateUint64Scalar(value); } @@ -370,7 +371,7 @@ struct ConverterTraits { return tensorflow::unwrap(ctx)->CreateTensor(DT_UINT64, dim_sizes); } - static const char* ConvertScalar(PyObject* v, uint64* out) { + static const char* ConvertScalar(PyObject* v, uint64_t* out) { #if PY_MAJOR_VERSION < 3 if (TF_PREDICT_TRUE(PyInt_Check(v))) { *out = PyInt_AsUnsignedLongLongMask(v); @@ -394,10 +395,10 @@ struct ConverterTraits { } }; -typedef Converter UInt64Converter; +typedef Converter UInt64Converter; template <> -struct ConverterTraits { +struct ConverterTraits { static AbstractTensorInterface* CreateScalar(TFE_Context* ctx, int32_t value) { return tensorflow::unwrap(ctx)->CreateInt32Scalar(value); @@ -408,7 +409,7 @@ struct ConverterTraits { return tensorflow::unwrap(ctx)->CreateTensor(DT_INT32, dim_sizes); } - static const char* ConvertScalar(PyObject* v, int32* out) { + static const char* ConvertScalar(PyObject* v, int32_t* out) { int64_t i; #if PY_MAJOR_VERSION < 3 if (TF_PREDICT_TRUE(PyInt_Check(v))) { @@ -432,14 +433,14 @@ struct ConverterTraits { } else { return ErrorMixedTypes; } - *out = static_cast(static_cast(i)); + *out = static_cast(static_cast(i)); // Check for 32-bit overflow. if (TF_PREDICT_FALSE(i != *out)) return ErrorFoundInt64; return nullptr; } }; -typedef Converter Int32Converter; +typedef Converter Int32Converter; // Floating-point support @@ -694,11 +695,11 @@ TFE_TensorHandle* NumpyToTFE_TensorHandle(TFE_Context* ctx, PyObject* obj) { absl::Status status = tensorflow::NdarrayToTensor(ctx, obj, &tf_tensor); if (TF_PREDICT_FALSE(!status.ok())) { - PyErr_SetString(PyExc_ValueError, - tensorflow::strings::StrCat( - "Failed to convert a NumPy array to a Tensor (", - status.message(), ").") - .c_str()); + PyErr_SetString( + PyExc_ValueError, + absl::StrCat("Failed to convert a NumPy array to a Tensor (", + status.message(), ").") + .c_str()); return nullptr; } @@ -758,8 +759,7 @@ TFE_TensorHandle* PySeqToTFE_TensorHandle(TFE_Context* ctx, PyObject* obj, .ok()) { PyErr_SetString( PyExc_TypeError, - tensorflow::strings::StrCat("Invalid dtype argument value ", dtype) - .c_str()); + absl::StrCat("Invalid dtype argument value ", dtype).c_str()); return nullptr; } } diff --git a/tensorflow/python/lib/core/py_util.cc b/tensorflow/python/lib/core/py_util.cc index a78f0a12f21c3f..fa1845bd782841 100644 --- a/tensorflow/python/lib/core/py_util.cc +++ b/tensorflow/python/lib/core/py_util.cc @@ -45,7 +45,7 @@ const char* ClassName(PyObject* py) { // Returns a PyObject containing a string, or null void TryAppendTraceback(PyObject* ptype, PyObject* pvalue, PyObject* ptraceback, - string* out) { + std::string* out) { // The "traceback" module is assumed to be imported already by script_ops.py. 
PyObject* tb_module = PyImport_AddModule("traceback"); @@ -84,7 +84,7 @@ void TryAppendTraceback(PyObject* ptype, PyObject* pvalue, PyObject* ptraceback, #if PY_MAJOR_VERSION < 3 strings::StrAppend(out, PyString_AS_STRING(v), "\n"); #else - strings::StrAppend(out, PyUnicode_AsUTF8(v), "\n"); + absl::StrAppend(out, PyUnicode_AsUTF8(v), "\n"); #endif } @@ -92,7 +92,7 @@ void TryAppendTraceback(PyObject* ptype, PyObject* pvalue, PyObject* ptraceback, Py_DECREF(ret_val); } -string PyExceptionFetch() { +std::string PyExceptionFetch() { CHECK(PyErr_Occurred()) << "Must only call PyExceptionFetch after an exception."; PyObject* ptype; @@ -100,7 +100,7 @@ string PyExceptionFetch() { PyObject* ptraceback; PyErr_Fetch(&ptype, &pvalue, &ptraceback); PyErr_NormalizeException(&ptype, &pvalue, &ptraceback); - string err = ClassName(ptype); + std::string err = ClassName(ptype); if (pvalue) { PyObject* str = PyObject_Str(pvalue); @@ -108,11 +108,11 @@ string PyExceptionFetch() { #if PY_MAJOR_VERSION < 3 strings::StrAppend(&err, ": ", PyString_AS_STRING(str), "\n"); #else - strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str), "\n"); + absl::StrAppend(&err, ": ", PyUnicode_AsUTF8(str), "\n"); #endif Py_DECREF(str); } else { - strings::StrAppend(&err, "(unknown error message)\n"); + absl::StrAppend(&err, "(unknown error message)\n"); } TryAppendTraceback(ptype, pvalue, ptraceback, &err); diff --git a/tensorflow/python/lib/core/py_util.h b/tensorflow/python/lib/core/py_util.h index af1b21699e6502..d6b2b9f78ddca6 100644 --- a/tensorflow/python/lib/core/py_util.h +++ b/tensorflow/python/lib/core/py_util.h @@ -25,7 +25,7 @@ namespace tensorflow { // Fetch the exception message as a string. An exception must be set // (PyErr_Occurred() must be true). -string PyExceptionFetch(); +std::string PyExceptionFetch(); // Assert that Python GIL is held. inline void DCheckPyGilState() { diff --git a/tensorflow/python/ops/BUILD b/tensorflow/python/ops/BUILD index 069280d4425fb7..c624c412f3d12d 100644 --- a/tensorflow/python/ops/BUILD +++ b/tensorflow/python/ops/BUILD @@ -1006,9 +1006,6 @@ tf_py_strict_test( name = "collective_ops_test", size = "small", srcs = ["collective_ops_test.py"], - tags = [ - "cuda-only", - ], deps = [ ":array_ops", ":collective_ops", @@ -1037,7 +1034,6 @@ tf_py_strict_test( srcs = ["collective_ops_xla_test.py"], tags = [ "no_pip", - "cuda-only", "no_windows", "nomac", ], @@ -3594,9 +3590,6 @@ cuda_py_strict_test( srcs = ["nn_fused_batchnorm_d9m_test.py"], main = "nn_fused_batchnorm_d9m_test.py", shard_count = 4, - tags = [ - "cuda-only", - ], deps = [ ":nn_grad", ":nn_impl", @@ -3618,9 +3611,6 @@ cuda_py_strict_test( srcs = ["nn_fused_batchnorm_test.py"], main = "nn_fused_batchnorm_test.py", shard_count = 24, - tags = [ - "cuda-only", - ], deps = [ ":array_ops", ":gradient_checker", @@ -3765,7 +3755,6 @@ cuda_py_strict_test( main = "special_math_ops_test.py", shard_count = 10, tags = [ - "cuda-only", "no_windows_gpu", ], deps = [ diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 9f6644b4342ada..94dadf91a0e18d 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -982,8 +982,8 @@ def slice(input_, begin, size, name=None): Args: input_: A `Tensor`. - begin: An `int32` or `int64` `Tensor`. - size: An `int32` or `int64` `Tensor`. + begin: An `int16`, `int32` or `int64` `Tensor`. + size: An `int16`, `int32` or `int64` `Tensor`. name: A name for the operation (optional). 
Returns: diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 087af6a842fc86..c8e8e33ca9c69b 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1718,6 +1718,12 @@ def resize_images_v2(images, >>> max_10_20.shape.as_list() [1, 10, 10, 1] + Note: + The `bicubic` interpolation method currently does not have a GPU kernel + implementation. As a result, `tf.image.resize(..., method='bicubic')` + always executes on the CPU, even when GPU devices are available. + + Args: images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor of shape `[height, width, channels]`. diff --git a/tensorflow/python/ops/parallel_for/BUILD b/tensorflow/python/ops/parallel_for/BUILD index c1272a552ed4e0..67bc83f6dd5445 100644 --- a/tensorflow/python/ops/parallel_for/BUILD +++ b/tensorflow/python/ops/parallel_for/BUILD @@ -127,7 +127,6 @@ cuda_py_strict_test( shard_count = 16, tags = [ "no_oss", - "cuda-only", ], deps = [ ":control_flow_ops", diff --git a/tensorflow/python/ops/ragged/ragged_factory_ops.py b/tensorflow/python/ops/ragged/ragged_factory_ops.py index 55505df533d447..c279965f378982 100644 --- a/tensorflow/python/ops/ragged/ragged_factory_ops.py +++ b/tensorflow/python/ops/ragged/ragged_factory_ops.py @@ -30,9 +30,9 @@ from tensorflow.python.util.tf_export import tf_export -#=============================================================================== +# =============================================================================== # Op to construct a constant RaggedTensor from a nested Python list. -#=============================================================================== +# =============================================================================== @tf_export("ragged.constant") @dispatch.add_dispatch_support def constant( @@ -57,15 +57,16 @@ def constant( Args: pylist: A nested `list`, `tuple` or `np.ndarray`. Any nested element that - is not a `list`, `tuple` or `np.ndarray` must be a scalar value - compatible with `dtype`. + is not a `list`, `tuple` or `np.ndarray` must be a scalar value compatible + with `dtype`. dtype: The type of elements for the returned `RaggedTensor`. If not specified, then a default is chosen based on the scalar values in - `pylist`. + `pylist`. If there are no scalar values in `pylist`, then the default is + `tf.float32`. ragged_rank: An integer specifying the ragged rank of the returned `RaggedTensor`. Must be nonnegative and less than `K`. Defaults to - `max(0, K - 1)` if `inner_shape` is not specified. Defaults to - `max(0, K - 1 - len(inner_shape))` if `inner_shape` is specified. + `max(0, K - 1)` if `inner_shape` is not specified. Defaults to `max(0, K + - 1 - len(inner_shape))` if `inner_shape` is specified. inner_shape: A tuple of integers specifying the shape for individual inner values in the returned `RaggedTensor`. Defaults to `()` if `ragged_rank` is not specified. 
If `ragged_rank` is specified, then a default is chosen diff --git a/tensorflow/python/profiler/internal/BUILD b/tensorflow/python/profiler/internal/BUILD index 5b38d0f3f8eae8..ae4fae57926bfe 100644 --- a/tensorflow/python/profiler/internal/BUILD +++ b/tensorflow/python/profiler/internal/BUILD @@ -108,6 +108,7 @@ tf_python_pybind_extension( "//tensorflow/tools/pip_package:__subpackages__", ], deps = [ + "@local_xla//xla/python/profiler/internal:traceme_state", "@local_xla//xla/python/profiler/internal:traceme_wrapper", "@pybind11", ], diff --git a/tensorflow/python/profiler/internal/_pywrap_traceme.pyi b/tensorflow/python/profiler/internal/_pywrap_traceme.pyi index 105e2dce09d3a7..47b8b56c94a269 100644 --- a/tensorflow/python/profiler/internal/_pywrap_traceme.pyi +++ b/tensorflow/python/profiler/internal/_pywrap_traceme.pyi @@ -17,3 +17,5 @@ class TraceMe: def __init__(self, arg0: str, **kwargs) -> None: ... def SetMetadata(self, **kwargs) -> None: ... def Stop(self) -> None: ... + +def traceme_enabled(*args, **kwargs): ... diff --git a/tensorflow/python/profiler/internal/traceme_wrapper.cc b/tensorflow/python/profiler/internal/traceme_wrapper.cc index ba1b1a63674491..9397eb18134cf3 100644 --- a/tensorflow/python/profiler/internal/traceme_wrapper.cc +++ b/tensorflow/python/profiler/internal/traceme_wrapper.cc @@ -17,14 +17,33 @@ limitations under the License. #include "pybind11/attr.h" // from @pybind11 #include "pybind11/pybind11.h" // from @pybind11 +#include "xla/python/profiler/internal/traceme_state.h" namespace py = ::pybind11; using ::xla::profiler::TraceMeWrapper; +// Returns true if TraceMe is enabled. +// This is a low-overhead function that can be called frequently. +static PyObject* traceme_enabled(PyObject* self, PyObject* args) { + if (xla::profiler::traceme_enabled) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; +} + +static PyMethodDef traceme_method_def = {"traceme_enabled", traceme_enabled, + METH_NOARGS, + "Returns true if TraceMe is enabled."}; + PYBIND11_MODULE(_pywrap_traceme, m) { py::class_(m, "TraceMe", py::module_local()) .def(py::init()) .def("SetMetadata", &TraceMeWrapper::SetMetadata) .def("Stop", &TraceMeWrapper::Stop); + + py::object module_name = m.attr("__name__"); + m.attr("traceme_enabled") = + py::reinterpret_steal(PyCFunction_NewEx( + &traceme_method_def, /*self=*/nullptr, module_name.ptr())); }; diff --git a/tensorflow/python/profiler/profiler_v2_test.py b/tensorflow/python/profiler/profiler_v2_test.py index b0b4ff301f6b0b..bec85cdc60bba8 100644 --- a/tensorflow/python/profiler/profiler_v2_test.py +++ b/tensorflow/python/profiler/profiler_v2_test.py @@ -98,6 +98,14 @@ def test_context_manager_with_options(self): file_list = gfile.ListDirectory(logdir) self.assertEqual(len(file_list), 1) + def test_callback(self): + logdir = self.get_temp_dir() + self.assertFalse(trace.enabled()) + profiler.start(logdir) + self.assertTrue(trace.enabled()) + profiler.stop() + self.assertFalse(trace.enabled()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/profiler/trace.py b/tensorflow/python/profiler/trace.py index 6b6bc7ac243a75..4c877803ef7623 100644 --- a/tensorflow/python/profiler/trace.py +++ b/tensorflow/python/profiler/trace.py @@ -19,9 +19,9 @@ from tensorflow.python.profiler.internal import _pywrap_traceme from tensorflow.python.util.tf_export import tf_export -# This variable is modified by PythonHooks::Start/Stop() in C++. Such -# arrangement will reduce the number of calls through pybind11. 
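Referring back to the resize_images_v2 docstring note added above (bicubic currently has no GPU kernel): a small hedged illustration using the public tf.image.resize API; the device-placement claim comes from that note, not from this sketch.

import tensorflow as tf

image = tf.zeros([1, 8, 8, 3])
# Per the added note, the bicubic call runs on CPU even when a GPU is present;
# other methods such as bilinear can use a GPU kernel.
bicubic = tf.image.resize(image, [16, 16], method='bicubic')
bilinear = tf.image.resize(image, [16, 16], method='bilinear')
print(bicubic.shape, bilinear.shape)  # both (1, 16, 16, 3)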
-enabled = False +# This is a low-overhead function that directly calls C++ to check if the +# profiler is enabled. +enabled = _pywrap_traceme.traceme_enabled @tf_export('profiler.experimental.Trace', v1=[]) @@ -74,7 +74,7 @@ def __init__(self, name, **kwargs): The example above uses the keyword argument "step_num" to specify the training step being traced. """ - if enabled: + if enabled(): # Creating _pywrap_traceme.TraceMe starts the clock. self._traceme = _pywrap_traceme.TraceMe(name, **kwargs) else: @@ -177,7 +177,7 @@ def inner_wrapper(func): @functools.wraps(func) def wrapped(*args, **kwargs): - if enabled: + if enabled(): with Trace(trace_name, **trace_kwargs): return func(*args, **kwargs) return func(*args, **kwargs) diff --git a/tensorflow/python/trackable/data_structures.py b/tensorflow/python/trackable/data_structures.py index c920dd882aac35..3bcb4c9ccba3b9 100644 --- a/tensorflow/python/trackable/data_structures.py +++ b/tensorflow/python/trackable/data_structures.py @@ -23,9 +23,6 @@ # Fall back to the build-time dependency if the system package is not available. from .....third_party import wrapt # pylint: disable=relative-beyond-top-level -from tensorflow.python.eager import def_function -from tensorflow.python.eager import function as defun -from tensorflow.python.ops import variables from tensorflow.python.trackable import base from tensorflow.python.trackable import layer_utils from tensorflow.python.util.compat import collections_abc @@ -195,6 +192,8 @@ def trainable(self, value): def _track_value(self, value, name): """Add a dependency on `value`.""" + # pylint: disable=g-import-not-at-top + from tensorflow.python.ops import variables value = sticky_attribute_assignment( trackable=self, value=value, name=name) if isinstance(value, variables.Variable): @@ -810,6 +809,12 @@ def __reduce_ex__(self, protocol): (self.__wrapped__,)) def __getattribute__(self, name): + if name == "__dict__": + # Returns __dict__ from wrapt.ObjectProxy + try: + return object.__getattribute__(self, "__dict__") + except (AttributeError, TypeError): + return {} if (hasattr(type(self), name) and isinstance(getattr(type(self), name), property)): # Bypass ObjectProxy for properties. Whether this workaround is necessary @@ -1108,6 +1113,9 @@ def __getattribute__(self, name): def _is_function(x): + # pylint: disable=g-import-not-at-top + from tensorflow.python.eager import def_function + from tensorflow.python.eager import function as defun return isinstance(x, (def_function.Function, defun.ConcreteFunction)) diff --git a/tensorflow/python/training/BUILD b/tensorflow/python/training/BUILD index 4bc1e84a600dee..3d6038075b86cf 100644 --- a/tensorflow/python/training/BUILD +++ b/tensorflow/python/training/BUILD @@ -1185,7 +1185,6 @@ cuda_py_strict_test( name = "basic_loops_test", size = "medium", srcs = ["basic_loops_test.py"], - tags = ["cuda-only"], #TODO(ROCm) Re-enable after issue is fixed. 
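Usage note for the trace.py change above: enabled is now a C++-backed callable bound to _pywrap_traceme.traceme_enabled, so call sites test enabled() rather than reading a module-level bool. A hedged sketch of the updated pattern; run_step is a placeholder, not TensorFlow API.

from tensorflow.python.profiler import trace

def run_step(step_num):
  return step_num  # stand-in for real per-step work

def maybe_traced_step(step_num):
  # Cheap check; true only while the profiler is collecting.
  if trace.enabled():
    with trace.Trace('train_step', step_num=step_num):
      return run_step(step_num)
  return run_step(step_num)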
deps = [ ":basic_loops", ":supervisor", diff --git a/tensorflow/python/util/BUILD b/tensorflow/python/util/BUILD index 5875bf0e16668d..0851f88e67434a 100644 --- a/tensorflow/python/util/BUILD +++ b/tensorflow/python/util/BUILD @@ -133,6 +133,8 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core/platform:stringpiece", "//tensorflow/python/lib/core:safe_pyobject_ptr", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:string_view", "@local_xla//third_party/python_runtime:headers", ], alwayslink = 1, diff --git a/tensorflow/python/util/kernel_registry.cc b/tensorflow/python/util/kernel_registry.cc index 6a78c6668d9643..8d6a68dd7397f6 100644 --- a/tensorflow/python/util/kernel_registry.cc +++ b/tensorflow/python/util/kernel_registry.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/python/util/kernel_registry.h" +#include + #include "absl/log/log.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_util.h" diff --git a/tensorflow/python/util/nest.cc b/tensorflow/python/util/nest.cc index d7df8c42dde196..467359cbb9cf5e 100644 --- a/tensorflow/python/util/nest.cc +++ b/tensorflow/python/util/nest.cc @@ -19,6 +19,8 @@ limitations under the License. #include #include +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/stringpiece.h" #include "tensorflow/python/lib/core/safe_pyobject_ptr.h" diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index b80627e3bbf891..d7c5e0f9dbe5a2 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -89,7 +89,6 @@ load( "@local_xla//third_party/py/rules_pywrap:pywrap.default.bzl", "use_pywrap_rules", _pybind_extension = "pybind_extension", - _stripped_cc_info = "stripped_cc_info", ) # Do not sort: copybara rule changes this @@ -3340,8 +3339,6 @@ def pybind_extension( **kwargs ) -stripped_cc_info = _stripped_cc_info - # Note: we cannot add //third_party/tf_runtime:__subpackages__ here, # because that builds all of tf_runtime's packages, and some of them # are known not to build on big endian systems. diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 deleted file mode 100644 index 91d501109d08a1..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 +++ /dev/null @@ -1,87 +0,0 @@ -# Dockerfile to build a manylinux 2010 compliant cross-compiler. -# -# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible -# glibc (2.12) and system libstdc++ (4.4). -# -# To push a new version, run: -# $ docker build -f Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010 \ -# --tag "gcr.io/tensorflow-testing/nosla-cuda10.1-cudnn7-ubuntu16.04-manylinux2010" . 
-# $ docker push gcr.io/tensorflow-testing/nosla-cuda10.1-cudnn7-ubuntu16.04-manylinux2010 - -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04 as devtoolset - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y \ - cpio \ - file \ - flex \ - g++ \ - make \ - rpm2cpio \ - unar \ - wget \ - && \ - rm -rf /var/lib/apt/lists/* - -ADD devtoolset/fixlinks.sh fixlinks.sh -ADD devtoolset/build_devtoolset.sh build_devtoolset.sh -ADD devtoolset/rpm-patch.sh rpm-patch.sh - -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. -RUN /build_devtoolset.sh devtoolset-7 /dt7 -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. -RUN /build_devtoolset.sh devtoolset-8 /dt8 - -# TODO(klimek): Split up into two different docker images. -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04 -COPY --from=devtoolset /dt7 /dt7 -COPY --from=devtoolset /dt8 /dt8 - -# Install TensorRT. -RUN apt-get update && apt-get install -y \ - libnvinfer-dev=6.0.1-1+cuda10.1 \ - libnvinfer6=6.0.1-1+cuda10.1 \ - libnvinfer-plugin-dev=6.0.1-1+cuda10.1 \ - libnvinfer-plugin6=6.0.1-1+cuda10.1 \ - && \ - rm -rf /var/lib/apt/lists/* - -# Copy and run the install scripts. -ENV CLANG_VERSION="r42cab985fd95ba4f3f290e7bb26b93805edb447d" -COPY install/*.sh /install/ -ARG DEBIAN_FRONTEND=noninteractive -RUN /install/install_bootstrap_deb_packages.sh -RUN /install/install_deb_packages.sh -RUN /install/install_latest_clang.sh -RUN /install/install_bazel.sh - -# Install python 3.6. -RUN apt-get install --reinstall python3-apt -RUN yes "" | add-apt-repository ppa:deadsnakes/ppa -RUN apt-get update && apt-get install -y \ - python3.6 python3.6-dev python3-pip python3.6-venv && \ - rm -rf /var/lib/apt/lists/* && \ - python3.6 -m pip install pip --upgrade && \ - update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 0 - -RUN /install/install_pip_packages.sh - -# Install python 3.8. -RUN apt-get update && apt-get install -y python3.8 python3.8-dev python3.8-venv -RUN rm -rf /var/lib/apt/lists/* -# Have to download get-pip.py due to a pip circular issue -# https://stackoverflow.com/questions/58758447/how-to-fix-module-platform-has-no-attribute-linux-distribution-when-instal -RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py -RUN python3.8 get-pip.py -RUN python3.8 -m pip install --upgrade pip setuptools wheel - -# Overwrite include paths that are generated for the multipython image. -RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.6m" "/dt7/usr/include/x86_64-linux-gnu/python3.6m" -RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.6m" "/dt8/usr/include/x86_64-linux-gnu/python3.6m" - -RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.8" "/dt7/usr/include/x86_64-linux-gnu/python3.8" -RUN ln -sf "/usr/include/x86_64-linux-gnu/python3.8" "/dt8/usr/include/x86_64-linux-gnu/python3.8" - -# Make apt work with python 3.6. -RUN cp /usr/lib/python3/dist-packages/apt_pkg.cpython-35m-x86_64-linux-gnu.so \ - /usr/lib/python3/dist-packages/apt_pkg.so diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython deleted file mode 100644 index c135dd5bd5d667..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython +++ /dev/null @@ -1,87 +0,0 @@ -# Dockerfile to build a manylinux 2010 compliant cross-compiler. 
-# -# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible -# glibc (2.12) and system libstdc++ (4.4). -# -# To push a new version, run: -# $ docker build -f Dockerfile.rbe.cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython \ -# --tag "gcr.io/tensorflow-testing/nosla-cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython" . -# $ docker push gcr.io/tensorflow-testing/nosla-cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython - -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04 as devtoolset - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y \ - cpio \ - file \ - flex \ - g++ \ - make \ - rpm2cpio \ - unar \ - wget \ - && \ - rm -rf /var/lib/apt/lists/* - -ADD devtoolset/fixlinks.sh fixlinks.sh -ADD devtoolset/build_devtoolset.sh build_devtoolset.sh -ADD devtoolset/rpm-patch.sh rpm-patch.sh - -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. -RUN /build_devtoolset.sh devtoolset-7 /dt7 -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. -RUN /build_devtoolset.sh devtoolset-8 /dt8 - -# TODO(klimek): Split up into two different docker images. -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04 -COPY --from=devtoolset /dt7 /dt7 -COPY --from=devtoolset /dt8 /dt8 - -# Install TensorRT. -RUN apt-get update && apt-get install -y \ - libnvinfer-dev=6.0.1-1+cuda10.1 \ - libnvinfer6=6.0.1-1+cuda10.1 \ - libnvinfer-plugin-dev=6.0.1-1+cuda10.1 \ - libnvinfer-plugin6=6.0.1-1+cuda10.1 \ - && \ - rm -rf /var/lib/apt/lists/* - -# Copy and run the install scripts. -ARG DEBIAN_FRONTEND=noninteractive - -COPY install/install_bootstrap_deb_packages.sh /install/ -RUN /install/install_bootstrap_deb_packages.sh - -COPY install/install_deb_packages.sh /install/ -RUN /install/install_deb_packages.sh - -# Install additional packages needed for this image: -# - dependencies to build Python from source -# - patchelf, as it is required by auditwheel -RUN apt-get update && apt-get install -y \ - libbz2-dev \ - libffi-dev \ - libgdbm-dev \ - libncurses5-dev \ - libnss3-dev \ - libreadline-dev \ - patchelf \ - && \ - rm -rf /var/lib/apt/lists/* - -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - -COPY install/build_and_install_python.sh /install/ -RUN /install/build_and_install_python.sh "3.6.9" -RUN /install/build_and_install_python.sh "3.7.7" -RUN /install/build_and_install_python.sh "3.8.2" - -COPY install/install_pip_packages_by_version.sh /install/ -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.6" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" - -ENV CLANG_VERSION="r42cab985fd95ba4f3f290e7bb26b93805edb447d" -COPY install/install_latest_clang.sh /install/ -RUN /install/install_latest_clang.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu18.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu18.04-manylinux2010-multipython deleted file mode 100644 index b8b9e2195b7830..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.1-cudnn7-ubuntu18.04-manylinux2010-multipython +++ /dev/null @@ -1,88 +0,0 @@ -# Dockerfile to build a manylinux 2010 compliant cross-compiler. -# -# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible -# glibc (2.12) and system libstdc++ (4.4). 
-# -# To push a new version, run: -# $ docker build -f Dockerfile.rbe.cuda10.1-cudnn7-ubuntu18.04-manylinux2010-multipython \ -# --tag "gcr.io/tensorflow-testing/nosla-cuda10.1-cudnn7-ubuntu18.04-manylinux2010-multipython" . -# $ docker push gcr.io/tensorflow-testing/nosla-cuda10.1-cudnn7-ubuntu18.04-manylinux2010-multipython - -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 as devtoolset - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y \ - cpio \ - file \ - flex \ - g++ \ - make \ - rpm2cpio \ - unar \ - wget \ - && \ - rm -rf /var/lib/apt/lists/* - -ADD devtoolset/fixlinks.sh fixlinks.sh -ADD devtoolset/build_devtoolset.sh build_devtoolset.sh -ADD devtoolset/rpm-patch.sh rpm-patch.sh - -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. -RUN /build_devtoolset.sh devtoolset-7 /dt7 -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. -RUN /build_devtoolset.sh devtoolset-8 /dt8 - -# TODO(klimek): Split up into two different docker images. -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 -COPY --from=devtoolset /dt7 /dt7 -COPY --from=devtoolset /dt8 /dt8 - -# Install TensorRT. -RUN apt-get update && apt-get install -y \ - libnvinfer-dev=6.0.1-1+cuda10.1 \ - libnvinfer6=6.0.1-1+cuda10.1 \ - libnvinfer-plugin-dev=6.0.1-1+cuda10.1 \ - libnvinfer-plugin6=6.0.1-1+cuda10.1 \ - && \ - rm -rf /var/lib/apt/lists/* - -# Copy and run the install scripts. -ARG DEBIAN_FRONTEND=noninteractive - -COPY install/install_bootstrap_deb_packages.sh /install/ -RUN /install/install_bootstrap_deb_packages.sh - -COPY install/install_deb_packages.sh /install/ -RUN /install/install_deb_packages.sh - -# Install additional packages needed for this image: -# - dependencies to build Python from source -# - patchelf, as it is required by auditwheel -RUN apt-get update && apt-get install -y \ - libbz2-dev \ - libffi-dev \ - libgdbm-dev \ - libncurses5-dev \ - libnss3-dev \ - libreadline-dev \ - patchelf \ - && \ - rm -rf /var/lib/apt/lists/* - -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - -COPY install/build_and_install_python.sh /install/ -RUN /install/build_and_install_python.sh "3.5.9" -RUN /install/build_and_install_python.sh "3.6.9" -RUN /install/build_and_install_python.sh "3.7.7" -RUN /install/build_and_install_python.sh "3.8.2" - -COPY install/install_pip_packages_by_version.sh /install/ -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.6" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" - -ENV CLANG_VERSION="r42cab985fd95ba4f3f290e7bb26b93805edb447d" -COPY install/install_latest_clang.sh /install/ -RUN /install/install_latest_clang.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.2-cudnn7-ubuntu18.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.2-cudnn7-ubuntu18.04-manylinux2010-multipython deleted file mode 100644 index 5a86fb05d119b6..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.2-cudnn7-ubuntu18.04-manylinux2010-multipython +++ /dev/null @@ -1,78 +0,0 @@ -# Dockerfile to build a manylinux 2010 compliant cross-compiler. -# -# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible -# glibc (2.12) and system libstdc++ (4.4). 
-# -# To push a new version, run: -# $ docker build -f Dockerfile.rbe.cuda10.2-cudnn7-ubuntu18.04-manylinux2010-multipython \ -# --tag "gcr.io/tensorflow-testing/nosla-cuda10.2-cudnn7-ubuntu18.04-manylinux2010-multipython" . -# $ docker push gcr.io/tensorflow-testing/nosla-cuda10.2-cudnn7-ubuntu18.04-manylinux2010-multipython - -FROM gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu16.04-manylinux2010 - -RUN apt-get update -RUN apt-get remove -y --allow-change-held-packages cuda-license-10-0 libcudnn7 libcudnn8 libnccl2 libnccl-dev -RUN apt-get install -y --no-install-recommends --allow-downgrades --allow-change-held-packages \ - libcublas10 \ - libcublas-dev \ - cuda-nvml-dev-10.2 \ - cuda-command-line-tools-10.2 \ - cuda-libraries-dev-10.2 \ - cuda-minimal-build-10.2 \ - libcudnn7=7.6.5.32-1+cuda10.2 \ - libcudnn7-dev=7.6.5.32-1+cuda10.2 -RUN rm -f /usr/local/cuda -RUN ln -s /usr/local/cuda-10.2 /usr/local/cuda - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y \ - cpio \ - file \ - flex \ - g++ \ - make \ - rpm2cpio \ - unar \ - wget \ - && \ - rm -rf /var/lib/apt/lists/* - -# Copy and run the install scripts. -ARG DEBIAN_FRONTEND=noninteractive - -COPY install/install_bootstrap_deb_packages.sh /install/ -RUN /install/install_bootstrap_deb_packages.sh - -COPY install/install_deb_packages.sh /install/ -RUN /install/install_deb_packages.sh - -# Install additional packages needed for this image: -# - dependencies to build Python from source -# - patchelf, as it is required by auditwheel -RUN apt-get update && apt-get install -y \ - libbz2-dev \ - libffi-dev \ - libgdbm-dev \ - libncurses5-dev \ - libnss3-dev \ - libreadline-dev \ - patchelf \ - && \ - rm -rf /var/lib/apt/lists/* - -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - -COPY install/build_and_install_python.sh /install/ -RUN /install/build_and_install_python.sh "3.7.7" -RUN /install/build_and_install_python.sh "3.8.2" -RUN /install/build_and_install_python.sh "3.9.0" - -COPY install/install_pip_packages_by_version.sh /install/ -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" - -ENV CLANG_VERSION="r42cab985fd95ba4f3f290e7bb26b93805edb447d" -COPY install/install_latest_clang.sh /install/ -RUN /install/install_latest_clang.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython deleted file mode 100644 index 3f90ac008459fc..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython +++ /dev/null @@ -1,93 +0,0 @@ -# Dockerfile to build a manylinux 2010 compliant cross-compiler. -# -# Builds a devtoolset gcc/libstdc++ that targets manylinux 2010 compatible -# glibc (2.12) and system libstdc++ (4.4). -# -# To push a new version, run: -# $ docker build -f Dockerfile.rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython \ -# --tag "gcr.io/tensorflow-testing/nosla-cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython" . 
-# $ docker push gcr.io/tensorflow-testing/nosla-cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython - -FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 as devtoolset - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y \ - cpio \ - file \ - flex \ - g++ \ - make \ - patch \ - rpm2cpio \ - unar \ - wget \ - xz-utils \ - && \ - rm -rf /var/lib/apt/lists/* - -ADD devtoolset/fixlinks.sh fixlinks.sh -ADD devtoolset/build_devtoolset.sh build_devtoolset.sh -ADD devtoolset/rpm-patch.sh rpm-patch.sh - -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. -RUN /build_devtoolset.sh devtoolset-7 /dt7 -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. -RUN /build_devtoolset.sh devtoolset-8 /dt8 - -# TODO(klimek): Split up into two different docker images. -FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 -COPY --from=devtoolset /dt7 /dt7 -COPY --from=devtoolset /dt8 /dt8 - -# Install TensorRT. -RUN echo \ - deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 / \ - > /etc/apt/sources.list.d/nvidia-ml.list \ - && \ - apt-get update && apt-get install -y \ - libnvinfer-dev=7.1.3-1+cuda11.0 \ - libnvinfer7=7.1.3-1+cuda11.0 \ - libnvinfer-plugin-dev=7.1.3-1+cuda11.0 \ - libnvinfer-plugin7=7.1.3-1+cuda11.0 \ - && \ - rm -rf /var/lib/apt/lists/* - -# Copy and run the install scripts. -ARG DEBIAN_FRONTEND=noninteractive - -COPY install/install_bootstrap_deb_packages.sh /install/ -RUN /install/install_bootstrap_deb_packages.sh - -COPY install/install_deb_packages.sh /install/ -RUN /install/install_deb_packages.sh - -# Install additional packages needed for this image: -# - dependencies to build Python from source -# - patchelf, as it is required by auditwheel -RUN apt-get update && apt-get install -y \ - libbz2-dev \ - libffi-dev \ - libgdbm-dev \ - libncurses5-dev \ - libnss3-dev \ - libreadline-dev \ - patchelf \ - && \ - rm -rf /var/lib/apt/lists/* - -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - -COPY install/build_and_install_python.sh /install/ -RUN /install/build_and_install_python.sh "3.6.9" -RUN /install/build_and_install_python.sh "3.7.7" -RUN /install/build_and_install_python.sh "3.8.2" - -COPY install/install_pip_packages_by_version.sh /install/ -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.6" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" - -ENV CLANG_VERSION="r7f6f9f4cf966c78a315d15d6e913c43cfa45c47c" -COPY install/install_latest_clang.sh /install/ -RUN /install/install_latest_clang.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.2-cudnn8.1-ubuntu20.04-manylinux2014-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.2-cudnn8.1-ubuntu20.04-manylinux2014-multipython deleted file mode 100644 index cb149c9d82ba21..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.2-cudnn8.1-ubuntu20.04-manylinux2014-multipython +++ /dev/null @@ -1,105 +0,0 @@ -# Dockerfile to build a manylinxu2010/manylinux 2014 compliant cross-compiler. -# -# Builds a devtoolset-7 environment with manylinux2010 compatible glibc (2.12) and -# libstdc++ (4.4) in /dt7. -# -# Builds a devtoolset-9 environment with manylinux2014 compatible glibc (2.17) and -# libstdc++ (4.8) in /dt9. 
-# -# To push a new version, run: -# $ docker build -f Dockerfile.rbe.cuda11.2-cudnn8.1-ubuntu20.04-manylinux2014-multipython \ -# --tag "gcr.io/tensorflow-testing/nosla-cuda11.2-cudnn8.1-ubuntu20.04-manylinux2014-multipython" . -# $ docker push gcr.io/tensorflow-testing/nosla-cuda11.2-cudnn8.1-ubuntu20.04-manylinux2014-multipython - -FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu20.04 as devtoolset - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y \ - cpio \ - file \ - flex \ - g++ \ - make \ - patch \ - rpm2cpio \ - unar \ - wget \ - xz-utils \ - && \ - rm -rf /var/lib/apt/lists/* - -ADD devtoolset/fixlinks.sh fixlinks.sh -ADD devtoolset/build_devtoolset.sh build_devtoolset.sh -ADD devtoolset/rpm-patch.sh rpm-patch.sh - -# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. -RUN /build_devtoolset.sh devtoolset-7 /dt7 -# Set up a sysroot for glibc 2.17 / libstdc++ 4.8 / devtoolset-9 in /dt9. -RUN /build_devtoolset.sh devtoolset-9 /dt9 - -# TODO(klimek): Split up into two different docker images. -FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu20.04 -COPY --from=devtoolset /dt7 /dt7 -COPY --from=devtoolset /dt9 /dt9 - -# Install TensorRT. -RUN echo \ - deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 / \ - > /etc/apt/sources.list.d/nvidia-ml.list \ - && \ - apt-key adv --keyserver keyserver.ubuntu.com --recv-keys F60F4B3D7FA2AF80 && \ - apt-get update && apt-get install -y \ - libnvinfer-dev=7.2.2-1+cuda11.1 \ - libnvinfer7=7.2.2-1+cuda11.1 \ - libnvinfer-plugin-dev=7.2.2-1+cuda11.1 \ - libnvinfer-plugin7=7.2.2-1+cuda11.1 \ - && \ - rm -rf /var/lib/apt/lists/* - -# Copy and run the install scripts. -ARG DEBIAN_FRONTEND=noninteractive - -COPY install/install_bootstrap_deb_packages.sh /install/ -RUN /install/install_bootstrap_deb_packages.sh - -COPY install/install_deb_packages.sh /install/ -RUN /install/install_deb_packages.sh - -# Install additional packages needed for this image: -# - dependencies to build Python from source -# - patchelf, as it is required by auditwheel -RUN apt-get update && apt-get install -y \ - libbz2-dev \ - libffi-dev \ - libgdbm-dev \ - libncurses5-dev \ - libnss3-dev \ - libreadline-dev \ - libsqlite3-dev \ - patchelf \ - && \ - rm -rf /var/lib/apt/lists/* - -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - -COPY install/build_and_install_python.sh /install/ -RUN /install/build_and_install_python.sh "3.7.7" -RUN /install/build_and_install_python.sh "3.8.2" -RUN /install/build_and_install_python.sh "3.9.4" -RUN /install/build_and_install_python.sh "3.10.0" - -COPY install/install_pip_packages_by_version.sh /install/ -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.10" - -ENV CLANG_VERSION="rf2b94bd7eaa83d853dc7568fac87b1f8bf4ddec6" -COPY install/install_latest_clang.sh /install/ -RUN /install/install_latest_clang.sh - -# TensorRT 7 for CUDA 11.1 is compatible with CUDA 11.2, but requires -# libnvrtc.so.11.1. See https://github.com/NVIDIA/TensorRT/issues/1064. -# TODO(b/187962120): Remove when upgrading to TensorRT 8. 
-ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda-11.1/lib64" diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.gpu b/tensorflow/tools/ci_build/Dockerfile.rbe.gpu deleted file mode 100644 index c4912a65b65d61..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.gpu +++ /dev/null @@ -1,26 +0,0 @@ -FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04 - -LABEL maintainer="Nick Lopez " - -# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to -# /usr/local/cuda -RUN cp -P /usr/include/cudnn.h /usr/local/cuda/include -RUN cp -P /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64 - -# Copy and run the install scripts. -COPY install/*.sh /install/ -ARG DEBIAN_FRONTEND=noninteractive -RUN /install/install_bootstrap_deb_packages.sh -RUN add-apt-repository -y ppa:openjdk-r/ppa && \ - add-apt-repository -y ppa:george-edison55/cmake-3.x -RUN /install/install_deb_packages.sh -RUN /install/install_pip_packages.sh -RUN /install/install_golang.sh - -# Install clang from pre-built package -RUN cd /tmp && \ - wget https://storage.googleapis.com/clang-builds-stable/clang-ubuntu16_04/clang_r337145.tar.gz && \ - echo "ab98c63eb09c04112cc992bc95ebc0dcea8c5e9d0760438789be2896cdc69ff8 clang_r337145.tar.gz" | sha256sum -c && \ - tar -C /usr/local -xf clang_r323528.tar.gz && \ - rm clang_r337145.tar.gz - diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython deleted file mode 100644 index e9974b05b3cab8..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython +++ /dev/null @@ -1,82 +0,0 @@ -# Dockerfile for ROCm RBE builds. -# -# To push a new version, run: -# $ docker build -f Dockerfile.local-toolchain-ubuntu18.04-manylinux2010 \ -# --tag "local-toolchain-ubuntu18.04-manylinux2010" . -# $ docker build -f Dockerfile.rbe.rocm-ubuntu18.04-manylinux2010-multipython \ -# --tag "gcr.io/tensorflow-testing/nosla-rocm-ubuntu18.04-manylinux2010-multipython" . -# $ docker push gcr.io/tensorflow-testing/nosla-rocm-ubuntu18.04-manylinux2010-multipython - -FROM ubuntu:18.04 -COPY --from=local-toolchain-ubuntu18.04-manylinux2010 /dt7 /dt7 - -ARG DEBIAN_FRONTEND=noninteractive - -# Install ROCm packages -RUN apt-get update && apt-get install -y --no-install-recommends \ - curl libnuma-dev gnupg sudo libelf1 build-essential \ - && curl -k -sL https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ - && printf "deb [arch=amd64] http://repo.radeon.com/rocm/apt/5.0/ ubuntu main" | tee /etc/apt/sources.list.d/rocm.list \ - && apt-get update && apt-get install -y --no-install-recommends \ - rocm-dev rocm-libs rccl \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - -# Set ROCm environment variables and paths. -# We use /opt/rocm without version suffix so that the toolchain configuration -# for builtin headers doesn't need to be adapted -ARG ROCM_PATH=/opt/rocm -ENV HCC_HOME=$ROCM_PATH/hcc -ENV HIP_PATH=$ROCM_PATH/hip -ENV OPENCL_ROOT=$ROCM_PATH/opencl -ENV PATH="$ROCM_PATH/bin:${PATH}" -ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}" -ENV PATH="$OPENCL_ROOT/bin:${PATH}" - -# Set target file to help determine which device(s) to build for -RUN bash -c "ls -al /opt/roc*" -RUN bash -c "echo -e 'gfx900\ngfx906\ngfx908' > $ROCM_PATH/bin/target.lst" - -# Copy and run the install scripts. 
-COPY install/install_bootstrap_deb_packages.sh /install/ -RUN /install/install_bootstrap_deb_packages.sh - -COPY install/install_deb_packages.sh /install/ -RUN /install/install_deb_packages.sh - -# Install additional packages needed for this image: -# - bsdmainutils (hexdump) for MLIR generated GPU kernels -# - dependencies to build Python from source -# - patchelf, as it is required by auditwheel -RUN apt-get update && apt-get install -y \ - bsdmainutils \ - libbz2-dev \ - libffi-dev \ - libgdbm-dev \ - libncurses5-dev \ - libnss3-dev \ - libreadline-dev \ - patchelf \ - && \ - rm -rf /var/lib/apt/lists/* - -# Install openjdk 11 -RUN yes "" | add-apt-repository ppa:openjdk-r/ppa -RUN apt-get update && apt-get install -y openjdk-11-jdk && \ - update-alternatives --auto java - -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - -COPY install/build_and_install_python.sh /install/ -RUN /install/build_and_install_python.sh "3.7.7" -RUN /install/build_and_install_python.sh "3.8.2" -RUN /install/build_and_install_python.sh "3.9.4" - -COPY install/install_pip_packages_by_version.sh /install/ -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" - -ENV CLANG_VERSION="r42cab985fd95ba4f3f290e7bb26b93805edb447d" -COPY install/install_latest_clang.sh /install/ -RUN /install/install_latest_clang.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu20.04-manylinux2014-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu20.04-manylinux2014-multipython deleted file mode 100644 index 32834ccac2a3af..00000000000000 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu20.04-manylinux2014-multipython +++ /dev/null @@ -1,86 +0,0 @@ -# Dockerfile for ROCm RBE builds. -# -# To push a new version, run: -# $ docker build -f Dockerfile.local-toolchain-ubuntu20.04-manylinux2014 \ -# --tag "local-toolchain-ubuntu20.04-manylinux2014" . -# $ docker build -f Dockerfile.rbe.rocm-ubuntu20.04-manylinux2014-multipython \ -# --tag "gcr.io/tensorflow-testing/nosla-rocm-ubuntu20.04-manylinux2014-multipython" . -# $ docker push gcr.io/tensorflow-testing/nosla-rocm-ubuntu20.04-manylinux2014-multipython - -FROM ubuntu:20.04 -COPY --from=local-toolchain-ubuntu20.04-manylinux2014 /dt7 /dt7 -COPY --from=local-toolchain-ubuntu20.04-manylinux2014 /dt9 /dt9 - -ARG DEBIAN_FRONTEND=noninteractive - -# Install ROCm packages -RUN apt-get update && apt-get install -y --no-install-recommends \ - curl libnuma-dev gnupg sudo libelf1 build-essential \ - && curl -k -sL https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ - && printf "deb [arch=amd64] http://repo.radeon.com/rocm/apt/5.3/ ubuntu main" \ - | tee /etc/apt/sources.list.d/rocm.list \ - && apt-get update && apt-get install -y \ - rocm-dev rocm-libs rccl \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - -# Set ROCm environment variables and paths. 
-# We use /opt/rocm without version suffix so that the toolchain configuration -# for builtin headers doesn't need to be adapted -ARG ROCM_PATH=/opt/rocm -ENV HCC_HOME=$ROCM_PATH/hcc -ENV HIP_PATH=$ROCM_PATH/hip -ENV OPENCL_ROOT=$ROCM_PATH/opencl -ENV PATH="$ROCM_PATH/bin:${PATH}" -ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}" -ENV PATH="$OPENCL_ROOT/bin:${PATH}" - -# Set target file to help determine which device(s) to build for -RUN bash -c "ls -al /opt/roc*" -RUN bash -c "echo -e 'gfx900\ngfx906\ngfx908' > $ROCM_PATH/bin/target.lst" - -# Copy and run the install scripts. -COPY install/install_bootstrap_deb_packages.sh /install/ -RUN /install/install_bootstrap_deb_packages.sh - -COPY install/install_deb_packages.sh /install/ -RUN /install/install_deb_packages.sh - -# Install additional packages needed for this image: -# - dependencies to build Python from source -# - patchelf, as it is required by auditwheel -RUN apt-get update && apt-get install -y \ - libbz2-dev \ - libffi-dev \ - libgdbm-dev \ - libncurses5-dev \ - libnss3-dev \ - libreadline-dev \ - patchelf \ - && \ - rm -rf /var/lib/apt/lists/* - -# Install openjdk 11 -RUN yes "" | add-apt-repository ppa:openjdk-r/ppa -RUN apt-get update && apt-get install -y openjdk-11-jdk && \ - update-alternatives --auto java - -COPY install/install_bazel.sh /install/ -RUN /install/install_bazel.sh - -COPY install/build_and_install_python.sh /install/ -RUN /install/build_and_install_python.sh "3.7.7" -RUN /install/build_and_install_python.sh "3.8.2" -RUN /install/build_and_install_python.sh "3.9.4" -RUN /install/build_and_install_python.sh "3.10.0" -RUN /install/build_and_install_python.sh "3.11.0" - -COPY install/install_pip_packages_by_version.sh /install/ -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" "nojax" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.8" "nojax" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.9" "nojax" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.10" "nojax" -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.11" "nojax" - -ENV CLANG_VERSION="rf2b94bd7eaa83d853dc7568fac87b1f8bf4ddec6" -COPY install/install_latest_clang.sh /install/ -RUN /install/install_latest_clang.sh diff --git a/tensorflow/tools/ci_build/install/install_latest_clang.sh b/tensorflow/tools/ci_build/install/install_latest_clang.sh deleted file mode 100755 index 5eed5d2141fcf0..00000000000000 --- a/tensorflow/tools/ci_build/install/install_latest_clang.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -eu -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Contact c-toolchain-team@ for new releases or new ubuntu versions. 
-DIST="$(grep "DISTRIB_RELEASE" /etc/lsb-release |sed 's,.*=,,; s,\.,_,')" -INSTALL_DIR="/clang_${CLANG_VERSION}" -STORAGE="https://storage.googleapis.com/clang-builds-stable" -mkdir -p "${INSTALL_DIR}" -cd "${INSTALL_DIR}" -wget "${STORAGE}/clang-ubuntu${DIST}/clang_${CLANG_VERSION}.tar.gz" -tar xvzf clang_${CLANG_VERSION}.tar.gz -rm clang_${CLANG_VERSION}.tar.gz diff --git a/tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh b/tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh index 36c8dbd6c29948..ca6e0b612d5a47 100755 --- a/tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh +++ b/tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh @@ -64,13 +64,31 @@ if [ ! -d /tf ];then mkdir /tf fi -# vvv TODO (rocm) weekly-sync-20251021 excluded tests +# vvv TODO (rocm) weekly-sync-20251224 excluded tests EXCLUDED_TESTS=( # //tensorflow/core/kernels:matmul_op_test_gpu Test/FusedMatMulWithBiasOpTest/1.MatMul* # //tensorflow/core/common_runtime:process_function_library_runtime_test_gpu ProcessFunctionLibraryRuntimeTest.MultiDevice_ResourceOutput_GPU + + # //tensorflow/compiler/tests:randomized_tests_seeded + # //tensorflow/compiler/tests:randomized_tests_mlir_seeded + OpTest.ScatterNd + + # //tensorflow/core/util/autotune_maps:autotune_serialize_test_gpu + AutotuneSerializeTest.Consistency + AutotuneSerializeTest.VersionControl + + # //tensorflow/python/kernel_tests/nn_ops:depthwise_conv_op_d9m_test + DepthwiseConv2DDeterministicTest.testBackwardDeterminismGPU + + # //tensorflow/python/kernel_tests/sparse_ops:sparse_ops_test + SparseFillEmptyRowsTest.testSparseFillEmptyRowsGradInvalidReverseIndexMap + + # //tensorflow/core/profiler/backends/gpu:device_tracer_test + DeviceTracerTest.StartTwoTracers + DeviceTracerTest.TraceToXSpace ) # Run bazel test command. Double test timeouts to avoid flakes. 
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_xla.sh b/tensorflow/tools/ci_build/linux/rocm/run_xla.sh index ea43eddc474626..9bef5a24a46366 100755 --- a/tensorflow/tools/ci_build/linux/rocm/run_xla.sh +++ b/tensorflow/tools/ci_build/linux/rocm/run_xla.sh @@ -138,6 +138,73 @@ EXCLUDED_TESTS=( # @local_xla//xla/tests:multioutput_fusion_test_amdgpu_any MultiOutputFusionTest.MultiOutputReduceFusionMajorWithExtraOutput + + # vvv TODO (rocm) weekly-sync-20251224 excluded tests + # @local_xla//xla/service/gpu:gpu_compiler_test_amdgpu_any + PersistedAutotuningTest.SingleOperationGetsAutotuned + + # @local_xla//xla/backends/gpu/codegen/triton:support_test + BitcastOrReshapeTestSuite/BitcastOrReshapeTest.IsTritonSupportedBitcastOrReshape* + BitcastOrReshapeTestSuite/BitcastOrReshapeTest.IsTritonSupported0DBitcastOrReshape* + BitcastConvertSuite/BitcastConvertTest.BitcastConvertDisguisedAsBitcast* + UnaryElementwiseTestSuite/UnaryElementwiseTest.IsTritonSupportedUnaryElementwise* + ConvertTestSuite/ConvertTest.Convert* + BinaryElementwiseTestSuite/BinaryElementwiseTest.IsTritonSupportedBinaryElementwise* + TernaryElementwiseTestSuite/TernaryElementwiseTest.IsTritonSupportedTernaryElementwise* + ReductionComputationTestSuite/ReductionComputationTest.DifferentBinaryOps* + TransposeTestSuite/TransposeTest.LoadTranspose3D* + SliceTestSuite/SliceTest.ContinuousSlice* + BroadcastTestSuite/BroadcastTest.Broadcast* + ParameterTestSuite/ParameterTest.Parameter* + ConstantTestSuite/ConstantTest.ConstantEffectiveScalar* + DotTestSuite/DotTypesTest.Dot* + + # @local_xla//xla/backends/gpu/codegen/triton:support_legacy_test + DotTestTestSuite/DotTest.IsTritonSupportedExecutesCorrectlyForDot/f8e5m2_dot + + # @local_xla//xla/backends/gpu/profiler:kernel_name_tracer_test + KernelNameTracerTest.Create + KernelNameTracerTest.CaptureKernelNames + KernelNameTracerTest.CaptureKernelNamesFromCommandBufferThunk + + # @local_xla//xla/service/gpu/autotuning:gemm_fusion_autotuner_test + GemmFusionAutotunerTest.Int8FusedGemm256 + GemmFusionAutotunerLevelSweep/GemmFusionAutotunerLevelTest.Deviceless/0 + + # @local_xla//xla/service/gpu/tests:swap_conv_operands_test + SwapConvOperandsTest.LargePadding + SwapConvOperandsTest.SmallPadding + SwapConvOperandsTest.DoesNotLower + + # @local_xla//xla/service/gpu/tests:gpu_triton_custom_call_test + GpuIrEmitterUnnestedTest.CanNotEmitTritonCustomCallOnPreAmpereGpu + + # @local_xla//xla/tests:convolution_autotune_disabled_test + Transposed2DConvHloTest/Transposed2DConvHloTest.Simple* + ConvolveWithAndWithoutCanonicalization_Instantiation/ConvolveWithAndWithoutCanonicalization.Convolve2D_NoSpatialDims* + ConvolutionHloTest.ConvolveBackwardInput + ConvolutionHloTest.TestConv0D + ConvolutionHloTest.TestConv2DF16 + ConvolutionHloTest.SwappedOperandConvolveWithStride + ConvolutionHloTest.TestFusedConv3D + ConvolutionHloTest.SwappedOperandConvolve + ConvolutionHloTest.TestBooleanInput + ConvolutionHloTest.SwappedOperandConvolve2 + ConvolutionTest.Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid + ConvolutionTest.ConvolveF32BackwardInputGroupedConvolution + Convolve_1x1x4x4_1x1x2x2_Valid/2.Types + Convolve_1x1x4x4_1x1x2x2_Valid/1.Types + Convolve_1x1x4x4_1x1x2x2_Same/1.Types + Convolve_1x1x4x4_1x1x2x2_Same/2.Types + Convolve_1x1x4x4_1x1x3x3_Same/1.Types + Convolve_1x1x4x4_1x1x3x3_Same/2.Types + Convolve2D* + + # @local_xla//xla/tests:convolution_1d_autotune_disabled_test + ConvolutionTest.Convolve1D* + Convolve1D_1x2x5_1x2x2* + Convolve1D1WindowTest_Instantiation/Convolve1D1WindowTestFloat* + 
Convolve1D1WindowTest_Instantiation/Convolve1D1WindowTestHalf* ) bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.bazelrc test \ diff --git a/tensorflow/tools/gcs_test/Dockerfile b/tensorflow/tools/gcs_test/Dockerfile index b5fbef19051f8a..19958cb6478765 100644 --- a/tensorflow/tools/gcs_test/Dockerfile +++ b/tensorflow/tools/gcs_test/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:24.04@sha256:66460d557b25769b102175144d538d88219c077c678a49af4afca6fbfc1b5252 +FROM ubuntu:24.04@sha256:c35e29c9450151419d9448b0fd75374fec4fff364a27f176fb458d472dfc9e54 LABEL maintainer="Shanqing Cai " diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc index 269137c997d447..3d8eabc8361f6b 100644 --- a/tensorflow/tools/graph_transforms/quantize_nodes.cc +++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc @@ -32,17 +32,17 @@ namespace graph_transforms { // into the quantized equivalent. struct QuantizedOpInfo { // The name of the float op. - string float_name; + std::string float_name; // Which attributes to copy directly over. - std::vector attrs_to_copy; + std::vector attrs_to_copy; // Extra data type attributes we need to set. - std::vector> dtypes_to_set; + std::vector> dtypes_to_set; // What depth of inputs the op can read in. DataType input_bit_depth; // The depth of the op's quantized outputs. DataType output_bit_depth; // Which inputs (e.g. shapes) aren't involved in the quantization process. - std::set unquantized_inputs; + std::set unquantized_inputs; // How the outputs are arranged, either // [input0, input1, min0, max0, min1, max1] for contiguous, or // [input0, input1, min0, min1, max0, max1] for separate. @@ -145,12 +145,12 @@ const std::vector& GetQuantizedOpList() { namespace { // Replaces invalid characters in input names to get a unique node name. -string UniqueNodeNameFromInput(const string& input_name) { - string prefix; - string node_name; - string suffix; +std::string UniqueNodeNameFromInput(const std::string& input_name) { + std::string prefix; + std::string node_name; + std::string suffix; NodeNamePartsFromInput(input_name, &prefix, &node_name, &suffix); - string result; + std::string result; if (prefix == "^") { result += "__hat__"; } @@ -163,9 +163,10 @@ string UniqueNodeNameFromInput(const string& input_name) { // Pulls two float values from the named parameters, with a lot of checking. absl::Status ExtractRangeFromParams(const TransformFuncContext& context, - const string& min_name, - const string& max_name, float* min_value, - float* max_value, bool* has_range) { + const std::string& min_name, + const std::string& max_name, + float* min_value, float* max_value, + bool* has_range) { // See if we've been given quantized inputs with a known range. const bool has_min = (context.params.count(min_name) != 0); const bool has_max = (context.params.count(max_name) != 0); @@ -193,17 +194,17 @@ absl::Status MergeDuplicateNodes(const GraphDef& input_graph_def, const TransformFuncContext& context, GraphDef* output_graph_def) { // Make sure we can look up inputs and outputs quickly. 
- std::set input_names(context.input_names.begin(), - context.input_names.end()); - std::set output_names(context.output_names.begin(), - context.output_names.end()); + std::set input_names(context.input_names.begin(), + context.input_names.end()); + std::set output_names(context.output_names.begin(), + context.output_names.end()); GraphDef current_graph_def = input_graph_def; // Keep running the merging until no more duplicates are found. bool any_duplicates_found; do { any_duplicates_found = false; // First arrange all of the nodes by a hash of their contents. - std::map> hashed_nodes; + std::map> hashed_nodes; for (const NodeDef& node : current_graph_def.node()) { NodeDef nameless_node = node; // The name matters if it's being used as an input or output node, @@ -211,14 +212,14 @@ absl::Status MergeDuplicateNodes(const GraphDef& input_graph_def, if (!input_names.count(node.name()) && !output_names.count(node.name())) { nameless_node.set_name(""); } - const uint64 hash = HashNodeDef(nameless_node); + const uint64_t hash = HashNodeDef(nameless_node); hashed_nodes[hash].push_back(&node); } // If we have multiple nodes with the same hash, then we know they're // duplicates and can be removed, unless they're stateful. - std::map inputs_to_rename; + std::map inputs_to_rename; GraphDef merged_graph_def; - for (const std::pair>& + for (const std::pair>& hashed_node_info : hashed_nodes) { const std::vector& hash_node_list = hashed_node_info.second; @@ -229,7 +230,7 @@ absl::Status MergeDuplicateNodes(const GraphDef& input_graph_def, OpRegistry::Global()->LookUpOpDef(current_node->op(), &op_def)); const bool is_duplicate = ((!op_def->is_stateful()) && (i > 0)); if (is_duplicate) { - const string original_name = hash_node_list[0]->name(); + const std::string original_name = hash_node_list[0]->name(); inputs_to_rename[current_node->name() + ":*"] = original_name; any_duplicates_found = true; } else { @@ -241,7 +242,7 @@ absl::Status MergeDuplicateNodes(const GraphDef& input_graph_def, // Update the graph so that any nodes that referred to removed inputs now // pull from the remaining duplicate. 
TF_RETURN_IF_ERROR(RenameNodeInputs(merged_graph_def, inputs_to_rename, - std::unordered_set(), + std::unordered_set(), ¤t_graph_def)); } while (any_duplicates_found); @@ -261,11 +262,11 @@ absl::Status MergeDuplicateNodes(const GraphDef& input_graph_def, absl::Status RemoveRedundantQuantizations(const GraphDef& input_graph_def, const TransformFuncContext& context, GraphDef* output_graph_def) { - std::set graph_outputs; - for (const string& output_name : context.output_names) { + std::set graph_outputs; + for (const std::string& output_name : context.output_names) { graph_outputs.insert(NodeNameFromInput(output_name)); } - std::map inputs_to_rename; + std::map inputs_to_rename; GraphDef replaced_graph_def; TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( input_graph_def, // clang-format off @@ -276,10 +277,10 @@ absl::Status RemoveRedundantQuantizations(const GraphDef& input_graph_def, {"Max"}, } }, // clang-format on - [&inputs_to_rename, &graph_outputs](const NodeMatch& match, - const std::set& input_nodes, - const std::set& output_nodes, - std::vector* new_nodes) { + [&inputs_to_rename, &graph_outputs]( + const NodeMatch& match, const std::set& input_nodes, + const std::set& output_nodes, + std::vector* new_nodes) { const NodeDef& quantize_node = match.node; const NodeDef& dequantize_node = match.inputs[0].node; inputs_to_rename[quantize_node.name() + ":0"] = @@ -302,7 +303,7 @@ absl::Status RemoveRedundantQuantizations(const GraphDef& input_graph_def, {true}, &replaced_graph_def)); return RenameNodeInputs(replaced_graph_def, inputs_to_rename, - std::unordered_set(), output_graph_def); + std::unordered_set(), output_graph_def); } // If the user has passed in the input_min and input_max args, then we need to @@ -321,15 +322,15 @@ absl::Status QuantizePlaceholders(const GraphDef& input_graph_def, *output_graph_def = input_graph_def; return absl::OkStatus(); } - std::map inputs_to_rename_first_pass; - std::map inputs_to_rename_second_pass; + std::map inputs_to_rename_first_pass; + std::map inputs_to_rename_second_pass; GraphDef placeholder_graph_def; placeholder_graph_def.Clear(); for (const NodeDef& node : input_graph_def.node()) { if (node.op() != "Placeholder") { *(placeholder_graph_def.mutable_node()->Add()) = node; } else { - string namespace_prefix = node.name() + "_eightbit"; + std::string namespace_prefix = node.name() + "_eightbit"; NodeDef quantized_placeholder; quantized_placeholder = node; @@ -354,7 +355,7 @@ absl::Status QuantizePlaceholders(const GraphDef& input_graph_def, SetNodeTensorAttr("value", max_tensor, &max_node); *(placeholder_graph_def.mutable_node()->Add()) = max_node; - const string rename_suffix = "__RENAMED_PLACEHOLDER__"; + const std::string rename_suffix = "__RENAMED_PLACEHOLDER__"; NodeDef dequantize_node; dequantize_node.set_op("Dequantize"); dequantize_node.set_name(namespace_prefix + "/dequantize"); @@ -375,12 +376,12 @@ absl::Status QuantizePlaceholders(const GraphDef& input_graph_def, } GraphDef first_pass_graph_def; - TF_RETURN_IF_ERROR( - RenameNodeInputs(placeholder_graph_def, inputs_to_rename_first_pass, - std::unordered_set(), &first_pass_graph_def)); + TF_RETURN_IF_ERROR(RenameNodeInputs( + placeholder_graph_def, inputs_to_rename_first_pass, + std::unordered_set(), &first_pass_graph_def)); TF_RETURN_IF_ERROR( RenameNodeInputs(first_pass_graph_def, inputs_to_rename_second_pass, - std::unordered_set(), output_graph_def)); + std::unordered_set(), output_graph_def)); return absl::OkStatus(); } @@ -400,15 +401,15 @@ absl::Status 
ConvertFakeQuantsToRequantize(const GraphDef& input_graph_def, {"Const"}, } }, // clang-format on - [](const NodeMatch& match, const std::set& input_nodes, - const std::set& output_nodes, + [](const NodeMatch& match, const std::set& input_nodes, + const std::set& output_nodes, std::vector* new_nodes) { const NodeDef& fake_quant_node = match.node; const NodeDef& original_op_node = match.inputs[0].node; const NodeDef& fake_quant_min_node = match.inputs[1].node; const NodeDef& fake_quant_max_node = match.inputs[2].node; - string namespace_prefix = fake_quant_node.name() + "_eightbit"; + std::string namespace_prefix = fake_quant_node.name() + "_eightbit"; new_nodes->push_back(original_op_node); new_nodes->push_back(fake_quant_min_node); @@ -494,8 +495,8 @@ absl::Status MergeAdjacentRequantizes(const GraphDef& input_graph_def, {"Const"}, } }, // clang-format on - [](const NodeMatch& match, const std::set& input_nodes, - const std::set& output_nodes, + [](const NodeMatch& match, const std::set& input_nodes, + const std::set& output_nodes, std::vector* new_nodes) { const NodeDef& fake_requantize_node = match.node; const NodeDef& original_op_node = @@ -544,8 +545,9 @@ absl::Status HoistFakeQuants(const GraphDef& input_graph_def, GraphDef hoisted_graph_def; TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( current_graph_def, pattern, - [depth](const NodeMatch& match, const std::set& input_nodes, - const std::set& output_nodes, + [depth](const NodeMatch& match, + const std::set& input_nodes, + const std::set& output_nodes, std::vector* new_nodes) { const NodeDef& fake_quant_node = match.node; const NodeDef& fake_quant_min_node = match.inputs[1].node; @@ -633,17 +635,17 @@ absl::Status QuantizeNodes(const GraphDef& input_graph_def, // between adjacent quantized ops, but a later pass removes these where it // can. - std::set ops_to_ignore; + std::set ops_to_ignore; if (context.params.count("ignore_op") > 0) { - for (const string& name : context.params.at("ignore_op")) { + for (const std::string& name : context.params.at("ignore_op")) { ops_to_ignore.insert(name); } } const std::vector& op_list = GetQuantizedOpList(); - string op_pattern; + std::string op_pattern; bool is_first = true; - std::map op_map; + std::map op_map; for (const QuantizedOpInfo& op_info : op_list) { if (ops_to_ignore.count(op_info.float_name) == 0) { absl::StrAppend(&op_pattern, is_first ? "" : "|", op_info.float_name); @@ -692,8 +694,8 @@ absl::Status QuantizeNodes(const GraphDef& input_graph_def, TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( converted_graph_def, {op_pattern}, [&op_map, fallback_min, fallback_max, has_fallback_range]( - const NodeMatch& match, const std::set& input_nodes, - const std::set& output_nodes, + const NodeMatch& match, const std::set& input_nodes, + const std::set& output_nodes, std::vector* new_nodes) { const NodeDef& float_node = match.node; const QuantizedOpInfo& op_info = op_map[float_node.op()]; @@ -728,18 +730,18 @@ absl::Status QuantizeNodes(const GraphDef& input_graph_def, return absl::OkStatus(); } - string namespace_prefix = float_node.name() + "_eightbit"; + std::string namespace_prefix = float_node.name() + "_eightbit"; // Quantize all of the inputs. - std::vector quantized_input_names; + std::vector quantized_input_names; for (int i = 0; i < float_node.input_size(); ++i) { // Skip any non-float inputs. 
if (op_info.unquantized_inputs.count(i)) { continue; } - const string& input_name = float_node.input(i); - string unique_input_name = + const std::string& input_name = float_node.input(i); + std::string unique_input_name = namespace_prefix + "/" + UniqueNodeNameFromInput(input_name); // Add some common constants we need for reshaping inputs. @@ -749,8 +751,9 @@ absl::Status QuantizeNodes(const GraphDef& input_graph_def, AddNodeInput("^" + NodeNameFromInput(input_name), &reshape_dims); SetNodeAttr("dtype", DT_INT32, &reshape_dims); Tensor reshape_dims_tensor(DT_INT32, {1}); - reshape_dims_tensor.flat()(0) = -1; - SetNodeTensorAttr("value", reshape_dims_tensor, &reshape_dims); + reshape_dims_tensor.flat()(0) = -1; + SetNodeTensorAttr("value", reshape_dims_tensor, + &reshape_dims); new_nodes->push_back(reshape_dims); NodeDef reduction_dims; @@ -759,9 +762,9 @@ absl::Status QuantizeNodes(const GraphDef& input_graph_def, AddNodeInput("^" + NodeNameFromInput(input_name), &reduction_dims); SetNodeAttr("dtype", DT_INT32, &reduction_dims); Tensor reduction_dims_tensor(DT_INT32, {1}); - reduction_dims_tensor.flat()(0) = 0; - SetNodeTensorAttr("value", reduction_dims_tensor, - &reduction_dims); + reduction_dims_tensor.flat()(0) = 0; + SetNodeTensorAttr("value", reduction_dims_tensor, + &reduction_dims); new_nodes->push_back(reduction_dims); NodeDef reshape_node; @@ -806,11 +809,11 @@ absl::Status QuantizeNodes(const GraphDef& input_graph_def, NodeDef quantized_main_node; quantized_main_node.set_op("Quantized" + float_node.op()); quantized_main_node.set_name(float_node.name() + "/eightbit"); - for (const string& attr_to_copy : op_info.attrs_to_copy) { + for (const std::string& attr_to_copy : op_info.attrs_to_copy) { CopyNodeAttr(float_node, attr_to_copy, attr_to_copy, &quantized_main_node); } - for (const std::pair& dtype_to_set : + for (const std::pair& dtype_to_set : op_info.dtypes_to_set) { SetNodeAttr(dtype_to_set.first, dtype_to_set.second, &quantized_main_node); @@ -820,32 +823,35 @@ absl::Status QuantizeNodes(const GraphDef& input_graph_def, if (op_info.unquantized_inputs.count(i)) { AddNodeInput(float_node.input(i), &quantized_main_node); } else { - const string& quantized_input_name = + const std::string& quantized_input_name = quantized_input_names[quantized_input_index]; AddNodeInput(quantized_input_name + ":0", &quantized_main_node); ++quantized_input_index; } } if (op_info.min_max_order == QuantizedOpInfo::CONTIGUOUS_MIN_MAX) { - for (const string& quantized_input_name : quantized_input_names) { + for (const std::string& quantized_input_name : + quantized_input_names) { AddNodeInput(quantized_input_name + ":1", &quantized_main_node); AddNodeInput(quantized_input_name + ":2", &quantized_main_node); } } else { - for (const string& quantized_input_name : quantized_input_names) { + for (const std::string& quantized_input_name : + quantized_input_names) { AddNodeInput(quantized_input_name + ":1", &quantized_main_node); } - for (const string& quantized_input_name : quantized_input_names) { + for (const std::string& quantized_input_name : + quantized_input_names) { AddNodeInput(quantized_input_name + ":2", &quantized_main_node); } } new_nodes->push_back(quantized_main_node); - string eight_bit_node_name; + std::string eight_bit_node_name; if (op_info.output_bit_depth == DT_QINT32) { // Shrink the range of the output down from 32 bits to 8. 
- string requantize_min_input; - string requantize_max_input; + std::string requantize_min_input; + std::string requantize_max_input; if (has_fallback_range) { // Use constant values for the min/max range if they were given. NodeDef fallback_min_node; diff --git a/tensorflow/tools/graph_transforms/remove_attribute.cc b/tensorflow/tools/graph_transforms/remove_attribute.cc index 6fca08585fb271..128672734f7c0b 100644 --- a/tensorflow/tools/graph_transforms/remove_attribute.cc +++ b/tensorflow/tools/graph_transforms/remove_attribute.cc @@ -36,7 +36,7 @@ absl::Status RemoveAttribute(const GraphDef& input_graph_def, "argument, e.g. remove_attribute(op_name=Mul, attribute_name=foo)"); } - string op_name; + std::string op_name; if (context.params.count("op_name")) { if (context.params.at("op_name").size() != 1) { return errors::InvalidArgument( @@ -48,7 +48,7 @@ absl::Status RemoveAttribute(const GraphDef& input_graph_def, op_name = "*"; } - const string attribute_name = context.params.at("attribute_name")[0]; + const std::string attribute_name = context.params.at("attribute_name")[0]; output_graph_def->Clear(); for (const NodeDef& node : input_graph_def.node()) { NodeDef* new_node = output_graph_def->mutable_node()->Add(); diff --git a/tensorflow/tools/mlpbtxt/frommlpbtxt.cc b/tensorflow/tools/mlpbtxt/frommlpbtxt.cc index dec8b6b542a8d0..2817d919dbc915 100644 --- a/tensorflow/tools/mlpbtxt/frommlpbtxt.cc +++ b/tensorflow/tools/mlpbtxt/frommlpbtxt.cc @@ -29,15 +29,15 @@ namespace tensorflow { namespace { int Run(int argc, char** argv) { - string FLAGS_in = ""; - string FLAGS_out = ""; + std::string FLAGS_in = ""; + std::string FLAGS_out = ""; std::vector flag_list = { Flag("in", &FLAGS_in, "Input multi-line proto text (.mlpbtxt) file name"), Flag("out", &FLAGS_out, "Output proto text (.pbtxt) file name")}; // Parse the command-line. - const string usage = Flags::Usage(argv[0], flag_list); + const std::string usage = Flags::Usage(argv[0], flag_list); const bool parse_ok = Flags::Parse(&argc, argv, flag_list); if (argc != 1 || !parse_ok) { printf("%s", usage.c_str()); @@ -47,7 +47,7 @@ int Run(int argc, char** argv) { port::InitMain(argv[0], &argc, &argv); // Read the input file --in. - string in_contents; + std::string in_contents; absl::Status s = ReadFileToString(Env::Default(), FLAGS_in, &in_contents); if (!s.ok()) { printf("Error reading file %s: %s\n", FLAGS_in.c_str(), @@ -56,7 +56,7 @@ int Run(int argc, char** argv) { } // Write the output file --out. - const string out_contents = PBTxtFromMultiline(in_contents); + const std::string out_contents = PBTxtFromMultiline(in_contents); s = WriteStringToFile(Env::Default(), FLAGS_out, out_contents); if (!s.ok()) { printf("Error writing file %s: %s\n", FLAGS_out.c_str(), diff --git a/tensorflow/tools/mlpbtxt/tomlpbtxt.cc b/tensorflow/tools/mlpbtxt/tomlpbtxt.cc index 552d4075619cd3..8c69f5047bb384 100644 --- a/tensorflow/tools/mlpbtxt/tomlpbtxt.cc +++ b/tensorflow/tools/mlpbtxt/tomlpbtxt.cc @@ -30,9 +30,9 @@ namespace tensorflow { namespace { int Run(int argc, char** argv) { - string FLAGS_in = ""; - string FLAGS_out = ""; - string FLAGS_fields = "description"; + std::string FLAGS_in = ""; + std::string FLAGS_out = ""; + std::string FLAGS_fields = "description"; std::vector flag_list = { Flag("in", &FLAGS_in, "Input proto text (.pbtxt) file name"), @@ -41,7 +41,7 @@ int Run(int argc, char** argv) { Flag("fields", &FLAGS_fields, "Comma-separated list of field names")}; // Parse the command-line. 
- const string usage = Flags::Usage(argv[0], flag_list); + const std::string usage = Flags::Usage(argv[0], flag_list); const bool parse_ok = Flags::Parse(&argc, argv, flag_list); if (argc != 1 || !parse_ok) { printf("%s", usage.c_str()); @@ -49,7 +49,7 @@ int Run(int argc, char** argv) { } // Parse the --fields option. - std::vector fields = + std::vector fields = str_util::Split(FLAGS_fields, ',', str_util::SkipEmpty()); if (fields.empty()) { printf("--fields must be non-empty.\n%s", usage.c_str()); @@ -59,7 +59,7 @@ int Run(int argc, char** argv) { port::InitMain(argv[0], &argc, &argv); // Read the input file --in. - string in_contents; + std::string in_contents; absl::Status s = ReadFileToString(Env::Default(), FLAGS_in, &in_contents); if (!s.ok()) { printf("Error reading file %s: %s\n", FLAGS_in.c_str(), @@ -68,7 +68,7 @@ int Run(int argc, char** argv) { } // Write the output file --out. - const string out_contents = PBTxtToMultiline(in_contents, fields); + const std::string out_contents = PBTxtToMultiline(in_contents, fields); s = WriteStringToFile(Env::Default(), FLAGS_out, out_contents); if (!s.ok()) { printf("Error writing file %s: %s\n", FLAGS_out.c_str(), diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 24135c42619d31..c0de3f391710e2 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -448,7 +448,7 @@ py_test( verify_manylinux_compliance_test( name = "manylinux_compliance_test", - aarch64_compliance_tag = "manylinux_2_17_aarch64", + aarch64_compliance_tag = "manylinux_2_27_aarch64", ppc64le_compliance_tag = "manylinux_2_17_ppc64le", test_tags = [ "manual", diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile index 8d35977d14a987..b9d06f956f6d2a 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile @@ -1,5 +1,5 @@ ################################################################################ -FROM ubuntu:22.04@sha256:09506232a8004baa32c47d68f1e5c307d648fdd59f5e7eaa42aaf87914100db3 as builder +FROM ubuntu:22.04@sha256:104ae83764a5119017b8e8d6218fa0832b09df65aae7d5a6de29a85d813da2fb as builder ################################################################################ # Install devtoolset build dependencies diff --git a/tensorflow/workspace0.bzl b/tensorflow/workspace0.bzl index 144e34d7460806..005d8552b79300 100644 --- a/tensorflow/workspace0.bzl +++ b/tensorflow/workspace0.bzl @@ -8,6 +8,7 @@ load("@build_bazel_rules_swift//swift:repositories.bzl", "swift_rules_dependenci load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps") load("@local_config_android//:android.bzl", "android_workspace") load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies") +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") load("//third_party/googleapis:repository_rules.bzl", "config_googleapis") def _tf_bind(): @@ -79,14 +80,13 @@ def workspace(): # Note: We add this to fix Kokoro builds. # The rules below call into `rules_proto` but the hash has changed and # Bazel refuses to continue. So, we add our own mirror. 
- http_archive( + tf_http_archive( name = "rules_proto", sha256 = "20b240eba17a36be4b0b22635aca63053913d5c1ee36e16be36499d167a2f533", strip_prefix = "rules_proto-11bf7c25e666dd7ddacbcd4d4c4a9de7a25175f8", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/rules_proto/archive/11bf7c25e666dd7ddacbcd4d4c4a9de7a25175f8.tar.gz", + urls = tf_mirror_urls( "https://github.com/bazelbuild/rules_proto/archive/11bf7c25e666dd7ddacbcd4d4c4a9de7a25175f8.tar.gz", - ], + ), ) # Now, finally use the rules @@ -106,13 +106,13 @@ def workspace(): # Toolchains for ML projects hermetic builds. # Details: https://github.com/google-ml-infra/rules_ml_toolchain - http_archive( + tf_http_archive( name = "rules_ml_toolchain", - sha256 = "b1e5e306d8b1103e73b9b778dfc3a9e069d20664437a03246a235724962b5c94", - strip_prefix = "rules_ml_toolchain-484235be45e6843db962c45d08fe4b2b65a6a24c", - urls = [ - "https://github.com/google-ml-infra/rules_ml_toolchain/archive/484235be45e6843db962c45d08fe4b2b65a6a24c.tar.gz", - ], + sha256 = "1c2c530a054e9e8b3c811ec21ed8a687fc865bec3abbc8ff65beb829b1d67ae4", + strip_prefix = "rules_ml_toolchain-6734d2a174bf29e731d3f473743d1cc1a86100c3", + urls = tf_mirror_urls( + "https://github.com/google-ml-infra/rules_ml_toolchain/archive/6734d2a174bf29e731d3f473743d1cc1a86100c3.tar.gz", + ), ) # Alias so it can be loaded without assigning to a different symbol to prevent diff --git a/tensorflow/workspace1.bzl b/tensorflow/workspace1.bzl index 399ff8f7579a7d..408e9a89183f0f 100644 --- a/tensorflow/workspace1.bzl +++ b/tensorflow/workspace1.bzl @@ -1,11 +1,11 @@ """TensorFlow workspace initialization. Consult the WORKSPACE on how to use it.""" -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps") load("@com_google_benchmark//:bazel/benchmark_deps.bzl", "benchmark_deps") load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories") load("@local_xla//third_party/llvm:setup.bzl", "llvm_setup") load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") load("//third_party/android:android_configure.bzl", "android_configure") # buildifier: disable=unnamed-macro @@ -21,14 +21,13 @@ def workspace(with_rules_cc = True): closure_repositories() - http_archive( + tf_http_archive( name = "bazel_toolchains", sha256 = "294cdd859e57fcaf101d4301978c408c88683fbc46fbc1a3829da92afbea55fb", strip_prefix = "bazel-toolchains-8c717f8258cd5f6c7a45b97d974292755852b658", - urls = [ - "http://mirror.tensorflow.org/github.com/bazelbuild/bazel-toolchains/archive/8c717f8258cd5f6c7a45b97d974292755852b658.tar.gz", + urls = tf_mirror_urls( "https://github.com/bazelbuild/bazel-toolchains/archive/8c717f8258cd5f6c7a45b97d974292755852b658.tar.gz", - ], + ), ) android_configure(name = "local_config_android") diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl index 44725b23d6da6c..067de3cdcac811 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -1,6 +1,5 @@ """TensorFlow workspace initialization. 
Consult the WORKSPACE on how to use it.""" -load("//third_party:repo.bzl", "tf_vendored") load("@bazel_features//:deps.bzl", "bazel_features_deps") load("@bazel_skylib//lib:versions.bzl", "versions") load("@bazel_tools//tools/build_defs/repo:java.bzl", "java_import_external") @@ -169,18 +168,18 @@ def _tf_repositories(): # LINT.IfChange(xnnpack) tf_http_archive( name = "XNNPACK", - sha256 = "f644ad3ac88b3b0208a82742938bca35235865d6ca64950dac58b166877eb2a5", - strip_prefix = "XNNPACK-1b918df9d1744ae40725254f4baa592ed05c912e", - urls = tf_mirror_urls("https://github.com/google/XNNPACK/archive/1b918df9d1744ae40725254f4baa592ed05c912e.zip"), + sha256 = "08976c0ba6495775f78d738adbcc60a567b5826774f23d3c403486c70ff79772", + strip_prefix = "XNNPACK-183297df5c945236cbc4bb1f625f9f2008bfc564", + urls = tf_mirror_urls("https://github.com/google/XNNPACK/archive/183297df5c945236cbc4bb1f625f9f2008bfc564.zip"), ) # LINT.ThenChange(//tensorflow/lite/tools/cmake/modules/xnnpack.cmake) # XNNPack dependency. tf_http_archive( name = "KleidiAI", - sha256 = "fb4f8180171d035a08432b086194121f627d00a76d58cebaad57d7a87ad40dbd", - strip_prefix = "kleidiai-7a3a609a3278106df7157bdd27b8f0e75ab00b60", - urls = tf_mirror_urls("https://github.com/ARM-software/kleidiai/archive/7a3a609a3278106df7157bdd27b8f0e75ab00b60.zip"), + sha256 = "5e922c9afb7a0c881fc4359b58488f3faa840e8435de1a2207a6525935ed83c2", + strip_prefix = "kleidiai-63205aa90afa6803d8f58bc3081b69288e9f1906", + urls = tf_mirror_urls("https://github.com/ARM-software/kleidiai/archive/63205aa90afa6803d8f58bc3081b69288e9f1906.zip"), ) tf_http_archive( @@ -410,16 +409,8 @@ def _tf_repositories(): }, ) - # Use XLA's googletest wrapper which provides EXPECT_OK and ASSERT_OK macros. - # This wrapper adds those macros to the open-source gmock/gmock.h header, - # matching the behavior of internal builds. - tf_vendored( - name = "com_google_googletest", - path = "third_party/xla/third_party/xla_googletest_wrapper", - ) - tf_http_archive( - name = "com_google_googletest_upstream", + name = "com_google_googletest", # Use the commit on 2025/6/09: # https://github.com/google/googletest/commit/28e9d1f26771c6517c3b4be10254887673c94018 sha256 = "f253ca1a07262f8efde8328e4b2c68979e40ddfcfc001f70d1d5f612c7de2974", @@ -428,6 +419,8 @@ def _tf_repositories(): # - avoid dependencies on @fuchsia_sdk, # - refer to re2 as @com_googlesource_code_re2, # - refer to abseil as @com_google_absl. + # - add status assert macros for consistency with internal gmock (see + # README.add-status-macros.md). # # To update the patch, run: # $ cd ~ @@ -440,7 +433,11 @@ def _tf_repositories(): # $ git diff > /third_party/tensorflow/third_party/googletest/googletest.patch # # The patch path is relative to third_party/tensorflow. - patch_file = ["@local_xla//third_party/googletest:googletest.patch"], + patch_file = [ + "@local_xla//third_party/googletest:googletest.patch", + "@local_xla//third_party/googletest:0001-Add-ASSERT_OK-EXPECT_OK-ASSERT_OK_AND_ASSIGN-macros.patch", + "@local_xla//third_party/googletest:0002-Rename-dependencies-for-workspace.bzl-build.patch", + ], urls = tf_mirror_urls("https://github.com/google/googletest/archive/28e9d1f26771c6517c3b4be10254887673c940189.zip"), ) diff --git a/tensorflow/workspace3.bzl b/tensorflow/workspace3.bzl index adabcc54fc586d..b74e2e012b0e3f 100644 --- a/tensorflow/workspace3.bzl +++ b/tensorflow/workspace3.bzl @@ -1,80 +1,73 @@ """TensorFlow workspace initialization. 
Consult the WORKSPACE on how to use it.""" -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -load("//third_party:repo.bzl", "tf_vendored") +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls", "tf_vendored") load("//third_party/tf_runtime:workspace.bzl", tf_runtime = "repo") def workspace(): tf_vendored(name = "local_xla", path = "third_party/xla") tf_vendored(name = "local_tsl", path = "third_party/xla/third_party/tsl") - http_archive( + tf_http_archive( name = "io_bazel_rules_closure", sha256 = "5b00383d08dd71f28503736db0500b6fb4dda47489ff5fc6bed42557c07c6ba9", strip_prefix = "rules_closure-308b05b2419edb5c8ee0471b67a40403df940149", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", + urls = tf_mirror_urls( "https://github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", # 2019-06-13 - ], + ), ) tf_runtime() # https://github.com/bazelbuild/bazel-skylib/releases - http_archive( + tf_http_archive( name = "bazel_skylib", sha256 = "bc283cdfcd526a52c3201279cda4bc298652efa898b10b4db0837dc51652756f", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/bazel-skylib/releases/download/1.7.1/bazel-skylib-1.7.1.tar.gz", + urls = tf_mirror_urls( "https://github.com/bazelbuild/bazel-skylib/releases/download/1.7.1/bazel-skylib-1.7.1.tar.gz", - ], + ), ) - http_archive( + tf_http_archive( name = "rules_license", - urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/rules_license/releases/download/0.0.7/rules_license-0.0.7.tar.gz", + urls = tf_mirror_urls( "https://github.com/bazelbuild/rules_license/releases/download/0.0.7/rules_license-0.0.7.tar.gz", - ], + ), sha256 = "4531deccb913639c30e5c7512a054d5d875698daeb75d8cf90f284375fe7c360", ) - http_archive( + tf_http_archive( name = "rules_pkg", - urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.7.1/rules_pkg-0.7.1.tar.gz", + urls = tf_mirror_urls( "https://github.com/bazelbuild/rules_pkg/releases/download/0.7.1/rules_pkg-0.7.1.tar.gz", - ], + ), sha256 = "451e08a4d78988c06fa3f9306ec813b836b1d076d0f055595444ba4ff22b867f", ) - http_archive( + tf_http_archive( name = "bazel_features", sha256 = "4fd9922d464686820ffd8fcefa28ccffa147f7cdc6b6ac0d8b07fde565c65d66", strip_prefix = "bazel_features-1.25.0", - urls = [ - "https://mirror.bazel.build/github.com/bazel-contrib/bazel_features/releases/download/v1.25.0/bazel_features-v1.25.0.tar.gz", + urls = tf_mirror_urls( "https://github.com/bazel-contrib/bazel_features/releases/download/v1.25.0/bazel_features-v1.25.0.tar.gz", - ], + ), ) # Maven dependencies. 
RULES_JVM_EXTERNAL_TAG = "4.3" - http_archive( + tf_http_archive( name = "rules_jvm_external", strip_prefix = "rules_jvm_external-%s" % RULES_JVM_EXTERNAL_TAG, sha256 = "6274687f6fc5783b589f56a2f1ed60de3ce1f99bc4e8f9edef3de43bdf7c6e74", - url = "https://github.com/bazelbuild/rules_jvm_external/archive/%s.zip" % RULES_JVM_EXTERNAL_TAG, + urls = tf_mirror_urls("https://github.com/bazelbuild/rules_jvm_external/archive/%s.zip" % RULES_JVM_EXTERNAL_TAG), ) # Platforms - http_archive( + tf_http_archive( name = "platforms", sha256 = "29742e87275809b5e598dc2f04d86960cc7a55b3067d97221c9abbc9926bff0f", - urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/platforms/releases/download/0.0.11/platforms-0.0.11.tar.gz", + urls = tf_mirror_urls( "https://github.com/bazelbuild/platforms/releases/download/0.0.11/platforms-0.0.11.tar.gz", - ], + ), ) # Alias so it can be loaded without assigning to a different symbol to prevent diff --git a/third_party/py/python_init_pip.bzl b/third_party/py/python_init_pip.bzl index 7689b92b60a00a..39901b9b2e64ea 100644 --- a/third_party/py/python_init_pip.bzl +++ b/third_party/py/python_init_pip.bzl @@ -24,6 +24,10 @@ cc_library( cc_library( name = "numpy_headers", deps = [":numpy_headers_2", ":numpy_headers_1"], + # For the layering check to work we need to re-export the headers from the + # dependencies. + hdrs = glob(["site-packages/numpy/_core/include/**/*.h"]) + + glob(["site-packages/numpy/core/include/**/*.h"]), ) """, ), diff --git a/third_party/py/python_init_rules.bzl b/third_party/py/python_init_rules.bzl index ac9b8eb3893441..e8bfd6548965e4 100644 --- a/third_party/py/python_init_rules.bzl +++ b/third_party/py/python_init_rules.bzl @@ -1,6 +1,5 @@ """Hermetic Python initialization. Consult the WORKSPACE on how to use it.""" -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") def python_init_rules(extra_patches = []): @@ -11,15 +10,14 @@ def python_init_rules(extra_patches = []): set of patches. 
""" - http_archive( + tf_http_archive( name = "rules_cc", - urls = ["https://github.com/bazelbuild/rules_cc/archive/refs/tags/0.1.0.tar.gz"], + urls = tf_mirror_urls("https://github.com/bazelbuild/rules_cc/archive/refs/tags/0.1.0.tar.gz"), strip_prefix = "rules_cc-0.1.0", sha256 = "4b12149a041ddfb8306a8fd0e904e39d673552ce82e4296e96fac9cbf0780e59", - patches = [ - Label("//third_party/py:rules_cc_protobuf.patch"), + patch_file = [ + "@local_xla//third_party/py:rules_cc_protobuf.patch", ], - patch_args = ["-p1"], ) tf_http_archive( @@ -34,15 +32,14 @@ def python_init_rules(extra_patches = []): }, ) - http_archive( + tf_http_archive( name = "rules_python", sha256 = "fa7dd2c6b7d63b3585028dd8a90a6cf9db83c33b250959c2ee7b583a6c130e12", strip_prefix = "rules_python-1.6.0", - url = "https://github.com/bazelbuild/rules_python/releases/download/1.6.0/rules_python-1.6.0.tar.gz", - patch_args = ["-p1"], - patches = [ - Label("//third_party/py:rules_python_pip_version.patch"), - Label("//third_party/py:rules_python_freethreaded.patch"), - Label("//third_party/py:rules_python_versions.patch"), + urls = tf_mirror_urls("https://github.com/bazelbuild/rules_python/releases/download/1.6.0/rules_python-1.6.0.tar.gz"), + patch_file = [ + "@local_xla//third_party/py:rules_python_pip_version.patch", + "@local_xla//third_party/py:rules_python_freethreaded.patch", + "@local_xla//third_party/py:rules_python_versions.patch", ] + extra_patches, ) diff --git a/third_party/py/python_init_toolchains.bzl b/third_party/py/python_init_toolchains.bzl index 860fc08ceda2a8..82d755c32bbfba 100644 --- a/third_party/py/python_init_toolchains.bzl +++ b/third_party/py/python_init_toolchains.bzl @@ -41,7 +41,6 @@ def python_init_toolchains(name = "python", python_version = None, **kwargs): tool_version = MINOR_MAPPING.get(HERMETIC_PYTHON_VERSION) if not tool_version: tool_version = HERMETIC_PYTHON_VERSION + ".0" - url_components = HERMETIC_PYTHON_URL.split("://", 1) sha256s = {} for platform in PLATFORMS.keys(): @@ -51,12 +50,12 @@ def python_init_toolchains(name = "python", python_version = None, **kwargs): python_register_toolchains( name = get_toolchain_name_per_python_version(name), - base_url = url_components[0] + "://", + base_url = "", ignore_root_user_error = True, python_version = tool_version, tool_versions = { tool_version: { - "url": url_components[1], + "url": HERMETIC_PYTHON_URL, "sha256": sha256s, "strip_prefix": HERMETIC_PYTHON_PREFIX, }, diff --git a/third_party/py/rules_python_versions.patch b/third_party/py/rules_python_versions.patch index 8dbc70bad193d7..c31b6772c2675f 100644 --- a/third_party/py/rules_python_versions.patch +++ b/third_party/py/rules_python_versions.patch @@ -1,8 +1,60 @@ diff --git a/python/versions.bzl b/python/versions.bzl -index 30929f82..8e79225a 100644 +index 30929f82..c0856d70 100644 --- a/python/versions.bzl +++ b/python/versions.bzl -@@ -855,6 +855,51 @@ TOOL_VERSIONS = { +@@ -810,6 +810,51 @@ TOOL_VERSIONS = { + "x86_64-unknown-linux-gnu-freethreaded": "python/install", + }, + }, ++ "3.13.11": { ++ "url": "20251209/cpython-{python_version}+20251209-{platform}-{build}.{ext}", ++ "sha256": { ++ "aarch64-apple-darwin": "295a9f7bc899ea1cc08baf60bbf511bdd1e4a29b2dd7e5f59b48f18bfa6bf585", ++ "aarch64-unknown-linux-gnu": "ea1e678e6e82301bb32bf3917732125949b6e46d541504465972024a3f165343", ++ "ppc64le-unknown-linux-gnu": "7660e53aad9d35ee256913c6d98427f81f078699962035c5fa8b5c3138695109", ++ "riscv64-unknown-linux-gnu": "763fa1548e6a432e9402916e690c74ea30f26dcd2e131893dd506f72b87c27c9", ++ 
"s390x-unknown-linux-gnu": "ffb6af51fbfabfc6fbc4e7379bdec70c2f51e972b1d2f45c053493b9da3a1bbe", ++ "x86_64-apple-darwin": "dac4a0a0a9b71f6b02a8b0886547fa22814474239bffb948e3e77185406ea136", ++ "x86_64-pc-windows-msvc": "87822417007045a28a7eccc47fe67b8c61265b99b10dbbfa24d231a3622b1c27", ++ "aarch64-pc-windows-msvc": "ba646d0c3b7dd7bdfb770d9b2ebd6cd2df02a37fda90c9c79a7cf59c7df6f165", ++ "aarch64-pc-windows-msvc-freethreaded": "6daf6d092c7294cfe68c4c7bf2698ac134235489c874b3bf796c7972b9dbba30", ++ "x86_64-unknown-linux-gnu": "1ffa06d714a44aea14c0c54c30656413e5955a6c92074b4b3cb4351dcc28b63b", ++ "x86_64-unknown-linux-musl": "969fe24017380b987c4e3ce15e9edf82a4618c1e61672b2cc9b021a1c98eae78", ++ "aarch64-apple-darwin-freethreaded": "4213058b7fcd875596c12b58cd46a399358b0a87ecde4b349cbdd00cf87ed79a", ++ "aarch64-unknown-linux-gnu-freethreaded": "290ca3bd0007db9e551f90b08dfcb6c1b2d62c33b2fc3e9a43e77d385d94f569", ++ "ppc64le-unknown-linux-gnu-freethreaded": "09d4b50f8abb443f7e3af858c920aa61c2430b0954df465e861caa7078e55e69", ++ "riscv64-unknown-linux-gnu-freethreaded": "5406f2a7cacafbd2aac3ce2de066a0929aab55423824276c36e04cb83babc36c", ++ "s390x-unknown-linux-gnu-freethreaded": "3984b67c4292892eaccdd1c094c7ec788884c4c9b3534ab6995f6be96d5ed51d", ++ "x86_64-apple-darwin-freethreaded": "d6f489464045d6895ae68b0a04a9e16477e74fe3185a75f3a9a0af8ccd25eade", ++ "x86_64-pc-windows-msvc-freethreaded": "bb9a29a7ba8f179273b79971da6aaa7be592d78c606a63f99eff3e4c12fb0fae", ++ "x86_64-unknown-linux-gnu-freethreaded": "33f89c957d986d525529b8a980103735776f4d20cf52f55960a057c760188ac3", ++ }, ++ "strip_prefix": { ++ "aarch64-apple-darwin": "python", ++ "aarch64-unknown-linux-gnu": "python", ++ "ppc64le-unknown-linux-gnu": "python", ++ "s390x-unknown-linux-gnu": "python", ++ "riscv64-unknown-linux-gnu": "python", ++ "x86_64-apple-darwin": "python", ++ "x86_64-pc-windows-msvc": "python", ++ "aarch64-pc-windows-msvc": "python", ++ "x86_64-unknown-linux-gnu": "python", ++ "x86_64-unknown-linux-musl": "python", ++ "aarch64-apple-darwin-freethreaded": "python/install", ++ "aarch64-unknown-linux-gnu-freethreaded": "python/install", ++ "ppc64le-unknown-linux-gnu-freethreaded": "python/install", ++ "riscv64-unknown-linux-gnu-freethreaded": "python/install", ++ "s390x-unknown-linux-gnu-freethreaded": "python/install", ++ "x86_64-apple-darwin-freethreaded": "python/install", ++ "x86_64-pc-windows-msvc-freethreaded": "python/install", ++ "aarch64-pc-windows-msvc-freethreaded": "python/install", ++ "x86_64-unknown-linux-gnu-freethreaded": "python/install", ++ }, ++ }, + "3.14.0rc1": { + "url": "20250808/cpython-{python_version}+20250808-{platform}-{build}.{ext}", + "sha256": { +@@ -855,6 +900,51 @@ TOOL_VERSIONS = { "x86_64-unknown-linux-gnu-freethreaded": "python/install", }, }, @@ -54,16 +106,18 @@ index 30929f82..8e79225a 100644 } # buildifier: disable=unsorted-dict-items -@@ -865,7 +910,7 @@ MINOR_MAPPING = { +@@ -864,8 +954,8 @@ MINOR_MAPPING = { + "3.10": "3.10.18", "3.11": "3.11.13", "3.12": "3.12.11", - "3.13": "3.13.6", +- "3.13": "3.13.6", - "3.14": "3.14.0rc1", ++ "3.13": "3.13.11", + "3.14": "3.14.0", } def _generate_platforms(): -@@ -1045,29 +1090,25 @@ def get_release_info(platform, python_version, base_url = DEFAULT_RELEASE_BASE_U +@@ -1045,29 +1135,25 @@ def get_release_info(platform, python_version, base_url = DEFAULT_RELEASE_BASE_U for u in url: p, _, _ = platform.partition(FREETHREADED) diff --git a/third_party/systemlibs/grpc.bazel.generate_cc.bzl b/third_party/systemlibs/grpc.bazel.generate_cc.bzl index 
aa5d18eaa9a488..f396b1f853e71c 100644 --- a/third_party/systemlibs/grpc.bazel.generate_cc.bzl +++ b/third_party/systemlibs/grpc.bazel.generate_cc.bzl @@ -46,7 +46,7 @@ def generate_cc_impl(ctx): includes = [ f for src in ctx.attr.srcs - for f in src[ProtoInfo].transitive_imports.to_list() + for f in src[ProtoInfo].transitive_sources.to_list() ] outs = [] proto_root = get_proto_root( diff --git a/third_party/systemlibs/grpc.bazel.protobuf.bzl b/third_party/systemlibs/grpc.bazel.protobuf.bzl index cfb124ce43b1ef..9eeb4cb4475188 100644 --- a/third_party/systemlibs/grpc.bazel.protobuf.bzl +++ b/third_party/systemlibs/grpc.bazel.protobuf.bzl @@ -163,7 +163,7 @@ def includes_from_deps(deps): return [ file for src in deps - for file in src[ProtoInfo].transitive_imports.to_list() + for file in src[ProtoInfo].transitive_sources.to_list() ] def get_proto_arguments(protos, genfiles_dir_path): diff --git a/third_party/xla/.github/workflows/bazel_dependency_violations.yml b/third_party/xla/.github/workflows/bazel_dependency_violations.yml index 0588447392e993..e3fbfbab9bee81 100644 --- a/third_party/xla/.github/workflows/bazel_dependency_violations.yml +++ b/third_party/xla/.github/workflows/bazel_dependency_violations.yml @@ -39,7 +39,7 @@ jobs: continue-on-error: true steps: - name: "Checking out repository" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: "Install bazelisk" run: parallel --ungroup --retries 3 --delay 15 --nonall -- go install github.com/bazelbuild/bazelisk@24651ab # v1.20.0 - name: "Run bazel build --nobuild //xla/... with retries" diff --git a/third_party/xla/.github/workflows/bazel_query.yml b/third_party/xla/.github/workflows/bazel_query.yml index 052309ef806012..8888c7b0f3267e 100644 --- a/third_party/xla/.github/workflows/bazel_query.yml +++ b/third_party/xla/.github/workflows/bazel_query.yml @@ -34,7 +34,7 @@ jobs: timeout-minutes: 10 steps: - name: "Checking out repository" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: "Install bazelisk" run: parallel --ungroup --retries 3 --delay 15 --nonall -- go install github.com/bazelbuild/bazelisk@24651ab # v1.20.0 - name: "Run bazel build --nobuild //xla/... with retries" diff --git a/third_party/xla/.github/workflows/bazel_tags.yml b/third_party/xla/.github/workflows/bazel_tags.yml index 00ed95c8e6f0e0..09ecd6f00603ef 100644 --- a/third_party/xla/.github/workflows/bazel_tags.yml +++ b/third_party/xla/.github/workflows/bazel_tags.yml @@ -34,7 +34,7 @@ jobs: timeout-minutes: 10 steps: - name: "Checking out repository" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: "Install bazelisk" run: parallel --ungroup --retries 3 --delay 15 --nonall -- go install github.com/bazelbuild/bazelisk@24651ab # v1.20.0 - name: "Run bazel build --nobuild //xla/... 
with retries" diff --git a/third_party/xla/.github/workflows/benchmark_postsubmit.yml b/third_party/xla/.github/workflows/benchmark_postsubmit.yml index bab85cb699bd02..014f71761c2527 100644 --- a/third_party/xla/.github/workflows/benchmark_postsubmit.yml +++ b/third_party/xla/.github/workflows/benchmark_postsubmit.yml @@ -110,7 +110,7 @@ jobs: PR: ${{ steps.find_pr.outputs.pr }} - name: Checkout OpenXLA - uses: actions/checkout@v6.0.0 + uses: actions/checkout@v6.0.1 - name: Wait For Connection uses: google-ml-infra/actions/ci_connection@7f5ca0c263a81ed09ea276524c1b9192f1304e3c with: @@ -235,7 +235,7 @@ jobs: gsutil cp "$OUTPUT_FILE" "$GCS_BUCKET/$GCS_OBJECT_NAME" - name: Upload XSpace artifacts - uses: actions/upload-artifact@v5.0.0 + uses: actions/upload-artifact@v6.0.0 with: name: xspace-artifacts-${{ matrix.job_info.pool }}-${{ matrix.job_info.platform }} path: ${{ env.XSPACE_FILE }} \ No newline at end of file diff --git a/third_party/xla/.github/workflows/benchmark_presubmit.yml b/third_party/xla/.github/workflows/benchmark_presubmit.yml index 33f65f9eead53d..33dc31bd6a64d6 100644 --- a/third_party/xla/.github/workflows/benchmark_presubmit.yml +++ b/third_party/xla/.github/workflows/benchmark_presubmit.yml @@ -86,7 +86,7 @@ jobs: fi - name: Checkout OpenXLA - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Configure GPU backend if: ${{ matrix.job_info.platform == 'GPU' }} diff --git a/third_party/xla/.github/workflows/buildifier.yml b/third_party/xla/.github/workflows/buildifier.yml index d61728b29b4716..079a608acc26e0 100644 --- a/third_party/xla/.github/workflows/buildifier.yml +++ b/third_party/xla/.github/workflows/buildifier.yml @@ -34,7 +34,7 @@ jobs: timeout-minutes: 6 steps: - name: "Checking out repository" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: "Install buildifier" run: parallel --ungroup --retries 3 --delay 15 --nonall -- go install github.com/bazelbuild/buildtools/buildifier@433ea85 # 6.4.0 - name: "Run buildifier" diff --git a/third_party/xla/.github/workflows/check_contents.yml b/third_party/xla/.github/workflows/check_contents.yml index 820a99675525ca..afc6f9c7780e14 100644 --- a/third_party/xla/.github/workflows/check_contents.yml +++ b/third_party/xla/.github/workflows/check_contents.yml @@ -46,7 +46,7 @@ jobs: contains(github.event.pull_request.body, 'FORCE_TEST_ACTIONS') steps: - name: "Checking out repository" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: "Fetch HEAD of main branch" run: git fetch origin main --depth=1 diff --git a/third_party/xla/.github/workflows/ci.yml b/third_party/xla/.github/workflows/ci.yml index daafda6979df76..db5629dac494b0 100644 --- a/third_party/xla/.github/workflows/ci.yml +++ b/third_party/xla/.github/workflows/ci.yml @@ -115,15 +115,14 @@ jobs: defaults: run: shell: bash - timeout-minutes: 60 steps: - name: "Checking out openxla/xla" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: path: "openxla/xla" - name: Checking out ${{ matrix.job_info.repo }} if: ${{ matrix.job_info.repo != 'openxla/xla' }} - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: 
actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: repository: ${{ matrix.job_info.repo }} path: ${{ matrix.job_info.repo }} @@ -133,6 +132,7 @@ jobs: halt-dispatch-input: ${{ inputs.halt-for-connection }} - name: "Run build.py" working-directory: ${{ matrix.job_info.repo }} + timeout-minutes: 60 run: | if [[ "${{ matrix.job_info.pool }}" == *windows* ]]; then python $GITHUB_WORKSPACE\\openxla\\xla\\build_tools\\ci\\build.py --build="${{ matrix.job_info.name }}_github_actions" diff --git a/third_party/xla/.github/workflows/ci_multi_device.yml b/third_party/xla/.github/workflows/ci_multi_device.yml new file mode 100644 index 00000000000000..4171626436f600 --- /dev/null +++ b/third_party/xla/.github/workflows/ci_multi_device.yml @@ -0,0 +1,64 @@ +# Copyright 2025 The OpenXLA Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +name: Multi-Device CI +permissions: + contents: read +on: + workflow_dispatch: # Allows manual triggering + +jobs: + Tests: + strategy: + # Don't fail fast - want to see results for all builds even if one fails. + fail-fast: false + matrix: + job_info: [ + { + pool: "linux-x86-a3-8g-h100-8gpu", + container: "us-docker.pkg.dev/ml-oss-artifacts-published/ml-public-container/ml-build:latest", + name: "XLA Linux x86 GPU 8xH100", + repo: "openxla/xla", + }, + ] + name: ${{ matrix.job_info.name }} + runs-on: ${{ matrix.job_info.pool }} + container: ${{ matrix.job_info.container }} + defaults: + run: + shell: bash + timeout-minutes: 60 + steps: + - name: "Checking out openxla/xla" + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + path: "openxla/xla" + - name: Checking out ${{ matrix.job_info.repo }} + if: ${{ matrix.job_info.repo != 'openxla/xla' }} + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + with: + repository: ${{ matrix.job_info.repo }} + path: ${{ matrix.job_info.repo }} + - name: "Wait For Connection" + uses: google-ml-infra/actions/ci_connection@7f5ca0c263a81ed09ea276524c1b9192f1304e3c + with: + halt-dispatch-input: ${{ inputs.halt-for-connection }} + - name: "Run build.py" + working-directory: ${{ matrix.job_info.repo }} + run: | + if [[ "${{ matrix.job_info.pool }}" == *windows* ]]; then + python $GITHUB_WORKSPACE\\openxla\\xla\\build_tools\\ci\\build.py --build="${{ matrix.job_info.name }}_github_actions" + else + $GITHUB_WORKSPACE/openxla/xla/build_tools/ci/build.py --build="${{ matrix.job_info.name }}_github_actions" + fi diff --git a/third_party/xla/.github/workflows/clang_format.yml b/third_party/xla/.github/workflows/clang_format.yml index 198d0dd5df3a83..f0de7043ebb15b 100644 --- a/third_party/xla/.github/workflows/clang_format.yml +++ b/third_party/xla/.github/workflows/clang_format.yml @@ -34,7 +34,7 @@ jobs: contains(github.event.pull_request.body, 'FORCE_TEST_ACTIONS') steps: - name: "Checking out repository" - uses: 
actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: '0' - name: "Fetch HEAD of main branch" diff --git a/third_party/xla/.github/workflows/cpu_benchmarks_nightly.yml b/third_party/xla/.github/workflows/cpu_benchmarks_nightly.yml index 39225d1aeb05d9..060249354d15d9 100644 --- a/third_party/xla/.github/workflows/cpu_benchmarks_nightly.yml +++ b/third_party/xla/.github/workflows/cpu_benchmarks_nightly.yml @@ -75,7 +75,7 @@ jobs: fi - name: Checkout OpenXLA - uses: actions/checkout@v6.0.0 + uses: actions/checkout@v6.0.1 - name: Create results directory run: @@ -207,7 +207,7 @@ jobs: gsutil cp "$OUTPUT_DIR/$FILENAME_GEMMA3" "$GCS_BUCKET/$GEMMA3_GCS_OBJECT_NAME" - name: Upload XSpace artifacts - uses: actions/upload-artifact@v5.0.0 + uses: actions/upload-artifact@v6.0.0 with: name: cpu-xla-benchmarks-xspace-${{ matrix.job_info.pool }} path: ${{ github.workspace }}/${{ matrix.job_info.output_dir }}/*_xspace.pb diff --git a/third_party/xla/.github/workflows/generate_benchmark_matrix.yml b/third_party/xla/.github/workflows/generate_benchmark_matrix.yml index 51f97449ee6b41..e96e3d44ecaab1 100644 --- a/third_party/xla/.github/workflows/generate_benchmark_matrix.yml +++ b/third_party/xla/.github/workflows/generate_benchmark_matrix.yml @@ -54,7 +54,7 @@ jobs: steps: - name: Checkout OpenXLA - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: # Use inputs.checkout_ref if provided, otherwise default to the event's ref # (e.g., PR's HEAD SHA or caller's commit SHA) diff --git a/third_party/xla/.github/workflows/gpu_benchmarks_nightly.yml b/third_party/xla/.github/workflows/gpu_benchmarks_nightly.yml index 748226655d8b9d..8ab4fb1d5dba14 100644 --- a/third_party/xla/.github/workflows/gpu_benchmarks_nightly.yml +++ b/third_party/xla/.github/workflows/gpu_benchmarks_nightly.yml @@ -56,7 +56,7 @@ jobs: OUTPUT_DIR: ${{ github.workspace }}/output steps: - name: Checkout XLA - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Download Gemma Hlo Files run: | @@ -198,7 +198,7 @@ jobs: upload_to_gcs "$GEMMA3_SAMPLE_LOOP_BASE_NAME" - name: Upload XSpace artifacts - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: gpu-xla-benchmarks-xspace-${{ matrix.job_info.os }} path: ${{ github.workspace }}/output/*_xspace.pb diff --git a/third_party/xla/.github/workflows/nightly_benchmarks.yml b/third_party/xla/.github/workflows/nightly_benchmarks.yml index 23a82d9350624d..e65fd69daf6944 100644 --- a/third_party/xla/.github/workflows/nightly_benchmarks.yml +++ b/third_party/xla/.github/workflows/nightly_benchmarks.yml @@ -110,7 +110,7 @@ jobs: exit 1 fi - name: Checkout OpenXLA Repository - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ env.CHECKOUT_REF }} - name: Build Binaries @@ -182,7 +182,7 @@ jobs: gsutil cp "$RESULTS_JSON_FILE_PATH" "$GCS_BUCKET/$GCS_OBJECT_NAME" - name: Upload Benchmark Artifacts if: always() - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 
with: name: results-${{ env.CONFIG_ID }} path: ${{ env.RESOLVED_OUTPUT_DIR }} diff --git a/third_party/xla/.github/workflows/postsubmit_benchmark.yml b/third_party/xla/.github/workflows/postsubmit_benchmark.yml index 346c17bfaec5c4..a4f249366b6ea9 100644 --- a/third_party/xla/.github/workflows/postsubmit_benchmark.yml +++ b/third_party/xla/.github/workflows/postsubmit_benchmark.yml @@ -145,7 +145,7 @@ jobs: fi - name: Checkout OpenXLA Repository - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ env.CHECKOUT_REF }} @@ -224,7 +224,7 @@ jobs: - name: Upload Benchmark Artifacts if: always() - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: results-${{ env.CONFIG_ID }} path: ${{ env.RESOLVED_OUTPUT_DIR }} diff --git a/third_party/xla/.github/workflows/presubmit_benchmark.yml b/third_party/xla/.github/workflows/presubmit_benchmark.yml index e3efa8d429bf5b..4259667c73dad0 100644 --- a/third_party/xla/.github/workflows/presubmit_benchmark.yml +++ b/third_party/xla/.github/workflows/presubmit_benchmark.yml @@ -139,7 +139,7 @@ jobs: fi - name: Checkout OpenXLA Repository - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ env.CHECKOUT_REF }} @@ -199,7 +199,7 @@ jobs: - name: Upload Benchmark Artifacts if: always() - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: results-${{ env.CONFIG_ID }} path: ${{ env.RESOLVED_OUTPUT_DIR }} diff --git a/third_party/xla/.github/workflows/rollback_notification.yml b/third_party/xla/.github/workflows/rollback_notification.yml index 8978fe8d9984e5..7a3c21fdd21b9f 100644 --- a/third_party/xla/.github/workflows/rollback_notification.yml +++ b/third_party/xla/.github/workflows/rollback_notification.yml @@ -33,7 +33,7 @@ jobs: timeout-minutes: 6 steps: - name: "Checking out repository" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: "Check if PR was rolled back" run: python3 .github/workflows/rollback_notification.py diff --git a/third_party/xla/.github/workflows/scorecards-analysis.yml b/third_party/xla/.github/workflows/scorecards-analysis.yml index f781a8bcb93b8a..0ec69c216d7aaf 100644 --- a/third_party/xla/.github/workflows/scorecards-analysis.yml +++ b/third_party/xla/.github/workflows/scorecards-analysis.yml @@ -44,7 +44,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false @@ -58,7 +58,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v3.pre.node20 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v3.pre.node20 with: name: SARIF file path: results.sarif @@ -67,6 +67,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@fe4161a26a8629af62121b670040955b330f9af2 # v4.31.6 + uses: github/codeql-action/upload-sarif@1b168cd39490f61582a9beae412bb7057a6b2c4e # v4.31.8 with: sarif_file: results.sarif diff --git a/third_party/xla/.gitignore b/third_party/xla/.gitignore index 619ec239a7809c..734c302636dbb4 100644 --- a/third_party/xla/.gitignore +++ b/third_party/xla/.gitignore @@ -28,3 +28,8 @@ tools/python_bin_path.sh *.VC.opendb *.suo *.user + +# Ignore clangd files and directories: https://openxla.org/xla/lsp +.cache +compile_commands.json +external diff --git a/third_party/xla/MODULE.bazel b/third_party/xla/MODULE.bazel index d54ae9978b494d..114f1dd36f5315 100644 --- a/third_party/xla/MODULE.bazel +++ b/third_party/xla/MODULE.bazel @@ -3,6 +3,7 @@ module(name = "xla") ############################################################## # Bazel module dependencies +# go/keep-sorted start bazel_dep(name = "abseil-cpp", version = "20250814.0", repo_name = "com_google_absl") bazel_dep(name = "abseil-py", version = "2.1.0", repo_name = "absl_py") bazel_dep(name = "bazel_features", version = "1.36.0") @@ -10,8 +11,7 @@ bazel_dep(name = "bazel_skylib", version = "1.8.1") bazel_dep(name = "boringssl", version = "0.20250818.0") bazel_dep(name = "curl", version = "8.11.0") bazel_dep(name = "google_benchmark", version = "1.8.5", repo_name = "com_google_benchmark") -bazel_dep(name = "googletest", version = "1.17.0", repo_name = "com_google_googletest_upstream") -bazel_dep(name = "xla_googletest_wrapper", version = "1.0", repo_name = "com_google_googletest") +bazel_dep(name = "googletest", version = "1.17.0", repo_name = "com_google_googletest") bazel_dep(name = "grpc", version = "1.74.1", repo_name = "com_github_grpc_grpc") bazel_dep(name = "gutil", version = "20250502.0", repo_name = "com_google_gutil") bazel_dep(name = "jsoncpp", version = "1.9.6", repo_name = "jsoncpp_git") @@ -22,12 +22,15 @@ bazel_dep(name = "pybind11_abseil", version = "202402.0") bazel_dep(name = "pybind11_bazel", version = "2.13.6") bazel_dep(name = "pybind11_protobuf", version = "0.0.0-20250210-f02a2b7") bazel_dep(name = "re2", version = "2024-07-02.bcr.1", repo_name = "com_googlesource_code_re2") +bazel_dep(name = "riegeli", version = "0.0.0-20250822-9f2744d", repo_name = "com_google_riegeli") bazel_dep(name = "rules_cc", version = "0.2.0") +bazel_dep(name = "rules_java", version = "8.16.1") bazel_dep(name = "rules_license", version = "1.0.0") bazel_dep(name = "rules_python", version = "1.6.0") bazel_dep(name = "rules_shell", version = "0.6.1") bazel_dep(name = "snappy", version = "1.2.1") bazel_dep(name = "zlib", version = "1.3.1.bcr.5") +# go/keep-sorted end # Only for compatibility, not directly used, change repo_name to None after upgrading Bazel to latest 7.x bazel_dep(name = "eigen", version = "4.0.0-20241125.bcr.3", repo_name = "DO_NOT_USE_eigen") @@ -42,9 +45,9 @@ bazel_dep(name = "rules_ml_toolchain") # echo "sha256-${HASH}" archive_override( module_name = "rules_ml_toolchain", - integrity = "sha256-seXjBtixED5zubd438Op4GnSBmRDegMkaiNXJJYrXJQ=", - strip_prefix = "rules_ml_toolchain-484235be45e6843db962c45d08fe4b2b65a6a24c", - urls = ["https://github.com/google-ml-infra/rules_ml_toolchain/archive/484235be45e6843db962c45d08fe4b2b65a6a24c.tar.gz"], + integrity = "sha256-HCxTCgVOnos8gR7CHtimh/yGW+w6u8j/Zb64KbHWeuQ=", + strip_prefix = 
"rules_ml_toolchain-6734d2a174bf29e731d3f473743d1cc1a86100c3", + urls = ["https://github.com/google-ml-infra/rules_ml_toolchain/archive/6734d2a174bf29e731d3f473743d1cc1a86100c3.tar.gz"], ) # TODO: Upstream the patch? @@ -71,15 +74,14 @@ single_version_override( # Use an unreleased version of googletest archive_override( module_name = "googletest", + patch_strip = 1, + patches = [ + "//third_party/googletest:0001-Add-ASSERT_OK-EXPECT_OK-ASSERT_OK_AND_ASSIGN-macros.patch", + ], strip_prefix = "googletest-28e9d1f26771c6517c3b4be10254887673c94018", urls = ["https://github.com/google/googletest/archive/28e9d1f26771c6517c3b4be10254887673c94018.zip"], ) -local_path_override( - module_name = "xla_googletest_wrapper", - path = "third_party/xla_googletest_wrapper", -) - ############################################################## # C++ dependencies diff --git a/third_party/xla/WORKSPACE b/third_party/xla/WORKSPACE index 69ebc22643da3b..29e65b3afcc430 100644 --- a/third_party/xla/WORKSPACE +++ b/third_party/xla/WORKSPACE @@ -1,19 +1,19 @@ # buildifier: disable=load-on-top workspace(name = "xla") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") # Initialize toolchains for ML projects. # # A hermetic build system is designed to produce completely reproducible builds for C++. # Details: https://github.com/google-ml-infra/rules_ml_toolchain -http_archive( +tf_http_archive( name = "rules_ml_toolchain", - sha256 = "7f00b3e94bbca1a4737ded6b9ed5358f6d1c86430c2ec97c90081343c0482f18", - strip_prefix = "rules_ml_toolchain-29d54c875da37e74b8548924ed30e78cb28126b9", - urls = [ - "https://github.com/google-ml-infra/rules_ml_toolchain/archive/29d54c875da37e74b8548924ed30e78cb28126b9.tar.gz", - ], + sha256 = "1c2c530a054e9e8b3c811ec21ed8a687fc865bec3abbc8ff65beb829b1d67ae4", + strip_prefix = "rules_ml_toolchain-6734d2a174bf29e731d3f473743d1cc1a86100c3", + urls = tf_mirror_urls( + "https://github.com/google-ml-infra/rules_ml_toolchain/archive/6734d2a174bf29e731d3f473743d1cc1a86100c3.tar.gz", + ), ) load( diff --git a/third_party/xla/build_tools/ci/BUILD b/third_party/xla/build_tools/ci/BUILD index 3d37ca202dd82b..79a571ba22ace1 100644 --- a/third_party/xla/build_tools/ci/BUILD +++ b/third_party/xla/build_tools/ci/BUILD @@ -14,6 +14,7 @@ # ============================================================================ load("@bazel_skylib//rules:diff_test.bzl", "diff_test") +load("@rules_shell//shell:sh_binary.bzl", "sh_binary") load("//xla:pytype.bzl", "pytype_strict_binary") package( diff --git a/third_party/xla/build_tools/ci/build.py b/third_party/xla/build_tools/ci/build.py index 20d77da3bee540..8a6efe40f3a71f 100755 --- a/third_party/xla/build_tools/ci/build.py +++ b/third_party/xla/build_tools/ci/build.py @@ -111,6 +111,7 @@ class BuildType(enum.Enum): XLA_LINUX_X86_CPU_BZLMOD_GITHUB_ACTIONS = enum.auto() XLA_LINUX_ARM64_CPU_GITHUB_ACTIONS = enum.auto() XLA_LINUX_X86_GPU_L4_GITHUB_ACTIONS = enum.auto() + XLA_LINUX_X86_GPU_8X_H100_GITHUB_ACTIONS = enum.auto() XLA_LINUX_X86_GPU_ONEAPI_GITHUB_ACTIONS = enum.auto() # Presubmit builds for regression testing. 
@@ -273,33 +274,37 @@ def _tag_filters_for_compute_capability( return tag_filters +nvidia_gpu_filters = ( + "-no_oss", + "requires-gpu-nvidia", + "gpu", + "-rocm-only", + "-oneapi-only", +) + +nvidia_single_gpu_filters = nvidia_gpu_filters + ("-multi_gpu",) + +nvidia_only_multi_gpu_filters = nvidia_gpu_filters + ("multi_gpu",) + + def nvidia_gpu_build_with_compute_capability( *, type_: BuildType, configs: Tuple[str, ...], compute_capability: int, + multi_gpu: bool = False, ) -> Build: extra_gpu_tags = _tag_filters_for_compute_capability(compute_capability) + filter_tags = ( + nvidia_only_multi_gpu_filters if multi_gpu else nvidia_single_gpu_filters + ) return Build( type_=type_, repo="openxla/xla", target_patterns=_XLA_DEFAULT_TARGET_PATTERNS, configs=configs, - test_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ) - + extra_gpu_tags, - build_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ), + test_tag_filters=filter_tags + extra_gpu_tags, + build_tag_filters=filter_tags, options={ "run_under": "//build_tools/ci:parallel_gpu_execute", "//xla/tsl:ci_build": True, @@ -436,6 +441,14 @@ def nvidia_gpu_build_with_compute_capability( type_=BuildType.XLA_LINUX_X86_GPU_L4_GITHUB_ACTIONS, configs=("warnings", "rbe_linux_cuda_nvcc", "hermetic_cuda_umd"), compute_capability=75, + multi_gpu=False, +) + +nvidia_gpu_build_with_compute_capability( + type_=BuildType.XLA_LINUX_X86_GPU_8X_H100_GITHUB_ACTIONS, + configs=("warnings", "rbe_linux_cuda_nvcc", "hermetic_cuda_umd"), + compute_capability=90, + multi_gpu=True, ) oneapi_build_tag_filter = ( @@ -510,21 +523,9 @@ def nvidia_gpu_build_with_compute_capability( repo="openxla/xla", target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, configs=("warnings", "rbe_linux_cuda_nvcc", "hermetic_cuda_umd"), - test_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ) + test_tag_filters=nvidia_single_gpu_filters + _tag_filters_for_compute_capability(compute_capability=75), - build_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ), + build_tag_filters=nvidia_single_gpu_filters, options={ "run_under": "//build_tools/ci:parallel_gpu_execute", "//xla/tsl:ci_build": True, @@ -542,21 +543,9 @@ def nvidia_gpu_build_with_compute_capability( repo="openxla/xla", target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, configs=("warnings", "rbe_linux_cuda_nvcc", "hermetic_cuda_umd"), - test_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ) + test_tag_filters=nvidia_single_gpu_filters + _tag_filters_for_compute_capability(compute_capability=75), - build_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ), + build_tag_filters=nvidia_single_gpu_filters, options={ "run_under": "//build_tools/ci:parallel_gpu_execute", "//xla/tsl:ci_build": True, @@ -575,21 +564,9 @@ def nvidia_gpu_build_with_compute_capability( repo="openxla/xla", configs=("warnings", "rbe_linux_cuda_nvcc", "hermetic_cuda_umd"), target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, - test_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ) + test_tag_filters=nvidia_single_gpu_filters + _tag_filters_for_compute_capability(compute_capability=75), - build_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - 
"-oneapi-only", - ), + build_tag_filters=nvidia_single_gpu_filters, options={ "run_under": "//build_tools/ci:parallel_gpu_execute", "//xla/tsl:ci_build": True, @@ -607,21 +584,9 @@ def nvidia_gpu_build_with_compute_capability( repo="openxla/xla", configs=("warnings", "rbe_linux_cuda_nvcc", "hermetic_cuda_umd"), target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, - test_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ) + test_tag_filters=nvidia_single_gpu_filters + _tag_filters_for_compute_capability(compute_capability=75), - build_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ), + build_tag_filters=nvidia_single_gpu_filters, options={ "run_under": "//build_tools/ci:parallel_gpu_execute", "//xla/tsl:ci_build": True, @@ -640,21 +605,9 @@ def nvidia_gpu_build_with_compute_capability( repo="openxla/xla", configs=(), target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, - test_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ) + test_tag_filters=nvidia_single_gpu_filters + _tag_filters_for_compute_capability(compute_capability=100), - build_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ), + build_tag_filters=nvidia_single_gpu_filters, options={ "run_under": "//build_tools/ci:parallel_gpu_execute", # Use User Mode and Kernel Mode Drivers pre-installed on the system. @@ -675,21 +628,9 @@ def nvidia_gpu_build_with_compute_capability( repo="openxla/xla", configs=(), target_patterns=_XLA_GPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS, - test_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ) + test_tag_filters=nvidia_single_gpu_filters + _tag_filters_for_compute_capability(compute_capability=100), - build_tag_filters=( - "-no_oss", - "requires-gpu-nvidia", - "gpu", - "-rocm-only", - "-oneapi-only", - ), + build_tag_filters=nvidia_single_gpu_filters, options={ "run_under": "//build_tools/ci:parallel_gpu_execute", # Use User Mode and Kernel Mode Drivers pre-installed on the system. 
@@ -731,7 +672,6 @@ def nvidia_gpu_build_with_compute_capability( **_DEFAULT_BAZEL_OPTIONS, "macos_minimum_os": "10.15", "test_tmpdir": "/Volumes/BuildData/bazel_output", - "define": "xnn_enable_avxvnniint8=false", "//xla/tsl:ci_build": True, }, build_tag_filters=macos_tag_filter, @@ -767,7 +707,6 @@ def nvidia_gpu_build_with_compute_capability( "macos_minimum_os": "10.15", "test_tmpdir": "/tmpfs/bazel_output", "test_size_filters": "small,medium", - "define": "xnn_enable_avxvnniint8=false", "//xla/tsl:ci_build": True, }, build_tag_filters=macos_tag_filter, @@ -932,11 +871,7 @@ def nvidia_gpu_build_with_compute_capability( Build( type_=BuildType.TENSORFLOW_LINUX_X86_GPU_L4_GITHUB_ACTIONS, repo="tensorflow/tensorflow", - configs=( - "release_gpu_linux", - "rbe_linux_cuda", - "hermetic_cuda_umd" - ), + configs=("release_gpu_linux", "rbe_linux_cuda", "hermetic_cuda_umd"), target_patterns=( "//tensorflow/compiler/...", "-//tensorflow/compiler/tf2tensorrt/...", diff --git a/third_party/xla/build_tools/ci/golden_commands.txt b/third_party/xla/build_tools/ci/golden_commands.txt index f5e914157ec888..feccb0fc7cdd78 100644 --- a/third_party/xla/build_tools/ci/golden_commands.txt +++ b/third_party/xla/build_tools/ci/golden_commands.txt @@ -53,46 +53,52 @@ parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_fi bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=nonccl --config=rbe_linux_cpu --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build -- //xla/... //build_tools/... @local_tsl//tsl/... bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_CPU_GITHUB_ACTIONS +# BEGIN BuildType.XLA_LINUX_X86_GPU_8X_H100_GITHUB_ACTIONS +nvidia-smi +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,multi_gpu,requires-gpu-sm90-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=9.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/... //build_tools/... @local_tsl//tsl/... 
+bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,multi_gpu,requires-gpu-sm90-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=9.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @local_tsl//tsl/... +bazel analyze-profile profile.json.gz +# END BuildType.XLA_LINUX_X86_GPU_8X_H100_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS nvidia-smi -parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu -bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- 
//xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS nvidia-smi -parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu -bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 
--bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm100-only,requires-gpu-sm60,requires-gpu-sm70,requires-gpu-sm80,requires-gpu-sm90,requires-gpu-sm100,-requires-gpu-amd,-requires-gpu-intel --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=10 --repo_env=HERMETIC_CUDA_VERSION=12.8.0 --repo_env=HERMETIC_CUDNN_VERSION=9.8.0 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_A4_224_VCPU_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS nvidia-smi -parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu -bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc 
--config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS nvidia-smi -parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu -bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu 
--test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_L4_16_VCPU_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS nvidia-smi -parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu -bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going 
--nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --@local_config_cuda//cuda:include_cuda_libs=False --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_BENCHMARK_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS nvidia-smi -parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu -bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only 
--test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu +bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/tools/multihost_hlo_runner:hlo_runner_main_gpu //xla/tools:compute_xspace_stats_main_gpu bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_L4_48_VCPU_PRESUBMIT_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_L4_GITHUB_ACTIONS nvidia-smi -parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 
--bes_upload_mode=fully_async --nobuild -- //xla/... //build_tools/... @local_tsl//tsl/... -bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @local_tsl//tsl/... +parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --nobuild -- //xla/... //build_tools/... @local_tsl//tsl/... +bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu --test_tag_filters=-no_oss,requires-gpu-nvidia,gpu,-rocm-only,-oneapi-only,-multi_gpu,requires-gpu-sm75-only,requires-gpu-sm60,requires-gpu-sm70,-requires-gpu-sm80,-requires-gpu-sm80-only,-requires-gpu-sm90,-requires-gpu-sm90-only,-requires-gpu-sm100,-requires-gpu-sm100-only,-requires-gpu-amd,-requires-gpu-intel --config=warnings --config=rbe_linux_cuda_nvcc --config=hermetic_cuda_umd --repo_env=TF_CUDA_COMPUTE_CAPABILITIES=7.5 --run_under=//build_tools/ci:parallel_gpu_execute --//xla/tsl:ci_build --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async -- //xla/... //build_tools/... @local_tsl//tsl/... bazel analyze-profile profile.json.gz # END BuildType.XLA_LINUX_X86_GPU_L4_GITHUB_ACTIONS # BEGIN BuildType.XLA_LINUX_X86_GPU_ONEAPI_GITHUB_ACTIONS @@ -104,7 +110,7 @@ bazel analyze-profile profile.json.gz df -h bazel --version mkdir -p /tmpfs/bazel_output -bazel test --build_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --test_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --config=nonccl --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --macos_minimum_os=10.15 --test_tmpdir=/tmpfs/bazel_output --test_size_filters=small,medium --define=xnn_enable_avxvnniint8=false --//xla/tsl:ci_build -- //xla/... -//xla/hlo/experimental/... -//xla/python_api/... -//xla/python/... 
-//xla/service/gpu/... +bazel test --build_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --test_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --config=nonccl --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --macos_minimum_os=10.15 --test_tmpdir=/tmpfs/bazel_output --test_size_filters=small,medium --//xla/tsl:ci_build -- //xla/... -//xla/hlo/experimental/... -//xla/python_api/... -//xla/python/... -//xla/service/gpu/... bazel analyze-profile profile.json.gz # END BuildType.XLA_MACOS_ARM64_CPU_KOKORO # BEGIN BuildType.XLA_MACOS_X86_CPU_KOKORO @@ -112,7 +118,7 @@ sudo wget --no-verbose -O /usr/local/bin/bazel https://github.com/bazelbuild/baz chmod +x /usr/local/bin/bazel bazel --version mkdir -p /Volumes/BuildData/bazel_output -bazel test --build_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --test_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --config=nonccl --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --macos_minimum_os=10.15 --test_tmpdir=/Volumes/BuildData/bazel_output --define=xnn_enable_avxvnniint8=false --//xla/tsl:ci_build -- //xla/... -//xla/hlo/experimental/... -//xla/python_api/... -//xla/python/... -//xla/service/gpu/... +bazel test --build_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --test_tag_filters=-no_oss,-gpu,-no_mac,-mac_excluded,-requires-gpu-nvidia,-requires-gpu-amd,-requires-gpu-intel --config=nonccl --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --macos_minimum_os=10.15 --test_tmpdir=/Volumes/BuildData/bazel_output --//xla/tsl:ci_build -- //xla/... -//xla/hlo/experimental/... -//xla/python_api/... -//xla/python/... -//xla/service/gpu/... bazel analyze-profile profile.json.gz # END BuildType.XLA_MACOS_X86_CPU_KOKORO # BEGIN BuildType.XLA_WINDOWS_X86_CPU_GITHUB_ACTIONS diff --git a/third_party/xla/build_tools/configure/configure.py b/third_party/xla/build_tools/configure/configure.py index 30729ca1031561..a54852db554b35 100755 --- a/third_party/xla/build_tools/configure/configure.py +++ b/third_party/xla/build_tools/configure/configure.py @@ -452,18 +452,6 @@ def to_bazelrc_lines( if dpav.clang_major_version and dpav.clang_major_version >= 19: self.compiler_options.append("-Wno-c23-extensions") - # Avoid XNNPACK using `-mavxvnniint8` (needs clang-16+/gcc-13+) - if ( - dpav.clang_major_version is not None and dpav.clang_major_version < 16 - ) or (dpav.gcc_major_version is not None and dpav.gcc_major_version < 13): - rc.append("build --define=xnn_enable_avxvnniint8=false") - - # Avoid XNNPACK using `-mavx512fp16` (needs clang-14+/gcc-12+). 
- if ( - dpav.clang_major_version is not None and dpav.clang_major_version < 14 - ) or (dpav.gcc_major_version is not None and dpav.gcc_major_version < 12): - rc.append("build --define=xnn_enable_avx512fp16=false") - rc.append(f"build --action_env PYTHON_BIN_PATH={self.python_bin_path}") rc.append(f"build --python_path {self.python_bin_path}") rc.append("test --test_env LD_LIBRARY_PATH") diff --git a/third_party/xla/build_tools/configure/testdata/gcc.bazelrc b/third_party/xla/build_tools/configure/testdata/gcc.bazelrc index 54545cbb9914bc..8eefec15ee8efb 100644 --- a/third_party/xla/build_tools/configure/testdata/gcc.bazelrc +++ b/third_party/xla/build_tools/configure/testdata/gcc.bazelrc @@ -1,6 +1,4 @@ build --action_env GCC_HOST_COMPILER_PATH=/usr/bin/gcc -build --define=xnn_enable_avxvnniint8=false -build --define=xnn_enable_avx512fp16=false build --action_env PYTHON_BIN_PATH=/usr/bin/python3 build --python_path /usr/bin/python3 test --test_env LD_LIBRARY_PATH diff --git a/third_party/xla/build_tools/configure/testdata/nvcc_gcc.bazelrc b/third_party/xla/build_tools/configure/testdata/nvcc_gcc.bazelrc index 3155b30218df08..373613415c1f7c 100644 --- a/third_party/xla/build_tools/configure/testdata/nvcc_gcc.bazelrc +++ b/third_party/xla/build_tools/configure/testdata/nvcc_gcc.bazelrc @@ -5,8 +5,6 @@ build:cuda --repo_env HERMETIC_CUDA_COMPUTE_CAPABILITIES=7.5 build:cuda --repo_env HERMETIC_CUDNN_VERSION="9.8.0" build --config nonccl build --action_env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 -build --define=xnn_enable_avxvnniint8=false -build --define=xnn_enable_avx512fp16=false build --action_env PYTHON_BIN_PATH=/usr/bin/python3 build --python_path /usr/bin/python3 test --test_env LD_LIBRARY_PATH diff --git a/third_party/xla/build_tools/dependencies/gen_disable_layering_check_patch.sh b/third_party/xla/build_tools/dependencies/gen_disable_layering_check_patch.sh new file mode 100755 index 00000000000000..f1d9f7d670eaa9 --- /dev/null +++ b/third_party/xla/build_tools/dependencies/gen_disable_layering_check_patch.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# Copyright 2025 The OpenXLA Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Generates a patch file that disables the layering check for all cc_library +# targets in the archive. Both BUILD and BUILD.bazel files are taken into account. +# +# The script takes one argument: the URL of the .tar.gz archive to download. +# +# The following tools are needed (need to be installed on the machine): +# - curl +# - git +# - buildozer (from Bazel buildtools) +# +# The tool has originally been written for ortools but should work for similarly structured +# projects as well. +# +# Example: +# build_tools/dependencies/gen_disable_layering_check_patch.sh \ +# https://github.com/google/or-tools/archive/v9.11.tar.gz \ +# > third_party/ortools/layering_check.patch + +set -euo pipefail + +readonly TMP_DIR=$(mktemp -d) +trap 'rm -rf -- $TMP_DIR' EXIT + +echo "Downloading archive $1..." 
>&2 +curl -Lqo "$TMP_DIR/archive.tar.gz" "$1" 1>&2 + +echo "Extracting archive..." >&2 +mkdir -p "$TMP_DIR/extracted" 1>&2 +tar -x -C "$TMP_DIR/extracted" -f "$TMP_DIR/archive.tar.gz" --strip-components=1 1>&2 + +echo "Initializing temporary git repo..." >&2 +git -C "$TMP_DIR/extracted" init 1>&2 +git -C "$TMP_DIR/extracted" add . 1>&2 +git -C "$TMP_DIR/extracted" commit --no-verify -m "original state" -q 1>&2 + +echo "Patching build targets..." >&2 +find $TMP_DIR/extracted -name BUILD.bazel -or -name BUILD | while read f; do + buildozer 'add features "-layering_check"' $(dirname $f):%cc_library 1>&2 || exit_code=$? + if [[ $exit_code -ne 0 && $exit_code -ne 3 ]]; then + echo "Buildozer command failed with exit code: $exit_code" >&2 + exit $exit_code + fi +done + +echo "Generating diff..." >&2 +git -C "$TMP_DIR/extracted" --no-pager diff diff --git a/third_party/xla/build_tools/lint/generate_compile_commands.py b/third_party/xla/build_tools/lint/generate_compile_commands.py index ec9d6fe0d2037b..1c7e6f930931ed 100644 --- a/third_party/xla/build_tools/lint/generate_compile_commands.py +++ b/third_party/xla/build_tools/lint/generate_compile_commands.py @@ -67,7 +67,11 @@ def from_args_list(cls, args_list: list[str]) -> "CompileCommand": if arg.endswith(".cc"): cc_file = arg - filtered_args.append(arg) + # Split generated commands, because otherwise they get wrapped + # into "command with spaces" when passed to clangd, and clangd + # can't parse them correctly. + for s in arg.split(" "): + filtered_args.append(s) return cls(cc_file, filtered_args) diff --git a/third_party/xla/build_tools/pjrt_wheels/BUILD.bazel b/third_party/xla/build_tools/pjrt_wheels/BUILD.bazel index 5848bb71b1c20c..b9adc77a1690e8 100644 --- a/third_party/xla/build_tools/pjrt_wheels/BUILD.bazel +++ b/third_party/xla/build_tools/pjrt_wheels/BUILD.bazel @@ -1,6 +1,8 @@ load("@cuda_cudart//:version.bzl", cuda_major_version = "VERSION") load("@nightly_timestamp//:timestamp.bzl", "XLA_NIGHTLY_TIMESTAMP") load("@rc_number//:rc_number.bzl", "XLA_RC_NUMBER") +load("@rules_cc//cc:cc_binary.bzl", "cc_binary") +load("@rules_cc//cc:cc_test.bzl", "cc_test") load("@rules_python//python:packaging.bzl", "py_wheel") # This ensures we can only build plugins for selected CUDA versions. diff --git a/third_party/xla/docs/flags_guidance.md b/third_party/xla/docs/flags_guidance.md index f887b66c4fbc50..e2fff15a4cdcb1 100644 --- a/third_party/xla/docs/flags_guidance.md +++ b/third_party/xla/docs/flags_guidance.md @@ -79,22 +79,22 @@ data-parallel collectives (`xla_gpu_enable_pipelined_all_gather`, (`xla_gpu_enable_while_loop_double_buffering`), latency hiding scheduling (`xla_gpu_enable_latency_hiding_scheduler`), and SOL latency estimator on Hopper/Blackwell (`xla_gpu_enable_analytical_sol_latency_estimator`). See -[GPU Optimization Levels](https://openxla.org/xla/gpu_optimization_levels) for -details. - -| Flag | Type | Notes | -| :---- | :---- | :----- | -| `xla_gpu_enable_latency_hiding_scheduler` | Boolean (true/false) |This flag enables latency hiding schedulers to overlap asynchronous communication with computation efficiently. The default value is False. | -| `xla_gpu_enable_analytical_sol_latency_estimator` | Boolean (true/false) | Enables platform specific scheduling decisions, which in turn improve compute-communication overlap. The default value is true. | -| `xla_gpu_analytical_latency_estimator_options` | Structured string | Configures parameters for the `xla_gpu_enable_analytical_sol_latency_estimator`.
Adjust by setting `nic_speed_gbps=$NIC_SPEED,nccl_op_launch_us=$LAUNCH_OVERHEAD,chunk_prep_us=$CHUNK_PREP,rtt_us=$RTT,chunk_size_bytes=$CHUNK_SIZE,gpus_per_node=$GPUS_PER_NODE`. The default value depends on a detected platform. | -| `xla_gpu_enable_triton_gemm` | Boolean (true/false) | Use Triton-based matrix multiplication. | -| `xla_gpu_enable_command_buffer` | List of CommandBufferCmdType | Which kind of commands should be captured in command buffers. | -| `xla_gpu_all_reduce_combine_threshold_bytes` | Integer (bytes) | These flags tune when to combine multiple small AllGather / ReduceScatter / AllReduce into one big AllGather / ReduceScatter / AllReduce to reduce time spent on cross-device communication. For example, for the AllGather / ReduceScatter thresholds on a Transformer-based workload, consider tuning them high enough so as to combine at least a Transformer Layer’s weight AllGather / ReduceScatter. By default, the combine_threshold_bytes is set to 256. | -| `xla_gpu_all_gather_combine_threshold_bytes` | Integer (bytes) | See xla_gpu_all_reduce_combine_threshold_bytes above. | -| `xla_gpu_reduce_scatter_combine_threshold_bytes` | Integer (bytes) | See xla_gpu_all_reduce_combine_threshold_bytes above. | -| `xla_gpu_enable_pipelined_all_gather` | Boolean (true/false) | Enable pipelinling of all-gather instructions. | -| `xla_gpu_enable_pipelined_reduce_scatter` | Boolean (true/false) | Enable pipelinling of reduce-scatter instructions. | -| `xla_gpu_enable_pipelined_all_reduce` | Boolean (true/false) | Enable pipelinling of all-reduce instructions. | -| `xla_gpu_enable_while_loop_double_buffering` | Boolean (true/false) | Enable double-buffering for while loop. | -| `xla_gpu_enable_all_gather_combine_by_dim` | Boolean (true/false) | Combine all-gather ops with the same gather dimension or irrespective of their dimension. | -| `xla_gpu_enable_reduce_scatter_combine_by_dim` | Boolean (true/false) | Combine reduce-scatter ops with the same dimension or irrespective of their dimension. | +[GPU Effort Levels](https://openxla.org/xla/effort_levels) for details. + +Flag | Type | Notes +:------------------------------------------------ | :--------------------------- | :---- +`xla_gpu_enable_latency_hiding_scheduler` | Boolean (true/false) | This flag enables latency hiding schedulers to overlap asynchronous communication with computation efficiently. The default value is False. +`xla_gpu_enable_analytical_sol_latency_estimator` | Boolean (true/false) | Enables platform specific scheduling decisions, which in turn improve compute-communication overlap. The default value is true. +`xla_gpu_analytical_latency_estimator_options` | Structured string | Configures parameters for the `xla_gpu_enable_analytical_sol_latency_estimator`. Adjust by setting `nic_speed_gbps=$NIC_SPEED,nccl_op_launch_us=$LAUNCH_OVERHEAD,chunk_prep_us=$CHUNK_PREP,rtt_us=$RTT,chunk_size_bytes=$CHUNK_SIZE,gpus_per_node=$GPUS_PER_NODE`. The default value depends on a detected platform. +`xla_gpu_enable_triton_gemm` | Boolean (true/false) | Use Triton-based matrix multiplication. +`xla_gpu_enable_command_buffer` | List of CommandBufferCmdType | Which kind of commands should be captured in command buffers. +`xla_gpu_all_reduce_combine_threshold_bytes` | Integer (bytes) | These flags tune when to combine multiple small AllGather / ReduceScatter / AllReduce into one big AllGather / ReduceScatter / AllReduce to reduce time spent on cross-device communication. 
For example, for the AllGather / ReduceScatter thresholds on a Transformer-based workload, consider tuning them high enough so as to combine at least a Transformer Layer’s weight AllGather / ReduceScatter. By default, the combine_threshold_bytes is set to 256. +`xla_gpu_all_gather_combine_threshold_bytes` | Integer (bytes) | See xla_gpu_all_reduce_combine_threshold_bytes above. +`xla_gpu_reduce_scatter_combine_threshold_bytes` | Integer (bytes) | See xla_gpu_all_reduce_combine_threshold_bytes above. +`xla_gpu_enable_pipelined_all_gather` | Boolean (true/false) | Enable pipelining of all-gather instructions. +`xla_gpu_enable_pipelined_reduce_scatter` | Boolean (true/false) | Enable pipelining of reduce-scatter instructions. +`xla_gpu_enable_pipelined_all_reduce` | Boolean (true/false) | Enable pipelining of all-reduce instructions. +`xla_gpu_enable_pipelined_host_offloading` | Boolean (true/false) | Enable pipelining of host offloading instructions. +`xla_gpu_enable_while_loop_double_buffering` | Boolean (true/false) | Enable double-buffering for while loop. +`xla_gpu_enable_all_gather_combine_by_dim` | Boolean (true/false) | Combine all-gather ops with the same gather dimension or irrespective of their dimension. +`xla_gpu_enable_reduce_scatter_combine_by_dim` | Boolean (true/false) | Combine reduce-scatter ops with the same dimension or irrespective of their dimension. diff --git a/third_party/xla/docs/test_hlo_passes.md b/third_party/xla/docs/test_hlo_passes.md index 8afcf6bf773aaf..723406e74d991a 100644 --- a/third_party/xla/docs/test_hlo_passes.md +++ b/third_party/xla/docs/test_hlo_passes.md @@ -51,7 +51,7 @@ For example, some be written as follows: ``` -// RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK-%{PTX} %s +// RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../backends/gpu/specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK-%{PTX} %s HloModule Test, is_scheduled=true fused_computation { diff --git a/third_party/xla/opensource_only.files b/third_party/xla/opensource_only.files index 4a78380bc9dd7d..a9bf5dcaee9f9f 100644 --- a/third_party/xla/opensource_only.files +++ b/third_party/xla/opensource_only.files @@ -1,6 +1,7 @@ tensorflow/compiler/xla/backends/cpu/nanort/package_groups.bzl: tensorflow/compiler/xla/backends/cpu/package_groups.bzl: tensorflow/compiler/xla/internal/package_groups.bzl: +tensorflow/compiler/xla/megascale/package_groups.bzl: tensorflow/compiler/xla/mlir_hlo/WORKSPACE: tensorflow/compiler/xla/package_groups.bzl: tensorflow/compiler/xla/pjrt/cpu/package_groups.bzl: @@ -156,5 +157,4 @@ xla/third_party/tensorrt/tensorrt/tensorrt_config.py.tpl: xla/third_party/tensorrt/tensorrt_configure.bzl: xla/third_party/tensorrt/workspace.bzl: xla/third_party/xla_googletest_wrapper/include/gmock/gmock.h: -xla/third_party/xla_googletest_wrapper/include/gtest/gtest.h: xla/third_party/zlib.BUILD: diff --git a/third_party/xla/tensorflow.bazelrc b/third_party/xla/tensorflow.bazelrc index 4d07e058d098cc..d9c9c10d9f3122 100644 --- a/third_party/xla/tensorflow.bazelrc +++ b/third_party/xla/tensorflow.bazelrc @@ -189,10 +189,11 @@ common:mkl_aarch64_threadpool -c opt # This is an alias for the mkl_aarch64_threadpool build. common:mkl_aarch64 --config=mkl_aarch64_threadpool -# Default CUDA, CUDNN and NVSHMEM versions. +# Default CUDA, CUDNN, NCCL and NVSHMEM versions.
common:cuda_version --repo_env=HERMETIC_CUDA_VERSION="12.9.1" common:cuda_version --repo_env=HERMETIC_CUDNN_VERSION="9.8.0" common:cuda_version --repo_env=HERMETIC_NVSHMEM_VERSION="3.2.5" +common:cuda_version --repo_env=HERMETIC_NCCL_VERSION="2.27.7" # CUDA: This config refers to building CUDA op kernels with nvcc. common:cuda --repo_env TF_NEED_CUDA=1 @@ -280,8 +281,6 @@ common:rocm_base --copt=-Wno-gnu-offsetof-extensions common:rocm_base --crosstool_top=@local_config_rocm//crosstool:toolchain common:rocm_base --define=using_rocm_hipcc=true common:rocm_base --define=tensorflow_mkldnn_contraction_kernel=0 -common:rocm_base --define=xnn_enable_avxvnniint8=false -common:rocm_base --define=xnn_enable_avx512fp16=false common:rocm_base --repo_env TF_NEED_ROCM=1 common:rocm_clang_official --config=rocm_base @@ -295,7 +294,9 @@ common:rocm_ci --config=rocm common:rocm_ci_hermetic --dynamic_mode=off common:rocm_ci_hermetic --config=rocm_clang_official -common:rocm_ci_hermetic --repo_env="ROCM_DISTRO_VERSION=rocm_7.10.0_gfx94X" +common:rocm_ci_hermetic --repo_env="ROCM_DISTRO_URL=https://therock-nightly-tarball.s3.amazonaws.com/therock-dist-linux-gfx94X-dcgpu-7.10.0a20251107.tar.gz" +common:rocm_ci_hermetic --repo_env="ROCM_DISTRO_HASH=486dbf647bcf9b78f21d7477f43addc7b2075b1a322a119045db9cdc5eb98380" +common:rocm_ci_hermetic --repo_env="ROCM_DISTRO_LINKS=llvm/amdgcn:amdgcn" common:rocm_ci_hermetic --@local_config_rocm//rocm:rocm_path_type=hermetic # This config option is used for SYCL as GPU backend. @@ -538,12 +539,6 @@ common:rbe_linux_cpu_clang_local --extra_toolchains="@local_config_cuda//crossto common:rbe_linux_cpu_clang_local --repo_env=CC="/usr/lib/llvm-18/bin/clang" common:rbe_linux_cpu_clang_local --repo_env=TF_SYSROOT="/dt9" -# Download CUDA/CUDNN redistributions to preserve the repositories cache between -# CPU and GPU builds. -# TODO(ybaturina): Uncomment when RBE is ready to support this. 
-# common:rbe_linux_cpu --repo_env USE_CUDA_REDISTRIBUTIONS=1 -# common:rbe_linux_cpu --config=cuda_version - common:rbe_linux_cuda --config=cuda_clang_official common:rbe_linux_cuda --config=rbe_linux_cpu # dt9 is based on glibc 2.17, which is outdated and incompatible with CUDA 12.8.0 diff --git a/third_party/xla/third_party/boringssl.patch b/third_party/xla/third_party/boringssl.patch new file mode 100644 index 00000000000000..31433753e3abde --- /dev/null +++ b/third_party/xla/third_party/boringssl.patch @@ -0,0 +1,13 @@ +diff --git a/BUILD b/BUILD +index 206786442..3d1624382 100644 +--- a/BUILD ++++ b/BUILD +@@ -145,7 +145,7 @@ cc_library( + + cc_library( + name = "ssl", +- srcs = ssl_sources + ssl_internal_headers, ++ srcs = ssl_sources + ssl_internal_headers + crypto_internal_headers, + hdrs = ssl_headers, + copts = boringssl_copts_cxx, + includes = ["src/include"], diff --git a/third_party/xla/third_party/brotli/BUILD.bazel b/third_party/xla/third_party/brotli/BUILD.bazel new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/third_party/xla/third_party/brotli/workspace.bzl b/third_party/xla/third_party/brotli/workspace.bzl new file mode 100644 index 00000000000000..ec76237744b347 --- /dev/null +++ b/third_party/xla/third_party/brotli/workspace.bzl @@ -0,0 +1,11 @@ +"""Provides the repo macro to import brotli""" + +load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") + +def repo(): + tf_http_archive( + name = "org_brotli", + sha256 = "e720a6ca29428b803f4ad165371771f5398faba397edf6778837a18599ea13ff", + strip_prefix = "brotli-1.1.0", + urls = tf_mirror_urls("https://github.com/google/brotli/archive/refs/tags/v1.1.0.tar.gz"), + ) diff --git a/third_party/xla/third_party/cudnn_frontend/workspace.bzl b/third_party/xla/third_party/cudnn_frontend/workspace.bzl index f954a198969f87..1bea852e045b88 100644 --- a/third_party/xla/third_party/cudnn_frontend/workspace.bzl +++ b/third_party/xla/third_party/cudnn_frontend/workspace.bzl @@ -7,7 +7,7 @@ def repo(): name = "cudnn_frontend_archive", build_file = "//third_party:cudnn_frontend.BUILD", patch_file = ["//third_party:cudnn_frontend_header_fix.patch"], - sha256 = "257b3b7f8a99abc096094abc9e5011659117b647d55293bcd2c5659f9181b99e", - strip_prefix = "cudnn-frontend-1.13.0", - urls = tf_mirror_urls("https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.13.0.zip"), + sha256 = "453d4650e6a25ede58fbbd7077c64ebe92734218d474ec7371bb13fa6d2181fa", + strip_prefix = "cudnn-frontend-1.16.1", + urls = tf_mirror_urls("https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.16.1.zip"), ) diff --git a/third_party/xla/third_party/curl.BUILD b/third_party/xla/third_party/curl.BUILD index cb33aa940fe48f..c1884050002a5e 100644 --- a/third_party/xla/third_party/curl.BUILD +++ b/third_party/xla/third_party/curl.BUILD @@ -442,6 +442,7 @@ cc_library( "@local_xla//xla/tsl:ios": [], "@local_xla//xla/tsl:windows": [], "//conditions:default": [ + "@boringssl//:crypto", "@boringssl//:ssl", ], }), diff --git a/third_party/xla/third_party/googletest/0001-Add-ASSERT_OK-EXPECT_OK-ASSERT_OK_AND_ASSIGN-macros.patch b/third_party/xla/third_party/googletest/0001-Add-ASSERT_OK-EXPECT_OK-ASSERT_OK_AND_ASSIGN-macros.patch new file mode 100644 index 00000000000000..501d22d00cb301 --- /dev/null +++ b/third_party/xla/third_party/googletest/0001-Add-ASSERT_OK-EXPECT_OK-ASSERT_OK_AND_ASSIGN-macros.patch @@ -0,0 +1,164 @@ +From 5c2d2d62a71fe19c92c6f807d533c0ea90c15f03 Mon Sep 17 00:00:00 2001 +From: Marcin Radomski +Date: Thu, 4 Dec 2025 
15:37:45 +0000 +Subject: [PATCH 1/2] Add ASSERT_OK/EXPECT_OK/ASSERT_OK_AND_ASSIGN macros + +Create this patch with git diff -U2 / git format-patch -U2 to avoid +mismatches with googletest.patch. +--- + BUILD.bazel | 5 +- + googlemock/include/gmock/gmock.h | 1 + + .../include/gmock/internal/xla-gmock-macros.h | 118 ++++++++++++++++++ + 3 files changed, 123 insertions(+), 1 deletion(-) + create mode 100644 googlemock/include/gmock/internal/xla-gmock-macros.h + +diff --git a/BUILD.bazel b/BUILD.bazel +index 008af6a1..32d2a22c 100644 +--- a/BUILD.bazel ++++ b/BUILD.bazel +@@ -163,5 +163,8 @@ cc_library( + ], + "//conditions:default": [], +- }), ++ }) + [ ++ "@abseil-cpp//absl/status", ++ "@abseil-cpp//absl/status:statusor", ++ ], + ) + +diff --git a/googlemock/include/gmock/gmock.h b/googlemock/include/gmock/gmock.h +index c78fb8ee..69b33572 100644 +--- a/googlemock/include/gmock/gmock.h ++++ b/googlemock/include/gmock/gmock.h +@@ -95,3 +95,4 @@ GTEST_API_ void InitGoogleMock(); + } // namespace testing + ++#include "gmock/internal/xla-gmock-macros.h" + #endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_H_ +diff --git a/googlemock/include/gmock/internal/xla-gmock-macros.h b/googlemock/include/gmock/internal/xla-gmock-macros.h +new file mode 100644 +index 00000000..b851bcca +--- /dev/null ++++ b/googlemock/include/gmock/internal/xla-gmock-macros.h +@@ -0,0 +1,118 @@ ++/* Copyright 2025 The Abseil Authors & TensorFlow Authors. All Rights Reserved. ++ ++Licensed under the Apache License, Version 2.0 (the "License"); ++you may not use this file except in compliance with the License. ++You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++Unless required by applicable law or agreed to in writing, software ++distributed under the License is distributed on an "AS IS" BASIS, ++WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++See the License for the specific language governing permissions and ++limitations under the License. ++==============================================================================*/ ++ ++#ifndef GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_XLA_GMOCK_MACROS_H_ ++#define GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_XLA_GMOCK_MACROS_H_ ++ ++// gmock/gmock.h wrapper that also provides assert macros. ++// ++// These already exist in internal version of gmock, but upstream version ++// doesn't have them. We use this wrapper to make dependency translation when ++// exporting to OSS easier. ++// ++// - We want to use standard internal header and ASSERT_OK, EXPECT_OK macros ++// when developing internally. ++// - We want the same macros to work externally, rather than having to add or ++// strip TF_ prefix. ++// - We want the OSS export to still work after the export and header ++// translation. ++// - We want to minimize the amount of patching third party projects to reduce ++// maintenance overhead. ++// - To ensure the OSS patches cleanly apply onto internal repo, we need the ++// header translation to be reversible, which requires 1:1 header mapping. ++// ++// To achieve this, we add those macros to gmock for all XLA code, which ++// should (TM) make ASSERT_OK/EXPECT_OK "just work" in all XLA tests. ++// ++// absl/status/status_matchers.h depends on gmock.h, so we can't simply add it ++// here. This causes a circular dependency between this and absl - which bazel ++// doesn't allow. 
++ ++#include "absl/status/status.h" ++#include "absl/status/statusor.h" ++ ++// Macros for testing the results of functions that return absl::Status or ++// absl::StatusOr<T> (for any type T). ++#define EXPECT_OK(expression) \ ++  EXPECT_THAT(expression, ::xla_testing::internal::IsOk()) ++#define ASSERT_OK(expression) \ ++  ASSERT_THAT(expression, ::xla_testing::internal::IsOk()) ++ ++#define ASSERT_OK_AND_ASSIGN(lhs, rexpr) \ ++  ASSERT_OK_AND_ASSIGN_IMPL( \ ++      XLA_STATUS_MACROS_CONCAT_NAME(_status_or_value, __COUNTER__), \ ++      lhs, rexpr); ++ ++#define ASSERT_OK_AND_ASSIGN_IMPL(statusor, lhs, rexpr) \ ++  auto statusor = (rexpr); \ ++  ASSERT_OK(statusor.status()); \ ++  lhs = std::move(statusor).value() ++ ++#define XLA_STATUS_MACROS_CONCAT_NAME(x, y) XLA_STATUS_MACROS_CONCAT_IMPL(x, y) ++#define XLA_STATUS_MACROS_CONCAT_IMPL(x, y) x##y ++ ++namespace xla_testing { ++namespace internal { ++ ++// DO NOT USE DIRECTLY. Use absl/status/status_matchers.h instead. ++inline const absl::Status& GetStatus(const absl::Status& status) { ++ return status; ++} ++ ++// DO NOT USE DIRECTLY. Use absl/status/status_matchers.h instead. ++template <typename T> ++inline const absl::Status& GetStatus(const absl::StatusOr<T>& status) { ++ return status.status(); ++} ++ ++// DO NOT USE DIRECTLY. Use absl/status/status_matchers.h instead. ++// ++// Monomorphic implementation of matcher IsOk() for a given type T. ++// T can be Status, StatusOr<>, or a reference to either of them. ++template <typename T> ++class MonoIsOkMatcherImpl : public ::testing::MatcherInterface<T> { ++ public: ++ void DescribeTo(std::ostream* os) const override { *os << "is OK"; } ++ void DescribeNegationTo(std::ostream* os) const override { ++ *os << "is not OK"; ++ } ++ bool MatchAndExplain(T actual_value, ++ ::testing::MatchResultListener*) const override { ++ return GetStatus(actual_value).ok(); ++ } ++}; ++ ++// DO NOT USE DIRECTLY. Use absl/status/status_matchers.h instead. ++// ++// Implements IsOk() as a polymorphic matcher. ++class IsOkMatcher { ++ public: ++ template <typename T> ++ /*implicit*/ operator ::testing::Matcher<T>() const { // NOLINT ++ return ::testing::Matcher<T>(new MonoIsOkMatcherImpl<T>()); ++ } ++}; ++ ++// DO NOT USE DIRECTLY. Use absl/status/status_matchers.h instead. ++// ++// Returns a gMock matcher that matches a Status or StatusOr<> which is OK.
++inline ::xla_testing::internal::IsOkMatcher IsOk() { ++ return ::xla_testing::internal::IsOkMatcher(); ++} ++ ++} // namespace internal ++} // namespace xla_testing ++ ++#endif // GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_XLA_GMOCK_MACROS_H_ +-- +2.52.0.223.gf5cc29aaa4-goog + diff --git a/third_party/xla/third_party/googletest/0002-Rename-dependencies-for-workspace.bzl-build.patch b/third_party/xla/third_party/googletest/0002-Rename-dependencies-for-workspace.bzl-build.patch new file mode 100644 index 00000000000000..93fa7f98c1b156 --- /dev/null +++ b/third_party/xla/third_party/googletest/0002-Rename-dependencies-for-workspace.bzl-build.patch @@ -0,0 +1,30 @@ +From 21affdb9aaa50767264c13d607d47cb2104c4e4a Mon Sep 17 00:00:00 2001 +From: Marcin Radomski +Date: Tue, 9 Dec 2025 18:23:26 +0000 +Subject: [PATCH 2/2] Rename dependencies for workspace.bzl build + +Must be separate from googletest.patch, because: +- Tensorflow applies googletest.patch only +- XLA bzlmod build applies patch that adds assert macros only, and + needs different repository name in deps +- XLA workspace.bzl build applies everything +--- + BUILD.bazel | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/BUILD.bazel b/BUILD.bazel +index 32d2a22c..a122fa28 100644 +--- a/BUILD.bazel ++++ b/BUILD.bazel +@@ -164,6 +164,6 @@ cc_library( + "//conditions:default": [], + }) + [ +- "@abseil-cpp//absl/status", +- "@abseil-cpp//absl/status:statusor", ++ "@com_google_absl//absl/status", ++ "@com_google_absl//absl/status:statusor", + ], + ) +-- +2.52.0.223.gf5cc29aaa4-goog + diff --git a/third_party/xla/third_party/googletest/README.add-status-macros.md b/third_party/xla/third_party/googletest/README.add-status-macros.md new file mode 100644 index 00000000000000..8a169b6f6031d5 --- /dev/null +++ b/third_party/xla/third_party/googletest/README.add-status-macros.md @@ -0,0 +1,41 @@ +add-status-macros.patch adds `ASSERT_OK`, `EXPECT_OK`, `ASSERT_OK_AND_ASSIGN` +to gmock.h so that the header's provided functionality matches internal gmock. + +What other things have we tried? + +1. Introducing a custom header to be used in OSS instead of `gmock/gmock.h`. + + The export-to-OSS process imposes a few restrictions. Notably, header + rewrite has to be reversible, so we need a 1:1 mapping between headers used + internally and in OSS. + + If we introduced a custom header to be used in OSS instead of gmock, it + would have to take the place of the current rewrite of internal gmock to + `gmock/gmock.h`. This means any use of `gmock/gmock.h` in OSS XLA code can + no longer map to internal gmock. We'd have to ban the header. + + Therefore, updating OSS `gmock/gmock.h` seems necessary. + +2. Patching in the extra macros to `gmock/gmock.h` by including + `absl/status/status_macros.h`. + + This introduces a circular dependency between absl and gmock which makes + bazel strongly opposed to the idea. + +3. Introducing a googletest bazel module wrapper. + + This would be a module that would proxy all `gmock/gmock.h` within XLA without + additional patching of googletest. However, having multiple sources of the + same gmock/gmock.h header path only works *sometimes*. The order of include + paths emitted by bazel depends on the target definition and ordering of + dependencies, so it ends up working in some cases and not in others. + +4. Expanding 3. by renaming googletest's `gmock.h` to `gmock.upstream.h` to + avoid header name conflicts.
+ + `gmock/gmock.h` is also included by googletest itself, so redirecting it to + `gmock/gmock.upstream.h` is needed. That boils down to even more brittle + patching. + +Overall, the add-status-macros.patch change is the least invasive one that +works. diff --git a/third_party/xla/third_party/googletest/googletest.patch b/third_party/xla/third_party/googletest/googletest.patch index 7e6e300ed273a1..b9f95d91084e6d 100644 --- a/third_party/xla/third_party/googletest/googletest.patch +++ b/third_party/xla/third_party/googletest/googletest.patch @@ -2,40 +2,14 @@ diff --git a/BUILD.bazel b/BUILD.bazel index cc254457..49120384 100644 --- a/BUILD.bazel +++ b/BUILD.bazel -@@ -142,16 +142,16 @@ cc_library( - }), - deps = select({ - ":has_absl": [ -- "@abseil-cpp//absl/container:flat_hash_set", -- "@abseil-cpp//absl/debugging:failure_signal_handler", -- "@abseil-cpp//absl/debugging:stacktrace", -- "@abseil-cpp//absl/debugging:symbolize", -- "@abseil-cpp//absl/flags:flag", -- "@abseil-cpp//absl/flags:parse", -- "@abseil-cpp//absl/flags:reflection", -- "@abseil-cpp//absl/flags:usage", -- "@abseil-cpp//absl/strings", -- "@re2", -+ "@com_google_absl//absl/container:flat_hash_set", -+ "@com_google_absl//absl/debugging:failure_signal_handler", -+ "@com_google_absl//absl/debugging:stacktrace", -+ "@com_google_absl//absl/debugging:symbolize", -+ "@com_google_absl//absl/flags:flag", -+ "@com_google_absl//absl/flags:parse", -+ "@com_google_absl//absl/flags:reflection", -+ "@com_google_absl//absl/flags:usage", -+ "@com_google_absl//absl/strings", -+ "@com_googlesource_code_re2//:re2", - ], +@@ -178,6 +178,10 @@ alias( + cc_library( + name = "gtest_main", + srcs = ["googlemock/src/gmock_main.cc"], ++ hdrs = glob([ ++ "googletest/include/gtest/*.h", ++ "googlemock/include/gmock/*.h", ++ ]), + features = select({ + ":windows": ["windows_export_all_symbols"], "//conditions:default": [], - }) + select({ -@@ -160,9 +160,6 @@ cc_library( - # so that's why these libraries are needed. - # Otherwise, builds targeting Fuchsia would fail to compile. 
- ":fuchsia": [ -- "@fuchsia_sdk//pkg/fdio", -- "@fuchsia_sdk//pkg/syslog", -- "@fuchsia_sdk//pkg/zx", - ], - "//conditions:default": [], - }), diff --git a/third_party/xla/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl b/third_party/xla/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl index ffa305c772e881..e9da7383842473 100644 --- a/third_party/xla/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl +++ b/third_party/xla/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl @@ -16,6 +16,7 @@ load( "with_feature_set", ) load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES") +load("@rules_cc//cc/toolchains:cc_toolchain_config_info.bzl", "CcToolchainConfigInfo") def all_assembly_actions(): return [ diff --git a/third_party/xla/third_party/gpus/crosstool/hipcc_cc_toolchain_config.bzl.tpl b/third_party/xla/third_party/gpus/crosstool/hipcc_cc_toolchain_config.bzl.tpl index e5a942b66c17fc..a97202d8e9fb61 100644 --- a/third_party/xla/third_party/gpus/crosstool/hipcc_cc_toolchain_config.bzl.tpl +++ b/third_party/xla/third_party/gpus/crosstool/hipcc_cc_toolchain_config.bzl.tpl @@ -11,6 +11,7 @@ load( "with_feature_set", ) load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES") +load("@rules_cc//cc/toolchains:cc_toolchain_config_info.bzl", "CcToolchainConfigInfo") all_compile_actions = [ ACTION_NAMES.c_compile, diff --git a/third_party/xla/third_party/gpus/crosstool/sycl_cc_toolchain_config.bzl.tpl b/third_party/xla/third_party/gpus/crosstool/sycl_cc_toolchain_config.bzl.tpl index 5d0295a6ee448b..e754300e3dbc9d 100644 --- a/third_party/xla/third_party/gpus/crosstool/sycl_cc_toolchain_config.bzl.tpl +++ b/third_party/xla/third_party/gpus/crosstool/sycl_cc_toolchain_config.bzl.tpl @@ -16,6 +16,7 @@ load( "with_feature_set", ) load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES") +load("@rules_cc//cc/toolchains:cc_toolchain_config_info.bzl", "CcToolchainConfigInfo") def all_assembly_actions(): return [ diff --git a/third_party/xla/third_party/gpus/cuda/build_defs.bzl.tpl b/third_party/xla/third_party/gpus/cuda/build_defs.bzl.tpl index 40ca4a62607cda..3ee6d2d348b2fc 100644 --- a/third_party/xla/third_party/gpus/cuda/build_defs.bzl.tpl +++ b/third_party/xla/third_party/gpus/cuda/build_defs.bzl.tpl @@ -1,3 +1,5 @@ +load("@rules_cc//cc:cc_library.bzl", "cc_library") + # Macros for building CUDA code. def if_cuda(if_true, if_false = []): """Shorthand for select()'ing on whether we're building with CUDA. @@ -137,7 +139,7 @@ def cuda_header_library( target without virtual includes. This works around the fact that bazel can't mix 'includes' and 'include_prefix' in the same target.""" - native.cc_library( + cc_library( name = name + "_virtual", hdrs = hdrs, include_prefix = include_prefix, @@ -146,7 +148,7 @@ def cuda_header_library( visibility = ["//visibility:private"], ) - native.cc_library( + cc_library( name = name, textual_hdrs = hdrs, deps = deps + [":%s_virtual" % name], @@ -160,7 +162,7 @@ def cuda_library(copts = [], tags = [], deps = [], **kwargs): # "use of the "register" storage class specifier is not allowed" error. # This can and should be removed once we migrate on glibc-2.27 or newer. 
local_defines = kwargs.pop("local_defines", []) + ["register="] - native.cc_library( + cc_library( copts = cuda_default_copts() + copts, tags = tags + [ "gpu", diff --git a/third_party/xla/third_party/gpus/rocm/BUILD.tpl b/third_party/xla/third_party/gpus/rocm/BUILD.tpl index c95f9a95933fbc..de7d5421af6ffa 100644 --- a/third_party/xla/third_party/gpus/rocm/BUILD.tpl +++ b/third_party/xla/third_party/gpus/rocm/BUILD.tpl @@ -105,6 +105,7 @@ cc_library( ":hip", ":hipblas", ":hipblaslt", + ":hipfft", ":hiprand", ":hipsolver", ":hipsparse", @@ -116,12 +117,7 @@ cc_library( ":rocsolver", ":rocsparse", ":roctracer", - ] + select_threshold( - above_or_eq = [":hipfft"], - below = [":rocfft"], - threshold = 40100, - value = rocm_version_number(), - ), + ], ) cc_library( @@ -150,9 +146,11 @@ cc_library( ], ":multiple_rocm_paths": [ "-Wl,-rpath=%{rocm_lib_paths}", + "-Lexternal/local_config_rocm/rocm/%{rocm_root}/lib", ], "//conditions:default": [ "-Wl,-rpath,/opt/rocm/lib", + "-Lexternal/local_config_rocm/rocm/%{rocm_root}/lib", ], }), visibility = ["//visibility:public"], @@ -410,11 +408,15 @@ cc_library( cc_library( name = "rocsolver", hdrs = glob(["%{rocm_root}/include/rocsolver/**"]), - data = glob(["%{rocm_root}/lib/librocsolver*.so*"]), + data = glob([ + "%{rocm_root}/lib/librocsolver*.so*", + "%{rocm_root}/lib/host-math/lib/*.so*", + ]), include_prefix = "rocm", includes = [ "%{rocm_root}/include/", ], + linkopts = ["-lrocsolver"], strip_include_prefix = "%{rocm_root}", visibility = ["//visibility:public"], deps = [ @@ -425,14 +427,18 @@ cc_library( cc_library( name = "rocsparse", - srcs = glob(["%{rocm_root}/lib/librocsparse*.so*"]), + data = glob(["%{rocm_root}/lib/librocsparse*.so*"]), include_prefix = "rocm", includes = [ "%{rocm_root}/include/", ], + linkopts = ["-lrocsparse"], strip_include_prefix = "%{rocm_root}", visibility = ["//visibility:public"], - deps = [":rocm_config"], + deps = [ + ":rocm_config", + ":rocm_rpath", + ], ) cc_library( @@ -443,9 +449,14 @@ cc_library( includes = [ "%{rocm_root}/include/", ], + linkopts = ["-lhipsolver"], strip_include_prefix = "%{rocm_root}", visibility = ["//visibility:public"], - deps = [":rocm_config"], + deps = [ + ":rocm_config", + ":rocm_rpath", + ":rocsparse", + ], ) cc_library( @@ -456,6 +467,7 @@ cc_library( includes = [ "%{rocm_root}/include/", ], + linkopts = ["-lhipblas"], strip_include_prefix = "%{rocm_root}", visibility = ["//visibility:public"], deps = [ @@ -533,7 +545,8 @@ cc_library( ) cc_library( - name = "amd_comgr", + name = "amd_comgr_dynamic", + srcs = ["%{rocm_root}/lib/libamd_comgr_stub.a"], hdrs = glob(["%{rocm_root}/include/amd_comgr/**"]), data = glob([ "%{rocm_root}/lib/libamd_comgr_loader.so*", @@ -544,12 +557,7 @@ cc_library( includes = [ "%{rocm_root}/include", ], - linkopts = select({ - ":build_hermetic": [ - "-lamd_comgr_loader", - ], - "//conditions:default": [], - }), + linkopts = ["-lamd_comgr_loader"], strip_include_prefix = "%{rocm_root}", deps = [ ":rocm_config", @@ -558,6 +566,35 @@ cc_library( ], ) +cc_library( + name = "amd_comgr_static", + hdrs = glob(["%{rocm_root}/include/amd_comgr/**"]), + data = glob([ + "%{rocm_root}/lib/libamd_comgr.so*", + ]), + include_prefix = "rocm", + includes = [ + "%{rocm_root}/include", + ], + linkopts = ["-lamd_comgr"], + strip_include_prefix = "%{rocm_root}", + deps = [ + ":rocm_config", + ":rocm_rpath", + ":system_libs", + ], +) + +alias( + name = "amd_comgr", + actual = select_threshold( + above_or_eq = ":amd_comgr_dynamic", + below = ":amd_comgr_static", + threshold = 
71000, + value = rocm_version_number(), + ), +) + cc_library( name = "rocm_smi", srcs = glob([ diff --git a/third_party/xla/third_party/gpus/rocm/build_defs.bzl.tpl b/third_party/xla/third_party/gpus/rocm/build_defs.bzl.tpl index a690f767d8dbd5..d04a045907f274 100644 --- a/third_party/xla/third_party/gpus/rocm/build_defs.bzl.tpl +++ b/third_party/xla/third_party/gpus/rocm/build_defs.bzl.tpl @@ -1,3 +1,5 @@ +load("@rules_cc//cc:cc_library.bzl", "cc_library") + # Macros for building ROCm code. def if_rocm(if_true, if_false = []): """Shorthand for select()'ing on whether we're building with ROCm. @@ -80,7 +82,7 @@ def rocm_library(copts = [], deps = [], **kwargs): """Wrapper over cc_library which adds default ROCm options.""" if "@local_config_rocm//rocm:rocm_headers" not in deps: deps.append("@local_config_rocm//rocm:rocm_headers") - native.cc_library(copts = rocm_default_copts() + copts, deps = deps, **kwargs) + cc_library(copts = rocm_default_copts() + copts, deps = deps, **kwargs) def get_rbe_amdgpu_pool(is_single_gpu = False): return "%{single_gpu_rbe_pool}" if is_single_gpu else "%{multi_gpu_rbe_pool}" diff --git a/third_party/xla/third_party/gpus/rocm/rocm_redist.bzl b/third_party/xla/third_party/gpus/rocm/rocm_redist.bzl index 0628122609f8a2..6f7db647259a84 100644 --- a/third_party/xla/third_party/gpus/rocm/rocm_redist.bzl +++ b/third_party/xla/third_party/gpus/rocm/rocm_redist.bzl @@ -39,3 +39,22 @@ rocm_redist = { rocm_root = "_rocm_sdk_devel", ), } + +def _parse_rocm_distro_links(distro_links): + result = [] + for pair in distro_links.split(","): + link = pair.split(":") + result.append(struct(target = link[0], link = link[1])) + return result + +def create_rocm_distro(distro_url, distro_hash, symlinks): + return struct( + packages = [ + { + "url": distro_url, + "sha256": distro_hash, + }, + ], + required_softlinks = _parse_rocm_distro_links(symlinks), + rocm_root = "", + ) diff --git a/third_party/xla/third_party/gpus/rocm_configure.bzl b/third_party/xla/third_party/gpus/rocm_configure.bzl index 0e0240f00ee501..0f7d57ccbce5f7 100644 --- a/third_party/xla/third_party/gpus/rocm_configure.bzl +++ b/third_party/xla/third_party/gpus/rocm_configure.bzl @@ -16,6 +16,7 @@ load("@bazel_skylib//lib:paths.bzl", "paths") load( "//third_party/gpus/rocm:rocm_redist.bzl", + "create_rocm_distro", "rocm_redist", ) load( @@ -53,6 +54,9 @@ _TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS" _TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO" _DISTRIBUTION_PATH = "rocm/rocm_dist" _ROCM_DISTRO_VERSION = "ROCM_DISTRO_VERSION" +_ROCM_DISTRO_URL = "ROCM_DISTRO_URL" +_ROCM_DISTRO_HASH = "ROCM_DISTRO_HASH" +_ROCM_DISTRO_LINKS = "ROCM_DISTRO_LINKS" _TMPDIR = "TMPDIR" _DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm" @@ -553,23 +557,36 @@ def _remove_root_dir(path, root_dir): return path[len(root_dir) + 1:] return path +def _setup_rocm_distro_dir_impl(repository_ctx, rocm_distro): + repository_ctx.file("rocm/.index") + for pkg in rocm_distro.packages: + _download_package(repository_ctx, pkg) + + for entry in rocm_distro.required_softlinks: + repository_ctx.symlink( + "{}/{}".format(_DISTRIBUTION_PATH, entry.target), + "{}/{}".format(_DISTRIBUTION_PATH, entry.link), + ) + bash_bin = get_bash_bin(repository_ctx) + return _get_rocm_config(repository_ctx, bash_bin, _canonical_path("{}/{}".format(_DISTRIBUTION_PATH, rocm_distro.rocm_root)), "") + def _setup_rocm_distro_dir(repository_ctx): """Sets up the rocm hermetic installation directory to be used in hermetic build""" bash_bin = get_bash_bin(repository_ctx) + rocm_distro_url 
= repository_ctx.os.environ.get(_ROCM_DISTRO_URL) + if rocm_distro_url: + rocm_distro_hash = repository_ctx.os.environ.get(_ROCM_DISTRO_HASH) + if not rocm_distro_hash: + fail("{} environment variable is required".format(_ROCM_DISTRO_HASH)) + rocm_distro_links = repository_ctx.os.environ.get(_ROCM_DISTRO_LINKS, "") + rocm_distro = create_rocm_distro(rocm_distro_url, rocm_distro_hash, rocm_distro_links) + return _setup_rocm_distro_dir_impl(repository_ctx, rocm_distro) + rocm_distro = repository_ctx.os.environ.get(_ROCM_DISTRO_VERSION) multiple_paths = repository_ctx.os.environ.get(_TF_ROCM_MULTIPLE_PATHS) if rocm_distro: redist = rocm_redist[rocm_distro] - repository_ctx.file("rocm/.index") - for pkg in redist.packages: - _download_package(repository_ctx, pkg) - - for entry in redist.required_softlinks: - repository_ctx.symlink( - "{}/{}".format(_DISTRIBUTION_PATH, entry.target), - "{}/{}".format(_DISTRIBUTION_PATH, entry.link), - ) - return _get_rocm_config(repository_ctx, bash_bin, _canonical_path("{}/{}".format(_DISTRIBUTION_PATH, redist.rocm_root)), "") + return _setup_rocm_distro_dir_impl(repository_ctx, redist) elif multiple_paths: paths_list = multiple_paths.split(":") for rocm_custom_path in paths_list: @@ -866,6 +883,9 @@ _ENVIRONS = [ _ROCM_TOOLKIT_PATH, _TF_ROCM_AMDGPU_TARGETS, _ROCM_DISTRO_VERSION, + _ROCM_DISTRO_URL, + _ROCM_DISTRO_HASH, + _ROCM_DISTRO_LINKS, _TF_ROCM_RBE_DOCKER_IMAGE, _TF_ROCM_RBE_SINGLE_GPU_POOL, _TF_ROCM_RBE_MULTI_GPU_POOL, diff --git a/third_party/xla/third_party/gpus/sycl/build_defs.bzl.tpl b/third_party/xla/third_party/gpus/sycl/build_defs.bzl.tpl index 8b4324dcc8c9da..debfd5d27639f7 100644 --- a/third_party/xla/third_party/gpus/sycl/build_defs.bzl.tpl +++ b/third_party/xla/third_party/gpus/sycl/build_defs.bzl.tpl @@ -1,3 +1,5 @@ +load("@rules_cc//cc:cc_library.bzl", "cc_library") + # Macros for building SYCL code. def if_sycl(if_true, if_false = []): """Shorthand for select()'ing on whether we're building with SYCL.
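Taken together, the rocm_configure.bzl changes above add a second way to obtain a hermetic ROCm tree: besides selecting a predefined rocm_redist entry through ROCM_DISTRO_VERSION, the repository rule can now be pointed at an arbitrary archive through ROCM_DISTRO_URL and ROCM_DISTRO_HASH (the hash is mandatory whenever the URL is set), while ROCM_DISTRO_LINKS optionally carries comma-separated "target:link" pairs that become symlinks under rocm/rocm_dist. All three variables are registered in _ENVIRONS, so they would typically reach the rule through the repository environment, e.g. --repo_env=ROCM_DISTRO_URL=... The plain-Python sketch below only illustrates that parsing and the required-hash check; the helper names, the empty-pair guard, and the sample link spec are invented here and are not part of the Starlark above.

import os
from collections import namedtuple

Softlink = namedtuple("Softlink", ["target", "link"])

def parse_links(distro_links):
    # "a:b,c:d" -> [Softlink(target="a", link="b"), Softlink(target="c", link="d")]
    result = []
    for pair in distro_links.split(","):
        if not pair:  # guard added in this sketch so an unset/empty variable yields no links
            continue
        parts = pair.split(":")
        result.append(Softlink(target=parts[0], link=parts[1]))
    return result

def distro_from_env():
    url = os.environ.get("ROCM_DISTRO_URL")      # archive containing a ROCm installation
    sha256 = os.environ.get("ROCM_DISTRO_HASH")  # required whenever ROCM_DISTRO_URL is set
    links = os.environ.get("ROCM_DISTRO_LINKS", "")
    if url and not sha256:
        raise ValueError("ROCM_DISTRO_HASH environment variable is required")
    return {
        "packages": [{"url": url, "sha256": sha256}],
        "required_softlinks": parse_links(links),
        "rocm_root": "",
    }

print(parse_links("llvm/bin:bin,lib:lib64"))  # hypothetical link spec for illustration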
@@ -40,7 +42,7 @@ def if_sycl_build_is_configured(x, y): def sycl_library(copts = [], linkopts = [], tags = [], deps = [], **kwargs): """Wrapper over cc_library which adds default SYCL options.""" - native.cc_library(copts = sycl_default_copts() + copts, + cc_library(copts = sycl_default_copts() + copts, linkopts = sycl_default_linkopts() + linkopts, tags = tags + ["gpu"], deps = deps + if_sycl_is_configured([ diff --git a/third_party/xla/third_party/highwayhash/highwayhash.BUILD b/third_party/xla/third_party/highwayhash/highwayhash.BUILD index 0314bd443f2617..2c409c8eb8597e 100644 --- a/third_party/xla/third_party/highwayhash/highwayhash.BUILD +++ b/third_party/xla/third_party/highwayhash/highwayhash.BUILD @@ -255,6 +255,7 @@ cc_library( deps = [ ":arch_specific", ":compiler_specific", + ":endianess", ":hh_types", ":iaca", ":load3", diff --git a/third_party/xla/third_party/implib_so/BUILD.bazel b/third_party/xla/third_party/implib_so/BUILD.bazel index ca6976cd8d3425..1cb7282ea89d71 100644 --- a/third_party/xla/third_party/implib_so/BUILD.bazel +++ b/third_party/xla/third_party/implib_so/BUILD.bazel @@ -1,3 +1,5 @@ +load("@rules_python//python:defs.bzl", "py_binary") + package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # MIT diff --git a/third_party/xla/third_party/llvm/generated.patch b/third_party/xla/third_party/llvm/generated.patch index 509398da979e83..f82404ca1cbe14 100644 --- a/third_party/xla/third_party/llvm/generated.patch +++ b/third_party/xla/third_party/llvm/generated.patch @@ -1 +1,152 @@ Auto generated patch. Do not edit or delete it, even if empty. +diff -ruN --strip-trailing-cr a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp +--- a/clang/lib/Serialization/ASTReaderDecl.cpp ++++ b/clang/lib/Serialization/ASTReaderDecl.cpp +@@ -2107,8 +2107,9 @@ + auto *Def = DD.Definition; + DD = std::move(MergeDD); + DD.Definition = Def; +- for (auto *D : Def->redecls()) +- cast<CXXRecordDecl>(D)->DefinitionData = &DD; ++ for (auto *R = Reader.getMostRecentExistingDecl(Def); R; ++ R = R->getPreviousDecl()) ++ cast<CXXRecordDecl>(R)->DefinitionData = &DD; + return; + } + +diff -ruN --strip-trailing-cr a/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h b/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h +--- a/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h ++++ b/libc/src/__support/FPUtil/x86_64/fenv_mxcsr_utils.h +@@ -61,14 +61,14 @@ + LIBC_INLINE static void write_mxcsr(uint32_t w) { _mm_setcsr(w); } + + LIBC_INLINE static void clear_except(uint16_t excepts) { +- uint32_t mxcsr = _MM_GET_EXCEPTION_STATE(); ++ uint32_t mxcsr = get_mxcsr(); + mxcsr &= ~static_cast<uint32_t>(excepts); +- _MM_SET_EXCEPTION_STATE(mxcsr); ++ write_mxcsr(mxcsr); + } + + LIBC_INLINE static uint16_t test_except(uint16_t excepts) { + uint32_t mxcsr = get_mxcsr(); +- return static_cast<uint16_t>(excepts & mxcsr); ++ return static_cast<uint16_t>(excepts & ExceptionFlags::ALL_F & mxcsr); + } + + LIBC_INLINE static uint16_t get_except() { +@@ -83,9 +83,9 @@ + } + + LIBC_INLINE static void raise_except(uint16_t excepts) { +- uint32_t mxcsr = _MM_GET_EXCEPTION_STATE(); +- mxcsr |= excepts; +- _MM_SET_EXCEPTION_STATE(mxcsr); ++ uint32_t mxcsr = get_mxcsr(); ++ mxcsr |= excepts & ExceptionFlags::ALL_F; ++ write_mxcsr(mxcsr); + #ifdef LIBC_TRAP_ON_RAISE_FP_EXCEPT + // We will try to trigger the SIGFPE if floating point exceptions are not + // masked.
Since we already set all the floating point exception flags, we +diff -ruN --strip-trailing-cr a/libcxx/include/__flat_map/flat_map.h b/libcxx/include/__flat_map/flat_map.h +--- a/libcxx/include/__flat_map/flat_map.h ++++ b/libcxx/include/__flat_map/flat_map.h +@@ -465,13 +465,13 @@ + } + + // [flat.map.access], element access +- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](const key_type& __x) ++ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](const key_type& __x) + requires is_constructible_v + { + return try_emplace(__x).first->second; + } + +- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](key_type&& __x) ++ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](key_type&& __x) + requires is_constructible_v + { + return try_emplace(std::move(__x)).first->second; +@@ -480,7 +480,7 @@ + template + requires(__is_compare_transparent && is_constructible_v && is_constructible_v && + !is_convertible_v<_Kp &&, const_iterator> && !is_convertible_v<_Kp &&, iterator>) +- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](_Kp&& __x) { ++ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](_Kp&& __x) { + return try_emplace(std::forward<_Kp>(__x)).first->second; + } + +diff -ruN --strip-trailing-cr a/libcxx/include/map b/libcxx/include/map +--- a/libcxx/include/map ++++ b/libcxx/include/map +@@ -1092,9 +1092,9 @@ + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); } + +- [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __k); ++ _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __k); + # ifndef _LIBCPP_CXX03_LANG +- [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __k); ++ _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __k); + # endif + + template = 20 + +- [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __k); ++ _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __k); + # ifndef _LIBCPP_CXX03_LANG +- [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __k); ++ _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __k); + # endif + + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __k); +diff -ruN --strip-trailing-cr a/libcxx/test/libcxx/diagnostics/flat_map.nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/flat_map.nodiscard.verify.cpp +--- a/libcxx/test/libcxx/diagnostics/flat_map.nodiscard.verify.cpp ++++ b/libcxx/test/libcxx/diagnostics/flat_map.nodiscard.verify.cpp +@@ -66,9 +66,9 @@ + TransparentKey tkey; + + std::flat_map nfm; +- nfm[key]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +- fm[std::move(key)]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +- fm[std::move(tkey)]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} ++ nfm[key]; // no-warning ++ fm[std::move(key)]; // no-warning ++ fm[std::move(tkey)]; // no-warning + + fm.at(key); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cfm.at(key); // expected-warning {{ignoring return value of function declared with 'nodiscard' 
attribute}} +diff -ruN --strip-trailing-cr a/libcxx/test/libcxx/diagnostics/map.nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/map.nodiscard.verify.cpp +--- a/libcxx/test/libcxx/diagnostics/map.nodiscard.verify.cpp ++++ b/libcxx/test/libcxx/diagnostics/map.nodiscard.verify.cpp +@@ -55,8 +55,8 @@ + + int key = 0; + +- m[key]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +- m[std::move(key)]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} ++ m[key]; // no-warning ++ m[std::move(key)]; // no-warning + + #if TEST_STD_VER >= 14 + std::map> strMap; +diff -ruN --strip-trailing-cr a/libcxx/test/libcxx/diagnostics/unordered_map.nodiscard.verify.cpp b/libcxx/test/libcxx/diagnostics/unordered_map.nodiscard.verify.cpp +--- a/libcxx/test/libcxx/diagnostics/unordered_map.nodiscard.verify.cpp ++++ b/libcxx/test/libcxx/diagnostics/unordered_map.nodiscard.verify.cpp +@@ -81,8 +81,8 @@ + ctm.equal_range(tkey); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + #endif + +- m[key]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +- m[std::move(key)]; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} ++ m[key]; // no-warning ++ m[std::move(key)]; // no-warning + + m.at(key); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + cm.at(key); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} diff --git a/third_party/xla/third_party/llvm/workspace.bzl b/third_party/xla/third_party/llvm/workspace.bzl index 5e3d8f2100a1be..29af0ffbd8c12c 100644 --- a/third_party/xla/third_party/llvm/workspace.bzl +++ b/third_party/xla/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "8dee997a8558b460b82b23fb43b197d68258baac" - LLVM_SHA256 = "6a26975000c2cb45787813317bfeeadeafa0cba762e9434fb7940481ec4b27de" + LLVM_COMMIT = "7d381f2a5634d1e41b61299839d652cc4a021898" + LLVM_SHA256 = "f1641918fd3f5e1667d39afb9c261da39ed9f74e30f1c2f98031d6d609a8de15" tf_http_archive( name = name, diff --git a/third_party/xla/third_party/llvm_openmp/BUILD.bazel b/third_party/xla/third_party/llvm_openmp/BUILD.bazel index fbde2733a2a302..15f0218bf2f6a2 100644 --- a/third_party/xla/third_party/llvm_openmp/BUILD.bazel +++ b/third_party/xla/third_party/llvm_openmp/BUILD.bazel @@ -17,6 +17,7 @@ load( "if_macos", "if_windows", ) +load("@rules_python//python:defs.bzl", "py_binary") package( default_visibility = [ diff --git a/third_party/xla/third_party/mkl_dnn/mkldnn_acl.BUILD b/third_party/xla/third_party/mkl_dnn/mkldnn_acl.BUILD index 83cca313adf4f5..3a079c87ab9dd6 100644 --- a/third_party/xla/third_party/mkl_dnn/mkldnn_acl.BUILD +++ b/third_party/xla/third_party/mkl_dnn/mkldnn_acl.BUILD @@ -156,13 +156,5 @@ cc_library( visibility = ["//visibility:public"], deps = [ "@compute_library//:arm_compute", - ] + select({ - # When using MKL-DNN on the AArch64 architecture, OpenMP is required - # for parallelization. Because the Hermetic C++ build environment uses - # the -nodefaultlibs flag, simply passing -fopenmp is insufficient. - # OpenMP's dependencies must be explicitly linked to ensure correct - # inclusion, as automatic linking is disabled. 
- "@rules_ml_toolchain//common:is_hermetic_cc_enabled": ["@rules_ml_toolchain//cc/sysroots:openmp"], - "//conditions:default": [], - }), + ], ) diff --git a/third_party/xla/third_party/nccl/build_defs.bzl.tpl b/third_party/xla/third_party/nccl/build_defs.bzl.tpl index ac7f3bc92cff33..ad447657d907a2 100644 --- a/third_party/xla/third_party/nccl/build_defs.bzl.tpl +++ b/third_party/xla/third_party/nccl/build_defs.bzl.tpl @@ -2,6 +2,7 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "cuda_gpu_architectures") load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") +load("@rules_cc//cc:cc_library.bzl", "cc_library") # CUDA toolkit version as tuple (e.g. '(11, 1)'). _cuda_version = %{cuda_version} @@ -311,7 +312,7 @@ def cuda_rdc_library(name, hdrs = None, copts = None, linkstatic = True, **kwarg # Compile host and device code into library. lib = name + "_lib" - native.cc_library( + cc_library( name = lib, hdrs = hdrs, copts = _rdc_copts() + copts, @@ -336,7 +337,7 @@ def cuda_rdc_library(name, hdrs = None, copts = None, linkstatic = True, **kwarg # Compile the source file into a library. dlink = name + "_dlink" - native.cc_library( + cc_library( name = dlink, srcs = [dlink_cc], textual_hdrs = [dlink_hdrs], @@ -371,7 +372,7 @@ def cuda_rdc_library(name, hdrs = None, copts = None, linkstatic = True, **kwarg ) # Create cc target from archive. - native.cc_library( + cc_library( name = name, srcs = [merged], hdrs = hdrs, diff --git a/third_party/xla/third_party/nvtx/BUILD.bazel b/third_party/xla/third_party/nvtx/BUILD.bazel index af6de99cb8fcf7..a8e181e57b1932 100644 --- a/third_party/xla/third_party/nvtx/BUILD.bazel +++ b/third_party/xla/third_party/nvtx/BUILD.bazel @@ -1,5 +1,7 @@ # NVIDIA NVTX 3 +load("@rules_cc//cc:cc_library.bzl", "cc_library") + licenses(["notice"]) exports_files(["LICENSE.txt"]) diff --git a/third_party/xla/third_party/ortools/layering_check.patch b/third_party/xla/third_party/ortools/layering_check.patch new file mode 100644 index 00000000000000..3c2240d8d39e44 --- /dev/null +++ b/third_party/xla/third_party/ortools/layering_check.patch @@ -0,0 +1,4261 @@ +diff --git a/examples/cpp/BUILD.bazel b/examples/cpp/BUILD.bazel +index 6cc1490..a7fa5c1 100644 +--- a/examples/cpp/BUILD.bazel ++++ b/examples/cpp/BUILD.bazel +@@ -711,6 +711,7 @@ cc_test( + cc_library( + name = "print_dimacs_assignment", + hdrs = ["print_dimacs_assignment.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:file", +@@ -725,6 +726,7 @@ cc_library( + cc_library( + name = "parse_dimacs_assignment", + hdrs = ["parse_dimacs_assignment.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/graph:ebert_graph", +@@ -878,6 +880,7 @@ cc_test( + cc_library( + name = "fap_parser", + hdrs = ["fap_parser.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:file", +@@ -891,6 +894,7 @@ cc_library( + cc_library( + name = "fap_model_printer", + hdrs = ["fap_model_printer.h"], ++ features = ["-layering_check"], + deps = [ + ":fap_parser", + "//ortools/base", +@@ -903,6 +907,7 @@ cc_library( + cc_library( + name = "fap_utilities", + hdrs = ["fap_utilities.h"], ++ features = ["-layering_check"], + deps = [ + ":fap_parser", + "//ortools/base", +diff --git a/ortools/algorithms/BUILD.bazel b/ortools/algorithms/BUILD.bazel +index be5f372..4d1c6ae 100644 +--- a/ortools/algorithms/BUILD.bazel ++++ b/ortools/algorithms/BUILD.bazel +@@ -65,6 +65,7 @@ cc_library( + name = "binary_search", + 
srcs = [], + hdrs = ["binary_search.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "@com_google_absl//absl/functional:function_ref", +@@ -95,6 +96,7 @@ cc_library( + name = "radix_sort", + srcs = [], + hdrs = ["radix_sort.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base", +@@ -132,6 +134,7 @@ cc_library( + name = "duplicate_remover", + srcs = ["duplicate_remover.cc"], + hdrs = ["duplicate_remover.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/numeric:bits", +@@ -147,6 +150,7 @@ cc_library( + name = "hungarian", + srcs = ["hungarian.cc"], + hdrs = ["hungarian.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "@com_google_absl//absl/container:flat_hash_map", +@@ -174,6 +178,7 @@ cc_test( + cc_library( + name = "adjustable_k_ary_heap", + hdrs = ["adjustable_k_ary_heap.h"], ++ features = ["-layering_check"], + deps = ["@com_google_absl//absl/log:check"], + ) + +@@ -213,6 +218,7 @@ cc_library( + ":use_scip": ["-DUSE_SCIP"], + "//conditions:default": [], + }), ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/strings", +@@ -269,6 +275,7 @@ cc_library( + name = "set_cover_lagrangian", + srcs = ["set_cover_lagrangian.cc"], + hdrs = ["set_cover_lagrangian.h"], ++ features = ["-layering_check"], + deps = [ + ":adjustable_k_ary_heap", + ":set_cover_invariant", +@@ -282,6 +289,7 @@ cc_library( + name = "set_cover_model", + srcs = ["set_cover_model.cc"], + hdrs = ["set_cover_model.h"], ++ features = ["-layering_check"], + deps = [ + ":set_cover_cc_proto", + "//ortools/base:intops", +@@ -297,6 +305,7 @@ cc_library( + name = "set_cover_invariant", + srcs = ["set_cover_invariant.cc"], + hdrs = ["set_cover_invariant.h"], ++ features = ["-layering_check"], + deps = [ + ":set_cover_cc_proto", + ":set_cover_model", +@@ -311,6 +320,7 @@ cc_library( + name = "set_cover_heuristics", + srcs = ["set_cover_heuristics.cc"], + hdrs = ["set_cover_heuristics.h"], ++ features = ["-layering_check"], + deps = [ + ":adjustable_k_ary_heap", + ":set_cover_invariant", +@@ -328,6 +338,7 @@ cc_library( + name = "set_cover_mip", + srcs = ["set_cover_mip.cc"], + hdrs = ["set_cover_mip.h"], ++ features = ["-layering_check"], + deps = [ + ":set_cover_invariant", + ":set_cover_model", +@@ -343,6 +354,7 @@ cc_library( + name = "set_cover_reader", + srcs = ["set_cover_reader.cc"], + hdrs = ["set_cover_reader.h"], ++ features = ["-layering_check"], + deps = [ + ":set_cover_model", + "//ortools/base:file", +@@ -378,6 +390,7 @@ cc_test( + cc_library( + name = "dense_doubly_linked_list", + hdrs = ["dense_doubly_linked_list.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + ], +@@ -387,6 +400,7 @@ cc_library( + name = "dynamic_partition", + srcs = ["dynamic_partition.cc"], + hdrs = ["dynamic_partition.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:murmur", + "@com_google_absl//absl/log:check", +@@ -411,6 +425,7 @@ cc_library( + name = "sparse_permutation", + srcs = ["sparse_permutation.cc"], + hdrs = ["sparse_permutation.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "@com_google_absl//absl/strings", +@@ -433,6 +448,7 @@ cc_library( + name = "dynamic_permutation", + srcs = ["dynamic_permutation.cc"], + hdrs = ["dynamic_permutation.h"], ++ features = ["-layering_check"], + deps = [ + ":sparse_permutation", + 
"//ortools/base", +@@ -453,6 +469,7 @@ cc_library( + name = "find_graph_symmetries", + srcs = ["find_graph_symmetries.cc"], + hdrs = ["find_graph_symmetries.h"], ++ features = ["-layering_check"], + deps = [ + ":dense_doubly_linked_list", + ":dynamic_partition", +@@ -507,6 +524,7 @@ cc_test( + cc_library( + name = "binary_indexed_tree", + hdrs = ["binary_indexed_tree.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/log:check", + ], +@@ -525,6 +543,7 @@ cc_library( + name = "n_choose_k", + srcs = ["n_choose_k.cc"], + hdrs = ["n_choose_k.h"], ++ features = ["-layering_check"], + deps = [ + ":binary_search", + "//ortools/base:mathutil", +diff --git a/ortools/algorithms/python/BUILD.bazel b/ortools/algorithms/python/BUILD.bazel +index fe3de2c..0a4ccf9 100644 +--- a/ortools/algorithms/python/BUILD.bazel ++++ b/ortools/algorithms/python/BUILD.bazel +@@ -48,6 +48,7 @@ config_setting( + cc_library( + name = "knapsack_solver_doc", + hdrs = ["knapsack_solver_doc.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + ) + +diff --git a/ortools/base/BUILD.bazel b/ortools/base/BUILD.bazel +index c57c0d2..6ebc65a 100644 +--- a/ortools/base/BUILD.bazel ++++ b/ortools/base/BUILD.bazel +@@ -54,6 +54,7 @@ cc_library( + "-DOR_TOOLS_MINOR=11", + "-DOR_TOOLS_PATCH=9999", + ], ++ features = ["-layering_check"], + linkopts = select({ + "on_linux": [], + "on_macos": ["-framework CoreFoundation"], +@@ -83,6 +84,7 @@ cc_library( + cc_library( + name = "accurate_sum", + hdrs = ["accurate_sum.h"], ++ features = ["-layering_check"], + ) + + cc_library( +@@ -91,6 +93,7 @@ cc_library( + "adjustable_priority_queue.h", + "adjustable_priority_queue-inl.h", + ], ++ features = ["-layering_check"], + deps = [ + ":base", + ], +@@ -99,18 +102,21 @@ cc_library( + cc_library( + name = "basictypes", + hdrs = ["basictypes.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "bitmap", + srcs = ["bitmap.cc"], + hdrs = ["bitmap.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "case", + srcs = ["case.cc"], + hdrs = ["case.h"], ++ features = ["-layering_check"], + deps = [":base"], + ) + +@@ -120,6 +126,7 @@ cc_library( + "commandlineflags.cc", + ], + hdrs = ["commandlineflags.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/flags:parse", +@@ -130,6 +137,7 @@ cc_library( + cc_library( + name = "container_logging", + hdrs = ["container_logging.h"], ++ features = ["-layering_check"], + deps = [":base"], + ) + +@@ -142,6 +150,7 @@ cc_library( + "on_windows": ["/Zc:preprocessor"], + "//conditions:default": [], + }), ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/container:inlined_vector", + ], +@@ -167,6 +176,7 @@ cc_test( + cc_library( + name = "dynamic_library", + hdrs = ["dynamic_library.h"], ++ features = ["-layering_check"], + linkopts = select({ + "on_linux": ["-Wl,--no-as-needed -ldl"], + "on_macos": [], +@@ -182,12 +192,14 @@ cc_library( + cc_library( + name = "encodingutils", + hdrs = ["encodingutils.h"], ++ features = ["-layering_check"], + deps = [":base"], + ) + + cc_library( + name = "flags", + hdrs = ["flags.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/flags:flag", + ], +@@ -205,6 +217,7 @@ cc_library( + "helpers.h", + "options.h", + ], ++ features = ["-layering_check"], + deps = [ + ":status_macros", + "@com_google_absl//absl/log", +@@ -218,6 +231,7 @@ cc_library( + cc_library( + name = "status_matchers", + hdrs = 
["status_matchers.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + "@com_google_absl//absl/status", +@@ -230,6 +244,7 @@ cc_library( + cc_library( + name = "message_matchers", + hdrs = ["message_matchers.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest", +@@ -240,6 +255,7 @@ cc_library( + cc_library( + name = "gmock", + hdrs = ["gmock.h"], ++ features = ["-layering_check"], + deps = [ + ":message_matchers", + ":status_matchers", +@@ -249,6 +265,7 @@ cc_library( + + cc_library( + name = "gmock_main", ++ features = ["-layering_check"], + deps = [ + ":gmock", + "@com_google_googletest//:gtest_main", +@@ -259,6 +276,7 @@ cc_library( + name = "gzipfile", + srcs = ["gzipfile.cc"], + hdrs = ["gzipfile.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":basictypes", +@@ -272,6 +290,7 @@ cc_library( + cc_library( + name = "gzipstring", + hdrs = ["gzipstring.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + "@zlib", +@@ -286,6 +305,7 @@ cc_library( + hdrs = [ + "hash.h", + ], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/strings", + ], +@@ -294,24 +314,28 @@ cc_library( + cc_library( + name = "int_type", + hdrs = ["int_type.h"], ++ features = ["-layering_check"], + deps = [":base"], + ) + + cc_library( + name = "intops", + hdrs = ["strong_int.h"], ++ features = ["-layering_check"], + deps = [":int_type"], + ) + + cc_library( + name = "iterator_adaptors", + hdrs = ["iterator_adaptors.h"], ++ features = ["-layering_check"], + deps = [":base"], + ) + + cc_library( + name = "linked_hash_map", + hdrs = ["linked_hash_map.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":logging", +@@ -324,6 +348,7 @@ cc_library( + name = "logging", + srcs = ["logging.cc"], + hdrs = ["logging.h"], ++ features = ["-layering_check"], + deps = [ + ":macros", + "@com_google_absl//absl/base:log_severity", +@@ -344,11 +369,13 @@ cc_library( + cc_library( + name = "macros", + hdrs = ["macros.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "map_util", + hdrs = ["map_util.h"], ++ features = ["-layering_check"], + deps = [":base"], + ) + +@@ -356,6 +383,7 @@ cc_library( + name = "mathutil", + srcs = ["mathutil.cc"], + hdrs = ["mathutil.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ], +@@ -364,12 +392,14 @@ cc_library( + cc_library( + name = "memfile", + hdrs = ["memfile.h"], ++ features = ["-layering_check"], + deps = [], + ) + + cc_library( + name = "murmur", + hdrs = ["murmur.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":hash", +@@ -380,6 +410,7 @@ cc_library( + cc_library( + name = "mutable_memfile", + hdrs = ["mutable_memfile.h"], ++ features = ["-layering_check"], + deps = [], + ) + +@@ -387,6 +418,7 @@ cc_library( + name = "numbers", + srcs = ["numbers.cc"], + hdrs = ["numbers.h"], ++ features = ["-layering_check"], + deps = [ + ":strtoint", + "@com_google_absl//absl/strings", +@@ -396,6 +428,7 @@ cc_library( + cc_library( + name = "parse_text_proto", + hdrs = ["parse_text_proto.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/log:check", + "@com_google_protobuf//:protobuf", +@@ -406,6 +439,7 @@ cc_library( + name = "path", + srcs = ["path.cc"], + hdrs = ["path.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + "@com_google_absl//absl/strings", +@@ -416,6 +450,7 @@ cc_library( + name = "temp_path", + srcs = ["temp_path.cc"], + hdrs = ["temp_path.h"], ++ features = 
["-layering_check"], + deps = [ + ":base", + ":file", +@@ -429,11 +464,13 @@ cc_library( + cc_library( + name = "protobuf_util", + hdrs = ["protobuf_util.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "protoutil", + hdrs = ["protoutil.h"], ++ features = ["-layering_check"], + deps = [ + ":timer", + "@com_google_absl//absl/status", +@@ -445,12 +482,14 @@ cc_library( + cc_library( + name = "ptr_util", + hdrs = ["ptr_util.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "recordio", + srcs = ["recordio.cc"], + hdrs = ["recordio.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":file", +@@ -465,18 +504,21 @@ cc_library( + cc_library( + name = "small_map", + hdrs = ["small_map.h"], ++ features = ["-layering_check"], + deps = [":base"], + ) + + cc_library( + name = "source_location", + hdrs = ["source_location.h"], ++ features = ["-layering_check"], + deps = ["@com_google_absl//absl/base:config"], + ) + + cc_library( + name = "status_builder", + hdrs = ["status_builder.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + "@com_google_absl//absl/status", +@@ -487,6 +529,7 @@ cc_library( + cc_library( + name = "status_macros", + hdrs = ["status_macros.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":status_builder", +@@ -498,12 +541,14 @@ cc_library( + cc_library( + name = "stl_util", + hdrs = ["stl_util.h"], ++ features = ["-layering_check"], + deps = [":base"], + ) + + cc_library( + name = "strong_vector", + hdrs = ["strong_vector.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":intops", +@@ -514,6 +559,7 @@ cc_library( + name = "strtoint", + srcs = ["strtoint.cc"], + hdrs = ["strtoint.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/strings", +@@ -524,6 +570,7 @@ cc_library( + name = "sysinfo", + srcs = ["sysinfo.cc"], + hdrs = ["sysinfo.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/strings", + ], +@@ -533,6 +580,7 @@ cc_library( + name = "threadpool", + srcs = ["threadpool.cc"], + hdrs = ["threadpool.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/synchronization", +@@ -543,6 +591,7 @@ cc_library( + name = "timer", + srcs = ["timer.cc"], + hdrs = ["timer.h"], ++ features = ["-layering_check"], + deps = [ + ":macros", + "@com_google_absl//absl/log:check", +@@ -553,22 +602,26 @@ cc_library( + cc_library( + name = "top_n", + hdrs = ["top_n.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "typeid", + hdrs = ["typeid.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "types", + hdrs = ["types.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "zipfile", + srcs = ["zipfile.cc"], + hdrs = ["zipfile.h"], ++ features = ["-layering_check"], + deps = [ + ":basictypes", + ":file", +diff --git a/ortools/bop/BUILD.bazel b/ortools/bop/BUILD.bazel +index 4720990..605ce2b 100644 +--- a/ortools/bop/BUILD.bazel ++++ b/ortools/bop/BUILD.bazel +@@ -30,6 +30,7 @@ cc_proto_library( + cc_library( + name = "bop_types", + hdrs = ["bop_types.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:intops", +@@ -41,6 +42,7 @@ cc_library( + name = "bop_base", + srcs = ["bop_base.cc"], + hdrs = ["bop_base.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_parameters_cc_proto", + ":bop_solution", +@@ -67,6 +69,7 @@ cc_library( + name = "bop_util", + srcs = 
["bop_util.cc"], + hdrs = ["bop_util.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_base", + ":bop_solution", +@@ -80,6 +83,7 @@ cc_library( + name = "bop_solution", + srcs = ["bop_solution.cc"], + hdrs = ["bop_solution.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_types", + "//ortools/base", +@@ -94,6 +98,7 @@ cc_library( + name = "bop_fs", + srcs = ["bop_fs.cc"], + hdrs = ["bop_fs.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_base", + ":bop_parameters_cc_proto", +@@ -126,6 +131,7 @@ cc_library( + name = "bop_lns", + srcs = ["bop_lns.cc"], + hdrs = ["bop_lns.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_base", + ":bop_parameters_cc_proto", +@@ -156,6 +162,7 @@ cc_library( + name = "complete_optimizer", + srcs = ["complete_optimizer.cc"], + hdrs = ["complete_optimizer.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_base", + ":bop_solution", +@@ -178,6 +185,7 @@ cc_library( + name = "bop_ls", + srcs = ["bop_ls.cc"], + hdrs = ["bop_ls.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_base", + ":bop_solution", +@@ -199,6 +207,7 @@ cc_library( + name = "bop_portfolio", + srcs = ["bop_portfolio.cc"], + hdrs = ["bop_portfolio.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_base", + ":bop_fs", +@@ -231,6 +240,7 @@ cc_library( + name = "bop_solver", + srcs = ["bop_solver.cc"], + hdrs = ["bop_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_base", + ":bop_fs", +@@ -265,6 +275,7 @@ cc_library( + name = "integral_solver", + srcs = ["integral_solver.cc"], + hdrs = ["integral_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":bop_base", + ":bop_fs", +diff --git a/ortools/constraint_solver/BUILD.bazel b/ortools/constraint_solver/BUILD.bazel +index 99d9b4d..6cedaa6 100644 +--- a/ortools/constraint_solver/BUILD.bazel ++++ b/ortools/constraint_solver/BUILD.bazel +@@ -169,6 +169,7 @@ cc_library( + "constraint_solver.h", + "constraint_solveri.h", + ], ++ features = ["-layering_check"], + deps = [ + ":assignment_cc_proto", + ":demon_profiler_cc_proto", +@@ -267,6 +268,7 @@ cc_library( + name = "routing_parameters", + srcs = ["routing_parameters.cc"], + hdrs = ["routing_parameters.h"], ++ features = ["-layering_check"], + deps = [ + ":cp", + ":routing_enums_cc_proto", +@@ -286,6 +288,7 @@ cc_library( + cc_library( + name = "routing_types", + hdrs = ["routing_types.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:intops", +@@ -296,6 +299,7 @@ cc_library( + name = "routing_utils", + srcs = ["routing_utils.cc"], + hdrs = ["routing_utils.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + "//ortools/base", +@@ -307,6 +311,7 @@ cc_library( + name = "routing_neighborhoods", + srcs = ["routing_neighborhoods.cc"], + hdrs = ["routing_neighborhoods.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":cp", +@@ -320,6 +325,7 @@ cc_library( + name = "routing_index_manager", + srcs = ["routing_index_manager.cc"], + hdrs = ["routing_index_manager.h"], ++ features = ["-layering_check"], + deps = [ + ":routing_types", + "//ortools/base", +@@ -360,6 +366,7 @@ cc_library( + "on_windows": ["/Zc:preprocessor"], + "//conditions:default": [], + }), ++ features = ["-layering_check"], + deps = [ + ":cp", + ":routing_enums_cc_proto", +diff --git a/ortools/flatzinc/BUILD.bazel b/ortools/flatzinc/BUILD.bazel +index d3e8b22..5015c77 100644 +--- a/ortools/flatzinc/BUILD.bazel ++++ b/ortools/flatzinc/BUILD.bazel 
+@@ -46,6 +46,7 @@ cc_library( + name = "model", + srcs = ["model.cc"], + hdrs = ["model.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:hash", +@@ -71,6 +72,7 @@ cc_library( + copts = [ + "$(STACK_FRAME_UNLIMITED)", # parser.tab.cc + ], ++ features = ["-layering_check"], + deps = [ + ":model", + "//ortools/base", +@@ -90,6 +92,7 @@ cc_library( + "on_windows": [], + "//conditions:default": [], + }), ++ features = ["-layering_check"], + deps = [ + ":parser_yacc_lib", + "//ortools/base", +@@ -102,6 +105,7 @@ cc_library( + name = "parser_lib", + srcs = ["parser.cc"], + hdrs = ["parser.h"], ++ features = ["-layering_check"], + deps = [ + ":model", + ":parser_lex_lib", +@@ -113,6 +117,7 @@ cc_library( + name = "presolve", + srcs = ["presolve.cc"], + hdrs = ["presolve.h"], ++ features = ["-layering_check"], + deps = [ + ":model", + "//ortools/base", +@@ -128,6 +133,7 @@ cc_library( + name = "checker", + srcs = ["checker.cc"], + hdrs = ["checker.h"], ++ features = ["-layering_check"], + deps = [ + ":model", + "//ortools/base", +@@ -142,6 +148,7 @@ cc_library( + name = "cp_model_fz_solver", + srcs = ["cp_model_fz_solver.cc"], + hdrs = ["cp_model_fz_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":checker", + ":model", +diff --git a/ortools/glop/BUILD.bazel b/ortools/glop/BUILD.bazel +index 687c48d..48e856c 100644 +--- a/ortools/glop/BUILD.bazel ++++ b/ortools/glop/BUILD.bazel +@@ -54,6 +54,7 @@ SAFE_FP_CODE = select({ + cc_library( + name = "pricing", + hdrs = ["pricing.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/lp_data:base", +@@ -69,6 +70,7 @@ cc_library( + srcs = ["revised_simplex.cc"], + hdrs = ["revised_simplex.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":basis_representation", + ":dual_edge_norms", +@@ -106,6 +108,7 @@ cc_library( + srcs = ["update_row.cc"], + hdrs = ["update_row.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":basis_representation", + ":parameters_cc_proto", +@@ -125,6 +128,7 @@ cc_library( + srcs = ["variables_info.cc"], + hdrs = ["variables_info.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/lp_data:base", +@@ -139,6 +143,7 @@ cc_library( + srcs = ["lu_factorization.cc"], + hdrs = ["lu_factorization.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":markowitz", + ":parameters_cc_proto", +@@ -155,6 +160,7 @@ cc_library( + srcs = ["markowitz.cc"], + hdrs = ["markowitz.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":parameters_cc_proto", + ":status", +@@ -174,6 +180,7 @@ cc_library( + srcs = ["basis_representation.cc"], + hdrs = ["basis_representation.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":lu_factorization", + ":parameters_cc_proto", +@@ -193,6 +200,7 @@ cc_library( + name = "rank_one_update", + hdrs = ["rank_one_update.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":status", + "//ortools/base", +@@ -210,6 +218,7 @@ cc_library( + srcs = ["initial_basis.cc"], + hdrs = ["initial_basis.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":markowitz", + "//ortools/base", +@@ -227,6 +236,7 @@ cc_library( + srcs = ["status.cc"], + hdrs = ["status.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + ], +@@ -255,6 +265,7 @@ cc_library( + srcs = ["dual_edge_norms.cc"], + 
hdrs = ["dual_edge_norms.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":basis_representation", + ":parameters_cc_proto", +@@ -274,6 +285,7 @@ cc_library( + srcs = ["primal_edge_norms.cc"], + hdrs = ["primal_edge_norms.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":basis_representation", + ":parameters_cc_proto", +@@ -293,6 +305,7 @@ cc_library( + srcs = ["reduced_costs.cc"], + hdrs = ["reduced_costs.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":basis_representation", + ":parameters_cc_proto", +@@ -317,6 +330,7 @@ cc_library( + srcs = ["variable_values.cc"], + hdrs = ["variable_values.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":basis_representation", + ":dual_edge_norms", +@@ -338,6 +352,7 @@ cc_library( + srcs = ["entering_variable.cc"], + hdrs = ["entering_variable.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":basis_representation", + ":parameters_cc_proto", +@@ -366,6 +381,7 @@ cc_library( + srcs = ["preprocessor.cc"], + hdrs = ["preprocessor.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":parameters_cc_proto", + ":revised_simplex", +@@ -389,6 +405,7 @@ cc_library( + srcs = ["lp_solver.cc"], + hdrs = ["lp_solver.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":parameters_cc_proto", + ":preprocessor", +@@ -413,6 +430,7 @@ cc_library( + name = "parameters_validation", + srcs = ["parameters_validation.cc"], + hdrs = ["parameters_validation.h"], ++ features = ["-layering_check"], + deps = [ + ":parameters_cc_proto", + "@com_google_absl//absl/strings", +diff --git a/ortools/glpk/BUILD.bazel b/ortools/glpk/BUILD.bazel +index 246ee67..7f2c088 100644 +--- a/ortools/glpk/BUILD.bazel ++++ b/ortools/glpk/BUILD.bazel +@@ -18,6 +18,7 @@ cc_library( + name = "glpk_env_deleter", + srcs = ["glpk_env_deleter.cc"], + hdrs = ["glpk_env_deleter.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "@glpk", +@@ -28,6 +29,7 @@ cc_library( + name = "glpk_formatters", + srcs = ["glpk_formatters.cc"], + hdrs = ["glpk_formatters.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "@com_google_absl//absl/strings", +@@ -38,6 +40,7 @@ cc_library( + cc_library( + name = "glpk_computational_form", + hdrs = ["glpk_computational_form.h"], ++ features = ["-layering_check"], + deps = [ + "@glpk", + ], +diff --git a/ortools/graph/BUILD.bazel b/ortools/graph/BUILD.bazel +index fe0f588..4bb9556 100644 +--- a/ortools/graph/BUILD.bazel ++++ b/ortools/graph/BUILD.bazel +@@ -35,6 +35,7 @@ config_setting( + cc_library( + name = "graphs", + hdrs = ["graphs.h"], ++ features = ["-layering_check"], + deps = [ + ":ebert_graph", + ":graph", +@@ -44,6 +45,7 @@ cc_library( + cc_library( + name = "graph", + hdrs = ["graph.h"], ++ features = ["-layering_check"], + deps = [ + ":iterators", + "//ortools/base", +@@ -55,6 +57,7 @@ cc_library( + cc_library( + name = "bfs", + hdrs = ["bfs.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", +@@ -64,6 +67,7 @@ cc_library( + cc_library( + name = "bounded_dijkstra", + hdrs = ["bounded_dijkstra.h"], ++ features = ["-layering_check"], + deps = [ + ":graph", + "//ortools/base:iterator_adaptors", +@@ -78,6 +82,7 @@ cc_library( + cc_library( + name = "multi_dijkstra", + hdrs = ["multi_dijkstra.h"], ++ features = ["-layering_check"], + deps = [ + 
"//ortools/base:map_util", + "//ortools/base:types", +@@ -88,6 +93,7 @@ cc_library( + cc_library( + name = "bidirectional_dijkstra", + hdrs = ["bidirectional_dijkstra.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:iterator_adaptors", +@@ -103,6 +109,7 @@ cc_library( + name = "cliques", + srcs = ["cliques.cc"], + hdrs = ["cliques.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:int_type", +@@ -116,6 +123,7 @@ cc_library( + cc_library( + name = "hamiltonian_path", + hdrs = ["hamiltonian_path.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:types", +@@ -129,6 +137,7 @@ cc_library( + cc_library( + name = "christofides", + hdrs = ["christofides.h"], ++ features = ["-layering_check"], + deps = [ + ":eulerian_path", + ":graph", +@@ -147,6 +156,7 @@ cc_library( + cc_library( + name = "eulerian_path", + hdrs = ["eulerian_path.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + ], +@@ -155,6 +165,7 @@ cc_library( + cc_library( + name = "minimum_spanning_tree", + hdrs = ["minimum_spanning_tree.h"], ++ features = ["-layering_check"], + deps = [ + ":connected_components", + "//ortools/base:adjustable_priority_queue", +@@ -167,6 +178,7 @@ cc_library( + cc_library( + name = "one_tree_lower_bound", + hdrs = ["one_tree_lower_bound.h"], ++ features = ["-layering_check"], + deps = [ + ":christofides", + ":graph", +@@ -179,6 +191,7 @@ cc_library( + cc_library( + name = "ebert_graph", + hdrs = ["ebert_graph.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:types", +@@ -192,6 +205,7 @@ cc_library( + name = "shortest_paths", + srcs = ["shortest_paths.cc"], + hdrs = ["shortest_paths.h"], ++ features = ["-layering_check"], + deps = [ + ":ebert_graph", + ":graph", +@@ -212,6 +226,7 @@ cc_library( + cc_library( + name = "k_shortest_paths", + hdrs = ["k_shortest_paths.h"], ++ features = ["-layering_check"], + deps = [ + ":bounded_dijkstra", + ":ebert_graph", +@@ -242,6 +257,7 @@ cc_library( + name = "max_flow", + srcs = ["max_flow.cc"], + hdrs = ["max_flow.h"], ++ features = ["-layering_check"], + deps = [ + ":ebert_graph", + ":flow_problem_cc_proto", +@@ -290,6 +306,7 @@ cc_library( + "on_windows": ["/Zc:preprocessor"], + "//conditions:default": [], + }), ++ features = ["-layering_check"], + deps = [ + ":ebert_graph", + ":graph", +@@ -336,6 +353,7 @@ cc_library( + name = "assignment", + srcs = ["assignment.cc"], + hdrs = ["assignment.h"], ++ features = ["-layering_check"], + deps = [ + ":ebert_graph", + ":linear_assignment", +@@ -349,6 +367,7 @@ cc_library( + name = "linear_assignment", + srcs = ["linear_assignment.cc"], + hdrs = ["linear_assignment.h"], ++ features = ["-layering_check"], + deps = [ + ":ebert_graph", + "//ortools/base", +@@ -364,6 +383,7 @@ cc_library( + name = "perfect_matching", + srcs = ["perfect_matching.cc"], + hdrs = ["perfect_matching.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:adjustable_priority_queue", +@@ -382,6 +402,7 @@ cc_library( + name = "dag_shortest_path", + srcs = ["dag_shortest_path.cc"], + hdrs = ["dag_shortest_path.h"], ++ features = ["-layering_check"], + deps = [ + ":ebert_graph", + ":graph", +@@ -399,6 +420,7 @@ cc_library( + name = "dag_constrained_shortest_path", + srcs = ["dag_constrained_shortest_path.cc"], + hdrs = ["dag_constrained_shortest_path.h"], ++ features = ["-layering_check"], + deps = [ + ":dag_shortest_path", + ":graph", +@@ -416,6 
+438,7 @@ cc_library( + cc_library( + name = "rooted_tree", + hdrs = ["rooted_tree.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "@com_google_absl//absl/algorithm:container", +@@ -437,6 +460,7 @@ cc_library( + hdrs = [ + "connected_components.h", + ], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:map_util", +@@ -450,6 +474,7 @@ cc_library( + cc_library( + name = "io", + hdrs = ["io.h"], ++ features = ["-layering_check"], + deps = [ + ":graph", + "//ortools/base:numbers", +@@ -463,12 +488,14 @@ cc_library( + cc_library( + name = "iterators", + hdrs = ["iterators.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "random_graph", + srcs = ["random_graph.cc"], + hdrs = ["random_graph.h"], ++ features = ["-layering_check"], + deps = [ + ":graph", + "//ortools/base:logging", +@@ -485,6 +512,7 @@ cc_library( + hdrs = [ + "strongly_connected_components.h", + ], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + ], +@@ -494,6 +522,7 @@ cc_library( + name = "topologicalsorter", + srcs = ["topologicalsorter.cc"], + hdrs = ["topologicalsorter.h"], ++ features = ["-layering_check"], + deps = [ + ":graph", + "//ortools/base", +@@ -512,6 +541,7 @@ cc_library( + name = "util", + srcs = ["util.cc"], + hdrs = ["util.h"], ++ features = ["-layering_check"], + deps = [ + ":connected_components", + ":graph", +diff --git a/ortools/gscip/BUILD.bazel b/ortools/gscip/BUILD.bazel +index d949483..37dd2ee 100644 +--- a/ortools/gscip/BUILD.bazel ++++ b/ortools/gscip/BUILD.bazel +@@ -39,6 +39,7 @@ cc_library( + name = "gscip_parameters", + srcs = ["gscip_parameters.cc"], + hdrs = ["gscip_parameters.h"], ++ features = ["-layering_check"], + deps = [ + ":gscip_cc_proto", + "//ortools/base:status_macros", +@@ -62,6 +63,7 @@ cc_library( + name = "legacy_scip_params", + srcs = ["legacy_scip_params.cc"], + hdrs = ["legacy_scip_params.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/linear_solver:scip_helper_macros", + "//ortools/linear_solver:scip_with_glop", +@@ -81,6 +83,7 @@ cc_library( + "gscip.h", + "gscip_event_handler.h", + ], ++ features = ["-layering_check"], + deps = [ + ":gscip_cc_proto", + ":gscip_message_handler", +@@ -106,6 +109,7 @@ cc_library( + name = "gscip_ext", + srcs = ["gscip_ext.cc"], + hdrs = ["gscip_ext.h"], ++ features = ["-layering_check"], + deps = [ + ":gscip", + "//ortools/base:status_macros", +@@ -118,6 +122,7 @@ cc_library( + name = "gscip_message_handler", + srcs = ["gscip_message_handler.cc"], + hdrs = ["gscip_message_handler.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/linear_solver:scip_helper_macros", +@@ -131,6 +136,7 @@ cc_library( + name = "gscip_callback_result", + srcs = ["gscip_callback_result.cc"], + hdrs = ["gscip_callback_result.h"], ++ features = ["-layering_check"], + deps = ["@scip//:libscip"], + ) + +@@ -138,6 +144,7 @@ cc_library( + name = "gscip_constraint_handler", + srcs = ["gscip_constraint_handler.cc"], + hdrs = ["gscip_constraint_handler.h"], ++ features = ["-layering_check"], + deps = [ + ":gscip", + ":gscip_callback_result", +diff --git a/ortools/gurobi/BUILD.bazel b/ortools/gurobi/BUILD.bazel +index d8e4a72..83da625 100644 +--- a/ortools/gurobi/BUILD.bazel ++++ b/ortools/gurobi/BUILD.bazel +@@ -21,6 +21,7 @@ cc_library( + hdrs = [ + "environment.h", + ], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:dynamic_library", +@@ -39,6 +40,7 @@ cc_library( + name = 
"gurobi_util", + srcs = ["gurobi_util.cc"], + hdrs = ["gurobi_util.h"], ++ features = ["-layering_check"], + deps = [ + ":environment", + "@com_google_absl//absl/strings", +@@ -51,5 +53,6 @@ cc_library( + testonly = True, + srcs = ["gurobi_stdout_matchers.cc"], + hdrs = ["gurobi_stdout_matchers.h"], ++ features = ["-layering_check"], + deps = ["//ortools/base:gmock"], + ) +diff --git a/ortools/gurobi/isv_public/BUILD.bazel b/ortools/gurobi/isv_public/BUILD.bazel +index efae616..1006da8 100644 +--- a/ortools/gurobi/isv_public/BUILD.bazel ++++ b/ortools/gurobi/isv_public/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "gurobi_isv", + srcs = ["gurobi_isv.cc"], + hdrs = ["gurobi_isv.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/gurobi:environment", + "//ortools/math_opt/solvers:gurobi_cc_proto", +diff --git a/ortools/init/BUILD.bazel b/ortools/init/BUILD.bazel +index 0705399..aec2da3 100644 +--- a/ortools/init/BUILD.bazel ++++ b/ortools/init/BUILD.bazel +@@ -16,6 +16,7 @@ package(default_visibility = ["//visibility:public"]) + cc_library( + name = "init", + hdrs = ["init.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/gurobi:environment", +diff --git a/ortools/init/python/BUILD.bazel b/ortools/init/python/BUILD.bazel +index 1774f36..eb75897 100644 +--- a/ortools/init/python/BUILD.bazel ++++ b/ortools/init/python/BUILD.bazel +@@ -21,6 +21,7 @@ load("@rules_python//python:defs.bzl", "py_test") + cc_library( + name = "init_doc", + hdrs = ["init_doc.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + ) + +diff --git a/ortools/linear_solver/BUILD.bazel b/ortools/linear_solver/BUILD.bazel +index 618e192..b7bcf34 100644 +--- a/ortools/linear_solver/BUILD.bazel ++++ b/ortools/linear_solver/BUILD.bazel +@@ -252,6 +252,7 @@ cc_library( + ":use_cplex": ["-DUSE_CPLEX"], + "//conditions:default": [], + }), ++ features = ["-layering_check"], + deps = [ + ":linear_solver_cc_proto", + ":model_exporter", +@@ -323,6 +324,7 @@ cc_library( + name = "model_validator", + srcs = ["model_validator.cc"], + hdrs = ["model_validator.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":linear_solver_cc_proto", +@@ -352,6 +354,7 @@ copy_file( + cc_library( + name = "scip_with_glop", + srcs = ["lpi_glop.cpp"], ++ features = ["-layering_check"], + deps = [ + "//ortools/glop:lp_solver", + "@scip//:libscip", +@@ -361,6 +364,7 @@ cc_library( + cc_library( + name = "scip_helper_macros", + hdrs = ["scip_helper_macros.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "@com_google_absl//absl/status", +@@ -373,6 +377,7 @@ cc_library( + name = "model_exporter", + srcs = ["model_exporter.cc"], + hdrs = ["model_exporter.h"], ++ features = ["-layering_check"], + deps = [ + ":linear_solver_cc_proto", + "//ortools/base", +@@ -412,6 +417,7 @@ cc_library( + name = "solve_mp_model", + srcs = ["solve_mp_model.cc"], + hdrs = ["solve_mp_model.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":linear_solver", +diff --git a/ortools/linear_solver/proto_solver/BUILD.bazel b/ortools/linear_solver/proto_solver/BUILD.bazel +index 57a1d82..3998779 100644 +--- a/ortools/linear_solver/proto_solver/BUILD.bazel ++++ b/ortools/linear_solver/proto_solver/BUILD.bazel +@@ -16,6 +16,7 @@ package(default_visibility = ["//visibility:public"]) + cc_library( + name = "proto_utils", + hdrs = ["proto_utils.h"], ++ features = ["-layering_check"], + visibility 
= ["//visibility:public"], + deps = [ + "//ortools/port:proto_utils", +@@ -28,6 +29,7 @@ cc_library( + name = "glop_proto_solver", + srcs = ["glop_proto_solver.cc"], + hdrs = ["glop_proto_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":proto_utils", + "//ortools/glop:lp_solver", +@@ -52,6 +54,7 @@ cc_library( + name = "pdlp_proto_solver", + srcs = ["pdlp_proto_solver.cc"], + hdrs = ["pdlp_proto_solver.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:logging", + "//ortools/linear_solver:linear_solver_cc_proto", +@@ -71,6 +74,7 @@ cc_library( + name = "sat_solver_utils", + srcs = ["sat_solver_utils.cc"], + hdrs = ["sat_solver_utils.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/glop:parameters_cc_proto", + "//ortools/glop:preprocessor", +@@ -85,6 +89,7 @@ cc_library( + name = "sat_proto_solver", + srcs = ["sat_proto_solver.cc"], + hdrs = ["sat_proto_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":proto_utils", + ":sat_solver_utils", +@@ -118,6 +123,7 @@ cc_library( + "//ortools/linear_solver:use_scip": ["USE_SCIP"], + "//conditions:default": [], + }), ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:timer", +@@ -144,6 +150,7 @@ cc_library( + name = "gurobi_proto_solver", + srcs = ["gurobi_proto_solver.cc"], + hdrs = ["gurobi_proto_solver.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:timer", + "//ortools/gurobi:environment", +@@ -171,6 +178,7 @@ cc_library( + "//ortools/linear_solver:use_highs": ["USE_HIGHS"], + "//conditions:default": [], + }), ++ features = ["-layering_check"], + deps = [ + "//ortools/base:timer", + "//ortools/linear_solver:linear_solver_cc_proto", +@@ -186,6 +194,7 @@ cc_library( + name = "xpress_proto_solver", + srcs = ["xpress_proto_solver.cc"], + hdrs = ["xpress_proto_solver.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:timer", + "//ortools/linear_solver:linear_solver_cc_proto", +diff --git a/ortools/linear_solver/wrappers/BUILD.bazel b/ortools/linear_solver/wrappers/BUILD.bazel +index f0f031b..fce5554 100644 +--- a/ortools/linear_solver/wrappers/BUILD.bazel ++++ b/ortools/linear_solver/wrappers/BUILD.bazel +@@ -35,6 +35,7 @@ cc_library( + "-DUSE_SCIP", + "-DUSE_LP_PARSER", + ], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + "//ortools/base:file", +diff --git a/ortools/lp_data/BUILD.bazel b/ortools/lp_data/BUILD.bazel +index c0e2993..b8bbb47 100644 +--- a/ortools/lp_data/BUILD.bazel ++++ b/ortools/lp_data/BUILD.bazel +@@ -48,6 +48,7 @@ cc_library( + name = "base", + srcs = ["lp_types.cc"], + hdrs = ["lp_types.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:hash", +@@ -61,6 +62,7 @@ cc_library( + name = "permutation", + hdrs = ["permutation.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + "//ortools/base", +@@ -73,6 +75,7 @@ cc_library( + cc_library( + name = "scattered_vector", + hdrs = ["scattered_vector.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + "//ortools/base", +@@ -86,6 +89,7 @@ cc_library( + name = "sparse_vector", + hdrs = ["sparse_vector.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":permutation", +@@ -102,6 +106,7 @@ cc_library( + srcs = ["sparse_column.cc"], + hdrs = ["sparse_column.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":sparse_vector", +@@ -113,6 +118,7 @@ cc_library( + name = 
"sparse_row", + hdrs = ["sparse_row.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":sparse_vector", +@@ -127,6 +133,7 @@ cc_library( + "sparse.h", + ], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":matrix_scaler_hdr", +@@ -148,6 +155,7 @@ cc_library( + srcs = ["matrix_scaler.cc"], + hdrs = ["matrix_scaler.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":lp_utils", +@@ -165,6 +173,7 @@ cc_library( + cc_library( + name = "matrix_scaler_hdr", + hdrs = ["matrix_scaler.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + "//ortools/base", +@@ -177,6 +186,7 @@ cc_library( + srcs = ["lp_data.cc"], + hdrs = ["lp_data.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":lp_print_utils", +@@ -200,6 +210,7 @@ cc_library( + name = "lp_data_utils", + srcs = ["lp_data_utils.cc"], + hdrs = ["lp_data_utils.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":lp_data", +@@ -213,6 +224,7 @@ cc_library( + srcs = ["lp_utils.cc"], + hdrs = ["lp_utils.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":scattered_vector", +@@ -227,6 +239,7 @@ cc_library( + srcs = ["matrix_utils.cc"], + hdrs = ["matrix_utils.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":sparse", +@@ -241,6 +254,7 @@ cc_library( + hdrs = ["lp_parser.h"], + copts = SAFE_FP_CODE, + defines = ["USE_LP_PARSER"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":lp_data", +@@ -271,6 +285,7 @@ cc_library( + srcs = ["lp_print_utils.cc"], + hdrs = ["lp_print_utils.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + "//ortools/base", +@@ -285,6 +300,7 @@ cc_library( + srcs = ["proto_utils.cc"], + hdrs = ["proto_utils.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":lp_data", +@@ -297,6 +313,7 @@ cc_library( + name = "mps_reader_template", + srcs = ["mps_reader_template.cc"], + hdrs = ["mps_reader_template.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:map_util", +@@ -317,6 +334,7 @@ cc_library( + srcs = ["mps_reader.cc"], + hdrs = ["mps_reader.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":lp_data", + ":lp_print_utils", +@@ -337,6 +355,7 @@ cc_library( + name = "model_reader", + srcs = ["model_reader.cc"], + hdrs = ["model_reader.h"], ++ features = ["-layering_check"], + deps = [ + ":lp_data", + ":mps_reader", +@@ -354,6 +373,7 @@ cc_library( + srcs = ["lp_decomposer.cc"], + hdrs = ["lp_decomposer.h"], + copts = SAFE_FP_CODE, ++ features = ["-layering_check"], + deps = [ + ":base", + ":lp_data", +@@ -370,6 +390,7 @@ cc_library( + name = "sol_reader", + srcs = ["sol_reader.cc"], + hdrs = ["sol_reader.h"], ++ features = ["-layering_check"], + deps = [ + ":base", + ":lp_data", +diff --git a/ortools/math_opt/constraints/indicator/BUILD.bazel b/ortools/math_opt/constraints/indicator/BUILD.bazel +index 12fdf6d..e4d2fa4 100644 +--- a/ortools/math_opt/constraints/indicator/BUILD.bazel ++++ b/ortools/math_opt/constraints/indicator/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "indicator_constraint", + srcs = ["indicator_constraint.cc"], + hdrs = ["indicator_constraint.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:intops", + "//ortools/math_opt/constraints/util:model_util", +@@ -45,6 +46,7 @@ cc_library( + name = 
"storage", + srcs = ["storage.cc"], + hdrs = ["storage.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:intops", + "//ortools/math_opt:model_cc_proto", +@@ -75,6 +77,7 @@ cc_library( + name = "validator", + srcs = ["validator.cc"], + hdrs = ["validator.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "//ortools/math_opt:model_cc_proto", +diff --git a/ortools/math_opt/constraints/quadratic/BUILD.bazel b/ortools/math_opt/constraints/quadratic/BUILD.bazel +index e4a0925..d521c19 100644 +--- a/ortools/math_opt/constraints/quadratic/BUILD.bazel ++++ b/ortools/math_opt/constraints/quadratic/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "quadratic_constraint", + srcs = ["quadratic_constraint.cc"], + hdrs = ["quadratic_constraint.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:intops", + "//ortools/math_opt/constraints/util:model_util", +@@ -50,6 +51,7 @@ cc_library( + name = "storage", + srcs = ["storage.cc"], + hdrs = ["storage.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt:model_cc_proto", + "//ortools/math_opt:model_update_cc_proto", +@@ -81,6 +83,7 @@ cc_library( + name = "validator", + srcs = ["validator.cc"], + hdrs = ["validator.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "//ortools/math_opt:model_cc_proto", +diff --git a/ortools/math_opt/constraints/second_order_cone/BUILD.bazel b/ortools/math_opt/constraints/second_order_cone/BUILD.bazel +index 37ed646..17b383d 100644 +--- a/ortools/math_opt/constraints/second_order_cone/BUILD.bazel ++++ b/ortools/math_opt/constraints/second_order_cone/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "second_order_cone_constraint", + srcs = ["second_order_cone_constraint.cc"], + hdrs = ["second_order_cone_constraint.h"], ++ features = ["-layering_check"], + deps = [ + ":storage", + "//ortools/base:intops", +@@ -47,6 +48,7 @@ cc_library( + name = "storage", + srcs = ["storage.cc"], + hdrs = ["storage.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:intops", + "//ortools/math_opt:model_cc_proto", +@@ -79,6 +81,7 @@ cc_library( + name = "validator", + srcs = ["validator.cc"], + hdrs = ["validator.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "//ortools/math_opt:model_cc_proto", +diff --git a/ortools/math_opt/constraints/sos/BUILD.bazel b/ortools/math_opt/constraints/sos/BUILD.bazel +index fade5cb..aad7cd5 100644 +--- a/ortools/math_opt/constraints/sos/BUILD.bazel ++++ b/ortools/math_opt/constraints/sos/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "sos1_constraint", + srcs = ["sos1_constraint.cc"], + hdrs = ["sos1_constraint.h"], ++ features = ["-layering_check"], + deps = [ + ":util", + "//ortools/base:intops", +@@ -50,6 +51,7 @@ cc_library( + name = "sos2_constraint", + srcs = ["sos2_constraint.cc"], + hdrs = ["sos2_constraint.h"], ++ features = ["-layering_check"], + deps = [ + ":util", + "//ortools/base:intops", +@@ -82,6 +84,7 @@ cc_test( + cc_library( + name = "storage", + hdrs = ["storage.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:intops", + "//ortools/math_opt:model_cc_proto", +@@ -112,6 +115,7 @@ cc_test( + cc_library( + name = "util", + hdrs = ["util.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt/cpp:variable_and_expressions", + "//ortools/util:fp_roundtrip_conv", +@@ -123,6 +127,7 @@ cc_library( + name = "validator", + srcs = ["validator.cc"], + hdrs = ["validator.h"], ++ 
features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "//ortools/math_opt:model_cc_proto", +diff --git a/ortools/math_opt/constraints/util/BUILD.bazel b/ortools/math_opt/constraints/util/BUILD.bazel +index c3d0c06..968ba25 100644 +--- a/ortools/math_opt/constraints/util/BUILD.bazel ++++ b/ortools/math_opt/constraints/util/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "model_util", + srcs = ["model_util.cc"], + hdrs = ["model_util.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:intops", + "//ortools/math_opt/cpp:variable_and_expressions", +diff --git a/ortools/math_opt/core/BUILD.bazel b/ortools/math_opt/core/BUILD.bazel +index 06da18f..45f3170 100644 +--- a/ortools/math_opt/core/BUILD.bazel ++++ b/ortools/math_opt/core/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "math_opt_proto_utils", + srcs = ["math_opt_proto_utils.cc"], + hdrs = ["math_opt_proto_utils.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":sparse_vector_view", +@@ -42,6 +43,7 @@ cc_library( + cc_library( + name = "sparse_vector_view", + hdrs = ["sparse_vector_view.h"], ++ features = ["-layering_check"], + deps = [ + ":arrow_operator_proxy", + ":sparse_vector", +@@ -59,6 +61,7 @@ cc_library( + name = "model_summary", + srcs = ["model_summary.cc"], + hdrs = ["model_summary.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:linked_hash_map", + "//ortools/base:status_macros", +@@ -78,6 +81,7 @@ cc_library( + name = "solver_interface", + srcs = ["solver_interface.cc"], + hdrs = ["solver_interface.h"], ++ features = ["-layering_check"], + deps = [ + ":non_streamable_solver_init_arguments", + "//ortools/base:map_util", +@@ -104,6 +108,7 @@ cc_library( + name = "solver", + srcs = ["solver.cc"], + hdrs = ["solver.h"], ++ features = ["-layering_check"], + deps = [ + ":base_solver", + ":concurrent_calls_guard", +@@ -139,6 +144,7 @@ cc_library( + name = "non_streamable_solver_init_arguments", + srcs = ["non_streamable_solver_init_arguments.cc"], + hdrs = ["non_streamable_solver_init_arguments.h"], ++ features = ["-layering_check"], + deps = ["//ortools/math_opt:parameters_cc_proto"], + ) + +@@ -146,22 +152,26 @@ cc_library( + name = "solver_debug", + srcs = ["solver_debug.cc"], + hdrs = ["solver_debug.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "arrow_operator_proxy", + hdrs = ["arrow_operator_proxy.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "sparse_vector", + hdrs = ["sparse_vector.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "sparse_submatrix", + srcs = ["sparse_submatrix.cc"], + hdrs = ["sparse_submatrix.h"], ++ features = ["-layering_check"], + deps = [ + ":sparse_vector", + ":sparse_vector_view", +@@ -176,6 +186,7 @@ cc_library( + name = "inverted_bounds", + srcs = ["inverted_bounds.cc"], + hdrs = ["inverted_bounds.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "@com_google_absl//absl/status", +@@ -188,6 +199,7 @@ cc_library( + name = "invalid_indicators", + srcs = ["invalid_indicators.cc"], + hdrs = ["invalid_indicators.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "@com_google_absl//absl/algorithm:container", +@@ -200,6 +212,7 @@ cc_library( + name = "concurrent_calls_guard", + srcs = ["concurrent_calls_guard.cc"], + hdrs = ["concurrent_calls_guard.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/base:core_headers", + 
"@com_google_absl//absl/log:check", +@@ -213,6 +226,7 @@ cc_library( + name = "empty_bounds", + srcs = ["empty_bounds.cc"], + hdrs = ["empty_bounds.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt:result_cc_proto", + "//ortools/util:fp_roundtrip_conv", +@@ -223,6 +237,7 @@ cc_library( + cc_library( + name = "sorted", + hdrs = ["sorted.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", +@@ -235,6 +250,7 @@ cc_library( + name = "base_solver", + srcs = ["base_solver.cc"], + hdrs = ["base_solver.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt:callback_cc_proto", + "//ortools/math_opt:infeasible_subsystem_cc_proto", +diff --git a/ortools/math_opt/cpp/BUILD.bazel b/ortools/math_opt/cpp/BUILD.bazel +index a606388..51e4b90 100644 +--- a/ortools/math_opt/cpp/BUILD.bazel ++++ b/ortools/math_opt/cpp/BUILD.bazel +@@ -20,6 +20,7 @@ package(default_visibility = [ + cc_library( + name = "math_opt", + hdrs = ["math_opt.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":model", +@@ -32,6 +33,7 @@ cc_library( + name = "basis_status", + srcs = ["basis_status.cc"], + hdrs = ["basis_status.h"], ++ features = ["-layering_check"], + deps = [ + ":enums", + "//ortools/math_opt:solution_cc_proto", +@@ -44,6 +46,7 @@ cc_library( + name = "sparse_containers", + srcs = ["sparse_containers.cc"], + hdrs = ["sparse_containers.h"], ++ features = ["-layering_check"], + deps = [ + ":basis_status", + ":linear_constraint", +@@ -71,6 +74,7 @@ cc_library( + name = "model", + srcs = ["model.cc"], + hdrs = ["model.h"], ++ features = ["-layering_check"], + deps = [ + ":key_types", + ":linear_constraint", +@@ -105,6 +109,7 @@ cc_library( + name = "variable_and_expressions", + srcs = ["variable_and_expressions.cc"], + hdrs = ["variable_and_expressions.h"], ++ features = ["-layering_check"], + deps = [ + ":formatters", + ":key_types", +@@ -126,6 +131,7 @@ cc_library( + name = "objective", + srcs = ["objective.cc"], + hdrs = ["objective.h"], ++ features = ["-layering_check"], + deps = [ + ":key_types", + ":variable_and_expressions", +@@ -140,6 +146,7 @@ cc_library( + cc_library( + name = "linear_constraint", + hdrs = ["linear_constraint.h"], ++ features = ["-layering_check"], + deps = [ + ":key_types", + ":variable_and_expressions", +@@ -156,6 +163,7 @@ cc_library( + name = "solution", + srcs = ["solution.cc"], + hdrs = ["solution.h"], ++ features = ["-layering_check"], + deps = [ + ":basis_status", + ":enums", +@@ -184,6 +192,7 @@ cc_library( + name = "solve_result", + srcs = ["solve_result.cc"], + hdrs = ["solve_result.h"], ++ features = ["-layering_check"], + deps = [ + ":enums", + ":linear_constraint", +@@ -212,6 +221,7 @@ cc_library( + name = "map_filter", + srcs = ["map_filter.cc"], + hdrs = ["map_filter.h"], ++ features = ["-layering_check"], + deps = [ + ":key_types", + ":linear_constraint", +@@ -231,6 +241,7 @@ cc_library( + name = "callback", + srcs = ["callback.cc"], + hdrs = ["callback.h"], ++ features = ["-layering_check"], + deps = [ + ":enums", + ":map_filter", +@@ -256,6 +267,7 @@ cc_library( + cc_library( + name = "key_types", + hdrs = ["key_types.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt/storage:model_storage", + "@com_google_absl//absl/algorithm:container", +@@ -270,6 +282,7 @@ cc_library( + name = "model_solve_parameters", + srcs = ["model_solve_parameters.cc"], + hdrs = 
["model_solve_parameters.h"], ++ features = ["-layering_check"], + deps = [ + ":linear_constraint", + ":map_filter", +@@ -295,6 +308,7 @@ cc_library( + name = "update_tracker", + srcs = ["update_tracker.cc"], + hdrs = ["update_tracker.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:logging", + "//ortools/math_opt:model_cc_proto", +@@ -310,6 +324,7 @@ cc_library( + name = "message_callback", + srcs = ["message_callback.cc"], + hdrs = ["message_callback.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:logging", + "//ortools/base:source_location", +@@ -323,6 +338,7 @@ cc_library( + cc_library( + name = "solver_init_arguments", + hdrs = ["solver_init_arguments.h"], ++ features = ["-layering_check"], + deps = [ + ":streamable_solver_init_arguments", + "//ortools/math_opt/core:non_streamable_solver_init_arguments", +@@ -333,6 +349,7 @@ cc_library( + name = "solve_arguments", + srcs = ["solve_arguments.cc"], + hdrs = ["solve_arguments.h"], ++ features = ["-layering_check"], + deps = [ + ":callback", + ":message_callback", +@@ -350,6 +367,7 @@ cc_library( + name = "solve", + srcs = ["solve.cc"], + hdrs = ["solve.h"], ++ features = ["-layering_check"], + deps = [ + ":compute_infeasible_subsystem_arguments", + ":compute_infeasible_subsystem_result", +@@ -375,6 +393,7 @@ cc_library( + name = "streamable_solver_init_arguments", + srcs = ["streamable_solver_init_arguments.cc"], + hdrs = ["streamable_solver_init_arguments.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt:parameters_cc_proto", + "//ortools/math_opt/solvers:gurobi_cc_proto", +@@ -386,6 +405,7 @@ cc_library( + name = "parameters", + srcs = ["parameters.cc"], + hdrs = ["parameters.h"], ++ features = ["-layering_check"], + deps = [ + ":enums", + "//ortools/base:linked_hash_map", +@@ -414,6 +434,7 @@ cc_library( + testonly = 1, + srcs = ["matchers.cc"], + hdrs = ["matchers.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":linear_constraint", +@@ -435,6 +456,7 @@ cc_library( + cc_library( + name = "enums", + hdrs = ["enums.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/strings", +@@ -446,6 +468,7 @@ cc_library( + name = "statistics", + srcs = ["statistics.cc"], + hdrs = ["statistics.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":model", +@@ -456,12 +479,14 @@ cc_library( + cc_library( + name = "formatters", + hdrs = ["formatters.h"], ++ features = ["-layering_check"], + deps = ["//ortools/util:fp_roundtrip_conv"], + ) + + cc_library( + name = "update_result", + hdrs = ["update_result.h"], ++ features = ["-layering_check"], + deps = ["//ortools/math_opt:model_update_cc_proto"], + ) + +@@ -469,6 +494,7 @@ cc_library( + name = "compute_infeasible_subsystem_result", + srcs = ["compute_infeasible_subsystem_result.cc"], + hdrs = ["compute_infeasible_subsystem_result.h"], ++ features = ["-layering_check"], + deps = [ + ":enums", + ":key_types", +@@ -500,6 +526,7 @@ cc_library( + cc_library( + name = "compute_infeasible_subsystem_arguments", + hdrs = ["compute_infeasible_subsystem_arguments.h"], ++ features = ["-layering_check"], + deps = [ + ":message_callback", + ":parameters", +@@ -511,6 +538,7 @@ cc_library( + name = "solver_resources", + srcs = ["solver_resources.cc"], + hdrs = ["solver_resources.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt:rpc_cc_proto", + "//ortools/port:proto_utils", +@@ -524,6 
+552,7 @@ cc_library( + name = "solve_impl", + srcs = ["solve_impl.cc"], + hdrs = ["solve_impl.h"], ++ features = ["-layering_check"], + deps = [ + ":compute_infeasible_subsystem_arguments", + ":compute_infeasible_subsystem_result", +@@ -551,6 +580,7 @@ cc_library( + cc_library( + name = "incremental_solver", + hdrs = ["incremental_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":compute_infeasible_subsystem_arguments", + ":compute_infeasible_subsystem_result", +diff --git a/ortools/math_opt/io/BUILD.bazel b/ortools/math_opt/io/BUILD.bazel +index 428beaf..8e1bb4a 100644 +--- a/ortools/math_opt/io/BUILD.bazel ++++ b/ortools/math_opt/io/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "proto_converter", + srcs = ["proto_converter.cc"], + hdrs = ["proto_converter.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "//ortools/linear_solver:linear_solver_cc_proto", +@@ -41,6 +42,7 @@ cc_library( + name = "mps_converter", + srcs = ["mps_converter.cc"], + hdrs = ["mps_converter.h"], ++ features = ["-layering_check"], + deps = [ + ":proto_converter", + "//ortools/base:status_macros", +@@ -57,6 +59,7 @@ cc_library( + name = "names_removal", + srcs = ["names_removal.cc"], + hdrs = ["names_removal.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt:model_cc_proto", + "//ortools/math_opt:model_update_cc_proto", +@@ -67,6 +70,7 @@ cc_library( + name = "lp_converter", + srcs = ["lp_converter.cc"], + hdrs = ["lp_converter.h"], ++ features = ["-layering_check"], + deps = [ + ":proto_converter", + "//ortools/base:status_macros", +@@ -81,6 +85,7 @@ cc_library( + name = "lp_parser", + srcs = ["lp_parser.cc"], + hdrs = ["lp_parser.h"], ++ features = ["-layering_check"], + deps = [ + ":mps_converter", + "//ortools/base", +diff --git a/ortools/math_opt/io/lp/BUILD.bazel b/ortools/math_opt/io/lp/BUILD.bazel +index 95d079c..02f1634 100644 +--- a/ortools/math_opt/io/lp/BUILD.bazel ++++ b/ortools/math_opt/io/lp/BUILD.bazel +@@ -15,6 +15,7 @@ cc_library( + name = "lp_model", + srcs = ["lp_model.cc"], + hdrs = ["lp_model.h"], ++ features = ["-layering_check"], + deps = [ + ":lp_name", + "//ortools/base:intops", +@@ -32,6 +33,7 @@ cc_library( + name = "lp_name", + srcs = ["lp_name.cc"], + hdrs = ["lp_name.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "@com_google_absl//absl/status", +@@ -44,6 +46,7 @@ cc_library( + name = "model_utils", + srcs = ["model_utils.cc"], + hdrs = ["model_utils.h"], ++ features = ["-layering_check"], + deps = [ + ":lp_model", + "//ortools/base:status_macros", +diff --git a/ortools/math_opt/labs/BUILD.bazel b/ortools/math_opt/labs/BUILD.bazel +index d048d84..c57e06a 100644 +--- a/ortools/math_opt/labs/BUILD.bazel ++++ b/ortools/math_opt/labs/BUILD.bazel +@@ -15,6 +15,7 @@ cc_library( + name = "general_constraint_to_mip", + srcs = ["general_constraint_to_mip.cc"], + hdrs = ["general_constraint_to_mip.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":linear_expr_util", +@@ -28,6 +29,7 @@ cc_library( + name = "linear_expr_util", + srcs = ["linear_expr_util.cc"], + hdrs = ["linear_expr_util.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + "//ortools/math_opt/cpp:math_opt", +@@ -39,6 +41,7 @@ cc_library( + name = "solution_feasibility_checker", + srcs = ["solution_feasibility_checker.cc"], + hdrs = ["solution_feasibility_checker.h"], ++ features = ["-layering_check"], + visibility = 
["//visibility:public"], + deps = [ + "//ortools/base:mathutil", +@@ -57,6 +60,7 @@ cc_library( + name = "solution_improvement", + srcs = ["solution_improvement.cc"], + hdrs = ["solution_improvement.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + "//ortools/base:status_macros", +@@ -77,6 +81,7 @@ cc_library( + "dualizer.cc", + ], + hdrs = ["dualizer.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + "//ortools/base:map_util", +diff --git a/ortools/math_opt/solver_tests/BUILD.bazel b/ortools/math_opt/solver_tests/BUILD.bazel +index 48fc1d2..e43f488 100644 +--- a/ortools/math_opt/solver_tests/BUILD.bazel ++++ b/ortools/math_opt/solver_tests/BUILD.bazel +@@ -18,6 +18,7 @@ cc_library( + testonly = 1, + srcs = ["base_solver_test.cc"], + hdrs = ["base_solver_test.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + "//ortools/base:linked_hash_map", +@@ -34,6 +35,7 @@ cc_library( + data = [ + "//ortools/math_opt/solver_tests/testdata:23588.mps", + ], ++ features = ["-layering_check"], + deps = [ + ":base_solver_test", + ":test_models", +@@ -69,6 +71,7 @@ cc_library( + data = [ + "//ortools/math_opt/solver_tests/testdata:23588.mps", + ], ++ features = ["-layering_check"], + deps = [ + ":test_models", + "//ortools/base", +@@ -91,6 +94,7 @@ cc_library( + testonly = 1, + srcs = ["lp_tests.cc"], + hdrs = ["lp_tests.h"], ++ features = ["-layering_check"], + deps = [ + ":base_solver_test", + "//ortools/base:gmock", +@@ -111,6 +115,7 @@ cc_library( + testonly = 1, + srcs = ["lp_incomplete_solve_tests.cc"], + hdrs = ["lp_incomplete_solve_tests.h"], ++ features = ["-layering_check"], + deps = [ + ":test_models", + "//ortools/base", +@@ -131,6 +136,7 @@ cc_library( + testonly = 1, + srcs = ["invalid_input_tests.cc"], + hdrs = ["invalid_input_tests.h"], ++ features = ["-layering_check"], + deps = [ + ":base_solver_test", + "//ortools/base:gmock", +@@ -167,6 +173,7 @@ cc_library( + testonly = 1, + srcs = ["mip_tests.cc"], + hdrs = ["mip_tests.h"], ++ features = ["-layering_check"], + deps = [ + ":base_solver_test", + "//ortools/base", +@@ -185,6 +192,7 @@ cc_library( + testonly = 1, + srcs = ["ip_model_solve_parameters_tests.cc"], + hdrs = ["ip_model_solve_parameters_tests.h"], ++ features = ["-layering_check"], + deps = [ + ":base_solver_test", + "//ortools/base:gmock", +@@ -204,6 +212,7 @@ cc_library( + testonly = 1, + srcs = ["ip_multiple_solutions_tests.cc"], + hdrs = ["ip_multiple_solutions_tests.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + "//ortools/math_opt/cpp:matchers", +@@ -220,6 +229,7 @@ cc_library( + testonly = 1, + srcs = ["lp_model_solve_parameters_tests.cc"], + hdrs = ["lp_model_solve_parameters_tests.h"], ++ features = ["-layering_check"], + deps = [ + ":base_solver_test", + ":test_models", +@@ -237,6 +247,7 @@ cc_library( + testonly = 1, + srcs = ["lp_parameter_tests.cc"], + hdrs = ["lp_parameter_tests.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + "//ortools/base:status_macros", +@@ -258,6 +269,7 @@ cc_library( + testonly = 1, + srcs = ["lp_initial_basis_tests.cc"], + hdrs = ["lp_initial_basis_tests.h"], ++ features = ["-layering_check"], + deps = [ + ":base_solver_test", + "//ortools/base:gmock", +@@ -279,6 +291,7 @@ cc_library( + "//ortools/math_opt/solver_tests/testdata:23588.mps", + "//ortools/math_opt/solver_tests/testdata:beavma.mps", + ], ++ features = ["-layering_check"], + deps = [ + ":test_models", + 
"//ortools/base", +@@ -305,6 +318,7 @@ cc_library( + testonly = 1, + srcs = ["multi_objective_tests.cc"], + hdrs = ["multi_objective_tests.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + "//ortools/base:status_macros", +@@ -327,6 +341,7 @@ cc_library( + testonly = 1, + srcs = ["qp_tests.cc"], + hdrs = ["qp_tests.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:gmock", +@@ -345,6 +360,7 @@ cc_library( + testonly = 1, + srcs = ["qc_tests.cc"], + hdrs = ["qc_tests.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + "//ortools/math_opt/cpp:matchers", +@@ -363,6 +379,7 @@ cc_library( + testonly = 1, + srcs = ["second_order_cone_tests.cc"], + hdrs = ["second_order_cone_tests.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + "//ortools/math_opt/cpp:matchers", +@@ -381,6 +398,7 @@ cc_library( + testonly = 1, + srcs = ["logical_constraint_tests.cc"], + hdrs = ["logical_constraint_tests.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + "//ortools/math_opt:model_update_cc_proto", +@@ -401,6 +419,7 @@ cc_library( + testonly = 1, + srcs = ["test_models.cc"], + hdrs = ["test_models.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt/cpp:math_opt", + "@com_google_absl//absl/log:check", +@@ -426,6 +445,7 @@ cc_library( + testonly = True, + srcs = ["generic_tests.cc"], + hdrs = ["generic_tests.h"], ++ features = ["-layering_check"], + deps = [ + ":test_models", + "//ortools/base:gmock", +@@ -452,6 +472,7 @@ cc_library( + testonly = 1, + srcs = ["infeasible_subsystem_tests.cc"], + hdrs = ["infeasible_subsystem_tests.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + "//ortools/gurobi:gurobi_stdout_matchers", +diff --git a/ortools/math_opt/solvers/BUILD.bazel b/ortools/math_opt/solvers/BUILD.bazel +index e7e8054..ef6c123 100644 +--- a/ortools/math_opt/solvers/BUILD.bazel ++++ b/ortools/math_opt/solvers/BUILD.bazel +@@ -22,6 +22,7 @@ cc_library( + "gscip_solver.cc", + "gscip_solver.h", + ], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":message_callback_data", +@@ -106,6 +107,7 @@ cc_library( + name = "gurobi_callback", + srcs = ["gurobi_callback.cc"], + hdrs = ["gurobi_callback.h"], ++ features = ["-layering_check"], + deps = [ + ":message_callback_data", + "//ortools/base:linked_hash_map", +@@ -140,6 +142,7 @@ cc_library( + hdrs = [ + "gurobi_init_arguments.h", + ], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":gurobi_callback", +@@ -195,6 +198,7 @@ cc_library( + "glop_solver.cc", + "glop_solver.h", + ], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + "//ortools/base:map_util", +@@ -245,6 +249,7 @@ cc_library( + "cp_sat_solver.cc", + "cp_sat_solver.h", + ], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + "//ortools/base:protoutil", +@@ -311,9 +316,9 @@ cc_test( + + cc_test( + name = "cp_sat_solver_test", ++ timeout = "eternal", + srcs = ["cp_sat_solver_test.cc"], + shard_count = 10, +- timeout = "eternal", + deps = [ + ":cp_sat_solver", + "//ortools/base:gmock_main", +@@ -342,6 +347,7 @@ cc_library( + name = "message_callback_data", + srcs = ["message_callback_data.cc"], + hdrs = ["message_callback_data.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/math_opt/core:solver_interface", + "@com_google_absl//absl/strings", +@@ 
-365,6 +371,7 @@ cc_library( + name = "pdlp_bridge", + srcs = ["pdlp_bridge.cc"], + hdrs = ["pdlp_bridge.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "//ortools/math_opt:model_cc_proto", +@@ -391,6 +398,7 @@ cc_library( + "pdlp_solver.cc", + "pdlp_solver.h", + ], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":pdlp_bridge", +@@ -464,6 +472,7 @@ cc_library( + "glpk_solver.cc", + "glpk_solver.h", + ], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":glpk_cc_proto", +@@ -563,6 +572,7 @@ cc_library( + "highs_solver.cc", + "highs_solver.h", + ], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":highs_cc_proto", +diff --git a/ortools/math_opt/solvers/glpk/BUILD.bazel b/ortools/math_opt/solvers/glpk/BUILD.bazel +index b33dd3b..af07950 100644 +--- a/ortools/math_opt/solvers/glpk/BUILD.bazel ++++ b/ortools/math_opt/solvers/glpk/BUILD.bazel +@@ -18,6 +18,7 @@ cc_library( + name = "rays", + srcs = ["rays.cc"], + hdrs = ["rays.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:logging", + "//ortools/base:status_macros", +@@ -35,6 +36,7 @@ cc_library( + name = "glpk_sparse_vector", + srcs = ["glpk_sparse_vector.cc"], + hdrs = ["glpk_sparse_vector.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:logging", + "@com_google_absl//absl/log:check", +@@ -54,6 +56,7 @@ cc_library( + name = "gap", + srcs = ["gap.cc"], + hdrs = ["gap.h"], ++ features = ["-layering_check"], + ) + + cc_test( +diff --git a/ortools/math_opt/solvers/gscip/BUILD.bazel b/ortools/math_opt/solvers/gscip/BUILD.bazel +index fd91d85..fcc2c9c 100644 +--- a/ortools/math_opt/solvers/gscip/BUILD.bazel ++++ b/ortools/math_opt/solvers/gscip/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "gscip_solver_constraint_handler", + srcs = ["gscip_solver_constraint_handler.cc"], + hdrs = ["gscip_solver_constraint_handler.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:linked_hash_map", + "//ortools/base:protoutil", +diff --git a/ortools/math_opt/solvers/gurobi/BUILD.bazel b/ortools/math_opt/solvers/gurobi/BUILD.bazel +index 32f8f39..5039d84 100644 +--- a/ortools/math_opt/solvers/gurobi/BUILD.bazel ++++ b/ortools/math_opt/solvers/gurobi/BUILD.bazel +@@ -19,6 +19,7 @@ cc_library( + hdrs = [ + "g_gurobi.h", + ], ++ features = ["-layering_check"], + visibility = [ + "//ortools/gurobi:__subpackages__", + "//ortools/math_opt:__subpackages__", +diff --git a/ortools/math_opt/storage/BUILD.bazel b/ortools/math_opt/storage/BUILD.bazel +index cb85a81..459d2b0 100644 +--- a/ortools/math_opt/storage/BUILD.bazel ++++ b/ortools/math_opt/storage/BUILD.bazel +@@ -16,6 +16,7 @@ package(default_visibility = ["//ortools/math_opt:__subpackages__"]) + cc_library( + name = "model_storage_types", + hdrs = ["model_storage_types.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:intops", + "@com_google_absl//absl/strings", +@@ -25,11 +26,13 @@ cc_library( + cc_library( + name = "range", + hdrs = ["range.h"], ++ features = ["-layering_check"], + ) + + cc_library( + name = "iterators", + hdrs = ["iterators.h"], ++ features = ["-layering_check"], + deps = [":range"], + ) + +@@ -37,6 +40,7 @@ cc_library( + name = "sparse_coefficient_map", + srcs = ["sparse_coefficient_map.cc"], + hdrs = ["sparse_coefficient_map.h"], ++ features = ["-layering_check"], + deps = [ + ":model_storage_types", + "//ortools/base:intops", +@@ -51,6 +55,7 @@ cc_library( 
+ name = "sparse_matrix", + srcs = ["sparse_matrix.cc"], + hdrs = ["sparse_matrix.h"], ++ features = ["-layering_check"], + deps = [ + ":model_storage_types", + "//ortools/base:intops", +@@ -67,6 +72,7 @@ cc_library( + cc_library( + name = "linear_expression_data", + hdrs = ["linear_expression_data.h"], ++ features = ["-layering_check"], + deps = [ + ":sparse_coefficient_map", + "//ortools/math_opt:sparse_containers_cc_proto", +@@ -78,6 +84,7 @@ cc_library( + cc_library( + name = "update_trackers", + hdrs = ["update_trackers.h"], ++ features = ["-layering_check"], + deps = [ + ":model_storage_types", + "//ortools/base:intops", +@@ -93,6 +100,7 @@ cc_library( + name = "variable_storage", + srcs = ["variable_storage.cc"], + hdrs = ["variable_storage.h"], ++ features = ["-layering_check"], + deps = [ + ":model_storage_types", + ":range", +@@ -112,6 +120,7 @@ cc_library( + name = "objective_storage", + srcs = ["objective_storage.cc"], + hdrs = ["objective_storage.h"], ++ features = ["-layering_check"], + deps = [ + ":range", + ":sparse_coefficient_map", +@@ -136,6 +145,7 @@ cc_library( + name = "linear_constraint_storage", + srcs = ["linear_constraint_storage.cc"], + hdrs = ["linear_constraint_storage.h"], ++ features = ["-layering_check"], + deps = [ + ":model_storage_types", + ":range", +@@ -158,6 +168,7 @@ cc_library( + cc_library( + name = "atomic_constraint_storage", + hdrs = ["atomic_constraint_storage.h"], ++ features = ["-layering_check"], + deps = [ + ":model_storage_types", + ":range", +@@ -176,6 +187,7 @@ cc_library( + name = "model_storage", + srcs = ["model_storage.cc"], + hdrs = ["model_storage.h"], ++ features = ["-layering_check"], + deps = [ + ":atomic_constraint_storage", + ":iterators", +diff --git a/ortools/math_opt/testing/BUILD.bazel b/ortools/math_opt/testing/BUILD.bazel +index e80e4e0..058bd4b 100644 +--- a/ortools/math_opt/testing/BUILD.bazel ++++ b/ortools/math_opt/testing/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "param_name", + testonly = True, + hdrs = ["param_name.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:gmock", + ], +@@ -25,4 +26,5 @@ cc_library( + cc_library( + name = "stream", + hdrs = ["stream.h"], ++ features = ["-layering_check"], + ) +diff --git a/ortools/math_opt/tools/BUILD.bazel b/ortools/math_opt/tools/BUILD.bazel +index 55c1f2f..adfeac6 100644 +--- a/ortools/math_opt/tools/BUILD.bazel ++++ b/ortools/math_opt/tools/BUILD.bazel +@@ -66,6 +66,7 @@ cc_library( + name = "file_format_flags", + srcs = ["file_format_flags.cc"], + hdrs = ["file_format_flags.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:file", +diff --git a/ortools/math_opt/validators/BUILD.bazel b/ortools/math_opt/validators/BUILD.bazel +index 5448a93..c23ef8e 100644 +--- a/ortools/math_opt/validators/BUILD.bazel ++++ b/ortools/math_opt/validators/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "ids_validator", + srcs = ["ids_validator.cc"], + hdrs = ["ids_validator.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:status_macros", + "//ortools/math_opt/core:model_summary", +@@ -31,6 +32,7 @@ cc_library( + name = "scalar_validator", + srcs = ["scalar_validator.cc"], + hdrs = ["scalar_validator.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", +@@ -41,6 +43,7 @@ cc_library( + name = "sparse_matrix_validator", + srcs = ["sparse_matrix_validator.cc"], + hdrs = ["sparse_matrix_validator.h"], ++ features = ["-layering_check"], 
+ deps = [ + ":ids_validator", + "//ortools/base:status_macros", +@@ -56,6 +59,7 @@ cc_library( + cc_library( + name = "sparse_vector_validator", + hdrs = ["sparse_vector_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":ids_validator", + ":scalar_validator", +@@ -70,6 +74,7 @@ cc_library( + name = "model_validator", + srcs = ["model_validator.cc"], + hdrs = ["model_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":ids_validator", + ":scalar_validator", +@@ -94,6 +99,7 @@ cc_library( + name = "solve_stats_validator", + srcs = ["solve_stats_validator.cc"], + hdrs = ["solve_stats_validator.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:protoutil", + "//ortools/math_opt:result_cc_proto", +@@ -108,6 +114,7 @@ cc_library( + name = "result_validator", + srcs = ["result_validator.cc"], + hdrs = ["result_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":solution_validator", + ":solve_stats_validator", +@@ -128,6 +135,7 @@ cc_library( + name = "solution_validator", + srcs = ["solution_validator.cc"], + hdrs = ["solution_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":ids_validator", + ":scalar_validator", +@@ -148,6 +156,7 @@ cc_library( + name = "solve_parameters_validator", + srcs = ["solve_parameters_validator.cc"], + hdrs = ["solve_parameters_validator.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:protoutil", + "//ortools/base:status_macros", +@@ -164,6 +173,7 @@ cc_library( + name = "callback_validator", + srcs = ["callback_validator.cc"], + hdrs = ["callback_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":ids_validator", + ":model_parameters_validator", +@@ -190,6 +200,7 @@ cc_library( + name = "model_parameters_validator", + srcs = ["model_parameters_validator.cc"], + hdrs = ["model_parameters_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":ids_validator", + ":solution_validator", +@@ -208,6 +219,7 @@ cc_library( + name = "linear_expression_validator", + srcs = ["linear_expression_validator.cc"], + hdrs = ["linear_expression_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":scalar_validator", + ":sparse_vector_validator", +@@ -223,6 +235,7 @@ cc_library( + name = "infeasible_subsystem_validator", + srcs = ["infeasible_subsystem_validator.cc"], + hdrs = ["infeasible_subsystem_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":bounds_and_status_validator", + ":ids_validator", +@@ -238,6 +251,7 @@ cc_library( + name = "bounds_and_status_validator", + srcs = ["bounds_and_status_validator.cc"], + hdrs = ["bounds_and_status_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":scalar_validator", + "//ortools/base:status_macros", +@@ -252,6 +266,7 @@ cc_library( + name = "termination_validator", + srcs = ["termination_validator.cc"], + hdrs = ["termination_validator.h"], ++ features = ["-layering_check"], + deps = [ + ":bounds_and_status_validator", + "//ortools/base:status_macros", +diff --git a/ortools/packing/BUILD.bazel b/ortools/packing/BUILD.bazel +index 04b7014..a774e8f 100644 +--- a/ortools/packing/BUILD.bazel ++++ b/ortools/packing/BUILD.bazel +@@ -21,6 +21,7 @@ cc_library( + name = "arc_flow_builder", + srcs = ["arc_flow_builder.cc"], + hdrs = ["arc_flow_builder.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:map_util", +@@ -37,6 +38,7 @@ cc_library( + "arc_flow_solver.cc", + ], + hdrs = ["arc_flow_solver.h"], ++ features = ["-layering_check"], + deps = [ + 
":arc_flow_builder", + "//ortools/base", +@@ -65,6 +67,7 @@ cc_library( + name = "vector_bin_packing_parser", + srcs = ["vector_bin_packing_parser.cc"], + hdrs = ["vector_bin_packing_parser.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":vector_bin_packing_cc_proto", +@@ -131,6 +134,7 @@ cc_library( + name = "binpacking_2d_parser", + srcs = ["binpacking_2d_parser.cc"], + hdrs = ["binpacking_2d_parser.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":multiple_dimensions_bin_packing_cc_proto", +diff --git a/ortools/pdlp/BUILD.bazel b/ortools/pdlp/BUILD.bazel +index 5b68856..739a948 100644 +--- a/ortools/pdlp/BUILD.bazel ++++ b/ortools/pdlp/BUILD.bazel +@@ -20,6 +20,7 @@ package(default_visibility = ["//visibility:public"]) + cc_library( + name = "scheduler", + hdrs = ["scheduler.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/functional:any_invocable", + ], +@@ -62,6 +63,7 @@ py_proto_library( + cc_library( + name = "gtest_main", + srcs = ["gtest_main.cc"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:gmock", +@@ -72,6 +74,7 @@ cc_library( + name = "iteration_stats", + srcs = ["iteration_stats.cc"], + hdrs = ["iteration_stats.h"], ++ features = ["-layering_check"], + deps = [ + ":quadratic_program", + ":sharded_quadratic_program", +@@ -105,6 +108,7 @@ cc_library( + name = "primal_dual_hybrid_gradient", + srcs = ["primal_dual_hybrid_gradient.cc"], + hdrs = ["primal_dual_hybrid_gradient.h"], ++ features = ["-layering_check"], + deps = [ + ":iteration_stats", + ":quadratic_program", +@@ -169,6 +173,7 @@ cc_library( + name = "quadratic_program", + srcs = ["quadratic_program.cc"], + hdrs = ["quadratic_program.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:status_macros", +@@ -201,6 +206,7 @@ cc_library( + name = "quadratic_program_io", + srcs = ["quadratic_program_io.cc"], + hdrs = ["quadratic_program_io.h"], ++ features = ["-layering_check"], + deps = [ + ":quadratic_program", + "//ortools/base", +@@ -224,6 +230,7 @@ cc_library( + name = "sharded_optimization_utils", + srcs = ["sharded_optimization_utils.cc"], + hdrs = ["sharded_optimization_utils.h"], ++ features = ["-layering_check"], + deps = [ + ":quadratic_program", + ":sharded_quadratic_program", +@@ -256,6 +263,7 @@ cc_library( + name = "sharded_quadratic_program", + srcs = ["sharded_quadratic_program.cc"], + hdrs = ["sharded_quadratic_program.h"], ++ features = ["-layering_check"], + deps = [ + ":quadratic_program", + ":sharder", +@@ -285,6 +293,7 @@ cc_library( + name = "sharder", + srcs = ["sharder.cc"], + hdrs = ["sharder.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:mathutil", +@@ -315,6 +324,7 @@ cc_library( + name = "solvers_proto_validation", + srcs = ["solvers_proto_validation.cc"], + hdrs = ["solvers_proto_validation.h"], ++ features = ["-layering_check"], + deps = [ + ":solvers_cc_proto", + "//ortools/base:status_macros", +@@ -340,6 +350,7 @@ cc_library( + name = "termination", + srcs = ["termination.cc"], + hdrs = ["termination.h"], ++ features = ["-layering_check"], + deps = [ + ":solve_log_cc_proto", + ":solvers_cc_proto", +@@ -365,6 +376,7 @@ cc_library( + testonly = 1, + srcs = ["test_util.cc"], + hdrs = ["test_util.h"], ++ features = ["-layering_check"], + deps = [ + ":quadratic_program", + "//ortools/base", +@@ -390,6 +402,7 @@ cc_library( + name = "trust_region", + srcs = 
["trust_region.cc"], + hdrs = ["trust_region.h"], ++ features = ["-layering_check"], + deps = [ + ":quadratic_program", + ":sharded_optimization_utils", +diff --git a/ortools/port/BUILD.bazel b/ortools/port/BUILD.bazel +index 00b8585..b947b31 100644 +--- a/ortools/port/BUILD.bazel ++++ b/ortools/port/BUILD.bazel +@@ -17,6 +17,7 @@ cc_library( + name = "sysinfo", + srcs = ["sysinfo.cc"], + hdrs = ["sysinfo.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:sysinfo", +@@ -27,6 +28,7 @@ cc_library( + name = "proto_utils", + srcs = ["proto_utils.cc"], + hdrs = ["proto_utils.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/util:parse_proto", +@@ -38,6 +40,7 @@ cc_library( + cc_library( + name = "utf8", + hdrs = ["utf8.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:encodingutils", +@@ -52,6 +55,7 @@ cc_library( + hdrs = [ + "file.h", + ], ++ features = ["-layering_check"], + deps = [ + "//ortools/base:file", + "@com_google_absl//absl/status", +@@ -64,4 +68,5 @@ cc_library( + cc_library( + name = "scoped_std_stream_capture", + hdrs = ["scoped_std_stream_capture.h"], ++ features = ["-layering_check"], + ) +diff --git a/ortools/routing/parsers/BUILD.bazel b/ortools/routing/parsers/BUILD.bazel +index 94690f3..a99b6dd 100644 +--- a/ortools/routing/parsers/BUILD.bazel ++++ b/ortools/routing/parsers/BUILD.bazel +@@ -30,6 +30,7 @@ cc_library( + name = "simple_graph", + srcs = ["simple_graph.cc"], + hdrs = ["simple_graph.h"], ++ features = ["-layering_check"], + deps = [ + "@com_google_absl//absl/hash", + ], +@@ -50,6 +51,7 @@ cc_library( + name = "solomon_parser", + srcs = ["solomon_parser.cc"], + hdrs = ["solomon_parser.h"], ++ features = ["-layering_check"], + deps = [ + ":simple_graph", + "//ortools/base", +@@ -79,6 +81,7 @@ cc_library( + name = "lilim_parser", + srcs = ["lilim_parser.cc"], + hdrs = ["lilim_parser.h"], ++ features = ["-layering_check"], + deps = [ + ":simple_graph", + "//ortools/base:file", +@@ -110,6 +113,7 @@ cc_library( + name = "carp_parser", + srcs = ["carp_parser.cc"], + hdrs = ["carp_parser.h"], ++ features = ["-layering_check"], + deps = [ + ":simple_graph", + "//ortools/base", +@@ -153,6 +157,7 @@ cc_library( + name = "nearp_parser", + srcs = ["nearp_parser.cc"], + hdrs = ["nearp_parser.h"], ++ features = ["-layering_check"], + deps = [ + ":simple_graph", + "//ortools/base", +@@ -186,6 +191,7 @@ cc_library( + name = "pdtsp_parser", + srcs = ["pdtsp_parser.cc"], + hdrs = ["pdtsp_parser.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + "//ortools/base", +@@ -220,6 +226,7 @@ cc_library( + name = "tsplib_parser", + srcs = ["tsplib_parser.cc"], + hdrs = ["tsplib_parser.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":simple_graph", +@@ -268,6 +275,7 @@ cc_library( + name = "tsptw_parser", + srcs = ["tsptw_parser.cc"], + hdrs = ["tsptw_parser.h"], ++ features = ["-layering_check"], + visibility = ["//visibility:public"], + deps = [ + ":simple_graph", +@@ -302,6 +310,7 @@ cc_library( + name = "solution_serializer", + srcs = ["solution_serializer.cc"], + hdrs = ["solution_serializer.h"], ++ features = ["-layering_check"], + deps = [ + ":simple_graph", + "//ortools/base", +@@ -332,6 +341,7 @@ cc_library( + name = "cvrptw_lib", + srcs = ["cvrptw_lib.cc"], + hdrs = ["cvrptw_lib.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + 
"//ortools/constraint_solver:routing", +@@ -343,6 +353,7 @@ cc_library( + name = "dow_parser", + srcs = ["dow_parser.cc"], + hdrs = ["dow_parser.h"], ++ features = ["-layering_check"], + deps = [ + ":capacity_planning_cc_proto", + "//ortools/base", +diff --git a/ortools/sat/BUILD.bazel b/ortools/sat/BUILD.bazel +index 222559f..b05763d 100644 +--- a/ortools/sat/BUILD.bazel ++++ b/ortools/sat/BUILD.bazel +@@ -24,6 +24,7 @@ cc_library( + name = "cp_model", + srcs = ["cp_model.cc"], + hdrs = ["cp_model.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_solver", +@@ -42,6 +43,7 @@ cc_library( + cc_library( + name = "model", + hdrs = ["model.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/base", + "//ortools/base:typeid", +@@ -95,6 +97,7 @@ cc_library( + name = "cp_model_utils", + srcs = ["cp_model_utils.cc"], + hdrs = ["cp_model_utils.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":sat_base", +@@ -117,6 +120,7 @@ cc_library( + name = "synchronization", + srcs = ["synchronization.cc"], + hdrs = ["synchronization.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_utils", +@@ -162,6 +166,7 @@ cc_library( + name = "cp_model_checker", + srcs = ["cp_model_checker.cc"], + hdrs = ["cp_model_checker.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_utils", +@@ -185,6 +190,7 @@ cc_library( + name = "constraint_violation", + srcs = ["constraint_violation.cc"], + hdrs = ["constraint_violation.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_utils", +@@ -208,6 +214,7 @@ cc_library( + name = "feasibility_jump", + srcs = ["feasibility_jump.cc"], + hdrs = ["feasibility_jump.h"], ++ features = ["-layering_check"], + deps = [ + ":constraint_violation", + ":cp_model_cc_proto", +@@ -243,6 +250,7 @@ cc_library( + name = "linear_model", + srcs = ["linear_model.cc"], + hdrs = ["linear_model.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_utils", +@@ -258,6 +266,7 @@ cc_library( + name = "parameters_validation", + srcs = ["parameters_validation.cc"], + hdrs = ["parameters_validation.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_search", + ":sat_parameters_cc_proto", +@@ -269,6 +278,7 @@ cc_library( + name = "cp_model_search", + srcs = ["cp_model_search.cc"], + hdrs = ["cp_model_search.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_mapping", +@@ -297,6 +307,7 @@ cc_library( + name = "cp_model_solver_helpers", + srcs = ["cp_model_solver_helpers.cc"], + hdrs = ["cp_model_solver_helpers.h"], ++ features = ["-layering_check"], + deps = [ + ":circuit", + ":clause", +@@ -381,6 +392,7 @@ cc_library( + name = "shaving_solver", + srcs = ["shaving_solver.cc"], + hdrs = ["shaving_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_lns", +@@ -411,6 +423,7 @@ cc_library( + name = "cp_model_solver", + srcs = ["cp_model_solver.cc"], + hdrs = ["cp_model_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":circuit", + ":clause", +@@ -498,6 +511,7 @@ cc_library( + cc_library( + name = "cp_model_mapping", + hdrs = ["cp_model_mapping.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_utils", +@@ -521,6 +535,7 @@ cc_library( + name = "cp_model_loader", + srcs = ["cp_model_loader.cc"], + hdrs = ["cp_model_loader.h"], ++ features = ["-layering_check"], + deps = [ 
+ ":all_different", + ":circuit", +@@ -582,6 +597,7 @@ cc_library( + name = "presolve_util", + srcs = ["presolve_util.cc"], + hdrs = ["presolve_util.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_utils", +@@ -612,6 +628,7 @@ cc_library( + name = "presolve_context", + srcs = ["presolve_context.cc"], + hdrs = ["presolve_context.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_loader", +@@ -653,6 +670,7 @@ cc_library( + "cp_model_presolve.cc", + ], + hdrs = ["cp_model_presolve.h"], ++ features = ["-layering_check"], + deps = [ + ":2d_rectangle_presolve", + ":circuit", +@@ -718,6 +736,7 @@ cc_library( + "cp_model_postsolve.cc", + ], + hdrs = ["cp_model_postsolve.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_utils", +@@ -734,6 +753,7 @@ cc_library( + name = "cp_model_expand", + srcs = ["cp_model_expand.cc"], + hdrs = ["cp_model_expand.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_checker", +@@ -762,6 +782,7 @@ cc_library( + cc_library( + name = "sat_base", + hdrs = ["sat_base.h"], ++ features = ["-layering_check"], + deps = [ + ":model", + "//ortools/base", +@@ -788,6 +809,7 @@ cc_library( + "sat_solver.cc", + ], + hdrs = ["sat_solver.h"], ++ features = ["-layering_check"], + deps = [ + ":clause", + ":drat_proof_handler", +@@ -826,6 +848,7 @@ cc_library( + name = "restart", + srcs = ["restart.cc"], + hdrs = ["restart.h"], ++ features = ["-layering_check"], + deps = [ + ":model", + ":sat_decision", +@@ -844,6 +867,7 @@ cc_library( + name = "probing", + srcs = ["probing.cc"], + hdrs = ["probing.h"], ++ features = ["-layering_check"], + deps = [ + ":clause", + ":implied_bounds", +@@ -874,6 +898,7 @@ cc_library( + name = "sat_inprocessing", + srcs = ["sat_inprocessing.cc"], + hdrs = ["sat_inprocessing.h"], ++ features = ["-layering_check"], + deps = [ + ":clause", + ":drat_checker", +@@ -907,6 +932,7 @@ cc_library( + name = "sat_decision", + srcs = ["sat_decision.cc"], + hdrs = ["sat_decision.h"], ++ features = ["-layering_check"], + deps = [ + ":model", + ":pb_constraint", +@@ -927,6 +953,7 @@ cc_library( + name = "clause", + srcs = ["clause.cc"], + hdrs = ["clause.h"], ++ features = ["-layering_check"], + deps = [ + ":drat_proof_handler", + ":inclusion", +@@ -959,6 +986,7 @@ cc_library( + name = "simplification", + srcs = ["simplification.cc"], + hdrs = ["simplification.h"], ++ features = ["-layering_check"], + deps = [ + ":drat_proof_handler", + ":model", +@@ -988,6 +1016,7 @@ cc_library( + name = "pb_constraint", + srcs = ["pb_constraint.cc"], + hdrs = ["pb_constraint.h"], ++ features = ["-layering_check"], + deps = [ + ":model", + ":sat_base", +@@ -1012,6 +1041,7 @@ cc_library( + name = "symmetry", + srcs = ["symmetry.cc"], + hdrs = ["symmetry.h"], ++ features = ["-layering_check"], + deps = [ + ":sat_base", + "//ortools/algorithms:sparse_permutation", +@@ -1027,6 +1057,7 @@ cc_library( + name = "symmetry_util", + srcs = ["symmetry_util.cc"], + hdrs = ["symmetry_util.h"], ++ features = ["-layering_check"], + deps = [ + "//ortools/algorithms:dynamic_partition", + "//ortools/algorithms:sparse_permutation", +@@ -1040,6 +1071,7 @@ cc_library( + name = "var_domination", + srcs = ["var_domination.cc"], + hdrs = ["var_domination.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_cc_proto", + ":cp_model_utils", +@@ -1068,6 +1100,7 @@ cc_library( + name = "integer", + srcs = ["integer.cc"], + hdrs = ["integer.h"], ++ features = 
["-layering_check"], + deps = [ + ":model", + ":sat_base", +@@ -1097,6 +1130,7 @@ cc_library( + name = "integer_search", + srcs = ["integer_search.cc"], + hdrs = ["integer_search.h"], ++ features = ["-layering_check"], + deps = [ + ":clause", + ":cp_model_cc_proto", +@@ -1134,6 +1168,7 @@ cc_library( + name = "lb_tree_search", + srcs = ["lb_tree_search.cc"], + hdrs = ["lb_tree_search.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_mapping", + ":integer", +@@ -1165,6 +1200,7 @@ cc_library( + name = "pseudo_costs", + srcs = ["pseudo_costs.cc"], + hdrs = ["pseudo_costs.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_model_mapping", + ":integer", +@@ -1187,6 +1223,7 @@ cc_library( + name = "intervals", + srcs = ["intervals.cc"], + hdrs = ["intervals.h"], ++ features = ["-layering_check"], + deps = [ + ":cp_constraints", + ":implied_bounds", +@@ -1216,6 +1253,7 @@ cc_library( + name = "precedences", + srcs = ["precedences.cc"], + hdrs = ["precedences.h"], ++ features = ["-layering_check"], + deps = [ + ":clause", + ":cp_constraints", +@@ -1251,6 +1289,7 @@ cc_library( + name = "integer_expr", + srcs = ["integer_expr.cc"], + hdrs = ["integer_expr.h"], ++ features = ["-layering_check"], + deps = [ + ":integer", + ":linear_constraint", +@@ -1278,6 +1317,7 @@ cc_library( + name = "linear_propagation", + srcs = ["linear_propagation.cc"], + hdrs = ["linear_propagation.h"], ++ features = ["-layering_check"], + deps = [ + ":integer", + ":model", +@@ -1310,6 +1350,7 @@ cc_library( + name = "all_different", + srcs = ["all_different.cc"], + hdrs = ["all_different.h"], ++ features = ["-layering_check"], + deps = [ + ":integer", + ":model", +@@ -1331,6 +1372,7 @@ cc_library( + name = "theta_tree", + srcs = ["theta_tree.cc"], + hdrs = ["theta_tree.h"], ++ features = ["-layering_check"], + deps = [ + ":integer", + "//ortools/base", +@@ -1342,6 +1384,7 @@ cc_library( + name = "disjunctive", + srcs = ["disjunctive.cc"], + hdrs = ["disjunctive.h"], ++ features = ["-layering_check"], + deps = [ + ":all_different", + ":integer", +@@ -1368,6 +1411,7 @@ cc_library( + name = "timetable", + srcs = ["timetable.cc"], + hdrs = ["timetable.h"], ++ features = ["-layering_check"], + deps = [ + ":integer", + ":intervals", +@@ -1384,6 +1428,7 @@ cc_library( + name = "timetable_edgefinding", + srcs = ["timetable_edgefinding.cc"], + hdrs = ["timetable_edgefinding.h"], ++ features = ["-layering_check"], + deps = [ + ":integer", + ":intervals", +@@ -1399,6 +1444,7 @@ cc_library( + name = "cumulative", + srcs = ["cumulative.cc"], + hdrs = ["cumulative.h"], ++ features = ["-layering_check"], + deps = [ + ":cumulative_energy", + ":disjunctive", +@@ -1425,6 +1471,7 @@ cc_library( + name = "cumulative_energy", + srcs = ["cumulative_energy.cc"], + hdrs = ["cumulative_energy.h"], ++ features = ["-layering_check"], + deps = [ + ":2d_orthogonal_packing", + ":diffn_util", +@@ -1447,6 +1494,7 @@ cc_library( + name = "boolean_problem", + srcs = ["boolean_problem.cc"], + hdrs = ["boolean_problem.h"], ++ features = ["-layering_check"], + deps = [ + ":boolean_problem_cc_proto", + ":cp_model_cc_proto", +@@ -1480,6 +1528,7 @@ cc_library( + name = "linear_relaxation", + srcs = ["linear_relaxation.cc"], + hdrs = ["linear_relaxation.h"], ++ features = ["-layering_check"], + deps = [ + ":circuit", + ":clause", +@@ -1524,6 +1573,7 @@ cc_library( + name = "linear_constraint", + srcs = ["linear_constraint.cc"], + hdrs = ["linear_constraint.h"], ++ features = ["-layering_check"], + deps = [ + ":integer", + ":model", +@@ -1544,6 
+1594,7 @@ cc_library(
+ name = "linear_programming_constraint",
+ srcs = ["linear_programming_constraint.cc"],
+ hdrs = ["linear_programming_constraint.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cp_model_cc_proto",
+ ":cp_model_mapping",
+@@ -1590,6 +1641,7 @@ cc_library(
+ name = "linear_constraint_manager",
+ srcs = ["linear_constraint_manager.cc"],
+ hdrs = ["linear_constraint_manager.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":integer",
+ ":linear_constraint",
+@@ -1620,6 +1672,7 @@ cc_library(
+ name = "cuts",
+ srcs = ["cuts.cc"],
+ hdrs = ["cuts.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":clause",
+ ":implied_bounds",
+@@ -1653,6 +1706,7 @@ cc_library(
+ name = "routing_cuts",
+ srcs = ["routing_cuts.cc"],
+ hdrs = ["routing_cuts.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cp_model_cc_proto",
+ ":cuts",
+@@ -1680,6 +1734,7 @@ cc_library(
+ name = "scheduling_cuts",
+ srcs = ["scheduling_cuts.cc"],
+ hdrs = ["scheduling_cuts.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cuts",
+ ":implied_bounds",
+@@ -1710,6 +1765,7 @@ cc_library(
+ name = "diffn_cuts",
+ srcs = ["diffn_cuts.cc"],
+ hdrs = ["diffn_cuts.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cuts",
+ ":diffn_util",
+@@ -1741,6 +1797,7 @@ cc_library(
+ name = "zero_half_cuts",
+ srcs = ["zero_half_cuts.cc"],
+ hdrs = ["zero_half_cuts.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":integer",
+ ":util",
+@@ -1756,6 +1813,7 @@ cc_library(
+ name = "lp_utils",
+ srcs = ["lp_utils.cc"],
+ hdrs = ["lp_utils.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":boolean_problem",
+ ":boolean_problem_cc_proto",
+@@ -1786,6 +1844,7 @@ cc_library(
+ name = "optimization",
+ srcs = ["optimization.cc"],
+ hdrs = ["optimization.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":boolean_problem",
+ ":boolean_problem_cc_proto",
+@@ -1825,6 +1884,7 @@ cc_library(
+ name = "max_hs",
+ srcs = ["max_hs.cc"],
+ hdrs = ["max_hs.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":boolean_problem",
+ ":cp_model_cc_proto",
+@@ -1871,6 +1931,7 @@ cc_library(
+ name = "util",
+ srcs = ["util.cc"],
+ hdrs = ["util.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":model",
+ ":sat_base",
+@@ -1904,6 +1965,7 @@ cc_library(
+ name = "stat_tables",
+ srcs = ["stat_tables.cc"],
+ hdrs = ["stat_tables.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cp_model_cc_proto",
+ ":cp_model_lns",
+@@ -1926,6 +1988,7 @@ cc_library(
+ name = "table",
+ srcs = ["table.cc"],
+ hdrs = ["table.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":integer",
+ ":model",
+@@ -1943,6 +2006,7 @@ cc_library(
+ name = "cp_constraints",
+ srcs = ["cp_constraints.cc"],
+ hdrs = ["cp_constraints.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":integer",
+ ":model",
+@@ -1960,6 +2024,7 @@ cc_library(
+ name = "diffn_util",
+ srcs = ["diffn_util.cc"],
+ hdrs = ["diffn_util.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":integer",
+ ":intervals",
+@@ -1981,6 +2046,7 @@ cc_library(
+ name = "2d_orthogonal_packing",
+ srcs = ["2d_orthogonal_packing.cc"],
+ hdrs = ["2d_orthogonal_packing.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":2d_packing_brute_force",
+ ":integer",
+@@ -2000,6 +2066,7 @@ cc_library(
+ name = "2d_packing_brute_force",
+ srcs = ["2d_packing_brute_force.cc"],
+ hdrs = ["2d_packing_brute_force.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":diffn_util",
+ ":integer",
+@@ -2016,6 +2083,7 @@ cc_library(
+ name = "2d_rectangle_presolve",
+ srcs = ["2d_rectangle_presolve.cc"],
+ hdrs = ["2d_rectangle_presolve.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":diffn_util",
+ ":integer",
+@@ -2032,6 +2100,7 @@ cc_library(
+ testonly = 1,
+ srcs = ["2d_orthogonal_packing_testing.cc"],
+ hdrs = ["2d_orthogonal_packing_testing.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":diffn_util",
+ ":integer",
+@@ -2046,6 +2115,7 @@ cc_library(
+ name = "diffn",
+ srcs = ["diffn.cc"],
+ hdrs = ["diffn.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":2d_orthogonal_packing",
+ ":cumulative_energy",
+@@ -2075,6 +2145,7 @@ cc_library(
+ name = "circuit",
+ srcs = ["circuit.cc"],
+ hdrs = ["circuit.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":integer",
+ ":model",
+@@ -2097,6 +2168,7 @@ cc_library(
+ name = "encoding",
+ srcs = ["encoding.cc"],
+ hdrs = ["encoding.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":boolean_problem_cc_proto",
+ ":pb_constraint",
+@@ -2117,6 +2189,7 @@ cc_library(
+ name = "cp_model_lns",
+ srcs = ["cp_model_lns.cc"],
+ hdrs = ["cp_model_lns.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cp_model_cc_proto",
+ ":cp_model_mapping",
+@@ -2160,6 +2233,7 @@ cc_library(
+ name = "feasibility_pump",
+ srcs = ["feasibility_pump.cc"],
+ hdrs = ["feasibility_pump.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cp_model_mapping",
+ ":integer",
+@@ -2193,6 +2267,7 @@ cc_library(
+ name = "rins",
+ srcs = ["rins.cc"],
+ hdrs = ["rins.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cp_model_mapping",
+ ":integer",
+@@ -2211,6 +2286,7 @@ cc_library(
+ name = "subsolver",
+ srcs = ["subsolver.cc"],
+ hdrs = ["subsolver.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:threadpool",
+@@ -2230,6 +2306,7 @@ cc_library(
+ name = "drat_proof_handler",
+ srcs = ["drat_proof_handler.cc"],
+ hdrs = ["drat_proof_handler.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":drat_checker",
+ ":drat_writer",
+@@ -2246,6 +2323,7 @@ cc_library(
+ name = "drat_checker",
+ srcs = ["drat_checker.cc"],
+ hdrs = ["drat_checker.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":sat_base",
+ "//ortools/base",
+@@ -2265,6 +2343,7 @@ cc_library(
+ name = "drat_writer",
+ srcs = ["drat_writer.cc"],
+ hdrs = ["drat_writer.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":sat_base",
+ "//ortools/base",
+@@ -2311,6 +2390,7 @@ cc_binary(
+ cc_library(
+ name = "sat_cnf_reader",
+ hdrs = ["sat_cnf_reader.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":boolean_problem_cc_proto",
+ ":cp_model_cc_proto",
+@@ -2328,6 +2408,7 @@ cc_library(
+ name = "cp_model_symmetries",
+ srcs = ["cp_model_symmetries.cc"],
+ hdrs = ["cp_model_symmetries.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cp_model_cc_proto",
+ ":cp_model_checker",
+@@ -2367,6 +2448,7 @@ cc_library(
+ name = "swig_helper",
+ srcs = ["swig_helper.cc"],
+ hdrs = ["swig_helper.h"],
++ features = ["-layering_check"],
+ visibility = [
+ "//ortools/sat/java:__pkg__",
+ "//ortools/sat/python:__pkg__",
+@@ -2389,6 +2471,7 @@ cc_library(
+ name = "implied_bounds",
+ srcs = ["implied_bounds.cc"],
+ hdrs = ["implied_bounds.h"],
++ features = ["-layering_check"],
+ deps = [
+ "linear_constraint",
+ ":clause",
+@@ -2418,6 +2501,7 @@ cc_library(
+ cc_library(
+ name = "inclusion",
+ hdrs = ["inclusion.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "@com_google_absl//absl/log:check",
+@@ -2429,6 +2513,7 @@ cc_library(
+ name = "diophantine",
+ srcs = ["diophantine.cc"],
+ hdrs = ["diophantine.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":util",
+ "@com_google_absl//absl/log:check",
+@@ -2441,6 +2526,7 @@ cc_library(
+ name = "work_assignment",
+ srcs = ["work_assignment.cc"],
+ hdrs = ["work_assignment.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":cp_model_mapping",
+ ":cp_model_utils",
+diff --git a/ortools/scheduling/BUILD.bazel b/ortools/scheduling/BUILD.bazel
+index d2c0ef0..5c794d4 100644
+--- a/ortools/scheduling/BUILD.bazel
++++ b/ortools/scheduling/BUILD.bazel
+@@ -34,6 +34,7 @@ cc_library(
+ name = "jobshop_scheduling_parser",
+ srcs = ["jobshop_scheduling_parser.cc"],
+ hdrs = ["jobshop_scheduling_parser.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":jobshop_scheduling_cc_proto",
+ "//ortools/base",
+@@ -63,6 +64,7 @@ cc_library(
+ name = "rcpsp_parser",
+ srcs = ["rcpsp_parser.cc"],
+ hdrs = ["rcpsp_parser.h"],
++ features = ["-layering_check"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":rcpsp_cc_proto",
+diff --git a/ortools/util/BUILD.bazel b/ortools/util/BUILD.bazel
+index b2ee315..a123c8d 100644
+--- a/ortools/util/BUILD.bazel
++++ b/ortools/util/BUILD.bazel
+@@ -56,6 +56,7 @@ py_proto_library(
+ cc_library(
+ name = "affine_relation",
+ hdrs = ["affine_relation.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:iterator_adaptors",
+@@ -65,6 +66,7 @@ cc_library(
+ cc_library(
+ name = "filelineiter",
+ hdrs = ["filelineiter.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:file",
+@@ -77,6 +79,7 @@ cc_library(
+ name = "bitset",
+ srcs = ["bitset.cc"],
+ hdrs = ["bitset.h"],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+@@ -85,6 +88,7 @@ cc_library(
+ hdrs = [
+ "integer_pq.h",
+ ],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ ],
+@@ -94,6 +98,7 @@ cc_library(
+ name = "cached_log",
+ srcs = ["cached_log.cc"],
+ hdrs = ["cached_log.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:types",
+@@ -103,18 +108,21 @@ cc_library(
+ cc_library(
+ name = "zvector",
+ hdrs = ["zvector.h"],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+ cc_library(
+ name = "permutation",
+ hdrs = ["permutation.h"],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+ cc_library(
+ name = "saturated_arithmetic",
+ hdrs = ["saturated_arithmetic.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":bitset",
+ "//ortools/base",
+@@ -126,6 +134,7 @@ cc_library(
+ name = "piecewise_linear_function",
+ srcs = ["piecewise_linear_function.cc"],
+ hdrs = ["piecewise_linear_function.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":saturated_arithmetic",
+ "//ortools/base",
+@@ -140,6 +149,7 @@ cc_library(
+ name = "rational_approximation",
+ srcs = ["rational_approximation.cc"],
+ hdrs = ["rational_approximation.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "@com_google_absl//absl/strings",
+@@ -150,6 +160,7 @@ cc_library(
+ name = "sorted_interval_list",
+ srcs = ["sorted_interval_list.cc"],
+ hdrs = ["sorted_interval_list.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":saturated_arithmetic",
+ "//ortools/base",
+@@ -163,6 +174,7 @@ cc_library(
+ cc_library(
+ name = "string_array",
+ hdrs = ["string_array.h"],
++ features = ["-layering_check"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ ],
+@@ -171,6 +183,7 @@ cc_library(
+ cc_library(
+ name = "tuple_set",
+ hdrs = ["tuple_set.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:hash",
+@@ -183,6 +196,7 @@ cc_library(
+ name = "stats",
+ srcs = ["stats.cc"],
+ hdrs = ["stats.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:stl_util",
+@@ -200,6 +214,7 @@ cc_library(
+ name = "time_limit",
+ srcs = ["time_limit.cc"],
+ hdrs = ["time_limit.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":running_stat",
+ "//ortools/base",
+@@ -216,6 +231,7 @@ cc_library(
+ name = "sigint",
+ srcs = ["sigint.cc"],
+ hdrs = ["sigint.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ ],
+@@ -234,6 +250,7 @@ cc_library(
+ "on_windows": [],
+ "//conditions:default": ["-frounding-math"],
+ }),
++ features = ["-layering_check"],
+ deps = [
+ ":bitset",
+ "//ortools/base",
+@@ -244,18 +261,21 @@ cc_library(
+ name = "monoid_operation_tree",
+ srcs = [],
+ hdrs = ["monoid_operation_tree.h"],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+ cc_library(
+ name = "return_macros",
+ hdrs = ["return_macros.h"],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+ cc_library(
+ name = "running_stat",
+ hdrs = ["running_stat.h"],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+@@ -263,6 +283,7 @@ cc_library(
+ name = "proto_tools",
+ srcs = ["proto_tools.cc"],
+ hdrs = ["proto_tools.h"],
++ features = ["-layering_check"],
+ deps = [
+ "@com_google_absl//absl/status",
+ "@com_google_absl//absl/status:statusor",
+@@ -302,6 +323,7 @@ cc_library(
+ hdrs = [
+ "functions_swig_helpers.h",
+ ],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+@@ -311,12 +333,14 @@ cc_library(
+ hdrs = [
+ "functions_swig_test_helpers.h",
+ ],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+ cc_library(
+ name = "range_minimum_query",
+ hdrs = ["range_minimum_query.h"],
++ features = ["-layering_check"],
+ deps = [":bitset"],
+ )
+
+@@ -324,6 +348,7 @@ cc_library(
+ name = "range_query_function",
+ srcs = ["range_query_function.cc"],
+ hdrs = ["range_query_function.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":range_minimum_query",
+ "//ortools/base",
+@@ -333,6 +358,7 @@ cc_library(
+ cc_library(
+ name = "rev",
+ hdrs = ["rev.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:map_util",
+@@ -343,6 +369,7 @@ cc_library(
+ cc_library(
+ name = "vector_or_function",
+ hdrs = ["vector_or_function.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ ],
+@@ -354,6 +381,7 @@ cc_library(
+ name = "qap_reader",
+ srcs = ["qap_reader.cc"],
+ hdrs = ["qap_reader.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/util:filelineiter",
+ "@com_google_absl//absl/strings",
+@@ -363,6 +391,7 @@ cc_library(
+ cc_library(
+ name = "sort",
+ hdrs = ["sort.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ ],
+@@ -372,6 +401,7 @@ cc_library(
+ name = "file_util",
+ srcs = ["file_util.cc"],
+ hdrs = ["file_util.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:dump_vars",
+@@ -391,6 +421,7 @@ cc_library(
+ cc_library(
+ name = "random_engine",
+ hdrs = ["random_engine.h"],
++ features = ["-layering_check"],
+ deps = [],
+ )
+
+@@ -398,6 +429,7 @@ cc_library(
+ name = "string_util",
+ srcs = ["string_util.cc"],
+ hdrs = ["string_util.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "@com_google_absl//absl/strings",
+@@ -408,12 +440,14 @@ cc_library(
+ cc_library(
+ name = "adaptative_parameter_value",
+ hdrs = ["adaptative_parameter_value.h"],
++ features = ["-layering_check"],
+ deps = ["//ortools/base"],
+ )
+
+ cc_library(
+ name = "lazy_mutable_copy",
+ hdrs = ["lazy_mutable_copy.h"],
++ features = ["-layering_check"],
+ deps = ["@com_google_absl//absl/memory"],
+ )
+
+@@ -421,6 +455,7 @@ cc_library(
+ name = "logging",
+ srcs = ["logging.cc"],
+ hdrs = ["logging.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:timer",
+@@ -431,11 +466,13 @@ cc_library(
+ cc_library(
+ name = "testing_utils",
+ hdrs = ["testing_utils.h"],
++ features = ["-layering_check"],
+ )
+
+ cc_library(
+ name = "strong_integers",
+ hdrs = ["strong_integers.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "@com_google_absl//absl/strings",
+@@ -445,6 +482,7 @@ cc_library(
+ cc_library(
+ name = "status_macros",
+ hdrs = ["status_macros.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base:status_macros",
+ "@com_google_absl//absl/status",
+@@ -455,6 +493,7 @@ cc_library(
+ name = "fp_roundtrip_conv",
+ srcs = ["fp_roundtrip_conv.cc"],
+ hdrs = ["fp_roundtrip_conv.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:status_builder",
+@@ -468,6 +507,7 @@ cc_library(
+ cc_library(
+ name = "flat_matrix",
+ hdrs = ["flat_matrix.h"],
++ features = ["-layering_check"],
+ deps = [
+ "@com_google_absl//absl/types:span",
+ ],
+@@ -477,6 +517,7 @@ cc_library(
+ name = "fp_roundtrip_conv_testing",
+ testonly = 1,
+ hdrs = ["fp_roundtrip_conv_testing.h"],
++ features = ["-layering_check"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ ],
+@@ -486,6 +527,7 @@ cc_library(
+ name = "aligned_memory",
+ srcs = ["aligned_memory_internal.h"],
+ hdrs = ["aligned_memory.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base:mathutil",
+ ],
+@@ -495,6 +537,7 @@ cc_library(
+ name = "vector_sum",
+ srcs = ["vector_sum_internal.h"],
+ hdrs = ["vector_sum.h"],
++ features = ["-layering_check"],
+ deps = [
+ ":aligned_memory",
+ "@com_google_absl//absl/base:core_headers",
+@@ -506,6 +549,7 @@ cc_library(
+ name = "parse_proto",
+ srcs = ["parse_proto.cc"],
+ hdrs = ["parse_proto.h"],
++ features = ["-layering_check"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ "@com_google_protobuf//:protobuf",
+@@ -516,6 +560,7 @@ cc_library(
+ name = "solve_interrupter",
+ srcs = ["solve_interrupter.cc"],
+ hdrs = ["solve_interrupter.h"],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:intops",
+@@ -529,6 +574,7 @@ cc_library(
+ cc_library(
+ name = "dense_set",
+ hdrs = ["dense_set.h"],
++ features = ["-layering_check"],
+ deps = [
+ "@com_google_absl//absl/log:check",
+ "@com_google_absl//absl/types:span",
+diff --git a/ortools/util/python/BUILD.bazel b/ortools/util/python/BUILD.bazel
+index 925cf57..765f573 100644
+--- a/ortools/util/python/BUILD.bazel
++++ b/ortools/util/python/BUILD.bazel
+@@ -21,6 +21,7 @@ load("@rules_python//python:defs.bzl", "py_test")
+ cc_library(
+ name = "sorted_interval_list_doc",
+ hdrs = ["sorted_interval_list_doc.h"],
++ features = ["-layering_check"],
+ visibility = ["//visibility:public"],
+ )
+
+diff --git a/ortools/xpress/BUILD.bazel b/ortools/xpress/BUILD.bazel
+index 22b6ed9..a86bc1d 100644
+--- a/ortools/xpress/BUILD.bazel
++++ b/ortools/xpress/BUILD.bazel
+@@ -21,6 +21,7 @@ cc_library(
+ hdrs = [
+ "environment.h",
+ ],
++ features = ["-layering_check"],
+ deps = [
+ "//ortools/base",
+ "//ortools/base:dynamic_library",
diff --git a/third_party/xla/third_party/py/py_import.bzl b/third_party/xla/third_party/py/py_import.bzl
index 08aa56f1b42c20..7ab46f8bfa00ef 100644
--- a/third_party/xla/third_party/py/py_import.bzl
+++ b/third_party/xla/third_party/py/py_import.bzl
@@ -49,13 +49,15 @@ def py_import(
 wheel,
 deps = [],
 wheel_deps = [],
- zip_deps = []):
+ zip_deps = [],
+ testonly = False):
 unpacked_wheel_name = name + "_unpacked_wheel"
 _unpacked_wheel(
 name = unpacked_wheel_name,
 wheel = wheel,
 wheel_deps = wheel_deps,
 zip_deps = zip_deps,
+ testonly = testonly,
 )
 py_library(
 name = name,
@@ -63,6 +65,7 @@ def py_import(
 imports = [unpacked_wheel_name],
 deps = deps,
 visibility = ["//visibility:public"],
+ testonly = testonly,
 )
 """Unpacks the wheel and uses its content as a py_library.
diff --git a/third_party/xla/third_party/py/python_init_pip.bzl b/third_party/xla/third_party/py/python_init_pip.bzl
index 7689b92b60a00a..39901b9b2e64ea 100644
--- a/third_party/xla/third_party/py/python_init_pip.bzl
+++ b/third_party/xla/third_party/py/python_init_pip.bzl
@@ -24,6 +24,10 @@ cc_library(
 cc_library(
 name = "numpy_headers",
 deps = [":numpy_headers_2", ":numpy_headers_1"],
+ # For the layering check to work we need to re-export the headers from the
+ # dependencies.
+ hdrs = glob(["site-packages/numpy/_core/include/**/*.h"]) +
+ glob(["site-packages/numpy/core/include/**/*.h"]),
 )
 """,
 ),
diff --git a/third_party/xla/third_party/py/python_init_rules.bzl b/third_party/xla/third_party/py/python_init_rules.bzl
index ac9b8eb3893441..e8bfd6548965e4 100644
--- a/third_party/xla/third_party/py/python_init_rules.bzl
+++ b/third_party/xla/third_party/py/python_init_rules.bzl
@@ -1,6 +1,5 @@
 """Hermetic Python initialization. Consult the WORKSPACE on how to use it."""
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls")
 def python_init_rules(extra_patches = []):
@@ -11,15 +10,14 @@ def python_init_rules(extra_patches = []):
 set of patches.
 """
- http_archive(
+ tf_http_archive(
 name = "rules_cc",
- urls = ["https://github.com/bazelbuild/rules_cc/archive/refs/tags/0.1.0.tar.gz"],
+ urls = tf_mirror_urls("https://github.com/bazelbuild/rules_cc/archive/refs/tags/0.1.0.tar.gz"),
 strip_prefix = "rules_cc-0.1.0",
 sha256 = "4b12149a041ddfb8306a8fd0e904e39d673552ce82e4296e96fac9cbf0780e59",
- patches = [
- Label("//third_party/py:rules_cc_protobuf.patch"),
+ patch_file = [
+ "@local_xla//third_party/py:rules_cc_protobuf.patch",
 ],
- patch_args = ["-p1"],
 )
 tf_http_archive(
@@ -34,15 +32,14 @@ def python_init_rules(extra_patches = []):
 },
 )
- http_archive(
+ tf_http_archive(
 name = "rules_python",
 sha256 = "fa7dd2c6b7d63b3585028dd8a90a6cf9db83c33b250959c2ee7b583a6c130e12",
 strip_prefix = "rules_python-1.6.0",
- url = "https://github.com/bazelbuild/rules_python/releases/download/1.6.0/rules_python-1.6.0.tar.gz",
- patch_args = ["-p1"],
- patches = [
- Label("//third_party/py:rules_python_pip_version.patch"),
- Label("//third_party/py:rules_python_freethreaded.patch"),
- Label("//third_party/py:rules_python_versions.patch"),
+ urls = tf_mirror_urls("https://github.com/bazelbuild/rules_python/releases/download/1.6.0/rules_python-1.6.0.tar.gz"),
+ patch_file = [
+ "@local_xla//third_party/py:rules_python_pip_version.patch",
+ "@local_xla//third_party/py:rules_python_freethreaded.patch",
+ "@local_xla//third_party/py:rules_python_versions.patch",
 ] + extra_patches,
 )
diff --git a/third_party/xla/third_party/py/python_init_toolchains.bzl b/third_party/xla/third_party/py/python_init_toolchains.bzl
index 860fc08ceda2a8..82d755c32bbfba 100644
--- a/third_party/xla/third_party/py/python_init_toolchains.bzl
+++ b/third_party/xla/third_party/py/python_init_toolchains.bzl
@@ -41,7 +41,6 @@ def python_init_toolchains(name = "python", python_version = None, **kwargs):
 tool_version = MINOR_MAPPING.get(HERMETIC_PYTHON_VERSION)
 if not tool_version:
 tool_version = HERMETIC_PYTHON_VERSION + ".0"
- url_components = HERMETIC_PYTHON_URL.split("://", 1)
 sha256s = {}
 for platform in PLATFORMS.keys():
@@ -51,12 +50,12 @@ def python_init_toolchains(name = "python", python_version = None, **kwargs):
 python_register_toolchains(
 name = get_toolchain_name_per_python_version(name),
- base_url = url_components[0] + "://",
+ base_url = "",
 ignore_root_user_error = True,
 python_version = tool_version,
 tool_versions = {
 tool_version: {
- "url": url_components[1],
+ "url": HERMETIC_PYTHON_URL,
 "sha256": sha256s,
 "strip_prefix": HERMETIC_PYTHON_PREFIX,
 },
diff --git a/third_party/xla/third_party/py/rules_python_versions.patch b/third_party/xla/third_party/py/rules_python_versions.patch
index 8dbc70bad193d7..c31b6772c2675f 100644
--- a/third_party/xla/third_party/py/rules_python_versions.patch
+++ b/third_party/xla/third_party/py/rules_python_versions.patch
@@ -1,8 +1,60 @@
 diff --git a/python/versions.bzl b/python/versions.bzl
-index 30929f82..8e79225a 100644
+index 30929f82..c0856d70 100644
 --- a/python/versions.bzl
 +++ b/python/versions.bzl
-@@ -855,6 +855,51 @@ TOOL_VERSIONS = {
+@@ -810,6 +810,51 @@ TOOL_VERSIONS = {
+ "x86_64-unknown-linux-gnu-freethreaded": "python/install",
+ },
+ },
++ "3.13.11": {
++ "url": "20251209/cpython-{python_version}+20251209-{platform}-{build}.{ext}",
++ "sha256": {
++ "aarch64-apple-darwin": "295a9f7bc899ea1cc08baf60bbf511bdd1e4a29b2dd7e5f59b48f18bfa6bf585",
++ "aarch64-unknown-linux-gnu": "ea1e678e6e82301bb32bf3917732125949b6e46d541504465972024a3f165343",
++ "ppc64le-unknown-linux-gnu": "7660e53aad9d35ee256913c6d98427f81f078699962035c5fa8b5c3138695109",
++ "riscv64-unknown-linux-gnu": "763fa1548e6a432e9402916e690c74ea30f26dcd2e131893dd506f72b87c27c9",
++ "s390x-unknown-linux-gnu": "ffb6af51fbfabfc6fbc4e7379bdec70c2f51e972b1d2f45c053493b9da3a1bbe",
++ "x86_64-apple-darwin": "dac4a0a0a9b71f6b02a8b0886547fa22814474239bffb948e3e77185406ea136",
++ "x86_64-pc-windows-msvc": "87822417007045a28a7eccc47fe67b8c61265b99b10dbbfa24d231a3622b1c27",
++ "aarch64-pc-windows-msvc": "ba646d0c3b7dd7bdfb770d9b2ebd6cd2df02a37fda90c9c79a7cf59c7df6f165",
++ "aarch64-pc-windows-msvc-freethreaded": "6daf6d092c7294cfe68c4c7bf2698ac134235489c874b3bf796c7972b9dbba30",
++ "x86_64-unknown-linux-gnu": "1ffa06d714a44aea14c0c54c30656413e5955a6c92074b4b3cb4351dcc28b63b",
++ "x86_64-unknown-linux-musl": "969fe24017380b987c4e3ce15e9edf82a4618c1e61672b2cc9b021a1c98eae78",
++ "aarch64-apple-darwin-freethreaded": "4213058b7fcd875596c12b58cd46a399358b0a87ecde4b349cbdd00cf87ed79a",
++ "aarch64-unknown-linux-gnu-freethreaded": "290ca3bd0007db9e551f90b08dfcb6c1b2d62c33b2fc3e9a43e77d385d94f569",
++ "ppc64le-unknown-linux-gnu-freethreaded": "09d4b50f8abb443f7e3af858c920aa61c2430b0954df465e861caa7078e55e69",
++ "riscv64-unknown-linux-gnu-freethreaded": "5406f2a7cacafbd2aac3ce2de066a0929aab55423824276c36e04cb83babc36c",
++ "s390x-unknown-linux-gnu-freethreaded": "3984b67c4292892eaccdd1c094c7ec788884c4c9b3534ab6995f6be96d5ed51d",
++ "x86_64-apple-darwin-freethreaded": "d6f489464045d6895ae68b0a04a9e16477e74fe3185a75f3a9a0af8ccd25eade",
++ "x86_64-pc-windows-msvc-freethreaded": "bb9a29a7ba8f179273b79971da6aaa7be592d78c606a63f99eff3e4c12fb0fae",
++ "x86_64-unknown-linux-gnu-freethreaded": "33f89c957d986d525529b8a980103735776f4d20cf52f55960a057c760188ac3",
++ },
++ "strip_prefix": {
++ "aarch64-apple-darwin": "python",
++ "aarch64-unknown-linux-gnu": "python",
++ "ppc64le-unknown-linux-gnu": "python",
++ "s390x-unknown-linux-gnu": "python",
++ "riscv64-unknown-linux-gnu": "python",
++ "x86_64-apple-darwin": "python",
++ "x86_64-pc-windows-msvc": "python",
++ "aarch64-pc-windows-msvc": "python",
++ "x86_64-unknown-linux-gnu": "python",
++ "x86_64-unknown-linux-musl": "python",
++ "aarch64-apple-darwin-freethreaded": "python/install",
++ "aarch64-unknown-linux-gnu-freethreaded": "python/install",
++ "ppc64le-unknown-linux-gnu-freethreaded": "python/install",
++ "riscv64-unknown-linux-gnu-freethreaded": "python/install",
++ "s390x-unknown-linux-gnu-freethreaded": "python/install",
++ "x86_64-apple-darwin-freethreaded": "python/install",
++ "x86_64-pc-windows-msvc-freethreaded": "python/install",
++ "aarch64-pc-windows-msvc-freethreaded": "python/install",
++ "x86_64-unknown-linux-gnu-freethreaded": "python/install",
++ },
++ },
+ "3.14.0rc1": {
+ "url": "20250808/cpython-{python_version}+20250808-{platform}-{build}.{ext}",
+ "sha256": {
+@@ -855,6 +900,51 @@ TOOL_VERSIONS = {
 "x86_64-unknown-linux-gnu-freethreaded": "python/install",
 },
 },
@@ -54,16 +106,18 @@ index 30929f82..8e79225a 100644
 }
 # buildifier: disable=unsorted-dict-items
-@@ -865,7 +910,7 @@ MINOR_MAPPING = {
+@@ -864,8 +954,8 @@ MINOR_MAPPING = {
+ "3.10": "3.10.18",
 "3.11": "3.11.13",
 "3.12": "3.12.11",
- "3.13": "3.13.6",
+- "3.13": "3.13.6",
- "3.14": "3.14.0rc1",
++ "3.13": "3.13.11",
+ "3.14": "3.14.0",
 }
 def _generate_platforms():
-@@ -1045,29 +1090,25 @@ def get_release_info(platform, python_version, base_url = DEFAULT_RELEASE_BASE_U
+@@ -1045,29 +1135,25 @@ def get_release_info(platform, python_version, base_url = DEFAULT_RELEASE_BASE_U
 for u in url:
 p, _, _ = platform.partition(FREETHREADED)
diff --git a/third_party/xla/third_party/riegeli/BUILD.bazel b/third_party/xla/third_party/riegeli/BUILD.bazel
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/third_party/xla/third_party/riegeli/workspace.bzl b/third_party/xla/third_party/riegeli/workspace.bzl
new file mode 100644
index 00000000000000..577511ee10e83e
--- /dev/null
+++ b/third_party/xla/third_party/riegeli/workspace.bzl
@@ -0,0 +1,11 @@
+"""Provides the repo macro to import riegeli"""
+
+load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls")
+
+def repo():
+ tf_http_archive(
+ name = "com_google_riegeli",
+ sha256 = "f63337f63f794ba9dc7dd281b20af3d036dfe0c1a5a4b7b8dc20b39f7e323b97",
+ strip_prefix = "riegeli-9f2744dc23e81d84c02f6f51244e9e9bb9802d57",
+ urls = tf_mirror_urls("https://github.com/google/riegeli/archive/9f2744dc23e81d84c02f6f51244e9e9bb9802d57.tar.gz"),
+ )
diff --git a/third_party/xla/third_party/rocm_device_libs/rocm_device_libs.BUILD b/third_party/xla/third_party/rocm_device_libs/rocm_device_libs.BUILD
index 11795b3537e7a9..1e52bb31c540fc 100644
--- a/third_party/xla/third_party/rocm_device_libs/rocm_device_libs.BUILD
+++ b/third_party/xla/third_party/rocm_device_libs/rocm_device_libs.BUILD
@@ -24,6 +24,7 @@ cc_binary(
 "@llvm-project//llvm:Core",
 "@llvm-project//llvm:IRReader",
 "@llvm-project//llvm:Support",
+ "@llvm-project//llvm:config",
 ],
 )
diff --git a/third_party/xla/third_party/shardy/temporary.patch b/third_party/xla/third_party/shardy/temporary.patch
index 1f51d21f432dd8..e69de29bb2d1d6 100644
--- a/third_party/xla/third_party/shardy/temporary.patch
+++ b/third_party/xla/third_party/shardy/temporary.patch
@@ -1,1157 +0,0 @@
-diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch
-index f04aa96..509398d 100644
---- a/third_party/llvm/generated.patch
-+++ b/third_party/llvm/generated.patch
-@@ -1,1137 +1 @@
- Auto generated patch. Do not edit or delete it, even if empty.
--diff -ruN --strip-trailing-cr a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
----- a/clang/docs/LanguageExtensions.rst
--+++ b/clang/docs/LanguageExtensions.rst
--@@ -1833,23 +1833,6 @@
--
-- Clang provides a few builtin aliases to improve the throughput of certain metaprogramming facilities.
--
---__builtin_common_reference
-----------------------------
---
---.. code-block:: c++
---
--- template