From a21175a141beac07b12ad8828b1d37c21674166a Mon Sep 17 00:00:00 2001
From: Lifann <804114271@qq.com>
Date: Sun, 29 Jun 2025 21:51:39 +0800
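Subject: [PATCH 1/2] feat(build_deps): Remove unused cuDNN for lite deployment

Drop the cuDNN library and header targets from the CUDA BUILD templates,
the cuDNN lookup from cuda_configure.bzl and find_cuda_config.py, and the
TF_CUDNN_VERSION / CUDNN_INSTALL_PATH settings from configure.py, the
Docker build files, the serving .bazelrc and the README, so that
configuring the CUDA toolchain no longer searches for cuDNN.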

---
 README.md                                     | 64 +++++++--------
 build_deps/toolchains/gpu/cuda/BUILD.tpl      | 22 -----
 .../toolchains/gpu/cuda/BUILD.windows.tpl     | 17 ----
 .../toolchains/gpu/cuda/cuda_config.h.tpl     |  1 -
 build_deps/toolchains/gpu/cuda_configure.bzl  | 80 ++-----------------
 build_deps/toolchains/gpu/find_cuda_config.py | 35 +-------
 configure.py                                  |  5 --
 tools/build_dev_container.sh                  | 11 +--
 tools/docker/build_wheel.Dockerfile           |  2 -
 tools/docker/dev_container.Dockerfile         |  3 -
 tools/serving_padding/.bazelrc_gpu_padding    |  2 -
 11 files changed, 40 insertions(+), 202 deletions(-)

diff --git a/README.md b/README.md
index ece12d425..acab7f45a 100644
--- a/README.md
+++ b/README.md
@@ -104,26 +104,24 @@ is compiled differently. A typical example of this would be `conda`-installed Te
 #### Compatibility Matrix
 *GPU is supported by version `0.2.0` and later.*
 
-| TFRA  | TensorFlow | Compiler   | CUDA | CUDNN | Compute Capability           | CPU      |
-|:------|:-----------|:-----------|:-----|:------|:-----------------------------|:---------|
-| 0.8.0 | 2.16.2     | GCC 8.2.1  | 12.3 | 8.9   | 7.0, 7.5, 8.0, 8.6, 8.9, 9.0 | x86      |
-| 0.8.0 | 2.16.2     | Xcode 13.1 | -    | -     | -                            | Apple M1 |
-| 0.7.0 | 2.15.1     | GCC 8.2.1  | 12.2 | 8.9   | 7.0, 7.5, 8.0, 8.6, 8.9, 9.0 | x86      |
-| 0.7.0 | 2.15.1     | Xcode 13.1 | -    | -     | -                            | Apple M1 |
-| 0.6.0 | 2.8.3      | GCC 7.3.1  | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
-| 0.6.0 | 2.6.0      | Xcode 13.1 | -    | -     | -                            | Apple M1 |
-| 0.5.1 | 2.8.3      | GCC 7.3.1  | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
-| 0.5.1 | 2.6.0      | Xcode 13.1 | -    | -     | -                            | Apple M1 |
-| 0.5.0 | 2.8.3      | GCC 7.3.1  | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
-| 0.5.0 | 2.6.0      | Xcode 13.1 | -    | -     | -                            | Apple M1 |
-| 0.4.0 | 2.5.1      | GCC 7.3.1  | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
-| 0.4.0 | 2.5.0      | Xcode 13.1 | -    | -     | -                            | Apple M1 |
-| 0.3.1 | 2.5.1      | GCC 7.3.1  | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
-| 0.2.0 | 2.4.1      | GCC 7.3.1  | 11.0 | 8.0   | 6.0, 6.1, 7.0, 7.5, 8.0      | x86      |
-| 0.2.0 | 1.15.2     | GCC 7.3.1  | 10.0 | 7.6   | 6.0, 6.1, 7.0, 7.5           | x86      |
-| 0.1.0 | 2.4.1      | GCC 7.3.1  | -    | -     | -                            | x86      |
-
-Check [nvidia-support-matrix](https://docs.nvidia.com/deeplearning/cudnn/support-matrix/index.html) for more details.
+| TFRA  | TensorFlow | Compiler   | CUDA | Compute Capability           | CPU      |
+|:------|:-----------|:-----------|:-----|:-----------------------------|:---------|
+| 0.8.0 | 2.16.2     | GCC 8.2.1  | 12.3 | 7.0, 7.5, 8.0, 8.6, 8.9, 9.0 | x86      |
+| 0.8.0 | 2.16.2     | Xcode 13.1 | -    | -                            | Apple M1 |
+| 0.7.0 | 2.15.1     | GCC 8.2.1  | 12.2 | 7.0, 7.5, 8.0, 8.6, 8.9, 9.0 | x86      |
+| 0.7.0 | 2.15.1     | Xcode 13.1 | -    | -                            | Apple M1 |
+| 0.6.0 | 2.8.3      | GCC 7.3.1  | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
+| 0.6.0 | 2.6.0      | Xcode 13.1 | -    | -                            | Apple M1 |
+| 0.5.1 | 2.8.3      | GCC 7.3.1  | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
+| 0.5.1 | 2.6.0      | Xcode 13.1 | -    | -                            | Apple M1 |
+| 0.5.0 | 2.8.3      | GCC 7.3.1  | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
+| 0.5.0 | 2.6.0      | Xcode 13.1 | -    | -                            | Apple M1 |
+| 0.4.0 | 2.5.1      | GCC 7.3.1  | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
+| 0.4.0 | 2.5.0      | Xcode 13.1 | -    | -                            | Apple M1 |
+| 0.3.1 | 2.5.1      | GCC 7.3.1  | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 | x86      |
+| 0.2.0 | 2.4.1      | GCC 7.3.1  | 11.0 | 6.0, 6.1, 7.0, 7.5, 8.0      | x86      |
+| 0.2.0 | 1.15.2     | GCC 7.3.1  | 10.0 | 6.0, 6.1, 7.0, 7.5           | x86      |
+| 0.1.0 | 2.4.1      | GCC 7.3.1  | -    | -                            | x86      |
 
 **NOTICE**
 
@@ -178,9 +176,7 @@ export TF_VERSION="2.15.1"  # "2.11.0" is well tested.
 export PY_VERSION="3.9" 
 export TF_NEED_CUDA=1
 export TF_CUDA_VERSION=12.2 # nvcc --version to check version
-export TF_CUDNN_VERSION=8.9 # print("cuDNN version:", tf.sysconfig.get_build_info()["cudnn_version"])
 export CUDA_TOOLKIT_PATH="/usr/local/cuda"
-export CUDNN_INSTALL_PATH="/usr/lib/x86_64-linux-gnu"
 
 python configure.py
 ```
@@ -284,18 +280,18 @@ sess_config.gpu_options.allow_growth = True
 ### With TensorFlow Serving
 
 #### Compatibility Matrix
-| TFRA  | TensorFlow | Serving branch | Compiler  | CUDA | CUDNN | Compute Capability           |
-|:------|:-----------|:---------------|:----------|:-----|:------|:-----------------------------|
-| 0.8.0 | 2.16.2     | r2.16          | GCC 8.2.1 | 12.3 | 8.9   | 7.0, 7.5, 8.0, 8.6, 8.9, 9.0 |
-| 0.7.0 | 2.15.1     | r2.15          | GCC 8.2.1 | 12.2 | 8.9   | 7.0, 7.5, 8.0, 8.6, 8.9, 9.0 |
-| 0.6.0 | 2.8.3      | r2.8           | GCC 7.3.1 | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
-| 0.5.1 | 2.8.3      | r2.8           | GCC 7.3.1 | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
-| 0.5.0 | 2.8.3      | r2.8           | GCC 7.3.1 | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
-| 0.4.0 | 2.5.1      | r2.5           | GCC 7.3.1 | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
-| 0.3.1 | 2.5.1      | r2.5           | GCC 7.3.1 | 11.2 | 8.1   | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
-| 0.2.0 | 2.4.1      | r2.4           | GCC 7.3.1 | 11.0 | 8.0   | 6.0, 6.1, 7.0, 7.5, 8.0      |
-| 0.2.0 | 1.15.2     | r1.15          | GCC 7.3.1 | 10.0 | 7.6   | 6.0, 6.1, 7.0, 7.5           |
-| 0.1.0 | 2.4.1      | r2.4           | GCC 7.3.1 | -    | -     | -                            |
+| TFRA  | TensorFlow | Serving branch | Compiler  | CUDA | Compute Capability           |
+|:------|:-----------|:---------------|:----------|:-----|:-----------------------------|
+| 0.8.0 | 2.16.2     | r2.16          | GCC 8.2.1 | 12.3 | 7.0, 7.5, 8.0, 8.6, 8.9, 9.0 |
+| 0.7.0 | 2.15.1     | r2.15          | GCC 8.2.1 | 12.2 | 7.0, 7.5, 8.0, 8.6, 8.9, 9.0 |
+| 0.6.0 | 2.8.3      | r2.8           | GCC 7.3.1 | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
+| 0.5.1 | 2.8.3      | r2.8           | GCC 7.3.1 | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
+| 0.5.0 | 2.8.3      | r2.8           | GCC 7.3.1 | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
+| 0.4.0 | 2.5.1      | r2.5           | GCC 7.3.1 | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
+| 0.3.1 | 2.5.1      | r2.5           | GCC 7.3.1 | 11.2 | 6.0, 6.1, 7.0, 7.5, 8.0, 8.6 |
+| 0.2.0 | 2.4.1      | r2.4           | GCC 7.3.1 | 11.0 | 6.0, 6.1, 7.0, 7.5, 8.0      |
+| 0.2.0 | 1.15.2     | r1.15          | GCC 7.3.1 | 10.0 | 6.0, 6.1, 7.0, 7.5           |
+| 0.1.0 | 2.4.1      | r2.4           | GCC 7.3.1 | -    | -                            |
 
 Serving TFRA-enable models by custom ops in TensorFlow Serving. 
  
diff --git a/build_deps/toolchains/gpu/cuda/BUILD.tpl b/build_deps/toolchains/gpu/cuda/BUILD.tpl
index 1ac5643f1..f8abdb0ec 100644
--- a/build_deps/toolchains/gpu/cuda/BUILD.tpl
+++ b/build_deps/toolchains/gpu/cuda/BUILD.tpl
@@ -117,27 +117,6 @@ cc_library(
     visibility = ["//visibility:public"],
 )
 
-cc_library(
-    name = "cudnn",
-    srcs = ["cuda/lib/%{cudnn_lib}"],
-    data = ["cuda/lib/%{cudnn_lib}"],
-    includes = [
-        ".",
-        "cuda/include",
-    ],
-    linkstatic = 1,
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cudnn_header",
-    includes = [
-        ".",
-        "cuda/include",
-    ],
-    visibility = ["//visibility:public"],
-)
-
 cc_library(
     name = "cufft",
     srcs = ["cuda/lib/%{cufft_lib}"],
@@ -169,7 +148,6 @@ cc_library(
         ":cublas",
         ":cuda_headers",
         ":cudart",
-        ":cudnn",
         ":cufft",
         ":curand",
     ],
diff --git a/build_deps/toolchains/gpu/cuda/BUILD.windows.tpl b/build_deps/toolchains/gpu/cuda/BUILD.windows.tpl
index 3ed4fd415..5d3a75cde 100644
--- a/build_deps/toolchains/gpu/cuda/BUILD.windows.tpl
+++ b/build_deps/toolchains/gpu/cuda/BUILD.windows.tpl
@@ -91,22 +91,6 @@ cc_import(
     visibility = ["//visibility:public"],
 )
 
-cc_import(
-    name = "cudnn",
-    interface_library = "cuda/lib/%{cudnn_lib}",
-    system_provided = 1,
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cudnn_header",
-    includes = [
-        ".",
-        "cuda/include",
-    ],
-    visibility = ["//visibility:public"],
-)
-
 cc_import(
     name = "cufft",
     interface_library = "cuda/lib/%{cufft_lib}",
@@ -128,7 +112,6 @@ cc_library(
         ":cublas",
         ":cuda_headers",
         ":cudart",
-        ":cudnn",
         ":cufft",
         ":curand",
     ],
diff --git a/build_deps/toolchains/gpu/cuda/cuda_config.h.tpl b/build_deps/toolchains/gpu/cuda/cuda_config.h.tpl
index 811b040e8..b0eda3864 100644
--- a/build_deps/toolchains/gpu/cuda/cuda_config.h.tpl
+++ b/build_deps/toolchains/gpu/cuda/cuda_config.h.tpl
@@ -19,7 +19,6 @@ limitations under the License.
 #define TF_CUDA_CAPABILITIES %{cuda_compute_capabilities}
 
 #define TF_CUDA_VERSION "%{cuda_version}"
-#define TF_CUDNN_VERSION "%{cudnn_version}"
 
 #define TF_CUDA_TOOLKIT_PATH "%{cuda_toolkit_path}"
 
diff --git a/build_deps/toolchains/gpu/cuda_configure.bzl b/build_deps/toolchains/gpu/cuda_configure.bzl
index 18f530c0b..dfae05f37 100644
--- a/build_deps/toolchains/gpu/cuda_configure.bzl
+++ b/build_deps/toolchains/gpu/cuda_configure.bzl
@@ -6,15 +6,12 @@
   * `TF_CUDA_CLANG`: Whether to use clang as a cuda compiler.
   * `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
     both host and device code compilation if TF_CUDA_CLANG is 1.
-  * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is
+  * `TF_CUDA_PATHS`: The base paths to look for CUDA. Default is
     `/usr/local/cuda,usr/`.
   * `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
     `/usr/local/cuda`.
   * `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
     use the system default.
-  * `TF_CUDNN_VERSION`: The version of the cuDNN library.
-  * `CUDNN_INSTALL_PATH`: The path to the cuDNN library. Default is
-    `/usr/local/cuda`.
   * `TF_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
     `3.5,5.2`.
   * `PYTHON_BIN_PATH`: The python binary path
@@ -40,10 +37,6 @@ _CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
 
 _TF_CUDA_VERSION = "TF_CUDA_VERSION"
 
-_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
-
-_CUDNN_INSTALL_PATH = "CUDNN_INSTALL_PATH"
-
 _TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
 
 _TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG"
@@ -554,12 +547,12 @@ def _find_cuda_lib(
         basedir,
         version,
         static = False):
-    """Finds the given CUDA or cuDNN library on the system.
+    """Finds the given CUDA library on the system.
       Args:
         lib: The name of the library, such as "cudart"
         repository_ctx: The repository context.
         cpu_value: The name of the host operating system.
-        basedir: The install directory of CUDA or cuDNN.
+        basedir: The install directory of CUDA.
         version: The version of the library.
         static: True if static library, False if shared object.
       Returns:
@@ -581,7 +574,7 @@ def _find_cuda_lib(
     )
 
 def _find_libs(repository_ctx, cuda_config):
-    """Returns the CUDA and cuDNN libraries on the system.
+    """Returns the CUDA libraries on the system.
       Args:
         repository_ctx: The repository context.
         cuda_config: The CUDA config as returned by _get_cuda_config
@@ -641,13 +634,6 @@ def _find_libs(repository_ctx, cuda_config):
             cuda_config.config["cufft_library_dir"],
             cuda_config.cufft_version,
         ),
-        "cudnn": _find_cuda_lib(
-            "cudnn",
-            repository_ctx,
-            cpu_value,
-            cuda_config.config["cudnn_library_dir"],
-            cuda_config.cudnn_version,
-        ),
         "cupti": _find_cuda_lib(
             "cupti",
             repository_ctx,
@@ -668,14 +654,12 @@ def _get_cuda_config(repository_ctx):
       Returns:
         A struct containing the following fields:
           cuda_toolkit_path: The CUDA toolkit installation directory.
-          cudnn_install_basedir: The cuDNN installation directory.
           cuda_version: The version of CUDA on the system.
           cudart_version: The CUDA runtime version on the system.
-          cudnn_version: The version of cuDNN on the system.
           compute_capabilities: A list of the system's CUDA compute capabilities.
           cpu_value: The name of the host operating system.
       """
-    config = find_cuda_config(repository_ctx, ["cuda", "cudnn"])
+    config = find_cuda_config(repository_ctx, ["cuda"])
     cpu_value = get_cpu_value(repository_ctx)
     toolkit_path = config["cuda_toolkit_path"]
 
@@ -685,7 +669,6 @@ def _get_cuda_config(repository_ctx):
     cuda_minor = cuda_version[1]
 
     cuda_version = ("64_%s%s" if is_windows else "%s.%s") % (cuda_major, cuda_minor)
-    cudnn_version = ("64_%s" if is_windows else "%s") % config["cudnn_version"]
 
     if int(cuda_major) >= 11:
         # The libcudart soname in CUDA 11.x is versioned as 11.0 for backward compatability.
@@ -721,7 +704,6 @@ def _get_cuda_config(repository_ctx):
         cusolver_version = cusolver_version,
         curand_version = curand_version,
         cufft_version = cufft_version,
-        cudnn_version = cudnn_version,
         compute_capabilities = compute_capabilities(repository_ctx, cuda_version),
         cpu_value = cpu_value,
         config = config,
@@ -769,7 +751,6 @@ def _create_dummy_repository(repository_ctx):
             "%{cudart_lib}": lib_name("cudart", cpu_value),
             "%{cublas_lib}": lib_name("cublas", cpu_value),
             "%{cusolver_lib}": lib_name("cusolver", cpu_value),
-            "%{cudnn_lib}": lib_name("cudnn", cpu_value),
             "%{cufft_lib}": lib_name("cufft", cpu_value),
             "%{curand_lib}": lib_name("curand", cpu_value),
             "%{cupti_lib}": lib_name("cupti", cpu_value),
@@ -782,7 +763,6 @@ def _create_dummy_repository(repository_ctx):
     # tensorflow/core/platform/default/build_config:cuda.
     repository_ctx.file("cuda/cuda/include/cuda.h")
     repository_ctx.file("cuda/cuda/include/cublas.h")
-    repository_ctx.file("cuda/cuda/include/cudnn.h")
     repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h")
     repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cuda", cpu_value))
     repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cudart", cpu_value))
@@ -791,7 +771,6 @@ def _create_dummy_repository(repository_ctx):
     )
     repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cublas", cpu_value))
     repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cusolver", cpu_value))
-    repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cudnn", cpu_value))
     repository_ctx.file("cuda/cuda/lib/%s" % lib_name("curand", cpu_value))
     repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cufft", cpu_value))
     repository_ctx.file("cuda/cuda/lib/%s" % lib_name("cupti", cpu_value))
@@ -896,7 +875,6 @@ def _create_local_cuda_repository(repository_ctx):
 
     cuda_include_path = cuda_config.config["cuda_include_dir"]
     cublas_include_path = cuda_config.config["cublas_include_dir"]
-    cudnn_header_dir = cuda_config.config["cudnn_include_dir"]
     cupti_header_dir = cuda_config.config["cupti_include_dir"]
     nvvm_libdevice_dir = cuda_config.config["nvvm_library_dir"]
 
@@ -957,46 +935,6 @@ def _create_local_cuda_repository(repository_ctx):
         out_dir = "cuda/bin",
     ))
 
-    # Copy cudnn.h if cuDNN was not installed to CUDA_TOOLKIT_PATH.
-    included_files = _read_dir(repository_ctx, cuda_include_path)
-    if not any([file.endswith("cudnn.h") for file in included_files]):
-        cudnn_headers = ["cudnn.h"]
-        if cuda_config.cudnn_version.rsplit("_", 1)[-1] >= "9":
-            cudnn_headers += [
-                "cudnn_adv.h",
-                "cudnn_backend.h",
-                "cudnn_cnn.h",
-                "cudnn_graph.h",
-                "cudnn_ops.h",
-                "cudnn_version.h",
-            ]
-        elif cuda_config.cudnn_version.rsplit("_", 1)[-1] >= "8":
-            cudnn_headers += [
-                "cudnn_backend.h",
-                "cudnn_adv_infer.h",
-                "cudnn_adv_train.h",
-                "cudnn_cnn_infer.h",
-                "cudnn_cnn_train.h",
-                "cudnn_ops_infer.h",
-                "cudnn_ops_train.h",
-                "cudnn_version.h",
-            ]
-
-        cudnn_srcs = []
-        cudnn_outs = []
-        for header in cudnn_headers:
-            cudnn_srcs.append(cudnn_header_dir + "/" + header)
-            cudnn_outs.append("cudnn/include/" + header)
-
-        copy_rules.append(make_copy_files_rule(
-            repository_ctx,
-            name = "cudnn-include",
-            srcs = cudnn_srcs,
-            outs = cudnn_outs,
-        ))
-    else:
-        copy_rules.append("filegroup(name = 'cudnn-include')\n")
-
     # Set up BUILD file for cuda/
     _tpl(
         repository_ctx,
@@ -1017,13 +955,12 @@ def _create_local_cuda_repository(repository_ctx):
             "%{cudart_lib}": cuda_libs["cudart"].basename,
             "%{cublas_lib}": cuda_libs["cublas"].basename,
             "%{cusolver_lib}": cuda_libs["cusolver"].basename,
-            "%{cudnn_lib}": cuda_libs["cudnn"].basename,
             "%{cufft_lib}": cuda_libs["cufft"].basename,
             "%{curand_lib}": cuda_libs["curand"].basename,
             "%{cupti_lib}": cuda_libs["cupti"].basename,
             "%{copy_rules}": "\n".join(copy_rules),
             "%{cuda_headers}": (
-                '":cuda-include",\n' + '        ":cudnn-include",'
+                '":cuda-include",'
             ),
         },
         "cuda/BUILD",
@@ -1058,8 +995,7 @@ def _create_local_cuda_repository(repository_ctx):
             repository_ctx,
             cuda_config,
         ) +
-        "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir +
-        "\n  cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir
+        "\n  cxx_builtin_include_directory: \"%s\"" % cupti_header_dir
     )
 
     # For gcc, do not canonicalize system header paths; some versions of gcc
@@ -1155,9 +1091,7 @@ cuda_configure = repository_rule(
         "TF_CUDA_CLANG",
         _TF_DOWNLOAD_CLANG,
         _CUDA_TOOLKIT_PATH,
-        _CUDNN_INSTALL_PATH,
         _TF_CUDA_VERSION,
-        _TF_CUDNN_VERSION,
         _TF_CUDA_COMPUTE_CAPABILITIES,
         "NVVMIR_LIBRARY_DIR",
         _PYTHON_BIN_PATH,
diff --git a/build_deps/toolchains/gpu/find_cuda_config.py b/build_deps/toolchains/gpu/find_cuda_config.py
index d3b4d1ca3..d913bbde2 100644
--- a/build_deps/toolchains/gpu/find_cuda_config.py
+++ b/build_deps/toolchains/gpu/find_cuda_config.py
@@ -21,7 +21,7 @@
 to stderr and returns an error code.
 
 The list of libraries to find is specified as arguments. Supported libraries
-are CUDA (includes cuBLAS), cuDNN, NCCL, and TensorRT.
+are CUDA (includes cuBLAS), NCCL, and TensorRT.
 
 The script takes a list of base directories specified by the TF_CUDA_PATHS
 environment variable as comma-separated glob list. The script looks for headers
@@ -40,7 +40,6 @@
   ----------------------------------------------------------------
   CUDA      TF_CUDA_VERSION       CUDA_TOOLKIT_PATH
   cuBLAS    TF_CUBLAS_VERSION     CUDA_TOOLKIT_PATH
-  cuDNN     TF_CUDNN_VERSION      CUDNN_INSTALL_PATH
   NCCL      TF_NCCL_VERSION       NCCL_INSTALL_PATH, NCCL_HDR_PATH
   TensorRT  TF_TENSORRT_VERSION   TENSORRT_INSTALL_PATH
 
@@ -165,7 +164,6 @@ def _get_default_cuda_paths(cuda_version):
       "/usr/local/cuda-%s" % cuda_version,
       "/usr/local/cuda",
       "/usr",
-      "/usr/local/cudnn",
   ] + _get_ld_config_paths()
 
 
@@ -443,30 +441,6 @@ def get_header_version(path):
   }
 
 
-def _find_cudnn_config(base_paths, required_version):
-
-  def get_header_version(path):
-    version = [
-        _get_header_version(path, name)
-        for name in ("CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL")
-    ]
-    return ".".join(version) if version[0] else None
-
-  header_path, header_version = _find_header(base_paths,
-                                             ("cudnn.h", "cudnn_version.h"),
-                                             required_version,
-                                             get_header_version)
-  cudnn_version = header_version.split(".")[0]
-
-  library_path = _find_library(base_paths, "cudnn", cudnn_version)
-
-  return {
-      "cudnn_version": cudnn_version,
-      "cudnn_include_dir": os.path.dirname(header_path),
-      "cudnn_library_dir": os.path.dirname(library_path),
-  }
-
-
 def _find_cusparse_config(base_paths, required_version, cuda_version):
 
   if _at_least_version(cuda_version, "11.0"):
@@ -562,7 +536,7 @@ def _list_from_env(env_name, default=[]):
 def _get_legacy_path(env_name, default=[]):
   """Returns a path specified by a legacy environment variable.
 
-    CUDNN_INSTALL_PATH, NCCL_INSTALL_PATH, TENSORRT_INSTALL_PATH set to
+    NCCL_INSTALL_PATH, TENSORRT_INSTALL_PATH set to
     '/usr/lib/x86_64-linux-gnu' would previously find both library and header
     paths. Detect those and return '/usr', otherwise forward to _list_from_env().
     """
@@ -630,11 +604,6 @@ def find_cuda_config():
     result.update(
         _find_cusparse_config(cusparse_paths, cusparse_version, cuda_version))
 
-  if "cudnn" in libraries:
-    cudnn_paths = _get_legacy_path("CUDNN_INSTALL_PATH", base_paths)
-    cudnn_version = os.environ.get("TF_CUDNN_VERSION", "")
-    result.update(_find_cudnn_config(cudnn_paths, cudnn_version))
-
   if "nccl" in libraries:
     nccl_paths = _get_legacy_path("NCCL_INSTALL_PATH", base_paths)
     nccl_version = os.environ.get("TF_NCCL_VERSION", "")
diff --git a/configure.py b/configure.py
index 47bf3a99b..c4a100eff 100644
--- a/configure.py
+++ b/configure.py
@@ -273,12 +273,7 @@ def configure_cuda():
   write_action_env("TF_NEED_CUDA", "1")
   write_action_env("CUDA_TOOLKIT_PATH",
                    os.getenv("CUDA_TOOLKIT_PATH", "/usr/local/cuda"))
-  write_action_env(
-      "CUDNN_INSTALL_PATH",
-      os.getenv("CUDNN_INSTALL_PATH", "/usr/lib/x86_64-linux-gnu"),
-  )
   write_action_env("TF_CUDA_VERSION", os.getenv("TF_CUDA_VERSION", "11.0"))
-  write_action_env("TF_CUDNN_VERSION", os.getenv("TF_CUDNN_VERSION", "8.0"))
 
   write("test --config=cuda")
   write("build --config=cuda")
diff --git a/tools/build_dev_container.sh b/tools/build_dev_container.sh
index 532d64528..ccfd52d1f 100755
--- a/tools/build_dev_container.sh
+++ b/tools/build_dev_container.sh
@@ -25,31 +25,24 @@ export TF_NAME='tensorflow'
 if [[ "$TF_VERSION" =~ ^2\.(16)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda12.3-cudnn8.9-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="12.3"
-  export TF_CUDNN_VERSION="8.9"
 elif [[ "$TF_VERSION" =~ ^2\.(15)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="12.2"
-  export TF_CUDNN_VERSION="8.9"
 elif [[ "$TF_VERSION" =~ ^2\.(14)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda11.8-cudnn8.7-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="11.8"
-  export TF_CUDNN_VERSION="8.7"
 elif [[ "$TF_VERSION" =~ ^2\.(12|13)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="11.8"
-  export TF_CUDNN_VERSION="8.6"
 elif [[ "$TF_VERSION" =~ ^2\.([6-9]|10|11)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda11.2-cudnn8-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="11.2"
-  export TF_CUDNN_VERSION="8.1"
 elif [ $TF_VERSION == "2.4.1" ] ; then
   export BUILD_IMAGE='tfra/nosla-cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython'
   export TF_CUDA_VERSION="11.0"
-  export TF_CUDNN_VERSION="8.0"
 elif [ $TF_VERSION == "1.15.2" ] ; then
   export BUILD_IMAGE='tfra/nosla-cuda10.0-cudnn7-ubuntu16.04-manylinux2010-multipython'
   export TF_CUDA_VERSION="10.0"
-  export TF_CUDNN_VERSION="7.6"
 else
   echo "TF_VERSION is invalid: $TF_VERSION!"
   exit 1
@@ -57,7 +50,6 @@ fi
 
 echo "BUILD_IMAGE is $BUILD_IMAGE"
 echo "TF_CUDA_VERSION is $TF_CUDA_VERSION"
-echo "TF_CUDNN_VERSION is $TF_CUDNN_VERSION"
 
 if [ -z $HOROVOD_VERSION ] ; then
   export HOROVOD_VERSION='0.28.1'
@@ -75,10 +67,9 @@ docker build \
     --build-arg TF_NAME \
     --build-arg TF_NEED_CUDA \
     --build-arg TF_CUDA_VERSION \
-    --build-arg TF_CUDNN_VERSION \
     --build-arg HOROVOD_VERSION \
     --build-arg BUILD_IMAGE \
     --build-arg PROTOBUF_VERSION \
     --no-cache \
     --target dev_container \
-    -t tfra/dev_container:latest-tf$TF_VERSION-python$PY_VERSION ./
\ No newline at end of file
+    -t tfra/dev_container:latest-tf$TF_VERSION-python$PY_VERSION ./
diff --git a/tools/docker/build_wheel.Dockerfile b/tools/docker/build_wheel.Dockerfile
index 8ba7a113e..7c0088dbd 100644
--- a/tools/docker/build_wheel.Dockerfile
+++ b/tools/docker/build_wheel.Dockerfile
@@ -72,12 +72,10 @@ ARG NIGHTLY_FLAG
 ARG NIGHTLY_TIME
 ARG TF_NEED_CUDA
 ARG TF_CUDA_VERSION
-ARG TF_CUDNN_VERSION
 ARG HOROVOD_VERSION
 ARG PROTOBUF_VERSION
 ENV TF_NEED_CUDA=$TF_NEED_CUDA
 ENV TF_CUDA_VERSION=$TF_CUDA_VERSION
-ENV TF_CUDNN_VERSION=$TF_CUDNN_VERSION
 
 RUN python -m pip install --upgrade pip
 RUN python configure.py
diff --git a/tools/docker/dev_container.Dockerfile b/tools/docker/dev_container.Dockerfile
index 7104b443d..f5bf2a0cb 100644
--- a/tools/docker/dev_container.Dockerfile
+++ b/tools/docker/dev_container.Dockerfile
@@ -64,7 +64,6 @@ ARG TF_VERSION
 ARG PY_VERSION
 ARG TF_NEED_CUDA
 ARG TF_CUDA_VERSION
-ARG TF_CUDNN_VERSION
 ARG TF_NAME
 ARG HOROVOD_VERSION
 ARG BUILD_IMAGE
@@ -84,6 +83,4 @@ RUN echo "export TF_NAME=$TF_NAME" >> ~/.bashrc
 RUN echo "export PROTOBUF_VERSION=$PROTOBUF_VERSION" >> ~/.bashrc
 RUN echo "export TF_NEED_CUDA=1" >> ~/.bashrc
 RUN echo "export TF_CUDA_VERSION=$TF_CUDA_VERSION" >> ~/.bashrc
-RUN echo "export TF_CUDNN_VERSION=$TF_CUDNN_VERSION" >> ~/.bashrc
 RUN echo "export CUDA_TOOLKIT_PATH='/usr/local/cuda'" >> ~/.bashrc
-RUN echo "export CUDNN_INSTALL_PATH='/usr/lib/x86_64-linux-gnu'" >> ~/.bashrc
diff --git a/tools/serving_padding/.bazelrc_gpu_padding b/tools/serving_padding/.bazelrc_gpu_padding
index 6159d5da1..0573e6396 100644
--- a/tools/serving_padding/.bazelrc_gpu_padding
+++ b/tools/serving_padding/.bazelrc_gpu_padding
@@ -1,8 +1,6 @@
 build --action_env TF_NEED_CUDA="1"
 build --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda"
-build --action_env CUDNN_INSTALL_PATH="/usr/lib/x86_64-linux-gnu"
 build --action_env TF_CUDA_VERSION="11.2"
-build --action_env TF_CUDNN_VERSION="8.1"
 test --config=cuda
 build --config=cuda
 build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true

From 9afee0e61ae959202fae9d729c24301695a82dfc Mon Sep 17 00:00:00 2001
From: Lifann <804114271@qq.com>
Date: Mon, 30 Jun 2025 04:26:21 +0800
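Subject: [PATCH 2/2] Remove cuDNN from CI procedure

Stop exporting TF_CUDNN_VERSION and passing it as a Docker build
argument in .github/workflows/make_wheel_Linux_x86.sh, matching the
toolchain change in the previous patch.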

---
 .github/workflows/make_wheel_Linux_x86.sh | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/.github/workflows/make_wheel_Linux_x86.sh b/.github/workflows/make_wheel_Linux_x86.sh
index 69de8a043..202c66151 100644
--- a/.github/workflows/make_wheel_Linux_x86.sh
+++ b/.github/workflows/make_wheel_Linux_x86.sh
@@ -11,32 +11,25 @@ export TF_NAME='tensorflow'
 if [[ "$TF_VERSION" =~ ^2\.(16)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda12.3-cudnn8.9-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="12.3"
-  export TF_CUDNN_VERSION="8.9"
   export TF_USE_LEGACY_KERAS=1
 elif [[ "$TF_VERSION" =~ ^2\.(15)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda12.2-cudnn8.9-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="12.2"
-  export TF_CUDNN_VERSION="8.9"
 elif [[ "$TF_VERSION" =~ ^2\.(14)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda11.8-cudnn8.7-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="11.8"
-  export TF_CUDNN_VERSION="8.7"
 elif [[ "$TF_VERSION" =~ ^2\.(12|13)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda11.8-cudnn8.6-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="11.8"
-  export TF_CUDNN_VERSION="8.6"
 elif [[ "$TF_VERSION" =~ ^2\.([6-9]|10|11)\.[0-9]+$ ]] ; then
   export BUILD_IMAGE="tfra/nosla-cuda11.2-cudnn8-ubuntu20.04-manylinux2014-python$PY_VERSION"
   export TF_CUDA_VERSION="11.2"
-  export TF_CUDNN_VERSION="8.1"
 elif [ $TF_VERSION == "2.4.1" ] ; then
   export BUILD_IMAGE='tfra/nosla-cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython'
   export TF_CUDA_VERSION="11.0"
-  export TF_CUDNN_VERSION="8.0"
 elif [ $TF_VERSION == "1.15.2" ] ; then
   export BUILD_IMAGE='tfra/nosla-cuda10.0-cudnn7-ubuntu16.04-manylinux2010-multipython'
   export TF_CUDA_VERSION="10.0"
-  export TF_CUDNN_VERSION="7.6"
 else
   echo "TF_VERSION is invalid: $TF_VERSION!"
   exit 1
@@ -44,7 +37,6 @@ fi
 
 echo "BUILD_IMAGE is $BUILD_IMAGE"
 echo "TF_CUDA_VERSION is $TF_CUDA_VERSION"
-echo "TF_CUDNN_VERSION is $TF_CUDNN_VERSION"
 
 if [ -z $HOROVOD_VERSION ] ; then
   export HOROVOD_VERSION='0.28.1'
@@ -66,7 +58,6 @@ DOCKER_BUILDKIT=1 docker build --no-cache \
     --build-arg TF_NAME \
     --build-arg TF_NEED_CUDA \
     --build-arg TF_CUDA_VERSION \
-    --build-arg TF_CUDNN_VERSION \
     --build-arg HOROVOD_VERSION \
     --build-arg BUILD_IMAGE \
     --build-arg NIGHTLY_FLAG \