Skip to content

Commit 5811a8d

Browse files
pytorchbot and eqy authored
[cuDNN][SDPA][Convolution] Expose cuDNN runtime version in CUDA hooks (pytorch#167327)
[cuDNN][SDPA][Convolution] Expose cuDNN runtime version in CUDA hooks (pytorch#167111) cuDNN dispatching heuristics rely on version checks, but currently only the compile-time version is exposed. If we want to allow users to resolve pytorch#166643 on their end by updating their cuDNN version locally, we need to check the runtime version rather than the compile-time version. Pull Request resolved: pytorch#167111 Approved by: https://github.com/Skylion007 (cherry picked from commit e678450) Co-authored-by: Eddie Yan <[email protected]>
1 parent f36c764 commit 5811a8d

File tree

7 files changed

+43
-9
lines changed

7 files changed

+43
-9
lines changed

aten/src/ATen/Context.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,12 @@ class TORCH_API Context {
155155
static long versionCuDNN() {
156156
return detail::getCUDAHooks().versionCuDNN();
157157
}
158+
static long versionRuntimeCuDNN() {
159+
return detail::getCUDAHooks().versionRuntimeCuDNN();
160+
}
161+
static long versionCuDNNFrontend() {
162+
return detail::getCUDAHooks().versionCuDNNFrontend();
163+
}
158164
static bool hasCuSOLVER() {
159165
return detail::getCUDAHooks().hasCuSOLVER();
160166
}

aten/src/ATen/cuda/detail/CUDAHooks.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
#if AT_CUDNN_ENABLED()
2323
#include <ATen/cudnn/cudnn-wrapper.h>
24+
#include <cudnn_frontend.h>
2425
#endif
2526

2627
#if AT_MAGMA_ENABLED()
@@ -325,6 +326,26 @@ long CUDAHooks::versionCuDNN() const {
325326
#endif
326327
}
327328

329+
long CUDAHooks::versionRuntimeCuDNN() const {
330+
#if AT_CUDNN_ENABLED()
331+
#ifndef USE_STATIC_CUDNN
332+
return cudnnGetVersion();
333+
#else
334+
return CUDNN_VERSION;
335+
#endif
336+
#else
337+
TORCH_CHECK(false, "Cannot query CuDNN version if ATen_cuda is not built with CuDNN");
338+
#endif
339+
}
340+
341+
long CUDAHooks::versionCuDNNFrontend() const {
342+
#if AT_CUDNN_ENABLED()
343+
return CUDNN_FRONTEND_VERSION;
344+
#else
345+
TORCH_CHECK(false, "Cannot query CuDNN Frontend version if ATen_cuda is not built with CuDNN");
346+
#endif
347+
}
348+
328349
long CUDAHooks::versionMIOpen() const {
329350
#if AT_ROCM_ENABLED()
330351
return MIOPEN_VERSION_MAJOR * 10000 +

aten/src/ATen/cuda/detail/CUDAHooks.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ struct CUDAHooks : public at::CUDAHooksInterface {
4848
bool hasCUDART() const override;
4949
long versionCUDART() const override;
5050
long versionCuDNN() const override;
51+
long versionRuntimeCuDNN() const override;
52+
long versionCuDNNFrontend() const override;
5153
long versionMIOpen() const override;
5254
std::string showConfig() const override;
5355
double batchnormMinEpsilonCuDNN() const override;

aten/src/ATen/detail/CUDAHooksInterface.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,14 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
170170
TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP);
171171
}
172172

173+
virtual long versionRuntimeCuDNN() const {
174+
TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP);
175+
}
176+
177+
virtual long versionCuDNNFrontend() const {
178+
TORCH_CHECK(false, "Cannot query cuDNN Frontend version without ATen_cuda library. ", CUDA_HELP);
179+
}
180+
173181
virtual long versionMIOpen() const {
174182
TORCH_CHECK(false, "Cannot query MIOpen version without ATen_cuda library. ", CUDA_HELP);
175183
}

aten/src/ATen/native/Convolution.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ struct ConvParams {
413413
if (!detail::getCUDAHooks().compiledWithCuDNN() || !input.is_cuda() || !cudnn_enabled) {
414414
return false;
415415
}
416-
static long cudnn_version = detail::getCUDAHooks().versionCuDNN();
416+
static long cudnn_version = detail::getCUDAHooks().versionRuntimeCuDNN();
417417
// broken on cuDNN 9.8 - 9.14
418418
if (cudnn_version >= 90800 && cudnn_version < 91500) {
419419
if (cudnn_conv_suggest_memory_format(input, weight) == at::MemoryFormat::Contiguous &&
@@ -457,7 +457,7 @@ struct ConvParams {
457457
}
458458
// native kernel doesn't support 64-bit non-splittable case
459459
if (!(canUse32BitIndexMath(input) && canUse32BitIndexMath(weight))) {
460-
static long cudnn_version = detail::getCUDAHooks().compiledWithCuDNN() ? detail::getCUDAHooks().versionCuDNN() : -1;
460+
static long cudnn_version = detail::getCUDAHooks().compiledWithCuDNN() ? detail::getCUDAHooks().versionRuntimeCuDNN() : -1;
461461
// TODO(eqy): remove this once cuDNN fixes 64-bit depthwise support, first broken in 9.11x
462462
if (cudnn_conv_suggest_memory_format(input, weight) != at::MemoryFormat::Contiguous) {
463463
if (cudnn_version < 0 || cudnn_version > 91000) {

aten/src/ATen/native/transformers/cuda/sdp_utils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ bool check_cudnn_tensor_shapes(sdp_params const& params, bool debug) {
437437
const auto s_k = params.key.sym_size(2);
438438
const auto d_qk = params.query.sym_size(3);
439439
const auto d_v = params.value.sym_size(3);
440-
long cudnn_version = at::detail::getCUDAHooks().versionCuDNN();
440+
long cudnn_version = at::detail::getCUDAHooks().versionRuntimeCuDNN();
441441
if (cudnn_version < 8903) {
442442
if (debug) {
443443
TORCH_WARN("SDPA fprop requires cudnn 8.9.3 or higher");
@@ -668,7 +668,7 @@ bool can_use_cudnn_attention(const sdp_params& params, bool debug) {
668668
return false;
669669
#endif
670670
#if defined(CUDNN_VERSION)
671-
static auto cudnn_version = cudnnGetVersion();
671+
static auto cudnn_version = at::detail::getCUDAHooks().versionRuntimeCuDNN();
672672
if (params.dropout > 0.0 && cudnn_version > 91100 && cudnn_version < 91400) {
673673
if (debug) {
674674
TORCH_WARN(CUDNN_VERSION, " cuDNN version does not support droppout in SDPA (9.11 - 9.13).");

torch/csrc/cuda/shared/cudnn.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// This file should only be compiled if this condition holds, so it should be
33
// safe.
44
#if defined(USE_CUDNN) || defined(USE_ROCM)
5+
#include <ATen/detail/CUDAHooksInterface.h>
56
#include <torch/csrc/utils/pybind.h>
67

78
#include <tuple>
@@ -32,11 +33,7 @@ version_tuple getRuntimeVersion() {
3233
}
3334

3435
size_t getVersionInt() {
35-
#ifndef USE_STATIC_CUDNN
36-
return cudnnGetVersion();
37-
#else
38-
return CUDNN_VERSION;
39-
#endif
36+
return at::detail::getCUDAHooks().versionRuntimeCuDNN();
4037
}
4138

4239
} // namespace

0 commit comments

Comments (0)