
Commit 06c6a81

Revert "[ROCm] change preferred blas lib defaults (pytorch#150249)" (pytorch#150658)
This reverts commit 8b6bc59.
1 parent 3b61d5d commit 06c6a81

File tree

7 files changed (+11, -101 lines)


aten/src/ATen/BlasBackend.h

Lines changed: 1 addition & 3 deletions
@@ -7,12 +7,10 @@

 namespace at {

-enum class BlasBackend : int8_t { Default, Cublas, Cublaslt, Ck };
+enum class BlasBackend : int8_t { Cublas, Cublaslt, Ck };

 inline std::string BlasBackendToString(at::BlasBackend backend) {
   switch (backend) {
-    case BlasBackend::Default:
-      return "at::BlasBackend::Default";
     case BlasBackend::Cublas:
       return "at::BlasBackend::Cublas";
     case BlasBackend::Cublaslt:

aten/src/ATen/Context.cpp

Lines changed: 3 additions & 30 deletions
@@ -326,34 +326,7 @@ void Context::setLinalgPreferredBackend(at::LinalgBackend b) {
 }

 at::BlasBackend Context::blasPreferredBackend() {
-  // Rather than put logic for interpreting what Default means at every
-  // call site for blasPreferredBackend(), we set it to an actual value.
-  if (blas_preferred_backend == at::BlasBackend::Default) {
-    blas_preferred_backend = at::BlasBackend::Cublas;
 #ifdef USE_ROCM
-    // AMD Instinct targets prefer hipblaslt
-    static const bool hipblaslt_preferred = []() {
-      static const std::vector<std::string> archs = {
-          "gfx90a", "gfx942",
-#if ROCM_VERSION >= 60500
-          "gfx950"
-#endif
-      };
-      for (auto index: c10::irange(detail::getCUDAHooks().deviceCount())) {
-        if (!detail::getCUDAHooks().isGPUArch(index, archs)) {
-          return false;
-        }
-      }
-      return true;
-    }();
-    if (hipblaslt_preferred) {
-      blas_preferred_backend = at::BlasBackend::Cublaslt;
-    }
-#endif
-  }
-
-#ifdef USE_ROCM
-  // hipblaslt support for all archs is not as complete as hipblas
   if (blas_preferred_backend == at::BlasBackend::Cublaslt) {
     static const bool hipblaslt_unsupported = []() {
       static const std::vector<std::string> archs = {
@@ -365,7 +338,7 @@ at::BlasBackend Context::blasPreferredBackend() {
           "gfx950"
 #endif
       };
-      for (auto index: c10::irange(detail::getCUDAHooks().deviceCount())) {
+      for (auto index: c10::irange(getNumGPUs())) {
         if (!detail::getCUDAHooks().isGPUArch(index, archs)) {
           TORCH_WARN_ONCE(
             "Attempting to use hipBLASLt on an unsupported architecture! "
@@ -392,7 +365,7 @@ void Context::setBlasPreferredBackend(at::BlasBackend b) {
       "Cannot set preferred backend to cuBLASLt if PyTorch has not been compiled with cuBLASLt.");
   TORCH_CHECK((b != at::BlasBackend::Ck) || hasROCM(),
       "Cannot set preferred backend to Ck if PyTorch has not been compiled for ROCm.");
-  if (b != at::BlasBackend::Default && b != at::BlasBackend::Cublas) {
+  if (b != at::BlasBackend::Cublas) {
     TORCH_WARN_ONCE(
       "torch.backends.cuda.preferred_blas_library is an experimental feature. "
       "If you see any error or unexpected behavior when this flag is set "
@@ -418,7 +391,7 @@ void Context::setROCmFAPreferredBackend(at::ROCmFABackend b) {
     static const std::vector<std::string> archs = {
         "gfx90a", "gfx942"
     };
-    for (auto index: c10::irange(detail::getCUDAHooks().deviceCount())) {
+    for (auto index: c10::irange(getNumGPUs())) {
       if (!detail::getCUDAHooks().isGPUArch(index, archs)) {
         TORCH_WARN_ONCE(
           "Attempting to use CK on an unsupported architecture! Cannot set backend to CK");

aten/src/ATen/Context.h

Lines changed: 7 additions & 5 deletions
@@ -446,15 +446,17 @@ class TORCH_API Context {
   bool allow_tf32_onednn = false;
   bool enabled_nnpack = true;
   at::LinalgBackend linalg_preferred_backend =
-      (c10::utils::check_env("TORCH_LINALG_PREFER_CUSOLVER") == true ||
-       c10::utils::check_env("TORCH_LINALG_PREFER_HIPSOLVER") == true) // alias
+      c10::utils::check_env("TORCH_LINALG_PREFER_CUSOLVER") == true
       ? at::LinalgBackend::Cusolver
       : at::LinalgBackend::Default;
   at::BlasBackend blas_preferred_backend =
-      (c10::utils::check_env("TORCH_BLAS_PREFER_CUBLASLT") == true ||
-       c10::utils::check_env("TORCH_BLAS_PREFER_HIPBLASLT") == true) // alias
+#ifdef USE_ROCM
+      (c10::utils::check_env("TORCH_BLAS_PREFER_HIPBLASLT") != false)
+#else
+      (c10::utils::check_env("TORCH_BLAS_PREFER_CUBLASLT") == true)
+#endif
       ? at::BlasBackend::Cublaslt
-      : at::BlasBackend::Default;
+      : at::BlasBackend::Cublas;
   at::ROCmFABackend rocm_fa_preferred_backend =
       c10::utils::check_env("TORCH_ROCM_FA_PREFER_CK") == true
       ? at::ROCmFABackend::Ck
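Note the polarity of the restored ROCm default: c10::utils::check_env returns an optional, so check_env("TORCH_BLAS_PREFER_HIPBLASLT") != false selects hipBLASLt when the variable is unset or truthy; only an explicit TORCH_BLAS_PREFER_HIPBLASLT=0 falls back to hipBLAS. A minimal way to observe the env-var override in a fresh interpreter, adapted from the test removed in test/test_cuda.py below (assumes a CUDA build; on ROCm use TORCH_BLAS_PREFER_HIPBLASLT):

import os
import subprocess
import sys

# Launch a fresh process so the env var is read at Context construction.
env = os.environ.copy()
env["TORCH_BLAS_PREFER_CUBLASLT"] = "1"
script = "import torch; print(torch.backends.cuda.preferred_blas_library())"
out = subprocess.check_output([sys.executable, "-c", script], env=env)
print(out.decode("ascii").strip())  # expected: _BlasBackend.Cublaslt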

test/test_cuda.py

Lines changed: 0 additions & 58 deletions
@@ -586,64 +586,6 @@ def test_serialization_array_with_storage(self):
         q_copy[1].fill_(10)
         self.assertEqual(q_copy[3], torch.cuda.IntStorage(10).fill_(10))

-    @setBlasBackendsToDefaultFinally
-    def test_preferred_blas_library_settings(self):
-        def _check_default():
-            default = torch.backends.cuda.preferred_blas_library()
-            if torch.version.cuda:
-                # CUDA logic is easy, it's always cublas
-                self.assertTrue(default == torch._C._BlasBackend.Cublas)
-            else:
-                # ROCm logic is less so, it's cublaslt for some Instinct, cublas for all else
-                gcn_arch = str(
-                    torch.cuda.get_device_properties(0).gcnArchName.split(":", 1)[0]
-                )
-                if gcn_arch in ["gfx90a", "gfx942", "gfx950"]:
-                    self.assertTrue(default == torch._C._BlasBackend.Cublaslt)
-                else:
-                    self.assertTrue(default == torch._C._BlasBackend.Cublas)
-
-        _check_default()
-        # "Default" can be set but is immediately reset internally to the actual default value.
-        self.assertTrue(
-            torch.backends.cuda.preferred_blas_library("default")
-            != torch._C._BlasBackend.Default
-        )
-        _check_default()
-        self.assertTrue(
-            torch.backends.cuda.preferred_blas_library("cublas")
-            == torch._C._BlasBackend.Cublas
-        )
-        self.assertTrue(
-            torch.backends.cuda.preferred_blas_library("hipblas")
-            == torch._C._BlasBackend.Cublas
-        )
-        # check bad strings
-        with self.assertRaisesRegex(
-            RuntimeError,
-            "Unknown input value. Choose from: default, cublas, hipblas, cublaslt, hipblaslt, ck.",
-        ):
-            torch.backends.cuda.preferred_blas_library("unknown")
-        # check bad input type
-        with self.assertRaisesRegex(RuntimeError, "Unknown input value type."):
-            torch.backends.cuda.preferred_blas_library(1.0)
-        # check env var override
-        custom_envs = [
-            {"TORCH_BLAS_PREFER_CUBLASLT": "1"},
-            {"TORCH_BLAS_PREFER_HIPBLASLT": "1"},
-        ]
-        test_script = "import torch;print(torch.backends.cuda.preferred_blas_library())"
-        for env_config in custom_envs:
-            env = os.environ.copy()
-            for key, value in env_config.items():
-                env[key] = value
-            r = (
-                subprocess.check_output([sys.executable, "-c", test_script], env=env)
-                .decode("ascii")
-                .strip()
-            )
-            self.assertEqual("_BlasBackend.Cublaslt", r)
-
     @unittest.skipIf(TEST_CUDAMALLOCASYNC, "temporarily disabled for async")
     @setBlasBackendsToDefaultFinally
     def test_cublas_workspace_explicit_allocation(self):

torch/_C/__init__.pyi.in

Lines changed: 0 additions & 1 deletion
@@ -1309,7 +1309,6 @@ def _get_blas_preferred_backend() -> torch._C._BlasBackend: ...
 def _set_blas_preferred_backend(arg: torch._C._BlasBackend): ...

 class _BlasBackend:
-    Default: _BlasBackend
     Cublas: _BlasBackend
     Cublaslt: _BlasBackend
     Ck: _BlasBackend

torch/backends/cuda/__init__.py

Lines changed: 0 additions & 3 deletions
@@ -218,9 +218,7 @@ def preferred_linalg_library(


 _BlasBackends = {
-    "default": torch._C._BlasBackend.Default,
     "cublas": torch._C._BlasBackend.Cublas,
-    "hipblas": torch._C._BlasBackend.Cublas,  # alias
     "cublaslt": torch._C._BlasBackend.Cublaslt,
     "hipblaslt": torch._C._BlasBackend.Cublaslt,  # alias
     "ck": torch._C._BlasBackend.Ck,
@@ -243,7 +241,6 @@ def preferred_blas_library(
     * If `"cublas"` is set then cuBLAS will be used wherever possible.
     * If `"cublaslt"` is set then cuBLASLt will be used wherever possible.
     * If `"ck"` is set then CK will be used wherever possible.
-    * If `"default"` (the default) is set then heuristics will be used to pick between the other options.
     * When no input is given, this function returns the currently preferred library.
     * User may use the environment variable TORCH_BLAS_PREFER_CUBLASLT=1 to set the preferred library to cuBLASLt
       globally.
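With "default" and "hipblas" gone from _BlasBackends, the accepted string inputs are "cublas", "cublaslt", "hipblaslt", and "ck". A short usage sketch (requires a CUDA or ROCm build of PyTorch):

import torch

# Query the current preference (no argument).
print(torch.backends.cuda.preferred_blas_library())

# Prefer cuBLASLt (hipBLASLt on ROCm) where possible, then switch back.
torch.backends.cuda.preferred_blas_library("cublaslt")
torch.backends.cuda.preferred_blas_library("cublas")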

torch/csrc/Module.cpp

Lines changed: 0 additions & 1 deletion
@@ -2243,7 +2243,6 @@ Call this whenever a new thread is created in order to propagate values from
   });

   py::enum_<at::BlasBackend>(py_module, "_BlasBackend")
-      .value("Default", at::BlasBackend::Default)
       .value("Cublas", at::BlasBackend::Cublas)
       .value("Cublaslt", at::BlasBackend::Cublaslt)
       .value("Ck", at::BlasBackend::Ck);
