[rocm6.5_internal_testing] MIOpen: Get current device from Torch rather than HIP in handle creation (pytorch#154549) (#2223)

apakbin · jeffdaily · jithunnair-amd · commit 08da4c9387b3 · 2025-06-07T07:56:36.000-05:00
(This is a cherry-pick of pytorch#154549.) Get the current device from Torch rather than from HIP during MIOpen handle creation. The device may already have been set on the Torch side; otherwise, the device is set to 0 for the handle. Also includes additional changes from auditing the cuDNN and MIOpen Handle.cpp files against each other. Pull Request resolved: pytorch#154549 Approved by: https://github.com/jeffdaily, https://github.com/cyyever Co-authored-by: Jeff Daily <jeff.daily@amd.com>
diff --git a/aten/src/ATen/miopen/Handle.cpp b/aten/src/ATen/miopen/Handle.cpp
@@ -1,40 +1,45 @@
-#include <ATen/miopen/Exceptions.h>
-#include <ATen/miopen/Handle.h>
 #include <ATen/hip/detail/DeviceThreadHandles.h>
+#include <ATen/miopen/Handle.h>
 #include <c10/hip/HIPStream.h>
 
-namespace at { namespace native {
+#include <ATen/hip/Exceptions.h>
+#include <ATen/miopen/Exceptions.h>
+
+namespace at::native {
 namespace {
 
 void createMIOpenHandle(miopenHandle_t *handle) {
   MIOPEN_CHECK(miopenCreate(handle));
 }
 
 void destroyMIOpenHandle(miopenHandle_t handle) {
-// this is because of something dumb in the ordering of
-// destruction. Sometimes atexit, the cuda context (or something)
-// would already be destroyed by the time this gets destroyed. It
-// happens in fbcode setting. @colesbury and I decided to not destroy
-// the handle as a workaround.
-//   - @soumith
-//
-// Further note: this is now disabled globally, because we are seeing
-// the same issue as mentioned above in CUDA 11 CI.
-//   - @zasdfgbnm
-//
-// #ifdef NO_MIOPEN_DESTROY_HANDLE
-// #else
-//   miopenDestroy(handle);
-// #endif
+  // this is because of something dumb in the ordering of
+  // destruction. Sometimes atexit, the cuda context (or something)
+  // would already be destroyed by the time this gets destroyed. It
+  // happens in fbcode setting. @colesbury and I decided to not destroy
+  // the handle as a workaround.
+  //   - @soumith
+  //
+  // Further note: this is now disabled globally, because we are seeing
+  // the same issue as mentioned above in CUDA 11 CI.
+  //   - @zasdfgbnm
+  //
+  // #ifdef NO_MIOPEN_DESTROY_HANDLE
+  // #else
+  //   miopenDestroy(handle);
+  // #endif
 }
 
-using MIOpenPoolType = at::cuda::DeviceThreadHandlePool<miopenHandle_t, createMIOpenHandle, destroyMIOpenHandle>;
+using MIOpenPoolType = at::cuda::DeviceThreadHandlePool<
+    miopenHandle_t,
+    createMIOpenHandle,
+    destroyMIOpenHandle>;
 
 } // namespace
 
 miopenHandle_t getMiopenHandle() {
-  int device;
-  HIP_CHECK(hipGetDevice(&device));
+  c10::DeviceIndex device = 0;
+  AT_CUDA_CHECK(c10::hip::GetDevice(&device));
 
   // Thread local PoolWindows are lazily-initialized
   // to avoid initialization issues that caused hangs on Windows.
@@ -46,8 +51,8 @@ miopenHandle_t getMiopenHandle() {
       pool->newPoolWindow());
 
   auto handle = myPoolWindow->reserve(device);
-  MIOPEN_CHECK(miopenSetStream(handle, at::hip::getCurrentHIPStream()));
+  MIOPEN_CHECK(miopenSetStream(handle, c10::hip::getCurrentHIPStream()));
   return handle;
 }
 
-}} // namespace at::native
+} // namespace at::native