diff --git a/backends/aoti/common_shims_slim.h b/backends/aoti/common_shims_slim.h index b8948c0a311..dfcdecd2bc2 100644 --- a/backends/aoti/common_shims_slim.h +++ b/backends/aoti/common_shims_slim.h @@ -18,7 +18,7 @@ // CUDA backend (SlimTensor) and other backends like MPS (ETensor). // The caller determines which path is used by defining CUDA_AVAILABLE. #ifdef CUDA_AVAILABLE -#include +#include #else #include #endif diff --git a/backends/aoti/slim/core/SlimTensor.h b/backends/aoti/slim/core/slim_tensor.h similarity index 98% rename from backends/aoti/slim/core/SlimTensor.h rename to backends/aoti/slim/core/slim_tensor.h index 226776d2bc8..b32186152e2 100644 --- a/backends/aoti/slim/core/SlimTensor.h +++ b/backends/aoti/slim/core/slim_tensor.h @@ -20,9 +20,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include namespace executorch::backends::aoti::slim { @@ -568,4 +568,4 @@ class SlimTensor { // Include view operations implementations (must be after SlimTensor class // definition) -#include +#include diff --git a/backends/aoti/slim/core/SlimTensorView-incl.h b/backends/aoti/slim/core/slim_tensor_view_incl.h similarity index 98% rename from backends/aoti/slim/core/SlimTensorView-incl.h rename to backends/aoti/slim/core/slim_tensor_view_incl.h index a437f187299..82b605fb569 100644 --- a/backends/aoti/slim/core/SlimTensorView-incl.h +++ b/backends/aoti/slim/core/slim_tensor_view_incl.h @@ -9,7 +9,7 @@ #pragma once #include -#include +#include namespace executorch::backends::aoti::slim { diff --git a/backends/aoti/slim/core/Storage.h b/backends/aoti/slim/core/storage.h similarity index 98% rename from backends/aoti/slim/core/Storage.h rename to backends/aoti/slim/core/storage.h index 156556aa9e1..0ef1b204484 100644 --- a/backends/aoti/slim/core/Storage.h +++ b/backends/aoti/slim/core/storage.h @@ -17,9 +17,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/backends/aoti/slim/core/targets.bzl b/backends/aoti/slim/core/targets.bzl index 408738edd35..735e8b52e1b 100644 --- a/backends/aoti/slim/core/targets.bzl +++ b/backends/aoti/slim/core/targets.bzl @@ -7,7 +7,7 @@ def define_common_targets(): runtime.cxx_library( name = "storage", headers = [ - "Storage.h", + "storage.h", ], visibility = ["@EXECUTORCH_CLIENTS"], exported_deps = [ @@ -25,8 +25,8 @@ def define_common_targets(): runtime.cxx_library( name = "slimtensor", headers = [ - "SlimTensor.h", - "SlimTensorView-incl.h", + "slim_tensor.h", + "slim_tensor_view_incl.h", ], visibility = ["@EXECUTORCH_CLIENTS"], exported_deps = [ diff --git a/backends/aoti/slim/core/test/test_as_strided.cpp b/backends/aoti/slim/core/test/test_as_strided.cpp index f73104b5ba0..13729a0f240 100644 --- a/backends/aoti/slim/core/test/test_as_strided.cpp +++ b/backends/aoti/slim/core/test/test_as_strided.cpp @@ -8,9 +8,9 @@ #include -#include -#include -#include +#include +#include +#include #ifdef CUDA_AVAILABLE #include diff --git a/backends/aoti/slim/core/test/test_permute_reshape.cpp b/backends/aoti/slim/core/test/test_permute_reshape.cpp index 688245d8be7..34c730996e2 100644 --- a/backends/aoti/slim/core/test/test_permute_reshape.cpp +++ b/backends/aoti/slim/core/test/test_permute_reshape.cpp @@ -8,9 +8,9 @@ #include -#include -#include -#include +#include +#include +#include #ifdef CUDA_AVAILABLE #include diff --git a/backends/aoti/slim/core/test/test_slimtensor_basic.cpp b/backends/aoti/slim/core/test/test_slimtensor_basic.cpp index e23178d4de6..0337cca544a 100644 --- a/backends/aoti/slim/core/test/test_slimtensor_basic.cpp +++ b/backends/aoti/slim/core/test/test_slimtensor_basic.cpp @@ -8,8 +8,8 @@ #include -#include -#include +#include +#include namespace executorch::backends::aoti::slim { diff --git a/backends/aoti/slim/core/test/test_slimtensor_copy.cpp b/backends/aoti/slim/core/test/test_slimtensor_copy.cpp index 6d2ed745446..6c48689619d 100644 --- a/backends/aoti/slim/core/test/test_slimtensor_copy.cpp +++ b/backends/aoti/slim/core/test/test_slimtensor_copy.cpp @@ -8,9 +8,9 @@ #include -#include -#include -#include +#include +#include +#include namespace executorch::backends::aoti::slim { diff --git a/backends/aoti/slim/core/test/test_slimtensor_dtypes.cpp b/backends/aoti/slim/core/test/test_slimtensor_dtypes.cpp index 8ecb8d977b7..6883ea5414b 100644 --- a/backends/aoti/slim/core/test/test_slimtensor_dtypes.cpp +++ b/backends/aoti/slim/core/test/test_slimtensor_dtypes.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include namespace executorch::backends::aoti::slim { diff --git a/backends/aoti/slim/core/test/test_storage.cpp b/backends/aoti/slim/core/test/test_storage.cpp index 5d61019aa2b..d641c6be85a 100644 --- a/backends/aoti/slim/core/test/test_storage.cpp +++ b/backends/aoti/slim/core/test/test_storage.cpp @@ -8,7 +8,7 @@ #include -#include +#include #ifdef CUDA_AVAILABLE #include diff --git a/backends/aoti/slim/cuda/guard.h b/backends/aoti/slim/cuda/guard.h new file mode 100644 index 00000000000..f553e0dcc4b --- /dev/null +++ b/backends/aoti/slim/cuda/guard.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#ifdef CUDA_AVAILABLE + +#include +#include + +#include +#include + +namespace executorch::backends::aoti::slim::cuda { + +/** + * CUDAGuard - RAII class that sets the current CUDA device. + * + * This class saves the current CUDA device on construction and restores it + * on destruction, providing exception-safe device switching. + * + * Thread Safety: NOT THREAD-SAFE + * - Must only be used within a single thread + */ +struct CUDAGuard { + /// No default constructor - device must be specified. + CUDAGuard() = delete; + + /// Sets the current CUDA device to the specified device index. + /// @param device_index The CUDA device index to switch to. + explicit CUDAGuard(c10::DeviceIndex device_index) { + set_index(device_index); + } + + /// Sets the current CUDA device to the specified device. + /// @param device The CUDA device to switch to. Must be a CUDA device. + explicit CUDAGuard(c10::Device device) { + ET_CHECK_MSG(device.is_cuda(), "Expected a CUDA device for CUDAGuard"); + set_index(device.index()); + } + + // Copy is not allowed + CUDAGuard(const CUDAGuard&) = delete; + CUDAGuard& operator=(const CUDAGuard&) = delete; + + // Move is not allowed + CUDAGuard(CUDAGuard&& other) = delete; + CUDAGuard& operator=(CUDAGuard&& other) = delete; + + /// Restores the original CUDA device on destruction. + ~CUDAGuard() { + if (original_device_index_ != current_device_index_) { + ET_CUDA_LOG_WARN(cudaSetDevice(original_device_index_)); + } + } + + /// Sets the CUDA device to the given device index. + /// @param device_index The device index to switch to. + void set_index(c10::DeviceIndex device_index) { + int orig_index = -1; + ET_CUDA_CHECK(cudaGetDevice(&orig_index)); + + original_device_index_ = orig_index; + current_device_index_ = device_index; + if (current_device_index_ != original_device_index_) { + ET_CUDA_CHECK(cudaSetDevice(current_device_index_)); + } + } + + private: + c10::DeviceIndex original_device_index_; + c10::DeviceIndex current_device_index_; +}; + +} // namespace executorch::backends::aoti::slim::cuda + +#endif // CUDA_AVAILABLE diff --git a/backends/aoti/slim/factory/Empty.h b/backends/aoti/slim/factory/empty.h similarity index 94% rename from backends/aoti/slim/factory/Empty.h rename to backends/aoti/slim/factory/empty.h index c0ab9d7248d..b48ce223c5b 100644 --- a/backends/aoti/slim/factory/Empty.h +++ b/backends/aoti/slim/factory/empty.h @@ -10,9 +10,9 @@ #include -#include -#include -#include +#include +#include +#include namespace executorch::backends::aoti::slim { diff --git a/backends/aoti/slim/factory/FromBlob.h b/backends/aoti/slim/factory/from_blob.h similarity index 95% rename from backends/aoti/slim/factory/FromBlob.h rename to backends/aoti/slim/factory/from_blob.h index b0c659419e9..4e9fda25abf 100644 --- a/backends/aoti/slim/factory/FromBlob.h +++ b/backends/aoti/slim/factory/from_blob.h @@ -8,9 +8,9 @@ #pragma once -#include -#include -#include +#include +#include +#include namespace executorch::backends::aoti::slim { diff --git a/backends/aoti/slim/factory/targets.bzl b/backends/aoti/slim/factory/targets.bzl index 5b10967e166..b26549c01a2 100644 --- a/backends/aoti/slim/factory/targets.bzl +++ b/backends/aoti/slim/factory/targets.bzl @@ -7,7 +7,7 @@ def define_common_targets(): runtime.cxx_library( name = "empty", headers = [ - "Empty.h", + "empty.h", ], visibility = ["@EXECUTORCH_CLIENTS"], exported_deps = [ @@ -20,7 +20,7 @@ def define_common_targets(): runtime.cxx_library( name = "from_blob", headers = [ - "FromBlob.h", + "from_blob.h", ], visibility = ["@EXECUTORCH_CLIENTS"], exported_deps = [ diff --git a/backends/aoti/slim/factory/test/test_empty.cpp b/backends/aoti/slim/factory/test/test_empty.cpp index 18e7ead14ef..39a39ac0f76 100644 --- a/backends/aoti/slim/factory/test/test_empty.cpp +++ b/backends/aoti/slim/factory/test/test_empty.cpp @@ -8,7 +8,7 @@ #include -#include +#include #ifdef CUDA_AVAILABLE #include diff --git a/backends/aoti/slim/factory/test/test_from_blob.cpp b/backends/aoti/slim/factory/test/test_from_blob.cpp index 16d43d545f3..a0c8f3cc495 100644 --- a/backends/aoti/slim/factory/test/test_from_blob.cpp +++ b/backends/aoti/slim/factory/test/test_from_blob.cpp @@ -8,9 +8,9 @@ #include -#include -#include -#include +#include +#include +#include #ifdef CUDA_AVAILABLE #include diff --git a/backends/aoti/slim/util/ArrayRefUtil.h b/backends/aoti/slim/util/array_ref_util.h similarity index 100% rename from backends/aoti/slim/util/ArrayRefUtil.h rename to backends/aoti/slim/util/array_ref_util.h diff --git a/backends/aoti/slim/util/SharedPtr.h b/backends/aoti/slim/util/shared_ptr.h similarity index 100% rename from backends/aoti/slim/util/SharedPtr.h rename to backends/aoti/slim/util/shared_ptr.h diff --git a/backends/aoti/slim/util/SizeUtil.h b/backends/aoti/slim/util/size_util.h similarity index 99% rename from backends/aoti/slim/util/SizeUtil.h rename to backends/aoti/slim/util/size_util.h index aaa41329b14..a8fa994273d 100644 --- a/backends/aoti/slim/util/SizeUtil.h +++ b/backends/aoti/slim/util/size_util.h @@ -12,7 +12,7 @@ #include #include -#include +#include #include namespace executorch::backends::aoti::slim { diff --git a/backends/aoti/slim/util/targets.bzl b/backends/aoti/slim/util/targets.bzl index 60a7933a861..65930ce1d84 100644 --- a/backends/aoti/slim/util/targets.bzl +++ b/backends/aoti/slim/util/targets.bzl @@ -7,7 +7,7 @@ def define_common_targets(): runtime.cxx_library( name = "shared_ptr", headers = [ - "SharedPtr.h", + "shared_ptr.h", ], visibility = ["@EXECUTORCH_CLIENTS"], exported_deps = [ @@ -19,7 +19,7 @@ def define_common_targets(): runtime.cxx_library( name = "array_ref_util", headers = [ - "ArrayRefUtil.h", + "array_ref_util.h", ], visibility = ["@EXECUTORCH_CLIENTS"], exported_deps = [ @@ -31,7 +31,7 @@ def define_common_targets(): runtime.cxx_library( name = "size_util", headers = [ - "SizeUtil.h", + "size_util.h", ], visibility = ["@EXECUTORCH_CLIENTS"], exported_deps = [ diff --git a/backends/aoti/slim/util/test/test_size_util.cpp b/backends/aoti/slim/util/test/test_size_util.cpp index fd1deed1909..95412a05c92 100644 --- a/backends/aoti/slim/util/test/test_size_util.cpp +++ b/backends/aoti/slim/util/test/test_size_util.cpp @@ -8,7 +8,7 @@ #include -#include +#include namespace executorch::backends::aoti::slim { diff --git a/backends/aoti/tests/test_common_shims_slim.cpp b/backends/aoti/tests/test_common_shims_slim.cpp index ca744565955..94319c6f94d 100644 --- a/backends/aoti/tests/test_common_shims_slim.cpp +++ b/backends/aoti/tests/test_common_shims_slim.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/backends/cuda/runtime/shims/memory_slim.cpp b/backends/cuda/runtime/shims/memory_slim.cpp index 93fd884958c..58bf43b34b0 100644 --- a/backends/cuda/runtime/shims/memory_slim.cpp +++ b/backends/cuda/runtime/shims/memory_slim.cpp @@ -8,9 +8,9 @@ #include -#include -#include -#include +#include +#include +#include #include namespace executorch::backends::cuda { diff --git a/backends/cuda/runtime/shims/memory_slim.h b/backends/cuda/runtime/shims/memory_slim.h index ec8b8db14f8..5a0845f243c 100644 --- a/backends/cuda/runtime/shims/memory_slim.h +++ b/backends/cuda/runtime/shims/memory_slim.h @@ -11,8 +11,8 @@ #include #include -#include -#include +#include +#include #include namespace executorch::backends::cuda {