Revert "Add utility to get computed kernel in torch.library (pytorch#158393)"

pragupta · pragupta · commit d1c2fe44db16 · 2025-09-24T21:34:56.000Z
This reverts commit 1196bb1. (cherry picked from commit a975dfe)
diff --git a/aten/src/ATen/core/boxing/KernelFunction.h b/aten/src/ATen/core/boxing/KernelFunction.h
@@ -6,8 +6,6 @@
 #include <c10/core/DispatchKeySet.h>
 #include <c10/util/TypeList.h>
 #include <c10/util/intrusive_ptr.h>
-#include <atomic>
-#include <memory>
 #include <type_traits>
 
 namespace c10 {
@@ -19,9 +17,6 @@ class OperatorHandle;
 struct OperatorKernel;
 class KernelFunction;
 
-class KernelToken;
-class SafeKernelFunction;
-
 template <typename T>
 using has_symint = std::disjunction<
     std::is_same<c10::SymInt, T>,
@@ -95,12 +90,6 @@ class TORCH_API KernelFunction final {
       BoxedKernel::BoxedKernelFunction_withDispatchKeys;
 
   KernelFunction();
-  ~KernelFunction();
-
-  KernelFunction(const KernelFunction&) = default;
-  KernelFunction& operator=(const KernelFunction&) = default;
-
-  KernelFunction(KernelFunction&&) noexcept = default;
 
   // Fast path for dispatch to allow not touching the boxed kernel in
   // the common case where unboxed is available.
@@ -273,13 +262,6 @@ class TORCH_API KernelFunction final {
   // For testing internal invariants only
   bool _equalsBoxedAndUnboxed(const KernelFunction&) const;
 
-  // Register a token to be invalidated when this KernelFunction is destroyed
-  void registerToken(std::weak_ptr<KernelToken> token) const;
-
-  // List of tokens that need to be invalidated when this KernelFunction is
-  // destroyed
-  mutable std::vector<std::weak_ptr<KernelToken>> tokens_;
-
  private:
   explicit KernelFunction(
       std::unique_ptr<OperatorKernel> functor,
@@ -296,47 +278,6 @@ class TORCH_API KernelFunction final {
   void* sym_unboxed_kernel_func_;
 };
 
-// Token held by SafeKernelFunction that gets invalidated when KernelFunction is
-// destroyed
-class KernelToken {
- public:
-  bool isValid() const;
-  void invalidate();
-
- private:
-  std::atomic<bool> invalid_{false};
-};
-
-class SafeKernelFunction {
- public:
-  SafeKernelFunction(
-      const KernelFunction* kernel,
-      std::string debug,
-      std::shared_ptr<OperatorHandle> opHandle);
-
-  // Safe callBoxed - checks token validity first
-  void callBoxed(
-      const OperatorHandle& opHandle,
-      DispatchKeySet dispatchKeySet,
-      Stack* stack) const;
-
-  // Get debug information
-  const std::string& debug() const {
-    return debug_;
-  }
-
-  // Get the OpHandle that lives on this SafeKernelFunction
-  const OperatorHandle& opHandle() const {
-    return *opHandle_;
-  }
-
- private:
-  KernelFunction kernel_;
-  std::shared_ptr<KernelToken> token_;
-  std::string debug_;
-  std::shared_ptr<OperatorHandle> opHandle_;
-};
-
 } // namespace c10
 
 #include <ATen/core/boxing/KernelFunction_impl.h>
diff --git a/aten/src/ATen/core/boxing/KernelFunction_impl.h b/aten/src/ATen/core/boxing/KernelFunction_impl.h
@@ -24,14 +24,6 @@ inline KernelFunction::KernelFunction()
       unboxed_kernel_func_(nullptr),
       sym_unboxed_kernel_func_(nullptr) {}
 
-inline KernelFunction::~KernelFunction() {
-  for (auto& weak_token : tokens_) {
-    if (auto token = weak_token.lock()) {
-      token->invalidate();
-    }
-  }
-}
-
 inline KernelFunction::KernelFunction(
     std::unique_ptr<OperatorKernel> functor,
     InternalBoxedKernelFunction* boxed_kernel_func,
@@ -165,11 +157,6 @@ C10_ALWAYS_INLINE Return KernelFunction::call(
       std::forward<Args>(args)...);
 }
 
-inline void KernelFunction::registerToken(
-    std::weak_ptr<KernelToken> token) const {
-  tokens_.push_back(std::move(token));
-}
-
 inline KernelFunction KernelFunction::makeFromBoxedKernel(
     BoxedKernel boxed_fn) {
   return KernelFunction(
@@ -330,38 +317,4 @@ KernelFunction::makeFromUnboxedLambda(Lambda&& lambda) {
           std::forward<Lambda>(lambda)));
 }
 
-inline bool KernelToken::isValid() const {
-  return !invalid_.load(std::memory_order_acquire);
-}
-
-inline void KernelToken::invalidate() {
-  invalid_.store(true, std::memory_order_release);
-}
-
-inline SafeKernelFunction::SafeKernelFunction(
-    const KernelFunction* kernel,
-    std::string debug,
-    std::shared_ptr<OperatorHandle> opHandle)
-    : kernel_(kernel ? *kernel : KernelFunction()),
-      token_(std::make_shared<KernelToken>()),
-      debug_(std::move(debug)),
-      opHandle_(std::move(opHandle)) {
-  // Register the token with the original kernel so it gets invalidated when the
-  // kernel is destroyed
-  if (kernel) {
-    kernel->registerToken(token_);
-  }
-}
-
-inline void SafeKernelFunction::callBoxed(
-    const OperatorHandle& opHandle,
-    DispatchKeySet dispatchKeySet,
-    Stack* stack) const {
-  TORCH_CHECK(
-      token_ && token_->isValid(),
-      "SafeKernelFunction has been invalidated ",
-      debug_);
-  kernel_.callBoxed(opHandle, dispatchKeySet, stack);
-}
-
 } // namespace c10
diff --git a/aten/src/ATen/core/dispatch/Dispatcher.h b/aten/src/ATen/core/dispatch/Dispatcher.h
@@ -487,10 +487,6 @@ class TORCH_API OperatorHandle {
     return operatorDef_->op.hasComputedKernelForDispatchKey(k);
   }
 
-  SafeKernelFunction getComputedKernelForDispatchKey(DispatchKey k) const {
-    return operatorDef_->op.getComputedKernelForDispatchKey(k);
-  }
-
   std::string dumpComputedTable() const {
     return operatorDef_->op.dumpComputedTable();
   }
diff --git a/aten/src/ATen/core/dispatch/OperatorEntry.cpp b/aten/src/ATen/core/dispatch/OperatorEntry.cpp
@@ -315,42 +315,6 @@ const AnnotatedKernel* OperatorEntry::getKernelForDispatchKey(DispatchKey dispat
   return nullptr;
 }
 
-SafeKernelFunction OperatorEntry::getComputedKernelForDispatchKey(
-    DispatchKey k) const {
-  TORCH_CHECK(
-      !isAliasDispatchKey(k),
-      "Alias keys do not have runtime kernel registrations.");
-  const auto dispatch_ix = getDispatchTableIndexForDispatchKey(k);
-  TORCH_CHECK(
-      dispatchTable_[dispatch_ix].isValid(),
-      "no kernel for ",
-      k,
-      " for ",
-      name_);
-
-  // Get the KernelFunction object from kernels_ to pass to SafeKernelFunction
-
-  // The KernelFunction object in dispatchTable_ is a copy of the KernelFunction
-  // in the AnnotatedKernel in kernels_. A KernelFunction is only truly
-  // deregistered when the kernel is removed from kernels_. However, the
-  // KernelFunction in dispatchTable_ might be removed before it is deregistered
-  // (when a newer kernel is registered). Therefore, here we want to return a
-  // SafeKernelFunction that is backed by the original KernelFunction in
-  // kernels_, so that we only invalidate it when the kernel is deregistered.
-  auto [annotatedKernel, _] =
-      computeDispatchTableEntryWithDebug(c10::Dispatcher::singleton(), k);
-
-  // Use findSchemaOrThrow to get OpHandle for the OperatorEntry
-  auto& dispatcher = c10::Dispatcher::singleton();
-  auto opHandle = dispatcher.findSchemaOrThrow(
-      name_.name.c_str(), name_.overload_name.c_str());
-
-  return SafeKernelFunction(
-      &annotatedKernel.kernel,
-      annotatedKernel.debug,
-      std::make_shared<OperatorHandle>(opHandle));
-}
-
 const std::vector<at::Tag>& OperatorEntry::getTags() const {
   #if defined C10_MOBILE
     TORCH_CHECK(false, "tags are not saved for Mobile");
diff --git a/aten/src/ATen/core/dispatch/OperatorEntry.h b/aten/src/ATen/core/dispatch/OperatorEntry.h
@@ -217,8 +217,6 @@ class TORCH_API OperatorEntry final {
   const KernelFunction& kernelForDispatchKey(DispatchKey k) const;
   // Returns true if the "computed table" has an entry for a particular key.
   bool hasComputedKernelForDispatchKey(DispatchKey k) const;
-  // Returns a KernelFunction corresponding to the kernel in dispatchTable
-  SafeKernelFunction getComputedKernelForDispatchKey(DispatchKey k) const;
   // Returns all the operator tags added at the time of registration
   const std::vector<at::Tag>& getTags() const;
   void setReportErrorCallback_(std::unique_ptr<c10::SafePyObject> callback);
diff --git a/docs/source/library.md b/docs/source/library.md
@@ -56,7 +56,6 @@ via PyTorch's C++ operator registration APIs).
 .. autofunction:: infer_schema
 .. autoclass:: torch._library.custom_ops.CustomOpDef
    :members: set_kernel_enabled
-.. autofunction:: get_kernel
 ```
 
 ## Low-level APIs
diff --git a/test/test_custom_ops.py b/test/test_custom_ops.py
@@ -11,7 +11,6 @@
 import tempfile
 import typing
 import unittest
-from functools import partial
 from pathlib import Path
 from typing import *  # noqa: F403
 
@@ -4157,148 +4156,6 @@ def test_any_output_is_alias_to_input_or_output(self):
             )
         )
 
-    def test_library_get_kernel(self):
-        """Test registering a custom kernel, using it, then deregistering and verifying error."""
-
-        # Register a dummy kernel for arange to the CPU key that returns a tensor of ones
-        def dummy_arange_cpu(
-            dispatch_keys,
-            start,
-            end,
-            dtype=None,
-            layout=torch.strided,
-            device=None,
-            pin_memory=False,
-        ):
-            size = max(0, int(end - start))
-            return torch.ones(size, dtype=dtype, device=device)
-
-        with torch.library._scoped_library("aten", "IMPL") as lib:
-            lib.impl("arange.start", dummy_arange_cpu, "CPU", with_keyset=True)
-
-            kernel = torch.library.get_kernel("aten::arange.start", "CPU")
-            dispatch_keys = torch._C.DispatchKeySet(torch._C.DispatchKey.CPU)
-            result = kernel.call_boxed(dispatch_keys, 0, 5)
-
-            self.assertEqual(result, torch.ones(5))
-
-        # The kernel should now be invalidated after exiting the scoped_library context
-        with self.assertRaisesRegex(RuntimeError, "has been invalidated"):
-            kernel.call_boxed(dispatch_keys, 0, 5)
-
-    def test_library_get_kernel_with_conditional_dispatch(self):
-        """Test registering a custom kernel with conditional dispatch logic."""
-
-        def conditional_arange_cpu1(
-            original_kernel,
-            dispatch_keys,
-            start,
-            end,
-            dtype=None,
-            layout=torch.strided,
-            device=None,
-            pin_memory=False,
-        ):
-            # If end is even, use the original kernel, otherwise return ones tensor
-            if end % 2 == 0:
-                op_handle = torch.ops.aten.arange.start._handle
-                return original_kernel.call_boxed(
-                    dispatch_keys,
-                    start,
-                    end,
-                    dtype=dtype,
-                    layout=layout,
-                    device=device,
-                    pin_memory=pin_memory,
-                )
-            else:
-                size = max(0, int(end - start))
-                return torch.ones(size, dtype=dtype, device=device)
-
-        def conditional_arange_cpu2(
-            original_kernel,
-            dispatch_keys,
-            start,
-            end,
-            dtype=None,
-            layout=torch.strided,
-            device=None,
-            pin_memory=False,
-        ):
-            # If start is even, use the original kernel, otherwise return twos tensor
-            if start % 2 == 0:
-                op_handle = torch.ops.aten.arange.start._handle
-                return original_kernel.call_boxed(
-                    dispatch_keys,
-                    start,
-                    end,
-                    dtype=dtype,
-                    layout=layout,
-                    device=device,
-                    pin_memory=pin_memory,
-                )
-            else:
-                size = max(0, int(end - start))
-                return torch.empty(size, dtype=dtype, device=device).fill_(2)
-
-        original_kernel = torch.library.get_kernel("aten::arange.start", "CPU")
-        expected_result1, expected_result2 = torch.ones(5), torch.arange(0, 6)
-        expected_result3, expected_result4, expected_result5 = (
-            torch.ones(5),
-            torch.arange(0, 6),
-            torch.ones(5).fill_(2),
-        )
-
-        with torch.library._scoped_library("aten", "IMPL") as lib2:
-            with torch.library._scoped_library("aten", "IMPL") as lib1:
-                lib1.impl(
-                    "arange.start",
-                    partial(conditional_arange_cpu1, original_kernel),
-                    "CPU",
-                    with_keyset=True,
-                )
-
-                self.assertEqual(torch.arange(0, 5), expected_result1)
-                self.assertEqual(torch.arange(0, 6), expected_result2)
-                new_original_kernel = torch.library.get_kernel(
-                    "aten::arange.start", "CPU"
-                )
-                lib2.impl(
-                    "arange.start",
-                    partial(conditional_arange_cpu2, new_original_kernel),
-                    "CPU",
-                    allow_override=True,
-                    with_keyset=True,
-                )
-
-                self.assertEqual(torch.arange(0, 5), expected_result3)
-                self.assertEqual(torch.arange(0, 6), expected_result4)
-                self.assertEqual(torch.arange(1, 6), expected_result5)
-
-            # The kernel should now be invalidated after destroying lib1
-            with self.assertRaisesRegex(RuntimeError, "has been invalidated"):
-                torch.arange(0, 5)
-
-            # Should still work after destroying lib1
-            self.assertEqual(torch.arange(1, 6), expected_result5)
-
-    def test_library_get_kernel_invalid(self):
-        """Test that get_kernel raises an error when no kernel is available."""
-        with torch.library._scoped_library("test_invalid_kernel", "DEF") as lib:
-            lib.define("cpu_only_op(Tensor x) -> Tensor")
-            lib.impl("cpu_only_op", lambda x: x * 2, "CPU")
-
-            cpu_kernel = torch.library.get_kernel(
-                "test_invalid_kernel::cpu_only_op", "CPU"
-            )
-            self.assertIsNotNone(cpu_kernel)
-
-            # CUDA should fail at the isValid() check since no CUDA kernel exists
-            with self.assertRaisesRegex(
-                RuntimeError, "no kernel for CUDA for test_invalid_kernel::cpu_only_op"
-            ):
-                torch.library.get_kernel("test_invalid_kernel::cpu_only_op", "CUDA")
-
 
 class MiniOpTestOther(CustomOpTestCaseBase):
     test_ns = "mini_op_test"
diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in
diff --git a/torch/csrc/utils/python_dispatch.cpp b/torch/csrc/utils/python_dispatch.cpp
diff --git a/torch/library.py b/torch/library.py

Original file line number	Diff line number	Diff line change
`@@ -487,10 +487,6 @@ class TORCH_API OperatorHandle {`
`487`	`487`	`return operatorDef_->op.hasComputedKernelForDispatchKey(k);`
`488`	`488`	`}`
`489`	`489`
`490`		`- SafeKernelFunction getComputedKernelForDispatchKey(DispatchKey k) const {`
`491`		`- return operatorDef_->op.getComputedKernelForDispatchKey(k);`
`492`		`- }`
`493`		`-`
`494`	`490`	`std::string dumpComputedTable() const {`
`495`	`491`	`return operatorDef_->op.dumpComputedTable();`
`496`	`492`	`}`