Skip to content

Commit 2f6387e

Browse files
Lucaskabela and bdhirsh authored
[CherryPick][2.9] Cherry pick request for Reapply "Make functionalization ViewMeta serializable with pickle" pytorch#163769 (pytorch#163873)
Reapply "Make functionalization `ViewMeta` serializable with pickle. (pytorch#143712)" (pytorch#163769) NOTE: This is a re-export of pytorch#161994 ; the changes between these two PRs is exclusively to the buck/build files (Summary from pytorch#161994 ) Attempted rebase of pytorch#143712. This reverts commit 6c713cc. cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx chenyang78 kadeng chauhang amjames Lucaskabela imported-using-ghimport Test Plan: Imported from OSS Differential Revision: D81524507 Pulled By: Lucaskabela Pull Request resolved: pytorch#163769 Approved by: https://github.com/dolpm (cherry picked from commit 7d71040) Co-authored-by: Brian Hirsh <[email protected]>
1 parent 017d857 commit 2f6387e

38 files changed

+979
-423
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ torch/return_types.pyi
8282
torch/nn/functional.pyi
8383
torch/utils/data/datapipes/datapipe.pyi
8484
torch/csrc/autograd/generated/*
85+
torch/csrc/functionalization/generated/*
8586
torch/csrc/lazy/generated/*.[!m]*
8687
torch_compile_debug/
8788
# Listed manually because some files in this directory are not generated

BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ generated_cpu_cpp = [
9191
"aten/src/ATen/NativeMetaFunctions.h",
9292
"aten/src/ATen/RegistrationDeclarations.h",
9393
"aten/src/ATen/VmapGeneratedPlumbing.h",
94+
"aten/src/ATen/ViewMetaClasses.h",
95+
"aten/src/ATen/ViewMetaClasses.cpp",
9496
"aten/src/ATen/core/aten_interned_strings.h",
9597
"aten/src/ATen/core/enum_tag.h",
9698
"aten/src/ATen/core/TensorBody.h",
@@ -1106,6 +1108,7 @@ test_suite(
11061108
"aten/src/ATen/templates/LazyNonNativeIr.h",
11071109
"aten/src/ATen/templates/RegisterDispatchKey.cpp",
11081110
"aten/src/ATen/templates/RegisterDispatchDefinitions.ini",
1111+
"aten/src/ATen/templates/ViewMetaClassesPythonBinding.cpp",
11091112
"aten/src/ATen/native/native_functions.yaml",
11101113
"aten/src/ATen/native/tags.yaml",
11111114
"aten/src/ATen/native/ts_native_functions.yaml",

aten/src/ATen/FunctionalStorageImpl.cpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,6 @@
99

1010
namespace at::functionalization {
1111

12-
ViewMeta ViewMeta::to_out_idx(int64_t out_idx) {
13-
if (out_idx == this->out_index) return *this;
14-
return ViewMeta(forward_fn, reverse_fn, has_symbolic_inputs, is_multi_output, is_as_strided, out_idx);
15-
}
16-
1712
// Note [Functionalization: Alias Removal Part 2]
1813
// See Note [Functionalization: Alias Removal] for more details.
1914
// This function applies a single update from one of the views to the StorageImpl.
@@ -42,22 +37,21 @@ ViewMeta ViewMeta::to_out_idx(int64_t out_idx) {
4237
static const Tensor apply_update(const FunctionalStorageImpl::Update& update, const Tensor& base) {
4338
at::Tensor t = update.new_val;
4439
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
45-
if (update.view_metas.empty()) return t;
40+
if (update.view_metas.empty()) { return t; }
4641

4742
std::vector<at::Tensor> tmp_values({base});
4843
tmp_values.reserve(update.view_metas.size());
4944
for (size_t i = 0; i < update.view_metas.size() - 1; ++i) {
50-
at::Tensor next_view = update.view_metas[i].forward_fn(tmp_values.back(), update.view_metas[i].out_index);
45+
at::Tensor next_view = update.view_metas[i]->forward(tmp_values.back());
5146
// NB: We only actually need tmp_values for ops like select/slice/diagonal/squeeze/as_strided
5247
// All of these ops require additional information to recover the sizes of the original tensor.
5348
// If need to, we could probably apply this optimization and only bother computing tmp_values
5449
// for those necessary view ops.
5550
tmp_values.push_back(std::move(next_view));
5651
}
5752
for(int64_t i = static_cast<int64_t>(update.view_metas.size()) - 1; i >= 0; --i) {
58-
int64_t out_idx = update.view_metas[i].out_index;
5953
// Each view inverse is implemented in ViewInverses.cpp.
60-
t = update.view_metas[i].reverse_fn(tmp_values[i], t, out_idx);
54+
t = update.view_metas[i]->reverse(tmp_values[i], t);
6155
}
6256
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(t));
6357
return t;
@@ -111,13 +105,13 @@ FunctionalStorageImpl::FunctionalStorageImpl(const Tensor& base)
111105
TORCH_INTERNAL_ASSERT(!at::functionalization::impl::isFunctionalTensor(base_));
112106
}
113107

114-
void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<ViewMeta>& metas) {
108+
void FunctionalStorageImpl::add_update(const Tensor& updated_val, const std::vector<std::shared_ptr<ViewMeta>>& metas) {
115109
TORCH_CHECK(!frozen_, "cannot mutate tensors with frozen storage");
116110

117111
if (metas.size() > 1) {
118112
for (size_t i = 1; i < metas.size(); ++i) {
119113
// Skipping this check for XLA. Would be good to add it back, but it is failing XLA CI
120-
TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i].is_as_strided,
114+
TORCH_CHECK(updated_val.device().type() == c10::DeviceType::XLA || !metas[i]->is_as_strided,
121115
"During torch.compile, encountered a mutation on a view chain of length ", metas.size(), ", where view ", i,
122116
" was an as_strided() call. as_strided() is non-compositional, and therefore is not possible to functionalize properly today,"
123117
"so this behavior is banned in compile. As a workaround, you can either remove the mutation from the model code, or you "

aten/src/ATen/FunctionalStorageImpl.h

Lines changed: 76 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,44 +8,89 @@ namespace at::functionalization {
88

99
// See Note [Functionalization Pass In Core]
1010

11+
enum class InverseReturnMode {
12+
/// Specifies that functional inverses should always return a view.
13+
AlwaysView,
14+
/// Specifies that functional inverses should always return a non-view / copy.
15+
NeverView,
16+
/// Specifies that functional inverses should return a view unless a (copying)
17+
/// scatter
18+
/// inverse exists, in which case that will be used instead.
19+
/// This avoids as_strided() calls that can be difficult for subclasses to
20+
/// handle.
21+
ViewOrScatterInverse,
22+
};
23+
24+
#define FUNCTIONALIZATION_VIEWMETA_NAME(TYPE) \
25+
static const char* name() { \
26+
return #TYPE; \
27+
}
28+
29+
#define FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(...) \
30+
using SerializableTuple = std::tuple<__VA_ARGS__>
31+
1132
// ViewMeta is a class used by the functionalization pass to navigate between
1233
// a base tensor and a view tensor.
1334
// For example, if I call `b = a.view1(...)`
14-
// the functionalization pass will generate and store a ViewMeta on b that looks
15-
// like:
35+
// the functionalization pass will generate and store a ViewMeta specialization
36+
// for `view1` operation on b that looks like:
1637
//
17-
// ViewMeta(
18-
// [<captures>](const Tensor& base, int64_t mutated_view_idx) {
19-
// return base.view1(...);
20-
// },
21-
// [<captures>](const at::Tensor& base, const at::Tensor& mutated_view,
22-
// int64_t mutated_view_idx) -> at::Tensor {
23-
// return at::functionalization::impl::view1_inverse(base, mutated_view,
24-
// ...);
38+
// struct TORCH_API view1_ViewMeta : public ViewMeta {
39+
// FUNCTIONALIZATION_VIEWMETA_NAME(view1_ViewMeta);
40+
// FUNCTIONALIZATION_VIEWMETA_SERIALIZABLE_TUPLE(
41+
// bool /* reapply_views */,
42+
// const std::vector<int64_t>&);
43+
//
44+
// view1_ViewMeta(const SerializableTuple& tpl)
45+
// : view1_ViewMeta(std::get<0>(tpl), std::get<1>(tpl)) {}
46+
//
47+
// view1_ViewMeta(bool reapply_views, const std::vector<int64_t>& size)
48+
// : ViewMeta(/*has_symbolic_inputs=*/false),
49+
// reapply_views(reapply_views),
50+
// size(size) {}
51+
//
52+
// Tensor forward(const Tensor& base) override {
53+
// return base.view1(...);
2554
// }
2655
//
27-
// The forward_fn lambda describes how to replay view1 on a tensor.
56+
// Tensor reverse(const Tensor& base, const Tensor& mutated_view) override {
57+
// return at::functionalization::impl::view1_inverse(base, mutated_view,
58+
// ...);
59+
// }
2860
//
29-
// The reverse_fn lambda describes how, given a tensor that is already a view,
61+
// SerializableTuple to_serializable_tuple() {
62+
// return std::make_tuple(reapply_views, size);
63+
// }
64+
//
65+
// bool reapply_views;
66+
// std::vector<int64_t> size;
67+
// };
68+
//
69+
// The forward function describes how to replay view1 on a tensor.
70+
//
71+
// The reverse function describes how, given a tensor that is already a view,
3072
// how to get the corresponding base tensor. See Note [Functionalization Pass:
3173
// View Inverses] for details.
74+
//
75+
// `SerializedTuple` is a typedef that defines an `std::tuple<...>` type
76+
// representing the `ViewMeta` instance state. Methods that take in/return such
77+
// a type are used for supporting pickle serialization.
3278
struct ViewMeta {
3379
ViewMeta(
34-
std::function<Tensor(const Tensor&, int64_t)> forward,
35-
std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse,
3680
bool has_symbolic_inputs,
3781
bool is_multi_output = false,
3882
bool is_as_strided = false,
3983
int64_t out_idx = 0)
40-
: forward_fn(std::move(forward)),
41-
reverse_fn(std::move(reverse)),
42-
out_index(out_idx),
84+
: out_index(out_idx),
4385
is_multi_output(is_multi_output),
4486
is_as_strided(is_as_strided),
4587
has_symbolic_inputs(has_symbolic_inputs) {}
4688

47-
std::function<Tensor(const Tensor&, int64_t)> forward_fn;
48-
std::function<Tensor(const Tensor&, const Tensor&, int64_t)> reverse_fn;
89+
virtual ~ViewMeta() = default;
90+
91+
virtual Tensor forward(const Tensor& base) = 0;
92+
virtual Tensor reverse(const Tensor& base, const Tensor& mutated_view) = 0;
93+
4994
// See Note [out_idx in ViewMeta]
5095
int64_t out_index;
5196

@@ -57,10 +102,17 @@ struct ViewMeta {
57102
// Tells us if this view operation has any symbolic inputs
58103
bool has_symbolic_inputs;
59104

60-
// Returns a copy of the current ViewMeta, if out_idx matches the current
61-
// out_index. Otherwise, returns a new ViewMeta with the same forward/reverse
105+
// Returns a new ViewMeta with the same forward/reverse
62106
// functions, but a new out index.
63-
ViewMeta to_out_idx(int64_t out_idx);
107+
//
108+
// This method should be implemented by those `ViewMeta` that have more than
109+
// one output.
110+
virtual std::shared_ptr<ViewMeta> to_out_index(int64_t out_index) {
111+
TORCH_CHECK_NOT_IMPLEMENTED(
112+
false,
113+
"ViewMeta::to_out_index not implemented. ",
114+
"Likely because there's only one output.");
115+
}
64116
};
65117

66118
// FunctionalStorageImpl is a subclass of StorageImpl used by the
@@ -93,14 +145,14 @@ struct TORCH_API FunctionalStorageImpl : public c10::StorageImpl {
93145
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
94146
const at::Tensor new_val;
95147
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
96-
const std::vector<ViewMeta> view_metas;
148+
const std::vector<std::shared_ptr<ViewMeta>> view_metas;
97149
};
98150

99151
explicit FunctionalStorageImpl(const Tensor& value);
100152

101153
void add_update(
102154
const Tensor& updated_val,
103-
const std::vector<ViewMeta>& view_metas);
155+
const std::vector<std::shared_ptr<ViewMeta>>& view_metas);
104156
bool apply_updates();
105157
const Tensor& base() {
106158
return base_;

0 commit comments

Comments (0)