diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py
index a6cc59e26f0..a711f81b738 100644
--- a/backends/vulkan/op_registry.py
+++ b/backends/vulkan/op_registry.py
@@ -489,10 +489,8 @@ def register_rotary_emb_op():
 
 @update_features(
     [
-        exir_ops.edge.aten.clone.default,
         exir_ops.edge.aten.permute.default,
         exir_ops.edge.aten.permute_copy.default,
-        exir_ops.edge.aten.view_copy.default,
     ]
 )
 def register_view_ops():
@@ -502,6 +500,21 @@ def register_view_ops():
     )
 
 
+@update_features(
+    [
+        exir_ops.edge.aten.view_copy.default,
+        exir_ops.edge.aten.squeeze_copy.dims,
+        exir_ops.edge.aten.unsqueeze_copy.default,
+        exir_ops.edge.aten.clone.default,
+    ]
+)
+def register_view_ops_with_buffer_meta():
+    return OpFeatures(
+        inputs_storage=utils.ANY_STORAGE,
+        supports_resize=True,
+    )
+
+
 # Fully featured transfer operators (i.e. operators that copy data from the input
 # tensor(s) to the output tensor(s)), which have memory layout agnostic implementations
 # for both texture and buffer storage types.
@@ -562,9 +575,6 @@ def register_ported_op():
 # Ops ported from PyTorch Vulkan backend. These ops are in a separate registry because they support all packed dimensions
 @update_features(
     [
-        # Shape Manipulation
-        exir_ops.edge.aten.squeeze_copy.dims,
-        exir_ops.edge.aten.unsqueeze_copy.default,
         # Tensor combination
         exir_ops.edge.aten.repeat.default,
         exir_ops.edge.aten.split_with_sizes_copy.default,
diff --git a/backends/vulkan/runtime/graph/ops/glsl/view_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/view_buffer.glsl
new file mode 100644
index 00000000000..2c02803a9b1
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/view_buffer.glsl
@@ -0,0 +1,44 @@
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+#define T ${buffer_scalar_type(DTYPE)}
+
+${define_required_extensions(DTYPE)}
+
+layout(std430) buffer;
+
+#include "indexing.glslh"
+
+${layout_declare_tensor(B, "w", "t_outp", DTYPE, STORAGE)}
+${layout_declare_tensor(B, "r", "t_inp", DTYPE, STORAGE)}
+
+${layout_declare_ubo(B, "BufferMetadata", "outp")}
+${layout_declare_ubo(B, "BufferMetadata", "inp")}
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+/*
+ * The insight behind the view operation is that the contiguous index of each
+ * tensor element is the same in the input and output tensors.
+ */
+void main() {
+  const uint outp_bufi = gl_GlobalInvocationID.x;
+  if (outp_bufi >= numel(outp)) {
+    return;
+  }
+
+  TensorIndex outp_tidx;
+  linear_idx_to_tensor_idx(outp, outp_bufi, outp_tidx);
+
+  // To map the output to the input, find the input element that has the same
+  // contiguous index as the output element.
+  const uint contig_idx = tensor_idx_to_contiguous_idx(outp, outp_tidx);
+
+  TensorIndex inp_tidx;
+  contiguous_idx_to_tensor_idx(inp, contig_idx, inp_tidx);
+
+  const uint inp_bufi = tensor_idx_to_linear_idx(inp, inp_tidx);
+
+  t_outp[outp_bufi] = t_inp[inp_bufi];
+}
diff --git a/backends/vulkan/runtime/graph/ops/glsl/view_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/view_buffer.yaml
new file mode 100644
index 00000000000..ec92bf483c8
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/view_buffer.yaml
@@ -0,0 +1,20 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+view_buffer:
+  parameter_names_with_default_values:
+    DTYPE: float
+    STORAGE: buffer
+  generate_variant_forall:
+    DTYPE:
+      - VALUE: half
+      - VALUE: float
+      - VALUE: double
+      - VALUE: int8
+      - VALUE: uint8
+      - VALUE: int32
+  shader_variants:
+    - NAME: view_buffer
diff --git a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
index 04e74af4e0c..0ae9d53a481 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
@@ -143,7 +143,11 @@ void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
   if (src_storage == utils::kBuffer && dst_storage == utils::kTexture3D) {
     return add_buffer_to_image_node(graph, src, dst);
   }
-  VK_THROW("Buffer to buffer memory layout transition not supported yet!");
+
+  std::vector<ValueRef> extra_args = {};
+  // Buffer to buffer copy
+  return add_view_copy_buffer_node(
+      graph, src, dst, extra_args, resize_clone_node);
 }
 
 // Clone node is not the most efficient implementation for the aten.clone
diff --git a/backends/vulkan/runtime/graph/ops/impl/Squeeze.cpp b/backends/vulkan/runtime/graph/ops/impl/Squeeze.cpp
index 249f5e7fa6b..13801b45cc7 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Squeeze.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Squeeze.cpp
@@ -10,6 +10,7 @@
 
 #include
 #include
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/View.h>
 #include
 #include
 
@@ -55,8 +56,52 @@ void add_squeeze_copy_dims_node(
   }
 }
 
+void resize_squeeze_node(
+    ComputeGraph* graph,
+    const std::vector<ArgGroup>& args,
+    const std::vector<ValueRef>& extra_args) {
+  const ValueRef out = args.at(0).refs.at(0);
+  const ValueRef in = args.at(1).refs.at(0);
+  const ValueRef dims_ref = extra_args.at(0);
+
+  const IntListPtr dims_list = graph->get_int_list(dims_ref);
+  // Work on a copy so the adjustments below do not mutate the dims list
+  // stored in the graph across resize calls.
+  std::vector<int64_t> dims = *dims_list;
+
+  std::vector<int64_t> out_sizes = graph->sizes_of(in);
+
+  // Remove the dimensions specified in dims if their size is 1
+  for (int64_t dim : dims) {
+    if (dim >= 0 && dim < static_cast<int64_t>(out_sizes.size()) &&
+        out_sizes[dim] == 1) {
+      out_sizes.erase(out_sizes.begin() + dim);
+      // After erasing, all subsequent dims shift left by one,
+      // so decrement all subsequent entries in dims
+      for (auto& d : dims) {
+        if (d > dim) {
+          --d;
+        }
+      }
+    }
+  }
+
+  graph->virtual_resize(out, out_sizes);
+}
+
 void squeeze_copy_dims(ComputeGraph& graph, const std::vector<ValueRef>& args) {
-  return add_squeeze_copy_dims_node(graph, args[0], args[1], args[2]);
+  int idx = 0;
+  const ValueRef in = args.at(idx++);
+  const ValueRef dims = args.at(idx++);
+  const ValueRef out = args.at(idx++);
+
+  std::vector<ValueRef> resize_args = {dims};
+
+  if (graph.is_buffer_storage(in)) {
+    return add_view_copy_buffer_node(
+        graph, in, out, resize_args, resize_squeeze_node);
+  }
+  return add_squeeze_copy_dims_node(graph, in, dims, out);
 }
 
 REGISTER_OPERATORS {
diff --git a/backends/vulkan/runtime/graph/ops/impl/Unsqueeze.cpp b/backends/vulkan/runtime/graph/ops/impl/Unsqueeze.cpp
index c4de5d88f30..0a98f6d8f43 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Unsqueeze.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Unsqueeze.cpp
@@ -9,6 +9,7 @@
 
 #include
 #include
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/View.h>
 #include
 #include
 
@@ -45,8 +46,42 @@ void add_unsqueeze_node(
   add_permute_node(graph, in, permute_dims_ref, out);
 }
 
+void resize_unsqueeze_node(
+    ComputeGraph* graph,
+    const std::vector<ArgGroup>& args,
+    const std::vector<ValueRef>& extra_args) {
+  const ValueRef out = args.at(0).refs.at(0);
+  const ValueRef in = args.at(1).refs.at(0);
+  const ValueRef dims_ref = extra_args.at(0);
+
+  const IntListPtr dims = graph->get_int_list(dims_ref);
+
+  std::vector<int64_t> out_sizes = graph->sizes_of(in);
+
+  // Insert singleton dimensions at the specified positions
+  for (auto dim : *dims) {
+    int64_t d = dim;
+    if (d < 0) {
+      d += static_cast<int64_t>(out_sizes.size()) + 1;
+    }
+    out_sizes.insert(out_sizes.begin() + d, 1);
+  }
+
+  graph->virtual_resize(out, out_sizes);
+}
+
 void unsqueeze(ComputeGraph& graph, const std::vector<ValueRef>& args) {
-  return add_unsqueeze_node(graph, args[0], args[1], args[2]);
+  int idx = 0;
+  const ValueRef in = args.at(idx++);
+  const ValueRef dims = args.at(idx++);
+  const ValueRef out = args.at(idx++);
+
+  std::vector<ValueRef> resize_args = {dims};
+  if (graph.is_buffer_storage(in)) {
+    return add_view_copy_buffer_node(
+        graph, in, out, resize_args, resize_unsqueeze_node);
+  }
+  return add_unsqueeze_node(graph, in, dims, out);
 }
 
 REGISTER_OPERATORS {
diff --git a/backends/vulkan/runtime/graph/ops/impl/View.cpp b/backends/vulkan/runtime/graph/ops/impl/View.cpp
index cb868acf7e9..8701a6246b0 100644
--- a/backends/vulkan/runtime/graph/ops/impl/View.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/View.cpp
@@ -89,8 +89,47 @@ void add_view_node(
       resize_view_node));
 }
 
+void add_view_copy_buffer_node(
+    ComputeGraph& graph,
+    ValueRef in,
+    ValueRef out,
+    const std::vector<ValueRef>& resize_args,
+    const ExecuteNode::ResizeFunction& resize_fn) {
+  std::string kernel_name = "view_buffer";
+  add_dtype_suffix(kernel_name, graph.dtype_of(out));
+
+  graph.execute_nodes().emplace_back(new DynamicDispatchNode(
+      graph,
+      VK_KERNEL_FROM_STR(kernel_name),
+      default_pick_global_wg_size,
+      default_pick_local_wg_size,
+      // Inputs and Outputs
+      {{out, vkapi::kWrite}, {in, vkapi::kRead}},
+      // Parameter Buffers
+      {graph.buffer_meta_ubo(out), graph.buffer_meta_ubo(in)},
+      // Push Constants
+      {},
+      // Specialization Constants
+      {},
+      // Resize Args
+      resize_args,
+      // Resizing Logic
+      resize_fn));
+}
+
 void view(ComputeGraph& graph, const std::vector<ValueRef>& args) {
-  return add_view_node(graph, args[0], args[1], args[2]);
+  int idx = 0;
+  const ValueRef in = args.at(idx++);
+  const ValueRef sizes = args.at(idx++);
+  const ValueRef out = args.at(idx++);
+
+  std::vector<ValueRef> resize_args = {sizes};
+
+  if (graph.is_buffer_storage(out)) {
+    return add_view_copy_buffer_node(
+        graph, in, out, resize_args, resize_view_node);
+  }
+  return add_view_node(graph, in, sizes, out);
 }
 
 REGISTER_OPERATORS {
diff --git a/backends/vulkan/runtime/graph/ops/impl/View.h b/backends/vulkan/runtime/graph/ops/impl/View.h
index a2038d184c3..7a7a8d57742 100644
--- a/backends/vulkan/runtime/graph/ops/impl/View.h
+++ b/backends/vulkan/runtime/graph/ops/impl/View.h
@@ -12,6 +12,18 @@
 
 namespace vkcompute {
 
+/*
+ * Dispatches the view_copy compute shader. This can be used to implement ops
+ * that preserve the "contiguous" indices of elements between the input and
+ * output, such as view_copy, squeeze_copy, unsqueeze_copy, etc.
+ */
+void add_view_copy_buffer_node(
+    ComputeGraph& graph,
+    ValueRef in,
+    ValueRef out,
+    const std::vector<ValueRef>& resize_args,
+    const ExecuteNode::ResizeFunction& resize_fn);
+
 void add_view_node(
     ComputeGraph& graph,
     ValueRef in,
diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py
index f03b9a50737..e04ad80aa86 100644
--- a/backends/vulkan/test/op_tests/cases.py
+++ b/backends/vulkan/test/op_tests/cases.py
@@ -911,7 +911,28 @@ def get_view_inputs():
         "utils::kHeightPacked",
         "utils::kChannelsPacked",
     ]
-    return test_suite
+
+    highdim_test_suite = VkTestSuite(
+        [
+            ((1, 1, 3, 3, 3), (9, 3)),
+            ((2, 3, 4, 6, 5, 4), (6, 4, 6, 5, 4)),
+            ((2, 3, 3, 7, 8), (2, 3, 3, 8 * 7)),
+        ]
+    )
+    highdim_test_suite.storage_types = [
+        "utils::kBuffer",
+    ]
+    highdim_test_suite.test_name_suffix = "highdim"
+    highdim_test_suite.data_gen = "make_seq_tensor"
+
+    for suite in [test_suite, highdim_test_suite]:
+        suite.layouts = [
+            # "utils::kWidthPacked",
+            "utils::kHeightPacked",
+            "utils::kChannelsPacked",
+        ]
+
+    return [test_suite, highdim_test_suite]
 
 
 @register_test_suite("aten.slice_copy.Tensor")
@@ -1124,12 +1145,34 @@ def get_unsqueeze_inputs():
             ((1, 10), -1),
         ]
     )
-    test_suite.layouts = [
-        "utils::kWidthPacked",
-        "utils::kChannelsPacked",
+
+    highdim_test_suite = VkTestSuite(
+        [
+            ((2, 3, 4, 5, 6), 0),
+            ((2, 3, 4, 5, 6), 1),
+            ((2, 3, 4, 5, 6), 5),
+            ((2, 3, 4, 5, 6), -1),
+            ((2, 3, 4, 5, 6), -2),
+            ((1, 2, 3, 4, 5), 0),
+            ((1, 2, 3, 4, 5), 3),
+            ((1, 2, 3, 4, 5), -1),
+            ((2, 3, 4, 5), 0),
+            ((1, 2, 3, 4), 1),
+        ]
+    )
+    highdim_test_suite.storage_types = [
+        "utils::kBuffer",
     ]
-    test_suite.data_gen = "make_seq_tensor"
-    return test_suite
+    highdim_test_suite.test_name_suffix = "highdim"
+
+    for suite in [test_suite, highdim_test_suite]:
+        suite.layouts = [
+            "utils::kWidthPacked",
+            "utils::kChannelsPacked",
+        ]
+        suite.data_gen = "make_seq_tensor"
+
+    return [test_suite, highdim_test_suite]
 
 
 @register_test_suite("aten.clone.default")
@@ -1149,11 +1192,28 @@ def get_clone_inputs():
             ((XS,),),
         ]
     )
-    test_suite.layouts = [
-        "utils::kChannelsPacked",
+
+    highdim_test_suite = VkTestSuite(
+        [
+            ((2, 3, 4, 5, 6),),
+            ((2, 3, 4, 5, 1),),
+            ((1, 1, 3, 4, 5),),
+            ((2, 3, 4, 5, 6, 7),),
+            ((1, 2, 3, 4, 5, 6),),
+        ]
+    )
+    highdim_test_suite.storage_types = [
+        "utils::kBuffer",
     ]
-    test_suite.data_gen = "make_seq_tensor"
-    return test_suite
+    highdim_test_suite.test_name_suffix = "highdim"
+
+    for suite in [test_suite, highdim_test_suite]:
+        suite.layouts = [
+            "utils::kChannelsPacked",
+        ]
+        suite.data_gen = "make_seq_tensor"
+
+    return [test_suite, highdim_test_suite]
 
 
 @register_test_suite("aten.repeat.default")
@@ -1773,7 +1833,31 @@ def get_squeeze_copy_dim_inputs():
             ([1, M1, M1], 0),
         ]
     )
-    return test_suite
+
+    highdim_test_suite = VkTestSuite(
+        [
+            ([1, 2, 3, 4, 5, 1], 0),
+            ([1, 2, 3, 4, 5, 1], 5),
+            ([1, 2, 3, 4, 5, 1], [0, 5]),
+            ([2, 1, 3, 1, 5, 6], 1),
+            ([2, 1, 3, 1, 5, 6], 3),
+            ([2, 1, 3, 1, 5, 6], [1, 3]),
+            ([1, 1, 3, 4, 5, 6], [0, 1]),
+            ([2, 3, 4, 1, 1, 6], [3, 4]),
+        ]
+    )
+    highdim_test_suite.storage_types = [
+        "utils::kBuffer",
+    ]
+    highdim_test_suite.test_name_suffix = "highdim"
+
+    for suite in [test_suite, highdim_test_suite]:
+        suite.layouts = [
+            "utils::kWidthPacked",
+            "utils::kChannelsPacked",
+        ]
+
+    return [test_suite, highdim_test_suite]
 
 
 @register_test_suite("aten.flip.default")
diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index 33536acb662..687a8761c6b 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -1777,20 +1777,6 @@ def forward(self, x):
             (torch.rand(size=[1, 5, 2, 3]),),
         )
 
-    def test_vulkan_backend_high_dim_tensors_fail(self):
-        class UnsqueezeHigherDim(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, x):
-                return torch.unsqueeze(x, 2)
-
-        self.lower_module_and_test_output(
-            UnsqueezeHigherDim(),
-            (torch.ones(size=[5, 4, 1, 2, 6]),),
-            expect_no_delegates=True,
-        )
-
     def test_vulkan_backend_large_linear_layer(self):
         class LinearModel(torch.nn.Module):
             def __init__(self, large_out_channels: int) -> None:
@@ -2298,6 +2284,28 @@ def forward(self, x1, x2, x3, x4, x5, x6):
             test_inputs=test_inputs,
         )
 
+    def test_vulkan_backend_high_dimensional_tensors(self):
+        class HighDimTensorModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x, y):
+                # Unsqueeze each input twice to create 6-dim tensors
+                x_6d = torch.unsqueeze(torch.unsqueeze(x, 0), 0)
+                y_6d = torch.unsqueeze(torch.unsqueeze(y, 0), 0)
+                # Add the tensors together
+                result = x_6d + y_6d
+                return result
+
+        high_dim_module = HighDimTensorModule()
+        # Create two 4-dim inputs
+        sample_inputs = (
+            torch.rand(size=(2, 3, 4, 5), dtype=torch.float32),
+            torch.rand(size=(2, 3, 4, 5), dtype=torch.float32),
+        )
+
+        self.lower_module_and_test_output(high_dim_module, sample_inputs)
+
     def test_vulkan_backend_torchao_wo_quantized_linear(self):
         in_features = 1024
         out_features = 512
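
A note on the core technique, since every op touched above funnels into the same shader: view, squeeze, unsqueeze, and buffer-to-buffer clone all preserve each element's "contiguous" index, i.e. the flat index the element would have in a contiguous row-major tensor. Each invocation of view_buffer therefore converts its output buffer index to an output tensor index, recombines that into the shared contiguous index, decomposes the contiguous index against the input sizes, and finally applies the input strides to locate the source element. The standalone C++ sketch below mirrors that chain on the CPU for a (2, 3) -> (3, 2) reshape. The helper names echo the GLSL functions in indexing.glslh, but they are illustrative reimplementations over plain std::vector metadata, not the actual BufferMetadata API.

// Illustrative sketch only: mirrors the index math in view_buffer.glsl using
// hypothetical helpers over std::vector metadata, not the real ExecuTorch API.
#include <cstdint>
#include <iostream>
#include <vector>

// Row-major (contiguous) strides for the given sizes.
std::vector<int64_t> contiguous_strides(const std::vector<int64_t>& sizes) {
  std::vector<int64_t> strides(sizes.size(), 1);
  for (int64_t d = static_cast<int64_t>(sizes.size()) - 2; d >= 0; --d) {
    strides[d] = strides[d + 1] * sizes[d + 1];
  }
  return strides;
}

// Decompose a flat contiguous index into a per-dimension tensor index.
std::vector<int64_t> contiguous_idx_to_tensor_idx(
    const std::vector<int64_t>& sizes,
    int64_t contig_idx) {
  std::vector<int64_t> tidx(sizes.size());
  for (int64_t d = static_cast<int64_t>(sizes.size()) - 1; d >= 0; --d) {
    tidx[d] = contig_idx % sizes[d];
    contig_idx /= sizes[d];
  }
  return tidx;
}

// Recombine a tensor index into the flat contiguous index.
int64_t tensor_idx_to_contiguous_idx(
    const std::vector<int64_t>& sizes,
    const std::vector<int64_t>& tidx) {
  int64_t contig_idx = 0;
  for (size_t d = 0; d < sizes.size(); ++d) {
    contig_idx = contig_idx * sizes[d] + tidx[d];
  }
  return contig_idx;
}

// Apply strides to a tensor index to locate the element in the backing buffer.
int64_t tensor_idx_to_buffer_idx(
    const std::vector<int64_t>& strides,
    const std::vector<int64_t>& tidx) {
  int64_t bufi = 0;
  for (size_t d = 0; d < strides.size(); ++d) {
    bufi += strides[d] * tidx[d];
  }
  return bufi;
}

int main() {
  // View a contiguous (2, 3) tensor as (3, 2). Both tensors are contiguous
  // here, so the output tensor index can be derived directly from outp_bufi.
  const std::vector<int64_t> inp_sizes = {2, 3};
  const std::vector<int64_t> outp_sizes = {3, 2};
  const std::vector<float> inp = {0, 1, 2, 3, 4, 5};
  std::vector<float> outp(inp.size());

  const std::vector<int64_t> inp_strides = contiguous_strides(inp_sizes);

  for (int64_t outp_bufi = 0; outp_bufi < static_cast<int64_t>(outp.size());
       ++outp_bufi) {
    // Output buffer index -> output tensor index -> shared contiguous index.
    const auto outp_tidx = contiguous_idx_to_tensor_idx(outp_sizes, outp_bufi);
    const int64_t contig_idx =
        tensor_idx_to_contiguous_idx(outp_sizes, outp_tidx);
    // Contiguous index -> input tensor index -> input buffer index.
    const auto inp_tidx = contiguous_idx_to_tensor_idx(inp_sizes, contig_idx);
    const int64_t inp_bufi = tensor_idx_to_buffer_idx(inp_strides, inp_tidx);
    outp[outp_bufi] = inp[inp_bufi];
  }

  for (float v : outp) {
    std::cout << v << " ";  // Prints 0 1 2 3 4 5: a straight copy.
  }
  std::cout << "\n";
  return 0;
}

For fully contiguous tensors the chain collapses to outp[i] = inp[i], which is why a single kernel can back view_copy, squeeze_copy, unsqueeze_copy, and clone, with only the resize function that recomputes the output sizes differing between ops.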