diff --git a/backends/vulkan/_passes/tag_memory_meta_pass.py b/backends/vulkan/_passes/tag_memory_meta_pass.py
index 0a6a2d42d44..1d08817e26a 100644
--- a/backends/vulkan/_passes/tag_memory_meta_pass.py
+++ b/backends/vulkan/_passes/tag_memory_meta_pass.py
@@ -23,8 +23,6 @@
 
 from executorch.exir.pass_base import ExportPass, PassResult
 
-from torch._subclasses.fake_tensor import FakeTensor
-
 from torch.fx.passes.tools_common import NodeList
 from torch.fx.passes.utils.fuser_utils import topo_sort
 
@@ -138,9 +136,7 @@ def propose_node_storage(
                 return storage
 
         for arg in node.args:
-            if isinstance(arg, torch.fx.Node) and isinstance(
-                arg.meta["val"], FakeTensor
-            ):
+            if isinstance(arg, torch.fx.Node) and utils.is_tensor_node(arg):
                 storage = utils.get_node_storage_type(arg)
                 if storage is not None and storage in valid_storage_types:
                     return storage
@@ -178,9 +174,7 @@ def propose_node_layout(
                 return layout
 
         for arg in node.args:
-            if isinstance(arg, torch.fx.Node) and isinstance(
-                arg.meta["val"], FakeTensor
-            ):
+            if isinstance(arg, torch.fx.Node) and utils.is_tensor_node(arg):
                 layout = utils.get_node_memory_layout(arg)
                 if layout is not None and layout in valid_layouts:
                     return layout
@@ -202,7 +196,7 @@ def should_annotate(self, node) -> bool:
         if not isinstance(node, torch.fx.Node):
             return False
 
-        if not isinstance(node.meta["val"], FakeTensor):
+        if not utils.is_tensor_node(node):
             return False
 
         # Storage type and memory layout for tensorref will be determined at runtime
@@ -210,6 +204,11 @@ def should_annotate(self, node) -> bool:
         if node.meta.get("vkdg_tensorref", False):
             return False
 
+        # Skip annotating output node. The output tensors should be annotated by the
+        # time the output node is observed.
+        if node.op == "output":
+            return False
+
         return True
 
     def should_delay_annotation(self, node: torch.fx.Node) -> bool:
diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index 54db1a4b778..2816dd10202 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -10,8 +10,6 @@
 import unittest
 from typing import Tuple
 
-import executorch.backends.vulkan.serialization.vulkan_graph_schema as vk_graph_schema
-
 import torch
 
 from executorch.backends.transforms.convert_dtype_pass import I64toI32
@@ -98,7 +96,6 @@ def lower_module_and_test_output(
         rtol=1e-01,
         dynamic_shapes=None,
         test_inputs=None,
-        memory_layouts=None,
         first_output_only=False,
     ):
         """
@@ -107,10 +104,8 @@ def lower_module_and_test_output(
         outputs with the outputs of the eager module.
         """
 
-        def run_test(memory_layout):
-            compile_options = {
-                "memory_layout_override": memory_layout,
-            }
+        def run_test():
+            compile_options = {}
 
             # At least model should run in eager mode.
             model.eval()
@@ -168,16 +163,7 @@ def run_test(memory_layout):
                 first_output_only=first_output_only,
             )
 
-        memory_layouts_to_test = [
-            vk_graph_schema.VkMemoryLayout.TENSOR_WIDTH_PACKED,
-            vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED,
-        ]
-
-        if memory_layouts is not None:
-            memory_layouts_to_test = memory_layouts
-
-        for memory_layout in memory_layouts_to_test:
-            run_test(memory_layout)
+        run_test()
 
     def test_vulkan_backend_add(self):
         # This test is the simplest test by manually lowering some submodules, we can use paritioner
@@ -549,7 +535,6 @@ def forward(self, x):
             sample_inputs,
             dynamic_shapes=dynamic_shapes,
             test_inputs=test_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
             first_output_only=True,
         )
 
@@ -584,7 +569,6 @@ def forward(self, x):
             sample_inputs,
             dynamic_shapes=dynamic_shapes,
             test_inputs=test_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_abs(self):
@@ -744,7 +728,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     @unittest.skip(
@@ -766,7 +749,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_conv2d(self):
@@ -793,7 +775,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             conv2d_module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_conv_transpose2d(self):
@@ -821,7 +802,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             conv_transpose2d_module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_conv2d_dw(self):
@@ -846,7 +826,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             conv2d_module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_conv2d_pw(self):
@@ -871,7 +850,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             conv2d_module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_conv2d_bias_false(self):
@@ -898,7 +876,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             conv2d_module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_conv1d(self):
@@ -925,7 +902,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             conv1d_module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_conv1d_bias_false(self):
@@ -949,7 +925,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             conv1d_module,
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_native_layer_norm(self):
@@ -966,7 +941,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             NativeLayerNormModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_batch_norm(self):
@@ -983,7 +957,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             BatchNormModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_full(self):
@@ -1013,19 +986,16 @@ def forward(self, x):
         self.lower_module_and_test_output(
             FullModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
         self.lower_module_and_test_output(
             ZerosModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
         self.lower_module_and_test_output(
             OnesModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_full_like(self):
@@ -1055,19 +1025,16 @@ def forward(self, x):
         self.lower_module_and_test_output(
             FullLikeModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
         self.lower_module_and_test_output(
             ZerosLikeModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
         self.lower_module_and_test_output(
             OnesLikeModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_upsample_nearest2d(self):
@@ -1084,7 +1051,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             UpsampleNearest2d(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_minimum(self):
@@ -1103,7 +1069,6 @@ def forward(self, x, y):
         self.lower_module_and_test_output(
             MinimumModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_reshape(self):
@@ -1119,7 +1084,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             ReshapeModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_view(self):
@@ -1135,7 +1099,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             ViewModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_view_int(self):
@@ -1151,7 +1114,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             ViewModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_unsqueeze(self):
@@ -1169,7 +1131,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             UnsqueezeModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_squeeze(self):
@@ -1185,7 +1146,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             SqueezeModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_select(self):
@@ -1201,7 +1161,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             SelectModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_permute_copy(self):
@@ -1217,7 +1176,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             PermuteModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_permute_copy_int(self):
@@ -1233,7 +1191,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             PermuteModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_cat(self):
@@ -1254,7 +1211,6 @@ def forward(self, x, y, z, w):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_cat_with_zero_size(self):
@@ -1275,7 +1231,6 @@ def forward(self, x, y, z, w):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_slice(self):
@@ -1291,7 +1246,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_split_with_sizes(self):
@@ -1307,7 +1261,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_split_tensor(self):
@@ -1323,7 +1276,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_clone(self):
@@ -1339,7 +1291,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_constant_pad_nd(self):
@@ -1355,7 +1306,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_repeat(self):
@@ -1371,7 +1321,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_t_default(self):
@@ -1389,7 +1338,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             TestModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     @unittest.skip(
@@ -1411,7 +1359,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             SoftmaxModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     @unittest.skip(
@@ -1433,7 +1380,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             LogSoftmaxModule(),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_gelu(self):
@@ -1467,31 +1413,26 @@ def forward(self, x):
         self.lower_module_and_test_output(
             MeanModule(dims=[-1, -2]),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
         self.lower_module_and_test_output(
             MeanModule(dims=[1]),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
         self.lower_module_and_test_output(
             MeanModule(dims=[0, 1, 2, 3]),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
         self.lower_module_and_test_output(
             MeanModule(dims=[-1, -2], keepdim=False),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
         self.lower_module_and_test_output(
             MeanModule(dims=[1], keepdim=False),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_index_select_int(self):
@@ -1509,7 +1450,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             IndexSelectModule(dim=1, indices=[2, 3, 5, 6, 7]),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_index_select(self):
@@ -1527,7 +1467,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             IndexSelectModule(dim=0, indices=[1, 3, 5, 7, 8, 9, 10, 11, 2, 3]),
             sample_inputs,
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_arange_int(self):
@@ -1556,7 +1495,6 @@ def forward(self, x):
             self.lower_module_and_test_output(
                 ArangeModule(i),
                 (torch.randn(size=(1,), dtype=torch.float32),),  # dummy input
-                memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
             )
 
     def test_vulkan_backend_arange_float(self):
@@ -1578,7 +1516,6 @@ def forward(self, x):
             self.lower_module_and_test_output(
                 ArangeModule(i),
                 (torch.randn(size=(1,), dtype=torch.float32),),  # dummy input
-                memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
             )
 
     def test_vulkan_backend_arange_int64(self):
@@ -1604,12 +1541,10 @@ def forward(self, x):
             self.lower_module_and_test_output(
                 ArangeModule(i),
                 (torch.randn(size=(1,), dtype=torch.float32),),  # dummy input
-                memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
             )
             self.lower_module_and_test_output(
                 ArangeModule(i),
                 (torch.randint(low=-100, high=100, size=(5, 5)),),  # dummy input
-                memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
             )
 
     def test_vulkan_backend_embedding_1d(self):
@@ -1624,7 +1559,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             EmbeddingModule(torch.nn.Embedding(5, 4)),
             (torch.tensor([0, 1, 0, 4, 2, 0]),),
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_embedding_2d(self):
@@ -1639,7 +1573,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             EmbeddingModule(torch.nn.Embedding(5, 4)),
             (torch.tensor([[0, 1, 0], [4, 2, 0]]),),
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_embedding_3d(self):
@@ -1654,7 +1587,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             EmbeddingModule(torch.nn.Embedding(5, 4)),
             (torch.tensor([[[0, 1], [0, 1]], [[4, 2], [3, 3]]]),),
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_flip(self):
@@ -1668,7 +1600,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             FlipModule(),
             (torch.arange(48).reshape(2, 3, 4, 2),),
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_conv_with_clamp(self):
@@ -1704,7 +1635,6 @@ def forward(self, x):
         self.lower_module_and_test_output(
             ConvWithClampModule(),
             (torch.randn(size=(1, 6, 40, 50), dtype=torch.float32),),
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
 
     def test_vulkan_backend_grid_priors(self):
@@ -1722,5 +1652,33 @@ def forward(self, x):
         self.lower_module_and_test_output(
             GridPriorsModule(),
             (torch.rand(size=[1, 5, 2, 3]),),
-            memory_layouts=[vk_graph_schema.VkMemoryLayout.TENSOR_CHANNELS_PACKED],
         )
+
+    # def test_vulkan_backend_conv_with_dim_order(self):
+    #     class Conv2dSequential(torch.nn.Module):
+    #         def __init__(self, bias=True, channel_last=False):
+    #             super().__init__()
+    #             self.first = torch.nn.Conv2d(
+    #                 in_channels=1,
+    #                 out_channels=3,
+    #                 kernel_size=(3, 3),
+    #                 padding=1,
+    #                 bias=bias,
+    #             )
+    #             self.second = torch.nn.Conv2d(
+    #                 in_channels=3,
+    #                 out_channels=2,
+    #                 kernel_size=(3, 3),
+    #                 padding=1,
+    #                 bias=bias,
+    #             )
+
+    #         def forward(self, x):
+    #             x = x.to(memory_format=torch.channels_last)
+    #             return self.second(self.first(x))
+
+    #     self.lower_module_and_test_output(
+    #         Conv2dSequential(),
+    #         (torch.rand(size=[1, 1, 3, 3]),),
+    #
+    #     )
diff --git a/backends/vulkan/utils.py b/backends/vulkan/utils.py
index a6db780309d..1a030e5e8f5 100644
--- a/backends/vulkan/utils.py
+++ b/backends/vulkan/utils.py
@@ -202,7 +202,7 @@ def set_node_spec_attr(node: torch.fx.Node, attr: str, value):
     spec = node.meta["spec"]
     if isinstance(spec, TensorSpec):
         setattr(spec, attr, value)
-    elif isinstance(spec, list) or isinstance(spec, tuple):
+    elif isinstance(spec, (list, tuple)):
         for s in spec:
             assert isinstance(s, TensorSpec)
             setattr(s, attr, value)
@@ -215,9 +215,9 @@ def get_node_spec_attr(node: torch.fx.Node, attr: str, return_first: bool = True
     spec = node.meta["spec"]
     if isinstance(spec, TensorSpec):
         return getattr(spec, attr) if hasattr(spec, attr) else None
-    elif isinstance(spec, list) or isinstance(spec, tuple):
+    elif isinstance(spec, (list, tuple)):
         if return_first:
-            return getattr(spec[0], attr) if hasattr(spec, attr) else None
+            return getattr(spec[0], attr) if hasattr(spec[0], attr) else None
         else:
             return [getattr(s, attr) if hasattr(s, attr) else None for s in spec]
     else: