
Commit f5bcdcc

Merge of 2 parents: b57678b + 05effc4

Commit message:
Update
[ghstack-poisoned]

File tree

16 files changed: +1103 -483 lines

.ci/scripts/setup-emscripten.sh

Lines changed: 6 additions & 0 deletions

@@ -1,3 +1,9 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
 
 set -ex
 

.github/workflows/apple.yml

Lines changed: 1 addition & 0 deletions

@@ -149,6 +149,7 @@ jobs:
   VERSION="${{ needs.set-version.outputs.version }}"
   FRAMEWORKS=(
     "executorch"
+    "executorch_llm"
     "backend_coreml"
     "backend_mps"
     "backend_xnnpack"

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 2 additions & 2 deletions

@@ -365,7 +365,7 @@ def preprocess_model(
 
     match model_type:
         case CoreMLBackend.MODEL_TYPE.COMPILED_MODEL:
-            shutil.rmtree(str(model_path.resolve()), ignore_errors=True)
+            shutil.rmtree(str(model_path.resolve()))
            model_path = model_dir_path / MODEL_PATHS.COMPILED_MODEL.value
            compiled_model_path = mlmodel.get_compiled_model_path()
            shutil.move(
@@ -396,7 +396,7 @@ def preprocess_model(
        for key, value in model_debug_info.debugSymbolToHandles.items()
    }
 
-    shutil.rmtree(str(dir_path.resolve()), ignore_errors=True)
+    shutil.rmtree(str(dir_path.resolve()))
    return PreprocessResult(
        processed_bytes=processed_bytes,
        debug_handle_map=debug_handle_map,

backends/arm/test/passes/test_decompose_cosine_similarity_pass.py

Lines changed: 2 additions & 1 deletion

@@ -34,7 +34,8 @@ def test_decompose_cosine_similarity_tosa_BI(module):
         "executorch_exir_dialects_edge__ops_aten_mul_Tensor": 5,
         "executorch_exir_dialects_edge__ops_aten_sum_dim_IntList": 3,
         "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
-        "executorch_exir_dialects_edge__ops_aten_full_like_default": 1,
+        # TODO(masnesral): uncomment after https://github.com/pytorch/pytorch/pull/144765
+        # "executorch_exir_dialects_edge__ops_aten_full_default": 1,
         "executorch_exir_dialects_edge__ops_aten_maximum_default": 2,
         "executorch_exir_dialects_edge__ops_aten_reciprocal_default": 1,
     }
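
For reference, the expected counts line up with decomposing cosine similarity into primitive edge ops: one mul+sum for the dot product, a mul+sum+pow(0.5) per input norm, one full for the eps scalar, one maximum per clamped norm, and a final mul with a reciprocal. A plain-PyTorch sketch of such a decomposition, matching those counts but not claiming to be the pass's actual implementation:

import torch

def cosine_similarity_decomposed(x1, x2, dim=-1, eps=1e-8):
    # dot(x1, x2): 1 mul + 1 sum
    dot = torch.sum(torch.mul(x1, x2), dim)
    # per-input L2 norms: 1 mul + 1 sum + 1 pow each (2 mul, 2 sum, 2 pow total)
    n1 = torch.pow(torch.sum(torch.mul(x1, x1), dim), 0.5)
    n2 = torch.pow(torch.sum(torch.mul(x2, x2), dim), 0.5)
    # clamp each norm below by eps: 1 full + 2 maximum
    eps_t = torch.full((), eps)
    n1 = torch.maximum(n1, eps_t)
    n2 = torch.maximum(n2, eps_t)
    # combine: 2 mul + 1 reciprocal -> totals: 5 mul, 3 sum, 2 pow, 2 maximum, 1 reciprocal
    return torch.mul(dot, torch.reciprocal(torch.mul(n1, n2)))

x1, x2 = torch.randn(4, 8), torch.randn(4, 8)
assert torch.allclose(
    cosine_similarity_decomposed(x1, x2),
    torch.nn.functional.cosine_similarity(x1, x2, dim=-1),
    atol=1e-6,
)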

examples/models/llama/static_attention.py

Lines changed: 12 additions & 9 deletions

@@ -138,14 +138,16 @@ def update(
 
 
 class StaticAttentionMask:
-    def __init__(self, input_len, cache_len, style, mask_val=float("-inf")):
+    def __init__(
+        self, input_len, cache_len, style, mask_val=float("-inf"), dtype=torch.float32
+    ):
         self.input_len = input_len
         self.cache_len = cache_len
         assert style in ("shift_pointer", "smart_mask")
         self.style = style
         self.mask_val = mask_val
         self.unmasked_len = 0
-        self.tensor = torch.zeros(1, input_len, input_len + cache_len)
+        self.tensor = torch.zeros(1, input_len, input_len + cache_len, dtype=dtype)
         self.reset()
 
     def reset(self):
@@ -200,44 +202,45 @@ def __init__(
         config: ModelArgs,
         input_len: int,
         cache_len: int,
+        dtype=torch.float32,
         style: str = "shift_pointer",
         mask_val: float = float("-inf"),
     ):
         self.mask = StaticAttentionMask(
-            input_len, cache_len, style=style, mask_val=mask_val
+            input_len, cache_len, style=style, mask_val=mask_val, dtype=dtype
         )
 
         rope = Rope(config)
         freqs = rope.get_freqs(None, config.max_seq_len)
-        self.freqs_cos = freqs[0]
-        self.freqs_sin = freqs[1]
+        self.freqs_cos = freqs[0].to(dtype)
+        self.freqs_sin = freqs[1].to(dtype)
 
         split_mha = config.attention_type in ("static", "static_shas")
         if split_mha:
            self.k_caches = {
                StaticKVCache.calculate_cache_key(layer_id, head_id): torch.zeros(
-                    1, cache_len, config.head_dim
+                    1, cache_len, config.head_dim, dtype=dtype
                )
                for layer_id in range(config.n_layers)
                for head_id in range(config.n_kv_heads)
            }
            self.v_caches = {
                StaticKVCache.calculate_cache_key(layer_id, head_id): torch.zeros(
-                    1, cache_len, config.head_dim
+                    1, cache_len, config.head_dim, dtype=dtype
                )
                for layer_id in range(config.n_layers)
                for head_id in range(config.n_kv_heads)
            }
        else:
            self.k_caches = {
                StaticKVCache.calculate_cache_key(layer_id, 0): torch.zeros(
-                    1, config.n_kv_heads, cache_len, config.head_dim
+                    1, config.n_kv_heads, cache_len, config.head_dim, dtype=dtype
                )
                for layer_id in range(config.n_layers)
            }
            self.v_caches = {
                StaticKVCache.calculate_cache_key(layer_id, 0): torch.zeros(
-                    1, config.n_kv_heads, cache_len, config.head_dim
+                    1, config.n_kv_heads, cache_len, config.head_dim, dtype=dtype
                )
                for layer_id in range(config.n_layers)
            }
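
The thread running through this diff is a single dtype parameter that propagates from the constructor into the attention mask, the RoPE tables, and every KV-cache buffer, so a half-precision export never mixes fp32 side buffers into the graph. A self-contained sketch of the same pattern with hypothetical names (TinyStaticIOState and its randn stand-in for Rope.get_freqs are illustrations, not the module's real API):

import torch

class TinyStaticIOState:
    """Hypothetical illustration of threading one dtype through all buffers."""

    def __init__(self, n_layers, cache_len, head_dim, input_len, dtype=torch.float32):
        # Mask, RoPE tables, and KV caches all share the caller-chosen dtype.
        self.mask = torch.zeros(1, input_len, input_len + cache_len, dtype=dtype)
        freqs = torch.randn(cache_len, head_dim // 2)  # stand-in for Rope.get_freqs
        self.freqs_cos = freqs.cos().to(dtype)
        self.freqs_sin = freqs.sin().to(dtype)
        self.k_caches = {
            layer_id: torch.zeros(1, cache_len, head_dim, dtype=dtype)
            for layer_id in range(n_layers)
        }

state = TinyStaticIOState(n_layers=2, cache_len=128, head_dim=64, input_len=1, dtype=torch.float16)
assert all(
    t.dtype == torch.float16
    for t in (state.mask, state.freqs_cos, state.freqs_sin, *state.k_caches.values())
)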

examples/wasm/test_build_wasm.sh

Lines changed: 6 additions & 0 deletions

@@ -1,3 +1,9 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
 
 set -e
 

exir/tests/test_memory_planning.py

Lines changed: 15 additions & 13 deletions

@@ -868,21 +868,23 @@ def forward(self, input, label):
 
         ep.dump_executorch_program(True)
 
-        # 147 just so happens to be the index of the user_grad output arg of
+        # 149 just so happens to be the index of the user_grad output arg of
         # convolution_backward.out. This is fairly fragile.
         # Check that the None output is not memory planned.
-        self.assertEqual(
-            ep.executorch_program.execution_plan[0]
-            .values[147]
-            .val.data_buffer_idx,  # pyright: ignore
-            0,
-        )
-        self.assertEqual(
-            ep.executorch_program.execution_plan[0]
-            .values[147]
-            .val.allocation_info,  # pyright: ignore
-            None,
-        )
+        # TODO(masnesral): restore after https://github.com/pytorch/pytorch/pull/144765
+        # self.assertEqual(len(ep.executorch_program.execution_plan[0].values), 151)
+        # self.assertEqual(
+        #     ep.executorch_program.execution_plan[0]
+        #     .values[149]
+        #     .val.data_buffer_idx,  # pyright: ignore
+        #     0,
+        # )
+        # self.assertEqual(
+        #     ep.executorch_program.execution_plan[0]
+        #     .values[149]
+        #     .val.allocation_info,  # pyright: ignore
+        #     None,
+        # )
 
 
 def _get_specs(gm: torch.fx.GraphModule) -> set[TensorSpec]:
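
The hard-coded value index (147 before the upstream change, 149 after) is exactly what the comment calls fragile: any pass that adds or removes values shifts it. One index-agnostic alternative, sketched under the assumption that the attribute paths used in the test (val.data_buffer_idx, val.allocation_info) are the only ones needed and that non-tensor values simply lack them:

def unplanned_tensor_indices(ep):
    """Indices of values whose tensor carries no memory plan.

    Duck-types with getattr so non-tensor values are skipped. Sketch only;
    the attribute paths are copied from the assertions above.
    """
    plan = ep.executorch_program.execution_plan[0]
    return [
        i
        for i, v in enumerate(plan.values)
        if getattr(v.val, "data_buffer_idx", None) == 0
        and getattr(v.val, "allocation_info", False) is None
    ]

# The test could then assert membership rather than a fixed position, e.g.:
# self.assertIn(expected_user_grad_index, unplanned_tensor_indices(ep))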
