pytorch
diff --git a/‎.Package.swift/executorch_llm/dummy.swift‎
Lines changed: 1 addition & 0 deletions b/‎.Package.swift/executorch_llm/dummy.swift‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.Package.swift/executorch_llm_debug/dummy.swift‎
Lines changed: 1 addition & 0 deletions b/‎.Package.swift/executorch_llm_debug/dummy.swift‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/setup-emscripten.sh‎
Lines changed: 6 additions & 0 deletions b/‎.ci/scripts/setup-emscripten.sh‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎.github/workflows/apple.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/apple.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Package.swift‎
Lines changed: 5 additions & 0 deletions b/‎Package.swift‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎backends/apple/coreml/compiler/coreml_preprocess.py‎
Lines changed: 2 additions & 2 deletions b/‎backends/apple/coreml/compiler/coreml_preprocess.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/test/passes/test_decompose_cosine_similarity_pass.py‎
Lines changed: 2 additions & 1 deletion b/‎backends/arm/test/passes/test_decompose_cosine_similarity_pass.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎devtools/bundled_program/CMakeLists.txt‎
Lines changed: 14 additions & 19 deletions b/‎devtools/bundled_program/CMakeLists.txt‎
Lines changed: 14 additions & 19 deletions
diff --git a/‎examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj‎
Lines changed: 1 addition & 1 deletion b/‎examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/models/llama/static_attention.py‎
Lines changed: 12 additions & 9 deletions b/‎examples/models/llama/static_attention.py‎
Lines changed: 12 additions & 9 deletions
@@ -0,0 +1 @@
+
@@ -0,0 +1 @@
+
@@ -1,3 +1,9 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
 
 set -ex
 
 
@@ -149,6 +149,7 @@ jobs:
         VERSION="${{ needs.set-version.outputs.version }}"
         FRAMEWORKS=(
           "executorch"
+          "executorch_llm"
           "backend_coreml"
           "backend_mps"
           "backend_xnnpack"
 
@@ -69,6 +69,11 @@ let products = deliverables([
       "c++",
     ],
   ],
+  "executorch_llm": [
+    "targets": [
+      "executorch",
+    ],
+  ],
   "kernels_llm": [:],
   "kernels_optimized": [
     "frameworks": [
 
@@ -365,7 +365,7 @@ def preprocess_model(
 
         match model_type:
             case CoreMLBackend.MODEL_TYPE.COMPILED_MODEL:
-                shutil.rmtree(str(model_path.resolve()), ignore_errors=True)
+                shutil.rmtree(str(model_path.resolve()))
                 model_path = model_dir_path / MODEL_PATHS.COMPILED_MODEL.value
                 compiled_model_path = mlmodel.get_compiled_model_path()
                 shutil.move(
@@ -396,7 +396,7 @@ def preprocess_model(
                 for key, value in model_debug_info.debugSymbolToHandles.items()
             }
 
-        shutil.rmtree(str(dir_path.resolve()), ignore_errors=True)
+        shutil.rmtree(str(dir_path.resolve()))
         return PreprocessResult(
             processed_bytes=processed_bytes,
             debug_handle_map=debug_handle_map,
 
@@ -34,7 +34,8 @@ def test_decompose_cosine_similarity_tosa_BI(module):
         "executorch_exir_dialects_edge__ops_aten_mul_Tensor": 5,
         "executorch_exir_dialects_edge__ops_aten_sum_dim_IntList": 3,
         "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
-        "executorch_exir_dialects_edge__ops_aten_full_like_default": 1,
+        # TODO(masnesral): uncomment after https://github.com/pytorch/pytorch/pull/144765
+        # "executorch_exir_dialects_edge__ops_aten_full_default": 1,
         "executorch_exir_dialects_edge__ops_aten_maximum_default": 2,
         "executorch_exir_dialects_edge__ops_aten_reciprocal_default": 1,
     }
 
@@ -4,18 +4,20 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-set(
-  _schema_files
-  bundled_program_schema.fbs
-  scalar_type.fbs
-)
+set(_schema_files bundled_program_schema.fbs scalar_type.fbs)
 
 set(_schema_outputs)
 foreach(schema_file ${_schema_files})
-  list(APPEND _bundled_program_schema__srcs "${CMAKE_CURRENT_SOURCE_DIR}/schema/${schema_file}")
+  list(APPEND _bundled_program_schema__srcs
+       "${CMAKE_CURRENT_SOURCE_DIR}/schema/${schema_file}"
+  )
 
   string(REGEX REPLACE "[.]fbs$" "_generated.h" generated "${schema_file}")
-  list(APPEND _schema_outputs "${DEVTOOLS_INCLUDE_DIR}/executorch/devtools/bundled_program/schema/${generated}")
+  list(
+    APPEND
+    _schema_outputs
+    "${DEVTOOLS_INCLUDE_DIR}/executorch/devtools/bundled_program/schema/${generated}"
+  )
 endforeach()
 
 file(MAKE_DIRECTORY ${DEVTOOLS_INCLUDE_DIR}/executorch/devtools/bundled_program)
@@ -32,20 +34,13 @@ add_custom_command(
 )
 
 add_library(
-  bundled_program
-  ${_schema_outputs}
-  ${CMAKE_CURRENT_SOURCE_DIR}/bundled_program.cpp
-)
-target_link_libraries(
-  bundled_program
-  PUBLIC
-    executorch
+  bundled_program ${_schema_outputs}
+                  ${CMAKE_CURRENT_SOURCE_DIR}/bundled_program.cpp
 )
+target_link_libraries(bundled_program PUBLIC executorch)
 target_include_directories(
-  bundled_program
-  PUBLIC
-    ${DEVTOOLS_INCLUDE_DIR}
-    ${PROJECT_SOURCE_DIR}/third-party/flatbuffers/include
+  bundled_program PUBLIC ${DEVTOOLS_INCLUDE_DIR}
+                         ${PROJECT_SOURCE_DIR}/third-party/flatbuffers/include
 )
 
 install(
 
@@ -362,7 +362,7 @@
 				03D151D62E0E9E43007A38BE /* ExecuTorchLLM */,
 			);
 			name = apple;
-			path = ../../../../../../../../executorch/extension/llm/apple;
+			path = ../../../../../../extension/llm/apple;
 			sourceTree = "<group>";
 		};
 		F292B0842D88B0D200BE6839 /* tokenizers */ = {
 
@@ -138,14 +138,16 @@ def update(
 
 
 class StaticAttentionMask:
-    def __init__(self, input_len, cache_len, style, mask_val=float("-inf")):
+    def __init__(
+        self, input_len, cache_len, style, mask_val=float("-inf"), dtype=torch.float32
+    ):
         self.input_len = input_len
         self.cache_len = cache_len
         assert style in ("shift_pointer", "smart_mask")
         self.style = style
         self.mask_val = mask_val
         self.unmasked_len = 0
-        self.tensor = torch.zeros(1, input_len, input_len + cache_len)
+        self.tensor = torch.zeros(1, input_len, input_len + cache_len, dtype=dtype)
         self.reset()
 
     def reset(self):
@@ -200,44 +202,45 @@ def __init__(
         config: ModelArgs,
         input_len: int,
         cache_len: int,
+        dtype=torch.float32,
         style: str = "shift_pointer",
         mask_val: float = float("-inf"),
     ):
         self.mask = StaticAttentionMask(
-            input_len, cache_len, style=style, mask_val=mask_val
+            input_len, cache_len, style=style, mask_val=mask_val, dtype=dtype
         )
 
         rope = Rope(config)
         freqs = rope.get_freqs(None, config.max_seq_len)
-        self.freqs_cos = freqs[0]
-        self.freqs_sin = freqs[1]
+        self.freqs_cos = freqs[0].to(dtype)
+        self.freqs_sin = freqs[1].to(dtype)
 
         split_mha = config.attention_type in ("static", "static_shas")
         if split_mha:
             self.k_caches = {
                 StaticKVCache.calculate_cache_key(layer_id, head_id): torch.zeros(
-                    1, cache_len, config.head_dim
+                    1, cache_len, config.head_dim, dtype=dtype
                 )
                 for layer_id in range(config.n_layers)
                 for head_id in range(config.n_kv_heads)
             }
             self.v_caches = {
                 StaticKVCache.calculate_cache_key(layer_id, head_id): torch.zeros(
-                    1, cache_len, config.head_dim
+                    1, cache_len, config.head_dim, dtype=dtype
                 )
                 for layer_id in range(config.n_layers)
                 for head_id in range(config.n_kv_heads)
             }
         else:
             self.k_caches = {
                 StaticKVCache.calculate_cache_key(layer_id, 0): torch.zeros(
-                    1, config.n_kv_heads, cache_len, config.head_dim
+                    1, config.n_kv_heads, cache_len, config.head_dim, dtype=dtype
                 )
                 for layer_id in range(config.n_layers)
             }
             self.v_caches = {
                 StaticKVCache.calculate_cache_key(layer_id, 0): torch.zeros(
-                    1, config.n_kv_heads, cache_len, config.head_dim
+                    1, config.n_kv_heads, cache_len, config.head_dim, dtype=dtype
                 )
                 for layer_id in range(config.n_layers)
             }