Commit e7a42e1

Update on "[executorch][runtime] Introduce PteDataMap for weight sharing"
PteDataMap is the NamedDataMap that will live in the runtime. It is used to give delegates access to opaque named data stored in the PTE file. Open to alternative naming suggestions, maybe 'PTEDataMap' or 'ProgramDataMap'?

**Usage**

The PteDataMap is owned by the Program and instantiated at program load time if named_data exists in the PTE file.

We introduce usage of 'std::optional' here. We could also use executorch::aten::optional to avoid pulling in the standard library?

When delegates are initialized, the PteDataMap is given to delegate_init. Delegates can retrieve opaque delegate data by key using 'get_data', which returns a FreeableBuffer that they can free later.

**Testing**

The test uses the C++ flatbuffer API to build a fake program containing named data. We also create a temp file with sample data that the data loader can wrap.

TODO: add an e2e test once delegate AOT is ready and we can generate a PTE file with named data.

**Note**

As the PteDataMap wraps flatbuffer constructs, the Program must outlive the PteDataMap.

PteDataMap does not implement:
- get_metadata: currently, all stored data is opaque. We can implement get_metadata later if a backend stores plain tensor data.
- load_into: this is mostly used for the training case and isn't used by delegates, at least not at the moment.

Differential Revision: [D70213646](https://our.internmc.facebook.com/intern/diff/D70213646/)

[ghstack-poisoned]
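To make the delegate-side flow concrete, here is a minimal C++ sketch of what retrieving named data could look like during delegate init. It assumes only what the summary above states: the delegate receives the program-owned map through the NamedDataMap interface, and `get_data(key)` returns a `Result<FreeableBuffer>`. The function name `init_my_delegate`, the key `"my_blob"`, and the exact way the map pointer reaches the delegate are illustrative assumptions, not the actual backend API.

```cpp
// Sketch only, not the real delegate_init signature. Assumes the Program
// (which owns the PteDataMap) outlives this map, as noted above.
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/freeable_buffer.h>
#include <executorch/runtime/core/named_data_map.h>
#include <executorch/runtime/core/result.h>

using executorch::runtime::Error;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::NamedDataMap;
using executorch::runtime::Result;

Error init_my_delegate(const NamedDataMap* data_map) {
  // Look up the opaque blob that the AOT flow stored under a known key.
  // ("my_blob" is a hypothetical key for illustration.)
  Result<FreeableBuffer> blob = data_map->get_data("my_blob");
  if (!blob.ok()) {
    return blob.error();
  }
  // Use the data; the buffer stays valid until Free() is called.
  const void* data = blob->data();
  size_t size = blob->size();
  (void)data;
  (void)size;
  // Release the buffer once the delegate no longer needs it.
  blob->Free();
  return Error::Ok;
}
```

Handing back a FreeableBuffer rather than a raw pointer lets each delegate decide when its copy of the data is released, which is the hook that makes sharing one named blob across delegates practical.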
Merge commit e7a42e1 (2 parents: fea62dd + 2827dfb)


59 files changed: +402 −216 lines

CMakeLists.txt

Lines changed: 5 additions & 4 deletions
```diff
@@ -248,14 +248,15 @@ cmake_dependent_option(
   "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
 )
 
-if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
+if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
   set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
+  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
+  set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
+  set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
 endif()
 
-if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
-  set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
+if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
   set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
-  set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_MODULE)
```
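These blocks are evaluated in order, so the reordering matters: EXECUTORCH_BUILD_EXTENSION_TRAINING now switches on EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR (among others), and the FLAT_TENSOR block that follows then expands that into EXECUTORCH_BUILD_EXTENSION_DATA_LOADER.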

backends/apple/coreml/TARGETS

Lines changed: 8 additions & 10 deletions
```diff
@@ -14,10 +14,10 @@ runtime.python_library(
         "@EXECUTORCH_CLIENTS",
     ],
     deps = [
+        "fbsource//third-party/pypi/coremltools:coremltools",
         ":executorchcoreml",
         "//executorch/exir/backend:backend_details",
         "//executorch/exir/backend:compile_spec_schema",
-        "fbsource//third-party/pypi/coremltools:coremltools",
     ],
 )
 
@@ -30,13 +30,13 @@ runtime.python_library(
         "@EXECUTORCH_CLIENTS",
     ],
     deps = [
+        "fbsource//third-party/pypi/coremltools:coremltools",
         ":backend",
         "//caffe2:torch",
         "//executorch/exir:lib",
         "//executorch/exir/backend:compile_spec_schema",
         "//executorch/exir/backend:partitioner",
         "//executorch/exir/backend:utils",
-        "fbsource//third-party/pypi/coremltools:coremltools",
     ],
 )
 
@@ -64,25 +64,23 @@ runtime.cxx_python_extension(
     headers = glob([
         "runtime/inmemoryfs/**/*.hpp",
     ]),
+    base_module = "",
+    compiler_flags = [
+        "-std=c++17",
+    ],
     preprocessor_flags = [
         "-Iexecutorch/backends/apple/coreml/runtime/util",
     ],
     types = [
         "executorchcoreml.pyi",
     ],
-    compiler_flags = [
-        "-std=c++17",
-    ],
-    base_module = "",
     visibility = [
         "//executorch/examples/apple/coreml/...",
         "@EXECUTORCH_CLIENTS",
     ],
-    external_deps = [
-        "pybind11",
-    ],
     deps = [
         "fbsource//third-party/nlohmann-json:nlohmann-json",
+        "fbsource//third-party/pybind11:pybind11",
     ],
 )
 
@@ -92,10 +90,10 @@ runtime.python_test(
         "test/*.py",
     ]),
     deps = [
+        "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
-        "fbsource//third-party/pypi/pytest:pytest",
     ],
 )
```

backends/arm/tosa_mapping.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -107,7 +107,10 @@ def __init__(self, argument: Any) -> None:
         if isinstance(argument, (int, float)):
             self.__process_number(argument)
             return
+        if isinstance(argument, torch.dtype):
+            # Dtype is parsed from fake tensor
+            return
 
-        RuntimeError(
+        raise RuntimeError(
             f"Unhandled node input argument: {argument}, of type {type(argument)}"
         )
```
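Two fixes in one hunk: torch.dtype arguments (parsed from fake tensors) are now accepted, and the RuntimeError that was previously constructed but never raised is now actually raised, so unhandled argument types fail loudly instead of being silently ignored.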

backends/qualcomm/aot/python/targets.bzl

Lines changed: 3 additions & 5 deletions
```diff
@@ -33,10 +33,10 @@ def define_common_targets():
             "//executorch/backends/qualcomm:schema",
             "//executorch/backends/qualcomm/aot/ir:qcir_utils",
             "//executorch/backends/qualcomm/runtime:runtime",
+            "fbsource//third-party/pybind11:pybind11",
             "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
         ],
         external_deps = [
-            "pybind11",
             "libtorch_python",
         ],
         use_static_deps = True,
@@ -66,10 +66,10 @@ def define_common_targets():
             "//executorch/backends/qualcomm:schema",
             "//executorch/backends/qualcomm/aot/ir:qcir_utils",
             "//executorch/backends/qualcomm/runtime:runtime",
+            "fbsource//third-party/pybind11:pybind11",
             "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
         ],
         external_deps = [
-            "pybind11",
             "libtorch_python",
         ],
         use_static_deps = True,
@@ -93,9 +93,7 @@ def define_common_targets():
             "//executorch/backends/qualcomm:schema",
             "//executorch/backends/qualcomm/aot/ir:qcir_utils",
             "//executorch/backends/qualcomm/runtime:runtime",
+            "fbsource//third-party/pybind11:pybind11",
             "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
         ],
-        external_deps = [
-            "pybind11",
-        ],
     )
```

docs/source/using-executorch-building-from-source.md

Lines changed: 8 additions & 0 deletions
````diff
@@ -80,6 +80,14 @@ portability details.
 ./install_executorch.sh --pybind off
 ```
 
+For development, install the package in `--editable` mode, which allows you to modify the Python source code and see changes reflected immediately.
+```
+./install_executorch.sh --editable [--pybind xnnpack]
+
+# Or you can directly do the following if dependencies are already installed.
+pip install -e .
+```
+
 > **_NOTE:_** Cleaning the build system
 >
 > When fetching a new version of the upstream repo (via `git fetch` or `git
````

examples/models/checkpoint.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -64,7 +64,7 @@ def get_checkpoint_dtype(checkpoint: Dict[str, Any]) -> Optional[str]:
     mismatched_dtypes = [
         (key, value.dtype)
         for key, value in checkpoint.items()
-        if value.dtype != dtype
+        if hasattr(value, "dtype") and value.dtype != dtype
     ]
     if len(mismatched_dtypes) > 0:
         print(
```

examples/models/llama/runner/generation.py

Lines changed: 8 additions & 0 deletions
```diff
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import time
 from abc import ABC, abstractmethod
 from typing import List, Optional
 
@@ -97,6 +98,7 @@ def generate(  # noqa: C901
         pos_base: int = 0,
     ) -> List[int]:
         # Prefill
+        prefill_start = time.time()
         logits = self.forward(
             tokens=torch.tensor([prompt_tokens], dtype=torch.long, device=self.device),
             input_pos=(
@@ -105,11 +107,13 @@ def generate(  # noqa: C901
                 else None
             ),
         )
+        prefill_time = time.time() - prefill_start
 
         current_token = next_token(logits, temperature, top_p)
         print(f"{self.tokenizer.decode_token(current_token)}", end="", flush=True)
         tokens = prompt_tokens + [current_token]
 
+        generate_start = time.time()
         while len(tokens) < max_seq_len:
             if self.use_kv_cache:
                 logits = self.forward(
@@ -140,6 +144,10 @@ def generate(  # noqa: C901
         print(f"{self.tokenizer.decode_token(current_token)}", end="", flush=True)
         print("\n")
 
+        generate_time = time.time() - generate_start
+        print(f"Prefill time: {prefill_time}")
+        print(f"Generation tok/s: {len(tokens) / generate_time}")
+
         return tokens if echo else tokens[len(prompt_tokens) :]
 
     def text_completion(
```
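The new timers split performance into its two phases: prefill_time covers the single batched forward over the prompt, while generate_time covers the token-by-token decode loop. Note that `len(tokens)` includes the prompt tokens, so the reported tok/s appears slightly inflated for long prompts.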

exir/_serialize/_named_data_store.py

Lines changed: 27 additions & 0 deletions
```diff
@@ -181,3 +181,30 @@ def get_named_data_store_output(self) -> NamedDataStoreOutput:
         # Clean up empty maps inside self.external_data
         self.external_data = {k: v for k, v in self.external_data.items() if len(v) > 0}
         return NamedDataStoreOutput(self.buffers, self.pte_data, self.external_data)
+
+    def merge_named_data_store(self, other: NamedDataStoreOutput) -> None:
+        """
+        Merge another NamedDataStore into this one.
+        Args:
+            other (NamedDataStore): the other NamedDataStore to merge.
+        Raises:
+            ValueError: when the key exists in both stores, and corresponding
+                data is different between them.
+        """
+        # Merge the pte_data.
+        for key, buffer_idx in other.pte_data.items():
+            self.add_named_data(
+                key,
+                other.buffers[buffer_idx].buffer,
+                other.buffers[buffer_idx].alignment,
+            )
+
+        # Merge the external_data.
+        for filename, key_to_buffer_idx in other.external_data.items():
+            for key, buffer_idx in key_to_buffer_idx.items():
+                self.add_named_data(
+                    key,
+                    other.buffers[buffer_idx].buffer,
+                    other.buffers[buffer_idx].alignment,
+                    external_tag=filename,
+                )
```
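Because the merge funnels through add_named_data, identical (key, data) pairs deduplicate to a single buffer, while the same key with different data raises ValueError; the tests below exercise both paths.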

exir/_serialize/test/test_named_data_store.py

Lines changed: 59 additions & 0 deletions
```diff
@@ -83,3 +83,62 @@ def test_add_duplicate_key_fail(self) -> None:
         self.assertEqual(len(output.pte_data), 1)
         self.assertEqual(output.pte_data["key"], 0)
         self.assertEqual(len(output.external_data), 0)
+
+    def test_merge(self) -> None:
+        store1 = NamedDataStore()
+        store1.add_named_data("key1", b"data1", None, None)
+        store1.add_named_data("key2", b"data2", 16, "file1")
+
+        # Check items in store1.
+        output = store1.get_named_data_store_output()
+        self.assertEqual(len(output.buffers), 2)
+        self.assertEqual(len(output.pte_data), 1)
+        self.assertEqual(len(output.external_data), 1)
+        self.assertEqual(len(output.external_data["file1"]), 1)
+
+        store2 = NamedDataStore()
+        store2.add_named_data("key1", b"data1", None, None)
+        store2.add_named_data("key3", b"data3", None, None)
+        store2.add_named_data("key4", b"data4", 16, "file1")
+        store2.add_named_data("key5", b"data5", 16, "file2")
+
+        # Check items in store2.
+        output2 = store2.get_named_data_store_output()
+        self.assertEqual(len(output2.buffers), 4)
+        self.assertEqual(len(output2.pte_data), 2)
+        self.assertEqual(len(output2.external_data), 2)
+        self.assertEqual(len(output2.external_data["file1"]), 1)
+        self.assertEqual(len(output2.external_data["file2"]), 1)
+
+        # Merge store2 into store1.
+        store1.merge_named_data_store(output2)
+
+        # Check that items in store2 are merged into store1.
+        output = store1.get_named_data_store_output()
+        # key1, data1 exist in both store1 and store2, so we only keep one copy.
+        self.assertEqual(len(output.buffers), 5)
+        self.assertEqual(len(output.pte_data), 2)
+        self.assertEqual(len(output.external_data), 2)
+        self.assertEqual(len(output.external_data["file1"]), 2)
+        self.assertEqual(len(output.external_data["file2"]), 1)
+
+    def test_merge_duplicate_error(self) -> None:
+        store1 = NamedDataStore()
+        store1.add_named_data("key1", b"data1", None, None)
+
+        # Check items in store1.
+        output = store1.get_named_data_store_output()
+        self.assertEqual(len(output.buffers), 1)
+        self.assertEqual(len(output.pte_data), 1)
+
+        store2 = NamedDataStore()
+        store2.add_named_data("key1", b"data2", None, None)
+
+        # Check items in store2.
+        output2 = store2.get_named_data_store_output()
+        self.assertEqual(len(output2.buffers), 1)
+        self.assertEqual(len(output2.pte_data), 1)
+
+        # Merging store2 into store1 raises an error, as key1 is already in
+        # store1 with different data.
+        self.assertRaises(ValueError, store1.merge_named_data_store, output2)
```

exir/backend/backend_api.py

Lines changed: 4 additions & 3 deletions
```diff
@@ -1,5 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -56,9 +57,9 @@ def to_backend(
 ) -> LoweredBackendModule:
 
     def to_backend(
-        graph_module: torch.fx.GraphModule,
-        partitioner: Type[TPartitioner],
-    ) -> torch.fx.GraphModule
+        edge_program: ExportedProgram,
+        partitioner: Partitioner,
+    ) -> ExportedProgram:
     """
     pass
```
6465
