
Commit 1aaeaa7

Author: pytorchbot
Commit message: 2024-10-17 nightly release (ad0e5e8)
Parent: 400150b

File tree: 174 files changed (+1918 / -1321 lines)


.ci/scripts/build_llama_android.sh

Lines changed: 2 additions & 2 deletions
@@ -48,9 +48,9 @@ build_llama_runner() {
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-    -Bcmake-android-out/examples/models/llama2 examples/models/llama2
+    -Bcmake-android-out/examples/models/llama examples/models/llama

-  cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
+  cmake --build cmake-android-out/examples/models/llama -j4 --config Release
 }
 install_flatc_from_source
 install_executorch_and_backend_lib

.ci/scripts/test_llama.sh

Lines changed: 4 additions & 4 deletions
@@ -125,7 +125,7 @@ cmake_install_executorch_libraries() {

 cmake_build_llama_runner() {
   echo "Building llama runner"
-  dir="examples/models/llama2"
+  dir="examples/models/llama"
   retry cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Debug \
@@ -206,7 +206,7 @@ if [[ "${QNN}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
 fi
 # Add dynamically linked library location
-$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
+$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

 # Create tokenizer.bin.
 echo "Creating tokenizer.bin"
@@ -219,15 +219,15 @@ echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
 if [[ "${BUILD_TOOL}" == "buck2" ]]; then
   # Run model.
   # shellcheck source=/dev/null
-  $BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
+  $BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
 elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
   cmake_install_executorch_libraries
   cmake_build_llama_runner
   # Run llama runner
   NOW=$(date +"%H:%M:%S")
   echo "Starting to run llama runner at ${NOW}"
   # shellcheck source=/dev/null
-  cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt
+  cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
   NOW=$(date +"%H:%M:%S")
   echo "Finished at ${NOW}"
 else

.ci/scripts/test_model.sh

Lines changed: 3 additions & 3 deletions
@@ -75,9 +75,9 @@ run_portable_executor_runner() {
 test_model() {
   if [[ "${MODEL_NAME}" == "llama2" ]]; then
     # Install requirements for export_llama
-    bash examples/models/llama2/install_requirements.sh
-    # Test export_llama script: python3 -m examples.models.llama2.export_llama
-    "${PYTHON_EXECUTABLE}" -m examples.models.llama2.export_llama -c examples/models/llama2/params/demo_rand_params.pth -p examples/models/llama2/params/demo_config.json
+    bash examples/models/llama/install_requirements.sh
+    # Test export_llama script: python3 -m examples.models.llama.export_llama
+    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
     run_portable_executor_runner
     rm "./${MODEL_NAME}.pte"
   fi

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
@@ -160,7 +160,7 @@ jobs:

 if [[ ${{ matrix.model }} =~ ^stories* ]]; then
   # Install requirements for export_llama
-  PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
+  PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
   # Test llama2
   if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
     DELEGATE_CONFIG="xnnpack+custom+qe"

.github/workflows/apple-perf.yml

Lines changed: 1 addition & 1 deletion
@@ -162,7 +162,7 @@ jobs:
 if [[ ${{ matrix.model }} =~ ^stories* ]]; then
   # Install requirements for export_llama
   PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
-    bash examples/models/llama2/install_requirements.sh
+    bash examples/models/llama/install_requirements.sh

   # Test llama2
   if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then

.github/workflows/pull.yml

Lines changed: 9 additions & 3 deletions
@@ -98,6 +98,12 @@ jobs:
   - dtype: bf16
     build-tool: buck2
     mode: portable
+  - dtype: bf16
+    build-tool: cmake
+    mode: custom
+  - dtype: bf16
+    build-tool: buck2
+    mode: custom
 fail-fast: false
 with:
   runner: linux.2xlarge
@@ -117,7 +123,7 @@ jobs:
 # Setup executorch
 PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
 # Install requirements for export_llama
-PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
+PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
 # Test llama2
 PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"

@@ -216,7 +222,7 @@ jobs:
 bash install_requirements.sh --pybind xnnpack

 # install Llava requirements
-bash examples/models/llama2/install_requirements.sh
+bash examples/models/llama/install_requirements.sh
 bash examples/models/llava/install_requirements.sh

 # run python unittest
@@ -411,7 +417,7 @@ jobs:
 # Setup executorch
 PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
 # Install requirements for export_llama
-PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
+PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
 # Test llama2
 PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"

.github/workflows/trunk.yml

Lines changed: 6 additions & 4 deletions
@@ -227,6 +227,8 @@ jobs:
 include:
   - dtype: bf16
     mode: portable
+  - dtype: bf16
+    mode: custom
 fail-fast: false
 with:
   runner: macos-m1-stable
@@ -255,7 +257,7 @@ jobs:
 fi

 # Install requirements for export_llama
-PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
+PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
 # Test llama2
 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M cmake "${DTYPE}" "${MODE}"

@@ -279,7 +281,7 @@ jobs:
 # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

 # # install Llava requirements
-# ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
+# ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
 # ${CONDA_RUN} bash examples/models/llava/install_requirements.sh

 # # run python unittest
@@ -385,7 +387,7 @@ jobs:
 cmake --build cmake-out -j9 --target install --config Release

 echo "Build llama runner"
-dir="examples/models/llama2"
+dir="examples/models/llama"
 cmake \
   -DCMAKE_INSTALL_PREFIX=cmake-out \
   -DCMAKE_BUILD_TYPE=Release \
@@ -437,5 +439,5 @@ jobs:

 python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}

-cmake-out/examples/models/llama2/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
 echo "::endgroup::"

README.md

Lines changed: 2 additions & 2 deletions
@@ -22,10 +22,10 @@ please visit our documentation website [for the latest release](https://pytorch.

 Check out the [Getting Started](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) page for a quick spin.

-Check out the examples of [Llama](./examples/models/llama2/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.
+Check out the examples of [Llama](./examples/models/llama/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.


-**[UPDATE - 09/25]** We have added support for running [Llama 3.2 1B/3B](./examples/models/llama2/README.md) models via ExecuTorch.
+**[UPDATE - 09/25]** We have added support for running [Llama 3.2 1B/3B](./examples/models/llama/README.md) models via ExecuTorch.

 ## Feedback

backends/apple/coreml/runtime/inmemoryfs/inmemory_filesystem.cpp

Lines changed: 6 additions & 6 deletions
@@ -253,11 +253,11 @@ bool write_directory_node(InMemoryDirectoryNode* node,
         return false;
     }

-    for (const auto& [_, node]: node->get_items()) {
-        if (node.get()->isDirectory() && !recursive) {
+    for (const auto& [_, node_2]: node->get_items()) {
+        if (node_2.get()->isDirectory() && !recursive) {
             continue;
         }
-        if (!write_node(node.get(), dir_path, recursive, error)) {
+        if (!write_node(node_2.get(), dir_path, recursive, error)) {
             return false;
         }
     }
@@ -383,9 +383,9 @@ FlattenedInMemoryNode::unflatten(const std::vector<FlattenedInMemoryNode>& flatt
         case InMemoryFileSystem::InMemoryNode::Kind::Directory: {
             std::unordered_map<std::string, std::unique_ptr<InMemoryFileSystem::InMemoryNode>> items;
             items.reserve(flattened_node_metadata.child_name_to_indices_map.size());
-            for (const auto& [name, index]: flattened_node_metadata.child_name_to_indices_map) {
-                auto moveIt = std::make_move_iterator(nodes.begin() + index);
-                items[name] = *moveIt;
+            for (const auto& [name_2, index_2]: flattened_node_metadata.child_name_to_indices_map) {
+                auto moveIt = std::make_move_iterator(nodes.begin() + index_2);
+                items[name_2] = *moveIt;
             }
             auto directory_node =
                 std::make_unique<InMemoryDirectoryNode>(std::move(name), std::move(attributes), std::move(items));

backends/qualcomm/_passes/annotate_quant_attrs.py

Lines changed: 24 additions & 4 deletions
@@ -27,9 +27,12 @@ class AnnotateQuantAttrs(ExportPass):
     generated after quatization process.
     """

-    def __init__(self, edge_program: torch.export.ExportedProgram):
+    def __init__(
+        self, edge_program: torch.export.ExportedProgram, skip_advanced_requat: bool
+    ):
         super(AnnotateQuantAttrs, self).__init__()
         self.edge_program = edge_program
+        self.skip_advanced_requant = skip_advanced_requat

     def _annotate_source_nodes(
         self, quant_node: torch.fx.Node, quant_attrs: Dict[str, Any]
@@ -68,9 +71,26 @@ def _annotate_requant(self, n):

         # TODO: Store multiple pairs of requantize attributes when we have an op builder
         # that has multiple outputs that requires quant attributes.
-        if q_attrs["dtype"] != dq_attrs["dtype"]:
-            dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
-            n.args[0].meta[QCOM_REQUANTIZE] = dq_attrs
+        if self.skip_advanced_requant:
+            if q_attrs["dtype"] != dq_attrs["dtype"]:
+                dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
+                n.args[0].meta[QCOM_REQUANTIZE] = dq_attrs
+        else:
+            # When dtype is the same but other specs such as scale and offset are different,
+            # insert requant to improve accuracy.
+            # Users can turn this feature off if any inference speed drop is observed.
+            if any(
+                q_attrs[attr] != dq_attrs[attr]
+                for attr in [
+                    "scale",
+                    "zero_point",
+                    "quant_min",
+                    "quant_max",
+                    "dtype",
+                ]
+            ):
+                dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
+                n.args[0].meta[QCOM_REQUANTIZE] = dq_attrs

         # Dequant all the fold_quant parameters back to fp32.
         # If an operation is not supported by QNN and got fallback, it will expect a fp32 param.
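To make the behaviour of this hunk concrete, below is a small, self-contained Python sketch (not part of the commit) of the decision the reworked requantization check encodes: with skip_advanced_requant enabled, only a dtype mismatch triggers a requantize annotation, while the new default path also compares scale, zero point, and quant range. The helper name needs_requant and the plain-dict inputs are illustrative assumptions; only the attribute list and the skip_advanced_requant switch come from the diff above.

# Illustrative only: mirrors the check added in
# backends/qualcomm/_passes/annotate_quant_attrs.py.
# `needs_requant` is a hypothetical helper, not an ExecuTorch API.
from typing import Any, Dict


def needs_requant(
    q_attrs: Dict[str, Any], dq_attrs: Dict[str, Any], skip_advanced_requant: bool
) -> bool:
    if skip_advanced_requant:
        # Previous behaviour: only a dtype mismatch inserts a requantize.
        return q_attrs["dtype"] != dq_attrs["dtype"]
    # New default: any difference in the full quantization spec inserts a
    # requantize, trading some inference speed for accuracy.
    return any(
        q_attrs[attr] != dq_attrs[attr]
        for attr in ["scale", "zero_point", "quant_min", "quant_max", "dtype"]
    )


# Example: same dtype but different scale triggers requant only in advanced mode.
q = {"scale": 0.02, "zero_point": 0, "quant_min": -128, "quant_max": 127, "dtype": "int8"}
dq = {**q, "scale": 0.05}
assert needs_requant(q, dq, skip_advanced_requant=True) is False
assert needs_requant(q, dq, skip_advanced_requant=False) is True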
