Update on "[0.6 documentation] Fix Page Developer Tools: Bundled Program"

zhenyanzhang · zhenyanzhang · commit e4dd23a816fa · 2025-04-15T22:26:19.000-07:00
#10193 - Update the hyperlink of “Generate Sample ExecuTorch program”, as this part has been heavily revamped in v0.6: - Update to https://pytorch.org/executorch/0.6/getting-started.html#exporting - Fix the emit stage sample code: - Use `register_buffer` as the warning suggests. - Update `to_edge` to be `to_edge_transform_and_lower`, as updated in https://pytorch.org/executorch/0.6/getting-started.html. - Remove function blocks for `get_program_data`, `load_bundled_input` and `load_bundled_output`, as there is no other similar usage in the doc. - Change to links to real-world examples. - Remove hard-coded cpp code examples. - For APIs, provide links to real-world examples. - For the runtime example, change to a ready-to-use snippet for people to quickly try out. Differential Revision: [D73027728](https://our.internmc.facebook.com/intern/diff/D73027728/) [ghstack-poisoned]
diff --git a/.ci/scripts/gather_benchmark_configs.py b/.ci/scripts/gather_benchmark_configs.py
@@ -23,6 +23,7 @@
     "samsung_galaxy_s22": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",
     "samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
     "google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
+    "google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",
 }
 
 # Predefined benchmark configurations
diff --git a/.github/workflows/android-perf-private-device-experiment.yml b/.github/workflows/android-perf-private-device-experiment.yml
@@ -0,0 +1,62 @@
+name: android-perf (private devices)
+
+on:
+  schedule:
+    - cron: 0 0,4,8,12,16,20 * * *
+  pull_request:
+    paths:
+      - .github/workflows/android-perf-private-device-experiment.yml
+  push:
+    branches:
+      - main
+    paths:
+      - .github/workflows/android-perf-private-device-experiment.yml
+  # Note: GitHub has an upper limit of 10 inputs
+  workflow_dispatch:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        default: google_pixel_3_private_rooted
+      benchmark_configs:
+        description: The list of configs used for the benchmark
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        default: google_pixel_3_private_rooted
+      benchmark_configs:
+        description: The list of configs used for the benchmark
+        required: false
+        type: string
+
+concurrency:
+  group: android-perf-private-devices-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  android:
+    uses: ./.github/workflows/android-perf.yml
+    secrets: inherit
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      models: ${{ inputs.models }}
+      devices: google_pixel_3_private_rooted
+      benchmark_configs: ${{ inputs.benchmark_configs }}
diff --git a/.github/workflows/android-release-artifacts.yml b/.github/workflows/android-release-artifacts.yml
@@ -49,7 +49,8 @@ jobs:
       contents: read
     with:
       secrets-env: EXECUTORCH_MAVEN_SIGNING_KEYID EXECUTORCH_MAVEN_SIGNING_PASSWORD EXECUTORCH_MAVEN_CENTRAL_PASSWORD EXECUTORCH_MAVEN_CENTRAL_USERNAME EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS
-      runner: linux.2xlarge
+      # As this job has access to Maven credential, run this on a fresh ephemeral runner
+      runner: ephemeral.linux.2xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'recursive'
       ref: ${{ github.sha }}
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
@@ -229,6 +229,7 @@ def is_node_supported(
             exir_ops.edge.aten.__lshift__.Scalar,
             torch.ops.aten.scalar_tensor.default,
             exir_ops.edge.aten.gelu.default,
+            exir_ops.edge.aten.alias_copy.default,
         ]
 
         return supported
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
@@ -22,7 +22,6 @@
     op_erf,
     op_exp,
     op_ge,
-    op_get_item,
     op_gt,
     op_le,
     op_log,
@@ -51,5 +50,6 @@
     op_view,
     op_where,
     ops_binary,
+    ops_identity,
     ops_unary,
 )
diff --git a/backends/arm/operators/op_get_item.py b/backends/arm/operators/op_get_item.py
diff --git a/backends/arm/operators/ops_identity.py b/backends/arm/operators/ops_identity.py
@@ -0,0 +1,47 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+from typing import List
+
+import torch
+import torch.fx
+
+import tosa_tools.v0_80.serializer.tosa_serializer as ts
+
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+
+
+def identity_operator_factory(identity_target: str):
+    """
+    Creates and registers NodeVisitors for operators that map directly
+    to a TOSA IDENTITY op.
+    """
+
+    class IdentityOperatorVisitor(NodeVisitor):
+        target = identity_target
+
+        def define_node(
+            self,
+            node: torch.fx.Node,
+            tosa_graph: ts.TosaSerializer,
+            inputs: List[TosaArg],
+            output: TosaArg,
+        ) -> None:
+            # Simply add an identityOp
+            tosa_graph.addOperator(
+                ts.TosaOp.Op().IDENTITY, [inputs[0].name], [output.name]
+            )
+
+    register_node_visitor(IdentityOperatorVisitor)
+
+
+identity_operator_factory("getitem")
+identity_operator_factory("aten.alias_copy.default")
diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
@@ -244,6 +244,11 @@ def _match_pattern(
     operator.getitem,
 ]
 
+_one_to_one_shared_input_or_input_act_qspec = [
+    torch.ops.aten.adaptive_avg_pool2d.default,
+    torch.ops.aten.alias_copy.default,
+]
+
 
 def get_quant_properties(  # noqa: C901
     node: Node, gm: torch.fx.GraphModule, quantization_config
@@ -332,7 +337,7 @@ def any_or_hardtanh_min_zero(n: Node):
             _QuantProperty(2, shared_qspec),  # type: ignore[arg-type]
         ]
         quant_properties.quant_output = _QuantProperty(0, shared_qspec)  # type: ignore[arg-type]
-    elif node.target == torch.ops.aten.adaptive_avg_pool2d.default:
+    elif node.target in _one_to_one_shared_input_or_input_act_qspec:
         input_qspec = (
             SharedQuantizationSpec(node.args[0])  # type: ignore[arg-type]
             if arm_quantizer_utils.is_output_annotated(node.args[0])  # type: ignore
diff --git a/backends/arm/test/ops/test_alias_copy.py b/backends/arm/test/ops/test_alias_copy.py
@@ -0,0 +1,83 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+input_t1 = Tuple[torch.Tensor]
+
+
+class AliasCopy(torch.nn.Module):
+    """
+    Tests proper handling of alias_copy when used directly.
+
+    alias_copy can also appear from PyTorch/ExecuTorch optimizations
+    such as `x.transpose(0, 0)`. This is optimized to an alias_copy but
+    not before dq/q operators are added.
+    """
+
+    aten_op = "torch.ops.aten.alias_copy.default"
+    exir_op = "executorch_exir_dialects_edge__ops_aten_alias_copy_default"
+
+    test_data: dict[input_t1] = {
+        "1d_ramp": (torch.arange(-16, 16, 0.2),),
+        "2d_ones": (torch.ones(5, 5),),
+        "3d_rand": (torch.rand(3, 5, 5),),
+        "4d_zeros": (torch.zeros(1, 10, 10, 10),),
+    }
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x: torch.Tensor):
+        return torch.alias_copy(x)
+
+
+@common.parametrize("test_data", AliasCopy.test_data)
+def test_alias_copy_tosa_MI(test_data: input_t1):
+    TosaPipelineMI[input_t1](
+        AliasCopy(),
+        test_data,
+        AliasCopy.aten_op,
+        AliasCopy.exir_op,
+    ).run()
+
+
+@common.parametrize("test_data", AliasCopy.test_data)
+def test_alias_copy_tosa_BI(test_data: input_t1):
+    TosaPipelineBI[input_t1](
+        AliasCopy(),
+        test_data,
+        AliasCopy.aten_op,
+        AliasCopy.exir_op,
+    ).run()
+
+
+@common.parametrize("test_data", AliasCopy.test_data)
+def test_alias_copy_u55_BI(test_data: input_t1):
+    EthosU55PipelineBI[input_t1](
+        AliasCopy(),
+        test_data,
+        AliasCopy.aten_op,
+        AliasCopy.exir_op,
+    ).run()
+
+
+@common.parametrize("test_data", AliasCopy.test_data)
+def test_alias_copy_u85_BI(test_data: input_t1):
+    EthosU85PipelineBI[input_t1](
+        AliasCopy(),
+        test_data,
+        AliasCopy.aten_op,
+        AliasCopy.exir_op,
+    ).run()
diff --git a/docs/source/bundled-io.md b/docs/source/bundled-io.md
@@ -217,7 +217,7 @@ cd executorch
    ./cmake-out/examples/devtools/example_runner --bundled_program_path {your-bpte-file} --output_verification
 ```
 
-It is expected to see no input from running the above mentioned snippet.
+It is expected to see no output from running the above mentioned snippet.
 
 For a detailed example of what the runner should look like, please refer to our [example runner](https://github.com/pytorch/executorch/blob/release/0.6/examples/devtools/example_runner/example_runner.cpp).
 
diff --git a/docs/source/llm/getting-started.md b/docs/source/llm/getting-started.md
@@ -159,7 +159,7 @@ example_inputs = (torch.randint(0, 100, (1, model.config.block_size), dtype=torc
 # long as they adhere to the rules specified in the dynamic shape configuration.
 # Here we set the range of 0th model input's 1st dimension as
 # [0, model.config.block_size].
-# See https://pytorch.org/executorch/main/concepts.html#dynamic-shapes
+# See https://pytorch.org/executorch/main/concepts#dynamic-shapes
 # for details about creating dynamic shapes.
 dynamic_shape = (
     {1: torch.export.Dim("token_dim", max=model.config.block_size)},
diff --git a/examples/demo-apps/apple_ios/LLaMA/README.md b/examples/demo-apps/apple_ios/LLaMA/README.md
@@ -56,7 +56,7 @@ Link your binary with the ExecuTorch runtime and any backends or kernels used by
 
 Note: To access logs, link against the Debug build of the ExecuTorch runtime, i.e., the executorch_debug framework. For optimal performance, always link against the Release version of the deliverables (those without the _debug suffix), which have all logging overhead removed.
 
-For more details on integrating and running ExecuTorch on Apple Platforms, check out this [link](https://pytorch.org/executorch/main/using-executorch-ios.html).
+For more details on integrating and running ExecuTorch on Apple Platforms, check out this [link](https://pytorch.org/executorch/main/using-executorch-ios).
 
 ### XCode
 * Open XCode and select "Open an existing project" to open `examples/demo-apps/apple_ios/LLama`.
diff --git a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md
@@ -163,7 +163,7 @@ If you cannot add the package into your app target (it's greyed out), it might h
 
 
 
- More details on integrating and Running ExecuTorch on Apple Platforms, check out the detailed guide [here](https://pytorch.org/executorch/main/using-executorch-ios.html#local-build).
+ More details on integrating and Running ExecuTorch on Apple Platforms, check out the detailed guide [here](https://pytorch.org/executorch/main/using-executorch-ios#local-build).
 
 ### 3. Configure Build Schemes
 
@@ -175,7 +175,7 @@ Navigate to `Product --> Scheme --> Edit Scheme --> Info --> Build Configuration
 
 We recommend that you only use the Debug build scheme during development, where you might need to access additional logs. Debug build has logging overhead and will impact inferencing performance, while release build has compiler optimizations enabled and all logging overhead removed.
 
-For more details on integrating and running ExecuTorch on Apple Platforms or building the package locally, check out this [link](https://pytorch.org/executorch/main/using-executorch-ios.html).
+For more details on integrating and running ExecuTorch on Apple Platforms or building the package locally, check out this [link](https://pytorch.org/executorch/main/using-executorch-ios).
 
 ### 4. Build and Run the project
 
diff --git a/examples/llm_manual/README.md b/examples/llm_manual/README.md
@@ -1,3 +1,3 @@
 # LLM Manual
 
-This repository is a storage place for the files that [LLM Manual](https://pytorch.org/executorch/main/llm/getting-started.html) needs. Please refer to the documentation website for more information.
+This repository is a storage place for the files that [LLM Manual](https://pytorch.org/executorch/main/llm/getting-started) needs. Please refer to the documentation website for more information.
diff --git a/examples/models/llama/README.md b/examples/models/llama/README.md
@@ -379,10 +379,10 @@ adb shell "cd /data/local/tmp/llama && ./llama_main --model_path <model.pte> --t
 
 ### iOS
 
-Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-demo-ios.html) for full instructions on building the iOS LLAMA Demo App. Rename `tokenizer.model` file to `tokenizer.bin` because the demo app looks for the tokenizer file with .bin extension.
+Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-demo-ios) for full instructions on building the iOS LLAMA Demo App. Rename `tokenizer.model` file to `tokenizer.bin` because the demo app looks for the tokenizer file with .bin extension.
 
 ### Android
-Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-demo-android.html) for full instructions on building the Android LLAMA Demo App.
+Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-demo-android) for full instructions on building the Android LLAMA Demo App.
 
 ## Running with low-bit kernels
 
diff --git a/extension/benchmark/apple/Benchmark/README.md b/extension/benchmark/apple/Benchmark/README.md
@@ -33,7 +33,7 @@ This command performs a shallow clone to speed up the process.
 
 The Benchmark App is configured to use a Swift PM package that provides the prebuilt ExecuTorch frameworks.
 
-By default, the app relies on the package referencing locally built binaries. To ensure it functions correctly, you must first build the frameworks by following the [guide](https://pytorch.org/executorch/main/using-executorch-ios.html#building-from-source).
+By default, the app relies on the package referencing locally built binaries. To ensure it functions correctly, you must first build the frameworks by following the [guide](https://pytorch.org/executorch/main/using-executorch-ios#building-from-source).
 
 ## Adding Models and Resources
 
diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp
@@ -86,6 +86,9 @@ Result<FileDataLoader> FileDataLoader::from(
       "Alignment %zu is not a power of 2",
       alignment);
 
+  ET_CHECK_OR_RETURN_ERROR(
+      file_name != nullptr, InvalidArgument, "File name cannot be empty.");
+
   // Use open() instead of fopen() to avoid the layer of buffering that
   // fopen() does. We will be reading large portions of the file in one shot,
   // so buffering does not help.
diff --git a/extension/data_loader/test/file_data_loader_test.cpp b/extension/data_loader/test/file_data_loader_test.cpp
@@ -154,6 +154,12 @@ TEST_P(FileDataLoaderTest, FromMissingFileFails) {
   EXPECT_NE(fdl.error(), Error::Ok);
 }
 
+TEST_P(FileDataLoaderTest, FromEmptyFilePathFails) {
+  // Nullptr should fail
+  Result<FileDataLoader> fdl = FileDataLoader::from(nullptr);
+  EXPECT_NE(fdl.error(), Error::Ok);
+}
+
 TEST_P(FileDataLoaderTest, BadAlignmentFails) {
   // Create a temp file; contents don't matter.
   uint8_t data[256] = {};
diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers
@@ -1 +1 @@
-Subproject commit 71167bf9cf4bed861eb9547d1d77e993fd1004f1
+Subproject commit 295ee78e4b0d99d4527bbe81bc3156341366de11

Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@`
`23`	`23`	`"samsung_galaxy_s22": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa",`
`24`	`24`	`"samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",`
`25`	`25`	`"google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",`
	`26`	`+ "google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",`
`26`	`27`	`}`
`27`	`28`
`28`	`29`	`# Predefined benchmark configurations`
Original file line number	Diff line number	Diff line change
`@@ -229,6 +229,7 @@ def is_node_supported(`
`229`	`229`	`exir_ops.edge.aten.__lshift__.Scalar,`
`230`	`230`	`torch.ops.aten.scalar_tensor.default,`
`231`	`231`	`exir_ops.edge.aten.gelu.default,`
	`232`	`+ exir_ops.edge.aten.alias_copy.default,`
`232`	`233`	`]`
`233`	`234`
`234`	`235`	`return supported`