Merge branch 'main' into gh/SS-JIA/161/orig

SS-JIA · web-flow · commit ccda565e1cb5 · 2025-01-02T20:06:40.000-05:00
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
@@ -98,6 +98,7 @@ jobs:
       - uses: actions/checkout@v3
 
       - name: Prepare the spec
+        id: prepare
         shell: bash
         env:
           BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
@@ -111,7 +112,7 @@ jobs:
           # so let's just sed it
           sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
 
-          BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
+          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
           # The config for this benchmark runs, we save it in the test spec so that it can be fetched
           # later by the upload script
           sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2
@@ -122,6 +123,7 @@ jobs:
 
           # Save the benchmark configs so that we can use it later in the dashboard
           echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
 
       - name: Upload the spec
         uses: seemethere/upload-artifact-s3@v5
@@ -141,7 +143,7 @@ jobs:
             ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
           retention-days: 1
           if-no-files-found: error
-          path: extension/benchmark/android/benchmark/${{ matrix.model }}_${{ matrix.config }}.json
+          path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
 
   export-models:
     name: export-models
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
@@ -100,6 +100,7 @@ jobs:
       - uses: actions/checkout@v3
 
       - name: Prepare the spec
+        id: prepare
         shell: bash
         env:
           BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
@@ -113,7 +114,7 @@ jobs:
           # so let's just sed it
           sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
 
-          BENCHMARK_CONFIG_ID="${{ matrix.model }}_${{ matrix.config }}"
+          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
           # The config for this benchmark runs, we save it in the test spec so that it can be fetched
           # later by the upload script
           sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2
@@ -124,6 +125,7 @@ jobs:
 
           # Save the benchmark configs so that we can use it later in the dashboard
           echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
 
       - name: Upload the spec
         uses: seemethere/upload-artifact-s3@v5
@@ -143,7 +145,7 @@ jobs:
             ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
           retention-days: 1
           if-no-files-found: error
-          path: extension/benchmark/apple/Benchmark/${{ matrix.model }}_${{ matrix.config }}.json
+          path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
 
   export-models:
     name: export-models
diff --git a/backends/cadence/aot/compiler_utils.py b/backends/cadence/aot/compiler_utils.py
@@ -129,16 +129,16 @@ def get_transposed_dims(node: torch.fx.Node, dims: List[int]) -> List[int]:
 
 
 # Capture the effect of permute op on incoming dimension order
-def get_permuted_dims(node: torch.fx.Node, dims: Optional[List[int]]) -> List[int]:
+def get_permuted_dims(node: torch.fx.Node, dims: Optional[Sequence[int]]) -> List[int]:
     """
     Given a permute node, and the incoming dimension ordering of the input
     tensor to the permute node, return the net effect of permute op on the
     dimension order.
     """
     assert node.target == exir_ops.edge.aten.permute_copy.default
     # Permute each index of the dimension ordering (dims)
-    permute_dims = node.args[1]
-    assert isinstance(permute_dims, List)
+    # pyre-fixme[6]: This combined typecheck isn't supported yet.
+    permute_dims: List[int] = list(node.args[1])
     assert all(isinstance(x, int) for x in permute_dims)
     # If the dims is empty, we can simply return the permute order
     if not dims:
diff --git a/backends/cadence/aot/reorder_ops.py b/backends/cadence/aot/reorder_ops.py
@@ -438,9 +438,9 @@ def postpone_dequantize_op(self, graph_module: torch.fx.GraphModule) -> bool:
                         args=(user, *node.args[1:]),
                     )
                     dequant_node.meta = user.meta.copy()
-                    # Remove meta["debug_handle"] on new node. Reassign it at the
-                    # caller level by calling generate_missing_debug_handles
-                    dequant_node.meta.pop("debug_handle")
+                    # Remove meta["debug_handle"] from the new node if it exists; it is
+                    # reassigned at the caller level by generate_missing_debug_handles.
+                    dequant_node.meta.pop("debug_handle", None)
                     user.replace_all_uses_with(dequant_node)
                     dequant_node.args = (user, *node.args[1:])
 
diff --git a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md
@@ -34,7 +34,10 @@ Install dependencies
 ```
 ./install_requirements.sh
 ```
-
+Optional: Use the `--pybind` flag to install with pybindings, for example:
+```
+./install_requirements.sh --pybind xnnpack
+```
 ## Prepare Models
 In this demo app, we support text-only inference with up-to-date Llama models and image reasoning inference with LLaVA 1.5.
 * You can request and download model weights for Llama through Meta official [website](https://llama.meta.com/).
diff --git a/examples/models/llama/install_requirements.sh b/examples/models/llama/install_requirements.sh
@@ -9,9 +9,6 @@
 # Install sentencepiece for llama tokenizer
 pip install snakeviz sentencepiece
 
-# Install torchao.
-pip install "$(dirname "$0")/../../../third-party/ao"
-
 # Install lm-eval for Model Evaluation with lm-evalution-harness
 # Install tiktoken for tokenizer
 pip install lm_eval==0.4.5
diff --git a/examples/models/llama3_2_vision/install_requirements.sh b/examples/models/llama3_2_vision/install_requirements.sh
@@ -9,6 +9,3 @@ NIGHTLY_VERSION="dev20241112"
 
 # Install torchtune nightly for model definitions.
 pip install --pre torchtune==0.4.0.${NIGHTLY_VERSION} --extra-index-url https://download.pytorch.org/whl/nightly/cpu --no-cache-dir
-
-# Install torchao.
-pip install "$(dirname "$0")/../../../third-party/ao"
diff --git a/examples/models/phi-3-mini-lora/install_requirements.sh b/examples/models/phi-3-mini-lora/install_requirements.sh
@@ -8,6 +8,3 @@
 pip install torchvision
 pip install torchtune
 pip install tiktoken
-
-# Install torchao.
-pip install "$(dirname "$0")/../../../third-party/ao"
diff --git a/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2 b/extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
@@ -18,14 +18,17 @@ phases:
       # Copy the model to sdcard. This prints too much progress info when the files
       # are large, so it's better to just silent them
       - adb -s $DEVICEFARM_DEVICE_UDID push *.bin /sdcard > /dev/null && echo OK
+      - adb -s $DEVICEFARM_DEVICE_UDID push *.model /sdcard > /dev/null && echo OK
       - adb -s $DEVICEFARM_DEVICE_UDID push *.pte /sdcard > /dev/null && echo OK
 
       # Prepare the model and the tokenizer
       - adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /sdcard/"
       - adb -s $DEVICEFARM_DEVICE_UDID shell "mkdir -p /data/local/tmp/minibench/"
       - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/*.bin /data/local/tmp/minibench/"
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/*.model /data/local/tmp/minibench/"
       - adb -s $DEVICEFARM_DEVICE_UDID shell "mv /sdcard/*.pte /data/local/tmp/minibench/"
       - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/minibench/*.bin"
+      - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/minibench/*.model"
       - adb -s $DEVICEFARM_DEVICE_UDID shell "chmod 664 /data/local/tmp/minibench/*.pte"
       - adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /data/local/tmp/minibench/"
       - adb -s $DEVICEFARM_DEVICE_UDID shell "run-as org.pytorch.minibench rm -rf files"
@@ -86,21 +89,33 @@ phases:
       - |
         BIN_FOUND="$(adb -s $DEVICEFARM_DEVICE_UDID shell find /data/local/tmp/minibench/ -name '*.bin')"
         if [ -z "$BIN_FOUND" ]; then
-          echo "No tokenizer files found in /data/local/tmp/minibench/"
+          echo "No *.bin tokenizer files found in /data/local/tmp/minibench/"
         else
-          echo "tokenizer files found in /data/local/tmp/minibench/"
+          echo "*.bin tokenizer files found in /data/local/tmp/minibench/"
+        fi
+
+        MODEL_FOUND="$(adb -s $DEVICEFARM_DEVICE_UDID shell find /data/local/tmp/minibench/ -name '*.model')"
+        if [ -z "$MODEL_FOUND" ]; then
+          echo "No *.model tokenizer files found in /data/local/tmp/minibench/"
+        else
+          echo "*.model tokenizer files found in /data/local/tmp/minibench/"
         fi
 
       - echo "Run benchmark"
       - |
         adb -s $DEVICEFARM_DEVICE_UDID shell am force-stop org.pytorch.minibench
-        if [ -z "$BIN_FOUND" ]; then
-          adb -s $DEVICEFARM_DEVICE_UDID shell am start -W -n org.pytorch.minibench/.BenchmarkActivity \
-            --es "model_dir" "/data/local/tmp/minibench"
-        else
+
+        if [ -n "$BIN_FOUND" ]; then
           adb -s $DEVICEFARM_DEVICE_UDID shell am start -W -n org.pytorch.minibench/.LlmBenchmarkActivity \
             --es "model_dir" "/data/local/tmp/minibench" \
             --es "tokenizer_path" "/data/local/tmp/minibench/tokenizer.bin"
+        elif [ -n "$MODEL_FOUND" ]; then
+          adb -s $DEVICEFARM_DEVICE_UDID shell am start -W -n org.pytorch.minibench/.LlmBenchmarkActivity \
+            --es "model_dir" "/data/local/tmp/minibench" \
+            --es "tokenizer_path" "/data/local/tmp/minibench/tokenizer.model"
+        else
+          adb -s $DEVICEFARM_DEVICE_UDID shell am start -W -n org.pytorch.minibench/.BenchmarkActivity \
+            --es "model_dir" "/data/local/tmp/minibench"
         fi
 
 
diff --git a/install_requirements.py b/install_requirements.py
@@ -170,6 +170,23 @@ def python_is_compatible():
     check=True,
 )
 
+LOCAL_REQUIREMENTS = [
+    "third-party/ao",  # We need the latest kernels for fast iteration, so not relying on pypi.
+]
+
+# Install these packages directly from the local copy instead of from PyPI.
+# This is usually not recommended.
+subprocess.run(
+    [
+        sys.executable,
+        "-m",
+        "pip",
+        "install",
+        *LOCAL_REQUIREMENTS,
+    ],
+    check=True,
+)
+
 #
 # Install executorch pip package. This also makes `flatc` available on the path.
 # The --extra-index-url may be necessary if pyproject.toml has a dependency on a