pytorch
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/scripts/build_llama_android.sh b/.ci/scripts/build_llama_android.sh
Lines changed: 2 additions & 4 deletions
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
Lines changed: 4 additions & 4 deletions
diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh
Lines changed: 0 additions & 2 deletions
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
Lines changed: 3 additions & 3 deletions
diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py
Lines changed: 48 additions & 26 deletions
diff --git a/.github/scripts/propose_ghstack_orig_pr.py b/.github/scripts/propose_ghstack_orig_pr.py
Lines changed: 135 additions & 0 deletions
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ case "${IMAGE_NAME}" in
     LINTRUNNER=""
     CLANG_VERSION=12
     # From https://developer.android.com/ndk/downloads
-    ANDROID_NDK_VERSION=r26c
+    ANDROID_NDK_VERSION=r27b
     ;;
   *)
     echo "Invalid image name ${IMAGE_NAME}"
 
@@ -19,7 +19,6 @@ install_executorch_and_backend_lib() {
   cmake -DBUCK2="${BUCK2}" \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
     -DANDROID_ABI="${ANDROID_ABI}" \
-    -DANDROID_PLATFORM=android-23 \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -41,16 +40,15 @@ build_llama_runner() {
     cmake -DBUCK2="${BUCK2}" \
     -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake  \
     -DANDROID_ABI="${ANDROID_ABI}" \
-    -DANDROID_PLATFORM=android-23 \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
     -DCMAKE_BUILD_TYPE=Release -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-    -Bcmake-android-out/examples/models/llama2 examples/models/llama2
+    -Bcmake-android-out/examples/models/llama examples/models/llama
 
-    cmake --build cmake-android-out/examples/models/llama2 -j4 --config Release
+    cmake --build cmake-android-out/examples/models/llama -j4 --config Release
 }
 install_flatc_from_source
 install_executorch_and_backend_lib
 
@@ -125,7 +125,7 @@ cmake_install_executorch_libraries() {
 
 cmake_build_llama_runner() {
     echo "Building llama runner"
-    dir="examples/models/llama2"
+    dir="examples/models/llama"
     retry cmake \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE=Debug \
@@ -206,7 +206,7 @@ if [[ "${QNN}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
 fi
 # Add dynamically linked library location
-$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
+$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
 
 # Create tokenizer.bin.
 echo "Creating tokenizer.bin"
@@ -219,15 +219,15 @@ echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
 if [[ "${BUILD_TOOL}" == "buck2" ]]; then
   # Run model.
   # shellcheck source=/dev/null
-  $BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
+  $BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
 elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
   cmake_install_executorch_libraries
   cmake_build_llama_runner
   # Run llama runner
   NOW=$(date +"%H:%M:%S")
   echo "Starting to run llama runner at ${NOW}"
   # shellcheck source=/dev/null
-  cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt
+  cmake-out/examples/models/llama/llama_main ${RUNTIME_ARGS} > result.txt
   NOW=$(date +"%H:%M:%S")
   echo "Finished at ${NOW}"
 else
 
@@ -56,7 +56,6 @@ cmake_install_executorch_libraries_for_android() {
     cmake                                                                       \
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI=arm64-v8a                                                 \
-        -DANDROID_PLATFORM=android-23                                           \
         ${EXECUTORCH_COMMON_CMAKE_ARGS}                                         \
         -B${BUILD_DIR} .
 
@@ -93,7 +92,6 @@ cmake_build_llava_runner_for_android() {
     cmake                                                                       \
         -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
         -DANDROID_ABI=arm64-v8a                                                 \
-        -DANDROID_PLATFORM=android-23                                           \
         ${LLAVA_COMMON_CMAKE_ARGS}                                              \
         -DCMAKE_PREFIX_PATH="$python_lib"                                       \
         -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON                                  \
 
@@ -75,9 +75,9 @@ run_portable_executor_runner() {
 test_model() {
   if [[ "${MODEL_NAME}" == "llama2" ]]; then
     # Install requirements for export_llama
-    bash examples/models/llama2/install_requirements.sh
-    # Test export_llama script: python3 -m examples.models.llama2.export_llama
-    "${PYTHON_EXECUTABLE}" -m examples.models.llama2.export_llama -c examples/models/llama2/params/demo_rand_params.pth -p examples/models/llama2/params/demo_config.json
+    bash examples/models/llama/install_requirements.sh
+    # Test export_llama script: python3 -m examples.models.llama.export_llama
+    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
     run_portable_executor_runner
     rm "./${MODEL_NAME}.pte"
   fi
 
@@ -9,7 +9,6 @@
 import logging
 import os
 import re
-import time
 import zipfile
 from argparse import Action, ArgumentParser, Namespace
 from io import BytesIO
@@ -26,12 +25,15 @@
 
 # iOS-related regexes and variables
 IOS_TEST_SPEC_REGEX = re.compile(
-    r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>\w+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
+    r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>[\w\+]+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
 )
 IOS_TEST_NAME_REGEX = re.compile(
-    r"test_(?P<method>forward|load|generate)_(?P<model_name>\w+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)"
+    r"test_(?P<method>forward|load|generate)_(?P<model_name>[\w\+]+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)"
+)
+# The backend name could contain +, e.g. tinyllama_xnnpack+custom+qe_fp32
+IOS_MODEL_NAME_REGEX = re.compile(
+    r"(?P<model>[^_]+)_(?P<backend>[\w\+]+)_(?P<dtype>\w+)"
 )
-IOS_MODEL_NAME_REGEX = re.compile(r"(?P<model>[^_]+)_(?P<backend>\w+)_(?P<dtype>\w+)")
 
 
 class ValidateArtifacts(Action):
@@ -159,19 +161,8 @@ def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
     ios_ver = m.group("ios_ver").replace("_", ".")
     iphone_ver = m.group("iphone_ver").replace("_", ".")
 
-    # NB: This looks brittle, but unless we can return iOS benchmark results in JSON
-    # format by the test, the mapping is needed to match with Android test
-    if method == "load":
-        metric = "model_load_time(ms)"
-    elif method == "forward":
-        metric = (
-            "generate_time(ms)"
-            if "llama" in model_name
-            else "avg_inference_latency(ms)"
-        )
-    elif method == "generate":
-        metric = "token_per_sec"
-
+    # Default backend and quantization dtype, used when the script cannot
+    # extract them from the model name
     backend = ""
     quantization = "unknown"
 
@@ -194,8 +185,9 @@ def initialize_ios_metadata(test_name: str) -> Dict[str, any]:
             "availMem": 0,
             "totalMem": 0,
         },
-        "metric": metric,
+        "method": method,
         # These fields will be populated later by extract_ios_metric
+        "metric": "",
         "actualValue": 0,
         "targetValue": 0,
     }
@@ -210,10 +202,38 @@ def extract_ios_metric(
     """
     Map the metric name from iOS xcresult to the benchmark result
     """
-    if metric_name == "Clock Monotonic Time, s":
-        # The benchmark value is in ms
-        benchmark_result["actualValue"] = metric_value * 1000
-    elif metric_name == "Tokens Per Second, t/s":
+    method = benchmark_result.get("method", "")
+    if not method:
+        return benchmark_result
+
+    # NB: This looks brittle, but unless we can return iOS benchmark results in JSON
+    # format by the test, the mapping is needed to match with Android test
+    if method == "load":
+        if metric_name == "Clock Monotonic Time, s":
+            benchmark_result["metric"] = "model_load_time(ms)"
+            benchmark_result["actualValue"] = metric_value * 1000
+
+        elif metric_name == "Memory Peak Physical, kB":
+            # NB: Showing the value in MB is friendlier IMO
+            benchmark_result["metric"] = "peak_load_mem_usage(mb)"
+            benchmark_result["actualValue"] = metric_value / 1024
+
+    elif method == "forward":
+        if metric_name == "Clock Monotonic Time, s":
+            benchmark_result["metric"] = (
+                "generate_time(ms)"
+                if "llama" in test_name
+                else "avg_inference_latency(ms)"
+            )
+            benchmark_result["actualValue"] = metric_value * 1000
+
+        elif metric_name == "Memory Peak Physical, kB":
+            # NB: Showing the value in MB is friendlier IMO
+            benchmark_result["metric"] = "peak_inference_mem_usage(mb)"
+            benchmark_result["actualValue"] = metric_value / 1024
+
+    elif method == "generate" and metric_name == "Tokens Per Second, t/s":
+        benchmark_result["metric"] = "token_per_sec"
         benchmark_result["actualValue"] = metric_value
 
     return benchmark_result
@@ -235,31 +255,33 @@ def extract_ios_benchmark_results(
 
         with request.urlopen(artifact_s3_url) as data:
             current_test_name = ""
+            current_metric_name = ""
             current_record = {}
 
             for line in data.read().decode("utf8").splitlines():
                 s = IOS_TEST_SPEC_REGEX.search(line)
                 if not s:
                     continue
 
-                test_class = s.group("test_class")
                 test_name = s.group("test_name")
                 metric_name = s.group("metric")
                 metric_value = float(s.group("value"))
 
-                if test_name != current_test_name:
-                    if current_record:
+                if test_name != current_test_name or metric_name != current_metric_name:
+                    if current_record and current_record.get("metric", ""):
                         # Save the benchmark result in the same format used by Android
                         benchmark_results.append(current_record.copy())
 
                     current_test_name = test_name
+                    current_metric_name = metric_name
                     current_record = initialize_ios_metadata(current_test_name)
 
                 current_record = extract_ios_metric(
                     current_record, test_name, metric_name, metric_value
                 )
 
-            benchmark_results.append(current_record.copy())
+            if current_record and current_record.get("metric", ""):
+                benchmark_results.append(current_record.copy())
 
         return benchmark_results
 
 
@@ -0,0 +1,135 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+import os
+import re
+
+from typing import List
+
+# Provided by the PyGithub pip package.
+from github import Auth, Github
+from github.Repository import Repository
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument(
+        "--repo",
+        type=str,
+        help='The github repo to modify: e.g. "pytorch/executorch".',
+        required=True,
+    )
+    parser.add_argument(
+        "--pr",
+        type=int,
+        help="Number of the PR in the stack to check and create corresponding PR",
+        required=True,
+    )
+    return parser.parse_args()
+
+
+def extract_stack_from_body(pr_body: str) -> List[int]:
+    """Extracts a list of PR numbers from a ghexport-generated PR body.
+
+    The base of the stack is in index 0.
+    """
+
+    # Expected format. The `__->__` could appear on any line. This would return
+    # [1, 2, 3]. NOTE(review): parsing does not actually stop at the blank line.
+    """
+    Stack from [ghstack](https://github.com/ezyang/ghstack) (oldest at bottom):
+    * #3
+    * __->__ #2
+    * #1
+
+    <PR description details>
+    """
+
+    prs = []
+    ghstack_begin = (
+        "Stack from [ghstack](https://github.com/ezyang/ghstack) (oldest at bottom):"
+    )
+    ghstack_begin_seen = False
+    for line in pr_body.splitlines():
+        if ghstack_begin in line:
+            ghstack_begin_seen = True
+        if not ghstack_begin_seen:
+            continue
+        match = re.match(r"\*(?:.*?)? #(\d+)", line)
+        if match:
+            # It's a bullet followed by an integer.
+            prs.append(int(match.group(1)))
+    return list(reversed(prs))
+
+
+def get_pr_stack_from_number(pr_number: int, repo: Repository) -> List[int]:
+    pr_stack = extract_stack_from_body(repo.get_pull(pr_number).body)
+
+    if not pr_stack:
+        raise Exception(
+            f"Could not find PR stack in body of #{pr_number}. "
+            + "Please make sure that the PR was created with ghstack."
+        )
+
+    return pr_stack
+
+
+def create_prs_for_orig_branch(pr_stack: List[int], repo: Repository):
+    # For the first PR, we want to merge to `main` branch, and we will update
+    # as we go through the stack
+    orig_branch_merge_base = "main"
+    for i in range(len(pr_stack)):
+        pr = repo.get_pull(pr_stack[i])
+        if not pr.is_merged():
+            print("The PR (and stack above) is not merged yet, skipping")
+            return
+        # Check for invariant: For the current PR, it must be gh/user/x/base <- gh/user/x/head
+        assert pr.base.ref.replace("base", "head") == pr.head.ref
+        # The PR we want to create is then "branch_to_merge" <- gh/user/x/orig
+        # gh/user/x/orig is the clean diff between gh/user/x/base <- gh/user/x/head
+        orig_branch_merge_head = pr.base.ref.replace("base", "orig")
+        bot_metadata = f"""This PR was created by the merge bot to help merge the original PR into the main branch.
+ghstack PR number: https://github.com/pytorch/executorch/pull/{pr.number}
+^ Please use this as the source of truth for the PR details, comments, and reviews
+ghstack PR base: https://github.com/pytorch/executorch/tree/{pr.base.ref}
+ghstack PR head: https://github.com/pytorch/executorch/tree/{pr.head.ref}
+Merge bot PR base: https://github.com/pytorch/executorch/tree/{orig_branch_merge_base}
+Merge bot PR head: https://github.com/pytorch/executorch/tree/{orig_branch_merge_head}"""
+
+        existing_orig_pr = repo.get_pulls(
+            head="pytorch:" + orig_branch_merge_head,
+            base=orig_branch_merge_base,
+            state="open",
+        )
+        if existing_orig_pr.totalCount > 0:
+            print(
+                f"PR for {orig_branch_merge_head} already exists {existing_orig_pr[0]}"
+            )
+            # We don't need to create/edit because the head PR is merged and orig is finalized.
+        else:
+            repo.create_pull(
+                base=orig_branch_merge_base,
+                head=orig_branch_merge_head,
+                title=pr.title,
+                body=bot_metadata,
+            )
+        # Advance the base for the next PR
+        orig_branch_merge_base = orig_branch_merge_head
+
+
+def main():
+    args = parse_args()
+
+    with Github(auth=Auth.Token(os.environ["GITHUB_TOKEN"])) as gh:
+        repo = gh.get_repo(args.repo)
+        create_prs_for_orig_branch(get_pr_stack_from_number(args.pr, repo), repo)
+
+
+if __name__ == "__main__":
+    main()
@@ -160,7 +160,7 @@ jobs:
 
         if [[ ${{ matrix.model }} =~ ^stories* ]]; then
             # Install requirements for export_llama
-            PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
+            PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
             # Test llama2
             if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
                 DELEGATE_CONFIG="xnnpack+custom+qe"