pytorch
diff --git a/‎.ci/docker/requirements-ci.txt‎
Lines changed: 12 additions & 9 deletions b/‎.ci/docker/requirements-ci.txt‎
Lines changed: 12 additions & 9 deletions
diff --git a/‎.ci/scripts/build-qnn-sdk.sh‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/build-qnn-sdk.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/setup-openvino.sh‎
Lines changed: 9 additions & 11 deletions b/‎.ci/scripts/setup-openvino.sh‎
Lines changed: 9 additions & 11 deletions
diff --git a/‎.ci/scripts/test_backend_linux.sh‎ ‎.ci/scripts/test_backend.sh‎.ci/scripts/test_backend_linux.sh renamed to .ci/scripts/test_backend.sh
Lines changed: 28 additions & 7 deletions b/‎.ci/scripts/test_backend_linux.sh‎ ‎.ci/scripts/test_backend.sh‎.ci/scripts/test_backend_linux.sh renamed to .ci/scripts/test_backend.sh
Lines changed: 28 additions & 7 deletions
diff --git a/‎.ci/scripts/test_backend_macos.sh‎
Lines changed: 0 additions & 30 deletions b/‎.ci/scripts/test_backend_macos.sh‎
Lines changed: 0 additions & 30 deletions
diff --git a/‎.ci/scripts/test_llama.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_llama.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/test_llama_torchao_lowbit.sh‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/test_llama_torchao_lowbit.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/test_model.sh‎
Lines changed: 28 additions & 4 deletions b/‎.ci/scripts/test_model.sh‎
Lines changed: 28 additions & 4 deletions
diff --git a/‎.ci/scripts/test_openvino.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_openvino.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/test_torchao_huggingface_checkpoints.sh‎
Lines changed: 25 additions & 7 deletions b/‎.ci/scripts/test_torchao_huggingface_checkpoints.sh‎
Lines changed: 25 additions & 7 deletions
@@ -16,18 +16,21 @@ hypothesis==6.84.2
 parameterized==0.9.0
 
 # Doc build requirements, same as https://github.com/pytorch/pytorch/blob/main/.ci/docker/requirements-docs.txt
-sphinx==5.3.0
+sphinx==7.2.6
+sphinxcontrib.katex==0.9.10
+breathe==4.36.0  # only if generating C++
+exhale==0.3.7  # only if generating C++ docs
+docutils==0.18.1,<0.21
+sphinx-design==0.6.1
+sphinxcontrib-mermaid==1.0.0
+myst-parser==3.0.1  # if want to contribute in markdown
+sphinx-gallery==0.14.0  # only if hosting interactive tutorials
+sphinx-sitemap==2.7.1
 sphinx-reredirects==0.1.4
-sphinx-gallery==0.14.0
-breathe==4.34.0
-exhale==0.2.3
-docutils==0.16
 matplotlib>=3.9.4
+sphinx-copybutton==0.5.2
 # PyTorch Theme
--e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
-myst-parser==0.18.1
-sphinx_design==0.4.1
-sphinx-copybutton==0.5.0
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
 
 # script unit test requirements
 yaspin==3.1.0
@@ -38,6 +38,7 @@ set_up_aot() {
       -DEXECUTORCH_BUILD_EXTENSION_EXTENSION_LLM=ON \
       -DEXECUTORCH_BUILD_EXTENSION_EXTENSION_LLM_RUNNER=ON \
       -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+      -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
       -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
       -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
       -DPYTHON_EXECUTABLE=python3
 
@@ -10,19 +10,17 @@ set -ex
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
-git clone https://github.com/openvinotoolkit/openvino.git
-cd openvino && git checkout releases/2025/1
-git submodule update --init --recursive
-sudo ./install_build_dependencies.sh
-mkdir build && cd build
-cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON
-make -j$(nproc)
+# Download and install OpenVINO from release packages
+OPENVINO_VERSION="2025.3"
+OPENVINO_BUILD="2025.3.0.19807.44526285f24"
+OPENVINO_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION}/linux/openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64.tgz"
 
-cd ..
-cmake --install build --prefix dist
+curl -Lo /tmp/openvino_toolkit.tgz --retry 3 --fail ${OPENVINO_URL}
+tar -xzf /tmp/openvino_toolkit.tgz
+mv openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64 openvino
 
-source dist/setupvars.sh
-cd ../backends/openvino
+source openvino/setupvars.sh
+cd backends/openvino
 pip install -r requirements.txt
 cd scripts
 ./openvino_build.sh --enable_python
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
+# Copyright 2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -10,16 +11,26 @@ SUITE=$1
 FLOW=$2
 ARTIFACT_DIR=$3
 
-REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.json"
 
 echo "Running backend test job for suite $SUITE, flow $FLOW."
 echo "Saving job artifacts to $ARTIFACT_DIR."
 
-# The generic Linux job chooses to use base env, not the one setup by the image
 eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 conda activate "${CONDA_ENV}"
 
+if [[ "$(uname)" == "Darwin" ]]; then
+    bash .ci/scripts/setup-conda.sh
+    eval "$(conda shell.bash hook)"
+    CONDA_RUN_CMD="${CONDA_RUN} --no-capture-output"
+    ${CONDA_RUN_CMD} pip install awscli==1.37.21
+    IS_MACOS=1
+else
+    CONDA_RUN_CMD=""
+    IS_MACOS=0
+fi
+
 export PYTHON_EXECUTABLE=python
 
 # CMake options to use, in addition to the defaults.
@@ -48,13 +59,23 @@ fi
 if [[ "$FLOW" == *arm* ]]; then
     # Setup ARM deps.
     .ci/scripts/setup-arm-baremetal-tools.sh
+    source examples/arm/ethos-u-scratch/setup_path.sh
+
+    if [[ "$FLOW" == *ethos_u* ]]; then
+        # Prepare a test runner binary that can run on the Corstone-3x0 FVPs
+        backends/arm/scripts/build_executorch.sh
+        backends/arm/test/setup_testing.sh
+    fi
 fi
 
-# We need the runner to test the built library.
-PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
+if [[ $IS_MACOS -eq 1 ]]; then
+    SETUP_SCRIPT=.ci/scripts/setup-macos.sh
+else
+    SETUP_SCRIPT=.ci/scripts/setup-linux.sh
+fi
+CMAKE_ARGS="$EXTRA_BUILD_ARGS" ${CONDA_RUN_CMD} $SETUP_SCRIPT --build-tool cmake --build-mode Release --editable true
 
 EXIT_CODE=0
-python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$REPORT_FILE" || EXIT_CODE=$?
-
+${CONDA_RUN_CMD} pytest -c /dev/nul -n auto backends/test/suite/$SUITE/ -m flow_$FLOW --json-report --json-report-file="$REPORT_FILE" || EXIT_CODE=$?
 # Generate markdown summary.
-python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE
+${CONDA_RUN_CMD} python -m executorch.backends.test.suite.generate_markdown_summary_json "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE
@@ -237,7 +237,7 @@ if [[ "${CUSTOM}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} model.use_sdpa_with_kv_cache=true"
 fi
 if [[ "${QE}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} quantization.embedding_quantize=\"8,1024\""
+  EXPORT_ARGS="${EXPORT_ARGS} quantization.embedding_quantize=\"8,768\""
 fi
 if [[ "${MPS}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} backend.mps.enabled=true model.enable_dynamic_shape=false debug.verbose=true"
 
@@ -31,6 +31,7 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_XNNPACK=OFF \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
 
@@ -48,22 +48,33 @@ prepare_artifacts_upload() {
   fi
 }
 
+
 build_cmake_executor_runner() {
   local backend_string_select="${1:-}"
   echo "Building executor_runner"
   rm -rf ${CMAKE_OUTPUT_DIR}
   mkdir ${CMAKE_OUTPUT_DIR}
+  # Common options:
+  COMMON="-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE"
   if [[ "$backend_string_select" == "XNNPACK" ]]; then
     echo "Backend $backend_string_select selected"
-    (cd ${CMAKE_OUTPUT_DIR} \
-      && cmake -DCMAKE_BUILD_TYPE=Release \
+    cmake -DCMAKE_BUILD_TYPE=Release \
         -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
+        ${COMMON} \
+        -B${CMAKE_OUTPUT_DIR} .
+    cmake --build ${CMAKE_OUTPUT_DIR} -j4
+  elif [[ "$backend_string_select" == "CUDA" ]]; then
+    echo "Backend $backend_string_select selected"
+    cmake -DCMAKE_BUILD_TYPE=Release \
+        -DEXECUTORCH_BUILD_CUDA=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+        ${COMMON} \
+        -B${CMAKE_OUTPUT_DIR} .
     cmake --build ${CMAKE_OUTPUT_DIR} -j4
   else
     cmake -DCMAKE_BUILD_TYPE=Debug \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+        ${COMMON} \
         -B${CMAKE_OUTPUT_DIR} .
     cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug
   fi
@@ -320,6 +331,13 @@ test_model_with_mediatek() {
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "*.pte" -print -quit)
 }
 
+test_model_with_cuda() {
+  # Export a basic .pte and .ptd, then run the model.
+  "${PYTHON_EXECUTABLE}" -m examples.cuda.scripts.export --model_name="${MODEL_NAME}" --output_dir "./"
+  build_cmake_executor_runner "CUDA"
+  ./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "./${MODEL_NAME}.pte" --data_path "./aoti_cuda_blob.ptd"
+}
+
 
 if [[ "${BACKEND}" == "portable" ]]; then
   echo "Testing ${MODEL_NAME} with portable kernels..."
@@ -372,6 +390,12 @@ elif [[ "${BACKEND}" == "mediatek" ]]; then
   if [[ $? -eq 0 ]]; then
     prepare_artifacts_upload
   fi
+elif [[ "${BACKEND}" == "cuda" ]]; then
+  echo "Testing ${MODEL_NAME} with cuda..."
+  test_model_with_cuda
+  if [[ $? -eq 0 ]]; then
+    prepare_artifacts_upload
+  fi
 else
   set +e
   if [[ "${BACKEND}" == *"quantization"* ]]; then
 
@@ -10,7 +10,7 @@ set -ex
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
-source openvino/dist/setupvars.sh
+source openvino/setupvars.sh
 cd backends/openvino/tests
 python test_runner.py --test_type ops
 python test_runner.py --test_type models
@@ -5,6 +5,7 @@ set -euxo pipefail
 # Args / flags
 # -------------------------
 TEST_WITH_RUNNER=0
+USE_TORCHAO_KERNELS=0
 MODEL_NAME=""
 
 # Parse args
@@ -22,10 +23,14 @@ while [[ $# -gt 0 ]]; do
     --test_with_runner)
       TEST_WITH_RUNNER=1
       ;;
+    --use_torchao_kernels)
+      USE_TORCHAO_KERNELS=1
+      ;;
     -h|--help)
-      echo "Usage: $0 <model_name> [--test_with_runner]"
+      echo "Usage: $0 <model_name> [--test_with_runner] [--use_torchao_kernels]"
       echo "  model_name: qwen3_4b | phi_4_mini"
       echo "  --test_with_runner: build ET + run llama_main to sanity-check the export"
+      echo "  --use_torchao_kernels: use torchao kernels for linear and tied embedding"
       exit 0
       ;;
     *)
@@ -42,6 +47,13 @@ fi
 
 MODEL_OUT=model.pte
 
+
+# Default to XNNPACK
+BACKEND_ARGS="-X --xnnpack-extended-ops"
+if [[ "$USE_TORCHAO_KERNELS" -eq 1 ]]; then
+  BACKEND_ARGS="--use-torchao-kernels"
+fi
+
 case "$MODEL_NAME" in
   qwen3_4b)
     echo "Running Qwen3-4B export..."
@@ -58,12 +70,12 @@ case "$MODEL_NAME" in
       --output_name $MODEL_OUT \
       -kv \
       --use_sdpa_with_kv_cache \
-      -X \
-      --xnnpack-extended-ops \
       --max_context_length 1024 \
       --max_seq_length 1024 \
+      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
+      --verbose \
       --dtype fp32 \
-      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
+      ${BACKEND_ARGS}
     ;;
 
   phi_4_mini)
@@ -81,12 +93,12 @@ case "$MODEL_NAME" in
       --output_name $MODEL_OUT \
       -kv \
       --use_sdpa_with_kv_cache \
-      -X \
-      --xnnpack-extended-ops \
       --max_context_length 1024 \
       --max_seq_length 1024 \
+      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
+      --verbose \
       --dtype fp32 \
-      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
+      ${BACKEND_ARGS}
     ;;
 
   *)
@@ -104,6 +116,10 @@ if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then
 fi
 
 # Install ET with CMake
+EXECUTORCH_BUILD_KERNELS_TORCHAO="OFF"
+if [[ "$USE_TORCHAO_KERNELS" -eq 1 ]]; then
+  EXECUTORCH_BUILD_KERNELS_TORCHAO="ON"
+fi
 if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
   echo "[runner] Building and testing llama_main ..."
     cmake -DPYTHON_EXECUTABLE=python \
@@ -113,13 +129,15 @@ if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+        -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
         -DEXECUTORCH_BUILD_XNNPACK=ON \
         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
         -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
         -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
         -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
+        -DEXECUTORCH_BUILD_KERNELS_TORCHAO=${EXECUTORCH_BUILD_KERNELS_TORCHAO} \
         -Bcmake-out .
     cmake --build cmake-out -j16 --config Release --target install