
Commit 3babb55

Update base for Update on "Reuse types in _named_data_store and support tensor layouts"
Reuse `DataEntry` from data_serializer.py in _named_data_store.py.

Motivation:
- deserialize from flat tensor to named data store output
- support tensor layouts in the named data store

Differential Revision: [D83490345](https://our.internmc.facebook.com/intern/diff/D83490345/)

[ghstack-poisoned]
2 parents: d321d51 + 37a65b5
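
For context, the core idea reads roughly like the sketch below: define `DataEntry` (with an optional tensor layout) once next to the serializer and reuse it in the named data store, so layout metadata survives the round trip from flat tensor files back to a named-data-store output. This is a minimal, self-contained approximation based only on the commit description — the field names (`buffer_index`, `alignment`, `tensor_layout`) and the `NamedDataStoreOutput` shape are illustrative assumptions, not the verbatim executorch API.

```python
# Minimal sketch (not the verbatim executorch API): one DataEntry type,
# defined once alongside the serializer and reused by the named data store,
# so tensor layout metadata flows through serialization unchanged.
from dataclasses import dataclass, field
from typing import Dict, List, Optional


@dataclass
class TensorLayout:
    scalar_type: str   # executorch uses a scalar-type enum; str keeps the sketch simple
    sizes: List[int]
    dim_order: List[int]


@dataclass
class DataEntry:
    buffer_index: int  # index into the shared list of raw buffers
    alignment: int
    tensor_layout: Optional[TensorLayout] = None  # present only for tensors


@dataclass
class NamedDataStoreOutput:
    buffers: List[bytes] = field(default_factory=list)
    entries: Dict[str, DataEntry] = field(default_factory=dict)

    def add_named_data(
        self,
        name: str,
        data: bytes,
        alignment: int = 1,
        tensor_layout: Optional[TensorLayout] = None,
    ) -> None:
        # Deduplicate identical payloads by reusing an existing buffer index.
        try:
            index = self.buffers.index(data)
        except ValueError:
            index = len(self.buffers)
            self.buffers.append(data)
        self.entries[name] = DataEntry(index, alignment, tensor_layout)


# Example: a tensor blob and an opaque blob share the same entry type.
store = NamedDataStoreOutput()
store.add_named_data(
    "linear.weight",
    b"\x00" * 16,
    alignment=16,
    tensor_layout=TensorLayout("float32", sizes=[2, 2], dim_order=[0, 1]),
)
store.add_named_data("tokenizer_blob", b"\x01\x02\x03")
assert store.entries["linear.weight"].tensor_layout is not None
```

Sharing one entry type means a deserializer can rebuild exactly what the store emitted, instead of converting between two parallel definitions.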


747 files changed, +32235 −8966 lines

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-44d8d54e38c0258357d4e92e1fefe21e845947a3
+467660923a5a25e4718e1d6697b93ff1bab4e807
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-53a2908a10f414a2f85caa06703a26a40e873869
+e6f766c7d750d40603eee3f66c5915bac606b3ea

.ci/docker/requirements-ci.txt

Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,12 @@
 mpmath==1.3.0
 numpy>=2.0.0; python_version >= '3.10'
 PyYAML==6.0.1
-ruamel.yaml==0.17.32
+ruamel.yaml==0.18.15
 sympy==1.12
 timm==0.6.13
 tomli==2.0.1
 torchsr==1.0.4
-transformers==4.47.1
+transformers==4.56.1
 zstd==1.5.5.1
 pandas>=2.2.2; python_version >= '3.10'
 pytest==7.2.0

.ci/scripts/setup-windows-msvc.ps1

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+conda create --yes --quiet -n et python=3.12
+conda activate et
+
+# Install cmake
+conda install -y cmake
+
+# Activate the VS environment - this is required for MSVC to work
+# There are a bunch of environment variables that it requires.
+# See https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line.
+& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64
+
+# Install CI requirements
+pip install -r .ci/docker/requirements-ci.txt
+
+# Create build directory
+$buildDir = "cmake-out-msvc"
+if (Test-Path -Path $buildDir) {
+    Remove-Item -Path $buildDir -Recurse -Force
+}
+New-Item -Path $buildDir -ItemType Directory
+
+# Configure CMake with MSVC (not ClangCL) and disable custom/quantized ops
+cmake -S . -B $buildDir `
+    -DCMAKE_BUILD_TYPE=Release `
+    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON `
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON `
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF `
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=OFF `
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=OFF `
+    -DEXECUTORCH_BUILD_XNNPACK=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "CMake configuration failed. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+# Build with MSVC
+cmake --build $buildDir --config Release -j16
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Build failed. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+Write-Host "MSVC build completed successfully!"

.ci/scripts/test-cuda-build.sh

Lines changed: 0 additions & 3 deletions
@@ -27,9 +27,6 @@ test_executorch_cuda_build() {
   nvcc --version || echo "nvcc not found"
   nvidia-smi || echo "nvidia-smi not found"

-  # Set CMAKE_ARGS to enable CUDA build - ExecuTorch will handle PyTorch installation automatically
-  export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
-
   echo "=== Starting ExecuTorch Installation ==="
   # Install ExecuTorch with CUDA support with timeout and error handling
   timeout 5400 ./install_executorch.sh || {

.ci/scripts/test_llama_lora.sh

Lines changed: 46 additions & 4 deletions
@@ -55,7 +55,7 @@ cmake_build_llama_runner
 # Constants.
 RUNTIME_ARGS="--tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
 PROMPT="What happens if you eat watermelon seeds?"
-EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"
+EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C and"

 # Export LoRA PTE file.
 MODEL_NAME="llama_3_2_1B_lora"
@@ -94,7 +94,7 @@ else
   exit 1
 fi

-# Export LoRA PTE, PTD file.
+# Export LoRA PTE, foundation PTD file.
 MODEL_SEPARATE="${MODEL_NAME}_separate"
 $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
   base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
@@ -114,20 +114,62 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
 NOW=$(date +"%H:%M:%S")
 echo "Starting to run llama runner at ${NOW}"
 # shellcheck source=/dev/null
-cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_path=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_paths=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
 NOW=$(date +"%H:%M:%S")
 echo "Finished at ${NOW}"

 RESULT2=$(cat result2.txt)
 if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT2}"
+  # Do not clean up files if test passes, as they're re-used in the next test.
   echo "Success"
-  cleanup_files
 else
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT2}"
   echo "Failure; results not the same"
   cleanup_files
   exit 1
 fi
+
+# Export LoRA PTE, LoRA PTD, foundation PTD file.
+MODEL_PROGRAM_ONLY="${MODEL_NAME}_program"
+MODEL_LORA_WEIGHTS="lora_weights"
+MODEL_FOUNDATION_WEIGHTS="foundation_weights"
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+  base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+  base.params="${DOWNLOADED_PATH}/params.json" \
+  base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+  base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+  base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+  model.use_kv_cache=true \
+  model.use_sdpa_with_kv_cache=true \
+  model.dtype_override="fp32" \
+  backend.xnnpack.enabled=true \
+  backend.xnnpack.extended_ops=true \
+  export.output_name="${MODEL_PROGRAM_ONLY}.pte" \
+  export.foundation_weights_file="${MODEL_FOUNDATION_WEIGHTS}.ptd" \
+  export.lora_weights_file="${MODEL_LORA_WEIGHTS}.ptd"
+
+# Run llama runner.
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_PROGRAM_ONLY}.pte --data_paths="${MODEL_FOUNDATION_WEIGHTS}.ptd,${MODEL_LORA_WEIGHTS}.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result3.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT3=$(cat result3.txt)
+if [[ "${RESULT3}" == "${EXPECTED_PREFIX}"* ]]; then
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Success"
+else
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Failure; results not the same"
+  cleanup_files
+  exit 1
+fi
+
+cleanup_files

.ci/scripts/test_phi_3_mini.sh

Lines changed: 11 additions & 12 deletions
@@ -36,34 +36,33 @@ cmake_build_phi_3_mini() {
   cmake --build ${BUILD_DIR}/${MODEL_DIR} -j${NPROC} --config ${BUILD_TYPE}
 }

-# Download and convert tokenizer.model
+# Download tokenizer.model
 prepare_tokenizer() {
-  echo "Downloading and converting tokenizer.model"
-  wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"
-  $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
+  echo "Downloading tokenizer.model"
+  wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.model?download=true"
 }

 # Export phi-3-mini model to pte
 export_phi_3_mini () {
   echo "Exporting phi-3-mini. This will take a few minutes"
-  $PYTHON_EXECUTABLE -m executorch.examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-mini.pte
+  optimum-cli export executorch --model microsoft/Phi-3-mini-4k-instruct --task text-generation --recipe xnnpack --output_dir ./
 }

 run_and_verify() {
   NOW=$(date +"%H:%M:%S")
   echo "Starting to run phi-3-mini runner at ${NOW}"
-  if [[ ! -f "phi-3-mini.pte" ]]; then
-    echo "Export failed. Abort"
+  if [[ ! -f "model.pte" ]]; then
+    echo "Missing model artifact. Abort"
     exit 1
   fi
-  if [[ ! -f "tokenizer.bin" ]]; then
-    echo "tokenizer.bin is missing."
+  if [[ ! -f "tokenizer.model" ]]; then
+    echo "tokenizer.model is missing."
     exit 1
   fi

   ${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner \
-    --model_path=phi-3-mini.pte \
-    --tokenizer_path=tokenizer.bin \
+    --model_path=model.pte \
+    --tokenizer_path=tokenizer.model \
     --seq_len=60 \
     --temperature=0 \
     --prompt="<|system|>
@@ -92,7 +91,7 @@ What is the capital of France?<|end|>
 cmake_install_executorch_libraries
 cmake_build_phi_3_mini

-# Step 2. Export the tokenizer and model
+# Step 2. Export the model
 prepare_tokenizer
 export_phi_3_mini
.ci/scripts/test_qnn_static_llama.sh

Lines changed: 0 additions & 69 deletions
This file was deleted.

.ci/scripts/test_qnn_static_llm.sh

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -euxo pipefail
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+TASK_NAME=$1
+if [[ -z "${TASK_NAME:-}" ]]; then
+  echo "Missing task name, exiting..."
+  exit 1
+fi
+
+
+# Download QNN_SDK. If already downloaded, export environment path
+source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
+install_qnn
+
+export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
+export PYTHONPATH=".."
+cp schema/program.fbs exir/_serialize/program.fbs
+cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+which "${PYTHON_EXECUTABLE}"
+
+# Although static llama CI does not require graphviz, it is required by test_qnn_delegate.py
+pip install graphviz
+
+set +e
+
+echo "Executing task: $TASK_NAME"
+if [[ "${TASK_NAME}" == "stories_110m" ]]; then
+  # Download stories llama110m artifacts
+  download_stories_model_artifacts
+  echo "Creating tokenizer.bin"
+  $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
+
+  # Compile only as weight sharing is not applicable on x86.
+  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --compile_only
+  exit_code1=$?
+
+  # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
+  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./stories_110m_accuracy --llama_artifacts . --enable_x86_64
+  exit_code2=$?
+
+  # Check the exit codes and print messages
+  if [ $exit_code1 -ne 0 ]; then
+    echo "Static Llama compile only with weight sharing test failed. $exit_code1."
+  fi
+
+  if [ $exit_code2 -ne 0 ]; then
+    echo "Static Llama accuracy test failed. $exit_code2."
+  fi
+
+  if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ]; then
+    exit 1
+  else
+    exit 0
+  fi
+
+elif [[ "${TASK_NAME}" == "stories_260k_bc" ]]; then
+
+  # Check BC
+  bash backends/qualcomm/bc/test_qnn_static_llama_bc.sh
+  exit_code1=$?
+  if [ $exit_code1 -ne 0 ]; then
+    exit 1
+  else
+    exit 0
+  fi
+
+elif [[ "${TASK_NAME}" == "smollm2_135m" ]]; then
+  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_static_smollm2 --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./static_smollm2 --enable_x86_64
+  exit_code1=$?
+  if [ $exit_code1 -ne 0 ]; then
+    exit 1
+  else
+    exit 0
+  fi
+else
+  echo "Unsupported task: $TASK_NAME"
+  exit 1
+fi
