Commit d8716ff
Merge branch 'main' into toupstream/slice_op
2 parents: 71fbef4 + 1f114f1

207 files changed: +9966 −2430 lines


.ci/docker/requirements-ci.txt
Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 mpmath==1.3.0
 numpy>=2.0.0; python_version >= '3.10'
 PyYAML==6.0.1
-ruamel.yaml==0.17.32
+ruamel.yaml==0.18.15
 sympy==1.12
 timm==0.6.13
 tomli==2.0.1
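The only change here is the ruamel.yaml pin, 0.17.32 to 0.18.15. Worth flagging for any tooling that imports ruamel.yaml directly: the 0.18 series drops the long-deprecated module-level `load()`/`dump()` functions, leaving the `YAML()` instance API. A minimal sketch of that API (illustrative, not from this repo) that behaves the same on both pins:

```python
from ruamel.yaml import YAML

# Instance-based API; the old module-level ruamel.yaml.load()/dump() are gone in 0.18.
yaml = YAML(typ="safe")
data = yaml.load("requirements:\n  - mpmath==1.3.0\n  - sympy==1.12\n")
print(data["requirements"])  # ['mpmath==1.3.0', 'sympy==1.12']
```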

.ci/scripts/test-cuda-build.sh
Lines changed: 0 additions & 3 deletions

@@ -27,9 +27,6 @@ test_executorch_cuda_build() {
   nvcc --version || echo "nvcc not found"
   nvidia-smi || echo "nvidia-smi not found"
 
-  # Set CMAKE_ARGS to enable CUDA build - ExecuTorch will handle PyTorch installation automatically
-  export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
-
   echo "=== Starting ExecuTorch Installation ==="
   # Install ExecuTorch with CUDA support with timeout and error handling
   timeout 5400 ./install_executorch.sh || {

.ci/scripts/test_llama_lora.sh
Lines changed: 46 additions & 4 deletions

@@ -55,7 +55,7 @@ cmake_build_llama_runner
 # Constants.
 RUNTIME_ARGS="--tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
 PROMPT="What happens if you eat watermelon seeds?"
-EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"
+EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C and"
 
 # Export LoRA PTE file.
 MODEL_NAME="llama_3_2_1B_lora"
@@ -94,7 +94,7 @@ else
   exit 1
 fi
 
-# Export LoRA PTE, PTD file.
+# Export LoRA PTE, foundation PTD file.
 MODEL_SEPARATE="${MODEL_NAME}_separate"
 $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
   base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
@@ -114,20 +114,62 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
 NOW=$(date +"%H:%M:%S")
 echo "Starting to run llama runner at ${NOW}"
 # shellcheck source=/dev/null
-cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_path=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_paths=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
 NOW=$(date +"%H:%M:%S")
 echo "Finished at ${NOW}"
 
 RESULT2=$(cat result2.txt)
 if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT2}"
+  # Do not clean up files if test passes, as they're re-used in the next test.
   echo "Success"
-  cleanup_files
 else
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT2}"
   echo "Failure; results not the same"
   cleanup_files
   exit 1
 fi
+
+# Export LoRA PTE, LoRA PTD, foundation PTD file.
+MODEL_PROGRAM_ONLY="${MODEL_NAME}_program"
+MODEL_LORA_WEIGHTS="lora_weights"
+MODEL_FOUNDATION_WEIGHTS="foundation_weights"
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+  base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+  base.params="${DOWNLOADED_PATH}/params.json" \
+  base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+  base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+  base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+  model.use_kv_cache=true \
+  model.use_sdpa_with_kv_cache=true \
+  model.dtype_override="fp32" \
+  backend.xnnpack.enabled=true \
+  backend.xnnpack.extended_ops=true \
+  export.output_name="${MODEL_PROGRAM_ONLY}.pte" \
+  export.foundation_weights_file="${MODEL_FOUNDATION_WEIGHTS}.ptd" \
+  export.lora_weights_file="${MODEL_LORA_WEIGHTS}.ptd"
+
+# Run llama runner.
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_PROGRAM_ONLY}.pte --data_paths="${MODEL_FOUNDATION_WEIGHTS}.ptd,${MODEL_LORA_WEIGHTS}.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result3.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT3=$(cat result3.txt)
+if [[ "${RESULT3}" == "${EXPECTED_PREFIX}"* ]]; then
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Success"
+else
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Failure; results not the same"
+  cleanup_files
+  exit 1
+fi
+
+cleanup_files
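Two runner-facing changes show up above: `--data_path` becomes `--data_paths`, and the flag now accepts a comma-separated list so a program-only `.pte` can be paired with separate foundation and LoRA `.ptd` weight files. A hypothetical Python helper (not part of the repo; binary path and flag names are taken from the script above) that assembles the same invocation:

```python
import shlex

# Hypothetical helper mirroring the test's runner invocation; purely illustrative.
def build_runner_cmd(pte: str, ptd_paths: list[str], prompt: str) -> list[str]:
    return [
        "cmake-out/examples/models/llama/llama_main",
        f"--model_path={pte}",
        f"--data_paths={','.join(ptd_paths)}",  # comma-separated list of .ptd files
        f"--prompt={prompt}",
    ]

print(shlex.join(build_runner_cmd(
    "llama_3_2_1B_lora_program.pte",
    ["foundation_weights.ptd", "lora_weights.ptd"],
    "What happens if you eat watermelon seeds?",
)))
```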

.githooks/pre-commit
Lines changed: 13 additions & 4 deletions

@@ -8,7 +8,11 @@ if git diff --cached --name-only | grep -q "^torch_pin.py$"; then
   echo "📝 Updating PyTorch commit pin..."
 
   # Run the update script
-  if python .github/scripts/update_pytorch_pin.py; then
+  hook_output=$(python .github/scripts/update_pytorch_pin.py 2>&1)
+  hook_status=$?
+  echo "$hook_output"
+
+  if [ $hook_status -eq 0 ]; then
     # Check if pytorch.txt was modified
     if ! git diff --quiet .ci/docker/ci_commit_pins/pytorch.txt; then
       echo "✅ PyTorch commit pin updated successfully"
@@ -19,9 +23,14 @@ if git diff --cached --name-only | grep -q "^torch_pin.py$"; then
       echo "ℹ️ PyTorch commit pin unchanged"
     fi
   else
-    echo "❌ Failed to update PyTorch commit pin"
-    echo "Please run: python .github/scripts/update_pytorch_pin.py"
-    exit 1
+    if echo "$hook_output" | grep -qi "rate limit exceeded"; then
+      echo "⚠️ PyTorch commit pin not updated due to GitHub API rate limiting."
+      echo "   Please manually update .ci/docker/ci_commit_pins/pytorch.txt if needed."
+    else
+      echo "❌ Failed to update PyTorch commit pin"
+      echo "Please run: python .github/scripts/update_pytorch_pin.py"
+      exit 1
+    fi
   fi
 fi
 
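The hook now captures the updater's combined output so it can treat GitHub API rate limiting as a warning rather than a hard failure. The same capture-then-branch pattern, sketched in Python purely for illustration, assuming only the script path the hook already uses:

```python
import subprocess
import sys

# Sketch of the hook's capture-then-branch logic; illustrative, not the hook itself.
result = subprocess.run(
    [sys.executable, ".github/scripts/update_pytorch_pin.py"],
    capture_output=True,
    text=True,
)
output = result.stdout + result.stderr  # the hook merges streams via 2>&1
print(output, end="")
if result.returncode != 0:
    if "rate limit exceeded" in output.lower():
        print("Rate limited; leaving the pin unchanged.")  # warn, but do not block
    else:
        sys.exit(1)  # any other failure still blocks the commit
```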

.github/scripts/update_pytorch_pin.py
Lines changed: 10 additions & 4 deletions

@@ -4,7 +4,6 @@
 import re
 import sys
 import urllib.request
-from datetime import datetime
 
 
 def parse_nightly_version(nightly_version):
@@ -53,7 +52,7 @@ def get_commit_hash_for_nightly(date_str):
         Commit hash string
     """
     api_url = "https://api.github.com/repos/pytorch/pytorch/commits"
-    params = f"?sha=nightly&per_page=100"
+    params = f"?sha=nightly&per_page=50"
     url = api_url + params
 
     req = urllib.request.Request(url)
@@ -74,14 +73,21 @@ def get_commit_hash_for_nightly(date_str):
         commit_msg = commit.get("commit", {}).get("message", "")
         # Check if the first line of commit message matches
         first_line = commit_msg.split("\n")[0].strip()
-        if first_line == target_title or first_line.startswith(f"{date_str} nightly"):
-            return commit["sha"]
+        if first_line.startswith(f"{date_str} nightly"):
+            return extract_hash_from_title(first_line)
 
     raise ValueError(
         f"Could not find commit with title matching '{target_title}' in nightly branch"
     )
 
 
+def extract_hash_from_title(title):
+    match = re.search(r"\(([0-9a-fA-F]{7,40})\)", title)
+    if not match:
+        raise ValueError(f"Could not extract commit hash from title '{title}'")
+    return match.group(1)
+
+
 def update_pytorch_pin(commit_hash):
     """
     Update .ci/docker/ci_commit_pins/pytorch.txt with the new commit hash.
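The new `extract_hash_from_title` helper pulls the upstream PyTorch hash out of the nightly commit title instead of returning the nightly branch commit's own SHA, which is what the old `return commit["sha"]` did. A quick standalone check of the regex against a hypothetical title (the real titles start with `<date> nightly`, per the `startswith` guard; the parenthesized hash is what the regex assumes):

```python
import re

def extract_hash_from_title(title):
    # Same pattern as the new helper: a 7-40 character hex hash in parentheses.
    match = re.search(r"\(([0-9a-fA-F]{7,40})\)", title)
    if not match:
        raise ValueError(f"Could not extract commit hash from title '{title}'")
    return match.group(1)

# Hypothetical nightly title, shaped to satisfy the script's checks.
print(extract_hash_from_title("2025-01-15 nightly release (0a1b2c3d4e5f)"))  # 0a1b2c3d4e5f
```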

.github/workflows/cuda.yml
Lines changed: 4 additions & 4 deletions

@@ -1,7 +1,7 @@
 # Test ExecuTorch CUDA Build Compatibility
 # This workflow tests whether ExecuTorch can be successfully built with CUDA support
 # across different CUDA versions (12.6, 12.8, 12.9) using the command:
-# CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+# ./install_executorch.sh
 #
 # Note: ExecuTorch automatically detects the system CUDA version using nvcc and
 # installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed.
@@ -43,7 +43,7 @@ jobs:
           set -eux
 
           # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version
-          # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
+          # and install the appropriate PyTorch wheel
           source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"
 
   # This job will fail if any of the CUDA versions fail
@@ -83,7 +83,7 @@
         script: |
           set -eux
 
-          PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+          PYTHON_EXECUTABLE=python ./install_executorch.sh
           export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
           PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
 
@@ -110,7 +110,7 @@
           set -eux
 
           echo "::group::Setup ExecuTorch"
-          CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+          ./install_executorch.sh
           echo "::endgroup::"
 
           echo "::group::Setup Huggingface"
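Per the workflow comments, ExecuTorch now detects the system CUDA version via nvcc and installs a matching PyTorch wheel on its own, which is why the `CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"` override disappears from every job (and from `.ci/scripts/test-cuda-build.sh` above). A rough sketch of what nvcc-based detection can look like; this is illustrative, not the actual logic inside `install_executorch.sh`:

```python
import re
import subprocess

# Rough sketch of nvcc-based CUDA detection; NOT the real install_executorch.sh code.
def detect_cuda_version():
    try:
        out = subprocess.run(
            ["nvcc", "--version"], capture_output=True, text=True
        ).stdout
    except FileNotFoundError:
        return None  # no CUDA toolchain on PATH
    match = re.search(r"release (\d+\.\d+)", out)  # e.g. "... release 12.6, V12.6.20"
    return match.group(1) if match else None

print(detect_cuda_version())  # e.g. "12.6", or None on a CPU-only machine
```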

.github/workflows/lint.yml
Lines changed: 22 additions & 13 deletions

@@ -143,19 +143,28 @@ jobs:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
     timeout: 90
     script: |
-      FILES_NEEDS_FORMAT=$(/opt/google-java-format -n \
-        extension/android/executorch_android/src/main/java/org/pytorch/executorch/*.java \
-        extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/*.java \
-        extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/*.java \
-        extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/*.java \
-        extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java \
-        extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/*.java)
+      FILES_NEEDS_FORMAT=$(find extension/android/executorch_android/src/main/java/org/pytorch/executorch \
+        extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm \
+        extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations \
+        extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch \
+        extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench \
+        extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench \
+        -type f -name "*.java" 2>/dev/null | \
+        xargs -r /opt/google-java-format -n)
+
       if [ -n "$FILES_NEEDS_FORMAT" ]; then
-        echo "Warning: The following files need formatting. Please use google-java-format."
-        echo "Use a binary from https://github.com/google/google-java-format/releases/"
-        echo "For example:"
-        echo "wget https://github.com/google/google-java-format/releases/download/v1.23.0/google-java-format_linux-x86-64"
-        echo "chmod +x google-java-format_linux-x86-64"
-        echo "./google-java-format_linux-x86-64 -i $FILES_NEEDS_FORMAT"
+        echo "Warning: The following files need formatting:"
+        echo "$FILES_NEEDS_FORMAT"
+        echo ""
+        echo "Please use google-java-format from https://github.com/google/google-java-format/releases/"
+        echo ""
+        echo "To fix, run one of these commands:"
+        echo "  # Using xargs (recommended):"
+        echo "  find <paths> -type f -name '*.java' | xargs google-java-format -i"
+        echo ""
+        echo "  # Or format specific files:"
+        echo "$FILES_NEEDS_FORMAT" | while IFS= read -r file; do
+          echo "  google-java-format -i \"$file\""
+        done
         exit 1
       fi
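The lint step trades non-recursive shell globs for `find ... | xargs -r`, which walks subdirectories and skips the formatter entirely when nothing matches. For comparison, the same collection in Python with pathlib; recursing from the two `executorch` source roots already covers the `extension/llm` and `annotations` subdirectories the workflow lists separately (paths taken from the workflow):

```python
from pathlib import Path

# Recursive .java collection, comparable to the workflow's `find ... -name "*.java"`.
roots = [
    "extension/android/executorch_android/src/main/java/org/pytorch/executorch",
    "extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch",
    "extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench",
    "extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench",
]
java_files = sorted(p for root in roots for p in Path(root).rglob("*.java"))
if java_files:  # like `xargs -r`, do nothing when the list is empty
    print("\n".join(str(p) for p in java_files))
```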

.github/workflows/pull.yml
Lines changed: 1 addition & 0 deletions

@@ -892,6 +892,7 @@ jobs:
       # Install test requirements
       pip install -r backends/nxp/requirements-tests-pypi.txt
       pip install -r backends/nxp/requirements-tests-eiq.txt
+      PYTHON_EXECUTABLE=python bash examples/nxp/setup.sh
 
       # Run pytest
       PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh

README.md
Lines changed: 7 additions & 3 deletions

@@ -104,14 +104,16 @@ outputs = method.execute([torch.randn(1, 3, 224, 224)])
 
 Module module("model.pte");
 auto tensor = make_tensor_ptr({2, 2}, {1.0f, 2.0f, 3.0f, 4.0f});
-auto outputs = module.forward({tensor});
+auto outputs = module.forward(tensor);
 ```
 
 **[Swift (iOS)](https://docs.pytorch.org/executorch/main/ios-section.html)**
 ```swift
+import ExecuTorch
+
 let module = Module(filePath: "model.pte")
-let input = Tensor<Float>([1.0, 2.0, 3.0, 4.0])
-let outputs: [Value] = try module.forward([input])
+let input = Tensor<Float>([1.0, 2.0, 3.0, 4.0], shape: [2, 2])
+let outputs = try module.forward(input)
 ```
 
 **[Kotlin (Android)](https://docs.pytorch.org/executorch/main/android-section.html)**
@@ -151,6 +153,8 @@ runner->generate("Hello, how are you?", config);
 
 **[Swift (iOS)](https://docs.pytorch.org/executorch/main/llm/run-on-ios.html)**
 ```swift
+import ExecuTorchLLM
+
 let runner = TextRunner(modelPath: "llama.pte", tokenizerPath: "tiktoken.bin")
 try runner.generate("Hello, how are you?", Config {
   $0.sequenceLength = 128

backends/aoti/CMakeLists.txt
Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 find_package_torch()
 
 # Common AOTI functionality - combines all AOTI common components
-set(_aoti_common_sources aoti_model_container.cpp common_shims.cpp)
+set(_aoti_common_sources common_shims.cpp)
 add_library(aoti_common STATIC ${_aoti_common_sources})
 target_include_directories(
   aoti_common
