
         PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"

+  test-pip-install-editable-mode-macos:
+    name: test-pip-install-editable-mode-macos
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: macos-m1-stable
+      python-version: '3.11'
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        # Debug
+        which pip
+        bash .ci/scripts/setup-conda.sh
+        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash ./install_executorch.sh --editable --pybind xnnpack
+        # Try to import extension library
+        python -c "from executorch.extension.llm.custom_ops import custom_ops"
+
   test-models-macos:
     name: test-models-macos
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
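Note: the new test-pip-install-editable-mode-macos job above only smoke-tests that the custom-ops extension still imports after an editable install. A rough local equivalent, assuming an executorch checkout (the __path__ check is an extra illustration, not part of the job):

    ./install_executorch.sh --editable --pybind xnnpack
    # An editable install should resolve the package to the source tree rather than site-packages
    python -c "import executorch; print(executorch.__path__)"
    python -c "from executorch.extension.llm.custom_ops import custom_ops"
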
@@ -374,7 +399,13 @@ jobs:
     secrets: inherit
     strategy:
       matrix:
-        hf_model_repo: [google/gemma-2-2b]
+        hf_model_id: [
+          google/gemma-2-2b,
+          Qwen/Qwen2.5-0.5B,
+          HuggingFaceTB/SmolLM2-135M,
+          meta-llama/Llama-3.2-1B,
+          allenai/OLMo-1B-hf
+        ]
       fail-fast: false
     with:
       secrets-env: EXECUTORCH_HF_TOKEN
@@ -389,66 +420,39 @@ jobs:
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-        echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-        rm -rf cmake-out
-        cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-            -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out .
-        cmake --build cmake-out -j9 --target install --config Release
-
-        echo "Build llama runner"
-        dir="examples/models/llama"
-        cmake \
-            -DCMAKE_INSTALL_PREFIX=cmake-out \
-            -DCMAKE_BUILD_TYPE=Release \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-            -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-            -DEXECUTORCH_BUILD_XNNPACK=ON \
-            -DPYTHON_EXECUTABLE=python \
-            -Bcmake-out/${dir} \
-            ${dir}
-        cmake --build cmake-out/${dir} -j9 --config Release
         echo "::endgroup::"

-        echo "::group::Set up HuggingFace Dependencies"
-        if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
-          echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
-          exit 1
-        fi
+        echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+        git clone https://github.com/huggingface/optimum-executorch
+        cd optimum-executorch
+        # There is no release yet, for CI stability, always test from the same commit on main
+        git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25
+        pip install .
         pip install accelerate sentencepiece
         pip list
         echo "::endgroup::"

-        echo "::group::Export to ExecuTorch"
-        TOKENIZER_FILE=tokenizer.model
-        TOKENIZER_BIN_FILE=tokenizer.bin
-        ET_MODEL_NAME=et_model
-        DOWNLOADED_TOKENIZER_FILE_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${{ matrix.hf_model_repo }}" --files "${TOKENIZER_FILE}")
-        if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" ]; then
-          echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-          python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" -o ./${TOKENIZER_BIN_FILE}
-          ls ./tokenizer.bin
-        else
-          echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-          exit 1
-        fi
-
-        python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-        cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        # Pass matrix variable as environment variable
+        export MODEL_ID="${{ matrix.hf_model_id }}"
+        python -c "
+        import os
+        from optimum.executorch import ExecuTorchModelForCausalLM
+        from transformers import AutoTokenizer
+
+        model_id = os.getenv('MODEL_ID')
+        print(f'Loading model: {model_id}')
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        generated_text = model.text_generation(
+            tokenizer=tokenizer,
+            prompt='Simply put, the theory of relativity states that',
+            max_seq_len=64
+        )
+        print(generated_text)
+        "
         echo "::endgroup::"


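For reference, the new export-and-generate step can be reproduced locally for a single matrix entry, assuming optimum-executorch was installed from the pinned commit as in the job above; the model id and prompt here are illustrative:

    export MODEL_ID="Qwen/Qwen2.5-0.5B"   # any entry from the hf_model_id matrix
    python -c "
    import os
    from optimum.executorch import ExecuTorchModelForCausalLM
    from transformers import AutoTokenizer

    # Export the Hugging Face model through the XNNPACK recipe, then run generation
    model_id = os.getenv('MODEL_ID')
    model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    print(model.text_generation(tokenizer=tokenizer, prompt='Simply put, the theory of relativity states that', max_seq_len=64))
    "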