pytorch
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/pytorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/setup-linux.sh‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/setup-linux.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/test_backend_linux.sh‎
Lines changed: 57 additions & 0 deletions b/‎.ci/scripts/test_backend_linux.sh‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎.ci/scripts/test_backend_macos.sh‎
Lines changed: 30 additions & 0 deletions b/‎.ci/scripts/test_backend_macos.sh‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎.ci/scripts/test_huggingface_optimum_model.py‎
Lines changed: 108 additions & 15 deletions b/‎.ci/scripts/test_huggingface_optimum_model.py‎
Lines changed: 108 additions & 15 deletions
diff --git a/‎.ci/scripts/test_ios_ci.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_ios_ci.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/unittest-buck2.sh‎
Lines changed: 4 additions & 3 deletions b/‎.ci/scripts/unittest-buck2.sh‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎.ci/scripts/utils.sh‎
Lines changed: 0 additions & 2 deletions b/‎.ci/scripts/utils.sh‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎.github/workflows/add-unanswered-to-project.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/add-unanswered-to-project.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/nightly.yml‎
Lines changed: 48 additions & 0 deletions b/‎.github/workflows/nightly.yml‎
Lines changed: 48 additions & 0 deletions
@@ -1 +1 @@
-6fc0ad22f0a07b6f38d138861c56a765d5a9bb02
+e7152ff8a6a929a0db7f3f4a72a5b6d471769cd3
@@ -11,6 +11,7 @@ set -exu
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
 read -r BUILD_TOOL BUILD_MODE EDITABLE < <(parse_args "$@")
+echo "Build tool: $BUILD_TOOL, Mode: $BUILD_MODE"
 
 # As Linux job is running inside a Docker container, all of its dependencies
 # have already been installed, so we use PyTorch build from source here instead
 
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+set -eux
+# Usage: test_backend_linux.sh <suite> <flow> <artifact_dir>
+SUITE=$1
+FLOW=$2
+ARTIFACT_DIR=$3
+
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+
+echo "Running backend test job for suite $SUITE, flow $FLOW."
+echo "Saving job artifacts to $ARTIFACT_DIR."
+
+# The generic Linux job chooses to use base env, not the one setup by the image
+eval "$(conda shell.bash hook)"
+CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+conda activate "${CONDA_ENV}"
+
+export PYTHON_EXECUTABLE=python
+
+# CMake options to use, in addition to the defaults.
+EXTRA_BUILD_ARGS=""
+
+if [[ "$FLOW" == *qnn* ]]; then
+    # Setup QNN sdk and deps - note that this is a bit hacky due to the nature of the
+    # Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
+    # cleaned up.
+    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+    PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+    QNN_X86_LIB_DIR="$(realpath build-x86/lib/)"
+    QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
+    export LD_LIBRARY_PATH="$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
+
+    # TODO Get SDK root from install scripts
+    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
+fi
+
+if [[ "$FLOW" == *vulkan* ]]; then
+    # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+    source .ci/scripts/setup-vulkan-linux-deps.sh
+
+    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
+fi
+
+# We need the runner to test the built library.
+PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
+
+EXIT_CODE=0
+python -m executorch.backends.test.suite.runner "$SUITE" --flow "$FLOW" --report "$REPORT_FILE" || EXIT_CODE=$?
+
+# Generate markdown summary; the runner's exit code is forwarded via --exit-code.
+python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" --exit-code "$EXIT_CODE" > "${GITHUB_STEP_SUMMARY:-step_summary.md}"
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+set -eux
+# Usage: test_backend_macos.sh <suite> <flow> <artifact_dir>
+SUITE=$1
+FLOW=$2
+ARTIFACT_DIR=$3
+
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+
+echo "Running backend test job for suite $SUITE, flow $FLOW."
+echo "Saving job artifacts to $ARTIFACT_DIR."
+
+${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
+
+bash .ci/scripts/setup-conda.sh
+eval "$(conda shell.bash hook)"
+# Export so that child processes such as setup-macos.sh inherit the variable.
+export PYTHON_EXECUTABLE=python
+${CONDA_RUN} --no-capture-output .ci/scripts/setup-macos.sh --build-tool cmake --build-mode Release
+
+EXIT_CODE=0
+${CONDA_RUN} --no-capture-output python -m executorch.backends.test.suite.runner "$SUITE" --flow "$FLOW" --report "$REPORT_FILE" || EXIT_CODE=$?
+
+# Generate markdown summary; the runner's exit code is forwarded via --exit-code.
+${CONDA_RUN} --no-capture-output python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" --exit-code "$EXIT_CODE" > "${GITHUB_STEP_SUMMARY:-step_summary.md}"
@@ -1,7 +1,11 @@
 import argparse
+import gc
+import logging
+import math
 import subprocess
 import tempfile
 from pathlib import Path
+from typing import List
 
 import torch
 from datasets import load_dataset
@@ -15,6 +19,7 @@
 )
 from transformers import (
     AutoConfig,
+    AutoModelForCausalLM,
     AutoModelForImageClassification,
     AutoProcessor,
     AutoTokenizer,
@@ -37,6 +42,56 @@ def cli_export(command, model_dir):
         print(f"Export failed with error: {e}")
 
 
+def check_causal_lm_output_quality(
+    model_id: str,
+    generated_tokens: "List[int] | torch.Tensor",
+    max_perplexity_threshold: float = 100.0,
+):
+    """
+    Evaluates the quality of text generated by a causal language model by calculating its perplexity.
+
+    Args:
+        model_id: HuggingFace model identifier (e.g., "google/gemma2-2b")
+        generated_tokens: Token ids generated by the exported model (list of ints or tensor)
+        max_perplexity_threshold: Maximum acceptable perplexity (lower is better)
+
+    Returns:
+        bool: True if the perplexity is at or below max_perplexity_threshold
+    """
+    logging.info(f"Starting perplexity check with model '{model_id}' ...")
+    # Load the eager model that scores the generated tokens
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        low_cpu_mem_usage=True,
+        use_cache=False,
+        torch_dtype=torch.bfloat16,
+    )
+
+    # Accept a plain list as well as a tokenizer tensor; a 1-D input gains a batch dim
+    input_ids = torch.atleast_2d(torch.as_tensor(generated_tokens, dtype=torch.long))
+    with torch.no_grad():
+        outputs = model(input_ids=input_ids, labels=input_ids)
+
+    # The loss is the average negative log-likelihood; its exp is the perplexity
+    perplexity = math.exp(outputs.loss.item())
+
+    is_quality_ok = perplexity <= max_perplexity_threshold
+    if is_quality_ok:
+        logging.info(
+            f"✓ Perplexity check passed: {perplexity:.2f} <= {max_perplexity_threshold}"
+        )
+    else:
+        logging.warning(
+            f"✗ Perplexity check failed: {perplexity:.2f} > {max_perplexity_threshold}"
+        )
+
+    # Clean up immediately
+    del model, outputs
+    gc.collect()
+
+    return is_quality_ok
+
+
 def test_text_generation(model_id, model_dir, recipe, *, quantize=True, run_only=False):
     command = [
         "optimum-cli",
@@ -51,7 +106,19 @@ def test_text_generation(model_id, model_dir, recipe, *, quantize=True, run_only
         "--output_dir",
         model_dir,
     ]
-    if "coreml" in recipe:
+    if "xnnpack" in recipe:
+        command += [
+            "--use_custom_sdpa",
+            "--use_custom_kv_cache",
+        ]
+        if quantize:
+            command += [
+                "--qlinear",
+                "8da4w",
+                "--qembedding",
+                "8w",
+            ]
+    elif "coreml" in recipe:
         command += [
             "--disable_dynamic_shapes",
         ]
@@ -63,7 +130,9 @@ def test_text_generation(model_id, model_dir, recipe, *, quantize=True, run_only
                 "8w",
             ]
     else:
-        assert not quantize, "Quantization is not supported for non-CoreML recipes yet"
+        assert (
+            not quantize
+        ), "Quantization is only supported for XnnPack and CoreML recipes at the moment."
 
     if not run_only:
         cli_export(command, model_dir)
@@ -77,6 +146,14 @@ def test_text_generation(model_id, model_dir, recipe, *, quantize=True, run_only
         max_seq_len=64,
     )
     print(f"\nGenerated text:\n\t{generated_text}")
+    generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids
+
+    # Free memory before loading eager for quality check
+    del model
+    del tokenizer
+    gc.collect()
+
+    assert check_causal_lm_output_quality(model_id, generated_tokens) is True
 
 
 def test_fill_mask(model_id, model_dir, recipe, *, quantize=True, run_only=False):
@@ -278,23 +355,39 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
     )
     args = parser.parse_args()
 
-    model_to_model_id_and_test_function = {
-        "smollm": ("HuggingFaceTB/SmolLM2-135M", test_text_generation),  # works
-        "qwen3": ("Qwen/Qwen3-0.6B", test_text_generation),  # works
-        "olmo": ("allenai/OLMo-1B-hf", test_text_generation),  # works
-        "gemma3": ("unsloth/gemma-3-1b-it", test_text_generation),  # does not export
-        "phi4": (
+    _text_generation_mapping = {
+        "llama3.2-1b": ("NousResearch/Llama-3.2-1B", test_text_generation),
+        "qwen3-0.6b": ("Qwen/Qwen3-0.6B", test_text_generation),
+        "qwen3-1.7b": ("Qwen/Qwen3-1.7B", test_text_generation),
+        "gemma3-1b": (
+            "unsloth/gemma-3-1b-it",
+            test_text_generation,
+        ),  # does not export for CoreML
+        "phi4-mini": (
             "microsoft/Phi-4-mini-instruct",
             test_text_generation,
-        ),  # fails to lower
-        "llama3": ("NousResearch/Llama-3.2-1B", test_text_generation),  # works
-        "bert": ("google-bert/bert-base-uncased", test_fill_mask),  # works
-        "roberta": ("FacebookAI/xlmcl-roberta-base", test_fill_mask),  # works
-        "distilbert": ("distilbert/distilbert-base-uncased", test_fill_mask),  # works
-        "whisper": ("openai/whisper-tiny", test_whisper),  # works
+        ),  # fails to lower for CoreML
+        "smollm2-135m": ("HuggingFaceTB/SmolLM2-135M", test_text_generation),
+        "smollm3-3b": ("HuggingFaceTB/SmolLM3-3B", test_text_generation),
+        "olmo-1b": ("allenai/OLMo-1B-hf", test_text_generation),
+    }
+
+    _mask_fill_mapping = {
+        "bert": ("google-bert/bert-base-uncased", test_fill_mask),
+        "roberta": ("FacebookAI/xlmcl-roberta-base", test_fill_mask),
+        "distilbert": ("distilbert/distilbert-base-uncased", test_fill_mask),
+    }
+
+    _misc_model_mapping = {
+        "whisper": ("openai/whisper-tiny", test_whisper),
         "t5": ("google-t5/t5-small", test_t5),  # CoreML runime failure
-        "vit": ("google/vit-base-patch16-224", test_vit),  # works
+        "vit": ("google/vit-base-patch16-224", test_vit),
     }
+
+    model_to_model_id_and_test_function = (
+        _text_generation_mapping | _mask_fill_mapping | _misc_model_mapping
+    )
+
     if args.model not in model_to_model_id_and_test_function:
         raise ValueError(
             f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
 
@@ -36,7 +36,7 @@ say() {
 
 say "Cloning the Demo App"
 
-git clone --depth 1 https://github.com/pytorch-labs/executorch-examples.git
+git clone --depth 1 https://github.com/meta-pytorch/executorch-examples.git
 
 say "Installing CoreML Backend Requirements"
 
 
@@ -11,9 +11,10 @@ set -eux
 # TODO: can't query //kernels/prim_ops because of non-buckified stuff in OSS.
 buck2 query "//backends/apple/... + //backends/example/... + \
 //backends/mediatek/... + //backends/transforms/... + \
-//backends/xnnpack/... + //configurations/... + //kernels/aten/... + \
-//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
-//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
+//backends/xnnpack/... + //configurations/... + //extension/flat_tensor: + \
+//extension/llm/runner: + //kernels/aten/... + //kernels/optimized/... + \
+//kernels/portable/... + //kernels/quantized/... + //kernels/test/... + \
+//runtime/... + //schema/... + //test/... + //util/..."
 
 # TODO: optimized ops are unbuildable because they now use ATen; put
 # them back after we can use PyTorch in OSS buck.
 
@@ -131,8 +131,6 @@ build_executorch_runner_cmake() {
   else
       CXXFLAGS=""
   fi
-  # This command uses buck2 to gather source files and buck2 could crash flakily
-  # on MacOS
   CXXFLAGS="$CXXFLAGS" retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" ..
   popd || return
 
 
@@ -1,10 +1,10 @@
 name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136
 
 on:
- # schedule:
-  #  - cron: '0 * * * *'
   workflow_dispatch:
-
+  pull_request:
+    paths:
+      - .github/workflows/add-unanswered-to-project.yml
 jobs:
   add_to_project:
     runs-on: ubuntu-latest
 
@@ -36,3 +36,51 @@ jobs:
     uses: ./.github/workflows/_link_check.yml
     with:
       ref: ${{ github.sha }}
+
+  backend-test-linux:  # One job per (flow, suite) pair from the matrix below.
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    strategy:
+      fail-fast: false  # Let the other matrix combinations keep running when one fails.
+      matrix:
+        flow: [
+          qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w,
+          vulkan, vulkan_static_int8_per_channel,
+          xnnpack, xnnpack_dynamic_int8_per_channel, xnnpack_static_int8_per_channel, xnnpack_static_int8_per_tensor
+        ]
+        suite: [models, operators]
+    with:
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      runner: linux.4xlarge.memory
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: recursive
+      timeout: 120  # NOTE(review): presumably minutes - confirm against linux_job_v2.yml.
+      upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}  # Unique name per matrix entry.
+      script: |
+        set -eux
+
+        source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
+
+  backend-test-macos:  # One job per (flow, suite) pair from the matrix below.
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false  # Let the other matrix combinations keep running when one fails.
+      matrix:
+        flow: [coreml, coreml_static_int8]
+        suite: [models, operators]
+    with:
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      runner: macos-m1-stable
+      python-version: '3.12'  # Quoted so YAML keeps it a string rather than a float.
+      submodules: recursive
+      timeout: 120
+      upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
+      script: |
+        set -eux
+
+        # This is needed to get the prebuilt PyTorch wheel from S3
+        ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
+
+        source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-6fc0ad22f0a07b6f38d138861c56a765d5a9bb02`
	`1`	`+e7152ff8a6a929a0db7f3f4a72a5b6d471769cd3`