Update on "implement position encoding for shifted tokens"
AttentionSink uses tokens' positions in the KVCache rather than their positions in the original text. When tokens are shifted in the KVCache, the position embeddings of their q and k need to be updated.
The original [implementation](https://github.com/mit-han-lab/streaming-llm) of AttentionSink with RoPE caches the unrotated q and k in the KVCache and applies the position embedding during inference.
This PR adds `RopeWithAttentionSink`. It assumes that q and k are already encoded with their original positions. When we shift tokens, we re-encode them by the position delta (see the sketch after the list below). This has two benefits:
- It minimizes code changes, since our existing `llama_transformer` applies the RoPE embedding before updating the KVCache.
- It avoids a performance regression for tokens that are not shifted, because we don't need to reapply position encoding in the KVCache for them.
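Below is a minimal sketch, not the actual `RopeWithAttentionSink` implementation, of why re-encoding by the position delta works: RoPE rotations compose, so rotating already-rotated keys by `delta` gives the same result as rotating the raw keys at `old_pos + delta`. The helpers `rope_freqs` and `apply_rope` are illustrative stand-ins, not functions from the codebase.

```python
import torch

def rope_freqs(positions: torch.Tensor, head_dim: int, base: float = 10000.0) -> torch.Tensor:
    """Complex rotation factors e^{i * pos * theta}, shape (len(positions), head_dim // 2).

    Negative positions (e.g. a shift delta) naturally give the inverse rotation.
    """
    inv_freq = 1.0 / (base ** (torch.arange(0, head_dim, 2).float() / head_dim))
    angles = torch.outer(positions.float(), inv_freq)
    return torch.polar(torch.ones_like(angles), angles)

def apply_rope(x: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor:
    """Rotate x of shape (..., seq, head_dim) by per-position complex factors."""
    x_c = torch.view_as_complex(x.float().reshape(*x.shape[:-1], -1, 2))
    return torch.view_as_real(x_c * freqs).flatten(-2).type_as(x)

head_dim = 64
k = torch.randn(1, 4, head_dim)            # 4 raw key vectors
old_pos = torch.arange(4, 8)               # cache positions they were encoded with
delta = -4                                 # shift left by 4 after evicting non-sink tokens
new_pos = old_pos + delta

k_cached = apply_rope(k, rope_freqs(old_pos, head_dim))                       # what the KVCache holds
k_rerotated = apply_rope(k_cached, rope_freqs(torch.full((4,), delta), head_dim))  # re-encode by delta only
k_expected = apply_rope(k, rope_freqs(new_pos, head_dim))                     # raw k rotated at new positions

assert torch.allclose(k_rerotated, k_expected, atol=1e-5)
```

Because the delta rotation is only applied to the slots that actually move, unshifted tokens keep their cached values untouched, which is where the no-regression benefit above comes from.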
Differential Revision: [D65366440](https://our.internmc.facebook.com/intern/diff/D65366440/)
[ghstack-poisoned]