pytorch
diff --git a/‎.ci/scripts/test_llama.sh‎
Lines changed: 11 additions & 0 deletions b/‎.ci/scripts/test_llama.sh‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎.github/workflows/build-wheels-linux.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/build-wheels-linux.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/build-wheels-m1.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/build-wheels-m1.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 3 additions & 1 deletion b/‎.github/workflows/pull.yml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 36 additions & 0 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎.gitmodules‎
Lines changed: 3 additions & 0 deletions b/‎.gitmodules‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 9 additions & 4 deletions b/‎CONTRIBUTING.md‎
Lines changed: 9 additions & 4 deletions
diff --git a/‎backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.mm‎
Lines changed: 29 additions & 16 deletions b/‎backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.mm‎
Lines changed: 29 additions & 16 deletions
diff --git a/‎backends/apple/coreml/scripts/install_requirements.sh‎
Lines changed: 1 addition & 1 deletion b/‎backends/apple/coreml/scripts/install_requirements.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/apple/coreml/test/test_coreml_partitioner.py‎
Lines changed: 5 additions & 13 deletions b/‎backends/apple/coreml/test/test_coreml_partitioner.py‎
Lines changed: 5 additions & 13 deletions
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
       MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
       shift 2
       ;;
+    -pt2e_quantize)
+      PT2E_QUANTIZE="$2"
+      shift 2
+      ;;
     -upload)
       UPLOAD_DIR="$2"
       shift 2
@@ -44,6 +48,9 @@ MODE=${MODE:-"xnnpack+custom"}
 # Default UPLOAD_DIR to empty string if not set
 UPLOAD_DIR="${UPLOAD_DIR:-}"
 
+# Default PT2E_QUANTIZE to empty string if not set
+PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
+
 if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
     echo "Expecting atleast 4 positional arguments"
     echo "Usage: [...]"
@@ -234,6 +241,10 @@ if [[ "${COREML}" == "ON" ]]; then
 fi
 if [[ "${QNN}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
+  echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
+  if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
+    EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
+  fi
 fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
 
@@ -27,6 +27,7 @@ jobs:
       test-infra-ref: main
       with-cuda: disabled
       with-rocm: disabled
+      python-versions: '["3.10", "3.11", "3.12"]'
 
   build:
     needs: generate-matrix
 
@@ -27,6 +27,7 @@ jobs:
       test-infra-ref: main
       with-cuda: disabled
       with-rocm: disabled
+      python-versions: '["3.10", "3.11", "3.12"]'
 
   build:
     needs: generate-matrix
 
@@ -368,6 +368,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
+        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
         mode: [qnn]
       fail-fast: false
     with:
@@ -384,6 +385,7 @@ jobs:
         DTYPE=${{ matrix.dtype }}
         BUILD_TOOL="cmake"
         MODE=${{ matrix.mode }}
+        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
 
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
@@ -393,7 +395,7 @@ jobs:
         # Install requirements for export_llama
         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
         # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
 
   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux
 
@@ -441,3 +441,39 @@ jobs:
 
         cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
         echo "::endgroup::"
+
+
+  test-llama-runner-qnn-linux:
+    name: test-llama-runner-qnn-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      matrix:
+        dtype: [fp32]
+        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
+        mode: [qnn]
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job starts in the base conda env rather than the one set up by the image, so activate the last env reported by `conda env list`
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+        DTYPE=${{ matrix.dtype }}
+        MODE=${{ matrix.mode }}
+        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+        # Set up ExecuTorch with the selected build tool
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
+        # Install requirements for export_llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+        # Test llama2 (stories110M model) in the matrix-selected mode, dtype and PT2E quantization scheme
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
@@ -64,6 +64,9 @@
 [submodule "third-party/pybind11"]
 	path = third-party/pybind11
 	url = https://github.com/pybind/pybind11.git
+[submodule "backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3"]
+	path = backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3
+	url = https://github.com/foss-xtensa/nnlib-FusionG3/
 [submodule "third-party/ao"]
 	path = third-party/ao
 	url = https://github.com/pytorch/ao.git
@@ -283,10 +283,15 @@ for basics.
    - If the reviewers have requests or questions, follow up with them.
    - The goal of the reviewer is to ensure that the code in the `main` branch of
      the repo is consistent, maintainable, and of high quality.
-1. Once approved, your reviewer will import the PR into Meta's internal system
-   and merge it from there.
-   - If the PR is approved and not merged within a few business days, please
-     comment on the PR to ask about its status.
+1. Once the PR has been approved:
+   - If you have write permission in this repo, you can merge the PR yourself
+     by clicking the "Squash and merge" button once all CI signals are green
+     (passing).
+   - If you don't have write permission in this repo, the reviewer will take
+     care of merging the PR. The reviewer may import the PR into Meta's internal
+     system to validate it against internal CI.
+   - If the PR is approved but not merged within 5 business days, please comment
+     on the PR to ask about its status.
    - Note that if the `main` [CI](#continuous-integration) jobs are broken, we
      will only merge PRs that fix the broken jobs until all critical jobs are
      fixed.
 
@@ -26,25 +26,38 @@ + (nullable NSURL *)compileModelAtURL:(NSURL *)modelURL
 #else
     __block NSError *localError = nil;
     __block NSURL *result = nil;
-    
-    dispatch_semaphore_t sema = dispatch_semaphore_create(0);
-    [MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
-        result = [tempURL copy];
-        localError = compilationError;
-        dispatch_semaphore_signal(sema);
-    }];
-    
-    long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
-    if (status != 0) {
+
+    if (@available(iOS 16, macOS 13, watchOS 9, tvOS 16, *)) {
+        dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+        [MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
+            result = [tempURL copy];
+            localError = compilationError;
+            dispatch_semaphore_signal(sema);
+        }];
+
+        long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
+        if (status != 0) {
+            ETCoreMLLogErrorAndSetNSError(error,
+                                        ETCoreMLErrorCompilationFailed,
+                                        "%@: Failed to compile model in %f seconds.",
+                                        NSStringFromClass(ETCoreMLModelCompiler.class),
+                                        maxWaitTimeInSeconds);
+            return nil;
+        }
+    } else {
+        result = [MLModel compileModelAtURL:modelURL error:&localError];
+    }
+
+    if (localError) {
         ETCoreMLLogErrorAndSetNSError(error,
-                                      ETCoreMLErrorCompilationFailed,
-                                      "%@: Failed to compile model in %f seconds.",
-                                      NSStringFromClass(ETCoreMLModelCompiler.class),
-                                      maxWaitTimeInSeconds);
+                                    ETCoreMLErrorCompilationFailed,
+                                    "%@: Failed to compile model, error: %@",
+                                    NSStringFromClass(ETCoreMLModelCompiler.class),
+                                    localError);
         return nil;
+    } else {
+        return result;
     }
-    
-    return result;
 #endif
 }
 
 
@@ -24,7 +24,7 @@ rm -rf "$COREML_DIR_PATH/third-party"
 mkdir "$COREML_DIR_PATH/third-party"
 
 echo "${green}ExecuTorch: Cloning coremltools."
-git clone --depth 1 --branch 8.0 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
+git clone --depth 1 --branch 8.1 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
 cd $COREMLTOOLS_DIR_PATH
 
 STATUS=$?
 
@@ -71,23 +71,15 @@ def test_vit_skip_conv(self):
             )
         )
 
-        conv_block = ["aten.convolution.default", "executorch_call_delegate"]
-        safe_softmax_block = [
-            "getitem",
-            "getitem",
-            "getitem",
-            "getitem",
-            "aten.any.dim",
-            "executorch_call_delegate",
-        ]
-        final_block = ["getitem"]
-        total = conv_block + 12 * safe_softmax_block + final_block
-
         assert [
             node.target.__name__
             for node in delegated_program_manager.exported_program().graph.nodes
             if node.op == "call_function"
-        ] == total
+        ] == [
+            "aten.convolution.default",
+            "executorch_call_delegate",
+            "getitem",
+        ]
 
     def test_buffer(self):
         embedding_dim = 3