
Commit 33ef87d

Merge branch 'main' into ccache_enable
2 parents: 0375213 + d4cc258

13 files changed: +104 -21 lines

.github/workflows/trunk.yml

Lines changed: 29 additions & 0 deletions
@@ -718,3 +718,32 @@ jobs:
       build-mode: Release
       build-tool: cmake
       docker-image: executorch-ubuntu-22.04-clang12
+
+  unittest-nxp-neutron:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -eux
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Build and install Executorch
+        PYTHON_EXECUTABLE=python \
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
+        .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Install test requirements
+        pip install -r backends/nxp/requirements-tests.txt
+
+        # Run pytest
+        PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh

backends/nxp/requirements-tests.txt

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@ tensorflow==2.18.0
 pytest-mock
 tflite
 GvGen
-neutron-converter_SDK_25_03
+neutron_converter_SDK_25_03

backends/nxp/run_unittests.sh

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+set -eux
+
+SCRIPT_DIR=$(dirname $(readlink -fm $0))
+EXECUTORCH_DIR=$(dirname $(dirname $SCRIPT_DIR))
+
+cd $EXECUTORCH_DIR
+
+# '-c /dev/null' is used to ignore root level pytest.ini.
+PYTHONPATH=`cd ..; pwd` pytest -c /dev/null backends/nxp/tests/

docs/source/using-executorch-building-from-source.md

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ Or alternatively, [install conda on your machine](https://conda.io/projects/cond
 
 # Or you can directly do the following if dependencies are already installed
 # either via a previous invocation of `./install_executorch.sh` or by explicitly installing requirements via `./install_requirements.sh` first.
-pip install -e .
+pip install -e . --no-build-isolation
 ```
 
 If C++ files are being modified, you will still have to reinstall ExecuTorch from source.

examples/nxp/setup.sh

Lines changed: 1 addition & 1 deletion
@@ -7,4 +7,4 @@
 set -u
 
 # Install neutron-converter
-pip install --extra-index-url https://eiq.nxp.com/repository neutron-converter_SDK_25_03
+pip install --extra-index-url https://eiq.nxp.com/repository neutron_converter_SDK_25_03

install_executorch.py

Lines changed: 0 additions & 3 deletions
@@ -19,7 +19,6 @@
     install_optional_example_requirements,
     install_requirements,
     python_is_compatible,
-    TORCH_NIGHTLY_URL,
 )
 
 # Set up logging
@@ -221,8 +220,6 @@ def main(args):
             ".",
             "--no-build-isolation",
             "-v",
-            "--extra-index-url",
-            TORCH_NIGHTLY_URL,
         ]
     )
     subprocess.run(cmd, check=True)

kernels/portable/cpu/util/dtype_util.cpp

Lines changed: 2 additions & 0 deletions
@@ -27,6 +27,8 @@ bool check_tensor_dtype(
       return executorch::runtime::tensor_is_floating_type(t);
     case SupportedTensorDtypes::INTB:
       return executorch::runtime::tensor_is_integral_type(t, true);
+    case SupportedTensorDtypes::BOOL:
+      return executorch::runtime::tensor_is_type(t, ScalarType::Bool);
     case SupportedTensorDtypes::BOOL_OR_BYTE:
       return (executorch::runtime::tensor_is_type(
           t, ScalarType::Bool, ScalarType::Byte));
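
For orientation, the new BOOL case is strictly narrower than the existing BOOL_OR_BYTE case: it admits only tensors whose scalar type is exactly Bool. A minimal standalone sketch of the dispatch semantics, using simplified stand-in enums rather than the real ExecuTorch types:

#include <cassert>

// Simplified stand-ins for the real ExecuTorch types; illustrative only.
enum class ScalarType { Bool, Byte, Float };
enum class SupportedTensorDtypes { BOOL, BOOL_OR_BYTE };

// Mirrors the switch above: BOOL admits exactly Bool, while the looser
// BOOL_OR_BYTE also admits Byte.
bool check_dtype(ScalarType t, SupportedTensorDtypes d) {
  switch (d) {
    case SupportedTensorDtypes::BOOL:
      return t == ScalarType::Bool;
    case SupportedTensorDtypes::BOOL_OR_BYTE:
      return t == ScalarType::Bool || t == ScalarType::Byte;
  }
  return false;
}

int main() {
  assert(check_dtype(ScalarType::Bool, SupportedTensorDtypes::BOOL));
  assert(!check_dtype(ScalarType::Byte, SupportedTensorDtypes::BOOL));
  assert(check_dtype(ScalarType::Byte, SupportedTensorDtypes::BOOL_OR_BYTE));
  return 0;
}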

kernels/portable/cpu/util/dtype_util.h

Lines changed: 29 additions & 1 deletion
@@ -72,6 +72,16 @@ load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn_intb(const Tensor& t) {
   return result;
 }
 
+template <typename CTYPE_COMPUTE, const char* op_name>
+load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn_bool(const Tensor& t) {
+  ET_CHECK_MSG(
+      t.scalar_type() == ScalarType::Bool,
+      "Unhandled dtype %s for %s",
+      ::executorch::runtime::toString(t.scalar_type()),
+      op_name);
+  return internal::load_and_convert<CTYPE_COMPUTE, bool>;
+}
+
 template <typename CTYPE_COMPUTE, const char* op_name>
 load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn_bool_or_byte(
     const Tensor& t) {
@@ -165,6 +175,17 @@ store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn_intb(
   return result;
 }
 
+template <typename CTYPE_COMPUTE, const char* op_name>
+store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn_bool(
+    const Tensor& t) {
+  ET_CHECK_MSG(
+      t.scalar_type() == ScalarType::Bool,
+      "Unhandled dtype %s for %s",
+      ::executorch::runtime::toString(t.scalar_type()),
+      op_name);
+  return internal::convert_and_store<bool, CTYPE_COMPUTE>;
+}
+
 template <typename CTYPE_COMPUTE, const char* op_name>
 store_compute_to_tensor_fn<CTYPE_COMPUTE>
 get_store_compute_to_tensor_fn_bool_or_byte(const Tensor& t) {
@@ -219,6 +240,7 @@ enum class SupportedTensorDtypes {
   REALHBF16,
   FLOATHBF16,
   INTB,
+  BOOL,
   BOOL_OR_BYTE,
   // DEPRECATED: not likely to be correct; use SAME_AS_COMMON.
   SAME_AS_COMPUTE,
@@ -240,6 +262,8 @@ load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn_impl(
       return get_load_to_compute_fn_realhbf16<CTYPE_COMPUTE, op_name>(t);
     case SupportedTensorDtypes::INTB:
      return get_load_to_compute_fn_intb<CTYPE_COMPUTE, op_name>(t);
+    case SupportedTensorDtypes::BOOL:
+      return get_load_to_compute_fn_bool<CTYPE_COMPUTE, op_name>(t);
     case SupportedTensorDtypes::BOOL_OR_BYTE:
       return get_load_to_compute_fn_bool_or_byte<CTYPE_COMPUTE, op_name>(t);
     case SupportedTensorDtypes::SAME_AS_COMPUTE:
@@ -271,6 +295,8 @@ store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn(
           t);
     case SupportedTensorDtypes::INTB:
       return get_store_compute_to_tensor_fn_intb<CTYPE_COMPUTE, op_name>(t);
+    case SupportedTensorDtypes::BOOL:
+      return get_store_compute_to_tensor_fn_bool<CTYPE_COMPUTE, op_name>(t);
     case SupportedTensorDtypes::BOOL_OR_BYTE:
       return get_store_compute_to_tensor_fn_bool_or_byte<
           CTYPE_COMPUTE,
@@ -318,12 +344,14 @@ bool check_tensor_dtype(
     const ScalarType compute_type);
 
 /// Return the one output type we are willing to emit specialized code
-/// to handle, given a compute type of CTYPE_COMMON and supported
+/// to handle, given a compute type of CTYPE_COMPUTE and supported
 /// output types of out_dtypes.
 template <typename CTYPE_COMPUTE>
 inline constexpr ScalarType specialized_output_scalar_type(
     SupportedTensorDtypes out_dtypes) {
   switch (out_dtypes) {
+    case SupportedTensorDtypes::BOOL:
+      return ScalarType::Bool;
     case SupportedTensorDtypes::BOOL_OR_BYTE:
       return ScalarType::Bool;
     case SupportedTensorDtypes::REALHBBF16:
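
Taken together, the header changes make BOOL select internal::load_and_convert<CTYPE_COMPUTE, bool> on the load side and internal::convert_and_store<bool, CTYPE_COMPUTE> on the store side. A self-contained sketch of what that pair of helpers does, written as simplified stand-ins rather than the actual ExecuTorch implementations:

#include <cstdio>

// Simplified stand-ins for internal::load_and_convert and
// internal::convert_and_store; the real helpers live in
// kernels/portable/cpu/util/dtype_util.h.
template <typename To, typename From>
To load_and_convert(const void* ptr) {
  return static_cast<To>(*static_cast<const From*>(ptr));
}

template <typename To, typename From>
void convert_and_store(From value, void* ptr) {
  *static_cast<To*>(ptr) = static_cast<To>(value);
}

int main() {
  // Load a bool element into a float compute type, as the BOOL load path does.
  bool in = true;
  float compute = load_and_convert<float, bool>(&in); // 1.0f

  // Store a float compute result back into a bool output tensor element.
  bool out = false;
  convert_and_store<bool, float>(compute, &out); // true
  printf("compute=%.1f out=%d\n", compute, out);
  return 0;
}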

kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h

Lines changed: 8 additions & 12 deletions
@@ -72,20 +72,16 @@ class UnaryUfuncRealHBBF16ToFloatHBF16Test : public OperatorTest {
 
     auto expected = tf_out.make({1, 6}, expected_vector);
     if (IN_DTYPE == ScalarType::BFloat16 || OUT_DTYPE == ScalarType::BFloat16) {
-      double rtol = executorch::runtime::testing::internal::kDefaultRtol;
-      // It appears we need a higher tolerance for at least some ATen
-      // tests, like aten_op_acosh_test.
-      if (get_supported_features()->is_aten) {
-        rtol = 3e-3;
-      }
+      // Raise tolerance because both we and ATen run these
+      // computations at internal float32 precision rather than
+      // float64.
+      double rtol = 3e-3;
       EXPECT_TENSOR_CLOSE_WITH_TOL(out, expected, rtol, executorch::runtime::testing::internal::kDefaultBFloat16Atol);
     } else if (IN_DTYPE == ScalarType::Half || OUT_DTYPE == ScalarType::Half) {
-      double rtol = executorch::runtime::testing::internal::kDefaultRtol;
-      // It appears we need a higher tolerance for at least some ATen
-      // tests, like aten_op_acosh_test.
-      if (get_supported_features()->is_aten) {
-        rtol = 1e-3;
-      }
+      // Raise tolerance because both we and ATen run these
+      // computations at internal float32 precision rather than
+      // float64.
+      double rtol = 1e-3;
       EXPECT_TENSOR_CLOSE_WITH_TOL(out, expected, rtol, executorch::runtime::testing::internal::kDefaultHalfAtol);
     } else {
       EXPECT_TENSOR_CLOSE(out, expected);
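
As a sanity check on the new fixed tolerances (back-of-the-envelope arithmetic, not part of the commit): a value rounded to half precision carries 10 explicit mantissa bits and one rounded to bfloat16 carries 7, so relative rounding error is on the order of 2^-10 ≈ 1e-3 and 2^-7 ≈ 8e-3 respectively, which is the scale the chosen rtols of 1e-3 and 3e-3 sit at, with the dtype-specific atols absorbing error near zero:

#include <cmath>
#include <cstdio>

int main() {
  // One unit in the last place at 1.0 for each reduced-precision format:
  // fp16 carries 10 explicit mantissa bits, bfloat16 carries 7.
  printf("fp16 ulp at 1.0:     %g\n", std::ldexp(1.0, -10)); // ~9.77e-4 vs rtol 1e-3
  printf("bfloat16 ulp at 1.0: %g\n", std::ldexp(1.0, -7));  // ~7.81e-3 vs rtol 3e-3
  return 0;
}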

kernels/test/op_mul_test.cpp

Lines changed: 15 additions & 0 deletions
@@ -746,6 +746,21 @@ TEST_F(OpMulOutTest, DynamicShapeUnbound) {
   EXPECT_TENSOR_CLOSE(out, expected_result);
 }
 
+// >>> torch.ops.aten.mul(torch.tensor([100], dtype=torch.int8),
+// torch.tensor([100], dtype=torch.int8), out=torch.zeros([1],
+// dtype=torch.long)) tensor([16])
+TEST_F(OpMulOutTest, MixedIntegerDtypeMatchesATen) {
+  TensorFactory<ScalarType::Char> tf_in;
+  TensorFactory<ScalarType::Long> tf_out;
+
+  Tensor in = tf_in.make({1}, {100});
+  Tensor out = tf_out.zeros({1});
+  Tensor ret = op_mul_out(in, in, out);
+
+  Tensor expected = tf_out.make({1}, {16});
+  EXPECT_TENSOR_CLOSE(out, expected);
+}
+
 TEST_F(OpMulScalarOutTest, SanityCheck) {
   TensorFactory<ScalarType::Bool> tf_a;
   TensorFactory<ScalarType::Float> tf_out;
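
The expected value of 16 follows from integer promotion semantics, as the ATen transcript in the test comment shows: the product is computed at the inputs' common dtype (int8) before being widened into the int64 out tensor, so 100 × 100 = 10000 wraps modulo 256 to 16 (10000 = 39 × 256 + 16). A standalone illustration of the same arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  // The multiply happens at the inputs' common dtype (int8) before the
  // result is widened to the int64 out tensor, so 100 * 100 = 10000
  // wraps modulo 256: 10000 - 39 * 256 = 16.
  int8_t a = 100;
  int8_t b = 100;
  int8_t wrapped = static_cast<int8_t>(a * b); // ints promote, product truncates to 16
  int64_t widened = wrapped;                   // widening preserves the wrapped value
  printf("%lld\n", static_cast<long long>(widened)); // prints 16
  return 0;
}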
