Skip to content

Commit 083701d

Browse files
committed
Update
[ghstack-poisoned]
2 parents 07b3463 + b8b2ecb commit 083701d

File tree

105 files changed

+4998
-746
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

105 files changed

+4998
-746
lines changed

.github/workflows/android-release-artifacts.yml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ on:
1616
options:
1717
- "xnnpack"
1818
- "vulkan+xnnpack"
19+
- "qnn"
1920
schedule:
2021
- cron: 0 10 * * *
2122

@@ -83,17 +84,29 @@ jobs:
8384
8485
echo -n "$SECRET_EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS" | base64 -d > /tmp/secring.gpg
8586
87+
GRADLE_ARGS=""
88+
8689
# Update the version name in build.gradle in case of maven publish
8790
VERSION="${{ inputs.version }}"
8891
if [ ! -z "$VERSION" ]; then
89-
sed -i "s/\(coordinates(\"org.pytorch\", \"executorch-android\", \"\)\([0-9]\+.[0-9]\+.[0-9]\+\)\(\")\)/\1$VERSION\3/" extension/android/executorch_android/build.gradle
92+
GRADLE_ARGS+=" -DexecuTorchVersion=${VERSION}"
9093
fi
9194
9295
FLAVOR="${{ inputs.flavor }}"
9396
if [[ "$FLAVOR" == "vulkan+xnnpack" || -z "$FLAVOR" ]]; then
9497
export EXECUTORCH_BUILD_VULKAN=ON
9598
fi
9699
100+
if [[ "$FLAVOR" == "qnn" ]]; then
101+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
102+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
103+
export EXECUTORCH_BUILD_QNN=ON
104+
source backends/qualcomm/scripts/qnn_config.sh
105+
export QNN_SDK_ROOT="/tmp/qnn/${QNN_VERSION}"
106+
export ANDROID_ABIS=arm64-v8a
107+
GRADLE_ARGS+=" -DqnnVersion=2.28.0"
108+
fi
109+
97110
# Build AAR Package
98111
mkdir aar-out
99112
export BUILD_AAR_DIR=aar-out
@@ -106,7 +119,7 @@ jobs:
106119
# Publish to maven staging
107120
UPLOAD_TO_MAVEN="${{ inputs.upload_to_maven }}"
108121
if [[ "$UPLOAD_TO_MAVEN" == "true" ]]; then
109-
(cd extension/android; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:publishToMavenCentral)
122+
(cd extension/android; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew ${GRADLE_ARGS} :executorch_android:publishToMavenCentral)
110123
fi
111124
112125
upload-release-aar:

.github/workflows/pull.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,8 @@ jobs:
855855
.ci/scripts/setup-linux.sh --build-tool "cmake"
856856
857857
# Install test requirements
858-
pip install -r backends/nxp/requirements-tests.txt
858+
pip install -r backends/nxp/requirements-tests-pypi.txt
859+
pip install -r backends/nxp/requirements-tests-eiq.txt
859860
860861
# Run pytest
861862
PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh

.github/workflows/trunk.yml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,3 +940,42 @@ jobs:
940940
build-mode: Release
941941
build-tool: cmake
942942
docker-image: ci-image:executorch-ubuntu-22.04-clang12
943+
944+
test-mcu-models:
945+
name: test-mcu-models
946+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
947+
strategy:
948+
matrix:
949+
include:
950+
- build-tool: cmake
951+
fail-fast: false
952+
permissions:
953+
id-token: write
954+
contents: read
955+
with:
956+
runner: linux.2xlarge
957+
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
958+
submodules: 'recursive'
959+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
960+
timeout: 90
961+
script: |
962+
BUILD_TOOL=${{ matrix.build-tool }}
963+
964+
# The generic Linux job chooses to use base env, not the one setup by the image
965+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
966+
conda activate "${CONDA_ENV}"
967+
968+
# Try to mirror these as closely as possible
969+
source .ci/scripts/utils.sh
970+
install_executorch "--use-pt-pinned-commit"
971+
972+
.ci/scripts/setup-arm-baremetal-tools.sh
973+
source examples/arm/ethos-u-scratch/setup_path.sh
974+
975+
# Run selective Build
976+
chmod +x examples/selective_build/test_selective_build.sh
977+
examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
978+
979+
# Run MCU models
980+
chmod +x examples/arm/run_mcu_models_fvp.sh
981+
examples/arm/run_mcu_models_fvp.sh --target=cortex-m55

CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,6 @@ endif()
582582

583583
if(EXECUTORCH_BUILD_EXTENSION_APPLE)
584584
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/apple)
585-
list(APPEND _executorch_extensions apple_extension)
586585
endif()
587586

588587
if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
@@ -649,7 +648,6 @@ endif()
649648

650649
if(EXECUTORCH_BUILD_EXTENSION_LLM_APPLE)
651650
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/apple)
652-
list(APPEND _executorch_extensions extension_llm_apple)
653651
endif()
654652

655653
if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
@@ -968,6 +966,10 @@ else()
968966
endif()
969967
target_link_libraries(executorch_kernels INTERFACE ${_executorch_kernels})
970968

969+
install(TARGETS executorch_backends executorch_extensions executorch_kernels
970+
EXPORT ExecuTorchTargets
971+
)
972+
971973
if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
972974
# Baseline libraries that executor_runner will link against.
973975
set(_executor_runner_libs executorch extension_evalue_util

backends/arm/_passes/decompose_linear_pass.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ def call(self, graph_module):
9090
kwargs={},
9191
from_node=node,
9292
)
93+
# Quantization parameters are inherited from original linear node, but
94+
# output reshape should use the linear node's output qparams for both input
95+
# and output.
96+
if "input_qparams" in output.meta:
97+
output.meta["input_qparams"] = output.meta.get(
98+
"output_qparams", None
99+
)
93100

94101
node.replace_all_uses_with(output)
95102
graph_module.graph.erase_node(node)

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 83 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def tosa_support_factory(
116116

117117
# Negative checks: Remove nodes from partitioning
118118
negative_checks: list[OperatorSupportBase] = [
119-
CheckInt64Inputs(exported_program, reporter),
119+
CheckInt64InputsAndOutputs(exported_program, reporter),
120120
CheckFloat64Inputs(exported_program, reporter),
121121
RankCheck(reporter, max_rank=5),
122122
*[
@@ -454,7 +454,18 @@ def is_node_supported(
454454
return True
455455

456456

457-
class CheckInt64Inputs(OperatorSupportBase):
457+
class CheckInt64InputsAndOutputs(OperatorSupportBase):
458+
"""TOSA does not support int64 tensors so in general, ops with int64 inputs or outputs should not be partitioned.
459+
There are however some exceptions:
460+
- Nodes with int64 output can be partitioned if they are constant, within int32,
461+
and all users cast to something else. In this case, the int64 tensor can safely be cast to int32 AOT.
462+
- Nodes with int64 output can be partitioned if all users are getitem with non-int64 output.
463+
In this case, there are multiple outputs and the int64 ones are not used.
464+
- Nodes with int64 inputs can be partitioned if the inputs are constant placeholders, or constant
465+
ops fulfilling the criteria above.
466+
Note that we don't check placeholders here, they are partitioned based on whether their users are partitioned
467+
or not.
468+
"""
458469

459470
def __init__(
460471
self, exported_program: ExportedProgram, reporter: WhyNoPartitionReporter
@@ -465,27 +476,85 @@ def __init__(
465476
if spec.kind == InputKind.USER_INPUT
466477
]
467478
self.reporter = reporter
479+
self.int32_min = torch.iinfo(torch.int32).min
480+
self.int32_max = torch.iinfo(torch.int32).max
468481
super().__init__()
469482

483+
def inside_int32_bounds(self, node: torch.fx.Node) -> bool:
484+
"""Node is assumed to be call_function with int64 output."""
485+
if isinstance(node.target, str):
486+
return False
487+
data = node.target(*node.args, **node.kwargs)
488+
min_val, max_val = int(torch.min(data)), int(torch.max(data))
489+
return min_val >= self.int32_min and max_val <= self.int32_max
490+
470491
def is_node_supported(
471492
self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
472493
) -> bool:
473494

495+
vals = node.meta["val"]
496+
tensor_list = vals if isinstance(vals, (list, tuple)) else [vals]
497+
498+
any_int64 = any(tensor.dtype == torch.int64 for tensor in tensor_list)
499+
# Don't partition nodes with int64 output...
500+
if any_int64:
501+
# ... Except for constant ops that are directly cast to something non-int64.
502+
# This could be an explicit cast, or something like a less than that outputs a different dtype than the input.
503+
users_output_non_int64 = all(
504+
get_first_fake_tensor(output_node).dtype != torch.int64
505+
for output_node in node.users
506+
)
507+
if (
508+
node.target in ComputeConstantOpsAOT.targeted_ops
509+
and users_output_non_int64
510+
):
511+
if not self.inside_int32_bounds(node):
512+
self.reporter.report_reject(
513+
node, "Constant node outside int32 range."
514+
)
515+
return False
516+
# Will never have input nodes, safe to return True
517+
return True
518+
519+
# ... Or ops with multiple outputs where only non-int64 are used.
520+
users_are_getitem = all(
521+
user.target == operator.getitem for user in node.users
522+
)
523+
if users_are_getitem and users_output_non_int64:
524+
# Passed output check, go to input check.
525+
pass
526+
else:
527+
self.reporter.report_reject(
528+
node, "Non-constant node with int64 output."
529+
)
530+
return False
531+
532+
# Ops with int64 inputs are only partitioned if input nodes are constant and will be partitioned.
533+
# If it is not partitioned, the partition will get an int64 input and fail.
474534
for input_node in node.all_input_nodes:
475-
# We can cast constant placeholders and constant ops AOT, such int64 are ok.
476-
# Otherwise, don't partition if one or more inputs are int64.
535+
tensor_in = get_first_fake_tensor(input_node)
536+
if tensor_in.dtype != torch.int64:
537+
continue
538+
# Constant placeholder
477539
if (
478-
input_node.name in self.input_names
479-
or not input_node.op == "placeholder"
540+
input_node.op != "call_function"
541+
and input_node.name not in self.input_names
480542
):
481-
tensor = get_first_fake_tensor(input_node)
482-
if tensor.dtype == torch.int64:
483-
if input_node.target not in ComputeConstantOpsAOT.targeted_ops:
484-
self.reporter.report_reject(
485-
node,
486-
f"Had int64 input {input_node.name} that couldn't be handled.",
487-
)
488-
return False
543+
continue
544+
# Constant operator
545+
if input_node.op == "call_function":
546+
if input_node.target in ComputeConstantOpsAOT.targeted_ops:
547+
# This is not perfect since the input_node can still be rejected by other checks but
548+
# this should cover the majority of cases.
549+
if self.is_node_supported(
550+
None, input_node # type: ignore[arg-type] #(we don't use 'submodules')
551+
):
552+
continue
553+
self.reporter.report_reject(
554+
node, f"Non-constant int64 input {input_node.name}"
555+
)
556+
return False
557+
489558
return True
490559

491560

0 commit comments

Comments (0)