Skip to content

Commit 3595425

Browse files
author
pytorchbot
committed
2025-07-24 nightly release (9236a68)
1 parent 18412f8 commit 3595425

File tree

110 files changed

+3826
-596
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

110 files changed

+3826
-596
lines changed

.ci/scripts/setup-qnn-deps.sh

Lines changed: 2 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,47 +7,7 @@
77

88
set -ex
99

10-
verify_pkg_installed() {
11-
echo $(dpkg-query -W --showformat='${Status}\n' $1|grep "install ok installed")
12-
}
10+
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
1311

14-
install_qnn() {
15-
echo "Start installing qnn."
16-
QNN_INSTALLATION_DIR=/tmp/qnn
17-
mkdir -p "${QNN_INSTALLATION_DIR}"
18-
19-
curl -Lo /tmp/v2.28.0.24.10.29.zip "https://softwarecenter.qualcomm.com/api/download/software/qualcomm_neural_processing_sdk/v2.28.0.241029.zip"
20-
echo "Finishing downloading qnn sdk."
21-
unzip -qo /tmp/v2.28.0.24.10.29.zip -d /tmp
22-
echo "Finishing unzip qnn sdk."
23-
24-
25-
# Print the content for manual verification
26-
ls -lah "/tmp/qairt"
27-
mv "/tmp/qairt"/* "${QNN_INSTALLATION_DIR}"
28-
echo "Finishing installing qnn '${QNN_INSTALLATION_DIR}' ."
29-
30-
ls -lah "${QNN_INSTALLATION_DIR}"
31-
}
32-
33-
setup_libc++() {
34-
clang_version=$1
35-
sudo apt-get update
36-
pkgs_to_check=("libc++-${clang_version}-dev")
37-
j=0
38-
while [ $j -lt ${#pkgs_to_check[*]} ]; do
39-
install_status=$(verify_pkg_installed ${pkgs_to_check[$j]})
40-
if [ "$install_status" == "" ]; then
41-
sudo apt-get install -y ${pkgs_to_check[$j]}
42-
if [[ $? -ne 0 ]]; then
43-
echo "ERROR: Failed to install required packages for libc++"
44-
exit 1
45-
fi
46-
fi
47-
j=$(( $j +1));
48-
done
49-
}
50-
51-
# This needs to match with the clang version from the Docker image
52-
setup_libc++ 12
12+
setup_libcpp 12
5313
install_qnn

.ci/scripts/test_ane_static_llama.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,6 @@ pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama
2828
# Download stories llama110m artifacts
2929
download_stories_model_artifacts
3030

31-
python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w
31+
python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w --embedding-quantize 4,32
3232

3333
popd

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ target_link_libraries(executorch PRIVATE executorch_core)
416416
target_include_directories(executorch PUBLIC ${_common_include_directories})
417417
target_compile_definitions(executorch PUBLIC C10_USING_CUSTOM_GENERATED_MACROS)
418418
target_compile_options(executorch PUBLIC ${_common_compile_options})
419-
target_link_options_shared_lib(executorch)
419+
executorch_target_link_options_shared_lib(executorch)
420420

421421
#
422422
# portable_ops_lib: A library to register core ATen ops using portable kernels,
@@ -690,7 +690,7 @@ endif()
690690

691691
if(EXECUTORCH_BUILD_KERNELS_QUANTIZED)
692692
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
693-
target_link_options_shared_lib(quantized_ops_lib)
693+
executorch_target_link_options_shared_lib(quantized_ops_lib)
694694
endif()
695695

696696
if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)

backends/apple/coreml/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ if(APPLE)
199199
${CMAKE_CURRENT_SOURCE_DIR}/third-party/coremltools/deps/protobuf/cmake
200200
)
201201

202-
target_link_options_shared_lib(libprotobuf-lite)
202+
executorch_target_link_options_shared_lib(libprotobuf-lite)
203203
target_link_libraries(coremldelegate PRIVATE libprotobuf-lite)
204204
endif()
205205

@@ -210,7 +210,7 @@ if(APPLE)
210210
${FOUNDATION_FRAMEWORK} ${SQLITE_LIBRARY}
211211
)
212212

213-
target_link_options_shared_lib(coremldelegate)
213+
executorch_target_link_options_shared_lib(coremldelegate)
214214

215215
if(EXECUTORCH_COREML_BUILD_EXECUTOR_RUNNER)
216216
target_link_libraries(

backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,27 @@
2323
from torch.fx.passes.operator_support import OperatorSupportBase
2424

2525
logger = logging.getLogger(__name__)
26-
logger.setLevel(logging.WARNING)
26+
logger.setLevel(logging.INFO)
2727

2828

29-
class OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
29+
class _OperatorsSupportedForCoreMLBackend(OperatorSupportBase):
3030
def __init__(
3131
self,
3232
skip_ops_for_coreml_delegation: Optional[List[str]] = None,
3333
lower_full_graph: bool = False,
34+
log: bool = False,
3435
) -> None:
3536
if skip_ops_for_coreml_delegation is None:
3637
skip_ops_for_coreml_delegation = []
3738
super().__init__()
3839
self.skip_ops_for_coreml_delegation = skip_ops_for_coreml_delegation
3940
self.lower_full_graph = lower_full_graph
4041
self._logged_msgs = set()
42+
self._log = log
4143

4244
def log_once(self, msg: str) -> None:
43-
if msg not in self._logged_msgs:
44-
logging.info(msg)
45+
if self._log and msg not in self._logged_msgs:
46+
logger.info(msg)
4547
self._logged_msgs.add(msg)
4648

4749
def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
@@ -154,8 +156,10 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
154156

155157
capability_partitioner = CapabilityBasedPartitioner(
156158
exported_program.graph_module,
157-
OperatorsSupportedForCoreMLBackend(
158-
self.skip_ops_for_coreml_delegation, self.lower_full_graph
159+
_OperatorsSupportedForCoreMLBackend(
160+
self.skip_ops_for_coreml_delegation,
161+
self.lower_full_graph,
162+
log=True,
159163
),
160164
allows_single_node_partition=True,
161165
)
@@ -191,8 +195,10 @@ def ops_to_not_decompose(
191195
self, ep: ExportedProgram
192196
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
193197
do_not_decompose = []
194-
op_support = OperatorsSupportedForCoreMLBackend(
195-
self.skip_ops_for_coreml_delegation, self.lower_full_graph
198+
op_support = _OperatorsSupportedForCoreMLBackend(
199+
self.skip_ops_for_coreml_delegation,
200+
self.lower_full_graph,
201+
log=False,
196202
)
197203

198204
# CoreML prevents certain ops (like triu) from lowering to CoreML when put in the ExecuTorch op namespace

backends/apple/coreml/test/test_coreml_partitioner.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from executorch.backends.apple.coreml.compiler import CoreMLBackend
1717
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
1818
from executorch.exir.backend.utils import format_delegated_graph
19-
from executorch.runtime import Runtime
2019

2120

2221
@torch.library.custom_op("unsupported::linear", mutates_args=())
@@ -37,7 +36,13 @@ def _(
3736
return torch.ops.aten.linear.default(x, w, b)
3837

3938

40-
_TEST_RUNTIME = sys.platform == "darwin"
39+
def is_fbcode():
40+
return not hasattr(torch.version, "git_version")
41+
42+
43+
_TEST_RUNTIME = (sys.platform == "darwin") and not is_fbcode()
44+
if _TEST_RUNTIME:
45+
from executorch.runtime import Runtime
4146

4247

4348
class TestCoreMLPartitioner(unittest.TestCase):

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,20 @@
1414

1515
from executorch.backends.apple.coreml.compiler import CoreMLBackend
1616
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
17-
from executorch.runtime import Runtime
1817
from torchao.quantization import IntxWeightOnlyConfig, PerAxis, PerGroup, quantize_
1918

20-
_TEST_RUNTIME = sys.platform == "darwin" and tuple(
21-
map(int, platform.mac_ver()[0].split("."))
22-
) >= (15, 0)
19+
20+
def is_fbcode():
21+
return not hasattr(torch.version, "git_version")
22+
23+
24+
_TEST_RUNTIME = (
25+
(sys.platform == "darwin")
26+
and not is_fbcode()
27+
and tuple(map(int, platform.mac_ver()[0].split("."))) >= (15, 0)
28+
)
29+
if _TEST_RUNTIME:
30+
from executorch.runtime import Runtime
2331

2432

2533
class TestTorchOps(unittest.TestCase):

backends/apple/mps/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ target_link_libraries(
7070
${MPS_FRAMEWORK} ${MPS_GRAPH_FRAMEWORK}
7171
)
7272

73-
target_link_options_shared_lib(mpsdelegate)
73+
executorch_target_link_options_shared_lib(mpsdelegate)
7474
target_compile_options(mpsdelegate PUBLIC ${_common_compile_options})
7575
target_compile_options(mpsdelegate PRIVATE "-fno-objc-arc")
7676

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass # noqa
4141
from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass # noqa
4242
from .decompose_linear_pass import DecomposeLinearPass # noqa
43+
from .decompose_masked_fill import DecomposeMaskedFill # noqa
4344
from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass # noqa
4445
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa
4546
from .decompose_ne_pass import DecomposeNotEqualPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
DecomposeLeakyReLUPass,
4646
DecomposeLinearPass,
4747
DecomposeLinearVectorNormPass,
48+
DecomposeMaskedFill,
4849
DecomposeMaxPool2DPass,
4950
DecomposeMeanDimPass,
5051
DecomposeNotEqualPass,
@@ -113,6 +114,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
113114
self.add_pass(
114115
DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
115116
)
117+
116118
self.add_pass(ConvertFullLikeToFullPass())
117119
self.add_pass(ConvertToClampPass())
118120
self.add_pass(ConvertMinMaxPass())
@@ -146,6 +148,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
146148
self.add_pass(DecomposeMaxPool2DPass())
147149
self.add_pass(SizeAdjustInputPass())
148150
self.add_pass(DecomposeSelectPass())
151+
149152
self.add_pass(ConvertSqueezesToViewPass())
150153

151154
self.add_pass(FuseViewCopyTransform())
@@ -160,6 +163,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
160163
return self._transform(exported_program.graph_module)
161164

162165
def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
166+
self.add_pass(DecomposeMaskedFill())
163167
self.add_pass(DecomposeRoundPass())
164168
self.add_pass(DecomposeAcoshPass())
165169
self.add_pass(DecomposeAsinPass())
@@ -285,4 +289,8 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
285289
self.add_pass(ReplaceInfValues())
286290
self.add_pass(DecomposeSumPass())
287291

292+
if not self.tosa_spec.is_U55_subset:
293+
# Uses where which is not supported on Ethos-U55
294+
self.add_pass(DecomposeMaskedFill())
295+
288296
return self._transform(graph_module)

0 commit comments

Comments (0)