
Commit 70e72ea

Merge branch 'main' into toupstream/notebook

2 parents 792dee2 + 0cca3ca

File tree: 99 files changed, +3063 additions, -1127 deletions


.ci/scripts/build-qnn-sdk.sh

Lines changed: 4 additions & 2 deletions
@@ -11,8 +11,10 @@ set -o xtrace
 
 build_qnn_backend() {
   echo "Start building qnn backend."
-  export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
-  export QNN_SDK_ROOT=${QNN_SDK_ROOT:-/tmp/qnn/2.28.0.241029}
+  # Source QNN configuration
+  source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
+  setup_android_ndk
+  install_qnn
   export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
 
   parallelism=$(( $(nproc) - 1 ))

.ci/scripts/setup-qnn-deps.sh

Lines changed: 1 addition & 0 deletions
@@ -10,4 +10,5 @@ set -ex
 source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
 
 setup_libcpp 12
+setup_android_ndk
 install_qnn

.ci/scripts/test_llama.sh

Lines changed: 7 additions & 1 deletion
@@ -119,8 +119,12 @@ echo "COREML option ${COREML}"
 
 if [[ "${MODE}" =~ .*qnn.* ]]; then
   QNN=ON
+
+  # Download QNN_SDK. If already downloaded, export environment path
+  source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
+  install_qnn
+
   export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
-  export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
   export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
   export PYTHONPATH=".."
   cp schema/program.fbs exir/_serialize/program.fbs

@@ -150,6 +154,7 @@ cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
   retry cmake --preset llm \
+    -DEXECUTORCH_BUILD_TESTS=ON \
     -DBUILD_TESTING=OFF \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \

@@ -166,6 +171,7 @@ cmake_build_llama_runner() {
   popd
   dir="examples/models/llama"
   retry cmake \
+    -DEXECUTORCH_BUILD_TESTS=ON \
     -DBUILD_TESTING=OFF \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 6 additions & 1 deletion
@@ -9,8 +9,13 @@ set -euxo pipefail
 
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
+# Source QNN configuration
+source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/qnn_config.sh"
+# Download QNN_SDK. If already downloaded, export environment path
+source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
+install_qnn
+
 export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
-export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
 export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
 export PYTHONPATH=".."
 cp schema/program.fbs exir/_serialize/program.fbs

.github/workflows/android-perf.yml

Lines changed: 2 additions & 2 deletions
@@ -292,7 +292,7 @@ jobs:
   export.output_name="${OUT_ET_MODEL_NAME}.pte"
   ls -lh "${OUT_ET_MODEL_NAME}.pte"
 elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
-  export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
+  export QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072
   export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
   export PYTHONPATH=$(pwd)/..

@@ -432,7 +432,7 @@ jobs:
 PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
 mkdir -p aar-out
-PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
+PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
 mkdir -p extension/benchmark/android/benchmark/app/libs
 cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
 pushd extension/benchmark/android/benchmark

.github/workflows/android-release-artifacts.yml

Lines changed: 1 addition & 1 deletion
@@ -104,7 +104,7 @@ jobs:
   source backends/qualcomm/scripts/qnn_config.sh
   export QNN_SDK_ROOT="/tmp/qnn/${QNN_VERSION}"
   export ANDROID_ABIS=arm64-v8a
-  GRADLE_ARGS+=" -DqnnVersion=2.28.0"
+  GRADLE_ARGS+=" -DqnnVersion=2.37.0"
 fi
 
 # Build AAR Package

.github/workflows/apple-perf.yml

Lines changed: 2 additions & 2 deletions
@@ -230,7 +230,7 @@ jobs:
   model.use_sdpa_with_kv_cache=true \
   backend.xnnpack.enabled=true \
   backend.xnnpack.extended_ops=true \
-  base.preq_mode="8da4w_output_8da8w" \
+  base.preq_mode="preq_8da4w_out_8da8w" \
   base.preq_group_size=32 \
   export.max_seq_length=2048 \
   export.max_context_length=2048 \

@@ -256,7 +256,7 @@ jobs:
   base.params="${DOWNLOADED_PATH}/params.json" \
   quantization.use_qat=true \
   base.use_lora=16 \
-  base.preq_mode="8da4w_output_8da8w" \
+  base.preq_mode="preq_8da4w_out_8da8w" \
   base.preq_group_size=32 \
   base.preq_embedding_quantize=\'8,0\' \
   model.use_sdpa_with_kv_cache=true \

backends/arm/_passes/to_tosa_memory_format_pass.py

Lines changed: 14 additions & 2 deletions
@@ -12,7 +12,6 @@
     get_first_fake_tensor,
     is_param_node,
 )
-from executorch.backends.arm.tosa_utils import is_consumer_node_depthwise_conv2d
 from executorch.exir import ExportedProgram
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult

@@ -43,6 +42,19 @@ def __init__(self, exported_program: ExportedProgram) -> None:
         self.exported_program = exported_program
         super().__init__()
 
+    @staticmethod
+    def _is_consumer_node_depthwise_conv2d(node: torch.fx.Node):
+        consumer_node = list(node.users)[0]
+        if consumer_node.target == exir_ops.edge.aten.convolution.default:
+            consumer_node_inputs = consumer_node.all_input_nodes
+            groups = consumer_node.args[-1]
+            in_channels = consumer_node_inputs[0].meta["val"].shape[1]
+            out_channels = consumer_node_inputs[1].meta["val"].shape[0]
+            if (in_channels == groups) and (out_channels % in_channels) == 0:
+                return True
+
+        return False
+
     def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
         """
         returns True for w in the following sequence;

@@ -53,7 +65,7 @@ def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
         consumer_node = list(node.users)[0]
         if self.is_weight_node_for_depthwise_conv2d(consumer_node):
             return True
-        if is_consumer_node_depthwise_conv2d(node):
+        if self._is_consumer_node_depthwise_conv2d(node):
            # Check that node is the weight-argument and not input or bias
            return consumer_node.args[1] == node
 
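The helper inlined above encodes the standard depthwise test: a convolution is depthwise when groups equals the input channel count and the output channel count is an integer multiple of it (the channel multiplier). Below is a minimal, runnable sketch of the same shape rule on ordinary torch.nn.Conv2d modules; the is_depthwise helper and the example layers are illustrative, not part of the commit.

import torch

def is_depthwise(conv: torch.nn.Conv2d) -> bool:
    # Depthwise: one group per input channel, and each group produces a
    # whole number of output channels. This mirrors the check the pass
    # applies to the weight's consumer node.
    return conv.in_channels == conv.groups and conv.out_channels % conv.in_channels == 0

dw = torch.nn.Conv2d(16, 32, kernel_size=3, groups=16)  # channel multiplier 2
regular = torch.nn.Conv2d(16, 32, kernel_size=3)        # groups == 1

assert is_depthwise(dw)
assert not is_depthwise(regular)

The distinction matters to this pass because TOSA orders depthwise conv2d weights differently from regular conv2d weights, so the memory format assigned to a weight node depends on what consumes it.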

backends/arm/debug/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.

backends/arm/debug/schema.py

Lines changed: 133 additions & 0 deletions
@@ -0,0 +1,133 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+import json
+
+from dataclasses import asdict, dataclass
+from typing import Any
+
+import serializer.tosa_serializer as ts  # type: ignore
+import torch
+
+from torch.fx.traceback import NodeSource
+
+
+@dataclass
+class TosaDebugSchema:
+    node_name: str
+    operator_name: str
+    operator_id: int
+
+
+@dataclass
+class ATenDebugSchema:
+    node_name: str
+    operator_name: str
+
+    @staticmethod
+    def from_node(node: torch.fx.Node) -> ATenDebugSchema:
+        # node.target is Union[Callable[..., Any], str], so we need to access this correctly depending on the type
+        if callable(node.target):
+            operator_name = node.target.__name__
+        else:
+            operator_name = node.target
+
+        return ATenDebugSchema(node_name=node.name, operator_name=operator_name)
+
+
+@dataclass
+class TorchDebugSchema:
+    stack_trace: list[str]
+    node_trace: list[dict[str, Any]] | str
+    nn_module_stack: dict[str, Any] | str
+    torch_fn: tuple[str, str] | str
+
+    @staticmethod
+    def serialize_node_trace(node_trace: list[NodeSource]) -> list[dict[str, Any]]:
+        """Flatten the from_node dictionary to remove nesting."""
+        flattened = []
+        node_stack = []
+
+        for n in node_trace:
+            node_stack.append((n, -1))
+
+        while len(node_stack) > 0:
+            node, parent_id = node_stack.pop()
+            flattened.append(
+                {
+                    "name": node.name,
+                    "target": node.target,
+                    "graph_id": node.graph_id,
+                    "pass_name": node.pass_name,
+                    "action": node._get_action_string(),
+                    "parent_graph_id": parent_id,
+                }
+            )
+
+            for n in node.from_node:
+                node_stack.append((n, node.graph_id))
+
+        return flattened
+
+    @staticmethod
+    def from_node(node: torch.fx.Node) -> TorchDebugSchema:
+        node_trace: str | list[dict[str, Any]] = "No node trace available."
+
+        if "from_node" in node.meta:
+            # Flatten the node_trace dictionary, so there is no nesting
+            node_trace = TorchDebugSchema.serialize_node_trace(node.meta["from_node"])
+
+        return TorchDebugSchema(
+            stack_trace=node.meta.get("stack_trace", "No stack trace available").split(
+                "\n"
+            ),
+            node_trace=node_trace,
+            nn_module_stack=node.meta.get(
+                "nn_module_stack", "No module stack trace available"
+            ),
+            torch_fn=node.meta.get("torch_fn", "No torch_fn available"),
+        )
+
+
+@dataclass
+class DebugSchema:
+    event_id: int
+    aten_info: ATenDebugSchema
+    tosa_info: TosaDebugSchema
+    torch_info: TorchDebugSchema
+
+
+class DebugHook:
+    def __init__(self) -> None:
+        self._debug_events: list[DebugSchema] = []
+        self.__op_id_to_name = {}
+
+        # Build up a mapping from TOSA 1.0 operator IDs to their names
+        for name, val in vars(ts.Op).items():
+            self.__op_id_to_name[val] = name
+
+    def add(self, node: torch.fx.Node, tosa_op: Any, tosa_op_id: int) -> None:
+        tosa_debug_info = TosaDebugSchema(
+            node_name=str(tosa_op),
+            operator_name=self.__op_id_to_name[tosa_op_id],
+            operator_id=tosa_op_id,
+        )
+
+        aten_debug_info = ATenDebugSchema.from_node(node)
+        torch_debug_info = TorchDebugSchema.from_node(node)
+
+        self._debug_events.append(
+            DebugSchema(
+                event_id=len(self._debug_events),
+                aten_info=aten_debug_info,
+                tosa_info=tosa_debug_info,
+                torch_info=torch_debug_info,
+            )
+        )
+
+    def serialize(self) -> str:
+        return json.dumps([asdict(event) for event in self._debug_events], indent=4)
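serialize_node_trace flattens the nested from_node provenance iteratively with an explicit stack, linking each record to its parent through parent_graph_id rather than nesting dictionaries. A minimal, runnable sketch of that traversal, using a hypothetical FakeSource stand-in for torch.fx.traceback.NodeSource that carries only the fields the traversal reads:

from dataclasses import dataclass, field
from typing import Any

@dataclass
class FakeSource:
    # Stand-in for torch.fx.traceback.NodeSource (illustrative only).
    name: str
    graph_id: int
    from_node: list["FakeSource"] = field(default_factory=list)

def flatten(trace: list[FakeSource]) -> list[dict[str, Any]]:
    flattened: list[dict[str, Any]] = []
    stack = [(n, -1) for n in trace]  # roots have no parent graph
    while stack:
        node, parent_id = stack.pop()
        flattened.append(
            {"name": node.name, "graph_id": node.graph_id, "parent_graph_id": parent_id}
        )
        # Children are linked back to this node via its graph_id.
        stack.extend((child, node.graph_id) for child in node.from_node)
    return flattened

leaf = FakeSource("x", graph_id=2)
root = FakeSource("add", graph_id=1, from_node=[leaf])
print(flatten([root]))
# [{'name': 'add', 'graph_id': 1, 'parent_graph_id': -1},
#  {'name': 'x', 'graph_id': 2, 'parent_graph_id': 1}]

Presumably a DebugHook is created once per TOSA lowering, add(...) is called as each FX node is serialized, and serialize() emits the collected events as JSON; that wiring lives outside this file.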
