Skip to content

Commit 315f47c

Browse files
authored
Merge branch 'release/1.0' into cherry-pick-14786-by-pytorch_bot_bot_
2 parents 80ca2f7 + f8151e6 commit 315f47c

File tree

69 files changed

+1267
-232
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+1267
-232
lines changed

backends/arm/_passes/annotate_decomposed_matmul.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,10 @@ def call(self, graph_module: GraphModule) -> PassResult:
6868
node for node in partition.nodes if node.target in matmul_targets
6969
][0]
7070

71-
if quantized_input:
71+
if quantized_input and not all(
72+
input_node.target in DQ_OPS
73+
for input_node in matmul_node.all_input_nodes
74+
):
7275
matmul_args = matmul_node.all_input_nodes
7376
for node in matmul_args:
7477
# Find the dq-node connected to this mm/bmm arg
@@ -94,7 +97,9 @@ def call(self, graph_module: GraphModule) -> PassResult:
9497

9598
partition_output = list(partition.output_nodes[0].users)[0]
9699
quantized_output = partition_output.target in Q_OPS
97-
if quantized_output:
100+
if quantized_output and not all(
101+
user.target in Q_OPS for user in matmul_node.users
102+
):
98103
with graph_module.graph.inserting_after(matmul_node):
99104
# Create q-node after matmul
100105
q_node = create_node(

backends/arm/test/ops/test_matmul.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
class MatMul(torch.nn.Module):
2424
test_data_generators = {
25+
"rand_rand_2d": lambda: (torch.rand(5, 5), torch.rand(5, 2)),
2526
"rand_rand_3d": lambda: (torch.rand(2, 3, 5), torch.rand(2, 5, 2)),
2627
"rand_rand_4d": lambda: (torch.rand(1, 2, 3, 5), torch.rand(1, 2, 5, 2)),
2728
}
@@ -32,6 +33,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
3233

3334
class MatMulSingleInput(torch.nn.Module):
3435
test_data_generators = {
36+
"rand_2d": lambda: (torch.rand(5, 5),),
3537
"rand_3d": lambda: (torch.rand(2, 5, 5),),
3638
"rand_4d": lambda: (torch.rand(1, 2, 5, 5),),
3739
}
@@ -42,6 +44,11 @@ def forward(self, x: torch.Tensor):
4244

4345
class MatMulCombo(torch.nn.Module):
4446
test_data_generators = {
47+
"rand_rand_rand_2d": lambda: (
48+
torch.rand(5, 5),
49+
torch.rand(5, 2),
50+
torch.rand(2, 5),
51+
),
4552
"rand_rand_rand_3d": lambda: (
4653
torch.rand(2, 5, 5),
4754
torch.rand(2, 5, 2),

backends/arm/test/tester/arm_tester.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,10 @@ def run_method_and_compare_outputs(
458458
for run_iteration in range(num_runs):
459459
reference_input = inputs if inputs else next(self.generate_random_inputs())
460460

461+
# Avoid issues with inplace operators
462+
test_input = copy.deepcopy(reference_input)
463+
original_input = copy.deepcopy(reference_input)
464+
461465
input_shapes = [
462466
generated_input.shape if hasattr(generated_input, "shape") else (1,)
463467
for generated_input in reference_input
@@ -472,16 +476,16 @@ def run_method_and_compare_outputs(
472476
# Run exported module directly
473477
test_outputs, _ = pytree.tree_flatten(
474478
self._calculate_reference_output(
475-
exported_program.module(), reference_input
479+
exported_program.module(), test_input
476480
)
477481
)
478482
else:
479483
# Run lowered model with target
480484
test_outputs, _ = pytree.tree_flatten(
481-
test_stage.run_artifact(reference_input)
485+
test_stage.run_artifact(test_input)
482486
)
483487

484-
logger.info(f"\n Input: {reference_input}")
488+
logger.info(f"\n Input: {original_input}")
485489
logger.info(f"\n Ref output: {reference_outputs}")
486490
logger.info(f"\nTest output: {test_outputs}")
487491

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
(advanced-topics-section)=
2+
3+
# Advanced
4+
5+
Deep dive into ExecuTorch's advanced features for optimization, customization, and integration.
6+
7+
This section covers advanced concepts for developers who need to customize ExecuTorch for specific use cases, optimize performance, or integrate with custom hardware backends.
8+
9+
## Quantization & Optimization
10+
11+
Techniques for model compression and performance optimization.
12+
13+
**→ {doc}`quantization-optimization` — Quantization strategies and performance optimization**
14+
15+
Key topics:
16+
17+
- Quantization strategies and techniques
18+
- Performance profiling and optimization
19+
20+
## Model Export
21+
22+
Learn the core ExecuTorch workflow, exporting PyTorch models to the `.pte` format for edge deployment.
23+
24+
**→ {doc}`using-executorch-export` — Model export and lowering**
25+
26+
Key topics:
27+
28+
- Export and Lowering Workflow
29+
- Hardware Backend Selection & Optimization
30+
- Dynamic Shapes & Advanced Model Features
31+
32+
33+
## Kernel Library
34+
35+
Deep dive into ExecuTorch's kernel implementation and customization.
36+
37+
**→ {doc}`kernel-library-advanced` — Kernel library deep dive and customization**
38+
39+
Key topics:
40+
41+
- Kernel library architecture
42+
- Custom kernel implementation
43+
- Selective build and optimization
44+
45+
## Backend & Delegates
46+
47+
**→ {doc}`backend-delegate-advanced` — Backend delegate integration**
48+
49+
Key topics:
50+
51+
- Integrating a backend delegate into ExecuTorch
52+
- XNNPACK Delegate Internals
53+
- Debugging Delegation
54+
55+
56+
## Runtime & Integration
57+
58+
Advanced runtime features and backend integration.
59+
60+
**→ {doc}`runtime-integration-advanced` — Runtime customization and backend integration**
61+
62+
Key topics:
63+
64+
- Backend delegate implementation
65+
- Platform abstraction layer
66+
- Custom runtime integration
67+
68+
## Compiler & IR
69+
70+
Advanced compiler features and intermediate representation details.
71+
72+
**→ {doc}`compiler-ir-advanced` — Compiler passes and IR specification**
73+
74+
Key topics:
75+
76+
- Custom compiler passes
77+
- Memory planning strategies
78+
- Backend dialect and EXIR
79+
- Ops set definition
80+
81+
82+
## File Formats
83+
84+
ExecuTorch file format specifications and internals.
85+
86+
**→ {doc}`file-formats-advanced` — PTE and PTD file format specifications**
87+
88+
Key topics:
89+
90+
- PTE file format internals
91+
- PTD file format specification
92+
- Custom file format handling
93+
94+
## Next Steps
95+
96+
After exploring advanced topics:
97+
98+
- **{doc}`tools-sdk-section`** - Developer tools for debugging and profiling
99+
- **{doc}`api-section`** - Complete API reference documentation
100+
101+
```{toctree}
102+
:hidden:
103+
:maxdepth: 2
104+
:caption: Advanced Topics
105+
106+
quantization-optimization
107+
using-executorch-export
108+
kernel-library-advanced
109+
backend-delegate-advanced
110+
runtime-integration-advanced
111+
compiler-ir-advanced
112+
file-formats-advanced

docs/source/android-arm-vgf.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
```{include} backends-arm-vgf.md

docs/source/android-backends.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
(android-backends)=
2+
# Backends
3+
4+
Available hardware acceleration backends for Android deployment.
5+
6+
## CPU Acceleration
7+
8+
- {doc}`android-xnnpack` — XNNPACK CPU acceleration
9+
10+
## GPU Acceleration
11+
12+
- {doc}`android-vulkan` — Vulkan GPU acceleration
13+
14+
## NPU/Accelerator Backends
15+
16+
- {doc}`android-qualcomm` — Qualcomm AI Engine (NPU)
17+
- {doc}`android-mediatek` — MediaTek NPU acceleration
18+
- {doc}`android-arm-vgf` — Arm VGF backend
19+
- {doc}`android-samsung-exynos` — Samsung Exynos NPU
20+
21+
```{toctree}
22+
:hidden:
23+
android-xnnpack
24+
android-vulkan
25+
android-qualcomm
26+
android-mediatek
27+
android-arm-vgf
28+
android-samsung-exynos

docs/source/android-examples.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Examples & Demos
2+
3+
- [Working with LLMs - Android Examples](https://github.com/meta-pytorch/executorch-examples/tree/main/llm/android)
4+
- [Demo Apps](https://github.com/meta-pytorch/executorch-examples/tree/main/dl3/android/DeepLabV3Demo#executorch-android-demo-app)
5+
- {doc}`tutorial-arm-vgf` — Export a simple PyTorch model for the ExecuTorch VGF backend
6+
7+
```{toctree}
8+
:hidden:
9+
tutorial-arm-vgf

docs/source/android-mediatek.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
```{include} backends-mediatek.md

docs/source/android-qualcomm.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
```{include} backends-qualcomm.md
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
```{include} backends-samsung-exynos.md

0 commit comments

Comments
 (0)