Commit e62191a

Update
[ghstack-poisoned]
2 parents: bc3fd39 + 6440d3d

File tree: 16 files changed (+236, -42 lines)

.ci/docker/common/install_base.sh

Lines changed: 5 additions & 0 deletions

@@ -26,6 +26,11 @@ install_ubuntu() {
     libssl-dev \
     zip

+  # These libraries are needed by TorchVision
+  apt-get install -y --no-install-recommends \
+    libjpeg-dev \
+    libpng-dev
+
   # Cleanup package manager
   apt-get autoclean && apt-get clean
   rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

.ci/docker/common/install_conda.sh

Lines changed: 10 additions & 2 deletions

@@ -31,8 +31,16 @@ install_miniconda() {

 install_python() {
   pushd /opt/conda
-  # Install the correct Python version
+  # Install the selected Python version for CI jobs
   as_ci_user conda create -n "py_${PYTHON_VERSION}" -y --file /opt/conda/conda-env-ci.txt python="${PYTHON_VERSION}"
+
+  # From https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_conda.sh
+  if [[ $(uname -m) == "aarch64" ]]; then
+    conda_install "openblas==0.3.28=*openmp*"
+  else
+    conda_install mkl=2022.1.0 mkl-include=2022.1.0
+  fi
+
   popd
 }

@@ -53,7 +61,7 @@ fix_conda_ubuntu_libstdcxx() {
   # PyTorch sev: https://github.com/pytorch/pytorch/issues/105248
   # Ref: https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_conda.sh
   if grep -e "2[02].04." /etc/issue >/dev/null; then
-    rm "/opt/conda/envs/py_${PYTHON_VERSION}/lib/libstdc++.so.6"
+    rm /opt/conda/envs/py_${PYTHON_VERSION}/lib/libstdc++.so*
   fi
 }

.github/workflows/lint.yml

Lines changed: 2 additions & 1 deletion

@@ -31,7 +31,7 @@ jobs:
       # The generic Linux job chooses to use base env, not the one setup by the image
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
-
+
       # For mypy linting, we need to first install executorch first so that
       # it builds the python package information.
       BUILD_TOOL="cmake"

@@ -74,6 +74,7 @@ jobs:
       docker-image: executorch-ubuntu-22.04-linter
       fetch-depth: 0
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
       script: |
         FILES_NEEDS_FORMAT=$(/opt/google-java-format -n extension/android/src/main/java/org/pytorch/executorch/*.java \
           examples/demo-apps/android/ExecuTorchDemo/app/src/main/java/com/example/executorchdemo/*.java \

backends/arm/_passes/decompose_select.py

Lines changed: 1 addition & 2 deletions

@@ -37,14 +37,13 @@ def call(self, graph_module: torch.fx.GraphModule):
         rank = len(input_node.meta["val"].size())
         dim = dim % rank if dim < 0 else dim
         index = index % rank if index < 0 else index
-        dim_list = list(range(rank))

         with graph_module.graph.inserting_before(node):
             slice_node = create_node(
                 graph_module.graph, slice_op, (input_node, dim, index, index + 1)
             )
             squeeze_node = create_node(
-                graph_module.graph, squeeze_op, (slice_node, dim_list)
+                graph_module.graph, squeeze_op, (slice_node, [dim])
             )

             node.replace_all_uses_with(squeeze_node)
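
The fix squeezes only the selected dimension instead of every dimension of the sliced tensor. A minimal eager-mode sketch of the intended semantics (illustrative only, not code from this commit):

```python
import torch

x = torch.randn(1, 3, 4)
dim, index = 1, 2

# aten.select removes exactly one dimension.
selected = x.select(dim, index)                    # shape: (1, 4)

# The pass decomposes select into a width-1 slice plus a squeeze of
# that single dimension, matching select's semantics.
decomposed = x.narrow(dim, index, 1).squeeze(dim)  # shape: (1, 4)
assert torch.equal(selected, decomposed)

# The old dim_list = list(range(rank)) squeezed every size-1 dim, so the
# leading dimension of size 1 was dropped too: shape (4,) instead of (1, 4).
assert x.narrow(dim, index, 1).squeeze().shape == (4,)
```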

build/cmake_deps.toml

Lines changed: 1 addition & 4 deletions

@@ -365,10 +365,7 @@ buck_targets = [
   "//extension/llm/custom_ops:custom_ops",
 ]
 filters = [
-  # Second clause is to pick up fht_neon.c/fht_avx.c from FFHT. TODO:
-  # remove filters and patch extract_sources.py's Buck query to fetch
-  # srcs; presumably filters is here to remove .h files.
-  "(.cpp$)|(fht.*\\.c$)",
+  ".cpp$",
 ]
 excludes = [
   "^codegen",

codegen/tools/gen_oplist.py

Lines changed: 22 additions & 1 deletion

@@ -189,6 +189,23 @@ def _dump_yaml(
     )


+def create_kernel_key(maybe_kernel_key: str) -> str:
+    # It is a kernel key.
+    if maybe_kernel_key.lstrip().startswith("v1"):
+        return maybe_kernel_key
+    # It is a dtype.
+    else:
+        # Generate a kernel key based on the dtype provided.
+        # Note: no dim order is included in this kernel key.
+        # For a description of the kernel key format, see
+        # executorch/blob/main/runtime/kernel/operator_registry.h#L97-L123
+        try:
+            dtype = ScalarType[maybe_kernel_key]
+            return "v1/" + str(dtype.value) + ";"
+        except KeyError:
+            raise Exception(f"Unknown dtype: {maybe_kernel_key}")
+
+
 def gen_oplist(
     output_path: str,
     model_file_path: Optional[str] = None,

@@ -223,7 +240,11 @@
         ops_and_metadata = json.loads(ops_dict)
         for op, metadata in ops_and_metadata.items():
             op_set.update({op})
-            op_metadata = metadata if len(metadata) > 0 else ["default"]
+            op_metadata = (
+                [create_kernel_key(x) for x in metadata]
+                if len(metadata) > 0
+                else ["default"]
+            )
             et_kernel_metadata = merge_et_kernel_metadata(
                 et_kernel_metadata, {op: op_metadata}
             )
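
In effect, each metadata entry is now normalized: strings that already look like kernel keys pass through, while bare dtype names are expanded to a kernel key with no dim order. A small illustration (the enum values match the `v1/3` Int and `v1/6` Float keys used in the tests below):

```python
from executorch.codegen.tools.gen_oplist import create_kernel_key

# Already a kernel key: returned unchanged.
assert create_kernel_key("v1/3;0,1|3;0,1") == "v1/3;0,1|3;0,1"

# A bare dtype name becomes a dim-order-free kernel key;
# ScalarType.Float has enum value 6.
assert create_kernel_key("Float") == "v1/6;"

# Anything else raises.
try:
    create_kernel_key("NotADtype")
except Exception as e:
    print(e)  # Unknown dtype: NotADtype
```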

codegen/tools/test/test_gen_oplist.py

Lines changed: 3 additions & 2 deletions

@@ -13,6 +13,7 @@

 import executorch.codegen.tools.gen_oplist as gen_oplist
 import yaml
+from executorch.codegen.tools.gen_oplist import ScalarType


 class TestGenOpList(unittest.TestCase):

@@ -89,7 +90,7 @@ def test_gen_op_list_with_root_ops_and_dtypes(
     ) -> None:
         output_path = os.path.join(self.temp_dir.name, "output.yaml")
         ops_dict = {
-            "aten::add": ["v1/3;0,1|3;0,1|3;0,1|3;0,1", "v1/6;0,1|6;0,1|6;0,1|6;0,1"],
+            "aten::add": ["v1/3;0,1|3;0,1|3;0,1|3;0,1", ScalarType.Float.name],
             "aten::mul": [],
         }
         args = [

@@ -104,7 +105,7 @@
             {
                 "aten::add": [
                     "v1/3;0,1|3;0,1|3;0,1|3;0,1",
-                    "v1/6;0,1|6;0,1|6;0,1|6;0,1",
+                    "v1/6;",
                 ],
                 "aten::mul": ["default"],
             },

codegen/tools/test/test_gen_selected_op_variants.py

Lines changed: 74 additions & 2 deletions

@@ -12,7 +12,7 @@
 import expecttest


-class TestGenSelectedMobileOpsHeader(expecttest.TestCase):
+class TestGenSelectedOpVariants(expecttest.TestCase):
     def setUp(self):
         self.temp_dir = tempfile.TemporaryDirectory()
         self.addCleanup(self.temp_dir.cleanup)

@@ -84,7 +84,79 @@ def test_generates_correct_header(self) -> None:
         )


-class TestGenSelectedMobileOpsHeader_Empty(expecttest.TestCase):
+class TestGenSelectedOpVariants_UsingDtypeString(expecttest.TestCase):
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+        self.addCleanup(self.temp_dir.cleanup)
+        self.selected_ops_yaml = os.path.join(
+            self.temp_dir.name, "selected_operators.yaml"
+        )
+        with open(self.selected_ops_yaml, "w") as f:
+            f.write(
+                """
+include_all_non_op_selectives: False
+include_all_operators: False
+debug_info:
+  - model1@v100
+  - model2@v50
+operators:
+  aten::add:
+    is_root_operator: Yes
+    is_used_for_training: Yes
+    include_all_overloads: No
+  aten::add.int:
+    is_root_operator: No
+    is_used_for_training: No
+    include_all_overloads: Yes
+kernel_metadata: {}
+et_kernel_metadata:
+  aten::add.out:
+    # A list of different kernel keys (tensors with dtype-enum/dim-order) combinations used in model
+    - v1/6; # Float
+    - v1/3; # Int
+  aten::mul.out:
+    - v1/6; # Float
+  aten::sub.out:
+    - default
+build_features: []
+custom_classes: []
+"""
+            )
+
+    def tearDown(self):
+        self.temp_dir.cleanup()
+
+    def test_generates_correct_header(self) -> None:
+        gen_selected_op_variants.write_selected_op_variants(
+            os.path.join(self.temp_dir.name, "selected_operators.yaml"),
+            self.temp_dir.name,
+        )
+        with open(
+            os.path.join(self.temp_dir.name, "selected_op_variants.h"), "r"
+        ) as result:
+            self.assertExpectedInline(
+                result.read(),
+                """#pragma once
+/**
+ * Generated by executorch/codegen/tools/gen_selected_op_variants.py
+ */
+
+inline constexpr bool should_include_kernel_dtype(
+  const char *operator_name,
+  executorch::aten::ScalarType scalar_type
+) {
+  return ((executorch::aten::string_view(operator_name).compare("add.out") == 0)
+   && (scalar_type == executorch::aten::ScalarType::Float || scalar_type == executorch::aten::ScalarType::Int))
+   || ((executorch::aten::string_view(operator_name).compare("mul.out") == 0)
+   && (scalar_type == executorch::aten::ScalarType::Float))
+   || ((executorch::aten::string_view(operator_name).compare("sub.out") == 0)
+   && (true));
+}
+""",
+            )
+
+
+class TestGenSelectedOpVariants_Empty(expecttest.TestCase):
     def setUp(self):
         self.temp_dir = tempfile.TemporaryDirectory()
         self.addCleanup(self.temp_dir.cleanup)
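
For orientation, the kernel keys in these fixtures follow the format documented in runtime/kernel/operator_registry.h: a `v1/` version prefix, then one `dtype;dim_order` entry per tensor, joined by `|`. A hedged parsing sketch (the helper name is hypothetical, not part of the commit):

```python
def parse_kernel_key(key: str) -> list[tuple[int, list[int]]]:
    """Split "v1/6;0,1|3;0,1" into [(dtype_enum, dim_order), ...]."""
    version, _, tensors = key.partition("/")
    assert version == "v1", f"unexpected kernel key version: {version}"
    parsed = []
    for entry in filter(None, tensors.split("|")):
        dtype, _, dims = entry.partition(";")
        parsed.append((int(dtype), [int(d) for d in dims.split(",")] if dims else []))
    return parsed

print(parse_kernel_key("v1/6;0,1|3;0,1"))  # [(6, [0, 1]), (3, [0, 1])]
print(parse_kernel_key("v1/6;"))           # [(6, [])] - dtype-only key from create_kernel_key
```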

docs/source/llm/getting-started.md

Lines changed: 22 additions & 24 deletions

@@ -434,18 +434,18 @@ to the backend(s) targeted at export. To support multiple devices, such as
 XNNPACK acceleration for Android and Core ML for iOS, export a separate PTE file
 for each backend.

-To delegate to a backend at export time, ExecuTorch provides the `to_backend()`
-function in the `EdgeProgramManager` object, which takes a backend-specific
-partitioner object. The partitioner is responsible for finding parts of the
-computation graph that can be accelerated by the target backend,and
-`to_backend()` function will delegate matched part to given backend for
-acceleration and optimization. Any portions of the computation graph not
-delegated will be executed by the ExecuTorch operator implementations.
+To delegate a model to a specific backend during export, ExecuTorch uses the
+`to_edge_transform_and_lower()` function. This function takes the exported program
+from `torch.export` and a backend-specific partitioner object. The partitioner
+identifies parts of the computation graph that can be optimized by the target
+backend. Within `to_edge_transform_and_lower()`, the exported program is
+converted to an edge dialect program. The partitioner then delegates compatible
+graph sections to the backend for acceleration and optimization. Any graph parts
+not delegated are executed by ExecuTorch's default operator implementations.

 To delegate the exported model to a specific backend, we need to import its
 partitioner as well as edge compile config from ExecuTorch codebase first, then
-call `to_backend` with an instance of partitioner on the `EdgeProgramManager`
-object `to_edge` function created.
+call `to_edge_transform_and_lower`.

 Here's an example of how to delegate nanoGPT to XNNPACK (if you're deploying to an Android phone for instance):

@@ -457,7 +457,7 @@ from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPar

 # Model to be delegated to specific backend should use specific edge compile config
 from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
-from executorch.exir import EdgeCompileConfig, to_edge
+from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower

 import torch
 from torch.export import export

@@ -495,17 +495,14 @@ with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
 # Convert the model into a runnable ExecuTorch program.
 # To be further lowered to Xnnpack backend, `traced_model` needs xnnpack-specific edge compile config
 edge_config = get_xnnpack_edge_compile_config()
-edge_manager = to_edge(traced_model, compile_config=edge_config)
-
-# Delegate exported model to Xnnpack backend by invoking `to_backend` function with Xnnpack partitioner.
-edge_manager = edge_manager.to_backend(XnnpackPartitioner())
+# Converted to edge program and then delegate exported model to Xnnpack backend
+# by invoking `to` function with Xnnpack partitioner.
+edge_manager = to_edge_transform_and_lower(traced_model, partitioner = [XnnpackPartitioner()], compile_config = edge_config)
 et_program = edge_manager.to_executorch()

 # Save the Xnnpack-delegated ExecuTorch program to a file.
 with open("nanogpt.pte", "wb") as file:
     file.write(et_program.buffer)
-
-
 ```

 Additionally, update CMakeLists.txt to build and link the XNNPACK backend to

@@ -651,8 +648,8 @@ DuplicateDynamicQuantChainPass()(m)
 traced_model = export(m, example_inputs)
 ```

-Additionally, add or update the `to_backend()` call to use `XnnpackPartitioner`. This instructs ExecuTorch to
-optimize the model for CPU execution via the XNNPACK backend.
+Additionally, add or update the `to_edge_transform_and_lower()` call to use `XnnpackPartitioner`. This
+instructs ExecuTorch to optimize the model for CPU execution via the XNNPACK backend.

 ```python
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import (

@@ -661,8 +658,9 @@ from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
 )

 ```python
-edge_manager = to_edge(traced_model, compile_config=edge_config)
-edge_manager = edge_manager.to_backend(XnnpackPartitioner()) # Lower to XNNPACK.
+edge_config = get_xnnpack_edge_compile_config()
+# Convert to edge dialect and lower to XNNPack.
+edge_manager = to_edge_transform_and_lower(traced_model, partitioner = [XnnpackPartitioner()], compile_config = edge_config)
 et_program = edge_manager.to_executorch()
 ```

@@ -682,20 +680,20 @@ target_link_libraries(
 For more information, see [Quantization in ExecuTorch](../quantization-overview.md).

 ## Profiling and Debugging
-After lowering a model by calling `to_backend()`, you may want to see what got delegated and what didn’t. ExecuTorch
+After lowering a model by calling `to_edge_transform_and_lower()`, you may want to see what got delegated and what didn’t. ExecuTorch
 provides utility methods to give insight on the delegation. You can use this information to gain visibility into
 the underlying computation and diagnose potential performance issues. Model authors can use this information to
 structure the model in a way that is compatible with the target backend.

 ### Visualizing the Delegation

-The `get_delegation_info()` method provides a summary of what happened to the model after the `to_backend()` call:
+The `get_delegation_info()` method provides a summary of what happened to the model after the `to_edge_transform_and_lower()` call:

 ```python
 from executorch.devtools.backend_debug import get_delegation_info
 from tabulate import tabulate

-# ... After call to to_backend(), but before to_executorch()
+# ... After call to to_edge_transform_and_lower(), but before to_executorch()
 graph_module = edge_manager.exported_program().graph_module
 delegation_info = get_delegation_info(graph_module)
 print(delegation_info.get_summary())

@@ -762,7 +760,7 @@ Through the ExecuTorch Developer Tools, users are able to profile model executio
 An ETRecord is an artifact generated at the time of export that contains model graphs and source-level metadata linking the ExecuTorch program to the original PyTorch model. You can view all profiling events without an ETRecord, though with an ETRecord, you will also be able to link each event to the types of operators being executed, module hierarchy, and stack traces of the original PyTorch source code. For more information, see [the ETRecord docs](../etrecord.md).


-In your export script, after calling `to_edge()` and `to_executorch()`, call `generate_etrecord()` with the `EdgeProgramManager` from `to_edge()` and the `ExecuTorchProgramManager` from `to_executorch()`. Make sure to copy the `EdgeProgramManager`, as the call to `to_backend()` mutates the graph in-place.
+In your export script, after calling `to_edge()` and `to_executorch()`, call `generate_etrecord()` with the `EdgeProgramManager` from `to_edge()` and the `ExecuTorchProgramManager` from `to_executorch()`. Make sure to copy the `EdgeProgramManager`, as the call to `to_edge_transform_and_lower()` mutates the graph in-place.

 ```
 # export_nanogpt.py
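
The doc snippet above is truncated by the diff context. As a hedged sketch of the export-script addition it describes (the `etrecord.bin` path and deepcopy placement are illustrative assumptions, following the `generate_etrecord(path, edge_program, executorch_program)` call the docs refer to):

```python
import copy

from executorch.devtools import generate_etrecord

# Copy the edge program before further lowering/mutation, then finalize.
edge_manager_copy = copy.deepcopy(edge_manager)
et_program = edge_manager.to_executorch()

# Link the edge-dialect graph to the final program for profiling/debugging.
generate_etrecord("etrecord.bin", edge_manager_copy, et_program)
```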

examples/qualcomm/oss_scripts/llama/TARGETS

Lines changed: 2 additions & 1 deletion

@@ -22,12 +22,12 @@ python_library(
         "//caffe2:torch",
         "//executorch/backends/qualcomm/partition:partition",
         "//executorch/backends/qualcomm/quantizer:quantizer",
+        "//executorch/devtools/backend_debug:delegation_info",
         "//executorch/devtools:lib",
         "//executorch/examples/models:models",
         "//executorch/examples/qualcomm/oss_scripts/llama:static_llama",
         "//executorch/examples/qualcomm:utils",
         "//executorch/extension/export_util:export_util",
-        "//executorch/extension/llm/custom_ops:model_sharding_py",
         "//executorch/extension/llm/export:export_lib",
         "//executorch/extension/pybindings:aten_lib",
     ],

@@ -46,6 +46,7 @@ python_binary(
         "//executorch/extension/pybindings:aten_lib",
         "//executorch/backends/qualcomm/partition:partition",
         "//executorch/backends/qualcomm/quantizer:quantizer",
+        "//executorch/devtools/backend_debug:delegation_info",
         "//executorch/devtools:lib",
         "//executorch/examples/models:models",
         "//executorch/examples/qualcomm:utils",
