Skip to content

Commit 2b20ed6

Browse files
author
Github Executorch
committed
Update base for Update on "Reuse GELU implementation from PyTorch core"
kernels/optimized doesn't need to support embedded systems, so it can just take a header-only dep on PyTorch. Note that, because we will pick up Sleef internally and ignore it externally thanks to ATen vec, this PR gets to enable optimized GELU in OSS. Testing: CI to make sure this doesn't break mobile build modes; happy to take advice on anything not currently covered that might break. Differential Revision: [D66335522](https://our.internmc.facebook.com/intern/diff/D66335522/) [ghstack-poisoned]
2 parents fc56fcd + 81f7c4f commit 2b20ed6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+2063
-527
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0a94bb432ed75cc2d950d81b2921363218a7e459
1+
27e35de6c288bffad1b4d18b393579c1d1a95547

.ci/docker/conda-env-ci.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
cmake=3.22.1
22
ninja=1.10.2
33
libuv
4+
llvm-openmp
45
pkg-config

.ci/scripts/setup-macos.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ setup_macos_env_variables
121121
# NB: we need buck2 in all cases because cmake build also depends on calling
122122
# buck2 atm
123123
install_buck
124+
brew install libomp
124125
install_pip_dependencies
125126

126127
# TODO(huydhn): Unlike our self-hosted runner, GitHub runner doesn't have access

.github/workflows/pull.yml

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -212,17 +212,14 @@ jobs:
212212
docker-image: executorch-ubuntu-22.04-clang12
213213
submodules: 'true'
214214
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
215-
timeout: 90
215+
timeout: 180
216216
script: |
217217
# The generic Linux job chooses to use base env, not the one setup by the image
218218
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
219219
conda activate "${CONDA_ENV}"
220220
221221
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
222222
223-
# install pybind
224-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
225-
226223
# install Llava requirements
227224
bash examples/models/llama/install_requirements.sh
228225
bash examples/models/llava/install_requirements.sh
@@ -483,9 +480,6 @@ jobs:
483480
484481
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
485482
486-
# install pybind
487-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
488-
489483
# install phi-3-mini requirements
490484
bash examples/models/phi-3-mini/install_requirements.sh
491485
@@ -513,9 +507,6 @@ jobs:
513507
514508
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
515509
516-
# install pybind
517-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
518-
519510
# install llama requirements
520511
bash examples/models/llama/install_requirements.sh
521512
@@ -535,17 +526,14 @@ jobs:
535526
docker-image: executorch-ubuntu-22.04-clang12
536527
submodules: 'true'
537528
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
538-
timeout: 90
529+
timeout: 180
539530
script: |
540531
# The generic Linux job chooses to use base env, not the one setup by the image
541532
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
542533
conda activate "${CONDA_ENV}"
543534
544535
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
545536
546-
# install pybind
547-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
548-
549537
# install llama requirements
550538
bash examples/models/llama/install_requirements.sh
551539
@@ -573,9 +561,6 @@ jobs:
573561
574562
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
575563
576-
# install pybind
577-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
578-
579564
# install llama requirements
580565
bash examples/models/llama/install_requirements.sh
581566

.mypy.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ ignore_missing_imports = True
7777
[mypy-ruamel]
7878
ignore_missing_imports = True
7979

80+
[mypy-serializer.*]
81+
ignore_missing_imports = True
82+
8083
[mypy-setuptools.*]
8184
ignore_missing_imports = True
8285

CMakeLists.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ cmake_dependent_option(
240240
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
241241
)
242242

243+
244+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
245+
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
246+
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
247+
set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
248+
endif()
249+
243250
if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
244251
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
245252
set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON)
@@ -802,6 +809,35 @@ if(EXECUTORCH_BUILD_PYBIND)
802809
install(TARGETS portable_lib
803810
LIBRARY DESTINATION executorch/extension/pybindings
804811
)
812+
813+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
814+
815+
set(_pybind_training_dep_libs
816+
${TORCH_PYTHON_LIBRARY}
817+
etdump
818+
executorch
819+
util
820+
torch
821+
extension_training
822+
)
823+
824+
if(EXECUTORCH_BUILD_XNNPACK)
825+
# need to explicitly specify XNNPACK and microkernels-prod
826+
# here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
827+
list(APPEND _pybind_training_dep_libs xnnpack_backend XNNPACK microkernels-prod)
828+
endif()
829+
830+
# pybind training
831+
pybind11_add_module(_training_lib SHARED extension/training/pybindings/_training_lib.cpp)
832+
833+
target_include_directories(_training_lib PRIVATE ${TORCH_INCLUDE_DIRS})
834+
target_compile_options(_training_lib PUBLIC ${_pybind_compile_options})
835+
target_link_libraries(_training_lib PRIVATE ${_pybind_training_dep_libs})
836+
837+
install(TARGETS _training_lib
838+
LIBRARY DESTINATION executorch/extension/training/pybindings
839+
)
840+
endif()
805841
endif()
806842

807843
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
<hr>
1313
</div>
1414

15-
**ExecuTorch** is an end-to-end solution for on-device inference and training. It powers much of Meta's on-device AI functionality including: Facebook, Oculus, Meta Glasses, Instagram, WhatsApp and more.
15+
**ExecuTorch** is an end-to-end solution for on-device inference and training. It powers much of Meta's on-device AI experiences across Facebook, Instagram, Meta Quest, Ray-Ban Meta Smart Glasses, WhatsApp, and more.
1616

17-
It covers a wide gamut of models including: LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), TTS (Text to Speech).
17+
It supports a wide range of models including LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), and TTS (Text to Speech).
1818

1919
Platform Support:
2020
- Operating Systems:
@@ -26,9 +26,10 @@ Platform Support:
2626

2727
- Hardware Acceleration:
2828
- Apple
29-
- ARM
29+
- Arm
3030
- Cadence
3131
- MediaTek
32+
- Qualcomm
3233
- Vulkan
3334
- XNNPACK
3435

backends/arm/_passes/insert_table_ops.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class InsertTableOpsPass(ExportPass):
3131
"""
3232
For ops in self.table_ops they need to be serialized as a TOSA TABLE. This pass replaces these
3333
edge ops with a tosa._table(input: Tensor, target_str: str) where target_str == str(node.target).
34-
When loweringthe _table node target_str will be used to find the corresponding torch operator
34+
When lowering the _table node target_str will be used to find the corresponding torch operator
3535
which will be used to produce the table values in operators/op_table.py.
3636
"""
3737

@@ -43,6 +43,7 @@ class InsertTableOpsPass(ExportPass):
4343
exir_ops.edge.aten.sigmoid.default: torch.sigmoid,
4444
exir_ops.edge.aten.tanh.default: torch.tanh,
4545
exir_ops.edge.aten.hardsigmoid.default: torch.nn.functional.hardsigmoid,
46+
exir_ops.edge.aten.hardswish.default: torch.nn.functional.hardswish,
4647
}
4748

4849
def __init__(self, exported_program: ExportedProgram) -> None:

backends/arm/arm_partitioner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def ops_to_not_decompose(
115115
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
116116
ops_to_not_decompose_if_quant_op = [
117117
torch.ops.aten.hardsigmoid.default,
118+
torch.ops.aten.hardswish.default,
118119
]
119120

120121
def filter_fn(node: torch.fx.Node) -> bool:

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def is_node_supported(self, submodules, node: fx.Node) -> bool:
8181
exir_ops.edge.aten.permute_copy.default,
8282
exir_ops.edge.aten.hardsigmoid.default,
8383
exir_ops.edge.aten.hardtanh.default,
84+
exir_ops.edge.aten.hardswish.default,
8485
exir_ops.edge.aten.convolution.default,
8586
exir_ops.edge.aten.div.Tensor,
8687
exir_ops.edge.aten.eq.Tensor,

0 commit comments

Comments
 (0)