Skip to content

Commit 157df30

Browse files
author
GitHub Executorch
committed
Update on "Reuse GELU implementation from PyTorch core"
kernels/optimized doesn't need to support embedded systems, so it can just take a header-only dep on PyTorch. Note that, because we will pick up Sleef internally and ignore it externally thanks to ATen vec, this PR gets to enable optimized GELU in OSS. Testing: CI to make sure this doesn't break mobile build modes; happy to take advice on anything not currently covered that might break. Differential Revision: [D66335522](https://our.internmc.facebook.com/intern/diff/D66335522/) [ghstack-poisoned]
2 parents 7d837ab + 2b20ed6 commit 157df30

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

72 files changed

+2063
-527
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0a94bb432ed75cc2d950d81b2921363218a7e459
1+
27e35de6c288bffad1b4d18b393579c1d1a95547

.ci/docker/conda-env-ci.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
cmake=3.22.1
22
ninja=1.10.2
33
libuv
4+
llvm-openmp
45
pkg-config

.ci/scripts/setup-macos.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ setup_macos_env_variables
121121
# NB: we need buck2 in all cases because cmake build also depends on calling
122122
# buck2 atm
123123
install_buck
124+
brew install libomp
124125
install_pip_dependencies
125126

126127
# TODO(huydhn): Unlike our self-hosted runner, GitHub runner doesn't have access

.github/workflows/pull.yml

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -214,17 +214,14 @@ jobs:
214214
docker-image: executorch-ubuntu-22.04-clang12
215215
submodules: 'true'
216216
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
217-
timeout: 90
217+
timeout: 180
218218
script: |
219219
# The generic Linux job chooses to use base env, not the one setup by the image
220220
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
221221
conda activate "${CONDA_ENV}"
222222
223223
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
224224
225-
# install pybind
226-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
227-
228225
# install Llava requirements
229226
bash examples/models/llama/install_requirements.sh
230227
bash examples/models/llava/install_requirements.sh
@@ -485,9 +482,6 @@ jobs:
485482
486483
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
487484
488-
# install pybind
489-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
490-
491485
# install phi-3-mini requirements
492486
bash examples/models/phi-3-mini/install_requirements.sh
493487
@@ -515,9 +509,6 @@ jobs:
515509
516510
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
517511
518-
# install pybind
519-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
520-
521512
# install llama requirements
522513
bash examples/models/llama/install_requirements.sh
523514
@@ -537,17 +528,14 @@ jobs:
537528
docker-image: executorch-ubuntu-22.04-clang12
538529
submodules: 'true'
539530
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
540-
timeout: 90
531+
timeout: 180
541532
script: |
542533
# The generic Linux job chooses to use base env, not the one setup by the image
543534
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
544535
conda activate "${CONDA_ENV}"
545536
546537
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
547538
548-
# install pybind
549-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
550-
551539
# install llama requirements
552540
bash examples/models/llama/install_requirements.sh
553541
@@ -575,9 +563,6 @@ jobs:
575563
576564
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
577565
578-
# install pybind
579-
bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
580-
581566
# install llama requirements
582567
bash examples/models/llama/install_requirements.sh
583568

.mypy.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ ignore_missing_imports = True
7777
[mypy-ruamel]
7878
ignore_missing_imports = True
7979

80+
[mypy-serializer.*]
81+
ignore_missing_imports = True
82+
8083
[mypy-setuptools.*]
8184
ignore_missing_imports = True
8285

CMakeLists.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ cmake_dependent_option(
240240
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
241241
)
242242

243+
244+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
245+
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
246+
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
247+
set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
248+
endif()
249+
243250
if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT)
244251
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
245252
set(EXECUTORCH_BUILD_KERNELS_CUSTOM ON)
@@ -802,6 +809,35 @@ if(EXECUTORCH_BUILD_PYBIND)
802809
install(TARGETS portable_lib
803810
LIBRARY DESTINATION executorch/extension/pybindings
804811
)
812+
813+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
814+
815+
set(_pybind_training_dep_libs
816+
${TORCH_PYTHON_LIBRARY}
817+
etdump
818+
executorch
819+
util
820+
torch
821+
extension_training
822+
)
823+
824+
if(EXECUTORCH_BUILD_XNNPACK)
825+
# need to explicitly specify XNNPACK and microkernels-prod
826+
# here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
827+
list(APPEND _pybind_training_dep_libs xnnpack_backend XNNPACK microkernels-prod)
828+
endif()
829+
830+
# pybind training
831+
pybind11_add_module(_training_lib SHARED extension/training/pybindings/_training_lib.cpp)
832+
833+
target_include_directories(_training_lib PRIVATE ${TORCH_INCLUDE_DIRS})
834+
target_compile_options(_training_lib PUBLIC ${_pybind_compile_options})
835+
target_link_libraries(_training_lib PRIVATE ${_pybind_training_dep_libs})
836+
837+
install(TARGETS _training_lib
838+
LIBRARY DESTINATION executorch/extension/training/pybindings
839+
)
840+
endif()
805841
endif()
806842

807843
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
<hr>
1313
</div>
1414

15-
**ExecuTorch** is an end-to-end solution for on-device inference and training. It powers much of Meta's on-device AI functionality including: Facebook, Oculus, Meta Glasses, Instagram, WhatsApp and more.
15+
**ExecuTorch** is an end-to-end solution for on-device inference and training. It powers much of Meta's on-device AI experiences across Facebook, Instagram, Meta Quest, Ray-Ban Meta Smart Glasses, WhatsApp, and more.
1616

17-
It covers a wide gamut of models including: LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), TTS (Text to Speech).
17+
It supports a wide range of models including LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), and TTS (Text to Speech).
1818

1919
Platform Support:
2020
- Operating Systems:
@@ -26,9 +26,10 @@ Platform Support:
2626

2727
- Hardware Acceleration:
2828
- Apple
29-
- ARM
29+
- Arm
3030
- Cadence
3131
- MediaTek
32+
- Qualcomm
3233
- Vulkan
3334
- XNNPACK
3435

backends/arm/_passes/insert_table_ops.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class InsertTableOpsPass(ExportPass):
3131
"""
3232
For ops in self.table_ops they need to be serialized as a TOSA TABLE. This pass replaces these
3333
edge ops with a tosa._table(input: Tensor, target_str: str) where target_str == str(node.target).
34-
When loweringthe _table node target_str will be used to find the corresponding torch operator
34+
When lowering the _table node target_str will be used to find the corresponding torch operator
3535
which will be used to produce the table values in operators/op_table.py.
3636
"""
3737

@@ -43,6 +43,7 @@ class InsertTableOpsPass(ExportPass):
4343
exir_ops.edge.aten.sigmoid.default: torch.sigmoid,
4444
exir_ops.edge.aten.tanh.default: torch.tanh,
4545
exir_ops.edge.aten.hardsigmoid.default: torch.nn.functional.hardsigmoid,
46+
exir_ops.edge.aten.hardswish.default: torch.nn.functional.hardswish,
4647
}
4748

4849
def __init__(self, exported_program: ExportedProgram) -> None:

backends/arm/arm_partitioner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def ops_to_not_decompose(
115115
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
116116
ops_to_not_decompose_if_quant_op = [
117117
torch.ops.aten.hardsigmoid.default,
118+
torch.ops.aten.hardswish.default,
118119
]
119120

120121
def filter_fn(node: torch.fx.Node) -> bool:

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def is_node_supported(self, submodules, node: fx.Node) -> bool:
8181
exir_ops.edge.aten.permute_copy.default,
8282
exir_ops.edge.aten.hardsigmoid.default,
8383
exir_ops.edge.aten.hardtanh.default,
84+
exir_ops.edge.aten.hardswish.default,
8485
exir_ops.edge.aten.convolution.default,
8586
exir_ops.edge.aten.div.Tensor,
8687
exir_ops.edge.aten.eq.Tensor,

0 commit comments

Comments
 (0)