Skip to content

Commit 0fc4475

Browse files
committed
Update
[ghstack-poisoned]
2 parents 4b43363 + 995c4b5 commit 0fc4475

File tree

66 files changed

+1643
-201
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+1643
-201
lines changed

.Package.swift/kernels_torchao/dummy.swift

Whitespace-only changes.

.Package.swift/kernels_torchao_debug/dummy.swift

Whitespace-only changes.

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,27 +29,22 @@ cmake -DPYTHON_EXECUTABLE=python \
2929
-DEXECUTORCH_ENABLE_LOGGING=1 \
3030
-DCMAKE_BUILD_TYPE=Release \
3131
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
32-
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
33-
-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
34-
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
3532
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
33+
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
3634
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
3735
-DEXECUTORCH_BUILD_XNNPACK=OFF \
3836
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
3937
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
38+
-DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON \
39+
-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
40+
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
4041
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
4142
-Bcmake-out .
42-
cmake --build cmake-out -j16 --target install --config Release
43+
cmake --build cmake-out -j16 --config Release --target install
4344

4445
# Install llama runner with torchao
4546
cmake -DPYTHON_EXECUTABLE=python \
46-
-DBUILD_TESTING=OFF \
4747
-DCMAKE_BUILD_TYPE=Release \
48-
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
49-
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
50-
-DEXECUTORCH_BUILD_XNNPACK=OFF \
51-
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
52-
-DEXECUTORCH_BUILD_TORCHAO=ON \
5348
-Bcmake-out/examples/models/llama \
5449
examples/models/llama
5550
cmake --build cmake-out/examples/models/llama -j16 --config Release

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ jobs:
485485
eval "$(conda shell.bash hook)"
486486
487487
# Install requirements
488-
${CONDA_RUN} EXECUTORCH_BUILD_TORCHAO=1 python install_executorch.py
488+
${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 python install_executorch.py
489489
${CONDA_RUN} sh examples/models/llama/install_requirements.sh
490490
491491
# Run test

.gitmodules

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
[submodule "backends/arm/third-party/ethos-u-core-driver"]
22
path = backends/arm/third-party/ethos-u-core-driver
33
url = https://git.gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-core-driver.git
4-
[submodule "backends/arm/third-party/serialization_lib"]
5-
path = backends/arm/third-party/serialization_lib
6-
url = https://git.gitlab.arm.com/tosa/tosa-serialization.git
74
[submodule "backends/vulkan/third-party/Vulkan-Headers"]
85
path = backends/vulkan/third-party/Vulkan-Headers
96
url = https://github.com/KhronosGroup/Vulkan-Headers

CMakeLists.txt

Lines changed: 59 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -278,29 +278,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
278278
)
279279
endif()
280280

281-
if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
282-
set(TORCHAO_BUILD_ATEN_OPS OFF)
283-
set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
284-
set(TORCHAO_BUILD_CPU_AARCH64 ON)
285-
set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
286-
287-
list(
288-
APPEND
289-
TORCHAO_INCLUDE_DIRS
290-
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
291-
${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
292-
${EXECUTORCH_ROOT}/third-party/ao
293-
)
294-
295-
set(EXECUTORCH_INCLUDE_DIRS ${TORCHAO_INCLUDE_DIRS})
296-
297-
add_subdirectory(
298-
${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
299-
)
300-
executorch_target_link_options_shared_lib(torchao_ops_executorch)
301-
list(APPEND _executorch_kernels torchao_ops_executorch)
302-
endif()
303-
304281
if(EXECUTORCH_BUILD_TESTS)
305282
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
306283
include(CTest)
@@ -705,6 +682,65 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
705682
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
706683
endif()
707684

685+
if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
  # Preconditions: the cpuinfo and pthreadpool targets must already exist
  # before the TorchAO kernels are configured. Fail the configure step with an
  # actionable message otherwise.
  if(NOT TARGET cpuinfo)
    message(
      FATAL_ERROR
        "EXECUTORCH_BUILD_KERNELS_TORCHAO requires EXECUTORCH_BUILD_CPUINFO be set ON"
    )
  endif()
  if(NOT TARGET pthreadpool)
    message(
      FATAL_ERROR
        "EXECUTORCH_BUILD_KERNELS_TORCHAO requires EXECUTORCH_BUILD_PTHREADPOOL be set ON"
    )
  endif()

  # Settings read by the TorchAO subproject added below: build the ExecuTorch
  # op flavour only (no ATen ops), with the AArch64 CPU kernels, NEON dot
  # support and KleidiAI switched on.
  set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
  set(TORCHAO_BUILD_ATEN_OPS OFF)
  set(TORCHAO_BUILD_CPU_AARCH64 ON)
  set(TORCHAO_BUILD_KLEIDIAI ON)
  set(TORCHAO_ENABLE_ARM_NEON_DOT ON)

  # TorchAO kernels look for EXECUTORCH_INCLUDE_DIRS. The variable is owned by
  # this block only for the duration of the add_subdirectory() call, so refuse
  # to clobber a value somebody else has already defined.
  if(DEFINED EXECUTORCH_INCLUDE_DIRS)
    message(FATAL_ERROR "EXECUTORCH_INCLUDE_DIRS is already defined")
  endif()
  set(EXECUTORCH_INCLUDE_DIRS
      ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
      ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
  )
  add_subdirectory(
    ${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
  )
  # Drop the temporary variable again so it does not leak to later code.
  unset(EXECUTORCH_INCLUDE_DIRS)

  # Register the TorchAO op library with the rest of the build.
  executorch_target_link_options_shared_lib(torchao_ops_executorch)
  list(APPEND _executorch_kernels torchao_ops_executorch)

  # Install the TorchAO libraries as part of the ExecuTorchTargets export set.
  install(
    TARGETS torchao_ops_executorch torchao_kernels_aarch64
    EXPORT ExecuTorchTargets
    DESTINATION lib
    INCLUDES
    DESTINATION ${_common_include_directories}
  )

  # If using KleidiAI and XNNPACK has not installed it already, install it
  # here so the export set stays complete.
  if(TORCHAO_BUILD_KLEIDIAI
     AND NOT (EXECUTORCH_BUILD_XNNPACK AND EXECUTORCH_XNNPACK_ENABLE_KLEIDI)
  )
    install(
      TARGETS kleidiai
      EXPORT ExecuTorchTargets
      DESTINATION lib
      INCLUDES
      DESTINATION ${_common_include_directories}
    )
  endif()

endif()
743+
708744
if(EXECUTORCH_BUILD_PYBIND)
709745

710746
# Add codegen tools subdirectory for selective_build pybind module

Package.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ let products = deliverables([
8484
],
8585
],
8686
"kernels_quantized": [:],
87+
"kernels_torchao": [
88+
"targets": [
89+
"threadpool",
90+
],
91+
],
8792
])
8893

8994
let targets = deliverables([

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass # noqa
4545
from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass # noqa
4646
from .decompose_linear_pass import DecomposeLinearPass # noqa
47+
from .decompose_logit_pass import DecomposeLogitPass # noqa
4748
from .decompose_masked_fill import DecomposeMaskedFill # noqa
4849
from .decompose_maxpool2d_with_dilation import DecomposeMaxPool2DPass # noqa
4950
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
DecomposeLeakyReLUPass,
5050
DecomposeLinearPass,
5151
DecomposeLinearVectorNormPass,
52+
DecomposeLogitPass,
5253
DecomposeMaskedFill,
5354
DecomposeMaxPool2DPass,
5455
DecomposeMeanDimPass,
@@ -166,6 +167,7 @@ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
166167

167168
def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
168169
self.add_pass(DecomposeExpm1Pass())
170+
self.add_pass(DecomposeLogitPass())
169171
self.add_pass(DecomposeMaskedFill())
170172
self.add_pass(DecomposeRoundPass())
171173
self.add_pass(DecomposeAcoshPass())
@@ -257,6 +259,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
257259
self.add_pass(DecomposeEmbeddingPass())
258260
self.add_pass(DecomposeScaledDotProductAttention())
259261
self.add_pass(DecomposeRoundPass())
262+
self.add_pass(DecomposeLogitPass())
260263
self.add_pass(CastBoolToInt8Pass())
261264
self.add_pass(DecomposeSignPass())
262265
self.add_pass(DecomposeAddmmPass())
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import torch
7+
8+
from executorch.backends.arm._passes import ArmPass
9+
from executorch.exir.dialects._ops import ops as exir_ops
10+
11+
12+
# Edge-dialect overload of logit; this is the form handled for the FP case.
edge_logit = exir_ops.edge.aten.logit.default
# ATen overload of logit; this is the form handled for the INT case.
aten_logit = torch.ops.aten.logit.default
16+
17+
18+
def get_ops(op):
    """Pick the primitive operators that live in the same dialect as ``op``.

    Args:
        op: The logit operator being decomposed; either ``edge_logit`` or
            ``aten_logit``.

    Returns:
        A 6-tuple of operators, in this order:
        ``(log, add.Scalar, reciprocal, mul.Tensor, mul.Scalar, clamp)``.

    Raises:
        ValueError: If ``op`` is neither of the two supported logit operators.
    """
    # Both dialects expose the same overload names, so only the namespace
    # differs between the two supported cases.
    if op == edge_logit:
        namespace = exir_ops.edge.aten
    elif op == aten_logit:
        namespace = torch.ops.aten
    else:
        raise ValueError(f"Unsupported operator: {op}")

    return (
        namespace.log.default,
        namespace.add.Scalar,
        namespace.reciprocal.default,
        namespace.mul.Tensor,
        namespace.mul.Scalar,
        namespace.clamp.default,
    )
40+
41+
42+
class DecomposeLogitPass(ArmPass):
    """
    Rewrites the `logit` operator as a chain of primitive operators.

    The rewrite is based on the identity:

        logit(x) = log(x / (1 - x))

    where the division is expressed as a multiplication by a reciprocal and
    `1 - x` is expressed as `(-1) * x + 1`.

    Without `eps`:

        logit(x)  ->  log(x * reciprocal((-1) * x + 1))

    With `eps`, the input is first clamped to the range [eps, 1 - eps]:

        y = clamp(x, eps, 1 - eps)
        logit(x, eps)  ->  log(y * reciprocal((-1) * y + 1))
    """

    def call_operator(self, op, args, kwargs, meta):
        """Replace a logit call with its decomposition; pass other ops through.

        `eps` is read from the second positional argument when present,
        otherwise from the `eps` keyword argument (defaulting to None).
        """
        # Anything that is not one of the two logit overloads is left alone.
        if op not in (edge_logit, aten_logit):
            return super().call_operator(op, args, kwargs, meta)

        value = args[0]
        if len(args) > 1:
            eps = args[1]
        else:
            eps = kwargs.get("eps", None)

        (
            log_op,
            add_scalar_op,
            recip_op,
            mul_tensor_op,
            mul_scalar_op,
            clamp_op,
        ) = get_ops(op)

        # Every replacement node goes through the parent implementation with
        # the same meta and updated=True.
        # NOTE(review): `updated` is defined by ArmPass, which is not visible
        # here - confirm its exact meaning there.
        emit = super().call_operator

        # Optional clamp to [eps, 1 - eps].
        if eps is not None:
            value = emit(clamp_op, (value, eps, 1.0 - eps), {}, meta, updated=True)

        # 1 - x, built as (-1) * x + 1.
        negated = emit(mul_scalar_op, (value, -1.0), {}, meta, updated=True)
        one_minus_value = emit(add_scalar_op, (negated, 1.0), {}, meta, updated=True)

        # x / (1 - x), built as x * reciprocal(1 - x).
        inverse = emit(recip_op, (one_minus_value,), {}, meta, updated=True)
        ratio = emit(mul_tensor_op, (value, inverse), {}, meta, updated=True)

        return emit(log_op, (ratio,), {}, meta, updated=True)

0 commit comments

Comments
 (0)