Commit a0ef05a

Merge branch 'main' into gh/jackzhxng/11/orig
2 parents: 8f1c751 + 18e9149

148 files changed: +7110, −1555 lines


.ci/scripts/unittest-buck2.sh

Lines changed: 6 additions & 4 deletions
@@ -15,7 +15,7 @@ buck2 query "//backends/apple/... + //backends/example/... + \
   //kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
   //kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
 
-UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
+UNBUILDABLE_OPTIMIZED_OPS_REGEX="_elu|gelu|fft|log_softmax"
 BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
 
 # TODO: build prim_ops_test_cpp again once supported_features works in
@@ -24,6 +24,8 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
 # TODO: expand the covered scope of Buck targets.
 # //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
 # //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
-buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... \
-  $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
-  //runtime/executor: //runtime/kernel/... //runtime/platform/...
+for op in "build" "test"; do
+  buck2 $op $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... \
+    $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
+    //runtime/executor: //runtime/kernel/... //runtime/platform/...
+done
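For context, the hunk replaces the single `buck2 test` invocation with a loop that first builds and then tests the same target set. A minimal Python sketch of that same build-then-test flow is shown below; the helper function, its name, and the example target list are illustrative, not part of the repository.

# Illustrative only: mirrors the shell loop added above, which runs
# `buck2 build` and then `buck2 test` over the same set of targets.
import subprocess

def build_then_test(targets: list[str]) -> None:
    for op in ("build", "test"):
        # Fail fast if either phase fails, like `set -e` does in the script.
        subprocess.run(["buck2", op, *targets], check=True)

build_then_test(["//kernels/portable/...", "//runtime/platform/..."])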

.lintrunner.toml

Lines changed: 4 additions & 0 deletions
@@ -271,6 +271,10 @@ exclude_patterns = [
     'examples/**',
     'exir/verification/bindings.cpp',
     'extension/**',
+    # Uses properly-gated (ET_USE_PYTORCH_HEADERS) ATen include.
+    'kernels/portable/cpu/util/elementwise_util.h',
+    'kernels/portable/cpu/util/math_util.h',
+    'kernels/portable/cpu/util/vectorized_math.h',
     'kernels/optimized/**',
     'runtime/core/exec_aten/**',
     # Want to be able to keep c10 in sync with PyTorch core.

CMakeLists.txt

Lines changed: 0 additions & 8 deletions
@@ -430,14 +430,6 @@ endif()
 
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
 
-#
-# gflags: Commandline flag host library.
-#
-
-if(EXECUTORCH_BUILD_GFLAGS)
-  add_subdirectory(third-party/gflags)
-endif()
-
 # Install `executorch` library as well as `executorch-config.cmake` under
 # ${CMAKE_INSTALL_PREFIX}/
 install(

backends/apple/coreml/scripts/build_tests.sh

Lines changed: 1 addition & 2 deletions
@@ -33,8 +33,7 @@ cmake "$EXECUTORCH_ROOT_PATH" -B"$CMAKE_EXECUTORCH_BUILD_DIR_PATH" \
 -DPLATFORM=MAC_UNIVERSAL \
 -DDEPLOYMENT_TARGET=13.0 \
 -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
--DEXECUTORCH_BUILD_XNNPACK=OFF \
--DEXECUTORCH_BUILD_GFLAGS=OFF
+-DEXECUTORCH_BUILD_XNNPACK=OFF
 
 cmake --build "$CMAKE_EXECUTORCH_BUILD_DIR_PATH" -j9 -t executorch
 

backends/arm/_passes/scalars_to_attribute_pass.py

Lines changed: 1 addition & 1 deletion
@@ -12,8 +12,8 @@
 from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
 
 from executorch.exir.pass_base import ExportPass, PassResult
-from torch.ao.quantization.fx.utils import get_new_attr_name_with_prefix
 from torch.fx import GraphModule, Node
+from torchao.quantization.pt2e.utils import get_new_attr_name_with_prefix
 
 
 class ScalarsToAttributePass(ExportPass):
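The only change here is that `get_new_attr_name_with_prefix` is now imported from torchao rather than torch.ao. A minimal sketch of how this helper is typically used to attach a scalar to a graph module as an attribute follows; the wrapper function, prefix string, and tensor conversion are hypothetical illustrations, and the sketch assumes the torchao helper keeps the behavior of its torch.ao.quantization.fx.utils predecessor (returning a callable that picks a fresh, non-clashing attribute name).

# Minimal sketch (assumption: torchao's get_new_attr_name_with_prefix behaves
# like the old torch.ao helper, generating an unused attribute name on the module).
import torch
from torch.fx import GraphModule
from torchao.quantization.pt2e.utils import get_new_attr_name_with_prefix

def register_scalar_as_attr(gm: GraphModule, value: float) -> str:
    get_name = get_new_attr_name_with_prefix("_scalar_constant")  # hypothetical prefix
    attr_name = get_name(gm)
    setattr(gm, attr_name, torch.tensor(value))  # store the scalar as a tensor attribute
    return attr_name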

backends/cadence/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -82,6 +82,7 @@ elseif(EXECUTORCH_FUSION_G3_OPT)
     ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
 else()
   set(TARGET_DIR reference)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
 endif()
 
 
backends/cadence/aot/functions_hifi.yaml

Lines changed: 104 additions & 4 deletions
@@ -32,6 +32,36 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::add_out
 
+- op: bitwise_and.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_and_Scalar_out
+
+- op: bitwise_and.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_and_Tensor_out
+
+- op: bitwise_or.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_or_Scalar_out
+
+- op: bitwise_or.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_or_Tensor_out
+
+- op: bitwise_xor.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_xor_Scalar_out
+
+- op: bitwise_xor.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::bitwise_xor_Tensor_out
+
 - op: bmm.out
   kernels:
     - arg_meta: null
@@ -65,27 +95,82 @@
 - op: embedding.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::embedding_out
+      kernel_name: cadence::impl::HiFi::embedding_out
+
+- op: eq.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::eq_tensor_out
+
+- op: fmod.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::fmod_Tensor_out
+
+- op: fmod.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::fmod_Scalar_out
 
 - op: full.out
   kernels:
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::full_out
 
-- op: gt.Scalar_out
+- op: ge.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::ge_scalar_out
+
+- op: ge.Tensor_out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::gt_scalar_out
+      kernel_name: cadence::impl::HiFi::ge_tensor_out
 
 - op: gelu.out
   kernels:
     - arg_meta: null
      kernel_name: torch::executor::gelu_out
 
+- op: gt.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::gt_scalar_out
+
+- op: gt.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::gt_tensor_out
+
 - op: hardtanh.out
   kernels:
     - arg_meta: null
-      kernel_name: torch::executor::hardtanh_out
+      kernel_name: cadence::impl::HiFi::hardtanh_out
+
+- op: le.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::le_scalar_out
+
+- op: le.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::le_tensor_out
+
+- op: lt.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::lt_scalar_out
+
+- op: lt.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::lt_tensor_out
+
+- op: masked_fill.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::masked_fill_scalar_out
 
 - op: max_pool2d_with_indices.out
   kernels:
@@ -117,6 +202,11 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::mul_out
 
+- op: ne.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::ne_tensor_out
+
 - op: permute_copy.out
   kernels:
     - arg_meta: null
@@ -147,6 +237,11 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::rsqrt_out
 
+- op: select_copy.int_out
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::select_copy_int_out
+
 - op: sigmoid.out
   kernels:
     - arg_meta: null
@@ -239,6 +334,11 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::quantized_fully_connected_out
 
+- func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_matmul_out
+
 - func: cadence::quantized_fully_connected.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
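Each entry in this YAML binds an out-variant operator to a cadence::impl::HiFi kernel, and the new cadence::quantized_matmul.out line spells out the full argument list. Purely as an illustration of what a call matching that signature would look like, a Python sketch follows; it assumes the op is also exposed under the `cadence` namespace on the Python side, which this YAML alone does not establish, and the shapes, zero points, and scaling values are made up.

# Illustrative sketch of a call matching the quantized_matmul.out schema above.
# Assumption: the Cadence op library is loaded and registers the op under
# torch.ops.cadence; this file only wires the C++ kernel to the schema.
import torch

X = torch.randint(-128, 127, (2, 4, 8), dtype=torch.int8)
Y = torch.randint(-128, 127, (2, 8, 16), dtype=torch.int8)
out = torch.empty((2, 4, 16), dtype=torch.int8)

torch.ops.cadence.quantized_matmul.out(
    X, 0,       # X and X_zero_point
    Y, 0,       # Y and Y_zero_point
    None,       # optional bias
    1, 0, 0,    # out_multiplier, out_shift, out_zero_point
    False,      # transposed
    out=out,
)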
