Skip to content

Commit 4995717

Browse files
Merge branch 'main' into issue9971-profile
2 parents c6f99c1 + 17cbef5 commit 4995717

File tree

65 files changed

+2813
-617
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+2813
-617
lines changed

.ci/docker/requirements-ci.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ parameterized==0.9.0
1717

1818
# Doc build requirements, same as https://github.com/pytorch/pytorch/blob/main/.ci/docker/requirements-docs.txt
1919
sphinx==5.3.0
20+
sphinx-reredirects==0.1.4
2021
sphinx-gallery==0.14.0
2122
breathe==4.34.0
2223
exhale==0.2.3

.github/workflows/_android.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ jobs:
2222
script: |
2323
set -eux
2424
25+
# Use sccache for NDK compiler as well
26+
export CMAKE_CXX_COMPILER_LAUNCHER=sccache
27+
export CMAKE_C_COMPILER_LAUNCHER=sccache
28+
2529
# The generic Linux job chooses to use the base env, not the one set up by the image
2630
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
2731
conda activate "${CONDA_ENV}"

.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@ jobs:
5757
id-token: write
5858
contents: read
5959
with:
60-
models: ${{ inputs.models }}
60+
models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }}
6161
devices: google_pixel_3_private_rooted
6262
benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/android-perf.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,10 @@ jobs:
353353
script: |
354354
set -eux
355355
356+
# Use sccache for NDK compiler as well
357+
export CMAKE_CXX_COMPILER_LAUNCHER=sccache
358+
export CMAKE_C_COMPILER_LAUNCHER=sccache
359+
356360
# The generic Linux job chooses to use the base env, not the one set up by the image
357361
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
358362
conda activate "${CONDA_ENV}"
@@ -392,7 +396,7 @@ jobs:
392396
fail-fast: false
393397
with:
394398
# Due to scheduling, a job may be pushed beyond the default 60m threshold
395-
timeout: 120
399+
timeout: 240
396400
device-type: android
397401
runner: linux.2xlarge
398402
test-infra-ref: ''

.github/workflows/android-release-artifacts.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ jobs:
6060
script: |
6161
set -eux
6262
63+
# Use sccache for NDK compiler as well
64+
export CMAKE_CXX_COMPILER_LAUNCHER=sccache
65+
export CMAKE_C_COMPILER_LAUNCHER=sccache
66+
6367
# The generic Linux job chooses to use the base env, not the one set up by the image
6468
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
6569
conda activate "${CONDA_ENV}"

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,10 @@ if(EXECUTORCH_BUILD_PYBIND)
810810
torch
811811
)
812812

813+
if(EXECUTORCH_BUILD_TESTS)
814+
list(APPEND _dep_libs test_backend_compiler_lib)
815+
endif()
816+
813817
if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
814818
list(APPEND _dep_libs optimized_native_cpu_ops_lib)
815819
else()

backends/arm/test/ops/test_sigmoid.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99

1010
from typing import Tuple
1111

12+
import pytest
13+
1214
import torch
13-
from executorch.backends.arm.test import common
15+
from executorch.backends.arm.test import common, conftest
1416
from executorch.backends.arm.test.tester.arm_tester import ArmTester
1517
from executorch.exir.backend.compile_spec_schema import CompileSpec
1618
from parameterized import parameterized
@@ -63,7 +65,7 @@ def forward(self, x, y):
6365
def _test_sigmoid_tosa_MI_pipeline(
6466
self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
6567
):
66-
(
68+
tester = (
6769
ArmTester(
6870
module,
6971
example_inputs=test_data,
@@ -77,11 +79,13 @@ def _test_sigmoid_tosa_MI_pipeline(
7779
.check_not(["executorch_exir_dialects_edge__ops_aten_sigmoid_default"])
7880
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
7981
.to_executorch()
80-
.run_method_and_compare_outputs(inputs=test_data)
8182
)
8283

84+
if conftest.is_option_enabled("tosa_ref_model"):
85+
tester.run_method_and_compare_outputs(inputs=test_data)
86+
8387
def _test_sigmoid_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
84-
(
88+
tester = (
8589
ArmTester(
8690
module,
8791
example_inputs=test_data,
@@ -96,9 +100,11 @@ def _test_sigmoid_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tup
96100
.check_not(["executorch_exir_dialects_edge__ops_aten_sigmoid_default"])
97101
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
98102
.to_executorch()
99-
.run_method_and_compare_outputs(inputs=test_data)
100103
)
101104

105+
if conftest.is_option_enabled("tosa_ref_model"):
106+
tester.run_method_and_compare_outputs(inputs=test_data)
107+
102108
def _test_sigmoid_tosa_ethos_BI_pipeline(
103109
self,
104110
compile_spec: list[CompileSpec],
@@ -137,6 +143,7 @@ def _test_sigmoid_tosa_u85_BI_pipeline(
137143
)
138144

139145
@parameterized.expand(test_data_suite)
146+
@pytest.mark.tosa_ref_model
140147
def test_sigmoid_tosa_MI(
141148
self,
142149
test_name: str,
@@ -145,26 +152,33 @@ def test_sigmoid_tosa_MI(
145152
self._test_sigmoid_tosa_MI_pipeline(self.Sigmoid(), (test_data,))
146153

147154
@parameterized.expand(test_data_suite)
155+
@pytest.mark.tosa_ref_model
148156
def test_sigmoid_tosa_BI(self, test_name: str, test_data: torch.Tensor):
149157
self._test_sigmoid_tosa_BI_pipeline(self.Sigmoid(), (test_data,))
150158

159+
@pytest.mark.tosa_ref_model
151160
def test_add_sigmoid_tosa_MI(self):
152161
self._test_sigmoid_tosa_MI_pipeline(self.AddSigmoid(), (test_data_suite[0][1],))
153162

163+
@pytest.mark.tosa_ref_model
154164
def test_add_sigmoid_tosa_BI(self):
155165
self._test_sigmoid_tosa_BI_pipeline(self.AddSigmoid(), (test_data_suite[5][1],))
156166

167+
@pytest.mark.tosa_ref_model
157168
def test_sigmoid_add_tosa_MI(self):
158169
self._test_sigmoid_tosa_MI_pipeline(self.SigmoidAdd(), (test_data_suite[0][1],))
159170

171+
@pytest.mark.tosa_ref_model
160172
def test_sigmoid_add_tosa_BI(self):
161173
self._test_sigmoid_tosa_BI_pipeline(self.SigmoidAdd(), (test_data_suite[0][1],))
162174

175+
@pytest.mark.tosa_ref_model
163176
def test_sigmoid_add_sigmoid_tosa_MI(self):
164177
self._test_sigmoid_tosa_MI_pipeline(
165178
self.SigmoidAddSigmoid(), (test_data_suite[4][1], test_data_suite[3][1])
166179
)
167180

181+
@pytest.mark.tosa_ref_model
168182
def test_sigmoid_add_sigmoid_tosa_BI(self):
169183
self._test_sigmoid_tosa_BI_pipeline(
170184
self.SigmoidAddSigmoid(), (test_data_suite[4][1], test_data_suite[3][1])

backends/arm/test/targets.bzl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ def define_arm_tests():
1212
test_files.remove("passes/test_ioquantization_pass.py")
1313

1414
# Operators
15-
test_files += ["ops/test_linear.py"]
16-
test_files += ["ops/test_slice.py"]
15+
test_files += [
16+
"ops/test_linear.py",
17+
"ops/test_slice.py",
18+
"ops/test_sigmoid.py",
19+
]
1720

1821
TESTS = {}
1922

backends/qualcomm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from .annotate_unbind import AnnotateUnbind
1010
from .convert_bmm_to_matmul import ConvertBmmToMatmul
1111
from .convert_conv1d_to_conv2d import ConvertConv1dToConv2d
12+
from .convert_square_to_pow import ConvertSquareToPow
1213
from .convert_upsample_bicubic2d import ConvertUpsampleBicubicWithBilinear
1314
from .decompose_any import DecomposeAny
1415
from .decompose_cdist import DecomposeCDist
@@ -42,6 +43,7 @@
4243
AnnotateUnbind,
4344
ConvertBmmToMatmul,
4445
ConvertConv1dToConv2d,
46+
ConvertSquareToPow,
4547
ConvertUpsampleBicubicWithBilinear,
4648
DecomposeAny,
4749
DecomposeCDist,
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# All rights reserved
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
import torch
7+
from executorch.exir.pass_base import ExportPass, PassResult
8+
9+
from .utils import copy_meta
10+
11+
12+
class ConvertSquareToPow(ExportPass):
    """
    Rewrite every ``aten.square`` call as ``aten.pow.Tensor_Scalar`` with an
    exponent of 2.

    For each square node a pow node is created right after the square's
    input, receives a copy of the square's metadata, and takes the square's
    place in every user. The orphaned square nodes are then left for
    ``eliminate_dead_code()`` to remove.

    NOTE(review): upstream motivates this pass by saying it lets
    LiftConstantScalarOperands lift the scalar exponent (presumably into a
    tensor constant), and that the square op would otherwise be converted to
    pow.tensor_scalar after to_edge. Neither is visible here -- confirm.
    """

    def __init__(self) -> None:
        super().__init__()

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        """Replace square nodes in ``graph_module`` and recompile it.

        Always reports the module as modified, whether or not a square node
        was found.
        """
        fx_graph = graph_module.graph
        square_target = torch.ops.aten.square.default
        power_target = torch.ops.aten.pow.Tensor_Scalar

        for square_node in fx_graph.nodes:
            # Guard clause: anything that is not aten.square is left untouched.
            if square_node.target != square_target:
                continue

            operand = square_node.args[0]
            # The new node is placed directly after the operand it consumes.
            with fx_graph.inserting_after(operand):
                replacement = fx_graph.create_node(
                    "call_function", power_target, (operand, 2)
                )
            replacement.meta = copy_meta(square_node.meta)

            # Iterate over a snapshot: rewiring a user mutates `users`.
            for consumer in square_node.users.copy():
                consumer.replace_input_with(square_node, replacement)

        fx_graph.eliminate_dead_code()
        graph_module.recompile()
        return PassResult(graph_module, True)

0 commit comments

Comments
 (0)