Skip to content

Commit 9539249

Browse files
committed
Update base for Update on "[ET-VK] Minor tuning for conv2d pw op to improve performance."
This diff introduces minor tuning for the Conv2d pointwise (PW) operation in the Vulkan backend to improve performance. Conv2d PW now issues a 2D dispatch instead of a 1D dispatch, where the dispatch's y axis is sized based on the output texture's batch size. Differential Revision: [D75251145](https://our.internmc.facebook.com/intern/diff/D75251145/) [ghstack-poisoned]
2 parents 790057a + 3ad9419 commit 9539249

File tree

44 files changed

+606
-139
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+606
-139
lines changed

.github/workflows/build-presets.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
strategy:
2121
fail-fast: false
2222
matrix:
23-
preset: [macos-arm64, pybind, llm]
23+
preset: [macos, ios, ios-simulator, pybind, llm]
2424
with:
2525
job-name: build
2626
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -39,7 +39,7 @@ jobs:
3939
strategy:
4040
fail-fast: false
4141
matrix:
42-
preset: [pybind, llm]
42+
preset: [linux, pybind, llm]
4343
runner: [linux.2xlarge, linux.arm64.2xlarge]
4444
docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
4545
# Excluding specific runner + docker image combinations that don't make sense:

.lintrunner.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,6 @@ exclude_patterns = [
390390
"backends/arm/test/ops/**",
391391
"backends/vulkan/quantizer/**",
392392
"backends/vulkan/test/**",
393-
"backends/cadence/aot/quantizer/**",
394393
"backends/qualcomm/quantizer/**",
395394
"examples/qualcomm/**",
396395
"backends/xnnpack/quantizer/**",

CMakePresets.json

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
"binaryDir": "${sourceDir}/cmake-out"
88
},
99
{
10-
"name": "macos-arm64",
11-
"displayName": "Build everything buildable on macOS arm64",
10+
"name": "macos",
11+
"displayName": "Build everything buildable on macOS",
1212
"inherits": ["common"],
1313
"generator": "Xcode",
1414
"cacheVariables": {
1515
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake",
16-
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/macos-arm64.cmake",
16+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/macos.cmake",
1717
"PLATFORM": "MAC_ARM64",
1818
"DEPLOYMENT_TARGET": "10.15"
1919
},
@@ -23,6 +23,54 @@
2323
"rhs": "Darwin"
2424
}
2525
},
26+
{
27+
"name": "ios",
28+
"displayName": "Build everything buildable on iOS",
29+
"inherits": ["common"],
30+
"generator": "Xcode",
31+
"cacheVariables": {
32+
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake",
33+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/ios.cmake",
34+
"PLATFORM": "OS64",
35+
"DEPLOYMENT_TARGET": "17.0"
36+
},
37+
"condition": {
38+
"lhs": "${hostSystemName}",
39+
"type": "equals",
40+
"rhs": "Darwin"
41+
}
42+
},
43+
{
44+
"name": "ios-simulator",
45+
"displayName": "Build everything buildable on iOS simulator",
46+
"inherits": ["common"],
47+
"generator": "Xcode",
48+
"cacheVariables": {
49+
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake",
50+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/ios.cmake",
51+
"PLATFORM": "SIMULATORARM64",
52+
"DEPLOYMENT_TARGET": "17.0"
53+
},
54+
"condition": {
55+
"lhs": "${hostSystemName}",
56+
"type": "equals",
57+
"rhs": "Darwin"
58+
}
59+
},
60+
{
61+
"name": "linux",
62+
"displayName": "Build everything buildable on Linux",
63+
"inherits": ["common"],
64+
"cacheVariables": {
65+
"CMAKE_SYSTEM_NAME": "Linux",
66+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/linux.cmake"
67+
},
68+
"condition": {
69+
"lhs": "${hostSystemName}",
70+
"type": "equals",
71+
"rhs": "Linux"
72+
}
73+
},
2674
{
2775
"name": "pybind",
2876
"displayName": "Build pybindings exported in the wheel",

backends/cadence/aot/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def prepare_and_convert_pt2(
123123
assert isinstance(model_gm, torch.fx.GraphModule)
124124

125125
# Prepare
126-
prepared_model = prepare_pt2e(model_gm, quantizer) # pyre-ignore[6]
126+
prepared_model = prepare_pt2e(model_gm, quantizer)
127127

128128
# Calibrate
129129
# If no calibration data is provided, use the inputs

backends/cadence/aot/quantizer/TARGETS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ python_library(
99
],
1010
deps = [
1111
"//caffe2:torch",
12+
"//pytorch/ao:torchao",
1213
],
1314
)
1415

@@ -34,7 +35,6 @@ python_library(
3435
":patterns",
3536
":utils",
3637
"//caffe2:torch",
37-
"//executorch/backends/xnnpack/quantizer:xnnpack_quantizer_utils",
3838
],
3939
)
4040

backends/cadence/aot/quantizer/patterns.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from torch import fx
1717
from torch._ops import OpOverload
18-
from torch.ao.quantization.quantizer import (
18+
from torchao.quantization.pt2e.quantizer import (
1919
DerivedQuantizationSpec,
2020
SharedQuantizationSpec,
2121
)

backends/cadence/aot/quantizer/quantizer.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,20 @@
2929
is_annotated,
3030
no_outside_users,
3131
)
32-
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer_utils import (
32+
33+
from torch import fx
34+
35+
from torchao.quantization.pt2e import HistogramObserver, MinMaxObserver
36+
from torchao.quantization.pt2e.quantizer import (
37+
ComposableQuantizer,
38+
DerivedQuantizationSpec,
3339
OperatorConfig,
3440
QuantizationAnnotation,
3541
QuantizationConfig,
3642
QuantizationSpec,
43+
Quantizer,
3744
)
3845

39-
from torch import fx
40-
41-
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
42-
from torch.ao.quantization.quantizer import DerivedQuantizationSpec, Quantizer
43-
from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
44-
4546

4647
act_qspec_asym8s = QuantizationSpec(
4748
dtype=torch.int8,

backends/cadence/aot/quantizer/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414
import torch
1515
from torch import fx
1616
from torch._ops import OpOverload
17-
from torch.ao.quantization import ObserverOrFakeQuantize
1817

1918
from torch.fx import GraphModule
2019
from torch.fx.passes.utils.source_matcher_utils import (
2120
check_subgraphs_connected,
2221
SourcePartition,
2322
)
23+
from torchao.quantization.pt2e import ObserverOrFakeQuantize
2424

2525

2626
def quantize_tensor_multiplier(

backends/cadence/aot/remove_ops.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,10 +235,7 @@ def call_operator(
235235
kwargs: dict[str, Argument],
236236
meta: NodeMetadata,
237237
) -> ProxyValue:
238-
if op not in {
239-
exir_ops.edge.aten.linalg_vector_norm.default,
240-
exir_ops.edge.cadence.linalg_vector_norm.default,
241-
}:
238+
if op is not exir_ops.edge.aten.linalg_vector_norm.default:
242239
return super().call_operator(op, args, kwargs, meta)
243240

244241
# If the op has three args or less, it can't be a nop

backends/cadence/aot/tests/test_remove_ops_passes.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -467,10 +467,7 @@ def forward(self, x: torch.Tensor):
467467

468468
# Expect the linalg_vector_norm op to be removed by the pass
469469
self.assertEqual(
470-
count_node(graph_module, exir_ops.edge.aten.linalg_vector_norm.default)
471-
+ count_node(
472-
graph_module, exir_ops.edge.cadence.linalg_vector_norm.default
473-
),
470+
count_node(graph_module, exir_ops.edge.aten.linalg_vector_norm.default),
474471
0,
475472
)
476473

0 commit comments

Comments
 (0)