
Commit 002ec6c

Merge branch 'main' into export-D85064213
2 parents: 52933af + 8946d80


53 files changed (+2374 / -1271 lines)

.gitignore

Lines changed: 0 additions & 1 deletion
@@ -62,7 +62,6 @@ xcuserdata/
 /include/
 /share/
 /version.py
-*.csv
 *_etdump

 # Android

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ executorch
 │ ├── <a href="backends/qualcomm">qualcomm</a> - Qualcomm-specific backends. See <a href="docs/source/backends-qualcomm.md">doc</a>.
 │ ├── <a href="backends/transforms">transforms</a> - Transformations for backend optimization.
 │ ├── <a href="backends/vulkan">vulkan</a> - Vulkan backend for cross-platform GPU support. See <a href="docs/source/backends-vulkan.md">doc</a>.
-│ └── <a href="backends/xnnpack">xnnpack</a> - XNNPACK backend for optimized neural network operations. See <a href="docs/source/backends-xnnpack.md">doc</a>.
+│ └── <a href="backends/xnnpack">xnnpack</a> - XNNPACK backend for optimized neural network operations. See <a href="docs/source/backends/xnnpack/xnnpack-overview.md">doc</a>.
 ├── <a href="codegen">codegen</a> - Tooling to autogenerate bindings between kernels and the runtime.
 ├── <a href="configurations">configurations</a> - Configuration files.
 ├── <a href="devtools">devtools</a> - Model profiling, debugging, and inspection. Please refer to the <a href="docs/source/devtools-overview.md">tools documentation</a> for more information.

README-wheel.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ The `executorch` pip package is in beta.
1111
The prebuilt `executorch.runtime` module included in this package provides a way
1212
to run ExecuTorch `.pte` files, with some restrictions:
1313
* Only [core ATen operators](docs/source/ir-ops-set-definition.md) are linked into the prebuilt module
14-
* Only the [XNNPACK backend delegate](docs/source/backends-xnnpack.md) is linked into the prebuilt module.
14+
* Only the [XNNPACK backend delegate](docs/source/backends/xnnpack/xnnpack-overview.md) is linked into the prebuilt module.
1515
* \[macOS only] [Core ML](docs/source/backends/coreml/coreml-overview.md) and [MPS](docs/source/backends/mps/mps-overview.md) backend
1616
are also linked into the prebuilt module.
1717

backends/cadence/aot/ops_registrations.py

Lines changed: 6 additions & 4 deletions
@@ -53,7 +53,6 @@ def _validate_ref_impl_exists() -> None:
 # 1. be removed
 # 2. have a reference implementation added to ref_implementations.py
 _WARN_ONLY = {
-    "cadence::quantized_w8a32_linear",
     "cadence::quantized_add",  # We should only support per_tensor variant, should remove
     "cadence::_softmax_f32_f32",
     "cadence::requantize",  # We should only support per_tensor variant, should remove
@@ -2706,6 +2705,9 @@ def quantized_w8a32_linear_meta(
     # output comes in empty with shape [leading_dims, out_dim]
     src_shape = list(src.shape)
     weight_shape = weight.shape
+    assert (src_shape[-1] % 4) == 0
+    if len(src_shape) >= 2:
+        assert src_shape[-2] == 1
     assert len(weight_shape) == 2
     assert src_shape[-1] == weight_shape[-1]
     src_shape[-1] = weight_shape[0]
@@ -2720,12 +2722,12 @@ def quantized_w8a32_conv_meta(
     bias: torch.Tensor,
     b_scale: float,
 ) -> torch.Tensor:
-    # src comes in shape [batch, in_channel, in_length]
-    # weight comes in shape [out_ch, in_ch, kernel_dim]
+    # src comes in shape [batch, in_length, in_channels]
+    # weight comes in shape [kernel_dim, out_ch, in_ch]
     # output comes in empty with shape [batch, out_ch, in_length - kernel_dim + 1]
     assert len(src.shape) == 3

-    out_channels, in_channels, kernel_size = weight.shape
+    kernel_size, out_channels, in_channels = weight.shape
     assert kernel_size == 3
     assert (out_channels % 4) == 0
     assert (in_channels % 4) == 0

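For context on the shape checks above: `quantized_w8a32_linear_meta` now requires the activation's last dimension to be a multiple of 4 and, for rank >= 2 inputs, a second-to-last dimension of 1 (vector-matrix only), while `quantized_w8a32_conv_meta` unpacks the weight as `[kernel_dim, out_ch, in_ch]`. Below is a minimal sketch of the resulting shape propagation, using plain tensors in place of the fake tensors the real meta kernels receive; the helper names are illustrative and not part of the codebase.

```python
import torch


def w8a32_linear_out_shape(src: torch.Tensor, weight: torch.Tensor) -> list[int]:
    # Mirrors the meta-kernel checks: the last dim of src must match the last
    # dim of weight and be a multiple of 4; the output's last dim becomes
    # weight.shape[0].
    src_shape = list(src.shape)
    assert (src_shape[-1] % 4) == 0
    if len(src_shape) >= 2:
        assert src_shape[-2] == 1  # vector-matrix multiplication only
    assert len(weight.shape) == 2
    assert src_shape[-1] == weight.shape[-1]
    src_shape[-1] = weight.shape[0]
    return src_shape


def w8a32_conv_out_shape(src: torch.Tensor, weight: torch.Tensor) -> list[int]:
    # src: [batch, in_length, in_channels] (channels last)
    # weight: [kernel_dim, out_ch, in_ch]
    batch, in_length, _ = src.shape
    kernel_size, out_channels, in_channels = weight.shape
    assert kernel_size == 3
    assert (out_channels % 4) == 0 and (in_channels % 4) == 0
    return [batch, out_channels, in_length - kernel_size + 1]


print(w8a32_linear_out_shape(torch.empty(1, 1, 8), torch.empty(16, 8)))  # [1, 1, 16]
print(w8a32_conv_out_shape(torch.empty(2, 10, 8), torch.empty(3, 4, 8)))  # [2, 4, 8]
```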
backends/cadence/aot/quantizer/fusion_pass.py

Lines changed: 1 addition & 1 deletion
@@ -397,7 +397,7 @@ def get_args_and_kwargs_mixed_w8a32_conv(
     )
     transposed_weights = graph_module.graph.call_function(
         torch.ops.aten.permute.default,
-        (weights_inputs[0], [2, 0, 1]),  # NCL -> NLC
+        (weights_inputs[0], [2, 0, 1]),  # NCL -> LNC
     )

     args = (

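The corrected comment reflects what `permute` with dims `[2, 0, 1]` actually does: it moves the last axis to the front, turning an `[out_ch, in_ch, kernel]` (NCL) weight into `[kernel, out_ch, in_ch]` (LNC), the layout used elsewhere in this commit. A quick check of that claim, with arbitrarily chosen shapes:

```python
import torch

w_ncl = torch.randn(4, 8, 3)  # [out_ch=4, in_ch=8, kernel=3], i.e. NCL

# Same permutation the fusion pass emits via torch.ops.aten.permute.default
w_lnc = torch.permute(w_ncl, [2, 0, 1])

print(w_lnc.shape)  # torch.Size([3, 4, 8]) -> [kernel, out_ch, in_ch]
assert torch.equal(w_lnc[0], w_ncl[:, :, 0])  # each kernel slice is preserved
```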
backends/cadence/aot/ref_implementations.py

Lines changed: 40 additions & 5 deletions
@@ -854,18 +854,23 @@ def quantized_w8a32_conv(
     if len(weight.shape) != 3:
         raise ValueError("Weight tensor must be 3D")

-    out_channels, in_channels, kernel_size = weight.shape
+    kernel_size, out_channels, in_channels = weight.shape
     if kernel_size != 3:
         raise ValueError("Kernel size must be 3")
     if (out_channels % 4) != 0:
         raise ValueError("Out channels must be a multiple of 4")
     if (in_channels % 4) != 0:
         raise ValueError("In channels must be a multiple of 4")

-    # src comes in shape [batch, in_channel, in_length]
-    # weight comes in shape [out_ch, in_ch, kernel_dim]
-    # output comes in empty with shape [batch, out_ch, in_length - kernel_dim + 1]
-    # Dequantize weight using scale
+    assert weight.dtype == torch.int8
+    assert bias.dtype == torch.int8
+
+    # To make compliant with torch (LCN -> NCL format)
+    weight = weight.permute(1, 2, 0).contiguous()
+
+    # channels last to channels first
+    src = src.permute(0, 2, 1).contiguous()
+
     dequant_weight = weight.float() * w_scale

     # Dequantize bias using scale
@@ -884,6 +889,36 @@ def quantized_w8a32_conv(
     return output


+@impl_tracked(m, "quantized_w8a32_linear")
+def quantized_w8a32_linear(
+    src: torch.Tensor,
+    weight: torch.Tensor,
+    w_scale: float,
+    bias: torch.Tensor,
+    b_scale: float,
+) -> torch.Tensor:
+    # src comes in shape [leading_dims, in_dim]
+    # weight comes in shape [in_dim, out_dim]
+    # output comes in empty with shape [leading_dims, out_dim]
+    assert weight.dtype == torch.int8
+    assert bias.dtype == torch.int8
+    if len(src.shape) >= 2:
+        assert src.shape[-2] == 1, "Only supporting vector-matrix multiplication"
+
+    # need to transpose to make compliant with torch linear (in, out -> out, in)
+    weight = weight.transpose(1, 0).contiguous()
+    dequant_weight = weight.float() * w_scale
+    dequant_bias = bias.float() * b_scale
+
+    output = torch.nn.functional.linear(
+        src.float(),
+        dequant_weight,
+        dequant_bias,
+    )
+
+    return output
+
+
 @impl_tracked(m, "quantized_conv2d_nhwc.per_tensor")
 def quantized_conv2d_nhwc_per_tensor(
     input_tensor: torch.Tensor,

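The new `quantized_w8a32_linear` reference is effectively dequantize-then-`torch.nn.functional.linear` on a single activation row: the int8 weight and bias are scaled back to float, the weight is transposed from `[in_dim, out_dim]` into the `[out_dim, in_dim]` layout `F.linear` expects, and the matmul runs in fp32. Below is a standalone sketch of the same math outside the `cadence` op registry, with values borrowed from the `multi_input_features` test case further down; the helper name is made up for illustration.

```python
import torch


def w8a32_linear_ref(src, weight, w_scale, bias, b_scale):
    # weight/bias are int8, activations stay float32 (hence "w8a32")
    dequant_weight = weight.transpose(1, 0).contiguous().float() * w_scale
    dequant_bias = bias.float() * b_scale
    return torch.nn.functional.linear(src.float(), dequant_weight, dequant_bias)


src = torch.tensor([[1.0, 2.0, 3.0]])                              # [1, in_dim=3]
weight = torch.tensor([[2, 1], [1, 2], [1, 1]], dtype=torch.int8)  # [in_dim, out_dim]
bias = torch.tensor([0, 1], dtype=torch.int8)

print(w8a32_linear_ref(src, weight, 0.5, bias, 1.0))  # tensor([[3.5000, 5.0000]])
```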
backends/cadence/aot/tests/test_ref_implementations.py

Lines changed: 96 additions & 1 deletion
@@ -1188,7 +1188,7 @@ def test_quantized_conv_per_tensor(
                 dtype=torch.int8,
             ),  # weight: 4x4x3
             0.5,  # w_scale
-            torch.tensor([2, 2, 2, 2], dtype=torch.float32),  # bias: 4
+            torch.tensor([2, 2, 2, 2], dtype=torch.int8),  # bias: 4
             1.0,  # b_scale
             torch.tensor(
                 [
@@ -1214,6 +1214,12 @@ def test_quantized_w8a32_conv(
         b_scale: float,
         expected_output: torch.Tensor,
     ) -> None:
+
+        # This op takes in channels last src
+        src = src.permute(0, 2, 1)
+
+        # This op takes in LNC format for weights
+        weight = weight.permute(2, 0, 1)
         output = torch.ops.cadence.quantized_w8a32_conv(
             src, weight, w_scale, bias, b_scale
         )
@@ -1236,6 +1242,95 @@ def test_quantized_w8a32_conv(
             f"Output values don't match expected in {name}. Got {output}, expected {expected_output}",
         )

+    @expand(
+        [
+            (
+                "multi_input_features",
+                torch.tensor([[1.0, 2.0, 3.0]], dtype=torch.float32),  # src: 1x3
+                torch.tensor([[2, 1], [1, 2], [1, 1]], dtype=torch.int8),  # weight: 3x2
+                0.5,  # w_scale
+                torch.tensor([0, 1], dtype=torch.int8),  # bias: 2
+                1.0,  # b_scale
+                torch.tensor([[3.5, 5.0]], dtype=torch.float32),  # expected
+            ),
+            (
+                "batch_size_2",
+                torch.tensor(
+                    [[[1.0, 2.0]], [[3.0, 4.0]]], dtype=torch.float32
+                ),  # src: 2x2
+                torch.tensor([[1, 2], [1, -1]], dtype=torch.int8),  # weight: 2x2
+                1.0,  # w_scale
+                torch.tensor([0, 0], dtype=torch.int8),  # bias: 2
+                1.0,  # b_scale
+                torch.tensor(
+                    [[[3.0, 0.0]], [[7.0, 2.0]]], dtype=torch.float32
+                ),  # expected
+            ),
+            (
+                "shape_assertion_error",
+                torch.tensor(
+                    [[[1.0, 2.0], [3.0, 4.0]]], dtype=torch.float32
+                ),  # src: 1x2x2
+                torch.tensor([[1, 2], [1, -1]], dtype=torch.int8),  # weight: 2x2
+                1.0,  # w_scale
+                torch.tensor([0, 1], dtype=torch.int8),  # bias: 2
+                1.0,  # b_scale
+                torch.tensor(
+                    [[[3.0, 1.0], [7.0, 3.0]]], dtype=torch.float32
+                ),  # expected
+            ),
+            (
+                "negative_weights",
+                torch.tensor([[2.0, 4.0]], dtype=torch.float32),  # src: 1x2
+                torch.tensor([[-2, -3], [-1, -2]], dtype=torch.int8),  # weight: 2x2
+                0.5,  # w_scale
+                torch.tensor([2, 1], dtype=torch.int8),  # bias: 2
+                1.0,  # b_scale
+                torch.tensor([[-2.0, -6.0]], dtype=torch.float32),  # expected
+            ),
+        ]
+    )
+    def test_quantized_w8a32_linear(
+        self,
+        name: str,
+        src: torch.Tensor,
+        weight: torch.Tensor,
+        w_scale: float,
+        bias: torch.Tensor,
+        b_scale: float,
+        expected_output: torch.Tensor,
+    ) -> None:
+        if name == "shape_assertion_error":
+            with self.assertRaisesRegex(
+                AssertionError, "Only supporting vector-matrix multiplication"
+            ):
+                torch.ops.cadence.quantized_w8a32_linear(
+                    src, weight, w_scale, bias, b_scale
+                )
+            return
+
+        output = torch.ops.cadence.quantized_w8a32_linear(
+            src, weight, w_scale, bias, b_scale
+        )
+
+        # Verify output properties
+        self.assertEqual(
+            output.dtype,
+            torch.float32,
+            f"Output dtype should be float32 in {name}",
+        )
+        self.assertEqual(
+            output.shape,
+            expected_output.shape,
+            f"Output shape should match expected shape in {name}",
+        )
+
+        # Verify output matches expected values
+        self.assertTrue(
+            torch.allclose(output, expected_output, rtol=1e-4, atol=1e-4),
+            f"Output values don't match expected in {name}. Got {output}, expected {expected_output}",
+        )
+
     @expand(
         [
             # Test case 1: Basic int8 case with negative scale

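The expected tensors in the new `test_quantized_w8a32_linear` cases can be reproduced by hand; for example, for the `negative_weights` case, with the weight in `[in_dim, out_dim]` layout the op is equivalent to `src @ (weight * w_scale) + bias * b_scale`. A small verification sketch (not part of the test file):

```python
import torch

# "negative_weights" case from the parameter list above
src = torch.tensor([[2.0, 4.0]])
weight = torch.tensor([[-2, -3], [-1, -2]], dtype=torch.int8)
bias = torch.tensor([2, 1], dtype=torch.int8)

dequant_w = weight.float() * 0.5  # w_scale
dequant_b = bias.float() * 1.0    # b_scale

print(src @ dequant_w + dequant_b)  # tensor([[-2., -6.]]) -- matches the expected output
```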
backends/xnnpack/README.md

Lines changed: 1 addition & 1 deletion
@@ -134,4 +134,4 @@ create an issue on [github](https://www.github.com/pytorch/executorch/issues).
 ## See Also
 For more information about the XNNPACK Backend, please check out the following resources:
 - [XNNPACK Backend](https://pytorch.org/executorch/main/backends-xnnpack)
-- [XNNPACK Backend Internals](https://pytorch.org/executorch/main/backend-delegates-xnnpack-reference)
+- [XNNPACK Backend Internals](https://pytorch.org/executorch/main/backends/xnnpack/backend-delegates-xnnpack-reference)

docs/source/android-backends.md

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@ Available hardware acceleration backends for Android deployment.
 - {doc}`android-qualcomm` — Qualcomm AI Engine (NPU)
 - {doc}`android-mediatek` — MediaTek NPU acceleration
 - {doc}`android-arm-vgf` — ARM VGF Backend
-- {doc}`android-samsung-exynos` — Samsung Exynos NPU
+- {doc}`backends/samsung/samsung-overview` — Samsung Exynos NPU

 ```{toctree}
 :hidden:
@@ -25,4 +25,4 @@ android-vulkan
 android-qualcomm
 android-mediatek
 android-arm-vgf
-android-samsung-exynos
+backends/samsung/samsung-overview

docs/source/android-xnnpack.md

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-```{include} backends-xnnpack.md
+```{include} backends/xnnpack/xnnpack-overview.md
