Skip to content

Commit 600cf8a

Browse files
committed
Update
[ghstack-poisoned]
2 parents 8b814bc + ceb875b commit 600cf8a

15 files changed

+405
-42
lines changed

.ci/scripts/backend-test-linux.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,6 @@ conda activate "${CONDA_ENV}"
2020
#source .ci/scripts/setup-vulkan-linux-deps.sh
2121

2222
# We need the runner to test the built library.
23-
.ci/scripts/setup-linux.sh "cmake" "release" "false"
23+
.ci/scripts/setup-linux.sh --build-tool "cmake" --build-mode "release"
2424

2525
python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report test_results.csv

.github/workflows/add-unanswered-to-project.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136
22

33
on:
4-
schedule:
5-
- cron: '0 * * * *'
4+
# schedule:
5+
# - cron: '0 * * * *'
66
workflow_dispatch:
77

88
jobs:
@@ -12,7 +12,7 @@ jobs:
1212
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
1313
uses: actions/github-script@v7
1414
with:
15-
github-token: ${{ secrets.PYTORCH_PROJECT_PAT }}
15+
github-token: ${{ secrets.GITHUB_TOKEN }}
1616
script: |
1717
const projectId = "PVT_kwDOAUB9vs4A_PUL"; // PyTorch org project 136
1818
const owner = 'pytorch';

backends/cadence/aot/functions.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,16 @@
229229
- arg_meta: null
230230
kernel_name: impl::reference::quantized_linear_per_tensor_out
231231

232+
- func: cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
233+
kernels:
234+
- arg_meta: null
235+
kernel_name: impl::reference::quantized_linear_asym8sxasym8s_asym8s_per_tensor_out
236+
237+
- func: cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
238+
kernels:
239+
- arg_meta: null
240+
kernel_name: impl::reference::quantized_linear_asym8uxasym8u_asym8u_per_tensor_out
241+
232242
- func: cadence::im2row.out(Tensor input, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, Tensor in_zero_point, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
233243
kernels:
234244
- arg_meta: null

backends/cadence/aot/functions_hifi.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,16 @@
314314
- arg_meta: null
315315
kernel_name: cadence::impl::HiFi::quantized_linear_per_tensor_out
316316

317+
- func: cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
318+
kernels:
319+
- arg_meta: null
320+
kernel_name: cadence::impl::HiFi::quantized_linear_asym8sxasym8s_asym8s_per_tensor_out
321+
322+
- func: cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
323+
kernels:
324+
- arg_meta: null
325+
kernel_name: cadence::impl::HiFi::quantized_linear_asym8uxasym8u_asym8u_per_tensor_out
326+
317327
- func: cadence::quantized_relu_per_tensor.out(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
318328
kernels:
319329
- arg_meta: null

backends/cadence/aot/ops_registrations.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,26 @@
5656
lib.define(
5757
"quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
5858
)
59+
lib.define(
60+
"quantized_linear_asym8sxasym8s_asym8s.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
61+
"int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
62+
)
63+
lib.define(
64+
"quantized_linear_asym8uxasym8u_asym8u.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
65+
"int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)"
66+
)
5967
lib.define(
6068
"quantized_linear.per_tensor(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, "
6169
"SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset) -> Tensor"
6270
)
71+
lib.define(
72+
"quantized_linear_asym8sxasym8s_asym8s.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
73+
"int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
74+
)
75+
lib.define(
76+
"quantized_linear_asym8uxasym8u_asym8u.per_tensor(Tensor src, Tensor weight, Tensor bias, int src_zero_point, "
77+
"int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset) -> (Tensor Z)"
78+
)
6379

6480
lib.define(
6581
"quantized_relu(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Y)"
@@ -446,6 +462,50 @@ def quantized_linear_per_tensor_meta(
446462
return src.new_empty(out_size, dtype=src.dtype)
447463

448464

465+
@register_fake("cadence::quantized_linear_asym8sxasym8s_asym8s.per_tensor")
def quantized_linear_asym8sxasym8s_asym8s_per_tensor_meta(
    src: torch.Tensor,
    weight: torch.Tensor,
    bias: torch.Tensor,
    in_zero_point: int,
    weight_zero_point: int,
    out_multiplier: int,
    out_shift: int,
    out_zero_point: int,
    offset: Optional[torch.Tensor],
) -> torch.Tensor:
    """Fake (shape-only) kernel for the int8 x int8 -> int8 quantized linear op.

    Only ``src`` and ``weight`` are inspected; the remaining arguments are
    accepted to mirror the operator schema and are not read here.

    Returns an uninitialized tensor with the dtype of ``src`` whose shape is
    the shape of ``src`` with the last dimension replaced by ``weight.size(0)``.
    """
    # weight is expected as [out_dim, in_dim]; anything else is rejected.
    weight_shape = tuple(weight.size())
    assert len(weight_shape) == 2

    # src is [leading_dims, in_dim] -> result is [leading_dims, out_dim].
    result_shape = [*src.size()]
    result_shape[-1] = weight_shape[0]
    return src.new_empty(result_shape, dtype=src.dtype)
485+
486+
487+
@register_fake("cadence::quantized_linear_asym8uxasym8u_asym8u.per_tensor")
def quantized_linear_asym8uxasym8u_asym8u_per_tensor_meta(
    src: torch.Tensor,
    weight: torch.Tensor,
    bias: torch.Tensor,
    in_zero_point: int,
    weight_zero_point: int,
    out_multiplier: int,
    out_shift: int,
    out_zero_point: int,
    offset: Optional[torch.Tensor],
) -> torch.Tensor:
    """Fake (shape-only) kernel for the uint8 x uint8 -> uint8 quantized linear op.

    Only ``src`` and ``weight`` are inspected; the remaining arguments are
    accepted to mirror the operator schema and are not read here.

    Returns an uninitialized tensor with the dtype of ``src`` whose shape is
    the shape of ``src`` with the last dimension replaced by ``weight.size(0)``.
    """
    # weight is expected as [out_dim, in_dim]; anything else is rejected.
    weight_shape = tuple(weight.size())
    assert len(weight_shape) == 2

    # src is [leading_dims, in_dim] -> result is [leading_dims, out_dim].
    result_shape = [*src.size()]
    result_shape[-1] = weight_shape[0]
    return src.new_empty(result_shape, dtype=src.dtype)
507+
508+
449509
@register_fake("cadence::quantized_conv")
450510
def quantized_conv_meta(
451511
input: torch.Tensor,

backends/cadence/aot/tests/test_type_dispatch_passes.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919

2020
class TestTypeDispatchPasses(unittest.TestCase):
21-
def test_int8_dispatch(self) -> None:
21+
def test_int8_dispatch_quantized_fully_connected(self) -> None:
2222
"""Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant"""
2323
x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
2424
w = torch.randint(-128, 127, (4, 3), dtype=torch.int8)
@@ -44,7 +44,7 @@ def test_int8_dispatch(self) -> None:
4444
1,
4545
)
4646

47-
def test_uint8_dispatch(self) -> None:
47+
def test_uint8_dispatch_quantized_fully_connected(self) -> None:
4848
"""Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant"""
4949
x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
5050
w = torch.randint(0, 255, (4, 3), dtype=torch.uint8)
@@ -70,6 +70,58 @@ def test_uint8_dispatch(self) -> None:
7070
1,
7171
)
7272

73+
def test_int8_dispatch_quantized_linear(self) -> None:
    """quantized_linear with int8 input and int8 weight must be rewritten to the asym8sxasym8s_asym8s variant."""
    generic_op = exir_ops.edge.cadence.quantized_linear.per_tensor

    # Build a graph holding a single generic quantized_linear node.
    x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
    w = torch.randint(-128, 127, (4, 3), dtype=torch.int8)
    b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
    graph = single_op_builder(
        placeholders=(x, w, b),
        op=generic_op,
        args=(x, w, b, 0, 0, 1, 0, 0, None),
    )

    # Run the dispatch pass and take the transformed module.
    dispatch_pass = CompileTimeTypeDispatchPass()
    transformed = cast(PassResult, dispatch_pass(graph)).graph_module

    # The generic op must be gone ...
    self.assertEqual(count_node(transformed, generic_op), 0)

    # ... and exactly one int8-specific op must have taken its place.
    typed_op = exir_ops.edge.cadence.quantized_linear_asym8sxasym8s_asym8s.per_tensor
    self.assertEqual(count_node(transformed, typed_op), 1)
98+
99+
def test_uint8_quantized_linear_dispatch(self) -> None:
    """quantized_linear with uint8 input and uint8 weight must be rewritten to the asym8uxasym8u_asym8u variant."""
    generic_op = exir_ops.edge.cadence.quantized_linear.per_tensor

    # Build a graph holding a single generic quantized_linear node.
    x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
    w = torch.randint(0, 255, (4, 3), dtype=torch.uint8)
    b = torch.randint(-2147483648, 2147483647, (4,), dtype=torch.int32)
    graph = single_op_builder(
        placeholders=(x, w, b),
        op=generic_op,
        args=(x, w, b, 0, 0, 1, 0, 0, None),
    )

    # Run the dispatch pass and take the transformed module.
    dispatch_pass = CompileTimeTypeDispatchPass()
    transformed = cast(PassResult, dispatch_pass(graph)).graph_module

    # The generic op must be gone ...
    self.assertEqual(count_node(transformed, generic_op), 0)

    # ... and exactly one uint8-specific op must have taken its place.
    typed_op = exir_ops.edge.cadence.quantized_linear_asym8uxasym8u_asym8u.per_tensor
    self.assertEqual(count_node(transformed, typed_op), 1)
124+
73125
def test_mixed_types_error(self) -> None:
74126
"""Test mixed int8/uint8 inputs should raise RuntimeError"""
75127
x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)

backends/cadence/aot/type_dispatch.py

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -23,40 +23,40 @@ class CompileTimeTypeDispatchPass(ExportPass):
2323
Replaces generic ops with ops that have explicit types.
2424
"""
2525

26+
_TYPE_DISPATCH_MAP: dict[tuple[torch.dtype, torch.dtype], str] = {
27+
(torch.int8, torch.int8): "asym8sxasym8s_asym8s",
28+
(torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
29+
}
30+
31+
_SUPPORTED_OPS: dict[OpOverload, str] = {
32+
exir_ops.edge.cadence.quantized_fully_connected.per_tensor: "quantized_fully_connected",
33+
exir_ops.edge.cadence.quantized_linear.per_tensor: "quantized_linear",
34+
}
35+
2636
def call_operator(
2737
self,
2838
op: OpOverload,
2939
args: tuple[Argument, ...],
3040
kwargs: dict[str, Argument],
3141
meta: NodeMetadata,
3242
) -> ProxyValue:
33-
if op not in {
34-
exir_ops.edge.cadence.quantized_fully_connected.per_tensor,
35-
}:
43+
if op not in self._SUPPORTED_OPS:
3644
return super().call_operator(op, args, kwargs, meta)
3745

38-
if (
39-
# pyre-ignore[16]: None has no attribute `to_tensor`.
40-
args[0].to_tensor().dtype == torch.int8
41-
and args[1].to_tensor().dtype == torch.int8
42-
):
43-
return super().call_operator(
44-
exir_ops.edge.cadence.quantized_fully_connected_asym8sxasym8s_asym8s.per_tensor,
45-
args,
46-
kwargs,
47-
meta,
48-
)
49-
elif (
50-
args[0].to_tensor().dtype == torch.uint8
51-
and args[1].to_tensor().dtype == torch.uint8
52-
):
53-
return super().call_operator(
54-
exir_ops.edge.cadence.quantized_fully_connected_asym8uxasym8u_asym8u.per_tensor,
55-
args,
56-
kwargs,
57-
meta,
58-
)
59-
else:
46+
# pyre-ignore[16]: None has no attribute `to_tensor`.
47+
input_dtype = args[0].to_tensor().dtype
48+
weight_dtype = args[1].to_tensor().dtype
49+
dtype_pair = (input_dtype, weight_dtype)
50+
51+
if dtype_pair not in self._TYPE_DISPATCH_MAP:
6052
raise RuntimeError(
61-
f"Unsupported input types for {op}: {args[0].to_tensor().dtype} and {args[1].to_tensor().dtype}"
53+
f"Unsupported input types for {op}: {input_dtype} and {weight_dtype}"
6254
)
55+
56+
base_op_name = self._SUPPORTED_OPS[op]
57+
type_suffix = self._TYPE_DISPATCH_MAP[dtype_pair]
58+
59+
typed_op_name = f"{base_op_name}_{type_suffix}"
60+
typed_op = getattr(exir_ops.edge.cadence, typed_op_name).per_tensor
61+
62+
return super().call_operator(typed_op, args, kwargs, meta)
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/cadence/hifi/kernels/kernels.h>
10+
#include <executorch/runtime/kernel/kernel_includes.h>
11+
#include <xa_nnlib_kernels_api.h>
12+
13+
namespace cadence {
14+
namespace impl {
15+
namespace HiFi {
16+
namespace native {
17+
18+
using ::executorch::aten::Tensor;
19+
using ::executorch::runtime::getLeadingDims;
20+
using ::executorch::runtime::KernelRuntimeContext;
21+
using std::optional;
22+
23+
void quantized_linear_asym8sxasym8s_asym8s_per_tensor_out(
24+
__ET_UNUSED KernelRuntimeContext& ctx,
25+
const Tensor& in,
26+
const Tensor& weight,
27+
const Tensor& bias,
28+
int64_t in_zero_point,
29+
int64_t weight_zero_point,
30+
int64_t out_multiplier,
31+
int64_t out_shift,
32+
int64_t out_zero_point,
33+
__ET_UNUSED const optional<Tensor>& offset,
34+
Tensor& out) {
35+
// input comes in shape [leading_dims, in_dim]
36+
// weight comes in shape [out_dim, in_dim]
37+
// output comes in empty with shape [leading_dims, out_dim]
38+
// Perform matrix multiply (M x N) x (N x P)' => M x P
39+
const int64_t leading_dims = getLeadingDims(in, in.dim() - 1);
40+
const int64_t out_dim = weight.size(0); // = out_dim
41+
const int64_t in_dim = weight.size(1); // = in_dim
42+
43+
const int8_t* __restrict__ in_data = in.const_data_ptr<int8_t>();
44+
const int8_t* __restrict__ weight_data = weight.const_data_ptr<int8_t>();
45+
const int32_t* __restrict__ bias_data = bias.const_data_ptr<int32_t>();
46+
int8_t* __restrict__ out_data = out.mutable_data_ptr<int8_t>();
47+
48+
const int32_t out_multipler_int32 = static_cast<int32_t>(out_multiplier);
49+
const int32_t out_shift_int32 = static_cast<int32_t>(out_shift);
50+
51+
// The nnlib kernel to compute quantized linear via matmul.
52+
const int32_t ret = xa_nn_matmul_asym8sxasym8s_asym8s(
53+
out_data, // p_out
54+
weight_data, // p_mat1,
55+
in_data, // p_mat2,
56+
bias_data, // p_bias
57+
out_dim, // rows of p_mat1
58+
in_dim, // cols of p_mat1
59+
in_dim, // row_stride of p_mat1
60+
leading_dims, // vec_count, i.e., rows of p_mat2
61+
in_dim, // vec_offset of p_mat2.
62+
out_dim, // out_offset, i.e., offset of next output element written
63+
1, // out_stride, i.e., stride to go to next output row
64+
-weight_zero_point, // mat1_zero_bias
65+
-in_zero_point, // mat2_zero_bias
66+
out_multipler_int32, // out_multiplier
67+
out_shift_int32, // out_shift
68+
out_zero_point); // out_zero_bias
69+
ET_DCHECK_MSG(ret == 0, "HiFi quantized::linear_per_tensor failed");
70+
}
71+
72+
} // namespace native
73+
} // namespace HiFi
74+
} // namespace impl
75+
} // namespace cadence

0 commit comments

Comments
 (0)