
Commit 123bb35

Merge branch 'main' into export-D80308822
2 parents 77cd291 + 3bb42ec commit 123bb35

68 files changed: +1993 additions, -422 deletions


.github/workflows/stale

Lines changed: 149 additions & 0 deletions
@@ -0,0 +1,149 @@
# The behavior is:
# - If a PR is not labeled stale, after 60 days of inactivity label the PR as stale and comment about it.
# - If a PR is labeled stale, after 30 days of inactivity close the PR.
# - `high priority` and `no-stale` PRs are exempt.

name: Close stale pull requests

on:
  schedule:
    # Run daily at 00:30 UTC.
    - cron: '30 0 * * *'
  workflow_dispatch:

jobs:
  stale:
    if: ${{ github.repository == 'pytorch/executorch' }}
    runs-on: linux.large
    permissions:
      contents: read
      pull-requests: write

    steps:
      - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          script: |
            // Do some dumb retries on requests.
            const retries = 7;
            const baseBackoff = 100;
            const sleep = timeout => new Promise(resolve => setTimeout(resolve, timeout));
            github.hook.wrap('request', async (request, options) => {
              for (let attempt = 1; attempt <= retries; attempt++) {
                try {
                  return await request(options);
                } catch (err) {
                  if (attempt < retries) {
                    core.warning(`Request getting retried. Attempt: ${attempt}`);
                    await sleep(baseBackoff * Math.pow(2, attempt));
                    continue;
                  }
                  throw err;
                }
              }
            });

            const MAX_API_REQUESTS = 100;

            // If a PR is not labeled stale, label it stale after no updates for 60 days.
            const STALE_LABEL_THRESHOLD_MS = 1000 * 60 * 60 * 24 * 60;
            // For PRs already labeled stale, close after no updates for 30 days.
            const STALE_CLOSE_THRESHOLD_MS = 1000 * 60 * 60 * 24 * 30;

            const STALE_MESSAGE =
              "Looks like this PR hasn't been updated in a while so we're going to go ahead and mark this as `Stale`. <br>" +
              "Feel free to remove the `Stale` label if you feel this was a mistake. <br>" +
              "If you are unable to remove the `Stale` label please contact a maintainer in order to do so. <br>" +
              "If you want the bot to never mark this PR stale again, add the `no-stale` label.<br>" +
              "`Stale` pull requests will automatically be closed after 30 days of inactivity.<br>";

            let numAPIRequests = 0;
            let numProcessed = 0;

            async function processPull(pull) {
              core.info(`[${pull.number}] URL: ${pull.html_url}`);
              numProcessed += 1;
              const labels = pull.labels.map((label) => label.name);

              // Skip if certain labels are present.
              if (labels.includes("no-stale") || labels.includes("high priority")) {
                core.info(`[${pull.number}] Skipping because PR has an exempting label.`);
                return false;
              }

              // Check if the PR is stale, according to our configured thresholds.
              let staleThresholdMillis;
              if (labels.includes("Stale")) {
                core.info(`[${pull.number}] PR is labeled stale, checking whether we should close it.`);
                staleThresholdMillis = STALE_CLOSE_THRESHOLD_MS;
              } else {
                core.info(`[${pull.number}] Checking whether to label PR as stale.`);
                staleThresholdMillis = STALE_LABEL_THRESHOLD_MS;
              }

              const millisSinceLastUpdated =
                new Date().getTime() - new Date(pull.updated_at).getTime();

              if (millisSinceLastUpdated < staleThresholdMillis) {
                core.info(`[${pull.number}] Skipping because PR was updated recently`);
                return false;
              }

              // At this point, we know we should do something.
              // For PRs already labeled stale, close them.
              if (labels.includes("Stale")) {
                core.info(`[${pull.number}] Closing PR.`);
                numAPIRequests += 1;
                //await github.rest.issues.update({
                //owner: "pytorch",
                //repo: "executorch",
                //issue_number: pull.number,
                //state: "closed",
                //});
              } else {
                // For PRs not labeled stale, label them stale.
                core.info(`[${pull.number}] Labeling PR as stale.`);

                numAPIRequests += 1;
                //await github.rest.issues.createComment({
                //owner: "pytorch",
                //repo: "executorch",
                //issue_number: pull.number,
                //body: STALE_MESSAGE,
                //});

                numAPIRequests += 1;
                //await github.rest.issues.addLabels({
                //owner: "pytorch",
                //repo: "executorch",
                //issue_number: pull.number,
                //labels: ["Stale"],
                //});
              }
            }

            for await (const response of github.paginate.iterator(
              github.rest.pulls.list,
              {
                owner: "pytorch",
                repo: "executorch",
                state: "open",
                sort: "created",
                direction: "asc",
                per_page: 100,
              }
            )) {
              numAPIRequests += 1;
              const pulls = response.data;
              // Awaiting in a loop is intentional here. We want to serialize execution so
              // that log groups are printed correctly.
              for (const pull of pulls) {
                if (numAPIRequests > MAX_API_REQUESTS) {
                  core.warning("Max API requests exceeded, exiting.");
                  process.exit(0);
                }
                await core.group(`Processing PR #${pull.number}`, async () => {
                  await processPull(pull);
                });
              }
            }
            core.info(`Processed ${numProcessed} PRs total.`);
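
For orientation, here is a small Python sketch (not part of this commit) that simply evaluates the retry backoff schedule and staleness thresholds the script above uses; the constants mirror the workflow (7 attempts, 100 ms base backoff, 60-day label threshold, 30-day close threshold).

    # Standalone sketch of the backoff schedule and staleness thresholds used by
    # the workflow script above. Illustration only; the real logic runs inside
    # the actions/github-script step.
    RETRIES = 7
    BASE_BACKOFF_MS = 100

    def backoff_delays_ms():
        # The script sleeps base * 2**attempt before retrying attempts 1..RETRIES-1.
        return [BASE_BACKOFF_MS * 2 ** attempt for attempt in range(1, RETRIES)]

    STALE_LABEL_THRESHOLD_MS = 1000 * 60 * 60 * 24 * 60   # 60 days
    STALE_CLOSE_THRESHOLD_MS = 1000 * 60 * 60 * 24 * 30   # 30 days

    if __name__ == "__main__":
        print(backoff_delays_ms())        # [200, 400, 800, 1600, 3200, 6400]
        print(STALE_LABEL_THRESHOLD_MS)   # 5184000000
        print(STALE_CLOSE_THRESHOLD_MS)   # 2592000000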

backends/cadence/aot/functions.yaml

Lines changed: 15 additions & 0 deletions
@@ -249,6 +249,21 @@
     - arg_meta: null
       kernel_name: impl::reference::quantized_relu_asym8u_asym8u_per_tensor_out
 
+- func: cadence::quantized_add.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_add_per_tensor_out
+
+- func: cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_add_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_add_asym8uxasym8u_asym8u_per_tensor_out
+
 - func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
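
The three new per-tensor quantized_add entries bind reference kernels for the generic, int8 (asym8s), and uint8 (asym8u) cases. As a rough mental model only, the sketch below assumes the usual dequantize-add-requantize semantics of an affine-quantized add; it is an illustration under that assumption, not taken from the Cadence kernel sources in this commit.

    import torch

    def quantized_add_per_tensor_reference(
        X: torch.Tensor, X_scale: float, X_zero_point: int,
        Y: torch.Tensor, Y_scale: float, Y_zero_point: int,
        out_scale: float, out_zero_point: int,
    ) -> torch.Tensor:
        # Hypothetical reference: dequantize both inputs, add in float, then
        # requantize with the output scale/zero-point and clamp to the input
        # dtype's range. The actual kernels may use fixed-point arithmetic.
        x_f = (X.to(torch.float32) - X_zero_point) * X_scale
        y_f = (Y.to(torch.float32) - Y_zero_point) * Y_scale
        q = torch.round((x_f + y_f) / out_scale) + out_zero_point
        info = torch.iinfo(X.dtype)
        return q.clamp(info.min, info.max).to(X.dtype)

    # Example: two int8 tensors quantized with scale 1.0 and zero-point 0.
    x = torch.tensor([10, 20], dtype=torch.int8)
    y = torch.tensor([3, -4], dtype=torch.int8)
    print(quantized_add_per_tensor_reference(x, 1.0, 0, y, 1.0, 0, 1.0, 0))
    # tensor([13, 16], dtype=torch.int8)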

backends/cadence/aot/functions_hifi.yaml

Lines changed: 10 additions & 0 deletions
@@ -404,6 +404,16 @@
     - arg_meta: null
       kernel_name: cadence::impl::HiFi::quantized_relu_asym8u_asym8u_per_tensor_out
 
+- func: cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_add_asym8sxasym8s_asym8s_per_tensor_out
+
+- func: cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: cadence::impl::HiFi::quantized_add_asym8uxasym8u_asym8u_per_tensor_out
+
 - func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null

backends/cadence/aot/memory_planning.py

Lines changed: 8 additions & 5 deletions
@@ -116,6 +116,9 @@ def plan_spec(
         Greedily place the spec in the first memory that can fit it.
         """
         for spec.mem_id in range(1, self.get_num_memories()):
+            if placement_constraints.is_mem_id_in_blocklist(spec, spec.mem_id):
+                # Skip placement for blocked memory id.
+                continue
             prev_offset, smallest_gap = 0, float("inf")
             for allocated_spec in state.allocated_buffers[spec.mem_id]:
                 if not Verifier.lifetime_overlap(spec, allocated_spec):
@@ -141,11 +144,11 @@ def plan_spec(
                 )
             if spec.mem_offset is None:
                 spec.mem_offset = prev_offset
-                if not self.is_valid_placement(spec, placement_constraints):
-                    spec.mem_offset = None
-                    continue
-                else:
-                    spec.mem_offset = prev_offset
+
+            if not self.is_valid_placement(spec, placement_constraints):
+                # Skip placement for invalid memory id.
+                spec.mem_offset = None
+                continue
 
             state.place_spec(spec)
             # A data structure used for maintaining the tensor order
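
The added check skips any memory id that the placement constraints blocklist before attempting the greedy fit. Below is a simplified, self-contained sketch of that control flow with hypothetical names; the real planner places TensorSpec objects and does gap-aware placement, which this omits.

    # Simplified sketch of greedy first-fit placement with a blocklist check,
    # mirroring the control flow added above. All names here are hypothetical.
    from typing import Dict, List, Optional, Set, Tuple

    def place_greedy(
        size: int,
        num_memories: int,
        blocklist: Set[int],                          # mem_ids this buffer may not use
        allocated: Dict[int, List[Tuple[int, int]]],  # mem_id -> [(offset, size), ...]
        capacity: Dict[int, int],                     # mem_id -> bytes available
    ) -> Optional[Tuple[int, int]]:
        for mem_id in range(1, num_memories):
            if mem_id in blocklist:
                # Skip placement for blocked memory id.
                continue
            # First fit: place right after the last byte already allocated here.
            offset = max((off + sz for off, sz in allocated.get(mem_id, [])), default=0)
            if offset + size <= capacity.get(mem_id, 0):
                return mem_id, offset
        return None  # no valid placement found

    # Memory 1 is blocked, so the 16-byte buffer lands in memory 2 after the
    # existing 32-byte allocation.
    print(place_greedy(16, 3, {1}, {2: [(0, 32)]}, {1: 64, 2: 64}))  # (2, 32)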

backends/cadence/aot/memory_planning_algo.py

Lines changed: 2 additions & 2 deletions
@@ -204,7 +204,7 @@ def _place_memory_id_pinned_specs(
             for spec, c in spec_with_abs_constraint.items()
             if c is not None and c.pinned_memory_id == mem_id and c.offset is None
         }
-        logging.error(f"Placing specs {mem_id_pinned_specs} for {mem_id=}")
+        logging.debug(f"Placing specs {mem_id_pinned_specs} for {mem_id=}")
 
         with self.block_memories_except(mem_id):
             self.plan(
@@ -220,7 +220,7 @@ def _place_memory_id_pinned_specs(
             if constraint is None:
                 continue
 
-            logging.error(f"Placing spec {spec} with {constraint}")
+            logging.debug(f"Placing spec {spec} with {constraint}")
 
             if not state.is_placed(spec):
                 raise MemoryError(

backends/cadence/aot/ops_registrations.py

Lines changed: 46 additions & 0 deletions
@@ -325,6 +325,22 @@
     "quantized_add.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
     "int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
 )
+lib.define(
+    "quantized_add_asym8sxasym8s_asym8s.per_tensor(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
+    "int Y_zero_point, float out_scale, int out_zero_point) -> Tensor"
+)
+lib.define(
+    "quantized_add_asym8sxasym8s_asym8s.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
+    "int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
+)
+lib.define(
+    "quantized_add_asym8uxasym8u_asym8u.per_tensor(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
+    "int Y_zero_point, float out_scale, int out_zero_point) -> Tensor"
+)
+lib.define(
+    "quantized_add_asym8uxasym8u_asym8u.per_tensor_out(Tensor X, float X_scale, int X_zero_point, Tensor Y, float Y_scale, "
+    "int Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
+)
 lib.define(
     "quantized_mul.out(Tensor X, Tensor X_scale, Tensor X_zero_point, Tensor Y, Tensor Y_scale, "
     "Tensor Y_zero_point, float out_scale, int out_zero_point, *, Tensor(a!) out) -> Tensor(a!)"
@@ -503,6 +519,36 @@ def quantized_add_per_tensor_meta(
     return X.new_empty(out_size, dtype=X.dtype)
 
 
+@register_fake("cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor")
+def quantized_add_asym8sxasym8s_asym8s_per_tensor_meta(
+    X: torch.Tensor,
+    X_scale: float,
+    X_zero_point: int,
+    Y: torch.Tensor,
+    Y_scale: float,
+    Y_zero_point: int,
+    out_scale: float,
+    out_zero_point: int,
+) -> torch.Tensor:
+    out_size = torch.broadcast_shapes(X.size(), Y.size())
+    return X.new_empty(out_size, dtype=X.dtype)
+
+
+@register_fake("cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor")
+def quantized_add_asym8uxasym8u_asym8u_per_tensor_meta(
+    X: torch.Tensor,
+    X_scale: float,
+    X_zero_point: int,
+    Y: torch.Tensor,
+    Y_scale: float,
+    Y_zero_point: int,
+    out_scale: float,
+    out_zero_point: int,
+) -> torch.Tensor:
+    out_size = torch.broadcast_shapes(X.size(), Y.size())
+    return X.new_empty(out_size, dtype=X.dtype)
+
+
 @register_fake("cadence::quantized_linear")
 def quantized_linear_meta(
     src: torch.Tensor,
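
The new fake (meta) kernels only compute output metadata: the shape is the broadcast of the two input shapes and the dtype follows X. A quick standalone check of that shape rule, using the same torch.broadcast_shapes and new_empty calls as the meta functions above:

    import torch

    # torch.broadcast_shapes applies the same broadcasting rule the new
    # register_fake meta functions use to size their outputs.
    print(torch.broadcast_shapes((2, 3), (2, 3)))  # torch.Size([2, 3])
    print(torch.broadcast_shapes((2, 1), (1, 3)))  # torch.Size([2, 3])

    # As in the meta functions, the output tensor is created with the broadcast
    # shape and X's dtype.
    X = torch.zeros((2, 1), dtype=torch.int8)
    Y = torch.zeros((1, 3), dtype=torch.int8)
    out = X.new_empty(torch.broadcast_shapes(X.size(), Y.size()), dtype=X.dtype)
    print(out.shape, out.dtype)  # torch.Size([2, 3]) torch.int8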

backends/cadence/aot/tests/test_memory_passes.py

Lines changed: 1 addition & 1 deletion
@@ -1044,7 +1044,7 @@ class DummyMemIdBlockConstraintGen(PassBase):
     mul: blocks 1, 3
     """
 
-    def __init__(self, memory_constraints: MemoryConfig):
+    def __init__(self, memory_constraints: MemConstraints):
         self.memory_constraints = memory_constraints
 
     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:

backends/cadence/aot/tests/test_type_dispatch_passes.py

Lines changed: 50 additions & 0 deletions
@@ -445,3 +445,53 @@ def test_uint8_dispatch_quantized_conv_nhwc_dilated(self) -> None:
             ),
             1,
         )
+
+    def test_int8_dispatch_quantized_add(self) -> None:
+        """Test int8 x int8 inputs should dispatch to asym8sxasym8s_asym8s variant for quantized_add"""
+        x = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+        y = torch.randint(-128, 127, (2, 3), dtype=torch.int8)
+        gm = single_op_builder(
+            placeholders=(x, y),
+            op=exir_ops.edge.cadence.quantized_add.per_tensor,
+            args=(x, 1.0, 0, y, 1.0, 0, 1.0, 0),
+        )
+        p = CompileTimeTypeDispatchPass()
+        gm = cast(PassResult, p(gm)).graph_module
+        # Original op should be replaced
+        self.assertEqual(
+            count_node(gm, exir_ops.edge.cadence.quantized_add.per_tensor),
+            0,
+        )
+        # Should be replaced with int8 specific variant
+        self.assertEqual(
+            count_node(
+                gm,
+                exir_ops.edge.cadence.quantized_add_asym8sxasym8s_asym8s.per_tensor,
+            ),
+            1,
+        )
+
+    def test_uint8_dispatch_quantized_add(self) -> None:
+        """Test uint8 x uint8 inputs should dispatch to asym8uxasym8u_asym8u variant for quantized_add"""
+        x = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+        y = torch.randint(0, 255, (2, 3), dtype=torch.uint8)
+        gm = single_op_builder(
+            placeholders=(x, y),
+            op=exir_ops.edge.cadence.quantized_add.per_tensor,
+            args=(x, 1.0, 0, y, 1.0, 0, 1.0, 0),
+        )
+        p = CompileTimeTypeDispatchPass()
+        gm = cast(PassResult, p(gm)).graph_module
+        # Original op should be replaced
+        self.assertEqual(
+            count_node(gm, exir_ops.edge.cadence.quantized_add.per_tensor),
+            0,
+        )
+        # Should be replaced with uint8 specific variant
+        self.assertEqual(
+            count_node(
+                gm,
+                exir_ops.edge.cadence.quantized_add_asym8uxasym8u_asym8u.per_tensor,
+            ),
+            1,
+        )

backends/cadence/aot/type_dispatch.py

Lines changed: 8 additions & 0 deletions
@@ -85,6 +85,14 @@ class CompileTimeTypeDispatchPass(ExportPass):
                 (torch.uint8,): "asym8u_asym8u",
             },
         ),
+        exir_ops.edge.cadence.quantized_add.per_tensor: OpConfig(
+            "quantized_add",
+            type_dispatch_suffixes={
+                (torch.int8, torch.int8): "asym8sxasym8s_asym8s",
+                (torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
+            },
+            weight_arg_idx=3,
+        ),
     }
 
     def call_operator(
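
The new OpConfig keys the dispatch on the dtypes of both tensor inputs (weight_arg_idx=3 appears to point at Y, the second tensor argument in the schema). A minimal sketch of how such a suffix table resolves to a typed variant name; the helper below is hypothetical and only illustrates the mapping, not the pass itself.

    import torch

    # Hypothetical sketch of the dtype-keyed lookup behind CompileTimeTypeDispatchPass
    # for quantized_add: the pair of input dtypes selects the typed variant's suffix.
    SUFFIXES = {
        (torch.int8, torch.int8): "asym8sxasym8s_asym8s",
        (torch.uint8, torch.uint8): "asym8uxasym8u_asym8u",
    }

    def resolve_quantized_add_variant(x_dtype: torch.dtype, y_dtype: torch.dtype) -> str:
        suffix = SUFFIXES[(x_dtype, y_dtype)]
        return f"cadence::quantized_add_{suffix}.per_tensor"

    print(resolve_quantized_add_variant(torch.int8, torch.int8))
    # cadence::quantized_add_asym8sxasym8s_asym8s.per_tensor
    print(resolve_quantized_add_variant(torch.uint8, torch.uint8))
    # cadence::quantized_add_asym8uxasym8u_asym8u.per_tensor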

backends/cadence/fusion_g3/operators/op_clamp.cpp

Lines changed: 3 additions & 2 deletions
@@ -45,6 +45,7 @@ bool is_out_of_bounds(CTYPE_VAL val) {
 }
 
 ET_NODISCARD bool check_bounds(
+    KernelRuntimeContext& ctx,
     const Scalar& val_scalar,
     const ScalarType& val_type,
     const ScalarType& out_type,
@@ -107,14 +108,14 @@ Tensor& clamp_out(
   if (has_min) {
     ET_KERNEL_CHECK(
         ctx,
-        check_bounds(min_opt.value(), min_type, out_type, "minimum"),
+        check_bounds(ctx, min_opt.value(), min_type, out_type, "minimum"),
         InvalidArgument,
         out);
   }
   if (has_max) {
     ET_KERNEL_CHECK(
         ctx,
-        check_bounds(max_opt.value(), max_type, out_type, "maximum"),
+        check_bounds(ctx, max_opt.value(), max_type, out_type, "maximum"),
         InvalidArgument,
         out);
   }
