Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backends/arm/_passes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .convert_squeezes_to_view import ConvertSqueezesToViewPass # noqa
from .convert_to_clamp import ConvertToClampPass # noqa
from .decompose_acosh_pass import DecomposeAcoshPass # noqa
from .decompose_adaptive_avg_pool2d_pass import DecomposeAdaptiveAvgPool2dPass # noqa
from .decompose_atan_pass import DecomposeAtanPass # noqa
from .decompose_avg_pool2d import DecomposeAvgPool2d # noqa
from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass # noqa
Expand Down
3 changes: 3 additions & 0 deletions backends/arm/_passes/arm_pass_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
ConvertSqueezesToViewPass,
ConvertToClampPass,
DecomposeAcoshPass,
DecomposeAdaptiveAvgPool2dPass,
DecomposeAtanPass,
DecomposeAvgPool2d,
DecomposeBatchNormNoStatsPass,
Expand Down Expand Up @@ -124,6 +125,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
if self.tosa_spec.is_U55_subset:
self.add_pass(BroadcastArgsPass())
self.add_pass(DecomposeLinearPass())
self.add_pass(DecomposeAdaptiveAvgPool2dPass())
self.add_pass(DecomposeAvgPool2d())
self.add_pass(ComputeConstantOpsAOT(exported_program))

Expand Down Expand Up @@ -190,6 +192,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
self.add_pass(RetraceFoldedDtypesPass())
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
self.add_pass(MatchArgRanksPass(exported_program))
self.add_pass(DecomposeAdaptiveAvgPool2dPass())
self.add_pass(DecomposeAvgPool2d())
self.add_pass(ComputeConstantOpsAOT(exported_program))

Expand Down
92 changes: 92 additions & 0 deletions backends/arm/_passes/decompose_adaptive_avg_pool2d_pass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from math import ceil, floor

import torch

from executorch.backends.arm._passes import ArmPass

from executorch.exir.dialects._ops import ops as exir_ops

# Edge-dialect overload of adaptive average pooling; one of the two op sets
# that `_get_decomposition` and `call_operator` below match against.
edge_ops = (exir_ops.edge.aten._adaptive_avg_pool2d.default,)
# ATen-dialect overload of the same operation.
aten_ops = (torch.ops.aten.adaptive_avg_pool2d.default,)


def _get_decomposition(op) -> tuple:
    """Return the ops used to decompose ``op``, taken from the same dialect.

    Args:
        op: The operator being decomposed; expected to be a member of
            ``edge_ops`` or ``aten_ops``.

    Returns:
        A 3-tuple ``(avg_pool2d, slice_copy, cat)`` of operator overloads.

    Raises:
        RuntimeError: If ``op`` belongs to neither ``edge_ops`` nor
            ``aten_ops``.
    """
    # Pick the namespace first; the three overload names are the same in both.
    if op in edge_ops:
        namespace = exir_ops.edge.aten
    elif op in aten_ops:
        namespace = torch.ops.aten
    else:
        raise RuntimeError(f"Unable to get decomposition for op {op}")

    return (
        namespace.avg_pool2d.default,
        namespace.slice_copy.Tensor,
        namespace.cat.default,
    )


class DecomposeAdaptiveAvgPool2dPass(ArmPass):
    """
    Decomposes AdaptiveAvgPool2d into AvgPool2d operations.

    An input tensor of shape (N, C, H, W) is transformed into an output tensor
    of shape (N, C, output_size_h, output_size_w).

    The output is of size output_size_h x output_size_w for any input.

    Output element (i, j) is produced from the input region

        rows [floor(i * H / output_size_h), ceil((i + 1) * H / output_size_h))
        cols [floor(j * W / output_size_w), ceil((j + 1) * W / output_size_w))

    The region is cut out with two slice_copy ops, reduced by an avg_pool2d
    whose kernel spans the whole region (stride 1, no padding), and the
    per-element results are concatenated along W and then along H.
    """

    def call_operator(self, op, args, kwargs, meta, updated=False):
        """Replace an adaptive average pool node by slice/avg_pool2d/cat nodes.

        Ops other than those in ``edge_ops`` / ``aten_ops`` are forwarded
        unchanged to the parent implementation.

        Args:
            op: Operator of the node being visited.
            args: Node arguments; ``args[0]`` is the 4-D input and ``args[1]``
                the ``(output_size_h, output_size_w)`` pair.
            kwargs: Node keyword arguments, forwarded to every emitted op.
            meta: Node metadata, forwarded to every emitted op.
            updated: Forwarded to the parent for untouched ops; every op
                emitted by the decomposition is created with ``True``.

        Returns:
            The result of the final concatenation along H, or the parent's
            result for ops this pass does not handle.
        """
        if op not in (edge_ops + aten_ops):
            return super().call_operator(op, args, kwargs, meta, updated)

        avg_pool2d_op, slice_op, cat_op = _get_decomposition(op)

        x = args[0]
        _, _, input_size_h, input_size_w = x.data.shape

        (output_size_h, output_size_w) = args[1]

        # Vela currently only allows a stride in the interval of [1,3] for AvgPool2d.
        # To accommodate this, the AvgPool2d op is applied to pooling regions and the results are concatenated.

        # Column bounds depend only on the output column, so compute them once
        # instead of once per output row.
        w_regions = [
            (
                floor(out_j * input_size_w / output_size_w),
                ceil((out_j + 1) * input_size_w / output_size_w),
            )
            for out_j in range(output_size_w)
        ]

        res = []
        for out_i in range(output_size_h):
            # Row bounds of the pooling regions in this output row
            start_h = floor(out_i * input_size_h / output_size_h)
            end_h = ceil((out_i + 1) * input_size_h / output_size_h)
            kernel_h = end_h - start_h

            # Slice along H once per output row; every column of the row
            # shares this slice, so only output_size_h H-slices are emitted.
            x_h = super().call_operator(
                slice_op, (x, 2, start_h, end_h), kwargs, meta, True
            )

            row = []
            for start_w, end_w in w_regions:
                # Slice along W
                x_hw = super().call_operator(
                    slice_op, (x_h, 3, start_w, end_w), kwargs, meta, True
                )

                # Apply avg pooling with kernel size equal to the pooling region
                kernel_w = end_w - start_w
                pool_args = (x_hw, (kernel_h, kernel_w), (1, 1), (0, 0))
                pooled = super().call_operator(
                    avg_pool2d_op, pool_args, kwargs, meta, True
                )
                row.append(pooled)

            # Concatenate row results along width (dim=3)
            row_tensor = super().call_operator(cat_op, (row, 3), kwargs, meta, True)
            res.append(row_tensor)

        # Concatenate all rows along height (dim=2)
        out = super().call_operator(cat_op, (res, 2), kwargs, meta, True)
        return out
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ def is_node_supported(
exir_ops.edge.aten.sinh.default,
exir_ops.edge.aten.atan.default,
exir_ops.edge.aten.acosh.default,
exir_ops.edge.aten._adaptive_avg_pool2d.default,
]

return supported
Expand Down
163 changes: 163 additions & 0 deletions backends/arm/test/ops/test_adaptive_avg_pool2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Tuple

import torch

from executorch.backends.arm.test import common

from executorch.backends.arm.test.tester.test_pipeline import (
EthosU55PipelineBI,
EthosU85PipelineBI,
TosaPipelineBI,
TosaPipelineMI,
)

# Edge-dialect avg_pool2d op name handed to every pipeline below
# (as `exir_op=` for the TOSA pipelines, `exir_ops=` for the Ethos-U ones).
# NOTE(review): presumably the op the pipelines expect after the adaptive
# pool has been decomposed -- confirm against the pipeline implementation.
exir_op = "executorch_exir_dialects_edge__ops_aten_avg_pool2d_default"

# Type used to subscript the pipeline classes below: a 1-tuple of a tensor.
input_t = Tuple[torch.Tensor]


class AdaptiveAvgPool2d(torch.nn.AdaptiveAvgPool2d):
    """Thin test wrapper around ``torch.nn.AdaptiveAvgPool2d``.

    Adds no behaviour of its own: ``forward`` hands all arguments to the
    parent implementation and returns its result.
    """

    def forward(self, *args, **kwargs):
        """Delegate unchanged to ``torch.nn.AdaptiveAvgPool2d.forward``."""
        result = super().forward(*args, **kwargs)
        return result


def _pool_case(output_size, input_shape):
    """Return a zero-argument factory for one test case.

    Calling the factory builds a fresh ``AdaptiveAvgPool2d(output_size)`` and
    a 1-tuple holding a new ``torch.rand`` tensor of shape ``input_shape``,
    so nothing is allocated until the case is actually run.
    """

    def build():
        return (
            AdaptiveAvgPool2d(output_size),
            (torch.rand(*input_shape),),
        )

    return build


# Case name -> lazy factory returning (module, inputs).
test_modules = {
    "output_bigger_than_input_1_to_3": _pool_case((3, 3), (1, 3, 1, 1)),
    "output_bigger_than_input_7_to_10": _pool_case((10, 10), (1, 3, 7, 7)),
    "output_1x1": _pool_case((1, 1), (1, 4, 8, 8)),
    "output_2x2": _pool_case((2, 2), (1, 4, 10, 10)),
    "output_4x4": _pool_case((4, 4), (1, 5, 15, 15)),
    "output_2x3": _pool_case((2, 3), (1, 3, 9, 13)),
    "output_h_keep": _pool_case((2, None), (1, 3, 10, 16)),
    "output_w_keep": _pool_case((None, 4), (1, 3, 14, 20)),
    "output_5x5": _pool_case((5, 5), (1, 3, 25, 25)),
    "output_3x5": _pool_case((3, 5), (1, 3, 15, 20)),
    "output_7x1": _pool_case((7, 1), (1, 3, 21, 3)),
    "output_1x7": _pool_case((1, 7), (1, 3, 3, 21)),
    "output_3xNone": _pool_case((3, None), (1, 3, 9, 24)),
    "output_Nonex3": _pool_case((None, 3), (1, 3, 24, 9)),
    "pool_h_static_w_none": _pool_case((3, None), (1, 3, 9, 17)),
    "pool_h_none_w_static": _pool_case((None, 5), (1, 3, 15, 25)),
    "identity_pool": _pool_case((10, 10), (1, 3, 10, 10)),
    "non_divisible_5x5_from_17x17": _pool_case((5, 5), (1, 3, 17, 17)),
    "pool_height_only": _pool_case((1, 6), (1, 3, 12, 6)),
    "pool_width_only": _pool_case((6, 1), (1, 3, 6, 12)),
    "extreme_input_large": _pool_case((1, 1), (1, 3, 128, 128)),
    "single_channel_input": _pool_case((4, 4), (1, 1, 16, 16)),
    "high_channel_count": _pool_case((2, 2), (1, 1024, 32, 32)),
    # Common input/output sizes found in models
    "output_7x7_from_14x14": _pool_case((7, 7), (1, 512, 14, 14)),
    "output_1x1_from_8x8": _pool_case((1, 1), (1, 2048, 8, 8)),
    "output_1x1_from_19": _pool_case((1, 1), (1, 2560, 19, 19)),
    "output_1x1_from_7x7": _pool_case((1, 1), (1, 1280, 7, 7)),
}


@common.parametrize("test_module", test_modules)
def test_adaptive_avg_pool2d_tosa_MI(test_module):
    """Run one ``test_modules`` case through ``TosaPipelineMI``."""
    module, inputs = test_module()
    TosaPipelineMI[input_t](
        module,
        inputs,
        aten_op=[],
        exir_op=exir_op,
    ).run()


@common.parametrize("test_module", test_modules)
def test_adaptive_avg_pool2d_tosa_BI(test_module):
    """Run one ``test_modules`` case through ``TosaPipelineBI``."""
    module, inputs = test_module()
    TosaPipelineBI[input_t](
        module,
        inputs,
        aten_op=[],
        exir_op=exir_op,
    ).run()


@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone300
def test_adaptive_avg_pool2d_u55_BI(test_module):
    """Run one ``test_modules`` case through ``EthosU55PipelineBI``."""
    module, inputs = test_module()
    EthosU55PipelineBI[input_t](
        module,
        inputs,
        aten_ops=[],
        exir_ops=exir_op,
    ).run()


@common.parametrize("test_module", test_modules)
@common.XfailIfNoCorstone320
def test_adaptive_avg_pool2d_u85_BI(test_module):
    """Run one ``test_modules`` case through ``EthosU85PipelineBI``."""
    module, inputs = test_module()
    EthosU85PipelineBI[input_t](
        module,
        inputs,
        aten_ops=[],
        exir_ops=exir_op,
    ).run()
Loading