pytorch · zingo · Jul 4, 2025 · Jun 27, 2025 · Jul 3, 2025 · Jul 4, 2025
@@ -23,6 +23,7 @@
 from .convert_squeezes_to_view import ConvertSqueezesToViewPass  # noqa
 from .convert_to_clamp import ConvertToClampPass  # noqa
 from .decompose_acosh_pass import DecomposeAcoshPass  # noqa
+from .decompose_adaptive_avg_pool2d_pass import DecomposeAdaptiveAvgPool2dPass  # noqa
 from .decompose_atan_pass import DecomposeAtanPass  # noqa
 from .decompose_avg_pool2d import DecomposeAvgPool2d  # noqa
 from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass  # noqa

@@ -26,6 +26,7 @@
     ConvertSqueezesToViewPass,
     ConvertToClampPass,
     DecomposeAcoshPass,
+    DecomposeAdaptiveAvgPool2dPass,
     DecomposeAtanPass,
     DecomposeAvgPool2d,
     DecomposeBatchNormNoStatsPass,
@@ -124,6 +125,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         if self.tosa_spec.is_U55_subset:
             self.add_pass(BroadcastArgsPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(DecomposeAdaptiveAvgPool2dPass())
         self.add_pass(DecomposeAvgPool2d())
         self.add_pass(ComputeConstantOpsAOT(exported_program))
 
@@ -190,6 +192,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
         self.add_pass(MatchArgRanksPass(exported_program))
+        self.add_pass(DecomposeAdaptiveAvgPool2dPass())
         self.add_pass(DecomposeAvgPool2d())
         self.add_pass(ComputeConstantOpsAOT(exported_program))
 

@@ -0,0 +1,92 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from math import ceil, floor
+
+import torch
+
+from executorch.backends.arm._passes import ArmPass
+
+from executorch.exir.dialects._ops import ops as exir_ops
+
+# Edge-dialect adaptive average pooling overloads that this module decomposes.
+edge_ops = (exir_ops.edge.aten._adaptive_avg_pool2d.default,)
+# ATen-dialect adaptive average pooling overloads that this module decomposes.
+aten_ops = (torch.ops.aten.adaptive_avg_pool2d.default,)
+
+
+def _get_decomposition(op) -> tuple:
+    if op in edge_ops:
+        return (
+            exir_ops.edge.aten.avg_pool2d.default,
+            exir_ops.edge.aten.slice_copy.Tensor,
+            exir_ops.edge.aten.cat.default,
+        )
+    if op in aten_ops:
+        return (
+            torch.ops.aten.avg_pool2d.default,
+            torch.ops.aten.slice_copy.Tensor,
+            torch.ops.aten.cat.default,
+        )
+    raise RuntimeError(f"Unable to get decomposition for op {op}")
+
+
+class DecomposeAdaptiveAvgPool2dPass(ArmPass):
+    """
+    Decomposes AdaptiveAvgPool2d into AvgPool2d operations.
+
+    An input tensor of shape (N, C, H, W) is transformed into an output tensor
+    of shape (N, C, output_size_h, output_size_w).
+
+    The output is of size output_size_h x output_size_w for any input.
+    """
+
+    def call_operator(self, op, args, kwargs, meta, updated=False):
+        if op not in (edge_ops + aten_ops):
+            return super().call_operator(op, args, kwargs, meta, updated)
+
+        avg_pool2d_op, slice_op, cat_op = _get_decomposition(op)
+
+        x = args[0]
+        _, _, input_size_h, input_size_w = x.data.shape
+
+        (output_size_h, output_size_w) = args[1]
+
+        # Vela currently only allows a stride in the interval of [1,3] for AvgPool2d.
+        # To accommodate this, the AvgPool2d op is applied to pooling regions and the results are concatenated.
+
+        res = []
+        for out_i in range(output_size_h):
+            row = []
+            for out_j in range(output_size_w):
+                # Calculate pooling regions
+                start_h = floor(out_i * input_size_h / output_size_h)
+                end_h = ceil((out_i + 1) * input_size_h / output_size_h)
+                start_w = floor(out_j * input_size_w / output_size_w)
+                end_w = ceil((out_j + 1) * input_size_w / output_size_w)
+
+                # Slice along H
+                x_h = super().call_operator(
+                    slice_op, (x, 2, start_h, end_h), kwargs, meta, True
+                )
+                # Slice along W
+                x_hw = super().call_operator(
+                    slice_op, (x_h, 3, start_w, end_w), kwargs, meta, True
+                )
+
+                # Apply avg pooling with kernel size equal to the pooling region
+                kernel_h = end_h - start_h
+                kernel_w = end_w - start_w
+                pool_args = (x_hw, (kernel_h, kernel_w), (1, 1), (0, 0))
+                pooled = super().call_operator(
+                    avg_pool2d_op, pool_args, kwargs, meta, True
+                )
+                row.append(pooled)
+
+            # Concatenate row results along width (dim=3)
+            row_tensor = super().call_operator(cat_op, (row, 3), kwargs, meta, True)
+            res.append(row_tensor)
+
+        # Concatenate all rows along height (dim=2)
+        out = super().call_operator(cat_op, (res, 2), kwargs, meta, True)
+        return out
@@ -249,6 +249,7 @@ def is_node_supported(
             exir_ops.edge.aten.sinh.default,
             exir_ops.edge.aten.atan.default,
             exir_ops.edge.aten.acosh.default,
+            exir_ops.edge.aten._adaptive_avg_pool2d.default,
         ]
 
         return supported

@@ -0,0 +1,163 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+# Name of the edge-dialect avg_pool2d op (the plain op, not the adaptive
+# variant); handed to every pipeline below as its exir_op / exir_ops argument.
+exir_op = "executorch_exir_dialects_edge__ops_aten_avg_pool2d_default"
+
+# Input-tuple type used to subscript the pipeline classes below.
+input_t = Tuple[torch.Tensor]
+
+
+class AdaptiveAvgPool2d(torch.nn.AdaptiveAvgPool2d):
+    """Thin subclass of torch.nn.AdaptiveAvgPool2d used as the module under test."""
+
+    def forward(self, *args, **kwargs):
+        """Delegate to the parent forward() with the arguments unchanged."""
+        return super().forward(*args, **kwargs)
+
+
+# Maps a test-case name to a zero-argument factory returning
+# (module, inputs): an AdaptiveAvgPool2d configured with the requested output
+# size and a one-element tuple holding a random 4-D input tensor.
+# The factories are lambdas, so modules and tensors are only created when a
+# test calls them.
+test_modules = {
+    "output_bigger_than_input_1_to_3": lambda: (
+        AdaptiveAvgPool2d((3, 3)),
+        (torch.rand(1, 3, 1, 1),),
+    ),
+    "output_bigger_than_input_7_to_10": lambda: (
+        AdaptiveAvgPool2d((10, 10)),
+        (torch.rand(1, 3, 7, 7),),
+    ),
+    "output_1x1": lambda: (AdaptiveAvgPool2d((1, 1)), (torch.rand(1, 4, 8, 8),)),
+    "output_2x2": lambda: (AdaptiveAvgPool2d((2, 2)), (torch.rand(1, 4, 10, 10),)),
+    "output_4x4": lambda: (AdaptiveAvgPool2d((4, 4)), (torch.rand(1, 5, 15, 15),)),
+    "output_2x3": lambda: (AdaptiveAvgPool2d((2, 3)), (torch.rand(1, 3, 9, 13),)),
+    "output_h_keep": lambda: (
+        AdaptiveAvgPool2d((2, None)),
+        (torch.rand(1, 3, 10, 16),),
+    ),
+    "output_w_keep": lambda: (
+        AdaptiveAvgPool2d((None, 4)),
+        (torch.rand(1, 3, 14, 20),),
+    ),
+    "output_5x5": lambda: (AdaptiveAvgPool2d((5, 5)), (torch.rand(1, 3, 25, 25),)),
+    "output_3x5": lambda: (AdaptiveAvgPool2d((3, 5)), (torch.rand(1, 3, 15, 20),)),
+    "output_7x1": lambda: (AdaptiveAvgPool2d((7, 1)), (torch.rand(1, 3, 21, 3),)),
+    "output_1x7": lambda: (AdaptiveAvgPool2d((1, 7)), (torch.rand(1, 3, 3, 21),)),
+    "output_3xNone": lambda: (AdaptiveAvgPool2d((3, None)), (torch.rand(1, 3, 9, 24),)),
+    "output_Nonex3": lambda: (AdaptiveAvgPool2d((None, 3)), (torch.rand(1, 3, 24, 9),)),
+    "pool_h_static_w_none": lambda: (
+        AdaptiveAvgPool2d((3, None)),
+        (torch.rand(1, 3, 9, 17),),
+    ),
+    "pool_h_none_w_static": lambda: (
+        AdaptiveAvgPool2d((None, 5)),
+        (torch.rand(1, 3, 15, 25),),
+    ),
+    "identity_pool": lambda: (
+        AdaptiveAvgPool2d((10, 10)),
+        (torch.rand(1, 3, 10, 10),),
+    ),
+    "non_divisible_5x5_from_17x17": lambda: (
+        AdaptiveAvgPool2d((5, 5)),
+        (torch.rand(1, 3, 17, 17),),
+    ),
+    "pool_height_only": lambda: (
+        AdaptiveAvgPool2d((1, 6)),
+        (torch.rand(1, 3, 12, 6),),
+    ),
+    "pool_width_only": lambda: (
+        AdaptiveAvgPool2d((6, 1)),
+        (torch.rand(1, 3, 6, 12),),
+    ),
+    "extreme_input_large": lambda: (
+        AdaptiveAvgPool2d((1, 1)),
+        (torch.rand(1, 3, 128, 128),),
+    ),
+    "single_channel_input": lambda: (
+        AdaptiveAvgPool2d((4, 4)),
+        (torch.rand(1, 1, 16, 16),),
+    ),
+    "high_channel_count": lambda: (
+        AdaptiveAvgPool2d((2, 2)),
+        (torch.rand(1, 1024, 32, 32),),
+    ),
+    # Common input/output sizes found in models
+    "output_7x7_from_14x14": lambda: (
+        AdaptiveAvgPool2d((7, 7)),
+        (torch.rand(1, 512, 14, 14),),
+    ),
+    "output_1x1_from_8x8": lambda: (
+        AdaptiveAvgPool2d((1, 1)),
+        (torch.rand(1, 2048, 8, 8),),
+    ),
+    "output_1x1_from_19": lambda: (
+        AdaptiveAvgPool2d((1, 1)),
+        (torch.rand(1, 2560, 19, 19),),
+    ),
+    "output_1x1_from_7x7": lambda: (
+        AdaptiveAvgPool2d((1, 1)),
+        (torch.rand(1, 1280, 7, 7),),
+    ),
+}
+
+
+@common.parametrize("test_module", test_modules)
+def test_adaptive_avg_pool2d_tosa_MI(test_module):
+    model, input_tensor = test_module()
+
+    pipeline = TosaPipelineMI[input_t](
+        model,
+        input_tensor,
+        aten_op=[],
+        exir_op=exir_op,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+def test_adaptive_avg_pool2d_tosa_BI(test_module):
+    model, input_tensor = test_module()
+
+    pipeline = TosaPipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_op=[],
+        exir_op=exir_op,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.XfailIfNoCorstone300
+def test_adaptive_avg_pool2d_u55_BI(test_module):
+    model, input_tensor = test_module()
+
+    pipeline = EthosU55PipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_ops=[],
+        exir_ops=exir_op,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.XfailIfNoCorstone320
+def test_adaptive_avg_pool2d_u85_BI(test_module):
+    model, input_tensor = test_module()
+
+    pipeline = EthosU85PipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_ops=[],
+        exir_ops=exir_op,
+    )
+    pipeline.run()