Skip to content

Commit 85b91a4

Browse files
authored
Milestone2.1: Partition to_dim_order_copy op in XNN delegate (#11286)
### Summary
Add the to_dim_order_copy op to the partitioner in the XNN delegate, so that dim order conversions (to_dim_order_copy ops) manually initiated by the user via .to(memory_format=<format>) are delegated. With this operation handled by the partitioner, dim order conversions can be optimized inside the delegate, leading to faster execution times and better overall performance.

### Test plan
I added a test that manually invokes the .to(memory_format=<format>) method and confirmed that the op is partitioned, by checking that the graph after lowering makes a delegate call for the to_copy op instead of calling the default op.
1 parent bfd4957 commit 85b91a4

File tree

5 files changed

+139
-0
lines changed

5 files changed

+139
-0
lines changed

backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,11 @@ def call(self, graph_module: torch.fx.GraphModule): # noqa: C901
395395
# The node requires nchw inputs
396396
for input_node in node.all_input_nodes:
397397
self.input_to_nchw(graph_module, input_node, node)
398+
elif node.target == exir_ops.edge.aten._to_copy.default:
399+
if node.kwargs["memory_format"] == torch.channels_last:
400+
self.mark_as_nhwc_node(node)
401+
else:
402+
self.mark_as_nchw_node(node)
398403
else:
399404
# The node can have inputs in any format (but all must be the
400405
# same format)

backends/xnnpack/partition/config/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
SquareRootConfig,
5151
SubConfig,
5252
TanhConfig,
53+
ToDimOrderCopyConfig,
5354
UpsampleBilinear2dConfig,
5455
)
5556
from executorch.backends.xnnpack.partition.config.node_configs import (
@@ -102,6 +103,8 @@
102103
ReciprocalSquareRootConfig,
103104
ReLUConfig,
104105
TanhConfig,
106+
ToDimOrderCopyConfig,
107+
# SDPAConfig, TODO: D60553559: preserving SDPA for fairseq fails
105108
SigmoidConfig,
106109
SliceCopyConfig,
107110
SoftmaxConfig,

backends/xnnpack/partition/config/generic_node_configs.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,35 @@ def supported_precision_types(self) -> List[ConfigPrecisionType]:
397397
return [ConfigPrecisionType.FP32]
398398

399399

400+
class ToDimOrderCopyConfig(GenericNodePartitionerConfig):
    """Partitioner config for the ``_to_dim_order_copy.default`` op.

    A node is accepted only when its output dtype equals the dtype of its
    first input, i.e. when the copy does not also perform a dtype conversion.
    """

    target_name = "_to_dim_order_copy.default"

    def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
        """
        Only support dim order conversion partitioning, not DType conversions

        Returns False when the common constraints fail or when the dtype of
        the first input differs from the dtype of the node's output; in the
        latter case the reason is reported through ``why``.
        """
        if not self.check_common_constraints(node, ep):
            return False

        # Compare the dtype recorded on the first input with the node's own.
        input_dtype = get_input_node(node, 0).meta["val"].dtype
        output_dtype = node.meta["val"].dtype
        if input_dtype == output_dtype:
            return True

        # Differing dtypes mean a dtype conversion, which is rejected.
        why(
            node,
            reason=f"dtype conversion from {input_dtype} to {output_dtype} is not supported",
        )
        return False

    def supported_precision_types(self) -> List[ConfigPrecisionType]:
        """Return the precision types this config supports (FP32 only)."""
        return [ConfigPrecisionType.FP32]
427+
428+
400429
class MeanDimConfig(GenericNodePartitionerConfig):
401430
target_name = "mean.dim"
402431

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import unittest
8+
9+
import torch
10+
11+
from executorch.backends.xnnpack.test.tester import Tester
12+
13+
14+
class TestChannelsLastTaggedReshapePass(unittest.TestCase):
15+
def setUp(self):
16+
torch._dynamo.reset()
17+
18+
def run_tester(self, module, inputs):
19+
tester = Tester(
20+
module.eval(),
21+
inputs,
22+
)
23+
tester.export().to_edge_transform_and_lower().to_executorch().serialize().run_method_and_compare_outputs()
24+
25+
class ChannelLastBeforeLinear(torch.nn.Module):
26+
def __init__(self):
27+
super().__init__()
28+
self.linear = torch.nn.Linear(3, 3)
29+
30+
def forward(self, x):
31+
y = x.to(memory_format=torch.channels_last)
32+
return self.linear(y)
33+
34+
ChannelLastBeforeLinearModule = ChannelLastBeforeLinear()
35+
36+
def test_channel_last_before_linear(self):
37+
self.run_tester(self.ChannelLastBeforeLinearModule, (torch.randn(1, 3, 3, 3),))
38+
39+
class ContiguousBeforeConv(torch.nn.Module):
40+
def __init__(self):
41+
super().__init__()
42+
self.conv = torch.nn.Conv2d(3, 3, 3)
43+
44+
def forward(self, x):
45+
y = x.to(memory_format=torch.contiguous_format)
46+
return self.conv(y)
47+
48+
ContiguousBeforeConvModule = ContiguousBeforeConv()
49+
50+
def test_contiguous_before_conv(self):
51+
self.run_tester(self.ContiguousBeforeConvModule, (torch.randn(1, 3, 6, 6),))
52+
53+
class DtypeAndMemoryFormatConversion(torch.nn.Module):
54+
def __init__(self):
55+
super().__init__()
56+
self.conv = torch.nn.Conv2d(3, 3, 3)
57+
58+
def forward(self, x):
59+
y = x.to(torch.float, memory_format=torch.channels_last)
60+
return self.conv(y)
61+
62+
DtypeAndMemoryFormatConversionModule = DtypeAndMemoryFormatConversion()
63+
64+
def test_dtype_and_memory_format_conversion(self):
65+
self.run_tester(
66+
self.DtypeAndMemoryFormatConversionModule,
67+
(torch.randint(0, 10, (1, 3, 6, 6), dtype=torch.int32),),
68+
)
69+
70+
class DtypeAndMemoryFormatWithLinear(torch.nn.Module):
71+
def __init__(self):
72+
super().__init__()
73+
self.linear = torch.nn.Linear(3, 3)
74+
75+
def forward(self, x):
76+
y = x.to(torch.float, memory_format=torch.channels_last)
77+
return self.linear(y)
78+
79+
DtypeAndMemoryFormatWithLinearModule = DtypeAndMemoryFormatWithLinear()
80+
81+
def test_dtype_and_memory_format_with_linear(self):
82+
self.run_tester(
83+
self.DtypeAndMemoryFormatWithLinearModule,
84+
(torch.randint(0, 10, (1, 3, 3, 3), dtype=torch.int16),),
85+
)

backends/xnnpack/test/passes/test_channels_last_tagged_reshape.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,23 @@ def test_fp32_channels_last_tagged_reshape_pass(self):
173173
.run_method_and_compare_outputs()
174174
)
175175

176+
class LinearConvDimSwap(torch.nn.Module):
    """Linear layer, two back-to-back memory-format conversions, then Conv2d.

    The Linear output is converted to channels_last and then back to
    contiguous_format before it is passed to the convolution.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(3, 3, 3)
        self.linear1 = torch.nn.Linear(4, 3)

    def forward(self, x):
        projected = self.linear1(x)
        as_channels_last = projected.to(memory_format=torch.channels_last)
        as_contiguous = as_channels_last.to(memory_format=torch.contiguous_format)
        return self.conv1(as_contiguous)


LinearConvDimSwapModule = LinearConvDimSwap()
189+
190+
def test_conv_linear_dim_order_swap_partitioner(self):
    """Run LinearConvDimSwapModule through ``run_tester`` on a random input."""
    sample_inputs = (torch.randn(1, 3, 6, 4),)
    self.run_tester(self.LinearConvDimSwapModule, sample_inputs)
192+
176193
def test_qs8_channels_last_tagged_reshape_pass(self):
177194
for module, num_reshape in self.modules.items():
178195
(

0 commit comments

Comments
 (0)