Commit d3c6a60

Merge branch 'main' into fold_batch_norm
2 parents: 7ee9bc2 + c5fea7e

File tree: 9 files changed, +133 additions, -111 deletions


backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 0 additions & 15 deletions
@@ -105,21 +105,6 @@ def fold_and_annotate_arg(
     for arg in arg_list:
         if not isinstance(arg, Node):
             return
-        """
-        Make sure arg has requires_grad set to False
-        For parameters that are not quantized, sometimes (i.e. convolution)
-        the Parameter(FakeTensor(...)) has requires_grad set to True, which
-        causes the retracing of the graph to fail with:
-
-        E       RuntimeError: isDifferentiableType(variable.scalar_type()) INTERNAL ASSERT FAILED at "/Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/functions/utils.h":74, please report a bug to PyTorch.
-        E
-        E       While executing %aten_convolution_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.convolution.default](args = (%quantized_decomposed_quantize_per_tensor_default, %b__frozen_param0, %p__param_constant1, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {})
-        E       Original traceback:
-        E         File "/Users/perast01/src/executorch/backends/arm/test/ops/test_conv2d.py", line 110, in forward
-        E             x = conv(x)
-        """
-        if arg.op == "placeholder":
-            arg.meta["val"].requires_grad = False
 
         arg_quant_params = None
         if arg.target == dq_op:

backends/arm/test/ops/test_cat.py

Lines changed: 26 additions & 8 deletions
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -33,6 +33,8 @@ class Cat(torch.nn.Module):
                 ),
                 -1,
             ),
+            ((torch.randn(1, 2, 4, 4), torch.randn(1, 2, 4, 1)), 3),
+            ((torch.randn(1, 2, 4, 4), torch.randn(1, 2, 4, 4)), 0),
            ((torch.randn(2, 2, 4, 4), torch.randn(2, 2, 4, 1)), 3),
            (
                (
@@ -47,8 +49,8 @@ class Cat(torch.nn.Module):
        def __init__(self):
            super().__init__()
 
-        def forward(self, tensors: tuple[torch.Tensor, ...], dim: int) -> torch.Tensor:
-            return torch.cat(tensors, dim=dim)
+        def forward(self, t: tuple[torch.Tensor, ...], dim: int) -> torch.Tensor:
+            return torch.cat(t, dim=dim)
 
    def _test_cat_tosa_MI_pipeline(
        self, module: torch.nn.Module, test_data: Tuple[tuple[torch.Tensor, ...], int]
@@ -134,22 +136,38 @@ def test_cat_tosa_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
        test_data = (operands, dim)
        self._test_cat_tosa_BI_pipeline(self.Cat(), test_data)
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
-    @parameterized.expand(Cat.test_parameters)
+    @parameterized.expand(Cat.test_parameters[:-3])
     @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
    def test_cat_u55_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
        test_data = (operands, dim)
        self._test_cat_ethosu_BI_pipeline(
            self.Cat(), common.get_u55_compile_spec(), test_data
        )
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
-    @parameterized.expand(Cat.test_parameters)
+    # MLETORCH-630 Cat does not work on FVP with batch>1
+    @parameterized.expand(Cat.test_parameters[-3:])
    @pytest.mark.corstone_fvp
    @conftest.expectedFailureOnFVP
+    def test_cat_u55_BI_xfails(self, operands: tuple[torch.Tensor, ...], dim: int):
+        test_data = (operands, dim)
+        self._test_cat_ethosu_BI_pipeline(
+            self.Cat(), common.get_u55_compile_spec(), test_data
+        )
+
+    @parameterized.expand(Cat.test_parameters[:-3])
+    @pytest.mark.corstone_fvp
    def test_cat_u85_BI(self, operands: tuple[torch.Tensor, ...], dim: int):
        test_data = (operands, dim)
        self._test_cat_ethosu_BI_pipeline(
            self.Cat(), common.get_u85_compile_spec(), test_data
        )
+
+    # MLETORCH-630 Cat does not work on FVP with batch>1
+    @parameterized.expand(Cat.test_parameters[-3:])
+    @pytest.mark.corstone_fvp
+    @conftest.expectedFailureOnFVP
+    def test_cat_u85_BI_xfails(self, operands: tuple[torch.Tensor, ...], dim: int):
+        test_data = (operands, dim)
+        self._test_cat_ethosu_BI_pipeline(
+            self.Cat(), common.get_u85_compile_spec(), test_data
+        )
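
The change above keeps the cases known to pass on the FVP and the known-failing batch>1 cases (MLETORCH-630) in separate parameter slices, so the suite stays green while the failures remain tracked as expected failures. A minimal sketch of the same slice-and-xfail idea, using plain pytest parametrization and xfail in place of parameterized.expand and conftest.expectedFailureOnFVP; the case names and the run_on_fvp helper are made up for illustration:

# Hypothetical sketch of the slice-and-xfail pattern used in test_cat.py above.
import pytest

# The last three entries stand in for the batch>1 cases that currently fail on the FVP.
test_parameters = ["batch1_a", "batch1_b", "batch2_a", "batch2_b", "batch2_c"]


def run_on_fvp(case: str) -> None:
    """Stand-in for the Corstone FVP pipeline; pretends batch>1 still fails."""
    if case.startswith("batch2"):
        raise RuntimeError("batch>1 not yet supported on FVP")


@pytest.mark.parametrize("case", test_parameters[:-3])
def test_cat_fvp(case):
    # These cases are expected to pass.
    run_on_fvp(case)


@pytest.mark.parametrize("case", test_parameters[-3:])
@pytest.mark.xfail(reason="MLETORCH-630: batch>1 not yet supported on FVP")
def test_cat_fvp_xfails(case):
    # Tracked as expected failures until the FVP limitation is resolved.
    run_on_fvp(case)

The same slicing convention is applied in test_expand.py below, with MLETORCH-629 as the tracking ticket.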

backends/arm/test/ops/test_expand.py

Lines changed: 25 additions & 9 deletions
@@ -37,15 +37,17 @@ class Expand(torch.nn.Module):
        test_parameters = [
            (torch.rand(1), (2,)),
            (torch.randn(1, 4), (1, -1)),
-            (torch.rand(1, 1, 2, 2), (4, 3, -1, 2)),
            (torch.randn(1), (2, 2, 4)),
-            (torch.rand(3, 2, 4, 1), (-1, -1, -1, 3)),
+            (torch.randn(1, 1, 1, 5), (1, 4, -1, -1)),
            (torch.randn(1, 1, 192), (1, -1, -1)),
+            (torch.randn(1, 1), (1, 2, 2, 4)),
+            (torch.randn(1, 1), (2, 2, 2, 4)),
            (torch.randn(10, 1, 1, 97), (-1, 4, -1, -1)),
+            (torch.rand(1, 1, 2, 2), (4, 3, -1, 2)),
        ]
 
-        def forward(self, x: torch.Tensor, multiples: Sequence):
-            return x.expand(multiples)
+        def forward(self, x: torch.Tensor, m: Sequence):
+            return x.expand(m)
 
    def _test_expand_tosa_MI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
        (
@@ -113,20 +115,34 @@ def test_expand_tosa_MI(self, test_input, multiples):
    def test_expand_tosa_BI(self, test_input, multiples):
        self._test_expand_tosa_BI_pipeline(self.Expand(), (test_input, multiples))
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
-    @parameterized.expand(Expand.test_parameters)
+    @parameterized.expand(Expand.test_parameters[:-3])
    @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
    def test_expand_u55_BI(self, test_input, multiples):
        self._test_expand_ethosu_BI_pipeline(
            common.get_u55_compile_spec(), self.Expand(), (test_input, multiples)
        )
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
-    @parameterized.expand(Expand.test_parameters)
+    # MLETORCH-629: Expand does not work on FVP with batch>1
+    @parameterized.expand(Expand.test_parameters[-3:])
    @pytest.mark.corstone_fvp
    @conftest.expectedFailureOnFVP
+    def test_expand_u55_BI_xfails(self, test_input, multiples):
+        self._test_expand_ethosu_BI_pipeline(
+            common.get_u55_compile_spec(), self.Expand(), (test_input, multiples)
+        )
+
+    @parameterized.expand(Expand.test_parameters[:-3])
+    @pytest.mark.corstone_fvp
    def test_expand_u85_BI(self, test_input, multiples):
        self._test_expand_ethosu_BI_pipeline(
            common.get_u85_compile_spec(), self.Expand(), (test_input, multiples)
        )
+
+    # MLETORCH-629: Expand does not work on FVP with batch>1
+    @parameterized.expand(Expand.test_parameters[-3:])
+    @pytest.mark.corstone_fvp
+    @conftest.expectedFailureOnFVP
+    def test_expand_u85_BI_xfails(self, test_input, multiples):
+        self._test_expand_ethosu_BI_pipeline(
+            common.get_u85_compile_spec(), self.Expand(), (test_input, multiples)
+        )

backends/arm/test/ops/test_full.py

Lines changed: 0 additions & 4 deletions
@@ -143,20 +143,16 @@ def test_full_tosa_MI(self, test_tensor: Tuple):
    def test_full_tosa_BI(self, test_tensor: Tuple):
        self._test_full_tosa_BI_pipeline(self.AddVariableFull(), test_tensor)
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
    @parameterized.expand(AddVariableFull.test_parameters)
    @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
    def test_full_u55_BI(self, test_tensor: Tuple):
        self._test_full_tosa_u55_pipeline(
            self.AddVariableFull(),
            test_tensor,
        )
 
-    # Mismatch in provided number of inputs and model signature, MLETORCH 519
    @parameterized.expand(AddVariableFull.test_parameters)
    @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
    def test_full_u85_BI(self, test_tensor: Tuple):
        self._test_full_tosa_u85_pipeline(
            self.AddVariableFull(),

backends/arm/test/runner_utils.py

Lines changed: 8 additions & 14 deletions
@@ -65,16 +65,7 @@ def get_input_names(program: ExportedProgram) -> list[str]:
    Returns:
        A list of strings with the names of the model input.
    """
-    input_names = []
-
-    # E.g. bias and weights are 'placeholders' as well. This is used to
-    # get only the use inputs.
-    usr_inputs = program.graph_signature.user_inputs
-    for node in program.graph.nodes:
-        if node.op == "placeholder" and node.name in usr_inputs:
-            input_names.append(node.name)
-
-    return input_names
+    return [spec.arg.name for spec in program.graph_signature.input_specs]
 
 
 def get_input_quantization_params(
@@ -334,13 +325,16 @@ def run_corstone(
 
 
 def prep_data_for_save(
-    data: torch.Tensor,
+    data,
    input_name: str,
    quant_param: Optional[QuantizationParams] = None,
 ):
-    data_np = np.array(data.detach(), order="C").astype(
-        torch_to_numpy_dtype_dict[data.dtype]
-    )
+    if isinstance(data, torch.Tensor):
+        data_np = np.array(data.detach(), order="C").astype(
+            torch_to_numpy_dtype_dict[data.dtype]
+        )
+    else:
+        data_np = np.array(data)
    if quant_param is not None:
        assert quant_param.node_name in input_name, (
            f"The quantization params name '{quant_param.node_name}' does not "

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl

Lines changed: 15 additions & 6 deletions
@@ -32,11 +32,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
 ${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
 ${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
 ${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
-${layout_declare_ubo(4, "ivec3", "out_limits")}
-${layout_declare_ubo(5, "ivec4", "in_sizes")}
-${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
-${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
-${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
+
+layout(push_constant) uniform restrict Block {
+  ivec4 out_limits;
+  ivec4 in_sizes;
+  ivec2 kernel_size;
+  ivec2 stride;
+  ivec2 padding;
+  ivec2 dilation;
+  ivec2 overlay_region;
+  int in_group_size;
+  int dummy_padding;
+  float out_min;
+  float out_max;
+};
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -127,7 +136,7 @@ void main() {
  const ivec3 out_pos = pos_shared[offset_pos_index(gl_LocalInvocationIndex)];
  for (int y = 0; y < BATCH_SIZE_Y; y++) {
    for (int x = 0; x < BATCH_SIZE_X; x++) {
-      if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits))) {
+      if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits.xyz))) {
        continue;
      }
      imageStore(t_out, ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), op(sum[y][x], out_min, out_max));

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_sned_output_tile.glsl

Lines changed: 14 additions & 5 deletions
@@ -24,11 +24,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
 ${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
 ${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
 ${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
-${layout_declare_ubo(4, "ivec3", "out_limits")}
-${layout_declare_ubo(5, "ivec4", "in_sizes")}
-${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
-${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
-${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
+
+layout(push_constant) uniform restrict Block {
+  ivec4 out_limits;
+  ivec4 in_sizes;
+  ivec2 kernel_size;
+  ivec2 stride;
+  ivec2 padding;
+  ivec2 dilation;
+  ivec2 overlay_region;
+  int in_group_size;
+  int dummy_padding;
+  float out_min;
+  float out_max;
+};
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

backends/vulkan/runtime/graph/ops/impl/Convolution.cpp

Lines changed: 38 additions & 48 deletions
@@ -407,7 +407,9 @@ void add_conv2d_node(
    wg_size = {wg_size[0] * wg_size[1] * wg_size[2], 1, 1};
  }
 
-  if (method == Conv2dMethod::Pointwise) {
+  vkapi::ParamsBindList param_buffers;
+  std::vector<PushConstantDataInfo> push_constants;
+  if (method == Conv2dMethod::Pointwise || method == Conv2dMethod::Depthwise) {
    const utils::ivec4 kernel_param_size_stride = {
        kernel_params.kernel_size[0],
        kernel_params.kernel_size[1],
@@ -420,55 +422,43 @@ void add_conv2d_node(
        kernel_params.dilation[0],
        kernel_params.dilation[1]};
 
-    graph.execute_nodes().emplace_back(new DispatchNode(
-        graph,
-        shader,
-        wg_size,
-        graph.create_local_wg_size(wg_size),
-        // Inputs and Outputs
-        {{out, vkapi::MemoryAccessType::WRITE},
-         {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
-        // Shader params buffers
-        {},
-        // Specialization Constants
-        {},
-        // Resizing Logic
-        resize_conv2d_node,
-        {weight_data, stride, padding, dilation, transposed, output_padding},
-        {
-            graph.logical_limits_pc_of(out),
-            graph.sizes_pc_of(in),
-            PushConstantDataInfo(
-                &kernel_param_size_stride, sizeof(kernel_param_size_stride)),
-            PushConstantDataInfo(
-                &kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
-            PushConstantDataInfo(
-                &extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
-            PushConstantDataInfo(&out_params, sizeof(out_params)),
-        }));
+    push_constants = {
+        graph.logical_limits_pc_of(out),
+        graph.sizes_pc_of(in),
+        PushConstantDataInfo(
+            &kernel_param_size_stride, sizeof(kernel_param_size_stride)),
+        PushConstantDataInfo(
+            &kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
+        PushConstantDataInfo(
+            &extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
+        PushConstantDataInfo(&out_params, sizeof(out_params)),
+    };
  } else {
-    graph.execute_nodes().emplace_back(new DispatchNode(
-        graph,
-        shader,
-        wg_size,
-        graph.create_local_wg_size(wg_size),
-        // Inputs and Outputs
-        {{out, vkapi::MemoryAccessType::WRITE},
-         {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
-        // Shader params buffers
-        {
-            t_out->logical_limits_ubo(),
-            t_in->sizes_ubo(),
-            graph.create_params_buffer(kernel_params),
-            graph.create_params_buffer(extra_params),
-            graph.create_params_buffer(out_params),
-        },
-        // Specialization Constants
-        {},
-        // Resizing Logic
-        resize_conv2d_node,
-        {weight_data, stride, padding, dilation, transposed, output_padding}));
+    param_buffers = {
+        t_out->logical_limits_ubo(),
+        t_in->sizes_ubo(),
+        graph.create_params_buffer(kernel_params),
+        graph.create_params_buffer(extra_params),
+        graph.create_params_buffer(out_params),
+    };
  }
+
+  graph.execute_nodes().emplace_back(new DispatchNode(
+      graph,
+      shader,
+      wg_size,
+      graph.create_local_wg_size(wg_size),
+      // Inputs and Outputs
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
+      // Shader params buffers
+      param_buffers,
+      // Specialization Constants
+      {},
+      // Resizing Logic
+      resize_conv2d_node,
+      {weight_data, stride, padding, dilation, transposed, output_padding},
+      push_constants));
 }
 
 void add_conv1d_node(

backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp

Lines changed: 7 additions & 2 deletions
@@ -73,13 +73,18 @@ void add_q_8w_linear_node(
  auto viewFn = VK_GET_OP_FN("aten.view_copy.default");
  ValueRef mat1_W_packed = mat1;
  ValueRef out_W_packed = out;
+  // Create temporary tensors to store the width packed versions of mat1 and out
+  TmpTensor mat1_tmp(
+      &graph, graph.sizes_of(mat1), graph.dtype_of(mat1), utils::kWidthPacked);
+  TmpTensor out_tmp(
+      &graph, graph.sizes_of(out), graph.dtype_of(out), utils::kWidthPacked);
  if (!graph.is_buffer_storage(out) &&
      graph.packed_dim_of(mat1) != WHCN::kWidthDim) {
    // Ensure mat1 is width packed
-    mat1_W_packed = graph.add_tensor_like(mat1, utils::kWidthPacked);
+    mat1_W_packed = mat1_tmp;
    viewFn(graph, {mat1, graph.add_none(), mat1_W_packed});
    // Ensure out is packed correctly
-    out_W_packed = graph.add_tensor_like(out, utils::kWidthPacked);
+    out_W_packed = out_tmp;
  }
  ValueRef q_mat2 = prepack_standard(
      graph, q_mat2_data, graph.storage_type_of(out), utils::kWidthPacked);
