Commit 29a7c15

Update base for Update on "[ET-VK] Replace Uniform buffers with push constants for view op"
This diff replaces uniform buffers with push constants for the view op in the Vulkan backend of ExecuTorch. The changes update the GLSL code to use push constants instead of uniform buffers, and update the C++ code to pass the sizes as push constants to the shader.

Differential Revision: [D66733658](https://our.internmc.facebook.com/intern/diff/D66733658/)

[ghstack-poisoned]
2 parents 16ba998 + 32d842b commit 29a7c15
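For orientation, here is a hedged sketch of the pattern the commit message above describes, written as compute-shader GLSL. The buffer layout, block names, and field names are hypothetical and are not the actual ExecuTorch view shader; the point is only that sizes formerly read from a uniform buffer are declared as a push_constant block, which the C++ side then fills with vkCmdPushConstants while recording the dispatch.

#version 450
// Hedged illustration only: hypothetical names, not the real ExecuTorch shader.
layout(std430, set = 0, binding = 0) buffer OutBuf { float out_data[]; };
layout(std430, set = 0, binding = 1) readonly buffer InBuf { float in_data[]; };

// Before: sizes would arrive through a uniform buffer, e.g.
//   layout(set = 0, binding = 2) uniform Sizes { ivec4 out_sizes; ivec4 in_sizes; };
// After: the same data is supplied as push constants at command-record time.
layout(push_constant) uniform PushConstants {
  ivec4 out_sizes;
  ivec4 in_sizes;
} pc;

layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

void main() {
  uint idx = gl_GlobalInvocationID.x;
  uint numel = uint(pc.out_sizes.x * pc.out_sizes.y * pc.out_sizes.z * pc.out_sizes.w);
  if (idx >= numel) {
    return;
  }
  // A view op only reinterprets tensor metadata, so the element copy is 1:1.
  out_data[idx] = in_data[idx];
}

Push constants avoid allocating and binding a small UBO per dispatch, which is why they are the usual choice for a few words of per-dispatch data such as tensor sizes.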

27 files changed: +1172 −132 lines


backends/arm/test/ops/test_depthwise_conv.py

Lines changed: 6 additions & 7 deletions
@@ -156,11 +156,14 @@
     ("two_dw_conv2d", two_dw_conv2d),
 ]
 
-testsuite_conv2d_u85_xfails = [
+testsuite_conv2d_u85 = [
     ("2x2_1x6x4x4_gp6_st1", dw_conv2d_2x2_1x6x4x4_gp6_st1),
     ("3x3_1x3x256x256_gp3_st1", dw_conv2d_3x3_1x3x256x256_gp3_st1),
     ("3x3_1x4x256x256_gp4_st1", dw_conv2d_3x3_1x4x256x256_gp4_st1),
     ("3x3_1x4x256x256_gp4_nobias", dw_conv2d_3x3_1x4x256x256_gp4_nobias),
+]
+
+testsuite_conv2d_u85_xfails = [
     ("3x3_2x8x198x198_gp8_st3", dw_conv2d_3x3_2x8x198x198_gp8_st3),
     ("two_dw_conv2d", two_dw_conv2d),
 ]
@@ -284,7 +287,7 @@ def test_dw_conv1d_u55_BI(
             model.get_inputs(),
         )
 
-    @parameterized.expand(testsuite_conv1d[2:])
+    @parameterized.expand(testsuite_conv1d + testsuite_conv2d_u85)
     def test_dw_conv_u85_BI(
         self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
     ):
@@ -296,12 +299,8 @@ def test_dw_conv_u85_BI(
             model.get_inputs(),
         )
 
-    testsuite_conv2d_u85_xfails.remove(
-        ("3x3_1x3x256x256_gp3_st1", dw_conv2d_3x3_1x3x256x256_gp3_st1)
-    ) # Works
-
     # All test cases except 3x3_1x3x256x256_gp3_st1 have numerical issues on FVP. MLETORCH-520
-    @parameterized.expand(testsuite_conv2d_u85_xfails + testsuite_conv1d[:2])
+    @parameterized.expand(testsuite_conv2d_u85_xfails)
     @conftest.expectedFailureOnFVP
     def test_dw_conv_u85_BI_xfails(
         self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False

backends/arm/test/ops/test_div.py

Lines changed: 28 additions & 2 deletions
@@ -183,8 +183,21 @@ def test_div_tosa_BI(
         test_data = (input_, other_)
         self._test_div_tosa_BI_pipeline(self.Div(), test_data)
 
+    @parameterized.expand(test_data_suite[:2])
+    def test_div_u55_BI(
+        self,
+        test_name: str,
+        input_: Union[torch.Tensor, torch.types.Number],
+        other_: Union[torch.Tensor, torch.types.Number],
+        rounding_mode: Optional[str] = None,
+    ):
+        test_data = (input_, other_)
+        self._test_div_ethos_BI_pipeline(
+            self.Div(), common.get_u55_compile_spec(), test_data
+        )
+
     # Numerical issues on FVP likely due to mul op, MLETORCH-521
-    @parameterized.expand(test_data_suite)
+    @parameterized.expand(test_data_suite[2:])
     @conftest.expectedFailureOnFVP
     def test_div_u55_BI_xfails(
         self,
@@ -198,8 +211,21 @@ def test_div_u55_BI_xfails(
             self.Div(), common.get_u55_compile_spec(), test_data
         )
 
+    @parameterized.expand(test_data_suite[:2])
+    def test_div_u85_BI(
+        self,
+        test_name: str,
+        input_: Union[torch.Tensor, torch.types.Number],
+        other_: Union[torch.Tensor, torch.types.Number],
+        rounding_mode: Optional[str] = None,
+    ):
+        test_data = (input_, other_)
+        self._test_div_ethos_BI_pipeline(
+            self.Div(), common.get_u85_compile_spec(), test_data
+        )
+
     # Numerical issues on FVP likely due to mul op, MLETORCH-521
-    @parameterized.expand(test_data_suite)
+    @parameterized.expand(test_data_suite[2:])
     @conftest.expectedFailureOnFVP
     def test_div_u85_BI_xfails(
         self,

backends/arm/test/ops/test_mul.py

Lines changed: 1 addition & 6 deletions
@@ -152,9 +152,7 @@ def test_mul_tosa_BI(
         test_data = (input_, other_)
         self._test_mul_tosa_BI_pipeline(self.Mul(), test_data)
 
-    # Numerical issues on FVP, MLETORCH-521
     @parameterized.expand(test_data_sute)
-    @conftest.expectedFailureOnFVP
     def test_mul_u55_BI(
         self,
         test_name: str,
@@ -166,10 +164,7 @@ def test_mul_u55_BI(
             common.get_u55_compile_spec(), self.Mul(), test_data
         )
 
-    # Numerical issues on FVP, MLETORCH-521
-    # test_data_sute[0] works on U85
-    @parameterized.expand(test_data_sute[1:])
-    @conftest.expectedFailureOnFVP
+    @parameterized.expand(test_data_sute)
    def test_mul_u85_BI(
         self,
         test_name: str,

backends/cadence/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
@@ -23,7 +23,6 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
-set(TARGET_DIR reference)
 
 if(EXECUTORCH_CADENCE_CPU_RUNNER)
   include(${EXECUTORCH_ROOT}/build/Codegen.cmake)
@@ -61,6 +60,9 @@ if(EXECUTORCH_CADENCE_CPU_RUNNER)
     ${_common_include_directories}
   )
 
+  set(TARGET_DIR reference)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
+
   target_link_libraries(
     cadence_runner
     executorch

backends/cadence/aot/functions.yaml

Lines changed: 50 additions & 0 deletions
@@ -142,6 +142,41 @@
     - arg_meta: null
       kernel_name: torch::executor::where_out
 
+- op: transpose_copy.int_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::transpose_copy_int_out
+
+- op: eq.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::eq_scalar_out
+
+- op: logical_not.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::logical_not_out
+
+- op: any.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::any_out
+
+- op: native_group_norm.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::native_group_norm_out
+
+- op: sum.IntList_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::sum_dim_out
+
+- op: select_copy.int_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::select_copy_int_out
+
 # custom ops
 - func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
@@ -183,3 +218,18 @@
   kernels:
     - arg_meta: null
      kernel_name: impl::reference::quantized_matmul_out
+
+- func: cadence::quantized_linear.per_tensor_out(Tensor src, Tensor weight, Tensor bias, SymInt src_zero_point, SymInt weight_zero_point, SymInt out_multiplier, SymInt out_shift, SymInt out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_linear_per_tensor_out
+
+- func: cadence::im2row.out(Tensor input, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, Tensor in_zero_point, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::im2row_out
+
+- func: cadence::quantized_conv.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::reference::quantized_conv_per_tensor_out

backends/cadence/aot/remove_ops.py

Lines changed: 2 additions & 1 deletion
@@ -110,7 +110,8 @@ def call_operator(
 
         # Otherwise, we replace args[0] with cat_inputs.
         new_args = list(args)
-        new_args[0] = cat_inputs
+        # pyre error introduced after D66937105
+        new_args[0] = cat_inputs  # pyre-ignore[6]
         return super().call_operator(op, tuple(new_args), kwargs, meta)
 
 
backends/cadence/reference/operators/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
@@ -55,6 +55,16 @@ set(_aten_ops__srcs
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_expand_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_gelu.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_empty.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_transpose_copy.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_eq.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_logical_not.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_any.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_native_group_norm.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sum.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_select_copy.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/dtype_util.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/normalization_ops_util.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/select_copy_util.cpp"
 )
 add_library(aten_ops_cadence ${_aten_ops__srcs})
 target_link_libraries(aten_ops_cadence PUBLIC executorch)
@@ -78,6 +88,7 @@ add_library(
   "quantize_per_tensor.cpp"
   "dequantize_per_tensor.cpp"
   "quantized_matmul_out.cpp"
+  "im2row_out.cpp"
 )
 target_include_directories(
   custom_ops PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}
