Commit 5cd6447

Update on "[ET-VK] Adding batch processing in x axis to conv2d dw shader by caching input texel for reuse."
This diff adds batch processing in the x axis to the conv2d dw shader by reusing input texels that overlap between consecutive tiles. The changes modify the GLSL code for the conv2d dw output tile, add a new parameter to the yaml file, and update Convolution.cpp to use the new parameter.

Differential Revision: [D67868671](https://our.internmc.facebook.com/intern/diff/D67868671/)
Merge commit 5cd6447; 2 parents: b6d7a76 + 7260da1
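To make the optimization concrete, below is a minimal CPU-side sketch of the reuse idea, under stated assumptions (the names conv_row_batch and K, and the stride-1 layout, are hypothetical; the real implementation is the GLSL change to conv2d_dw_output_tile.glsl in this diff). With stride 1, the BATCH_SIZE_X adjacent outputs along x overlap in all but one input texel per step, so each texel can be fetched once and accumulated into every output whose window covers it, instead of being re-fetched per output:

// A hedged C++ sketch, not the shader itself: 1-D depthwise row convolution
// computing BATCH_SIZE_X consecutive outputs with each input fetched once.
#include <array>
#include <vector>

constexpr int BATCH_SIZE_X = 4; // outputs computed per invocation along x
constexpr int K = 3;            // kernel width (hypothetical)

std::array<float, BATCH_SIZE_X> conv_row_batch(
    const std::vector<float>& input,    // one input row
    const std::array<float, K>& kernel, // one kernel row
    int x0) {                           // leftmost output index
  std::array<float, BATCH_SIZE_X> out{};
  // The whole batch touches only K + BATCH_SIZE_X - 1 input values; a naive
  // version would fetch K values per output, K * BATCH_SIZE_X in total.
  for (int i = 0; i < K + BATCH_SIZE_X - 1; ++i) {
    const float texel = input[x0 + i]; // single fetch, reused across outputs
    for (int s = 0; s < BATCH_SIZE_X; ++s) {
      const int k = i - s; // this texel's position in output s's window
      if (k >= 0 && k < K) {
        out[s] += texel * kernel[k];
      }
    }
  }
  return out;
}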

File tree: 18 files changed (+133, −57)

.lintrunner.toml

Lines changed: 2 additions & 2 deletions

@@ -302,8 +302,8 @@ include_patterns = [
     'profiler/**/*.py',
     'runtime/**/*.py',
     'scripts/**/*.py',
-    # 'test/**/*.py',
-    # 'util/**/*.py',
+    'test/**/*.py',
+    'util/**/*.py',
     '*.py',
 ]
 exclude_patterns = [

.mypy.ini

Lines changed: 11 additions & 1 deletion

@@ -21,10 +21,14 @@ files =
     profiler,
     runtime,
     scripts,
+    test,
     util

 mypy_path = executorch

+[mypy-executorch.backends.*]
+follow_untyped_imports = True
+
 [mypy-executorch.codegen.*]
 follow_untyped_imports = True

@@ -46,6 +50,12 @@ follow_untyped_imports = True
 [mypy-executorch.runtime.*]
 follow_untyped_imports = True

+[mypy-executorch.test.*]
+follow_untyped_imports = True
+
+[mypy-functorch.*]
+follow_untyped_imports = True
+
 [mypy-requests.*]
 follow_untyped_imports = True

@@ -80,4 +90,4 @@ ignore_missing_imports = True
 ignore_missing_imports = True

 [mypy-zstd]
-ignore_missing_imports = True
+ignore_missing_imports = True

backends/cadence/fusion_g3/operators/op_add.cpp

Lines changed: 1 addition & 9 deletions

@@ -10,6 +10,7 @@

 #include <xa_nnlib_kernels_api.h>

+#include <executorch/backends/cadence/fusion_g3/operators/xt_macros.h>
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/elementwise_util.h>
 #include <executorch/kernels/portable/cpu/util/kernel_ops_util.h>

@@ -28,15 +29,6 @@ namespace impl {
 namespace G3 {
 namespace native {

-#define XT_KERNEL_CHECK(ctx, out, kernel, ...) \
-  const auto ret = kernel(__VA_ARGS__);        \
-  ET_KERNEL_CHECK_MSG(                         \
-      ctx,                                     \
-      ret == 0,                                \
-      InvalidArgument,                         \
-      out,                                     \
-      "Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");
-
 Tensor& add_out(
     KernelRuntimeContext& ctx,
     const Tensor& a,

backends/cadence/fusion_g3/operators/targets.bzl

Lines changed: 13 additions & 0 deletions

@@ -27,6 +27,7 @@ def define_operator(name: str, deps: list[str] | None = None) -> None:
         deps = deps + common_deps,
         exported_deps = [
             ":operators_header",
+            ":xt_macros",
         ],
     )

@@ -61,5 +62,17 @@ def define_common_targets():
         ],
     )

+    runtime.cxx_library(
+        name = "xt_macros",
+        exported_headers = ["xt_macros.h"],
+        visibility = [
+            "//executorch/backends/cadence/...",
+        ],
+        exported_deps = [
+            "//executorch/runtime/core/exec_aten:lib",
+            "//executorch/runtime/kernel:kernel_runtime_context",
+        ],
+    )
+
     for op in OPERATORS:
         define_operator(op)
backends/cadence/fusion_g3/operators/xt_macros.h

Lines changed: 20 additions & 0 deletions

@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
+
+#define XT_KERNEL_CHECK(ctx, out, kernel, ...) \
+  const auto ret = kernel(__VA_ARGS__);        \
+  ET_KERNEL_CHECK_MSG(                         \
+      ctx,                                     \
+      ret == 0,                                \
+      InvalidArgument,                         \
+      out,                                     \
+      "Failed to run kernel: " #kernel "(" #__VA_ARGS__ ")");

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl

Lines changed: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ void main() {
     }

     // accumulate dot product in 1st sum only until tile size
-    if (i < int(TILE_SIZE)) {
+    if (i < TILE_SIZE) {
       for (int j = 0; j < TILE_SIZE; j++, kx++) {
         prev_kernel_line[j] = texelFetch(t_kernel, ivec2(kx, pos.z), 0);
         for (int s = 0; s < BATCH_SIZE_X; s++) {

docs/TARGETS

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 load("@fbcode_macros//build_defs:native_rules.bzl", "buck_filegroup", "buck_sh_test")
 load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")

-oncall("pytorch_r2p")
+oncall("executorch")

 python_binary(
     name = "sphinx",
extension/flat_tensor/serialize/TARGETS

Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
+
+runtime.python_library(
+    name = "schema",
+    srcs = [
+        "flat_tensor_schema.py",
+    ],
+    visibility = [
+        "//executorch/...",
+    ],
+)

extension/flat_tensor/flat_tensor.fbs renamed to extension/flat_tensor/serialize/flat_tensor.fbs

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@ table TensorMetadata {
   scalar_type: executorch_flatbuffer.ScalarType;

   // Size of each dimension.
-  dim_sizes: [int32];
+  sizes: [int32];

   // Specifies in what order the dimensions are laid out in memory (from outer
   // to inner).

extension/flat_tensor/flat_tensor_schema.py renamed to extension/flat_tensor/serialize/flat_tensor_schema.py

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@
 class TensorMetadata:
     fully_qualified_name: str
     scalar_type: ScalarType
-    dim_sizes: List[int]
+    sizes: List[int]
     dim_order: List[bytes]

     segment_index: int
