Commit 73cf011
Update on "[ET-VK] Replace Uniform buffers with push constants for view op"
This diff replaces uniform buffers with push constants for the view op in the Vulkan backend of ExecuTorch. The changes update the GLSL code to use push constants instead of uniform buffers, and update the C++ code to pass the tensor sizes to the shader as push constants.

Differential Revision: [D66733658](https://our.internmc.facebook.com/intern/diff/D66733658/)

[ghstack-poisoned]
2 parents: 5bc6d0c + 17da614
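For background, the pattern the commit message describes — moving small, per-dispatch data such as tensor sizes out of a uniform buffer and into push constants — looks roughly like the following in raw Vulkan. This is a hedged, generic sketch, not code from this commit: the ExecuTorch Vulkan backend wraps dispatch recording in its own abstractions, and the names here (SizePushConstants, record_view_dispatch, cmd_buf, pipeline_layout) are illustrative assumptions.

// Generic Vulkan sketch (illustrative only): pass the sizes a compute shader
// needs as push constants instead of writing them to a uniform buffer and
// binding a descriptor. The struct layout must match the shader's
// push_constant block.
#include <vulkan/vulkan.h>
#include <cstdint>

struct SizePushConstants {
  int32_t out_sizes[4];  // values the shader previously read from a UBO
  int32_t in_sizes[4];
};

void record_view_dispatch(VkCommandBuffer cmd_buf,
                          VkPipelineLayout pipeline_layout,
                          const SizePushConstants& sizes,
                          uint32_t gx, uint32_t gy, uint32_t gz) {
  // Push constants are written directly into the command buffer, so no
  // uniform-buffer allocation or descriptor-set update is needed for them.
  vkCmdPushConstants(cmd_buf, pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
                     /*offset=*/0, sizeof(SizePushConstants), &sizes);
  vkCmdDispatch(cmd_buf, gx, gy, gz);
}

On the GLSL side, the shader would declare a layout(push_constant) uniform block with matching members in place of the previous uniform-buffer binding, and the pipeline layout must declare a VkPushConstantRange covering sizeof(SizePushConstants).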

45 files changed (+7942, -14930 lines)

.ci/docker/build.sh

Lines changed: 5 additions & 0 deletions
@@ -41,6 +41,10 @@ case "${IMAGE_NAME}" in
     QNN_SDK=yes
     CLANG_VERSION=12
     ;;
+  executorch-ubuntu-22.04-mediatek-sdk)
+    MEDIATEK_SDK=yes
+    CLANG_VERSION=12
+    ;;
   executorch-ubuntu-22.04-clang12-android)
     LINTRUNNER=""
     CLANG_VERSION=12
@@ -77,6 +81,7 @@ docker build \
   --build-arg "BUILD_DOCS=${BUILD_DOCS}" \
   --build-arg "ARM_SDK=${ARM_SDK:-}" \
   --build-arg "QNN_SDK=${QNN_SDK:-}" \
+  --build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \
   --build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
   -f "${OS}"/Dockerfile \
   "$@" \

.ci/docker/ubuntu/Dockerfile

Lines changed: 2 additions & 0 deletions
@@ -85,5 +85,7 @@ RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]
 
 ARG QNN_SDK
 
+ARG MEDIATEK_SDK
+
 USER ci-user
 CMD ["bash"]

.ci/scripts/test_llama.sh

Lines changed: 9 additions & 0 deletions
@@ -110,6 +110,12 @@ else
   COREML=OFF
 fi
 
+if [[ "${MODE}" =~ .*quantize_kv.* ]]; then
+  QUANTIZE_KV_CACHE=ON
+else
+  QUANTIZE_KV_CACHE=OFF
+fi
+
 echo "COREML option ${COREML}"
 
 if [[ "${MODE}" =~ .*qnn.* ]]; then
@@ -249,6 +255,9 @@ if [[ "${QNN}" == "ON" ]]; then
     EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
   fi
 fi
+if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --quantize_kv_cache"
+fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
 
.github/workflows/docker-builds.yml

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@ jobs:
           - docker-image-name: executorch-ubuntu-22.04-linter
           - docker-image-name: executorch-ubuntu-22.04-arm-sdk
           - docker-image-name: executorch-ubuntu-22.04-qnn-sdk
+          - docker-image-name: executorch-ubuntu-22.04-mediatek-sdk
           - docker-image-name: executorch-ubuntu-22.04-clang12-android
     env:
       DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/executorch/${{ matrix.docker-image-name }}

.github/workflows/pull.yml

Lines changed: 19 additions & 1 deletion
@@ -86,7 +86,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+custom, xnnpack+custom+qe]
+        mode: [portable, xnnpack+custom, xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
       include:
         - dtype: bf16
           mode: portable
@@ -504,3 +504,21 @@ jobs:
 
       # run llama runner in eager mode
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
+
+  test-mediatek-models-linux:
+    name: test-mediatek-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-mediatek-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # placeholder for mediatek to add more tests

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -225,7 +225,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+kv+custom, mps, coreml]
+        mode: [portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv]
       include:
         - dtype: bf16
           mode: portable

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -742,9 +742,9 @@ if(EXECUTORCH_BUILD_PYBIND)
   endif()
 
   if(EXECUTORCH_BUILD_XNNPACK)
-    # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
-    # from libtorch_cpu
-    list(APPEND _dep_libs xnnpack_backend XNNPACK)
+    # need to explicitly specify XNNPACK and microkernels-prod
+    # here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
+    list(APPEND _dep_libs xnnpack_backend XNNPACK microkernels-prod)
   endif()
 
   # compile options for pybind

backends/arm/arm_backend.py

Lines changed: 3 additions & 6 deletions
@@ -13,7 +13,7 @@
 
 import logging
 import os
-from typing import final, List, Optional
+from typing import cast, final, List, Optional
 
 import serializer.tosa_serializer as ts
 from executorch.backends.arm.arm_vela import vela_compile
@@ -32,6 +32,7 @@
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.export.exported_program import ExportedProgram
+from torch.fx import Node
 
 # TOSA backend debug functionality
 logger = logging.getLogger(__name__)
@@ -269,6 +270,7 @@ def preprocess(  # noqa: C901
         node_visitors = get_node_visitors(edge_program, tosa_spec)
         input_count = 0
         for node in graph_module.graph.nodes:
+            node = cast(Node, node)
             if node.op == "call_function":
                 process_call_function(node, tosa_graph, node_visitors, tosa_spec)
             elif node.op == "placeholder":
@@ -288,9 +290,6 @@ def preprocess(  # noqa: C901
                 "The rank of the input order is not equal to amount of input tensors"
             )
 
-        # TODO: It would be awesome if this dump could somehow be done on top level and not here.
-        # Problem is that the desc.json has to be created on the tosa_graph object, which we can't
-        # access from top level.
         if artifact_path:
             tag = _get_first_delegation_tag(graph_module)
             dbg_tosa_dump(
@@ -311,6 +310,4 @@ def preprocess(  # noqa: C901
         else:
             raise RuntimeError(f"Unknown format {output_format}")
 
-        # Continueing from above. Can I put tosa_graph into this function?
-        # debug_handle_map = ...
         return PreprocessResult(processed_bytes=binary)

backends/arm/test/common.py

Lines changed: 4 additions & 8 deletions
@@ -74,19 +74,15 @@ def get_tosa_compile_spec_unbuilt(
     the compile spec before calling .build() to finalize it.
     """
     if not custom_path:
-        intermediate_path = maybe_get_tosa_collate_path() or tempfile.mkdtemp(
-            prefix="arm_tosa_"
-        )
-    else:
-        intermediate_path = custom_path
+        custom_path = maybe_get_tosa_collate_path()
 
-    if not os.path.exists(intermediate_path):
-        os.makedirs(intermediate_path, exist_ok=True)
+    if custom_path is not None:
+        os.makedirs(custom_path, exist_ok=True)
     compile_spec_builder = (
         ArmCompileSpecBuilder()
         .tosa_compile_spec(tosa_version)
         .set_permute_memory_format(permute_memory_to_nhwc)
-        .dump_intermediate_artifacts_to(intermediate_path)
+        .dump_intermediate_artifacts_to(custom_path)
     )
 
     return compile_spec_builder

backends/arm/test/misc/test_debug_feats.py

Lines changed: 3 additions & 1 deletion
@@ -111,7 +111,9 @@ def test_numerical_diff_prints(self):
             model,
             example_inputs=model.get_inputs(),
             compile_spec=common.get_tosa_compile_spec(
-                "TOSA-0.80.0+MI", permute_memory_to_nhwc=True
+                "TOSA-0.80.0+MI",
+                permute_memory_to_nhwc=True,
+                custom_path=tempfile.mkdtemp("diff_print_test"),
             ),
         )
         .export()
