Skip to content

Commit 8b6d176

Browse files
committed
Update base for Update on "[ET-VK] Removing unnecessary and redundant members from StagingBuffer."
This diff removes unnecessary and redundant members from the StagingBuffer class in the Vulkan runtime API. Specifically, the `numel_` and `nbytes_` members are removed, as they can be calculated from the `dtype_` member. This simplifies the class and reduces the amount of memory used. Differential Revision: [D66742613](https://our.internmc.facebook.com/intern/diff/D66742613/) [ghstack-poisoned]
2 parents b71ea27 + b9db0a3 commit 8b6d176

File tree

45 files changed

+7942
-14930
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+7942
-14930
lines changed

.ci/docker/build.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ case "${IMAGE_NAME}" in
4141
QNN_SDK=yes
4242
CLANG_VERSION=12
4343
;;
44+
executorch-ubuntu-22.04-mediatek-sdk)
45+
MEDIATEK_SDK=yes
46+
CLANG_VERSION=12
47+
;;
4448
executorch-ubuntu-22.04-clang12-android)
4549
LINTRUNNER=""
4650
CLANG_VERSION=12
@@ -77,6 +81,7 @@ docker build \
7781
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
7882
--build-arg "ARM_SDK=${ARM_SDK:-}" \
7983
--build-arg "QNN_SDK=${QNN_SDK:-}" \
84+
--build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \
8085
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
8186
-f "${OS}"/Dockerfile \
8287
"$@" \

.ci/docker/ubuntu/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,5 +85,7 @@ RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]
8585

8686
ARG QNN_SDK
8787

88+
ARG MEDIATEK_SDK
89+
8890
USER ci-user
8991
CMD ["bash"]

.ci/scripts/test_llama.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ else
110110
COREML=OFF
111111
fi
112112

113+
if [[ "${MODE}" =~ .*quantize_kv.* ]]; then
114+
QUANTIZE_KV_CACHE=ON
115+
else
116+
QUANTIZE_KV_CACHE=OFF
117+
fi
118+
113119
echo "COREML option ${COREML}"
114120

115121
if [[ "${MODE}" =~ .*qnn.* ]]; then
@@ -249,6 +255,9 @@ if [[ "${QNN}" == "ON" ]]; then
249255
EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
250256
fi
251257
fi
258+
if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then
259+
EXPORT_ARGS="${EXPORT_ARGS} --quantize_kv_cache"
260+
fi
252261
# Add dynamically linked library location
253262
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
254263

.github/workflows/docker-builds.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ jobs:
4242
- docker-image-name: executorch-ubuntu-22.04-linter
4343
- docker-image-name: executorch-ubuntu-22.04-arm-sdk
4444
- docker-image-name: executorch-ubuntu-22.04-qnn-sdk
45+
- docker-image-name: executorch-ubuntu-22.04-mediatek-sdk
4546
- docker-image-name: executorch-ubuntu-22.04-clang12-android
4647
env:
4748
DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/executorch/${{ matrix.docker-image-name }}

.github/workflows/pull.yml

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ jobs:
8686
strategy:
8787
matrix:
8888
dtype: [fp32]
89-
mode: [portable, xnnpack+custom, xnnpack+custom+qe]
89+
mode: [portable, xnnpack+custom, xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
9090
include:
9191
- dtype: bf16
9292
mode: portable
@@ -504,3 +504,21 @@ jobs:
504504
505505
# run llama runner in eager mode
506506
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
507+
508+
test-mediatek-models-linux:
509+
name: test-mediatek-models-linux
510+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
511+
strategy:
512+
fail-fast: false
513+
with:
514+
runner: linux.24xlarge
515+
docker-image: executorch-ubuntu-22.04-mediatek-sdk
516+
submodules: 'true'
517+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
518+
timeout: 90
519+
script: |
520+
# The generic Linux job chooses to use base env, not the one setup by the image
521+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
522+
conda activate "${CONDA_ENV}"
523+
524+
# placeholder for mediatek to add more tests

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ jobs:
225225
strategy:
226226
matrix:
227227
dtype: [fp32]
228-
mode: [portable, xnnpack+kv+custom, mps, coreml]
228+
mode: [portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv]
229229
include:
230230
- dtype: bf16
231231
mode: portable

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -742,9 +742,9 @@ if(EXECUTORCH_BUILD_PYBIND)
742742
endif()
743743

744744
if(EXECUTORCH_BUILD_XNNPACK)
745-
# need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
746-
# from libtorch_cpu
747-
list(APPEND _dep_libs xnnpack_backend XNNPACK)
745+
# need to explicitly specify XNNPACK and microkernels-prod
746+
# here otherwise uses XNNPACK and microkernels-prod symbols from libtorch_cpu
747+
list(APPEND _dep_libs xnnpack_backend XNNPACK microkernels-prod)
748748
endif()
749749

750750
# compile options for pybind

backends/arm/arm_backend.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
import logging
1515
import os
16-
from typing import final, List, Optional
16+
from typing import cast, final, List, Optional
1717

1818
import serializer.tosa_serializer as ts
1919
from executorch.backends.arm.arm_vela import vela_compile
@@ -32,6 +32,7 @@
3232
from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
3333
from executorch.exir.backend.compile_spec_schema import CompileSpec
3434
from torch.export.exported_program import ExportedProgram
35+
from torch.fx import Node
3536

3637
# TOSA backend debug functionality
3738
logger = logging.getLogger(__name__)
@@ -269,6 +270,7 @@ def preprocess( # noqa: C901
269270
node_visitors = get_node_visitors(edge_program, tosa_spec)
270271
input_count = 0
271272
for node in graph_module.graph.nodes:
273+
node = cast(Node, node)
272274
if node.op == "call_function":
273275
process_call_function(node, tosa_graph, node_visitors, tosa_spec)
274276
elif node.op == "placeholder":
@@ -288,9 +290,6 @@ def preprocess( # noqa: C901
288290
"The rank of the input order is not equal to amount of input tensors"
289291
)
290292

291-
# TODO: It would be awesome if this dump could somehow be done on top level and not here.
292-
# Problem is that the desc.json has to be created on the tosa_graph object, which we can't
293-
# access from top level.
294293
if artifact_path:
295294
tag = _get_first_delegation_tag(graph_module)
296295
dbg_tosa_dump(
@@ -311,6 +310,4 @@ def preprocess( # noqa: C901
311310
else:
312311
raise RuntimeError(f"Unknown format {output_format}")
313312

314-
# Continueing from above. Can I put tosa_graph into this function?
315-
# debug_handle_map = ...
316313
return PreprocessResult(processed_bytes=binary)

backends/arm/test/common.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,19 +74,15 @@ def get_tosa_compile_spec_unbuilt(
7474
the compile spec before calling .build() to finalize it.
7575
"""
7676
if not custom_path:
77-
intermediate_path = maybe_get_tosa_collate_path() or tempfile.mkdtemp(
78-
prefix="arm_tosa_"
79-
)
80-
else:
81-
intermediate_path = custom_path
77+
custom_path = maybe_get_tosa_collate_path()
8278

83-
if not os.path.exists(intermediate_path):
84-
os.makedirs(intermediate_path, exist_ok=True)
79+
if custom_path is not None:
80+
os.makedirs(custom_path, exist_ok=True)
8581
compile_spec_builder = (
8682
ArmCompileSpecBuilder()
8783
.tosa_compile_spec(tosa_version)
8884
.set_permute_memory_format(permute_memory_to_nhwc)
89-
.dump_intermediate_artifacts_to(intermediate_path)
85+
.dump_intermediate_artifacts_to(custom_path)
9086
)
9187

9288
return compile_spec_builder

backends/arm/test/misc/test_debug_feats.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ def test_numerical_diff_prints(self):
111111
model,
112112
example_inputs=model.get_inputs(),
113113
compile_spec=common.get_tosa_compile_spec(
114-
"TOSA-0.80.0+MI", permute_memory_to_nhwc=True
114+
"TOSA-0.80.0+MI",
115+
permute_memory_to_nhwc=True,
116+
custom_path=tempfile.mkdtemp("diff_print_test"),
115117
),
116118
)
117119
.export()

0 commit comments

Comments
 (0)