Commit 4ae8bb1

Update base for Update on "[ET-VK] Store unique ptr to StagingBuffer and SymInt in Value instead of inlined object, to reduce Value struct size from 80 to 32 bytes."
This diff aims to reduce the size of the `Value` struct from 80 bytes to 32 bytes by storing unique pointers to `StagingBuffer` and `SymInt` instead of inlining the objects.

Differential Revision: [D66677759](https://our.internmc.facebook.com/intern/diff/D66677759/)

[ghstack-poisoned]
2 parents e1e55c0 + b9db0a3 commit 4ae8bb1
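
The change described above is a common size optimization for tagged-union value types: large alternatives are boxed behind a pointer, so the union needs only pointer-sized storage for them, at the cost of one heap allocation and an extra indirection when those alternatives are accessed. The sketch below is illustrative only and is not the ET-VK `Value` implementation; the `StagingBuffer`/`SymInt` stand-ins, their sizes, and the use of `std::variant` in place of the real hand-rolled tagged union are all assumptions made to keep the example short.

```cpp
// Minimal sketch of the boxing pattern, using hypothetical stand-in types.
#include <cstdint>
#include <cstdio>
#include <memory>
#include <variant>

struct StagingBuffer {   // stand-in: pretend this type is large when inlined
  unsigned char storage[72];
};

struct SymInt {          // stand-in: small, but still widens the union
  int64_t value;
  void* owning_graph;
};

// Inlined alternatives: the variant is as large as its largest member.
using ValueInlined = std::variant<std::monostate, StagingBuffer, SymInt>;

// Boxed alternatives: each large member is held through a unique_ptr, so the
// variant itself only needs pointer-sized storage plus a discriminator.
using ValueBoxed = std::variant<std::monostate,
                                std::unique_ptr<StagingBuffer>,
                                std::unique_ptr<SymInt>>;

int main() {
  // Exact numbers are platform-dependent; the point is the relative shrink.
  std::printf("inlined: %zu bytes, boxed: %zu bytes\n",
              sizeof(ValueInlined), sizeof(ValueBoxed));

  // Accessing a boxed alternative goes through one extra pointer hop.
  ValueBoxed v = std::make_unique<SymInt>(SymInt{3, nullptr});
  std::printf("boxed SymInt value: %lld\n",
              static_cast<long long>(std::get<std::unique_ptr<SymInt>>(v)->value));
  return 0;
}
```

The trade-off of boxing is the allocation and pointer chase on access, in exchange for shrinking every `Value` in the graph, whether or not it holds one of the boxed types.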

68 files changed: +8833, -15027 lines

.ci/docker/build.sh

Lines changed: 5 additions & 0 deletions
```diff
@@ -41,6 +41,10 @@ case "${IMAGE_NAME}" in
     QNN_SDK=yes
     CLANG_VERSION=12
     ;;
+  executorch-ubuntu-22.04-mediatek-sdk)
+    MEDIATEK_SDK=yes
+    CLANG_VERSION=12
+    ;;
   executorch-ubuntu-22.04-clang12-android)
     LINTRUNNER=""
     CLANG_VERSION=12
@@ -77,6 +81,7 @@ docker build \
   --build-arg "BUILD_DOCS=${BUILD_DOCS}" \
   --build-arg "ARM_SDK=${ARM_SDK:-}" \
   --build-arg "QNN_SDK=${QNN_SDK:-}" \
+  --build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \
   --build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
   -f "${OS}"/Dockerfile \
   "$@" \
```

.ci/docker/ubuntu/Dockerfile

Lines changed: 2 additions & 0 deletions
```diff
@@ -85,5 +85,7 @@ RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]
 
 ARG QNN_SDK
 
+ARG MEDIATEK_SDK
+
 USER ci-user
 CMD ["bash"]
```

.ci/scripts/test_llama.sh

Lines changed: 9 additions & 0 deletions
```diff
@@ -110,6 +110,12 @@ else
   COREML=OFF
 fi
 
+if [[ "${MODE}" =~ .*quantize_kv.* ]]; then
+  QUANTIZE_KV_CACHE=ON
+else
+  QUANTIZE_KV_CACHE=OFF
+fi
+
 echo "COREML option ${COREML}"
 
 if [[ "${MODE}" =~ .*qnn.* ]]; then
@@ -249,6 +255,9 @@ if [[ "${QNN}" == "ON" ]]; then
     EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
   fi
 fi
+if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --quantize_kv_cache"
+fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
 
```

.github/workflows/docker-builds.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -42,6 +42,7 @@ jobs:
           - docker-image-name: executorch-ubuntu-22.04-linter
           - docker-image-name: executorch-ubuntu-22.04-arm-sdk
           - docker-image-name: executorch-ubuntu-22.04-qnn-sdk
+          - docker-image-name: executorch-ubuntu-22.04-mediatek-sdk
           - docker-image-name: executorch-ubuntu-22.04-clang12-android
     env:
       DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/executorch/${{ matrix.docker-image-name }}
```

.github/workflows/pull.yml

Lines changed: 19 additions & 1 deletion
```diff
@@ -86,7 +86,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+custom, xnnpack+custom+qe]
+        mode: [portable, xnnpack+custom, xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
         include:
           - dtype: bf16
             mode: portable
@@ -504,3 +504,21 @@ jobs:
 
       # run llama runner in eager mode
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
+
+  test-mediatek-models-linux:
+    name: test-mediatek-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-mediatek-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # placeholder for mediatek to add more tests
```

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -225,7 +225,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+kv+custom, mps, coreml]
+        mode: [portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv]
         include:
           - dtype: bf16
             mode: portable
```

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
```diff
@@ -742,9 +742,9 @@ if(EXECUTORCH_BUILD_PYBIND)
   endif()
 
   if(EXECUTORCH_BUILD_XNNPACK)
-    # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
-    # from libtorch_cpu
-    list(APPEND _dep_libs xnnpack_backend XNNPACK)
+    # need to explicitly specify XNNPACK and microkernels-prod
+    # here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
+    list(APPEND _dep_libs xnnpack_backend XNNPACK microkernels-prod)
   endif()
 
   # compile options for pybind
```

backends/arm/arm_backend.py

Lines changed: 3 additions & 6 deletions
```diff
@@ -13,7 +13,7 @@
 
 import logging
 import os
-from typing import final, List, Optional
+from typing import cast, final, List, Optional
 
 import serializer.tosa_serializer as ts
 from executorch.backends.arm.arm_vela import vela_compile
@@ -32,6 +32,7 @@
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.export.exported_program import ExportedProgram
+from torch.fx import Node
 
 # TOSA backend debug functionality
 logger = logging.getLogger(__name__)
@@ -269,6 +270,7 @@ def preprocess( # noqa: C901
         node_visitors = get_node_visitors(edge_program, tosa_spec)
         input_count = 0
         for node in graph_module.graph.nodes:
+            node = cast(Node, node)
             if node.op == "call_function":
                 process_call_function(node, tosa_graph, node_visitors, tosa_spec)
             elif node.op == "placeholder":
@@ -288,9 +290,6 @@ def preprocess( # noqa: C901
                 "The rank of the input order is not equal to amount of input tensors"
             )
 
-        # TODO: It would be awesome if this dump could somehow be done on top level and not here.
-        # Problem is that the desc.json has to be created on the tosa_graph object, which we can't
-        # access from top level.
         if artifact_path:
             tag = _get_first_delegation_tag(graph_module)
             dbg_tosa_dump(
@@ -311,6 +310,4 @@ def preprocess( # noqa: C901
     else:
         raise RuntimeError(f"Unknown format {output_format}")
 
-    # Continueing from above. Can I put tosa_graph into this function?
-    # debug_handle_map = ...
     return PreprocessResult(processed_bytes=binary)
```

backends/arm/test/common.py

Lines changed: 4 additions & 8 deletions
```diff
@@ -74,19 +74,15 @@ def get_tosa_compile_spec_unbuilt(
     the compile spec before calling .build() to finalize it.
     """
     if not custom_path:
-        intermediate_path = maybe_get_tosa_collate_path() or tempfile.mkdtemp(
-            prefix="arm_tosa_"
-        )
-    else:
-        intermediate_path = custom_path
+        custom_path = maybe_get_tosa_collate_path()
 
-    if not os.path.exists(intermediate_path):
-        os.makedirs(intermediate_path, exist_ok=True)
+    if custom_path is not None:
+        os.makedirs(custom_path, exist_ok=True)
     compile_spec_builder = (
         ArmCompileSpecBuilder()
         .tosa_compile_spec(tosa_version)
         .set_permute_memory_format(permute_memory_to_nhwc)
-        .dump_intermediate_artifacts_to(intermediate_path)
+        .dump_intermediate_artifacts_to(custom_path)
     )
 
     return compile_spec_builder
```

backends/arm/test/conftest.py

Lines changed: 11 additions & 22 deletions
```diff
@@ -11,7 +11,6 @@
 import shutil
 import subprocess
 import sys
-from enum import auto, Enum
 from typing import Any
 
 import pytest
@@ -22,30 +21,24 @@
 """
 
 
-class arm_test_options(Enum):
-    quantize_io = auto()
-    corstone_fvp = auto()
-    fast_fvp = auto()
-
-
-_test_options: dict[arm_test_options, Any] = {}
-
 # ==== Pytest hooks ====
 
 
 def pytest_configure(config):
+    pytest._test_options = {}
+
     if config.option.arm_quantize_io:
         _load_libquantized_ops_aot_lib()
-        _test_options[arm_test_options.quantize_io] = True
+        pytest._test_options["quantize_io"] = True
     if config.option.arm_run_corstoneFVP:
         corstone300_exists = shutil.which("FVP_Corstone_SSE-300_Ethos-U55")
         corstone320_exists = shutil.which("FVP_Corstone_SSE-320")
         if not (corstone300_exists and corstone320_exists):
             raise RuntimeError(
                 "Tests are run with --arm_run_corstoneFVP but corstone FVP is not installed."
             )
-        _test_options[arm_test_options.corstone_fvp] = True
-        _test_options[arm_test_options.fast_fvp] = config.option.fast_fvp
+        pytest._test_options["corstone_fvp"] = True
+        pytest._test_options["fast_fvp"] = config.option.fast_fvp
     logging.basicConfig(level=logging.INFO, stream=sys.stdout)
 
 
@@ -131,9 +124,7 @@ def expectedFailureOnFVP(test_item):
 # ==== End of Custom Pytest decorators =====
 
 
-def is_option_enabled(
-    option: str | arm_test_options, fail_if_not_enabled: bool = False
-) -> bool:
+def is_option_enabled(option: str, fail_if_not_enabled: bool = False) -> bool:
     """
     Returns whether an option is successfully enabled, i.e. if the flag was
     given to pytest and the necessary requirements are available.
@@ -144,10 +135,8 @@ def is_option_enabled(
     The optional parameter 'fail_if_not_enabled' makes the function raise
     a RuntimeError instead of returning False.
     """
-    if isinstance(option, str):
-        option = arm_test_options[option.lower()]
 
-    if option in _test_options and _test_options[option]:
+    if option in pytest._test_options and pytest._test_options[option]:
         return True
     else:
         if fail_if_not_enabled:
@@ -156,15 +145,15 @@ def is_option_enabled(
         return False
 
 
-def get_option(option: arm_test_options) -> Any | None:
+def get_option(option: str) -> Any | None:
     """
     Returns the value of an pytest option if it is set, otherwise None.
 
     Args:
-        option (arm_test_options): The option to check for.
+        option (str): The option to check for.
     """
-    if option in _test_options:
-        return _test_options[option]
+    if option in pytest._test_options:
+        return pytest._test_options[option]
     return None
 
 
```
