Skip to content

Commit 59da214

Browse files
committed
Update base for Update on "[ET-VK][Llama] Apply XNNPACK partitioner as well when lowering to Vulkan"
## Context The final logit linear layer in the Transformer architecture operates on extremely large tensors, since its output and weight tensors each have a dimension equal to the vocabulary size, which may be extremely large. Because of this, image textures cannot be used to execute the op when running with the Vulkan delegate, so an implementation using buffer-based tensors must be used. Unfortunately, Vulkan does not have a performant implementation of linear with buffer-based tensors at the moment. As a result, if this final linear layer is executed in Vulkan, model inference is extremely slow. ## Changes The diff below will prevent the final logit linear layer from being delegated to Vulkan by enforcing a GPU buffer limit. This diff modifies the export llama script to apply the XNNPACK partitioner after the Vulkan partitioner when lowering to Vulkan, to ensure that the remaining ops will be accelerated with XNNPACK. 4-bit quantization will also apply an additional Quantizer after applying the Vulkan quantizer (which will skip the final logit linear layer) so that the final logit linear layer can be quantized as well. ## Long Term This is a temporary measure while an optimized buffer-based linear implementation is developed. Once the Vulkan implementation achieves parity with XNNPACK, the final logit linear layer will be delegated to Vulkan once more. Differential Revision: [D65899827](https://our.internmc.facebook.com/intern/diff/D65899827/) [ghstack-poisoned]
2 parents 75850c5 + ecdc007 commit 59da214

File tree

77 files changed

+3038
-277
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+3038
-277
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
c8a648d4dffb9f0133ff4a2ea0e660b42105d3ad
1+
19eff28ff3f19b50da46f5a9ff5f4d4d213806fe

.ci/scripts/gather_test_models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
"ic4": "linux.12xlarge",
2525
"resnet50": "linux.12xlarge",
2626
"llava": "linux.12xlarge",
27+
"llama3_2_vision_encoder": "linux.12xlarge",
2728
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
2829
"dl3": "linux.12xlarge",
2930
"emformer_join": "linux.12xlarge",

.ci/scripts/test_model.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ test_model() {
7777
# Install requirements for export_llama
7878
bash examples/models/llama/install_requirements.sh
7979
# Test export_llama script: python3 -m examples.models.llama.export_llama
80-
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
80+
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
8181
run_portable_executor_runner
8282
rm "./${MODEL_NAME}.pte"
8383
fi

.github/workflows/pull.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ jobs:
7272
conda activate "${CONDA_ENV}"
7373
7474
MODEL_NAME=${{ matrix.model }}
75+
# Install requirements for llama vision
76+
if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" ]]; then
77+
bash examples/models/llama3_2_vision/install_requirements.sh
78+
fi
7579
BUILD_TOOL=${{ matrix.build-tool }}
7680
BACKEND=${{ matrix.backend }}
7781
DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}

.github/workflows/trunk.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ jobs:
5858
bash .ci/scripts/setup-conda.sh
5959
# Setup MacOS dependencies as there is no Docker support on MacOS atm
6060
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
61-
# Build and test xecutorch
61+
# Install requirements for llama vision
62+
if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" ]]; then
63+
${CONDA_RUN} bash examples/models/llama3_2_vision/install_requirements.sh
64+
fi
65+
# Build and test executorch
6266
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
6367
6468
test-custom-ops-macos:

.lintrunner.toml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,3 +196,91 @@ command = [
196196
'@{{PATHSFILE}}',
197197
]
198198
is_formatter = true
199+
200+
[[linter]]
201+
code = 'NOSTDINC'
202+
include_patterns = [
203+
"**/*.c",
204+
"**/*.cpp",
205+
"**/*.h",
206+
"**/*.hpp",
207+
]
208+
exclude_patterns = [
209+
'**/devtools/**',
210+
'**/test/**',
211+
'**/testing_util/**',
212+
'**/third-party/**',
213+
'backends/**',
214+
'devtools/**',
215+
'examples/**',
216+
'extension/**',
217+
'kernels/optimized/**',
218+
'scripts/**',
219+
'third-party/**',
220+
'util/**',
221+
]
222+
command = [
223+
'python',
224+
'-m',
225+
'lintrunner_adapters',
226+
'run',
227+
'grep_linter',
228+
'--pattern=([^\\S\r\n]*#include\s*<(deque|exception|forward_list|functional|list|map|multimap|multiset|priority_queue|queue|set|stack|string|unordered_map|unordered_multimap|unordered_multiset|unordered_set|vector)>)',
229+
'--linter-name=NOSTDINC',
230+
'--error-name=Standard C++ container include in core',
231+
"""--error-description=\
232+
Standard library containers should not be included in ExecuTorch core \
233+
because they may call malloc, which is not allowed in core. \
234+
""",
235+
'--',
236+
'@{{PATHSFILE}}',
237+
]
238+
239+
[[linter]]
240+
code = 'NOTORCHINC'
241+
include_patterns = [
242+
"**/*.c",
243+
"**/*.cpp",
244+
"**/*.h",
245+
"**/*.hpp",
246+
]
247+
exclude_patterns = [
248+
'**/devtools/**',
249+
'**/fb/**',
250+
'**/test/**',
251+
'**/tests/**',
252+
'**/testing_util/**',
253+
'**/third-party/**',
254+
'backends/**',
255+
'codegen/templates/RegisterDispatchKeyCustomOps.cpp',
256+
'codegen/templates/RegisterSchema.cpp',
257+
'devtools/**',
258+
'examples/**',
259+
'exir/verification/bindings.cpp',
260+
'extension/**',
261+
'kernels/optimized/**',
262+
'runtime/core/exec_aten/**',
263+
'runtime/executor/tensor_parser_aten.cpp',
264+
'scripts/**',
265+
'test/**',
266+
'third-party/**',
267+
'util/**',
268+
]
269+
command = [
270+
'python',
271+
'-m',
272+
'lintrunner_adapters',
273+
'run',
274+
'grep_linter',
275+
'--pattern=#include\s+[<"](aten/|ATen/|torch/)',
276+
'--linter-name=NOTORCHINC',
277+
'--error-name=ATen or torch include',
278+
"""--error-description=\
279+
PyTorch includes in ExecuTorch core are prohibited to prevent \
280+
accidentally breaking core's requirements; please make sure this \
281+
header complies (e.g., no streams/malloc/syscalls) and then include \
282+
a patch to update this linter.\
283+
""",
284+
'--',
285+
'@{{PATHSFILE}}',
286+
]

CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,19 @@ message(STATUS "Using python executable '${PYTHON_EXECUTABLE}'")
324324
set(_common_compile_options -Wno-deprecated-declarations -fPIC)
325325

326326
# Let files say "include <executorch/path/to/header.h>".
327+
# TODO(#6475): This requires/assumes that the repo lives in a directory named
328+
# exactly `executorch`. Check the assumption first. Remove this check once we
329+
# stop relying on the assumption.
330+
cmake_path(GET CMAKE_CURRENT_SOURCE_DIR FILENAME _repo_dir_name)
331+
if(NOT "${_repo_dir_name}" STREQUAL "executorch")
332+
message(
333+
FATAL_ERROR
334+
"The ExecuTorch repo must be cloned into a directory named exactly "
335+
"`executorch`; found `${_repo_dir_name}`. See "
336+
"https://github.com/pytorch/executorch/issues/6475 for progress on a "
337+
"fix for this restriction."
338+
)
339+
endif()
327340
set(_common_include_directories ${CMAKE_CURRENT_SOURCE_DIR}/..)
328341

329342
#

backends/arm/TARGETS

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,27 @@ python_library(
2626
"fbsource//third-party/serialization_lib/python/serializer:serializer",
2727
"fbsource//third-party/serialization_lib/python/tosa:tosa",
2828
":arm_vela",
29+
":process_node",
2930
"//executorch/backends/arm/operators:lib",
3031
"//executorch/backends/arm/operators:node_visitor",
3132
"//executorch/backends/arm/_passes:passes",
3233
],
3334
)
3435

36+
python_library(
37+
name = "process_node",
38+
srcs = ["process_node.py"],
39+
typing = True,
40+
deps = [
41+
"fbsource//third-party/serialization_lib/python/tosa:tosa",
42+
"//executorch/backends/arm/operators:node_visitor",
43+
"//executorch/backends/arm:tosa_mapping",
44+
"//executorch/backends/arm:tosa_quant_utils",
45+
"//executorch/backends/arm:tosa_utils",
46+
"//executorch/exir:lib",
47+
],
48+
)
49+
3550
python_library(
3651
name = "arm_vela",
3752
srcs = [

backends/arm/arm_backend.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,17 @@
1818
import serializer.tosa_serializer as ts
1919
from executorch.backends.arm.arm_vela import vela_compile
2020
from executorch.backends.arm.operators.node_visitor import get_node_visitors
21-
from executorch.backends.arm.operators.op_output import process_output
22-
from executorch.backends.arm.operators.op_placeholder import process_placeholder
2321

2422
from executorch.backends.arm.tosa_specification import TosaSpecification
2523
from executorch.backends.arm._passes.arm_pass_manager import (
2624
ArmPassManager,
2725
) # usort: skip
28-
from executorch.backends.arm.tosa_utils import (
29-
dbg_fail,
30-
dbg_tosa_dump,
26+
from executorch.backends.arm.process_node import (
3127
process_call_function,
28+
process_output,
29+
process_placeholder,
3230
)
31+
from executorch.backends.arm.tosa_utils import dbg_fail, dbg_tosa_dump
3332
from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
3433
from executorch.exir.backend.compile_spec_schema import CompileSpec
3534
from torch.export.exported_program import ExportedProgram

backends/arm/operators/op_output.py

Lines changed: 0 additions & 21 deletions
This file was deleted.

0 commit comments

Comments
 (0)