
Commit 668e523

Merge branch 'main' into change-1031069
2 parents 7bdd6cb + 4266820 commit 668e523


43 files changed: +516 / −256 lines changed

.ci/scripts/build-mediatek-sdk.sh

Lines changed: 1 addition & 1 deletion
@@ -14,9 +14,9 @@ build_neuron_backend() {
   export NEURON_BUFFER_ALLOCATOR_LIB=${MEDIATEK_SDK_ROOT}/libneuron_buffer_allocator.so
   export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"

-
   cd ${EXECUTORCH_ROOT}
   ./backends/mediatek/scripts/mtk_build.sh
+  ./examples/mediatek/mtk_build_examples.sh
 }

 build_neuron_backend

.ci/scripts/test_llama_lora.sh

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@ (new file)

#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

cmake_install_executorch_libraries() {
  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
  rm -rf cmake-out
  retry cmake --preset llm \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release
  cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
  echo "Building llama runner"
  pushd extension/llm/tokenizers
  echo "Updating tokenizers submodule"
  git submodule update --init
  popd
  dir="examples/models/llama"
  retry cmake \
    -DBUILD_TESTING=OFF \
    -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DCMAKE_BUILD_TYPE=Release \
    -Bcmake-out/${dir} \
    ${dir}
  cmake --build cmake-out/${dir} -j9 --config Release
}

cleanup_files() {
  echo "Deleting downloaded and generated files"
  rm -rf "${DOWNLOADED_PATH}/"
  rm result.txt
}

# Download model artifacts from HF Hub.
# Hosting in personal repo for now.
HF_MODEL_REPO="lucylq/llama3_1B_lora"
DOWNLOADED_PATH=$(
  bash "$(dirname "${BASH_SOURCE[0]}")/download_hf_hub.sh" \
    --model_id "${HF_MODEL_REPO}" \
    --files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
)
EXPORTED_MODEL_NAME="llama_3_2_1B_lora.pte"
# Export model.
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
  base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
  base.params="${DOWNLOADED_PATH}/params.json" \
  base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
  base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
  base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
  model.use_kv_cache=true \
  model.use_sdpa_with_kv_cache=true \
  model.dtype_override="fp32" \
  backend.xnnpack.enabled=true \
  backend.xnnpack.extended_ops=true \
  export.output_name="${EXPORTED_MODEL_NAME}"

# Build llama runner.
cmake_install_executorch_libraries
cmake_build_llama_runner

PROMPT="What happens if you eat watermelon seeds?"
# Run llama runner
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"

NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT=$(cat result.txt)
EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"

if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Success"
  cleanup_files
else
  echo "Expected result prefix: ${EXPECTED_PREFIX}"
  echo "Actual result: ${RESULT}"
  echo "Failure; results not the same"

  cleanup_files
  exit 1
fi

.github/workflows/pull.yml

Lines changed: 30 additions & 0 deletions
@@ -687,6 +687,36 @@ jobs:
         # run llama runner in eager mode
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh

+  test-llama-lora-linux:
+    name: test-llama-lora-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # install a recent version of torchtune.
+        PYTHON_EXECUTABLE=python python -m pip install torchtune==0.7.0.dev20250730 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+
+        # run llama runner in eager mode
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh
+
   test-mediatek-models-linux:
     name: test-mediatek-models-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -849,7 +849,7 @@ if(NOT EXECUTORCH_SELECT_OPS_YAML STREQUAL ""
     LIB_NAME
     "executorch_selected_kernels"
     OPS_SCHEMA_YAML
-    "${EXECUTORCH_SELECT_OPS_LIB}"
+    "${EXECUTORCH_SELECT_OPS_YAML}"
     ROOT_OPS
     "${EXECUTORCH_SELECT_OPS_LIST}"
     INCLUDE_ALL_OPS

backends/cadence/aot/compiler.py

Lines changed: 37 additions & 17 deletions
@@ -172,29 +172,18 @@ def fuse_pt2(
     return converted_graph_module


-def quantize_pt2(
+# Note: quantizer is not optional here to force the user to supply a quantizer
+# and ensure consistency is more likely to be maintained.
+def get_fake_quant_model(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
-    quantizer: Optional[CadenceQuantizer] = None,
+    quantizer: CadenceQuantizer,
     calibration_data: Optional[list[tuple[object, ...]]] = None,
     dump_graphs: bool = False,
-) -> ExportedProgram:
-    """
-    Trace, prepare, convert and fuse the model using the given quantizer.
-    If calibration data is provided, it will be used to calibrate the model. If
-    not, the inputs will be used for calibration instead, which is useful for
-    unit tests but should not be used for end-to-end use cases.
-    Returns a GraphModule with the quantized model.
-    Note: this function should not be called directly in general. Please use
-    quantize_and_export_to_executorch for most needs.
-    """
+) -> torch.fx.GraphModule:
     # Make the model inference mode by calling model.eval()
     model.eval()

-    # Instantiate the quantizer to CadenceQuantizer if not supplied
-    if not quantizer:
-        quantizer = CadenceDefaultQuantizer()
-
     program = trace(model, inputs, dump_graphs=dump_graphs)

     if dump_graphs:
@@ -214,6 +203,37 @@

     # Get converted graph module
     converted_gm = convert_pt2(prepared_gm, dump_graphs=dump_graphs)
+    return converted_gm
+
+
+def quantize_pt2(
+    model: torch.nn.Module,
+    inputs: tuple[object, ...],
+    quantizer: Optional[CadenceQuantizer] = None,
+    calibration_data: Optional[list[tuple[object, ...]]] = None,
+    dump_graphs: bool = False,
+) -> ExportedProgram:
+    """
+    Trace, prepare, convert and fuse the model using the given quantizer.
+    If calibration data is provided, it will be used to calibrate the model. If
+    not, the inputs will be used for calibration instead, which is useful for
+    unit tests but should not be used for end-to-end use cases.
+    Returns a GraphModule with the quantized model.
+    Note: this function should not be called directly in general. Please use
+    quantize_and_export_to_executorch for most needs.
+    """
+    # Instantiate the quantizer to CadenceQuantizer if not supplied
+    if not quantizer:
+        quantizer = CadenceDefaultQuantizer()
+
+    # Get the converted (aka fake quant) graph module
+    converted_gm = get_fake_quant_model(
+        model,
+        inputs,
+        quantizer=quantizer,
+        calibration_data=calibration_data,
+        dump_graphs=dump_graphs,
+    )

     # Get fused model
     fused_gm = fuse_pt2(converted_gm, quantizer)
@@ -237,7 +257,7 @@
     torch.ops.aten.angle.default,
     torch.ops.aten.rms_norm.default,
 ]
-TO_EDGE_PRESERVE_OPS: list[torch._ops.OpOverload, ...] = [
+TO_EDGE_PRESERVE_OPS: list[torch._ops.OpOverload] = [
     torch.ops.aten.rms_norm.default,
 ]
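
For readers following the Cadence AOT flow, the refactor above splits the old single entry point: get_fake_quant_model now requires an explicit quantizer and returns the converted (fake-quant) torch.fx.GraphModule, while quantize_pt2 keeps its previous signature and wraps the new helper before fusing. A minimal usage sketch under stated assumptions: the TinyModel class and input shapes are illustrative, and the import paths are inferred from the surrounding code rather than taken from this commit.

# Sketch only; relies on the Cadence AOT functions shown in the diff above.
import torch

from executorch.backends.cadence.aot.compiler import get_fake_quant_model, quantize_pt2
from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer


class TinyModel(torch.nn.Module):  # illustrative stand-in model, not part of the commit
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(16, 16)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.linear(x))


model = TinyModel()
inputs = (torch.randn(1, 16),)

# One-shot path: quantize_pt2 still instantiates CadenceDefaultQuantizer when
# none is given, then calls get_fake_quant_model() and fuse_pt2() internally.
quantized = quantize_pt2(model, inputs)

# Two-step path: the new helper needs an explicit quantizer and hands back the
# converted ("fake quant") graph module before fusion.
quantizer = CadenceDefaultQuantizer()
fake_quant_gm = get_fake_quant_model(model, inputs, quantizer=quantizer)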

backends/cadence/aot/pass_utils.py

Lines changed: 6 additions & 1 deletion
@@ -13,7 +13,7 @@
 from executorch.backends.cadence.aot.utils import get_edge_overload_packet

 from executorch.exir.dialects.edge._ops import EdgeOpOverload, EdgeOpOverloadPacket
-from executorch.exir.pass_base import PassBase
+from executorch.exir.pass_base import PassBase, PassResult

 from torch._ops import OpOverloadPacket

@@ -224,3 +224,8 @@ def set_arg(
         node.update_arg(idx, value)
     else:
         node.update_kwarg(kwarg_name, value)
+
+
+def none_throws(x: Optional[PassResult]) -> PassResult:
+    assert x is not None
+    return x
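
The new none_throws helper is a small unwrapping utility: it turns an Optional[PassResult] into a PassResult, asserting on None so callers can chain pass invocations without scattering manual asserts (the next file shows the real call site). A self-contained sketch, with an illustrative stand-in pass that is not part of the commit:

# Sketch only; none_throws and PassResult follow the imports in the diff above.
from typing import Optional

import torch
from executorch.backends.cadence.aot.pass_utils import none_throws
from executorch.exir.pass_base import PassResult


def maybe_rewrite(gm: torch.fx.GraphModule) -> Optional[PassResult]:
    # Illustrative stand-in for a pass call that may return None.
    return PassResult(gm, False)


gm = torch.fx.symbolic_trace(torch.nn.ReLU())
result: PassResult = none_throws(maybe_rewrite(gm))  # AssertionError if None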

backends/cadence/aot/replace_ops.py

Lines changed: 3 additions & 2 deletions
@@ -39,6 +39,7 @@
 )
 from executorch.backends.cadence.aot.pass_utils import (
     CadencePassAttribute,
+    none_throws,
     register_cadence_pass,
 )
 from executorch.backends.cadence.aot.remove_ops import RemoveNopSelectOpPass
@@ -1661,8 +1662,8 @@ def call_operator(self, op, args, kwargs, meta):

     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
         result = super().call(graph_module)
-        result = FuseCascadedViewOps()(result.graph_module)
-        assert result is not None
+        fuse_cascaded_result = none_throws(FuseCascadedViewOps()(result.graph_module))
+        result = none_throws(ExportPass()(fuse_cascaded_result.graph_module))
         return result


backends/mediatek/scripts/mtk_build.sh

Lines changed: 8 additions & 3 deletions
@@ -4,18 +4,20 @@
 set -e

 # Define the directory where CMakeLists.txt is located
-SOURCE_DIR=$(realpath "$(dirname "$0")/../../..")
+EXECUTORCH_ROOT=$(realpath "$(dirname "$0")/../../..")
+echo EXECUTORCH_ROOT=${EXECUTORCH_ROOT}

 # Check if the ANDROID_NDK environment variable is set
 if [ -z "$ANDROID_NDK" ]; then
   echo "Error: ANDROID_NDK environment variable is not set." >&2
   exit 1
 fi

-# Create and enter the build directory
+# Enter the build directory
+cd "$EXECUTORCH_ROOT"
+
 # Set build directory
 build_dir="cmake-android-out"
-cd "$SOURCE_DIR"
 rm -rf "${build_dir}"

 # Configure the project with CMake
@@ -25,6 +27,9 @@ cmake -DCMAKE_INSTALL_PREFIX="${build_dir}" \
     -DANDROID_ABI=arm64-v8a \
     -DANDROID_NATIVE_API_LEVEL=26 \
     -DANDROID_PLATFORM=android-26 \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_NEURON=ON \
     -B"${build_dir}"

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 0 additions & 7 deletions
@@ -509,13 +509,6 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {

     compute_graph->prepack();

-    // If dynamic shapes are not expected, then the command buffer only needs to
-    // be encoded once. Otherwise, wait until the first inference to encode the
-    // the command buffer, when actual input shapes are known.
-    if (!compute_graph->graphconfig().expect_dynamic_shapes) {
-      compute_graph->encode_execute();
-    }
-
     return Error::Ok;
   }

backends/vulkan/runtime/api/Context.cpp

Lines changed: 3 additions & 13 deletions
@@ -38,8 +38,7 @@ Context::Context(vkapi::Adapter* adapter, const ContextConfig& config)
       querypool_(config_.query_pool_config, nullptr),
       // Command buffer submission
       cmd_mutex_{},
-      cmd_(VK_NULL_HANDLE, VK_NULL_HANDLE, 0u),
-      prev_semaphore_(VK_NULL_HANDLE),
+      cmd_(VK_NULL_HANDLE, 0u),
       submit_count_{0u},
       // Memory Management
       buffer_clearlist_mutex_{},
@@ -196,21 +195,14 @@ void Context::register_blit(
 }

 void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) {
-  // Wait semaphore would be previous command buffer's signal semaphore
-  VkSemaphore wait_semaphore = prev_semaphore_;
-  // Signal semaphore for the the current command buffer
-  VkSemaphore signal_semaphore = cmd_.get_signal_semaphore();
-  // Next command buffer would wait on this command buffer's signal semaphore
-  prev_semaphore_ = signal_semaphore;
-
   if (cmd_) {
     cmd_.end();
     adapter_p_->submit_cmd(
         queue_,
         cmd_.get_submit_handle(final_use),
         fence_handle,
-        wait_semaphore,
-        signal_semaphore);
+        VK_NULL_HANDLE,
+        VK_NULL_HANDLE);

     submit_count_ = 0u;
   }
@@ -226,8 +218,6 @@ void Context::flush() {
   if (cmd_) {
     cmd_.invalidate();
   }
-  // Reset previous command buffer semaphore
-  prev_semaphore_ = VK_NULL_HANDLE;

   std::lock_guard<std::mutex> bufferlist_lock(buffer_clearlist_mutex_);
   std::lock_guard<std::mutex> imagelist_lock(image_clearlist_mutex_);
