
Commit 3babb55

Update base for Update on "Reuse types in _named_data_store and support tensor layouts"
Reuse `DataEntry` from data_serializer.py in _named_data_store.py.

Motivation:
- deserialize from flat tensor to named data store output
- support tensor layouts in the named data store

Differential Revision: [D83490345](https://our.internmc.facebook.com/intern/diff/D83490345/)

[ghstack-poisoned]
2 parents: d321d51 + 37a65b5
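
For context, the core idea reads roughly like the sketch below: define `DataEntry` (with an optional tensor layout) once next to the serializer and reuse it in the named data store, so layout metadata survives the round trip from flat tensor files back to a named-data-store output. This is a minimal, self-contained approximation based only on the commit description — the field names (`buffer_index`, `alignment`, `tensor_layout`) and the `NamedDataStoreOutput` shape are illustrative assumptions, not the verbatim executorch API.

```python
# Minimal sketch (not the verbatim executorch API): one DataEntry type,
# defined once alongside the serializer and reused by the named data store,
# so tensor layout metadata flows through serialization unchanged.
from dataclasses import dataclass, field
from typing import Dict, List, Optional


@dataclass
class TensorLayout:
    scalar_type: str   # executorch uses a scalar-type enum; str keeps the sketch simple
    sizes: List[int]
    dim_order: List[int]


@dataclass
class DataEntry:
    buffer_index: int  # index into the shared list of raw buffers
    alignment: int
    tensor_layout: Optional[TensorLayout] = None  # present only for tensors


@dataclass
class NamedDataStoreOutput:
    buffers: List[bytes] = field(default_factory=list)
    entries: Dict[str, DataEntry] = field(default_factory=dict)

    def add_named_data(
        self,
        name: str,
        data: bytes,
        alignment: int = 1,
        tensor_layout: Optional[TensorLayout] = None,
    ) -> None:
        # Deduplicate identical payloads by reusing an existing buffer index.
        try:
            index = self.buffers.index(data)
        except ValueError:
            index = len(self.buffers)
            self.buffers.append(data)
        self.entries[name] = DataEntry(index, alignment, tensor_layout)


# Example: a tensor blob and an opaque blob share the same entry type.
store = NamedDataStoreOutput()
store.add_named_data(
    "linear.weight",
    b"\x00" * 16,
    alignment=16,
    tensor_layout=TensorLayout("float32", sizes=[2, 2], dim_order=[0, 1]),
)
store.add_named_data("tokenizer_blob", b"\x01\x02\x03")
assert store.entries["linear.weight"].tensor_layout is not None
```

Sharing one entry type means a deserializer can rebuild exactly what the store emitted, instead of converting between two parallel definitions.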


747 files changed, +32235 −8966 lines

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-44d8d54e38c0258357d4e92e1fefe21e845947a3
+467660923a5a25e4718e1d6697b93ff1bab4e807
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-53a2908a10f414a2f85caa06703a26a40e873869
+e6f766c7d750d40603eee3f66c5915bac606b3ea

.ci/docker/requirements-ci.txt

Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,12 @@
 mpmath==1.3.0
 numpy>=2.0.0; python_version >= '3.10'
 PyYAML==6.0.1
-ruamel.yaml==0.17.32
+ruamel.yaml==0.18.15
 sympy==1.12
 timm==0.6.13
 tomli==2.0.1
 torchsr==1.0.4
-transformers==4.47.1
+transformers==4.56.1
 zstd==1.5.5.1
 pandas>=2.2.2; python_version >= '3.10'
 pytest==7.2.0

.ci/scripts/setup-windows-msvc.ps1

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+conda create --yes --quiet -n et python=3.12
+conda activate et
+
+# Install cmake
+conda install -y cmake
+
+# Activate the VS environment - this is required for MSVC to work
+# There are a bunch of environment variables that it requires.
+# See https://learn.microsoft.com/en-us/cpp/build/building-on-the-command-line.
+& "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Launch-VsDevShell.ps1" -Arch amd64
+
+# Install CI requirements
+pip install -r .ci/docker/requirements-ci.txt
+
+# Create build directory
+$buildDir = "cmake-out-msvc"
+if (Test-Path -Path $buildDir) {
+    Remove-Item -Path $buildDir -Recurse -Force
+}
+New-Item -Path $buildDir -ItemType Directory
+
+# Configure CMake with MSVC (not ClangCL) and disable custom/quantized ops
+cmake -S . -B $buildDir `
+    -DCMAKE_BUILD_TYPE=Release `
+    -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON `
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON `
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=OFF `
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=OFF `
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=OFF `
+    -DEXECUTORCH_BUILD_XNNPACK=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON `
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "CMake configuration failed. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+# Build with MSVC
+cmake --build $buildDir --config Release -j16
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Build failed. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+Write-Host "MSVC build completed successfully!"

.ci/scripts/test-cuda-build.sh

Lines changed: 0 additions & 3 deletions
@@ -27,9 +27,6 @@ test_executorch_cuda_build() {
   nvcc --version || echo "nvcc not found"
   nvidia-smi || echo "nvidia-smi not found"

-  # Set CMAKE_ARGS to enable CUDA build - ExecuTorch will handle PyTorch installation automatically
-  export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
-
   echo "=== Starting ExecuTorch Installation ==="
   # Install ExecuTorch with CUDA support with timeout and error handling
   timeout 5400 ./install_executorch.sh || {

.ci/scripts/test_llama_lora.sh

Lines changed: 46 additions & 4 deletions
@@ -55,7 +55,7 @@ cmake_build_llama_runner
 # Constants.
 RUNTIME_ARGS="--tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
 PROMPT="What happens if you eat watermelon seeds?"
-EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"
+EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C and"

 # Export LoRA PTE file.
 MODEL_NAME="llama_3_2_1B_lora"
@@ -94,7 +94,7 @@ else
   exit 1
 fi

-# Export LoRA PTE, PTD file.
+# Export LoRA PTE, foundation PTD file.
 MODEL_SEPARATE="${MODEL_NAME}_separate"
 $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
   base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
@@ -114,20 +114,62 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
 NOW=$(date +"%H:%M:%S")
 echo "Starting to run llama runner at ${NOW}"
 # shellcheck source=/dev/null
-cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_path=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_paths=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
 NOW=$(date +"%H:%M:%S")
 echo "Finished at ${NOW}"

 RESULT2=$(cat result2.txt)
 if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT2}"
+  # Do not clean up files if test passes, as they're re-used in the next test.
   echo "Success"
-  cleanup_files
 else
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT2}"
   echo "Failure; results not the same"
   cleanup_files
   exit 1
 fi
+
+# Export LoRA PTE, LoRA PTD, foundation PTD file.
+MODEL_PROGRAM_ONLY="${MODEL_NAME}_program"
+MODEL_LORA_WEIGHTS="lora_weights"
+MODEL_FOUNDATION_WEIGHTS="foundation_weights"
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+  base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+  base.params="${DOWNLOADED_PATH}/params.json" \
+  base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+  base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+  base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+  model.use_kv_cache=true \
+  model.use_sdpa_with_kv_cache=true \
+  model.dtype_override="fp32" \
+  backend.xnnpack.enabled=true \
+  backend.xnnpack.extended_ops=true \
+  export.output_name="${MODEL_PROGRAM_ONLY}.pte" \
+  export.foundation_weights_file="${MODEL_FOUNDATION_WEIGHTS}.ptd" \
+  export.lora_weights_file="${MODEL_LORA_WEIGHTS}.ptd"
+
+# Run llama runner.
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_PROGRAM_ONLY}.pte --data_paths="${MODEL_FOUNDATION_WEIGHTS}.ptd,${MODEL_LORA_WEIGHTS}.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result3.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT3=$(cat result3.txt)
+if [[ "${RESULT3}" == "${EXPECTED_PREFIX}"* ]]; then
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Success"
+else
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Failure; results not the same"
+  cleanup_files
+  exit 1
+fi
+
+cleanup_files

.ci/scripts/test_phi_3_mini.sh

Lines changed: 11 additions & 12 deletions
@@ -36,34 +36,33 @@ cmake_build_phi_3_mini() {
   cmake --build ${BUILD_DIR}/${MODEL_DIR} -j${NPROC} --config ${BUILD_TYPE}
 }

-# Download and convert tokenizer.model
+# Download tokenizer.model
 prepare_tokenizer() {
-  echo "Downloading and converting tokenizer.model"
-  wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"
-  $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
+  echo "Downloading tokenizer.model"
+  wget -O tokenizer.model "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/tokenizer.model?download=true"
 }

 # Export phi-3-mini model to pte
 export_phi_3_mini () {
   echo "Exporting phi-3-mini. This will take a few minutes"
-  $PYTHON_EXECUTABLE -m executorch.examples.models.phi-3-mini.export_phi-3-mini -c "4k" -s 128 -o phi-3-mini.pte
+  optimum-cli export executorch --model microsoft/Phi-3-mini-4k-instruct --task text-generation --recipe xnnpack --output_dir ./
 }

 run_and_verify() {
   NOW=$(date +"%H:%M:%S")
   echo "Starting to run phi-3-mini runner at ${NOW}"
-  if [[ ! -f "phi-3-mini.pte" ]]; then
-    echo "Export failed. Abort"
+  if [[ ! -f "model.pte" ]]; then
+    echo "Missing model artifact. Abort"
     exit 1
   fi
-  if [[ ! -f "tokenizer.bin" ]]; then
-    echo "tokenizer.bin is missing."
+  if [[ ! -f "tokenizer.model" ]]; then
+    echo "tokenizer.model is missing."
     exit 1
   fi

   ${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner \
-    --model_path=phi-3-mini.pte \
-    --tokenizer_path=tokenizer.bin \
+    --model_path=model.pte \
+    --tokenizer_path=tokenizer.model \
     --seq_len=60 \
     --temperature=0 \
     --prompt="<|system|>
@@ -92,7 +91,7 @@ What is the capital of France?<|end|>
 cmake_install_executorch_libraries
 cmake_build_phi_3_mini

-# Step 2. Export the tokenizer and model
+# Step 2. Export the model
 prepare_tokenizer
 export_phi_3_mini
.ci/scripts/test_qnn_static_llama.sh

Lines changed: 0 additions & 69 deletions
This file was deleted.

.ci/scripts/test_qnn_static_llm.sh

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -euxo pipefail
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+TASK_NAME=$1
+if [[ -z "${TASK_NAME:-}" ]]; then
+  echo "Missing task name, exiting..."
+  exit 1
+fi
+
+
+# Download QNN_SDK. If already downloaded, export environment path
+source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
+install_qnn
+
+export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
+export PYTHONPATH=".."
+cp schema/program.fbs exir/_serialize/program.fbs
+cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+which "${PYTHON_EXECUTABLE}"
+
+# Although static llama CI does not require graphviz, it is required by test_qnn_delegate.py
+pip install graphviz
+
+set +e
+
+echo "Executing task: $TASK_NAME"
+if [[ "${TASK_NAME}" == "stories_110m" ]]; then
+  # Download stories llama110m artifacts
+  download_stories_model_artifacts
+  echo "Creating tokenizer.bin"
+  $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
+
+  # Compile only as weight sharing is not applicable on x86.
+  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --compile_only
+  exit_code1=$?
+
+  # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
+  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./stories_110m_accuracy --llama_artifacts . --enable_x86_64
+  exit_code2=$?
+
+  # Check the exit codes and print messages
+  if [ $exit_code1 -ne 0 ]; then
+    echo "Static Llama compile only with weight sharing test failed. $exit_code1."
+  fi
+
+  if [ $exit_code2 -ne 0 ]; then
+    echo "Static Llama accuracy test failed. $exit_code2."
+  fi
+
+  if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ]; then
+    exit 1
+  else
+    exit 0
+  fi
+
+elif [[ "${TASK_NAME}" == "stories_260k_bc" ]]; then
+
+  # Check BC
+  bash backends/qualcomm/bc/test_qnn_static_llama_bc.sh
+  exit_code1=$?
+  if [ $exit_code1 -ne 0 ]; then
+    exit 1
+  else
+    exit 0
+  fi
+
+elif [[ "${TASK_NAME}" == "smollm2_135m" ]]; then
+  $PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_static_smollm2 --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./static_smollm2 --enable_x86_64
+  exit_code1=$?
+  if [ $exit_code1 -ne 0 ]; then
+    exit 1
+  else
+    exit 0
+  fi
+else
+  echo "Unsupported task: $TASK_NAME"
+  exit 1
+fi
