
Commit c1c55f8

Merge branch 'main' into remove_ChecksNeedsDecomposition
2 parents: 566e603 + 25b3d63


128 files changed

+4420
-661
lines changed


.ci/docker/build.sh

Lines changed: 2 additions & 2 deletions
@@ -54,13 +54,13 @@ case "${IMAGE_NAME}" in
   executorch-ubuntu-22.04-mediatek-sdk)
     MEDIATEK_SDK=yes
     CLANG_VERSION=12
-    ANDROID_NDK_VERSION=r27b
+    ANDROID_NDK_VERSION=r28c
     ;;
   executorch-ubuntu-22.04-clang12-android)
     LINTRUNNER=""
     CLANG_VERSION=12
     # From https://developer.android.com/ndk/downloads
-    ANDROID_NDK_VERSION=r27b
+    ANDROID_NDK_VERSION=r28c
     ;;
   *)
     echo "Invalid image name ${IMAGE_NAME}"
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-828ae02053a6e0e20a2dfd6e737ba10c6f4dee6b
+bd06b54e627fbfd354a2cffa4c80fb21883209a9

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ download_ai_lite_core() {
 install_enn_backend() {
   NDK_INSTALLATION_DIR=/opt/ndk
   rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
-  ANDROID_NDK_VERSION=r27b
+  ANDROID_NDK_VERSION=r28c

   # build Exynos backend
   export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}

.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 114 additions & 8 deletions
@@ -43,7 +43,9 @@ def cli_export(command, model_dir):


 def check_causal_lm_output_quality(
-    model_id: str, generated_tokens: List[int], max_perplexity_threshold: float = 100.0
+    model_id: str,
+    generated_tokens: List[int],
+    max_perplexity_threshold: float = 100.0,
 ):
     """
     Evaluates the quality of text generated by a causal language model by calculating its perplexity.
@@ -58,12 +60,24 @@ def check_causal_lm_output_quality(
     """
     logging.info(f"Starting perplexity check with model '{model_id}' ...")
     # Load model
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        low_cpu_mem_usage=True,
-        use_cache=False,
-        torch_dtype=torch.bfloat16,
-    )
+    cls_name = AutoModelForCausalLM
+    if "llava" in model_id:
+        from transformers import LlavaForConditionalGeneration
+
+        cls_name = LlavaForConditionalGeneration
+    try:
+        model = cls_name.from_pretrained(
+            model_id,
+            low_cpu_mem_usage=True,
+            use_cache=False,
+            torch_dtype=torch.bfloat16,
+        )
+    except TypeError:
+        model = cls_name.from_pretrained(
+            model_id,
+            low_cpu_mem_usage=True,
+            torch_dtype=torch.bfloat16,
+        )

     with torch.no_grad():
         outputs = model(input_ids=generated_tokens, labels=generated_tokens)
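The try/except above is a compatibility shim: some model classes, such as LlavaForConditionalGeneration, do not accept the use_cache keyword in from_pretrained, so the load is retried without it. The perplexity check that the rest of this function performs follows the standard recipe; a minimal sketch (not the file's exact code), assuming a Hugging Face causal LM whose forward pass with labels equal to the input ids returns the mean cross-entropy loss:

import torch

def perplexity_of(model, generated_tokens: torch.Tensor) -> float:
    # With labels == input_ids, HF causal LMs return the mean cross-entropy
    # loss over the (shifted) sequence; perplexity is its exponential.
    with torch.no_grad():
        outputs = model(input_ids=generated_tokens, labels=generated_tokens)
    return torch.exp(outputs.loss).item()

# The check passes when the eager model finds the exported model's output
# plausible: perplexity_of(...) <= max_perplexity_threshold (100.0 by default).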
@@ -156,6 +170,86 @@ def test_text_generation(model_id, model_dir, recipe, *, quantize=True, run_only
     assert check_causal_lm_output_quality(model_id, generated_tokens) is True


+def test_llm_with_image_modality(
+    model_id, model_dir, recipe, *, quantize=True, run_only=False
+):
+    command = [
+        "optimum-cli",
+        "export",
+        "executorch",
+        "--model",
+        model_id,
+        "--task",
+        "multimodal-text-to-text",
+        "--recipe",
+        recipe,
+        "--output_dir",
+        model_dir,
+        "--use_custom_sdpa",
+        "--use_custom_kv_cache",
+        "--qlinear",
+        "8da4w",
+        "--qembedding",
+        "8w",
+    ]
+    if not run_only:
+        cli_export(command, model_dir)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.save_pretrained(model_dir)
+
+    # input
+    processor = AutoProcessor.from_pretrained(model_id)
+    image_url = "https://llava-vl.github.io/static/images/view.jpg"
+    conversation = [
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
+                }
+            ],
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "url": image_url},
+                {
+                    "type": "text",
+                    "text": "What are the things I should be cautious about when I visit here?",
+                },
+            ],
+        },
+    ]
+    inputs = processor.apply_chat_template(
+        conversation,
+        add_generation_prompt=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt",
+    )
+
+    from executorch.extension.llm.runner import GenerationConfig, MultimodalRunner
+
+    runner = MultimodalRunner(f"{model_dir}/model.pte", f"{model_dir}/tokenizer.model")
+    generated_text = runner.generate_text_hf(
+        inputs,
+        GenerationConfig(max_new_tokens=128, temperature=0, echo=False),
+        processor.image_token_id,
+    )
+    print(f"\nGenerated text:\n\t{generated_text}")
+    # Free memory before loading eager for quality check
+    del runner
+    gc.collect()
+    assert (
+        check_causal_lm_output_quality(
+            model_id, tokenizer.encode(generated_text, return_tensors="pt")
+        )
+        is True
+    )
+
+
 def test_fill_mask(model_id, model_dir, recipe, *, quantize=True, run_only=False):
     command = [
         "optimum-cli",
@@ -353,6 +447,9 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
         required=False,
         help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
     )
+    parser.add_argument(
+        "--run_only", action="store_true", help="Skip export and only run the test"
+    )
     args = parser.parse_args()

     _text_generation_mapping = {
@@ -384,8 +481,16 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
         "vit": ("google/vit-base-patch16-224", test_vit),
     }

+    _multimodal_model_mapping = {
+        "gemma3-4b": ("google/gemma-3-4b-it", test_llm_with_image_modality),
+        "llava": ("llava-hf/llava-1.5-7b-hf", test_llm_with_image_modality),
+    }
+
     model_to_model_id_and_test_function = (
-        _text_generation_mapping | _mask_fill_mapping | _misc_model_mapping
+        _text_generation_mapping
+        | _mask_fill_mapping
+        | _misc_model_mapping
+        | _multimodal_model_mapping
     )

     if args.model not in model_to_model_id_and_test_function:
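The dict union keeps dispatch uniform across task families; a short sketch of the lookup the driver performs (note "llava" is registered here but skipped in the CI matrices below because of a segfault):

# Shorthand name -> (HF model id, test function); e.g.:
model_id, test_fn = model_to_model_id_and_test_function["gemma3-4b"]
# model_id == "google/gemma-3-4b-it"; test_fn is test_llm_with_image_modality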
@@ -400,4 +505,5 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
         model_dir=tmp_dir if args.model_dir is None else args.model_dir,
         recipe=args.recipe,
         quantize=args.quantize,
+        run_only=args.run_only,
     )

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 0 deletions
@@ -159,6 +159,7 @@ cmake_install_executorch_libraries() {
       -DCMAKE_INSTALL_PREFIX=cmake-out \
       -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
       -DEXECUTORCH_BUILD_QNN="$QNN" \
+      -DEXECUTORCH_ENABLE_LOGGING=ON \
       -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
   cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }

.ci/scripts/unittest-buck2.sh

Lines changed: 5 additions & 1 deletion
@@ -15,7 +15,8 @@ buck2 query "//backends/apple/... + //backends/arm: + //backends/arm/debug/... +
   //backends/arm/_passes/... + //backends/arm/runtime/... + //backends/arm/tosa/... \
   + //backends/example/... + \
   //backends/mediatek/... + //backends/transforms/... + \
-  //backends/xnnpack/... + //configurations/... + //extension/flat_tensor: + \
+  //backends/xnnpack/... + //codegen/tools/... + \
+  //configurations/... + //extension/flat_tensor: + \
   //extension/llm/runner: + //kernels/aten/... + //kernels/optimized/... + \
   //kernels/portable/... + //kernels/quantized/... + //kernels/test/... + \
   //runtime/... + //schema/... + //test/... + //util/..."

@@ -38,3 +39,6 @@ for op in "build" "test"; do
     $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
     //runtime/executor: //runtime/kernel/... //runtime/platform/...
 done
+
+# Build only without testing
+buck2 build //codegen/tools/... # Needs torch for testing which we don't have in our OSS buck setup.

.github/workflows/_android.yml

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ jobs:
     # NB: Use metal install for KVM support to run the emulator faster
     runs-on: linux.24xl.spr-metal
     env:
-      ANDROID_NDK_VERSION: r27b
+      ANDROID_NDK_VERSION: r28c
       API_LEVEL: 34
     steps:
       - name: Setup SSH (Click me for login details)

.github/workflows/pull.yml

Lines changed: 20 additions & 12 deletions
@@ -286,15 +286,20 @@ jobs:
       # Test selective build
       PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"

-  test-llava-runner-linux:
-    name: test-llava-runner-linux
+  test-multimodal-linux:
+    if: ${{ !github.event.pull_request.head.repo.fork }}
+    name: test-multimodal-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
       contents: read
+    secrets: inherit
     strategy:
       fail-fast: false
+      matrix:
+        model: ["gemma3-4b"] # llava gives segfault so not covering.
     with:
+      secrets-env: EXECUTORCH_HF_TOKEN
       runner: linux.24xlarge
       docker-image: ci-image:executorch-ubuntu-22.04-clang12
       submodules: 'recursive'

@@ -305,17 +310,20 @@ jobs:
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"

+      echo "::group::Setup ExecuTorch"
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-      # install Llava requirements
-      bash examples/models/llama/install_requirements.sh
-      bash examples/models/llava/install_requirements.sh
-
-      # run python unittest
-      python -m unittest examples.models.llava.test.test_llava
-
-      # run e2e (export, tokenizer and runner)
-      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llava.sh
+      echo "::endgroup::"
+
+      echo "::group::Setup Huggingface"
+      pip install -U "huggingface_hub[cli]" accelerate
+      huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+      OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+      pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+      echo "::endgroup::"
+
+      echo "::group::Test ${{ matrix.model }}"
+      python .ci/scripts/test_huggingface_optimum_model.py --model ${{ matrix.model }} --quantize --recipe xnnpack
+      echo "::endgroup::"

   test-moshi-linux:
     name: test-moshi-linux

.github/workflows/trunk.yml

Lines changed: 39 additions & 28 deletions
@@ -616,34 +616,45 @@ jobs:

       bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.test_with_runner && '--test_with_runner' || '' }}

-  # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
-  # test-llava-runner-macos:
-  #   name: test-llava-runner-macos
-  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-  #   strategy:
-  #     fail-fast: false
-  #   with:
-  #     runner: macos-14-xlarge
-  #     python-version: '3.11'
-  #     submodules: 'recursive'
-  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-  #     timeout: 900
-  #   script: |
-  #     BUILD_TOOL=cmake
-
-  #     bash .ci/scripts/setup-conda.sh
-  #     # Setup MacOS dependencies as there is no Docker support on MacOS atm
-  #     GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-
-  #     # install Llava requirements
-  #     ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
-  #     ${CONDA_RUN} bash examples/models/llava/install_requirements.sh
-
-  #     # run python unittest
-  #     ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
-
-  #     # run e2e (export, tokenizer and runner)
-  #     PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh
+  test-multimodal-macos:
+    if: ${{ !github.event.pull_request.head.repo.fork }}
+    name: test-multimodal-macos
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    secrets: inherit
+    strategy:
+      fail-fast: false
+      matrix:
+        model: ["gemma3-4b"] # llava gives segfault so not covering.
+    with:
+      secrets-env: EXECUTORCH_HF_TOKEN
+      runner: macos-15-xlarge
+      python-version: '3.11'
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+    script: |
+      echo "::group::Set up ExecuTorch"
+      bash .ci/scripts/setup-conda.sh
+      eval "$(conda shell.bash hook)"
+
+      # Install requirements
+      ${CONDA_RUN} python install_executorch.py
+      echo "::endgroup::"
+
+      echo "::group::Set up Huggingface"
+      ${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
+      ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+      OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+      ${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+      ${CONDA_RUN} pip list
+      echo "::endgroup::"
+
+      echo "::group::Test ${{ matrix.model }}"
+      ${CONDA_RUN} python .ci/scripts/test_huggingface_optimum_model.py --model ${{ matrix.model }} --quantize --recipe xnnpack
+      echo "::endgroup::"

   test-qnn-model:
     name: test-qnn-model

CMakeLists.txt

Lines changed: 9 additions & 9 deletions
@@ -650,15 +650,6 @@ if(EXECUTORCH_BUILD_EXTENSION_LLM)
   list(APPEND _executorch_extensions tokenizers)
 endif()

-if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
-  list(APPEND _executorch_extensions extension_llm_runner)
-endif()
-
-if(EXECUTORCH_BUILD_EXTENSION_LLM_APPLE)
-  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/apple)
-endif()
-
 if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/runner_util)
   install(

@@ -904,6 +895,15 @@ if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
   list(APPEND _executorch_extensions extension_training)
 endif()

+if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
+  list(APPEND _executorch_extensions extension_llm_runner)
+endif()
+
+if(EXECUTORCH_BUILD_EXTENSION_LLM_APPLE)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/apple)
+endif()
+
 if(EXECUTORCH_BUILD_KERNELS_LLM)
   # TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/custom_ops)
