pytorch
diff --git a/‎.ci/docker/build.sh‎
Lines changed: 2 additions & 2 deletions b/‎.ci/docker/build.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.ci/docker/ci_commit_pins/optimum-executorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/optimum-executorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/pytorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/setup-samsung-linux-deps.sh‎
Lines changed: 2 additions & 2 deletions b/‎.ci/scripts/setup-samsung-linux-deps.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.ci/scripts/test-cuda-build.sh‎
Lines changed: 95 additions & 0 deletions b/‎.ci/scripts/test-cuda-build.sh‎
Lines changed: 95 additions & 0 deletions
diff --git a/‎.ci/scripts/test_huggingface_optimum_model.py‎
Lines changed: 114 additions & 8 deletions b/‎.ci/scripts/test_huggingface_optimum_model.py‎
Lines changed: 114 additions & 8 deletions
diff --git a/‎.ci/scripts/test_llama.sh‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/test_llama.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/test_model.sh‎
Lines changed: 3 additions & 3 deletions b/‎.ci/scripts/test_model.sh‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.ci/scripts/test_wheel_package_qnn.sh‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/test_wheel_package_qnn.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/scripts/unittest-buck2.sh‎
Lines changed: 12 additions & 1 deletion b/‎.ci/scripts/unittest-buck2.sh‎
Lines changed: 12 additions & 1 deletion
@@ -54,13 +54,13 @@ case "${IMAGE_NAME}" in
   executorch-ubuntu-22.04-mediatek-sdk)
     MEDIATEK_SDK=yes
     CLANG_VERSION=12
-    ANDROID_NDK_VERSION=r27b
+    ANDROID_NDK_VERSION=r28c
     ;;
   executorch-ubuntu-22.04-clang12-android)
     LINTRUNNER=""
     CLANG_VERSION=12
     # From https://developer.android.com/ndk/downloads
-    ANDROID_NDK_VERSION=r27b
+    ANDROID_NDK_VERSION=r28c
     ;;
   *)
     echo "Invalid image name ${IMAGE_NAME}"
 
@@ -1 +1 @@
-40b02a2dc61bbf901a2df91719f47c98d65368ec
+bd06b54e627fbfd354a2cffa4c80fb21883209a9
@@ -1 +1 @@
-4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e
+53a2908a10f414a2f85caa06703a26a40e873869
@@ -11,7 +11,7 @@ set -ex
 
 download_ai_lite_core() {
   API_BASE="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download"
-  API_KEY="kn10SoSY3hkC-9Qny5TqD2mnqVrlupv3krnjLeBt5cY"
+  API_KEY=$SAMSUNG_AI_LITECORE_KEY
 
   VERSION="0.5"
   OS_NAME="Ubuntu 22.04"
@@ -52,7 +52,7 @@ download_ai_lite_core() {
 install_enn_backend() {
   NDK_INSTALLATION_DIR=/opt/ndk
   rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
-  ANDROID_NDK_VERSION=r27b
+  ANDROID_NDK_VERSION=r28c
 
   # build Exynos backend
   export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
 
@@ -0,0 +1,95 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+CUDA_VERSION=${1:-"12.6"}
+
+echo "=== Testing ExecuTorch CUDA ${CUDA_VERSION} Build ==="
+
+# Function to build and test ExecuTorch with CUDA support
+test_executorch_cuda_build() {
+    local cuda_version=$1
+
+    echo "Building ExecuTorch with CUDA ${cuda_version} support..."
+    echo "ExecuTorch will automatically detect CUDA and install appropriate PyTorch wheel"
+
+    # Check available resources before starting
+    echo "=== System Information ==="
+    echo "Available memory: $(free -h | grep Mem | awk '{print $2}')"
+    echo "Available disk space: $(df -h . | tail -1 | awk '{print $4}')"
+    echo "CPU cores: $(nproc)"
+    echo "CUDA version check:"
+    nvcc --version || echo "nvcc not found"
+    nvidia-smi || echo "nvidia-smi not found"
+
+    # Set CMAKE_ARGS to enable CUDA build - ExecuTorch will handle PyTorch installation automatically
+    export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
+
+    echo "=== Starting ExecuTorch Installation ==="
+    # Install ExecuTorch with CUDA support with timeout and error handling
+    timeout 5400 ./install_executorch.sh || {
+        local exit_code=$?
+        echo "ERROR: install_executorch.sh failed with exit code: $exit_code"
+        if [ $exit_code -eq 124 ]; then
+            echo "ERROR: Installation timed out after 90 minutes"
+        fi
+        exit $exit_code
+    }
+
+    echo "SUCCESS: ExecuTorch CUDA build completed"
+
+    # Verify the installation
+    echo "=== Verifying ExecuTorch CUDA Installation ==="
+
+    # Test that ExecuTorch was built successfully
+    python -c "
+import executorch
+print('SUCCESS: ExecuTorch imported successfully')
+"
+
+    # Test CUDA availability and show details
+    python -c "
+try:
+    import torch
+    print('INFO: PyTorch version:', torch.__version__)
+    print('INFO: CUDA available:', torch.cuda.is_available())
+
+    if torch.cuda.is_available():
+        print('SUCCESS: CUDA is available for ExecuTorch')
+        print('INFO: CUDA version:', torch.version.cuda)
+        print('INFO: GPU device count:', torch.cuda.device_count())
+        print('INFO: Current GPU device:', torch.cuda.current_device())
+        print('INFO: GPU device name:', torch.cuda.get_device_name())
+
+        # Test basic CUDA tensor operation
+        device = torch.device('cuda')
+        x = torch.randn(10, 10).to(device)
+        y = torch.randn(10, 10).to(device)
+        z = torch.mm(x, y)
+        print('SUCCESS: CUDA tensor operation completed on device:', z.device)
+        print('INFO: Result tensor shape:', z.shape)
+
+        print('SUCCESS: ExecuTorch CUDA integration verified')
+    else:
+        print('WARNING: CUDA not detected, but ExecuTorch built successfully')
+        exit(1)
+except Exception as e:
+    print('ERROR: ExecuTorch CUDA test failed:', e)
+    exit(1)
+"
+
+    echo "SUCCESS: ExecuTorch CUDA ${cuda_version} build and verification completed successfully"
+}
+
+# Main execution
+echo "Current working directory: $(pwd)"
+echo "Directory contents:"
+ls -la
+
+# Run the CUDA build test
+test_executorch_cuda_build "${CUDA_VERSION}"
@@ -43,7 +43,9 @@ def cli_export(command, model_dir):
 
 
 def check_causal_lm_output_quality(
-    model_id: str, generated_tokens: List[int], max_perplexity_threshold: float = 100.0
+    model_id: str,
+    generated_tokens: List[int],
+    max_perplexity_threshold: float = 100.0,
 ):
     """
     Evaluates the quality of text generated by a causal language model by calculating its perplexity.
@@ -58,12 +60,24 @@ def check_causal_lm_output_quality(
     """
     logging.info(f"Starting perplexity check with model '{model_id}' ...")
     # Load model
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        low_cpu_mem_usage=True,
-        use_cache=False,
-        torch_dtype=torch.bfloat16,
-    )
+    cls_name = AutoModelForCausalLM
+    if "llava" in model_id:
+        from transformers import LlavaForConditionalGeneration
+
+        cls_name = LlavaForConditionalGeneration
+    try:
+        model = cls_name.from_pretrained(
+            model_id,
+            low_cpu_mem_usage=True,
+            use_cache=False,
+            torch_dtype=torch.bfloat16,
+        )
+    except TypeError:
+        model = cls_name.from_pretrained(
+            model_id,
+            low_cpu_mem_usage=True,
+            torch_dtype=torch.bfloat16,
+        )
 
     with torch.no_grad():
         outputs = model(input_ids=generated_tokens, labels=generated_tokens)
@@ -156,6 +170,86 @@ def test_text_generation(model_id, model_dir, recipe, *, quantize=True, run_only
     assert check_causal_lm_output_quality(model_id, generated_tokens) is True
 
 
+def test_llm_with_image_modality(
+    model_id, model_dir, recipe, *, quantize=True, run_only=False
+):
+    command = [
+        "optimum-cli",
+        "export",
+        "executorch",
+        "--model",
+        model_id,
+        "--task",
+        "multimodal-text-to-text",
+        "--recipe",
+        recipe,
+        "--output_dir",
+        model_dir,
+        "--use_custom_sdpa",
+        "--use_custom_kv_cache",
+        "--qlinear",
+        "8da4w",
+        "--qembedding",
+        "8w",
+    ]
+    if not run_only:
+        cli_export(command, model_dir)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.save_pretrained(model_dir)
+
+    # input
+    processor = AutoProcessor.from_pretrained(model_id)
+    image_url = "https://llava-vl.github.io/static/images/view.jpg"
+    conversation = [
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
+                }
+            ],
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "url": image_url},
+                {
+                    "type": "text",
+                    "text": "What are the things I should be cautious about when I visit here?",
+                },
+            ],
+        },
+    ]
+    inputs = processor.apply_chat_template(
+        conversation,
+        add_generation_prompt=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt",
+    )
+
+    from executorch.extension.llm.runner import GenerationConfig, MultimodalRunner
+
+    runner = MultimodalRunner(f"{model_dir}/model.pte", f"{model_dir}/tokenizer.model")
+    generated_text = runner.generate_text_hf(
+        inputs,
+        GenerationConfig(max_new_tokens=128, temperature=0, echo=False),
+        processor.image_token_id,
+    )
+    print(f"\nGenerated text:\n\t{generated_text}")
+    # Free memory before loading eager for quality check
+    del runner
+    gc.collect()
+    assert (
+        check_causal_lm_output_quality(
+            model_id, tokenizer.encode(generated_text, return_tensors="pt")
+        )
+        is True
+    )
+
+
 def test_fill_mask(model_id, model_dir, recipe, *, quantize=True, run_only=False):
     command = [
         "optimum-cli",
@@ -353,6 +447,9 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
         required=False,
         help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
     )
+    parser.add_argument(
+        "--run_only", action="store_true", help="Skip export and only run the test"
+    )
     args = parser.parse_args()
 
     _text_generation_mapping = {
@@ -384,8 +481,16 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
         "vit": ("google/vit-base-patch16-224", test_vit),
     }
 
+    _multimodal_model_mapping = {
+        "gemma3-4b": ("google/gemma-3-4b-it", test_llm_with_image_modality),
+        "llava": ("llava-hf/llava-1.5-7b-hf", test_llm_with_image_modality),
+    }
+
     model_to_model_id_and_test_function = (
-        _text_generation_mapping | _mask_fill_mapping | _misc_model_mapping
+        _text_generation_mapping
+        | _mask_fill_mapping
+        | _misc_model_mapping
+        | _multimodal_model_mapping
     )
 
     if args.model not in model_to_model_id_and_test_function:
@@ -400,4 +505,5 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
             model_dir=tmp_dir if args.model_dir is None else args.model_dir,
             recipe=args.recipe,
             quantize=args.quantize,
+            run_only=args.run_only,
         )
@@ -159,6 +159,7 @@ cmake_install_executorch_libraries() {
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
         -DEXECUTORCH_BUILD_QNN="$QNN" \
+        -DEXECUTORCH_ENABLE_LOGGING=ON \
         -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
     cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }
 
@@ -131,13 +131,13 @@ test_model_with_xnnpack() {
     return 0
   fi
 
-  # Delegation
+  # Delegation and test with pybindings
   if [[ ${WITH_QUANTIZATION} == true ]]; then
     SUFFIX="q8"
-    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize
+    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --quantize --test_after_export
   else
     SUFFIX="fp32"
-    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate
+    "${PYTHON_EXECUTABLE}" -m examples.xnnpack.aot_compiler --model_name="${MODEL_NAME}" --delegate --test_after_export
   fi
 
   OUTPUT_MODEL_PATH="${MODEL_NAME}_xnnpack_${SUFFIX}.pte"
 
@@ -145,6 +145,7 @@ run_core_tests () {
   echo "=== [$LABEL] Import smoke tests ==="
   "$PYBIN" -c "import executorch; print('executorch imported successfully')"
   "$PYBIN" -c "import executorch.backends.qualcomm; print('executorch.backends.qualcomm imported successfully')"
+  "$PYBIN" -c "from executorch.export.target_recipes import get_android_recipe; recipe = get_android_recipe('android-arm64-snapdragon-fp16'); print(f'executorch.export.target_recipes imported successfully: {recipe}')"
 
   echo "=== [$LABEL] List installed executorch/backends/qualcomm/python ==="
   local SITE_DIR
 
@@ -15,7 +15,8 @@ buck2 query "//backends/apple/... + //backends/arm: + //backends/arm/debug/... +
 //backends/arm/_passes/... + //backends/arm/runtime/... + //backends/arm/tosa/... \
 + //backends/example/... + \
 //backends/mediatek/... + //backends/transforms/... + \
-//backends/xnnpack/... + //configurations/... + //extension/flat_tensor: + \
+//backends/xnnpack/... + //codegen/tools/... + \
+//configurations/... + //extension/flat_tensor: + \
 //extension/llm/runner: + //kernels/aten/... + //kernels/optimized/... + \
 //kernels/portable/... + //kernels/quantized/... + //kernels/test/... + \
 //runtime/... + //schema/... + //test/... + //util/..."
@@ -34,7 +35,17 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
 for op in "build" "test"; do
     buck2 $op $BUILDABLE_OPTIMIZED_OPS \
           //examples/selective_build:select_all_dtype_selective_lib_portable_lib \
+          //extension/llm/custom_ops/spinquant/test:fast_hadamard_transform_test \
+          //extension/llm/runner/test:test_multimodal_input \
+          //extension/llm/runner/test:test_generation_config \
           //kernels/portable/... \
           $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
           //runtime/executor: //runtime/kernel/... //runtime/platform/...
 done
+
+# Build only without testing
+buck2 build //codegen/tools/... \
+        //extension/llm/runner/io_manager:io_manager \
+        //extension/llm/modules/... \
+        //extension/llm/runner:multimodal_runner_lib \
+        //extension/llm/runner:text_decoder_runner
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-40b02a2dc61bbf901a2df91719f47c98d65368ec`
	`1`	`+bd06b54e627fbfd354a2cffa4c80fb21883209a9`
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e`
	`1`	`+53a2908a10f414a2f85caa06703a26a40e873869`
Original file line number	Diff line number	Diff line change
`@@ -159,6 +159,7 @@ cmake_install_executorch_libraries() {`
`159`	`159`	`-DCMAKE_INSTALL_PREFIX=cmake-out \`
`160`	`160`	`-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \`
`161`	`161`	`-DEXECUTORCH_BUILD_QNN="$QNN" \`
	`162`	`+ -DEXECUTORCH_ENABLE_LOGGING=ON \`
`162`	`163`	`-DQNN_SDK_ROOT="$QNN_SDK_ROOT"`
`163`	`164`	`cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"`
`164`	`165`	`}`