From 912c4876d2c5bfde8a40a8baf1a9db3b72d5adc4 Mon Sep 17 00:00:00 2001
From: gasoonjia
Date: Thu, 16 Oct 2025 14:50:55 -0700
Subject: [PATCH 1/4] init commit

---
 install_dev.py                        |  2 +-
 optimum/commands/export/executorch.py |  4 +--
 tests/models/test_modeling_gemma3.py  | 40 +++++++++++++++++++++++++--
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/install_dev.py b/install_dev.py
index c31e76a..c32b53b 100644
--- a/install_dev.py
+++ b/install_dev.py
@@ -5,7 +5,7 @@
 
 def install_torch_nightly_deps():
     """Install torch related dependencies from pinned nightly"""
-    EXECUTORCH_NIGHTLY_VERSION = "dev20250916"
+    EXECUTORCH_NIGHTLY_VERSION = "dev20251015"
     TORCHAO_NIGHTLY_VERSION = "dev20250916"
     # Torch nightly is aligned with pinned nightly in https://github.com/pytorch/executorch/blob/main/install_requirements.py#L74
     TORCH_NIGHTLY_VERSION = "dev20250916"
diff --git a/optimum/commands/export/executorch.py b/optimum/commands/export/executorch.py
index 12398da..e877114 100644
--- a/optimum/commands/export/executorch.py
+++ b/optimum/commands/export/executorch.py
@@ -185,9 +185,7 @@ def run(self):
                     "--qlinear_packing_format can only be used when --device is set to CUDA (e.g., 'cuda', 'cuda:0', etc.)"
                 )
             if not self.args.qlinear or self.args.qlinear != "4w":
-                raise ValueError(
-                    "--qlinear_packing_format can only be used when --qlinear is set to '4w'"
-                )
+                raise ValueError("--qlinear_packing_format can only be used when --qlinear is set to '4w'")
         qlinear_encoder_packing_format = getattr(self.args, "qlinear_encoder_packing_format", None)
         if qlinear_encoder_packing_format:
             if not device or not device.startswith("cuda"):
diff --git a/tests/models/test_modeling_gemma3.py b/tests/models/test_modeling_gemma3.py
index ff507bb..f54e688 100644
--- a/tests/models/test_modeling_gemma3.py
+++ b/tests/models/test_modeling_gemma3.py
@@ -22,11 +22,15 @@
 import unittest
 
 import pytest
+import torch
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
 from transformers import AutoProcessor, AutoTokenizer
 from transformers.testing_utils import slow
 
-from optimum.executorch import ExecuTorchModelForCausalLM, ExecuTorchModelForMultiModalToText
+from optimum.executorch import (
+    ExecuTorchModelForCausalLM,
+    ExecuTorchModelForMultiModalToText,
+)
 
 from ..utils import check_causal_lm_output_quality, check_multimodal_output_quality
 
@@ -288,7 +292,10 @@ def test_gemma3_image_vision_with_custom_sdpa_kv_cache_8da4w_8we(self):
         processor = AutoProcessor.from_pretrained(model_id)
         image_url = "https://llava-vl.github.io/static/images/view.jpg"
         conversation = [
-            {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": "You are a helpful assistant."}],
+            },
             {
                 "role": "user",
                 "content": [
@@ -337,3 +344,32 @@ def test_gemma3_image_vision_with_custom_sdpa_kv_cache_8da4w_8we(self):
         self.assertTrue(
             check_multimodal_output_quality(model_id, generated_tokens, conversation, max_perplexity_threshold=5)
         )
+
+    @slow
+    @pytest.mark.run_slow
+    @pytest.mark.skipif(is_linux_ci, reason="OOM")
+    @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA backend required")
+    def test_gemma3_export_to_executorch_in_cuda_recipe(self):
+        model_id = "unsloth/gemma-3-1b-it"
+        task = "text-generation"
+        recipe = "cuda"
+        output_subdir = "executorch"
+
+        with tempfile.TemporaryDirectory() as tempdir:
+            out_dir: str = f"{tempdir}/executorch"
+            subprocess.run(
+                f"optimum-cli export executorch \
+                --model {model_id} \
+                --task {task} \
+                --recipe {recipe} \
+                --output_dir {tempdir}/{output_subdir} \
+                --dtype bfloat16 \
+                --device cuda \
+                --max_seq_len 64",
+                shell=True,
+                check=True,
+            )
+            pte_full_path: str = f"{out_dir}/model.pte"
+            ptd_full_path: str = f"{out_dir}/aoti_cuda_blob.ptd"
+            self.assertTrue(os.path.exists(pte_full_path))
+            self.assertTrue(os.path.exists(ptd_full_path))

From 9b0a876a0de14a6771eb24e556b0a6ead45e587f Mon Sep 17 00:00:00 2001
From: gasoonjia
Date: Thu, 16 Oct 2025 14:58:05 -0700
Subject: [PATCH 2/4] use correct model id and task

---
 tests/models/test_modeling_gemma3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/models/test_modeling_gemma3.py b/tests/models/test_modeling_gemma3.py
index f54e688..f6800f8 100644
--- a/tests/models/test_modeling_gemma3.py
+++ b/tests/models/test_modeling_gemma3.py
@@ -350,8 +350,8 @@ def test_gemma3_image_vision_with_custom_sdpa_kv_cache_8da4w_8we(self):
     @pytest.mark.skipif(is_linux_ci, reason="OOM")
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA backend required")
     def test_gemma3_export_to_executorch_in_cuda_recipe(self):
-        model_id = "unsloth/gemma-3-1b-it"
-        task = "text-generation"
+        model_id = "google/gemma-3-4b-it"
+        task = "multimodal-text-to-text"
         recipe = "cuda"
         output_subdir = "executorch"
 

From cb8fbcbf6037f843ceada90314d8cb681225f153 Mon Sep 17 00:00:00 2001
From: gasoonjia
Date: Fri, 17 Oct 2025 10:42:46 -0700
Subject: [PATCH 3/4] remove redundant op decomp in cuda recipe

---
 optimum/exporters/executorch/recipes/cuda.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/optimum/exporters/executorch/recipes/cuda.py b/optimum/exporters/executorch/recipes/cuda.py
index 5aa6973..812e0b6 100644
--- a/optimum/exporters/executorch/recipes/cuda.py
+++ b/optimum/exporters/executorch/recipes/cuda.py
@@ -64,8 +64,6 @@ def export_to_executorch_with_cuda(
         For encoder-decoder models or multimodal models, it may generate multiple programs.
     """
     # Import here to avoid version conflicts.
-    from torch._inductor.decomposition import conv1d_to_conv2d
-
     from executorch.backends.cuda.cuda_backend import CudaBackend
     from executorch.backends.cuda.cuda_partitioner import CudaPartitioner
 
@@ -84,13 +82,7 @@ def _lower_to_executorch(
             key: [CudaPartitioner([CudaBackend.generate_method_name_compile_spec(key)])]
             for key in exported_programs.keys()
         }
-        # Add decompositions for triton to generate kernels.
-        for key, ep in exported_programs.items():
-            exported_programs[key] = ep.run_decompositions(
-                {
-                    aten.conv1d.default: conv1d_to_conv2d,
-                }
-            )
+
         with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]):
             et_prog = to_edge_transform_and_lower(
                 exported_programs,

From 1f81b707dfbcef34d43a0880a2932a2d0a29c02c Mon Sep 17 00:00:00 2001
From: Gasoonjia
Date: Fri, 17 Oct 2025 14:45:25 -0700
Subject: [PATCH 4/4] Update install_dev.py

---
 install_dev.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install_dev.py b/install_dev.py
index c32b53b..c31e76a 100644
--- a/install_dev.py
+++ b/install_dev.py
@@ -5,7 +5,7 @@
 
 def install_torch_nightly_deps():
     """Install torch related dependencies from pinned nightly"""
-    EXECUTORCH_NIGHTLY_VERSION = "dev20251015"
+    EXECUTORCH_NIGHTLY_VERSION = "dev20250916"
     TORCHAO_NIGHTLY_VERSION = "dev20250916"
     # Torch nightly is aligned with pinned nightly in https://github.com/pytorch/executorch/blob/main/install_requirements.py#L74
     TORCH_NIGHTLY_VERSION = "dev20250916"