diff --git a/install_dev.py b/install_dev.py index c31e76a..6ce3a27 100644 --- a/install_dev.py +++ b/install_dev.py @@ -5,7 +5,7 @@ def install_torch_nightly_deps(): """Install torch related dependencies from pinned nightly""" - EXECUTORCH_NIGHTLY_VERSION = "dev20250916" + EXECUTORCH_NIGHTLY_VERSION = "dev20251003" TORCHAO_NIGHTLY_VERSION = "dev20250916" # Torch nightly is aligned with pinned nightly in https://github.com/pytorch/executorch/blob/main/install_requirements.py#L74 TORCH_NIGHTLY_VERSION = "dev20250916" @@ -15,7 +15,7 @@ def install_torch_nightly_deps(): "-m", "pip", "install", - f"executorch==1.0.0.{EXECUTORCH_NIGHTLY_VERSION}", + f"executorch==1.1.0.{EXECUTORCH_NIGHTLY_VERSION}", f"torch==2.10.0.{TORCH_NIGHTLY_VERSION}", f"torchvision==0.25.0.{TORCH_NIGHTLY_VERSION}", f"torchaudio==2.8.0.{TORCH_NIGHTLY_VERSION}", diff --git a/optimum/exporters/executorch/recipes/cuda.py b/optimum/exporters/executorch/recipes/cuda.py index 5aa6973..2fcaf73 100644 --- a/optimum/exporters/executorch/recipes/cuda.py +++ b/optimum/exporters/executorch/recipes/cuda.py @@ -114,10 +114,6 @@ def _lower_to_executorch( ) return {pte_name: et_prog} - # Decomposes SDPA since we don't have a flash attention kernel for it yet. - with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad(): - exported_progs = model.export() - if ( model.config._attn_implementation == "custom_sdpa" or model.config._attn_implementation == "custom_sdpa_ring_kv_cache" @@ -126,4 +122,8 @@ def _lower_to_executorch( "Custom SDPA implementation is not supported for CUDA yet. Please use 'flash_attention' instead." ) + # Decomposes SDPA since we don't have a flash attention kernel for it yet.
+ with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad(): + exported_progs = model.export() + return _lower_to_executorch(exported_progs, model.metadata) diff --git a/tests/models/test_modeling_voxtral.py b/tests/models/test_modeling_voxtral.py index e0b13e6..bae8b72 100644 --- a/tests/models/test_modeling_voxtral.py +++ b/tests/models/test_modeling_voxtral.py @@ -351,3 +351,4 @@ def test_voxtral_export_to_executorch_cuda_recipe(self): ) subprocess.run(cmd, shell=True, check=True) self.assertTrue(os.path.exists(os.path.join(output_dir, "model.pte"))) + self.assertTrue(os.path.exists(os.path.join(output_dir, "aoti_cuda_blob.ptd")))