install_dev.py (4 changes: 2 additions & 2 deletions)
@@ -5,7 +5,7 @@

 def install_torch_nightly_deps():
     """Install torch related dependencies from pinned nightly"""
-    EXECUTORCH_NIGHTLY_VERSION = "dev20250916"
+    EXECUTORCH_NIGHTLY_VERSION = "dev20251003"
     TORCHAO_NIGHTLY_VERSION = "dev20250916"
     # Torch nightly is aligned with pinned nightly in https://github.com/pytorch/executorch/blob/main/install_requirements.py#L74
     TORCH_NIGHTLY_VERSION = "dev20250916"
@@ -15,7 +15,7 @@ def install_torch_nightly_deps():
         "-m",
         "pip",
         "install",
-        f"executorch==1.0.0.{EXECUTORCH_NIGHTLY_VERSION}",
+        f"executorch==1.1.0.{EXECUTORCH_NIGHTLY_VERSION}",
         f"torch==2.10.0.{TORCH_NIGHTLY_VERSION}",
         f"torchvision==0.25.0.{TORCH_NIGHTLY_VERSION}",
         f"torchaudio==2.8.0.{TORCH_NIGHTLY_VERSION}",
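Aside (not part of the diff): the pinned tags above feed into a single pip invocation at install time. Below is a minimal sketch of that pattern, assuming subprocess.run and sys.executable and omitting the nightly wheel index URL; only the version strings come from this file.

import subprocess
import sys

EXECUTORCH_NIGHTLY_VERSION = "dev20251003"  # bumped by this PR
TORCH_NIGHTLY_VERSION = "dev20250916"  # unchanged

# Install the pinned nightly builds; the extra index URL for nightly
# wheels is omitted in this sketch.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        f"executorch==1.1.0.{EXECUTORCH_NIGHTLY_VERSION}",
        f"torch==2.10.0.{TORCH_NIGHTLY_VERSION}",
    ],
    check=True,
)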
optimum/exporters/executorch/recipes/cuda.py (8 changes: 4 additions & 4 deletions)
@@ -114,10 +114,6 @@ def _lower_to_executorch(
         )
         return {pte_name: et_prog}

-    # Decomposes SDPA since we don't have a flash attention kernel for it yet.
-    with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
-        exported_progs = model.export()
-
     if (
         model.config._attn_implementation == "custom_sdpa"
         or model.config._attn_implementation == "custom_sdpa_ring_kv_cache"
@@ -126,4 +122,8 @@
             "Custom SDPA implementation is not supported for CUDA yet. Please use 'flash_attention' instead."
         )

+    # Decomposes SDPA since we don't have a flash attention kernel for it yet.
+    with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
+        exported_progs = model.export()
+
     return _lower_to_executorch(exported_progs, model.metadata)
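Aside (not part of the diff): moving this block means an unsupported attention implementation now raises before the export runs, rather than after it. The decomposition pattern itself is sketched below; the toy module and tensor shapes are made up for illustration, and only the sdpa_kernel([SDPBackend.MATH]) / torch.no_grad() usage mirrors the recipe.

import torch
from torch.nn.attention import SDPBackend, sdpa_kernel


class TinyAttention(torch.nn.Module):
    def forward(self, q, k, v):
        return torch.nn.functional.scaled_dot_product_attention(q, k, v)


q, k, v = (torch.randn(1, 4, 8, 16) for _ in range(3))  # (batch, heads, seq_len, head_dim)

# Pin SDPA to the math backend while exporting, per the recipe's comment:
# decompose SDPA because no flash-attention kernel exists for this path yet.
with sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
    exported = torch.export.export(TinyAttention(), (q, k, v))

print(exported.graph_module.code)  # inspect the captured attention graph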
tests/models/test_modeling_voxtral.py (1 change: 1 addition & 0 deletions)
@@ -351,3 +351,4 @@ def test_voxtral_export_to_executorch_cuda_recipe(self):
         )
         subprocess.run(cmd, shell=True, check=True)
         self.assertTrue(os.path.exists(os.path.join(output_dir, "model.pte")))
+        self.assertTrue(os.path.exists(os.path.join(output_dir, "aoti_cuda_blob.ptd")))