diff --git a/.github/workflows/test_models.yml b/.github/workflows/test_models.yml index af830a02..a72dba55 100644 --- a/.github/workflows/test_models.yml +++ b/.github/workflows/test_models.yml @@ -34,9 +34,10 @@ jobs: fail-fast: false matrix: test-modeling: ${{ fromJson(needs.discover-tests.outputs.model_names) }} - executorch-version: ['0.6.0', 'nightly'] + executorch-version: ['0.7.0', 'nightly'] python-version: ['3.11'] - os: [macos-15, ubuntu-22.04] + # os: [macos-15, ubuntu-22.04] # TODO(#122): Re-enable the mac tests after fixing seg fault. + os: [ubuntu-22.04] # Custom job name, now shortened and cleaner name: ${{ matrix.test-modeling }} (et=${{ matrix.executorch-version }}, py=${{ matrix.python-version }}, ${{ matrix.os }}) diff --git a/install_dev.py b/install_dev.py index 5cf313ff..3fac4546 100644 --- a/install_dev.py +++ b/install_dev.py @@ -5,21 +5,21 @@ def install_torch_nightly_deps(): """Install torch related dependencies from pinned nightly""" - EXECUTORCH_NIGHTLY_VERSION = "dev20250625" - TORCHAO_NIGHTLY_VERSION = "dev20250620" + EXECUTORCH_NIGHTLY_VERSION = "dev20250730" + TORCHAO_NIGHTLY_VERSION = "dev20250730" # Torch nightly is aligned with pinned nightly in https://github.com/pytorch/executorch/blob/main/install_requirements.py#L74 - TORCH_NIGHTLY_VERSION = "dev20250601" + TORCH_NIGHTLY_VERSION = "dev20250725" subprocess.check_call( [ sys.executable, "-m", "pip", "install", - f"executorch==0.7.0.{EXECUTORCH_NIGHTLY_VERSION}", - f"torch==2.8.0.{TORCH_NIGHTLY_VERSION}", - f"torchvision==0.23.0.{TORCH_NIGHTLY_VERSION}", + f"executorch==0.8.0.{EXECUTORCH_NIGHTLY_VERSION}", + f"torch==2.9.0.{TORCH_NIGHTLY_VERSION}", + f"torchvision==0.24.0.{TORCH_NIGHTLY_VERSION}", f"torchaudio==2.8.0.{TORCH_NIGHTLY_VERSION}", - f"torchao==0.12.0.{TORCHAO_NIGHTLY_VERSION}", + f"torchao==0.13.0.{TORCHAO_NIGHTLY_VERSION}", "--extra-index-url", "https://download.pytorch.org/whl/nightly/cpu", ] @@ -34,7 +34,7 @@ def install_dep_from_source(): "-m", "pip", "install", - "git+https://github.com/huggingface/transformers@896e9cea1ade521b2648f4798218550f6c72190c#egg=transformers", # 4.53.1 + "git+https://github.com/huggingface/transformers@9c641dc16154964e5ffc0c13e9ec6aaffa295ed6#egg=transformers", # 4.54.1 ] ) subprocess.check_call( diff --git a/optimum/executorch/attentions/custom_kv_cache.py b/optimum/executorch/attentions/custom_kv_cache.py index b5416f87..a9d912ab 100644 --- a/optimum/executorch/attentions/custom_kv_cache.py +++ b/optimum/executorch/attentions/custom_kv_cache.py @@ -54,12 +54,12 @@ def __init__( # Create a list of CustomKVCache instances, one per layer self.kv_cache = torch.nn.ModuleList() - for _ in range(config.num_hidden_layers): + for layer in self.layers: layer_cache = CustomKVCache( - max_batch_size=self.max_batch_size, - max_context_length=self.max_cache_len, - n_heads=self.num_key_value_heads, - head_dim=self.head_dim, + max_batch_size=layer.max_batch_size, + max_context_length=layer.max_cache_len, + n_heads=layer.num_heads, + head_dim=layer.head_dim, dtype=dtype, ) self.kv_cache.append(layer_cache) @@ -202,32 +202,29 @@ def __init__( layer_device_map=layer_device_map, ) - # make sure layer_device_map is none assert layer_device_map is None assert device is None or device == "cpu", "Device must be None or 'cpu'" self.cache_position = None - # Create a list of cache instances, one per layer - # Use CustomKVCache for global layers and CustomRingKVCache for sliding window layers + # Create a list of cache instances, one per layer. 
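+        # Each layer entry in self.layers carries its own geometry (max_batch_size, max_cache_len, num_heads, head_dim), so no cache-wide attributes are needed here.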
+ # Use CustomKVCache for global layers and CustomRingKVCache for sliding window layers. self.kv_cache = torch.nn.ModuleList() - for layer_idx in range(config.num_hidden_layers): - # newer version of transfomer has is_sliding defined - # for HybridCache - if self.is_sliding[layer_idx]: + for layer in self.layers: + if layer.is_sliding: # This is a sliding window layer layer_cache = CustomRingKVCache( - max_batch_size=self.max_batch_size, - max_context_length=self.sliding_window_len, - n_heads=self.num_key_value_heads, - head_dim=self.head_dim, + max_batch_size=layer.max_batch_size, + max_context_length=layer.max_cache_len, + n_heads=layer.num_heads, + head_dim=layer.head_dim, dtype=dtype, ) else: layer_cache = CustomKVCache( - max_batch_size=self.max_batch_size, - max_context_length=self.max_cache_len, - n_heads=self.num_key_value_heads, - head_dim=self.head_dim, + max_batch_size=layer.max_batch_size, + max_context_length=layer.max_cache_len, + n_heads=layer.num_heads, + head_dim=layer.head_dim, dtype=dtype, ) self.kv_cache.append(layer_cache) @@ -284,7 +281,7 @@ def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: # For CustomRingKVCache, we need to handle the sequence length differently layer_cache = self.kv_cache[layer_idx] - if self.is_sliding[layer_idx]: + if self.layers[layer_idx].is_sliding: # CustomRingKVCache cache_position_manager which # maintains cache position for each slot in the kv cache # we return the max position + 1 to indicate max position @@ -308,7 +305,7 @@ def get_layer_cache(self, layer_idx: int): def replace_with_et_custom_kv_cache(module, config, generation_config, cache_dtype): """ - Replace all KV caches in the module with ETCustomStaticCache. + Replace all KV caches in the module with ETCustomStaticCache or ETCustomHybridCache. This modifies the model in place. 
Args: @@ -342,18 +339,18 @@ def _replace_with_et_custom_kv_cache(module, config, generation_config, cache_dt if getattr(module, "replace_cache", None) is not None: static_cache = ETCustomStaticCache( config=config, - max_batch_size=generation_config.cache_config.batch_size, - max_cache_len=generation_config.cache_config.max_cache_len, - device=generation_config.cache_config.device, + max_batch_size=generation_config.cache_config.get("batch_size"), + max_cache_len=generation_config.cache_config.get("max_cache_len"), + device=generation_config.cache_config.get("device"), dtype=cache_dtype, ) module.replace_cache(static_cache) else: module.static_cache = ETCustomStaticCache( config=config, - max_batch_size=generation_config.cache_config.batch_size, - max_cache_len=generation_config.cache_config.max_cache_len, - device=generation_config.cache_config.device, + max_batch_size=generation_config.cache_config.get("batch_size"), + max_cache_len=generation_config.cache_config.get("max_cache_len"), + device=generation_config.cache_config.get("device"), dtype=cache_dtype, ) # Dont know why we need to this even though @@ -370,25 +367,25 @@ def _replace_with_et_custom_kv_cache(module, config, generation_config, cache_dt if getattr(module, "replace_cache", None) is not None: hybrid_cache = ETCustomHybridCache( config=config, - max_batch_size=generation_config.cache_config.batch_size, - max_cache_len=generation_config.cache_config.max_cache_len, - device=generation_config.cache_config.device, + max_batch_size=generation_config.cache_config.get("batch_size"), + max_cache_len=generation_config.cache_config.get("max_cache_len"), + device=generation_config.cache_config.get("device"), dtype=cache_dtype, ) module.replace_cache(hybrid_cache) else: module.cache = ETCustomHybridCache( config=config, - max_batch_size=generation_config.cache_config.batch_size, - max_cache_len=generation_config.cache_config.max_cache_len, - device=generation_config.cache_config.device, + max_batch_size=generation_config.cache_config.get("batch_size"), + max_cache_len=generation_config.cache_config.get("max_cache_len"), + device=generation_config.cache_config.get("device"), dtype=cache_dtype, ) # Register cache attributes for each layer for i in range(len(module.cache.kv_cache)): setattr(module, f"key_cache_{i}", module.cache.kv_cache[i].k_cache) setattr(module, f"value_cache_{i}", module.cache.kv_cache[i].v_cache) - if module.cache.is_sliding[i]: + if module.cache.layers[i].is_sliding: # Register cache_positions as buffer for sliding window layers # This prevents it from being traced as a constant module.register_buffer( diff --git a/optimum/executorch/modeling.py b/optimum/executorch/modeling.py index 28ffc2fd..58218e51 100644 --- a/optimum/executorch/modeling.py +++ b/optimum/executorch/modeling.py @@ -16,6 +16,7 @@ import logging import os +import shutil from abc import ABC, abstractmethod from pathlib import Path from tempfile import TemporaryDirectory @@ -24,6 +25,7 @@ import torch from huggingface_hub import hf_hub_download from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE +from torch.ao.quantization.fx._decomposed import quantized_decomposed_lib # noqa from transformers import ( AutoModelForCausalLM, AutoModelForImageClassification, @@ -102,6 +104,34 @@ def __init__(self, models: Dict[str, "ExecuTorchModule"], config: "PretrainedCon self.stats = Stats() + # Initialize cleanup tracking + self._temp_dir = None + + def __del__(self): + """Clean up temporary files when the model instance is destroyed.""" + 
self._cleanup_temp_resources() + + def _cleanup_temp_resources(self): + """Clean up temporary directory and files.""" + if hasattr(self, "_temp_dir") and self._temp_dir is not None: + try: + if hasattr(self._temp_dir, "cleanup"): + # It's a TemporaryDirectory object + logging.info(f"Cleaning up temporary directory: {self._temp_dir.name}") + self._temp_dir.cleanup() + logging.info("Temporary directory cleanup completed") + elif isinstance(self._temp_dir, (str, Path)): + # It's a path + logging.info(f"Cleaning up temporary path: {self._temp_dir}") + shutil.rmtree(self._temp_dir, ignore_errors=True) + logging.info("Temporary path cleanup completed") + except Exception as e: + # Log cleanup errors for debugging + logging.warning(f"Error during temp directory cleanup: {e}") + pass + finally: + self._temp_dir = None + @abstractmethod def forward(self, *args, **kwargs): """ @@ -242,7 +272,7 @@ def _export( inferred_task = TasksManager.infer_task_from_model(cls.auto_model_class) logging.info(f"Inferred task from model class: {inferred_task}") - save_dir = TemporaryDirectory() + save_dir = TemporaryDirectory(prefix="executorch_export_") save_dir_path = Path(save_dir.name) # Export to ExecuTorch and save the pte file to the temporary directory @@ -266,7 +296,7 @@ def _export( for name, _ in executorch_progs.items(): models.update(cls._from_pretrained(save_dir_path, file_name=f"{name}.pte", config=config)) - return models + return models, save_dir def _save_pretrained(self, save_directory): """ @@ -298,6 +328,7 @@ def from_pretrained( logger.info("Offline mode: setting `local_files_only=True`") local_files_only = True + # See if model was already exported to ExecuTorch and uplaoded to the HuggingFace repo. _export = export try: if local_files_only and not os.path.isdir(model_id): @@ -324,21 +355,21 @@ def from_pretrained( if export: logger.warning( f"The model {model_id} was already converted to the ExecuTorch IR but got `export=True`, the model will be converted to ExecuTorch once again. " - # "Don't forget to save the resulting model with `.save_pretrained()`" ) _export = True else: logger.warning( f"No ExecuTorch files were found for {model_id}, setting `export=True` to convert the model to the ExecuTorch IR. " - # "Don't forget to save the resulting model with `.save_pretrained()`" ) except Exception as exception: logger.warning( f"Could not infer whether the model was already converted or not to the ExecuTorch IR, keeping `export={export}`.\n{exception}" ) + temp_dir = None if _export: - models_dict = cls._export( + logging.info(f"Exporting {model_id} to ExecuTorch program...") + models_dict, temp_dir = cls._export( model_id=model_id, config=config, revision=revision, @@ -351,6 +382,9 @@ def from_pretrained( **kwargs, ) else: + logging.info( + f"Pre-exported `.pte` artifact already exists in HuggingFace repo or provided file path for {model_id}, skipping export." 
+ ) models_dict = {} for pte_file in pte_files: models_dict.update( @@ -368,7 +402,14 @@ def from_pretrained( ) ) - return cls(models_dict, config) + model_instance = cls(models_dict, config) + + # Store the TemporaryDirectory reference to prevent GC + if temp_dir is not None: + model_instance._temp_dir = temp_dir + logging.info(f"Stored temp directory reference in model: {temp_dir.name}") + + return model_instance class ExecuTorchModelForSeq2SeqLM(ExecuTorchModelBase): diff --git a/optimum/exporters/executorch/__main__.py b/optimum/exporters/executorch/__main__.py index 7a9fe9c8..854bf260 100644 --- a/optimum/exporters/executorch/__main__.py +++ b/optimum/exporters/executorch/__main__.py @@ -15,6 +15,7 @@ """Entry point to the optimum.exporters.executorch command line.""" import argparse +import logging import os import warnings from pathlib import Path @@ -130,10 +131,14 @@ def main_export( kwargs["force_download"] = force_download kwargs["config"] = config + # 1. Load model, apply source transformations, and torch.export() into a graph (ExportedProgram). + logging.info(f"Loading {model_name_or_path} and exporting to static graph...") recipe_kwargs = kwargs.pop("recipe_kwargs", {}) model = task_func(model_name_or_path, **kwargs) + # 2. Export to ExecuTorch through ExecuTorch's lowering APIs. + logging.info(f"Lowering {model_name_or_path} to ExecuTorch...") if not os.path.exists(output_dir): os.makedirs(output_dir) diff --git a/optimum/exporters/executorch/convert.py b/optimum/exporters/executorch/convert.py index 612b07fb..8a381423 100644 --- a/optimum/exporters/executorch/convert.py +++ b/optimum/exporters/executorch/convert.py @@ -19,20 +19,17 @@ from pathlib import Path from typing import Union +from transformers.integrations.executorch import sdpa_mask_without_vmap +from transformers.masking_utils import AttentionMaskInterface from transformers.modeling_utils import AttentionInterface from optimum.executorch.attentions.custom_sdpa import custom_sdpa_with_start_pos_forward -from optimum.utils.import_utils import is_transformers_version from .recipe_registry import discover_recipes, recipe_registry AttentionInterface.register("custom_sdpa", custom_sdpa_with_start_pos_forward) -if is_transformers_version(">=", "4.53.0.dev0"): - from transformers.integrations.executorch import sdpa_mask_without_vmap - from transformers.masking_utils import AttentionMaskInterface - - AttentionMaskInterface.register("custom_sdpa", sdpa_mask_without_vmap) +AttentionMaskInterface.register("custom_sdpa", sdpa_mask_without_vmap) def export_to_executorch( diff --git a/optimum/exporters/executorch/integrations.py b/optimum/exporters/executorch/integrations.py index cc2992b9..d6695902 100644 --- a/optimum/exporters/executorch/integrations.py +++ b/optimum/exporters/executorch/integrations.py @@ -29,7 +29,6 @@ from transformers.generation.configuration_utils import GenerationConfig from optimum.executorch.attentions.custom_sdpa import get_custom_sdpa_for_ring_kv_cache -from optimum.utils.import_utils import is_transformers_version from .utils import save_config_to_constant_methods @@ -72,11 +71,7 @@ def _prepare_export_inputs(self): and not (self.use_custom_kv_cache and self.use_custom_sdpa) ) - if ( - not self.disable_dynamic_shapes - and is_transformers_version(">", "4.52.0") - and not is_using_hybrid_cache_wo_custom_sdpa_kv_cache - ): + if not self.disable_dynamic_shapes and not is_using_hybrid_cache_wo_custom_sdpa_kv_cache: # Prepare inputs with dynamic shapes seq_length = 3 # Sequence length > 1 to avoid 
specialization issues example_input_ids = torch.zeros((1, seq_length), dtype=torch.long) @@ -93,24 +88,23 @@ def _prepare_export_inputs(self): return example_input_ids, example_cache_position, dynamic_shapes, strict - def _register_attention_mask_for_4_53(self, exportable_module: torch.nn.Module): - if is_transformers_version(">=", "4.53.0.dev0"): - from transformers.integrations.executorch import sdpa_mask_without_vmap - from transformers.masking_utils import AttentionMaskInterface - from transformers.modeling_utils import AttentionInterface - - _custom_sdpa_for_ring_kv_cache = get_custom_sdpa_for_ring_kv_cache(exportable_module) - if self.use_custom_sdpa: - if self.use_custom_kv_cache: - AttentionInterface.register("custom_sdpa_ring_kv_cache", _custom_sdpa_for_ring_kv_cache) - AttentionMaskInterface.register("custom_sdpa_ring_kv_cache", sdpa_mask_without_vmap) - # Manually set the attention implementation to custom_sdpa_ring_kv_cache - # This handles both regular sdpa and one for sliding window/local attention - exportable_module.model.model.config._attn_implementation = "custom_sdpa_ring_kv_cache" - else: - # Manually set the attention implementation to custom_sdpa_ring_kv_cache - # This handles both regular sdpa and one for sliding window/local attention - exportable_module.model.model.config._attn_implementation = "custom_sdpa" + def _register_custom_attention(self, exportable_module: torch.nn.Module): + from transformers.integrations.executorch import sdpa_mask_without_vmap + from transformers.masking_utils import AttentionMaskInterface + from transformers.modeling_utils import AttentionInterface + + _custom_sdpa_for_ring_kv_cache = get_custom_sdpa_for_ring_kv_cache(exportable_module) + if self.use_custom_sdpa: + if self.use_custom_kv_cache: + AttentionInterface.register("custom_sdpa_ring_kv_cache", _custom_sdpa_for_ring_kv_cache) + AttentionMaskInterface.register("custom_sdpa_ring_kv_cache", sdpa_mask_without_vmap) + # Manually set the attention implementation to custom_sdpa_ring_kv_cache + # This handles both regular sdpa and one for sliding window/local attention + exportable_module.model.model.config._attn_implementation = "custom_sdpa_ring_kv_cache" + else: + # Manually set the attention implementation to custom_sdpa_ring_kv_cache + # This handles both regular sdpa and one for sliding window/local attention + exportable_module.model.model.config._attn_implementation = "custom_sdpa" def export( self, @@ -119,55 +113,48 @@ def export( logging.info( f"Exporting using input_ids({input_ids.shape})={input_ids}, cache_position({cache_position.shape})={cache_position}, dynamic_shapes={dynamic_shapes}, strict={strict}" ) - if is_transformers_version(">", "4.52.0"): - from transformers.integrations.executorch import ( - TorchExportableModuleForDecoderOnlyLM, - ) - exportable_module = TorchExportableModuleForDecoderOnlyLM( - self.model, - max_batch_size=1, - max_cache_len=self.metadata.get("get_max_seq_len"), - ) - self._register_attention_mask_for_4_53(exportable_module) + from transformers.integrations.executorch import ( + TorchExportableModuleForDecoderOnlyLM, + ) - if self.use_custom_kv_cache: - from optimum.executorch.attentions.custom_kv_cache import ( - replace_with_et_custom_kv_cache, - ) + exportable_module = TorchExportableModuleForDecoderOnlyLM( + self.model, + max_batch_size=1, + max_cache_len=self.metadata.get("get_max_seq_len"), + ) + self._register_custom_attention(exportable_module) - replace_with_et_custom_kv_cache( - exportable_module.model, - self.model.config, - 
self.model.generation_config, - self.model.dtype, - ) + if self.use_custom_kv_cache: + from optimum.executorch.attentions.custom_kv_cache import ( + replace_with_et_custom_kv_cache, + ) - with torch.no_grad(): - exported_program = exportable_module.export(input_ids, cache_position, dynamic_shapes, strict) - # Apply RemoveTransposes pass to remove - # any back-to-back transpose ops that are not needed - # e.g. output of update_cache is transposed and - # input to custom_sdpa is transposed. - from executorch.extension.llm.export.export_passes import ( - RemoveRedundantTransposes, - ) + replace_with_et_custom_kv_cache( + exportable_module.model, + self.model.config, + self.model.generation_config, + self.model.dtype, + ) - mutated_gm = RemoveRedundantTransposes()(exported_program.module())[0] - exported_program = torch.export.export( - mutated_gm, - args=(input_ids, cache_position), - kwargs={}, - dynamic_shapes=dynamic_shapes, - strict=strict, - ) - else: - # Path to use legacy API, static export only due to pinned transformers version - from transformers.integrations.executorch import ( - convert_and_export_with_cache, + with torch.no_grad(): + exported_program = exportable_module.export(input_ids, cache_position, dynamic_shapes, strict) + # Apply RemoveTransposes pass to remove + # any back-to-back transpose ops that are not needed + # e.g. output of update_cache is transposed and + # input to custom_sdpa is transposed. + from executorch.extension.llm.export.export_passes import ( + RemoveRedundantTransposes, ) - exported_program = convert_and_export_with_cache(self.model, input_ids, cache_position) + mutated_gm = RemoveRedundantTransposes()(exported_program.module())[0] + exported_program = torch.export.export( + mutated_gm, + args=(input_ids, cache_position), + kwargs={}, + dynamic_shapes=dynamic_shapes, + strict=strict, + ) return {"model": exported_program} @@ -400,8 +387,8 @@ def _export_decoder(self, decoder_input_ids, encoder_hidden_states, cache_positi wrapped_decoder = ( Seq2SeqLMDecoderExportableModuleWithStaticCache( model=self.full_model, - max_static_cache_length=self.generation_config.cache_config.max_cache_len, - batch_size=self.generation_config.cache_config.batch_size, + max_static_cache_length=self.generation_config.cache_config.get("max_cache_len"), + batch_size=self.generation_config.cache_config.get("batch_size"), ) .to("cpu") .eval() diff --git a/optimum/exporters/executorch/quantization.py b/optimum/exporters/executorch/quantization.py index c2e2028f..8994fd41 100644 --- a/optimum/exporters/executorch/quantization.py +++ b/optimum/exporters/executorch/quantization.py @@ -16,8 +16,6 @@ from typing import Optional import torch -import torchao -from packaging.version import parse def quantize_model_( @@ -26,10 +24,6 @@ def quantize_model_( if not (qlinear_config or qembedding_config): return - # TODO: Update torchao to use 0.11.0 once released - if parse(torchao.__version__) < parse("0.11.0.dev0"): - raise RuntimeError("Quantization requires torchao >= 0.11.0. 
Please upgrade torchao.") - from torchao.quantization.granularity import PerAxis, PerGroup from torchao.quantization.quant_api import ( Int8DynamicActivationIntxWeightConfig, diff --git a/optimum/exporters/executorch/utils.py b/optimum/exporters/executorch/utils.py index 70447957..e805ad4e 100644 --- a/optimum/exporters/executorch/utils.py +++ b/optimum/exporters/executorch/utils.py @@ -53,8 +53,8 @@ def save_config_to_constant_methods( # Check for cache_config and its attributes cache_config = getattr(generation_config, "cache_config", None) if cache_config is not None: - max_batch_size = getattr(cache_config, "batch_size", None) - max_seq_len = getattr(cache_config, "max_cache_len", None) + max_batch_size = cache_config.get("batch_size") + max_seq_len = cache_config.get("max_cache_len") if max_batch_size is not None: metadata["get_max_batch_size"] = max_batch_size diff --git a/setup.py b/setup.py index c7fa93ed..7d447fa6 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ INSTALL_REQUIRE = [ "optimum~=1.24", "executorch>=0.6.0", - "transformers==4.51.3", + "transformers==4.54.1", ] TESTS_REQUIRE = [ diff --git a/tests/models/test_modeling_gemma3.py b/tests/models/test_modeling_gemma3.py index 56666c04..11e91174 100644 --- a/tests/models/test_modeling_gemma3.py +++ b/tests/models/test_modeling_gemma3.py @@ -22,15 +22,11 @@ import unittest import pytest -import torchao -import transformers from executorch.extension.pybindings.portable_lib import ExecuTorchModule -from packaging.version import parse from transformers import AutoTokenizer from transformers.testing_utils import slow from optimum.executorch import ExecuTorchModelForCausalLM -from optimum.utils.import_utils import is_transformers_version from ..utils import check_causal_lm_output_quality @@ -41,10 +37,6 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false" -@pytest.mark.skipif( - is_transformers_version("<", "4.52.0.dev0"), - reason="Only available on transformers >= 4.52.0.dev0", -) class ExecuTorchModelIntegrationTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -119,10 +111,6 @@ def test_gemma3_text_generation_portable(self): @slow @pytest.mark.run_slow @pytest.mark.skipif(is_linux_ci, reason="OOM on linux runner") - @pytest.mark.skipif( - parse(transformers.__version__) < parse("4.53.0.dev0") or parse(torchao.__version__) < parse("0.11.0"), - reason="Only available on transformers >= 4.53.0.dev0 and torchao >= 0.11.0", - ) def test_gemma3_text_generation_with_custom_sdpa(self): # TODO: Until https://github.com/huggingface/optimum/issues/2127 is fixed, have to use non-gated model on CI # model_id = "google/gemma-3-1b-it" @@ -191,10 +179,6 @@ def test_gemma3_text_generation_with_custom_sdpa_float16(self): @slow @pytest.mark.run_slow - @pytest.mark.skipif( - parse(transformers.__version__) < parse("4.53.0.dev0") or parse(torchao.__version__) < parse("0.11.0"), - reason="Only available on transformers >= 4.53.0.dev0 and torchao >= 0.11.0", - ) def test_gemma3_text_generation_with_custom_sdpa_8da4w_8we(self): # TODO: Until https://github.com/huggingface/optimum/issues/2127 is fixed, have to use non-gated model on CI # model_id = "google/gemma-3-1b-it" @@ -230,10 +214,6 @@ def test_gemma3_text_generation_with_custom_sdpa_8da4w_8we(self): @slow @pytest.mark.run_slow - @pytest.mark.skipif( - parse(transformers.__version__) < parse("4.53.0.dev0") or parse(torchao.__version__) < parse("0.11.0"), - reason="Only available on transformers >= 4.53.0.dev0 and torchao >= 0.11.0", - ) def 
test_gemma3_text_generation_with_custom_sdpa_kv_cache_8da4w_8we(self): # TODO: Until https://github.com/huggingface/optimum/issues/2127 is fixed, have to use non-gated model on CI # model_id = "google/gemma-3-1b-it" diff --git a/tests/models/test_modeling_phi4.py b/tests/models/test_modeling_phi4.py index b0851444..fa1c9be6 100644 --- a/tests/models/test_modeling_phi4.py +++ b/tests/models/test_modeling_phi4.py @@ -21,7 +21,6 @@ import pytest import torchao -import transformers from executorch.extension.pybindings.portable_lib import ExecuTorchModule from packaging.version import parse from transformers import AutoConfig, AutoTokenizer @@ -43,12 +42,6 @@ def __init__(self, *args, **kwargs): @slow @pytest.mark.run_slow - @pytest.mark.skipif( - is_linux_ci - or parse(transformers.__version__) < parse("4.52.0") - or parse(torchao.__version__) < parse("0.11.0"), - reason="Only available on transformers >= 4.52.0 and torchao >= 0.11.0. OOM on linux runner.", - ) def test_phi4_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): model_id = "microsoft/Phi-4-mini-instruct" model = ExecuTorchModelForCausalLM.from_pretrained( diff --git a/tests/models/test_modeling_qwen3.py b/tests/models/test_modeling_qwen3.py index 0f0d8a36..4deacac8 100644 --- a/tests/models/test_modeling_qwen3.py +++ b/tests/models/test_modeling_qwen3.py @@ -23,7 +23,6 @@ import pytest import torchao -import transformers from executorch import version from executorch.extension.pybindings.portable_lib import ExecuTorchModule from packaging.version import parse @@ -214,10 +213,6 @@ def test_qwen3_text_generation_with_custom_sdpa_8da4w_8we(self): @slow @pytest.mark.run_slow - @pytest.mark.skipif( - parse(transformers.__version__) < parse("4.52.0.dev0"), - reason="Only available on transformers >= 4.52.0.dev0", - ) def test_qwen3_text_generation_with_custom_sdpa_and_kv_cache(self): model_id = "Qwen/Qwen3-0.6B" prompt = "Give me a short introduction to large language model." @@ -249,10 +244,6 @@ def test_qwen3_text_generation_with_custom_sdpa_and_kv_cache(self): @slow @pytest.mark.run_slow - @pytest.mark.skipif( - parse(transformers.__version__) < parse("4.52.0") or parse(torchao.__version__) < parse("0.11.0"), - reason="Only available on transformers >= 4.52.0 and torchao >= 0.11.0", - ) def test_qwen3_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): model_id = "Qwen/Qwen3-0.6B" prompt = "Give me a short introduction to large language model." 
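
For context, the tests above all exercise the same export-and-generate flow against the new transformers 4.54.1 / torchao nightly pins. The sketch below is illustrative only: the model id and prompt are copied from the qwen3 tests in this diff, but the `from_pretrained` keyword arguments (`recipe`, `attn_implementation`, `use_custom_kv_cache`) and the `text_generation` call are assumptions about the optimum-executorch API, not lines taken verbatim from the test bodies.

# Hedged sketch of the flow the tests above exercise; not part of this patch.
from transformers import AutoTokenizer
from optimum.executorch import ExecuTorchModelForCausalLM

model_id = "Qwen/Qwen3-0.6B"  # appears in the qwen3 tests above
model = ExecuTorchModelForCausalLM.from_pretrained(
    model_id,
    recipe="xnnpack",                   # assumed recipe name
    attn_implementation="custom_sdpa",  # assumed kwarg, mirrors the custom SDPA path
    use_custom_kv_cache=True,           # assumed kwarg, mirrors the custom KV-cache path
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
generated_text = model.text_generation(
    tokenizer=tokenizer,
    prompt="Give me a short introduction to large language model.",
    max_seq_len=64,
)
print(generated_text)
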
diff --git a/tests/models/test_modeling_qwen3_embedding.py b/tests/models/test_modeling_qwen3_embedding.py index 73002e9c..0146634f 100644 --- a/tests/models/test_modeling_qwen3_embedding.py +++ b/tests/models/test_modeling_qwen3_embedding.py @@ -19,8 +19,6 @@ import unittest import pytest -import torchao -import transformers from executorch import version from executorch.extension.pybindings.portable_lib import ExecuTorchModule from packaging.version import parse @@ -41,10 +39,6 @@ def __init__(self, *args, **kwargs): @slow @pytest.mark.run_slow - @pytest.mark.skipif( - parse(transformers.__version__) < parse("4.52.0") or parse(torchao.__version__) < parse("0.11.0"), - reason="Only available on transformers >= 4.52.0 and torchao >= 0.11.0", - ) def test_qwen3_embedding_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): model_id = "Qwen/Qwen3-Embedding-0.6B" prompt = "Explain gravity" diff --git a/tests/models/test_modeling_smollm3.py b/tests/models/test_modeling_smollm3.py index 34a63510..1f3e26a2 100644 --- a/tests/models/test_modeling_smollm3.py +++ b/tests/models/test_modeling_smollm3.py @@ -25,7 +25,6 @@ from transformers.testing_utils import slow from optimum.executorch import ExecuTorchModelForCausalLM -from optimum.utils.import_utils import is_transformers_version from ..utils import check_causal_lm_output_quality @@ -35,10 +34,7 @@ is_linux_ci = sys.platform.startswith("linux") and is_ci -@pytest.mark.skipif( - is_transformers_version("<", "4.53.1") or is_linux_ci, - reason="Only available on transformers >= 4.53.1", -) +@pytest.mark.skipif(is_linux_ci, reason="Runner OOM") class ExecuTorchModelIntegrationTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -76,7 +72,7 @@ def test_smollm3_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): @slow @pytest.mark.run_slow @pytest.mark.portable - @pytest.mark.skipif(is_ci, reason="Too big for CI runners") + @pytest.mark.skipif(is_ci, reason="Runner OOM") def test_smollm3_text_generation_portable(self): model_id = "HuggingFaceTB/SmolLM3-3B" prompt = "Give me a brief explanation of gravity in simple terms." diff --git a/tests/models/test_modeling_whisper.py b/tests/models/test_modeling_whisper.py index 3ecfc1c7..e88cb80f 100644 --- a/tests/models/test_modeling_whisper.py +++ b/tests/models/test_modeling_whisper.py @@ -28,16 +28,11 @@ from transformers.testing_utils import slow from optimum.executorch import ExecuTorchModelForSpeechSeq2Seq -from optimum.utils.import_utils import is_transformers_version os.environ["TOKENIZERS_PARALLELISM"] = "false" -@pytest.mark.skipif( - is_transformers_version(">", "4.52.4"), - reason="Need to fix in the transformers due to attention refactor https://github.com/huggingface/transformers/pull/38235", -) class ExecuTorchModelIntegrationTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs)
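
A recurring change in this diff is that `generation_config.cache_config` is now read with dict-style `.get(...)` instead of attribute access (see custom_kv_cache.py, integrations.py, and utils.py above). The helper below is a minimal, hypothetical sketch of how code that must run against both the old and new transformers pins could read those entries defensively; it is not part of this patch.

# Hypothetical helper, not part of this patch: read a cache_config entry whether
# transformers exposes cache_config as a dict-like object (as assumed by the
# .get("batch_size") / .get("max_cache_len") calls above) or as an object with
# attributes (the pre-4.54 behavior).
def read_cache_config(cache_config, key, default=None):
    if cache_config is None:
        return default
    if hasattr(cache_config, "get"):
        return cache_config.get(key, default)       # dict-style access, matches this patch
    return getattr(cache_config, key, default)      # attribute-style access, older transformers

# Example: read_cache_config(generation_config.cache_config, "max_cache_len")
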