
Commit 7d6f521 (1 parent: 0bbe0b2)

Update base for Update on "Add test case to export, quantize and lower vision encoder model for ET"

Differential Revision: [D67878162](https://our.internmc.facebook.com/intern/diff/D67878162) [ghstack-poisoned]

File tree: 1 file changed (+2 additions, -2 deletions)


extension/llm/modules/attention.py

Lines changed: 2 additions & 2 deletions
@@ -9,11 +9,11 @@
 
 import torch
 import torchtune.modules.attention as TorchTuneAttention
+from executorch.examples.models.llama.source_transformation.sdpa import SDPACustom
 from executorch.extension.llm.modules.kv_cache import KVCache as InferenceKVCache
 from torch import nn
 from torchtune.modules.attention_utils import _MaskType, _sdpa_or_flex_attention
 from torchtune.modules.kv_cache import KVCache
-from executorch.examples.models.llama.source_transformation.sdpa import SDPACustom
 
 logger = logging.getLogger(__name__)
 
@@ -367,7 +367,6 @@ def forward(
             k = k.unsqueeze(2).expand(expand_shape).flatten(1, 2)
             v = v.unsqueeze(2).expand(expand_shape).flatten(1, 2)
 
-
         output = self._attention_fn(
             q,
             k,
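
Note on the context lines in the hunk above: the unsqueeze/expand/flatten pattern is the grouped-query-attention trick of repeating each KV head so k and v line up with the number of query heads before the attention call. A minimal standalone sketch of that pattern follows; all shapes and variable names here are illustrative assumptions, not taken from the file.

import torch

# Illustrative GQA KV-head expansion (shapes are assumptions for demonstration).
bsz, n_kv_heads, q_per_kv, seq_len, head_dim = 2, 4, 2, 16, 64

k = torch.randn(bsz, n_kv_heads, seq_len, head_dim)  # [b, n_kv, s, d]

# Insert a size-1 dim, broadcast it q_per_kv times, then merge it into the
# head dim so k matches the query heads (n_kv * q_per_kv == n_heads).
expand_shape = (bsz, n_kv_heads, q_per_kv, seq_len, head_dim)
k = k.unsqueeze(2).expand(expand_shape).flatten(1, 2)

assert k.shape == (bsz, n_kv_heads * q_per_kv, seq_len, head_dim)  # [2, 8, 16, 64]
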
@@ -431,5 +430,6 @@ def _replace_sdpa_with_custom_op(module: torch.nn.Module):
 
 def replace_sdpa_with_custom_op(module: torch.nn.Module) -> torch.nn.Module:
     from executorch.extension.llm.custom_ops import custom_ops
+
     _replace_sdpa_with_custom_op(module)
     return module
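
For readers of this commit, a hedged usage sketch of the function touched in the last hunk: replace_sdpa_with_custom_op swaps the eager SDPA for the ExecuTorch custom op before export. Only the function name and signature come from the diff; the import path and the placeholder model below are assumptions.

from torch import nn

# Hypothetical usage (not part of the commit). The module path mirrors the
# other executorch.extension.llm.modules imports in the diff and is assumed.
from executorch.extension.llm.modules.attention import replace_sdpa_with_custom_op

model = nn.Sequential(nn.Linear(16, 16))  # stand-in; a real model would contain the SDPA modules this pass rewrites
model = replace_sdpa_with_custom_op(model)  # loads the custom ops, rewrites SDPA in place, returns the module
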
