Update: apply formatter line-wrapping and remove unused imports in the LLM export code and its tests

jackzhxng · jackzhxng · commit a9c64e264c96 · 2025-06-20T20:17:48.000-07:00
[ghstack-poisoned]
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
@@ -702,7 +702,11 @@ def _prepare_for_llama_export(llm_config: LlmConfig) -> LLMEdgeManager:
             checkpoint=llm_config.base.checkpoint,
             checkpoint_dtype=DType.from_torch_dtype(checkpoint_dtype),  # type: ignore
             tokenizer_path=llm_config.base.tokenizer_path,
-            use_spin_quant=llm_config.quantization.use_spin_quant.value if llm_config.quantization.use_spin_quant else None,
+            use_spin_quant=(
+                llm_config.quantization.use_spin_quant.value
+                if llm_config.quantization.use_spin_quant
+                else None
+            ),
             embedding_quantize=llm_config.quantization.embedding_quantize,
             use_shared_embedding=llm_config.model.use_shared_embedding,
             quantization_mode=llm_config.quantization.qmode,
@@ -726,7 +730,9 @@ def _prepare_for_llama_export(llm_config: LlmConfig) -> LLMEdgeManager:
             vulkan=llm_config.backend.vulkan.enabled,
             use_qat=llm_config.quantization.use_qat,
             use_lora=llm_config.base.use_lora,
-            preq_mode=llm_config.base.preq_mode.value if llm_config.base.preq_mode else None,
+            preq_mode=(
+                llm_config.base.preq_mode.value if llm_config.base.preq_mode else None
+            ),
             preq_group_size=llm_config.base.preq_group_size,
             preq_embedding_quantize=llm_config.base.preq_embedding_quantize,
             local_global_attention=llm_config.model.local_global_attention,
@@ -738,7 +744,12 @@ def _prepare_for_llama_export(llm_config: LlmConfig) -> LLMEdgeManager:
 
 def get_quantizer_and_quant_params(llm_config):
     pt2e_quant_params = get_pt2e_quantization_params(
-        llm_config.quantization.pt2e_quantize.value if llm_config.quantization.pt2e_quantize else None, llm_config.quantization.qmode
+        (
+            llm_config.quantization.pt2e_quantize.value
+            if llm_config.quantization.pt2e_quantize
+            else None
+        ),
+        llm_config.quantization.qmode,
     )
     quantizers = get_pt2e_quantizers(pt2e_quant_params, llm_config.export.so_library)
     quant_dtype = None
@@ -750,13 +761,17 @@ def get_quantizer_and_quant_params(llm_config):
         quantizers.append(qnn_quantizer)
     if llm_config.backend.coreml.enabled and llm_config.quantization.pt2e_quantize:
         assert len(quantizers) == 0, "Should not enable both xnnpack / qnn and coreml"
-        coreml_quantizer = get_coreml_quantizer(llm_config.quantization.pt2e_quantize.value)
+        coreml_quantizer = get_coreml_quantizer(
+            llm_config.quantization.pt2e_quantize.value
+        )
         quantizers.append(coreml_quantizer)
     if llm_config.backend.vulkan.enabled and llm_config.quantization.pt2e_quantize:
         assert (
             len(quantizers) == 0
         ), "Should not enable both vulkan and other quantizers"
-        vulkan_quantizer = get_vulkan_quantizer(llm_config.quantization.pt2e_quantize.value)
+        vulkan_quantizer = get_vulkan_quantizer(
+            llm_config.quantization.pt2e_quantize.value
+        )
         quantizers.append(vulkan_quantizer)
     logging.info(f"Applying quantizers: {quantizers}")
     return pt2e_quant_params, quantizers, quant_dtype
@@ -1076,9 +1091,17 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager:  # noqa: C901
             enable_dynamic_shape=llm_config.model.enable_dynamic_shape,
             use_kv_cache=llm_config.model.use_kv_cache,
             embedding_quantize=llm_config.quantization.embedding_quantize,
-            pt2e_quantize=llm_config.quantization.pt2e_quantize.value if llm_config.quantization.pt2e_quantize else None,
+            pt2e_quantize=(
+                llm_config.quantization.pt2e_quantize.value
+                if llm_config.quantization.pt2e_quantize
+                else None
+            ),
             coreml_ios=llm_config.backend.coreml.ios,
-            coreml_quantize=llm_config.backend.coreml.quantize.value if llm_config.backend.coreml.quantize else None,
+            coreml_quantize=(
+                llm_config.backend.coreml.quantize.value
+                if llm_config.backend.coreml.quantize
+                else None
+            ),
             coreml_compute_units=llm_config.backend.coreml.compute_units.value,
             use_qnn_sha=llm_config.backend.qnn.use_sha,
             num_sharding=llm_config.backend.qnn.num_sharding,
diff --git a/extension/llm/export/export_llm.py b/extension/llm/export/export_llm.py
@@ -34,12 +34,11 @@
 from typing import Any, List, Tuple
 
 import hydra
-import yaml
 
 from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import export_llama
 from hydra.core.config_store import ConfigStore
-from omegaconf import DictConfig, OmegaConf
+from omegaconf import OmegaConf
 
 cs = ConfigStore.instance()
 cs.store(name="llm_config", node=LlmConfig)
@@ -79,7 +78,7 @@ def main() -> None:
                 "Cannot specify additional CLI arguments when using --config. "
                 f"Found: {remaining_args}. Use either --config file or hydra CLI args, not both."
             )
-        
+
         config_file_path = pop_config_arg()
         default_llm_config = LlmConfig()
         llm_config_from_file = OmegaConf.load(config_file_path)
diff --git a/extension/llm/export/test/test_export_llm.py b/extension/llm/export/test/test_export_llm.py
@@ -10,8 +10,11 @@
 import unittest
 from unittest.mock import MagicMock, patch
 
-from executorch.examples.models.llama.config.llm_config import LlmConfig
-from executorch.extension.llm.export.export_llm import main, parse_config_arg, pop_config_arg
+from executorch.extension.llm.export.export_llm import (
+    main,
+    parse_config_arg,
+    pop_config_arg,
+)
 
 
 class TestExportLlm(unittest.TestCase):
@@ -45,7 +48,8 @@ def test_with_config(self, mock_export_llama: MagicMock) -> None:
         """Test main function with --config file and no hydra args."""
         # Create a temporary config file
         with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
-            f.write("""
+            f.write(
+                """
 base:
   model_class: llama2
   tokenizer_path: /path/to/tokenizer.json
@@ -61,7 +65,8 @@ def test_with_config(self, mock_export_llama: MagicMock) -> None:
   coreml:
     quantize: c4w
     compute_units: cpu_and_gpu
-""")
+"""
+            )
             config_file = f.name
 
         try:
@@ -72,23 +77,35 @@ def test_with_config(self, mock_export_llama: MagicMock) -> None:
             # Verify export_llama was called with config
             mock_export_llama.assert_called_once()
             called_config = mock_export_llama.call_args[0][0]
-            self.assertEqual(called_config["base"]["tokenizer_path"], "/path/to/tokenizer.json")
+            self.assertEqual(
+                called_config["base"]["tokenizer_path"], "/path/to/tokenizer.json"
+            )
             self.assertEqual(called_config["base"]["model_class"], "llama2")
             self.assertEqual(called_config["base"]["preq_mode"].value, "8da4w")
             self.assertEqual(called_config["model"]["dtype_override"].value, "fp16")
             self.assertEqual(called_config["export"]["max_seq_length"], 256)
-            self.assertEqual(called_config["quantization"]["pt2e_quantize"].value, "xnnpack_dynamic")
-            self.assertEqual(called_config["quantization"]["use_spin_quant"].value, "cuda")
-            self.assertEqual(called_config["backend"]["coreml"]["quantize"].value, "c4w")
-            self.assertEqual(called_config["backend"]["coreml"]["compute_units"].value, "cpu_and_gpu")
+            self.assertEqual(
+                called_config["quantization"]["pt2e_quantize"].value, "xnnpack_dynamic"
+            )
+            self.assertEqual(
+                called_config["quantization"]["use_spin_quant"].value, "cuda"
+            )
+            self.assertEqual(
+                called_config["backend"]["coreml"]["quantize"].value, "c4w"
+            )
+            self.assertEqual(
+                called_config["backend"]["coreml"]["compute_units"].value, "cpu_and_gpu"
+            )
         finally:
             os.unlink(config_file)
 
     def test_with_cli_args(self) -> None:
         """Test main function with only hydra CLI args."""
         test_argv = ["script.py", "debug.verbose=True"]
         with patch.object(sys, "argv", test_argv):
-            with patch("executorch.extension.llm.export.export_llm.hydra_main") as mock_hydra:
+            with patch(
+                "executorch.extension.llm.export.export_llm.hydra_main"
+            ) as mock_hydra:
                 main()
                 mock_hydra.assert_called_once()
 
@@ -104,9 +121,12 @@ def test_config_with_cli_args_error(self) -> None:
             with patch.object(sys, "argv", test_argv):
                 with self.assertRaises(ValueError) as cm:
                     main()
-                
+
                 error_msg = str(cm.exception)
-                self.assertIn("Cannot specify additional CLI arguments when using --config", error_msg)
+                self.assertIn(
+                    "Cannot specify additional CLI arguments when using --config",
+                    error_msg,
+                )
         finally:
             os.unlink(config_file)
 
@@ -117,7 +137,13 @@ def test_config_rejects_multiple_cli_args(self) -> None:
             config_file = f.name
 
         try:
-            test_argv = ["script.py", "--config", config_file, "debug.verbose=True", "export.output_dir=/tmp"]
+            test_argv = [
+                "script.py",
+                "--config",
+                config_file,
+                "debug.verbose=True",
+                "export.output_dir=/tmp",
+            ]
             with patch.object(sys, "argv", test_argv):
                 with self.assertRaises(ValueError):
                     main()
@@ -127,4 +153,3 @@ def test_config_rejects_multiple_cli_args(self) -> None:
 
 if __name__ == "__main__":
     unittest.main()
-