 from typing import Optional
 
 import torch
+from executorch.examples.models.llama.config.llm_config import LlmConfig
 
 from executorch.examples.models.llama.export_llama_lib import _prepare_for_llama_export
 from executorch.examples.models.llama.runner.eager import execute_runner
@@ -22,18 +23,23 @@ class EagerLlamaRunner(TorchTuneLlamaRunner):
     Runs llama in eager mode with provided checkpoint file.
     """
 
-    def __init__(self, args):
-        with open(args.params, "r") as f:
+    def __init__(
+        self,
+        llm_config: LlmConfig,
+        tokenizer_config_path: Optional[str] = None,
+        use_attention_sink: bool = False,
+    ):
+        with open(llm_config.base.params, "r") as f:
             params = json.loads(f.read())
         super().__init__(
-            tokenizer_path=args.tokenizer_path,
-            max_seq_len=args.max_seq_length,
+            tokenizer_path=llm_config.base.tokenizer_path,
+            max_seq_len=llm_config.export.max_seq_length,
             max_batch_size=1,
-            use_kv_cache=args.use_kv_cache,
+            use_kv_cache=llm_config.model.use_kv_cache,
             vocab_size=params["vocab_size"],
             device="cuda" if torch.cuda.is_available() else "cpu",
         )
-        manager: LLMEdgeManager = _prepare_for_llama_export(args)
+        manager: LLMEdgeManager = _prepare_for_llama_export(llm_config)
         self.model = manager.model.eval().to(device=self.device)
 
     def forward(
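For context, a minimal usage sketch of the new `LlmConfig`-based constructor. This is not part of the PR: the import path for `EagerLlamaRunner`, the default-constructibility of `LlmConfig`, and the placeholder paths/values are assumptions, while the field names (`base.params`, `base.tokenizer_path`, `export.max_seq_length`, `model.use_kv_cache`) come from the diff above.

```python
# Hypothetical usage sketch of the refactored constructor (not from this PR).
# Assumes LlmConfig is a default-constructible config object whose nested
# sections (base, export, model) can be populated directly.
from executorch.examples.models.llama.config.llm_config import LlmConfig

# Placeholder import path for the class defined in the diff above.
from executorch.examples.models.llama3_2_vision.runner.eager import EagerLlamaRunner

llm_config = LlmConfig()
llm_config.base.params = "params.json"              # placeholder: model hyperparameters (vocab_size, ...)
llm_config.base.tokenizer_path = "tokenizer.model"  # placeholder: tokenizer file
llm_config.export.max_seq_length = 128
llm_config.model.use_kv_cache = True

runner = EagerLlamaRunner(llm_config)
```

The same `LlmConfig` instance is forwarded to `_prepare_for_llama_export`, so the runner and the export path read model settings from a single config object instead of a parsed argparse namespace.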