pytorch · facebook-github-bot · Jun 26, 2025 · Jun 26, 2025
@@ -22,13 +22,13 @@
     TosaPipelineBI,
     TosaPipelineMI,
 )
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     build_args_parser,
     get_llama_model,
 )
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 input_t = Tuple[torch.Tensor]
 
 # Add project dir to sys path to workaround importlib.import_module() conditions in model_factory.py

@@ -19,8 +19,6 @@
 from executorch.devtools.bundled_program.serialize import (
     serialize_from_bundled_program_to_flatbuffer,
 )
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.exir import (
     EdgeCompileConfig,
     EdgeProgramManager,
@@ -31,6 +29,8 @@
 from executorch.exir.capture._config import ExecutorchBackendConfig
 from executorch.extension.export_util.utils import export_to_edge, save_pte_program
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 from ....models import MODEL_NAME_TO_MODEL
 from ....models.model_factory import EagerModelFactory
 

@@ -67,7 +67,7 @@ runtime.python_library(
         "//caffe2:torch",
         "//executorch/examples/models:model_base",
         "//executorch/examples/models/llama:llama_transformer",
-        "//executorch/examples/models/llama/config:llm_config",
+        "//executorch/extension/llm/export/config:llm_config",
         "//executorch/examples/models:checkpoint",
     ],
 )
@@ -150,7 +150,7 @@ runtime.python_library(
         ":source_transformation",
         "//ai_codesign/gen_ai/fast_hadamard_transform:fast_hadamard_transform",
         "//caffe2:torch",
-        "//executorch/examples/models/llama/config:llm_config",
+        "//executorch/extension/llm/export/config:llm_config",
         "//executorch/backends/vulkan/_passes:vulkan_passes",
         "//executorch/exir/passes:init_mutable_pass",
         "//executorch/examples/models:model_base",

@@ -2,25 +2,12 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
 
 def define_common_targets():
-    runtime.python_library(
-        name = "llm_config",
-        srcs = [
-            "llm_config.py",
-        ],
-        _is_external_target = True,
-        base_module = "executorch.examples.models.llama.config",
-        visibility = [
-            "//executorch/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-    )
-
     python_unittest(
         name = "test_llm_config",
         srcs = [
             "test_llm_config.py",
         ],
         deps = [
-            ":llm_config",
+            "//executorch/extension/llm/export/config:llm_config",
         ],
     )
@@ -8,7 +8,7 @@
 
 import unittest
 
-from executorch.examples.models.llama.config.llm_config import (
+from executorch.extension.llm.export.config.llm_config import (
     BackendConfig,
     BaseConfig,
     CoreMLComputeUnit,

@@ -175,7 +175,7 @@ def gen_eval_wrapper(
     """
     # If llm_config is not provided, convert args to llm_config
     if llm_config is None:
-        from executorch.examples.models.llama.config.llm_config import LlmConfig
+        from executorch.extension.llm.export.config.llm_config import LlmConfig
 
         llm_config = LlmConfig.from_args(args)
 
@@ -306,7 +306,7 @@ def eval_llama(
     args: argparse.ArgumentParser,
 ) -> None:
     # Convert args to LlmConfig
-    from executorch.examples.models.llama.config.llm_config import LlmConfig
+    from executorch.extension.llm.export.config.llm_config import LlmConfig
 
     llm_config = LlmConfig.from_args(args)
 
@@ -340,7 +340,7 @@ def eval_llama_with_attention_sink(model_name: str, args: argparse.ArgumentParse
     This is mostly copied from https://github.com/mit-han-lab/streaming-llm/blob/main/examples/eval_long_ppl.py
     """
     # Convert args to LlmConfig
-    from executorch.examples.models.llama.config.llm_config import LlmConfig
+    from executorch.extension.llm.export.config.llm_config import LlmConfig
 
     llm_config = LlmConfig.from_args(args)
 

@@ -27,15 +27,15 @@
 from executorch.devtools.backend_debug import print_delegation_info
 
 from executorch.devtools.etrecord import generate_etrecord as generate_etrecord_func
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.hf_download import (
     download_and_convert_hf_checkpoint,
 )
 from executorch.exir.passes.init_mutable_pass import InitializedMutableBufferPass
 
 from executorch.extension.llm.export.builder import DType, LLMEdgeManager
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 from executorch.extension.llm.export.partitioner_lib import (
     get_coreml_partitioner,
     get_mps_partitioner,

@@ -15,11 +15,11 @@
     get_checkpoint_dtype,
     get_default_model_resource_dir,
 )
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.llama_transformer import construct_transformer
 from executorch.examples.models.llama.model_args import ModelArgs
 from executorch.examples.models.llama.rope import Rope
+
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 from torchao.utils import TorchAOBaseTensor
 
 try:

@@ -9,15 +9,15 @@
 from typing import Optional, Type
 
 import torch
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     _prepare_for_llama_export,
     build_args_parser as _build_args_parser,
 )
 from executorch.examples.models.llama.runner.generation import LlamaRunner
 from executorch.extension.llm.export.builder import LLMEdgeManager
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 
 class EagerLlamaRunner(LlamaRunner):
     """

@@ -7,11 +7,11 @@
 import unittest
 
 from executorch.devtools.backend_debug import get_delegation_info
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     _export_llama,
     build_args_parser,
 )
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 
 UNWANTED_OPS = [
     "aten_permute_copy_default",

@@ -8,14 +8,14 @@
 from typing import Optional
 
 import torch
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 
 from executorch.examples.models.llama.export_llama_lib import _prepare_for_llama_export
 from executorch.examples.models.llama.runner.eager import execute_runner
 from executorch.examples.models.llama3_2_vision.runner.generation import (
     TorchTuneLlamaRunner,
 )
 from executorch.extension.llm.export import LLMEdgeManager
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 
 
 class EagerLlamaRunner(TorchTuneLlamaRunner):

@@ -16,7 +16,6 @@
     get_symmetric_quantization_config,
     XNNPACKQuantizer,
 )
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     get_quantizer_and_quant_params,
 )
@@ -44,6 +43,7 @@
     HintBasedSymShapeEvalPass,
 )
 from executorch.extension.llm.export.builder import DType, LLMEdgeManager
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 from executorch.util.activation_memory_profiler import generate_memory_trace
 from pytorch_tokenizers.llama2c import Llama2cTokenizer as Tokenizer
 from torch.export import Dim

@@ -96,7 +96,7 @@ Please refer to the docs for some of our example suported models ([Llama](https:
 
 ## Configuration Options
 
-For a complete reference of all available configuration options, see the [LlmConfig class definition](../../../examples/models/llama/config/llm_config.py) which documents all supported parameters for base, model, export, quantization, backend, and debug configurations.
+For a complete reference of all available configuration options, see the [LlmConfig class definition](config/llm_config.py), which documents all supported parameters for base, model, export, quantization, backend, and debug configurations.
 
 ## Further Reading
 

@@ -0,0 +1,5 @@
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
@@ -0,0 +1,15 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():  # Declares this directory's build targets; the sibling TARGETS file calls it with no arguments.
+    runtime.python_library(
+        name = "llm_config",  # Dependents refer to this as "//executorch/extension/llm/export/config:llm_config".
+        srcs = [
+            "llm_config.py",
+        ],
+        _is_external_target = True,  # NOTE(review): semantics come from runtime_wrapper.bzl, not visible here; carried over unchanged from the old target.
+        base_module = "executorch.extension.llm.export.config",  # Lines up with the `executorch.extension.llm.export.config.llm_config` import path used by callers.
+        visibility = [
+            "//executorch/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
@@ -35,9 +35,9 @@
 from typing import Any, List, Tuple
 
 import hydra
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import export_llama
+
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 from hydra.core.config_store import ConfigStore
 from omegaconf import OmegaConf