diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py
index 2b524fe0cc9..84eec491c1e 100644
--- a/backends/arm/test/models/test_llama.py
+++ b/backends/arm/test/models/test_llama.py
@@ -22,13 +22,13 @@
     TosaPipelineBI,
     TosaPipelineMI,
 )
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     build_args_parser,
     get_llama_model,
 )
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 input_t = Tuple[torch.Tensor]
 
 # Add project dir to sys path to workaround importlib.import_module() conditions in model_factory.py
diff --git a/examples/apple/mps/scripts/mps_example.py b/examples/apple/mps/scripts/mps_example.py
index 4aa34fce9f8..42ea79435ed 100644
--- a/examples/apple/mps/scripts/mps_example.py
+++ b/examples/apple/mps/scripts/mps_example.py
@@ -19,8 +19,6 @@
 from executorch.devtools.bundled_program.serialize import (
     serialize_from_bundled_program_to_flatbuffer,
 )
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.exir import (
     EdgeCompileConfig,
     EdgeProgramManager,
@@ -31,6 +29,8 @@
 from executorch.exir.capture._config import ExecutorchBackendConfig
 from executorch.extension.export_util.utils import export_to_edge, save_pte_program
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 from ....models import MODEL_NAME_TO_MODEL
 from ....models.model_factory import EagerModelFactory
diff --git a/examples/models/llama/TARGETS b/examples/models/llama/TARGETS
index d2caccd5897..95d57e12f5a 100644
--- a/examples/models/llama/TARGETS
+++ b/examples/models/llama/TARGETS
@@ -67,7 +67,7 @@ runtime.python_library(
         "//caffe2:torch",
         "//executorch/examples/models:model_base",
         "//executorch/examples/models/llama:llama_transformer",
-        "//executorch/examples/models/llama/config:llm_config",
+        "//executorch/extension/llm/export/config:llm_config",
         "//executorch/examples/models:checkpoint",
     ],
 )
@@ -150,7 +150,7 @@ runtime.python_library(
         ":source_transformation",
         "//ai_codesign/gen_ai/fast_hadamard_transform:fast_hadamard_transform",
         "//caffe2:torch",
-        "//executorch/examples/models/llama/config:llm_config",
+        "//executorch/extension/llm/export/config:llm_config",
         "//executorch/backends/vulkan/_passes:vulkan_passes",
         "//executorch/exir/passes:init_mutable_pass",
         "//executorch/examples/models:model_base",
diff --git a/examples/models/llama/config/targets.bzl b/examples/models/llama/config/targets.bzl
index 8b85ce6d107..a72ffa6c8e0 100644
--- a/examples/models/llama/config/targets.bzl
+++ b/examples/models/llama/config/targets.bzl
@@ -2,25 +2,12 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
 
 def define_common_targets():
-    runtime.python_library(
-        name = "llm_config",
-        srcs = [
-            "llm_config.py",
-        ],
-        _is_external_target = True,
-        base_module = "executorch.examples.models.llama.config",
-        visibility = [
-            "//executorch/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-    )
-
     python_unittest(
         name = "test_llm_config",
         srcs = [
             "test_llm_config.py",
         ],
         deps = [
-            ":llm_config",
+            "//executorch/extension/llm/export/config:llm_config",
         ],
     )
diff --git a/examples/models/llama/config/test_llm_config.py b/examples/models/llama/config/test_llm_config.py
index 52b56d71a03..ec85e4c2e92 100644
--- a/examples/models/llama/config/test_llm_config.py
+++ b/examples/models/llama/config/test_llm_config.py
@@ -8,7 +8,7 @@
 import unittest
 
-from executorch.examples.models.llama.config.llm_config import (
+from executorch.extension.llm.export.config.llm_config import (
     BackendConfig,
     BaseConfig,
     CoreMLComputeUnit,
diff --git a/examples/models/llama/eval_llama_lib.py b/examples/models/llama/eval_llama_lib.py
index 20ba6dbaa9f..991ff72ae43 100644
--- a/examples/models/llama/eval_llama_lib.py
+++ b/examples/models/llama/eval_llama_lib.py
@@ -175,7 +175,7 @@ def gen_eval_wrapper(
     """
     # If llm_config is not provided, convert args to llm_config
     if llm_config is None:
-        from executorch.examples.models.llama.config.llm_config import LlmConfig
+        from executorch.extension.llm.export.config.llm_config import LlmConfig
 
         llm_config = LlmConfig.from_args(args)
 
@@ -306,7 +306,7 @@ def eval_llama(
     args: argparse.ArgumentParser,
 ) -> None:
     # Convert args to LlmConfig
-    from executorch.examples.models.llama.config.llm_config import LlmConfig
+    from executorch.extension.llm.export.config.llm_config import LlmConfig
 
     llm_config = LlmConfig.from_args(args)
 
@@ -340,7 +340,7 @@ def eval_llama_with_attention_sink(model_name: str, args: argparse.ArgumentParse
     This is mostly copied from https://github.com/mit-han-lab/streaming-llm/blob/main/examples/eval_long_ppl.py
     """
     # Convert args to LlmConfig
-    from executorch.examples.models.llama.config.llm_config import LlmConfig
+    from executorch.extension.llm.export.config.llm_config import LlmConfig
 
     llm_config = LlmConfig.from_args(args)
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index 685e9de9a2e..43ae595f797 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -27,8 +27,6 @@
 from executorch.devtools.backend_debug import print_delegation_info
 
 from executorch.devtools.etrecord import generate_etrecord as generate_etrecord_func
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.hf_download import (
     download_and_convert_hf_checkpoint,
 )
@@ -36,6 +34,8 @@
 from executorch.extension.llm.export.builder import DType, LLMEdgeManager
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 from executorch.extension.llm.export.partitioner_lib import (
     get_coreml_partitioner,
     get_mps_partitioner,
diff --git a/examples/models/llama/model.py b/examples/models/llama/model.py
index efea80dde2f..27d41ac90cd 100644
--- a/examples/models/llama/model.py
+++ b/examples/models/llama/model.py
@@ -15,11 +15,11 @@
     get_checkpoint_dtype,
     get_default_model_resource_dir,
 )
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.llama_transformer import construct_transformer
 from executorch.examples.models.llama.model_args import ModelArgs
 from executorch.examples.models.llama.rope import Rope
+
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 from torchao.utils import TorchAOBaseTensor
 
 try:
diff --git a/examples/models/llama/runner/eager.py b/examples/models/llama/runner/eager.py
index c55ad0eea28..7e662317509 100644
--- a/examples/models/llama/runner/eager.py
+++ b/examples/models/llama/runner/eager.py
@@ -9,8 +9,6 @@
 from typing import Optional, Type
 
 import torch
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     _prepare_for_llama_export,
     build_args_parser as _build_args_parser,
@@ -18,6 +16,8 @@
 from executorch.examples.models.llama.runner.generation import LlamaRunner
 from executorch.extension.llm.export.builder import LLMEdgeManager
 
+from executorch.extension.llm.export.config.llm_config import LlmConfig
+
 
 class EagerLlamaRunner(LlamaRunner):
     """
diff --git a/examples/models/llama/tests/test_export_llama_lib.py b/examples/models/llama/tests/test_export_llama_lib.py
index f2ac9497604..172517207de 100644
--- a/examples/models/llama/tests/test_export_llama_lib.py
+++ b/examples/models/llama/tests/test_export_llama_lib.py
@@ -7,11 +7,11 @@
 import unittest
 
 from executorch.devtools.backend_debug import get_delegation_info
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     _export_llama,
     build_args_parser,
 )
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 
 UNWANTED_OPS = [
     "aten_permute_copy_default",
diff --git a/examples/models/llama3_2_vision/runner/eager.py b/examples/models/llama3_2_vision/runner/eager.py
index 5e68a43bf8e..1431901c050 100644
--- a/examples/models/llama3_2_vision/runner/eager.py
+++ b/examples/models/llama3_2_vision/runner/eager.py
@@ -8,7 +8,6 @@
 from typing import Optional
 
 import torch
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import _prepare_for_llama_export
 from executorch.examples.models.llama.runner.eager import execute_runner
@@ -16,6 +15,7 @@
     TorchTuneLlamaRunner,
 )
 from executorch.extension.llm.export import LLMEdgeManager
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 
 
 class EagerLlamaRunner(TorchTuneLlamaRunner):
diff --git a/examples/models/llava/export_llava.py b/examples/models/llava/export_llava.py
index 32b3ff448ac..e0580aa859a 100644
--- a/examples/models/llava/export_llava.py
+++ b/examples/models/llava/export_llava.py
@@ -16,7 +16,6 @@
     get_symmetric_quantization_config,
     XNNPACKQuantizer,
 )
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import (
     get_quantizer_and_quant_params,
 )
@@ -44,6 +43,7 @@
     HintBasedSymShapeEvalPass,
 )
 from executorch.extension.llm.export.builder import DType, LLMEdgeManager
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 from executorch.util.activation_memory_profiler import generate_memory_trace
 from pytorch_tokenizers.llama2c import Llama2cTokenizer as Tokenizer
 from torch.export import Dim
diff --git a/extension/llm/export/README.md b/extension/llm/export/README.md
index e97b9e10462..f5d3b95bbf3 100644
--- a/extension/llm/export/README.md
+++ b/extension/llm/export/README.md
@@ -96,7 +96,7 @@ Please refer to the docs for some of our example suported models ([Llama](https:
 
 ## Configuration Options
 
-For a complete reference of all available configuration options, see the [LlmConfig class definition](../../../examples/models/llama/config/llm_config.py) which documents all supported parameters for base, model, export, quantization, backend, and debug configurations.
+For a complete reference of all available configuration options, see the [LlmConfig class definition](config/llm_config.py) which documents all supported parameters for base, model, export, quantization, backend, and debug configurations.
 
 ## Further Reading
diff --git a/extension/llm/export/config/TARGETS b/extension/llm/export/config/TARGETS
new file mode 100644
index 00000000000..bf8d13dcf37
--- /dev/null
+++ b/extension/llm/export/config/TARGETS
@@ -0,0 +1,5 @@
+load(":targets.bzl", "define_common_targets")
+
+oncall("executorch")
+
+define_common_targets()
\ No newline at end of file
diff --git a/examples/models/llama/config/llm_config.py b/extension/llm/export/config/llm_config.py
similarity index 100%
rename from examples/models/llama/config/llm_config.py
rename to extension/llm/export/config/llm_config.py
diff --git a/extension/llm/export/config/targets.bzl b/extension/llm/export/config/targets.bzl
new file mode 100644
index 00000000000..4135b336fbd
--- /dev/null
+++ b/extension/llm/export/config/targets.bzl
@@ -0,0 +1,15 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+def define_common_targets():
+    runtime.python_library(
+        name = "llm_config",
+        srcs = [
+            "llm_config.py",
+        ],
+        _is_external_target = True,
+        base_module = "executorch.extension.llm.export.config",
+        visibility = [
+            "//executorch/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
\ No newline at end of file
diff --git a/extension/llm/export/export_llm.py b/extension/llm/export/export_llm.py
index e0467250a28..9841ae854a8 100644
--- a/extension/llm/export/export_llm.py
+++ b/extension/llm/export/export_llm.py
@@ -35,9 +35,9 @@
 from typing import Any, List, Tuple
 
 import hydra
-
-from executorch.examples.models.llama.config.llm_config import LlmConfig
 from executorch.examples.models.llama.export_llama_lib import export_llama
+
+from executorch.extension.llm.export.config.llm_config import LlmConfig
 from hydra.core.config_store import ConfigStore
 from omegaconf import OmegaConf
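
Note: llm_config.py is a pure move (100% similarity rename) from examples/models/llama/config/ to extension/llm/export/config/, so the only change required of dependent code is the import path (or, for Buck targets, the //executorch/extension/llm/export/config:llm_config label). A minimal sketch of the new import in use, assuming this patch is applied; build_args_parser and LlmConfig.from_args are the same helpers exercised in the hunks above, and the empty argv is a hypothetical default-only invocation:

    # sketch.py -- post-migration usage sketch, not part of this patch
    from executorch.examples.models.llama.export_llama_lib import build_args_parser

    # New import path introduced by this change (formerly
    # executorch.examples.models.llama.config.llm_config):
    from executorch.extension.llm.export.config.llm_config import LlmConfig

    parser = build_args_parser()
    args = parser.parse_args([])  # hypothetical: empty argv, defaults only
    llm_config = LlmConfig.from_args(args)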