pytorch · jackzhxng · Jun 24, 2025 · Jun 24, 2025 · Jun 24, 2025 · Jun 24, 2025
@@ -9,7 +9,7 @@
 import torch
 
 from executorch.backends.xnnpack.test.tester import Tester
-from executorch.examples.models.llama.model import Llama2Model
+from executorch.examples.models.llama import Llama2Model
 
 
 class TestLlama2ETExample(unittest.TestCase):

@@ -13,10 +13,6 @@ runtime.python_library(
     name = "llama_transformer",
     srcs = [
         "llama_transformer.py",
-        "rope.py",
-        "attention.py",
-        "model_args.py",
-        "norm.py",
     ],
     _is_external_target = True,
     base_module = "executorch.examples.models.llama",
@@ -26,23 +22,21 @@ runtime.python_library(
     ],
     deps = [
         "//caffe2:torch",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_attention",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_model_args",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_norm",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_rope",
     ],
 )
 
 runtime.python_library(
+    # Source-less target: it only forwards, via deps, to the static attention
+    # target under //executorch/extension/llm/modeling/text_decoder.
     name = "static_attention",
-    srcs = [
-        "static_attention.py",
-    ],
-    _is_external_target = True,
-    base_module = "executorch.examples.models.llama",
     visibility = [
         "//executorch/...",
         "@EXECUTORCH_CLIENTS",
     ],
     deps = [
-        ":llama_transformer",
-        "//caffe2:torch",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_static_attention",
     ],
 )
 
@@ -51,7 +45,6 @@ runtime.python_library(
     srcs = [
         "__init__.py",
         "fairseq2.py",
-        "model.py",
     ],
     _is_external_target = True,
     base_module = "executorch.examples.models.llama",
@@ -69,6 +62,7 @@ runtime.python_library(
         "//executorch/examples/models/llama:llama_transformer",
         "//executorch/extension/llm/export/config:llm_config",
         "//executorch/examples/models:checkpoint",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_model",
     ],
 )
 
@@ -135,7 +129,6 @@ runtime.python_library(
     srcs = [
         "export_llama.py",
         "export_llama_lib.py",
-        "model.py",
     ],
     _is_external_target = True,
     base_module = "executorch.examples.models.llama",

@@ -4,8 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from .model import Llama2Model
+from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel
+
+
+class Llama2Model(DecoderModel):
+    """Llama2-named subclass of the generic ``DecoderModel``.
+
+    Adds no attributes or overrides of its own; all behavior is inherited
+    from ``DecoderModel``.
+    """
+
 
 __all__ = [
-    Llama2Model,
+    "Llama2Model",
 ]
@@ -1,11 +1,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.examples.models.llama.model import Llama2Model
 from executorch.examples.models.phi_4_mini.convert_weights import convert_weights
+from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel
 
 
-class Phi4MiniModel(Llama2Model):
+class Phi4MiniModel(DecoderModel):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 

@@ -1,11 +1,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.examples.models.llama.model import Llama2Model
 from executorch.examples.models.qwen2_5.convert_weights import convert_weights
+from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel
 
 
-class Qwen2_5Model(Llama2Model):
+class Qwen2_5Model(DecoderModel):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 

@@ -1,11 +1,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.examples.models.llama.model import Llama2Model
 from executorch.examples.models.qwen3.convert_weights import convert_weights
+from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel
 
 
-class Qwen3Model(Llama2Model):
+class Qwen3Model(DecoderModel):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 

@@ -1,11 +1,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from executorch.examples.models.llama.model import Llama2Model
 from executorch.examples.models.smollm2.convert_weights import convert_weights
+from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel
 
 
-class SmolLM2Model(Llama2Model):
+class SmolLM2Model(DecoderModel):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 

@@ -0,0 +1,16 @@
+# Buck targets for LLM modeling
+
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+oncall("executorch")
+
+runtime.python_library(
+    # Source-less umbrella target: depending on it pulls in whatever
+    # text_decoder:text_decoder_all aggregates.
+    # NOTE(review): no srcs are listed, so no __init__.py is packaged by this
+    # target -- confirm package-level re-exports are shipped some other way.
+    name = "modeling",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_all",
+    ],
+)
@@ -0,0 +1,26 @@
+# Modeling modules for LLM text generation
+from executorch.extension.llm.modeling.text_decoder import (
+    Attention,
+    ATTENTION_REGISTRY,
+    AttentionMHA,
+    DecoderModel,
+    ForwardOptions,
+    ModelArgs,
+    register_attention,
+    RMSNorm,
+    Rope,
+    StaticAttention,
+)
+
+__all__ = [
+    "Attention",
+    "ATTENTION_REGISTRY",
+    "AttentionMHA",
+    "DecoderModel",
+    "ForwardOptions",
+    "ModelArgs",
+    "register_attention",
+    "RMSNorm",
+    "Rope",
+    "StaticAttention",
+]
@@ -0,0 +1,129 @@
+# Any targets that should be shared between fbcode and xplat must be defined in
+# targets.bzl. This file can contain fbcode-only targets.
+
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+oncall("executorch")
+
+runtime.python_library(
+    # Ships model_args.py under executorch.extension.llm.modeling.text_decoder.
+    # Declares no deps.
+    name = "text_decoder_model_args",
+    srcs = [
+        "model_args.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.extension.llm.modeling.text_decoder",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+)
+
+runtime.python_library(
+    # Ships norm.py under executorch.extension.llm.modeling.text_decoder.
+    # Its only declared dependency is torch.
+    name = "text_decoder_norm",
+    srcs = [
+        "norm.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.extension.llm.modeling.text_decoder",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//caffe2:torch",
+    ],
+)
+
+runtime.python_library(
+    # Ships rope.py under executorch.extension.llm.modeling.text_decoder.
+    # Depends on the model-args target and torch.
+    name = "text_decoder_rope",
+    srcs = [
+        "rope.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.extension.llm.modeling.text_decoder",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        ":text_decoder_model_args",
+        "//caffe2:torch",
+    ],
+)
+
+runtime.python_library(
+    # Forwarding target. attention.py is packaged by the ":attention" target
+    # declared in the attention/ package, so this target must not list
+    # "attention/attention.py" in its own srcs: that reaches across the
+    # sub-package boundary, defines the same module twice, and (with a
+    # base_module ending in ".attention") would place the file at
+    # ...text_decoder.attention.attention.attention.
+    # The attention target already carries the model_args, norm, rope and
+    # torch deps, so they are not repeated here.
+    name = "text_decoder_attention",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//executorch/extension/llm/modeling/text_decoder/attention:attention",
+    ],
+)
+
+runtime.python_library(
+    # Forwarding target. static_attention.py is packaged by the
+    # ":static_attention" target declared in the attention/ package, so this
+    # target must not list "attention/static_attention.py" in its own srcs:
+    # that reaches across the sub-package boundary, defines the same module
+    # twice, and (with a base_module ending in ".attention") would place the
+    # file at ...text_decoder.attention.attention.static_attention.
+    # The static_attention target already carries the attention, model_args,
+    # rope and torch deps, so they are not repeated here.
+    name = "text_decoder_static_attention",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//executorch/extension/llm/modeling/text_decoder/attention:static_attention",
+    ],
+)
+
+runtime.python_library(
+    # Ships decoder_model.py under executorch.extension.llm.modeling.text_decoder.
+    # NOTE(review): this extension/ target depends on three examples/ targets
+    # (checkpoint, model_base, llama:llama_transformer) -- confirm that the
+    # extension -> examples dependency direction is intended.
+    name = "text_decoder_model",
+    srcs = [
+        "decoder_model.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.extension.llm.modeling.text_decoder",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        ":text_decoder_model_args",
+        ":text_decoder_rope",
+        "//caffe2:torch",
+        "//executorch/examples/models:checkpoint",
+        "//executorch/examples/models:model_base",
+        "//executorch/examples/models/llama:llama_transformer",
+        "//executorch/extension/llm/export/config:llm_config",
+        "fbsource//third-party/pypi/torchao:torchao",
+    ],
+)
+
+runtime.python_library(
+    # Source-less aggregate: depends on every text_decoder_* library declared
+    # in this file so clients can use a single label.
+    # NOTE(review): no srcs are listed, so no __init__.py is packaged by this
+    # target -- confirm package-level re-exports are shipped some other way.
+    name = "text_decoder_all",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        ":text_decoder_attention",
+        ":text_decoder_model",
+        ":text_decoder_model_args",
+        ":text_decoder_norm",
+        ":text_decoder_rope",
+        ":text_decoder_static_attention",
+    ],
+)
@@ -0,0 +1,26 @@
+# Text decoder models
+from executorch.extension.llm.modeling.text_decoder.attention import (
+    Attention,
+    ATTENTION_REGISTRY,
+    AttentionMHA,
+    ForwardOptions,
+    register_attention,
+    StaticAttention,
+)
+from executorch.extension.llm.modeling.text_decoder.decoder_model import DecoderModel
+from executorch.extension.llm.modeling.text_decoder.model_args import ModelArgs
+from executorch.extension.llm.modeling.text_decoder.norm import RMSNorm
+from executorch.extension.llm.modeling.text_decoder.rope import Rope
+
+__all__ = [
+    "Attention",
+    "ATTENTION_REGISTRY",
+    "AttentionMHA",
+    "DecoderModel",
+    "ForwardOptions",
+    "ModelArgs",
+    "register_attention",
+    "RMSNorm",
+    "Rope",
+    "StaticAttention",
+]
@@ -0,0 +1,43 @@
+# Buck targets for attention modules
+
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+
+oncall("executorch")
+
+runtime.python_library(
+    # Ships attention.py under
+    # executorch.extension.llm.modeling.text_decoder.attention.
+    # Depends on the model_args, norm and rope targets of the text_decoder
+    # package, plus torch.
+    name = "attention",
+    srcs = [
+        "attention.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.extension.llm.modeling.text_decoder.attention",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_model_args",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_norm",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_rope",
+        "//caffe2:torch",
+    ],
+)
+
+runtime.python_library(
+    # Ships static_attention.py under
+    # executorch.extension.llm.modeling.text_decoder.attention.
+    # Depends on the sibling ":attention" target, the model_args and rope
+    # targets of the text_decoder package, and torch.
+    name = "static_attention",
+    srcs = [
+        "static_attention.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.extension.llm.modeling.text_decoder.attention",
+    visibility = [
+        "//executorch/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        ":attention",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_model_args",
+        "//executorch/extension/llm/modeling/text_decoder:text_decoder_rope",
+        "//caffe2:torch",
+    ],
+)